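"""Scrape university ranking information with Python.

Approach:
1. Fetch the web page that holds the ranking.
2. Find the university ranking information and fill it into a list.
3. Print the collected university information.
"""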
import requests
from bs4 import BeautifulSoup
import bs4
import re
import time
def getHTMLText(url):
    """Fetch *url* and return the page content as text.

    The request uses a 30-second timeout. The response encoding is set
    from ``apparent_encoding`` (the encoding guessed from the body) before
    the text is decoded.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, invalid URL, or an HTTP error status
        reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Decode using the encoding detected from the body rather than the
        # one declared in the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers receive "" instead of an exception.
        # Only request failures are swallowed, so Ctrl-C still interrupts.
        return ""
def fillUnivlist(ulist, html):
    """Extract table rows from *html* and append them to *ulist*.

    Every ``<tr>`` element that contains at least four ``<td>`` cells adds
    one four-item list to *ulist*, holding the ``.string`` of its first
    four cells. Rows with fewer cells (for example header rows) are
    skipped.

    Args:
        ulist: List that is extended in place with the extracted rows.
        html: HTML document text to parse.

    Returns:
        None. Results are delivered by mutating *ulist*.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for tr in soup.find_all('tr'):
        tds = tr('td')  # calling a tag is shorthand for tr.find_all('td')
        if len(tds) < 4:
            # Not a data row; skip it explicitly instead of relying on an
            # IndexError being swallowed.
            continue
        # A cell's .string may be None when the cell does not hold exactly
        # one text node; such values are stored unchanged.
        ulist.append([td.string for td in tds[:4]])
def printUnivList(ulist, num, pause=0):
    """Print the first *num* rows of *ulist* as an aligned table.

    A header line (rank, school name, region, total score) is printed
    first, then one line per row, then a final ``"Suc<count>"`` line where
    ``<count>`` is the number of rows actually printed.

    Args:
        ulist: Sequence of rows; the first four items of each row are
            printed. A ``None`` item is printed as empty text.
        num: Maximum number of rows to print. When *ulist* holds fewer
            rows, only the available rows are printed.
        pause: Seconds to sleep after the table and before the final
            status line. Defaults to 0 (no delay).

    Returns:
        None. Output goes to standard output.
    """
    tplt = "{0:^10} {1:{4}^10} {2:^10} {3:^10}"
    # U+3000 (full-width space) pads the name column so that CJK text
    # lines up; an ASCII space is narrower than a CJK character.
    fill = chr(12288)
    print(tplt.format("排名", "学校名称", "地区", "总分", fill))
    # Slicing never overruns the list; max() keeps a negative num from
    # selecting rows counted from the end.
    shown = ulist[:max(num, 0)]
    for u in shown:
        cells = ["" if c is None else c for c in u[:4]]
        print(tplt.format(cells[0], cells[1], cells[2], cells[3], fill))
    if pause > 0:
        time.sleep(pause)
    print("Suc" + str(len(shown)))
def main():
    """Download the ranking page, extract its rows, and print the top ones.

    Prints a short notice and returns early when no rows could be
    extracted (for example because the download failed and
    ``getHTMLText`` returned an empty string).
    """
    top_n = 300  # number of leading rows to print; adjust as needed
    uinfo = []
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html'
    html = getHTMLText(url)
    fillUnivlist(uinfo, html)
    if not uinfo:
        print("No ranking rows found at " + url)
        return
    # Never request more rows than were actually parsed.
    printUnivList(uinfo, min(top_n, len(uinfo)))
# Run the scraper only when executed as a script, so that importing this
# module does not trigger a network request.
if __name__ == "__main__":
    main()