• 中国大学排名


    1 # -*- coding: utf-8 -*-
     2 '''
     3 获取中国大学的排名
     4 @author: bpf
     5 '''
     6 import requests
     7 from bs4 import BeautifulSoup
     8 import pandas
     9 # 1. 获取网页内容
    10 def getHTMLText(url):
    11     try:
    12         r = requests.get(url, timeout = 30)
    13         r.raise_for_status()
    14         r.encoding = 'utf-8'
    15         return r.text
    16     except Exception as e:
    17         print("Error:", e)
    18         return ""
    19 
    20 # 2. 分析网页内容并提取有用数据
    21 def fillTabelList(soup): # 获取表格的数据
    22     tabel_list = []      # 存储整个表格数据
    23     Tr = soup.find_all('tr')
    24     for tr in Tr:
    25         Td = tr.find_all('td')
    26         if len(Td) == 0:
    27             continue
    28         tr_list = [] # 存储一行的数据
    29         for td in Td:
    30             tr_list.append(td.string)
    31         tabel_list.append(tr_list)
    32     return tabel_list
    33 
    34 # 3. 可视化展示数据
    35 def PrintTableList(tabel_list, num):
    36     # 输出前num行数据
    37     print("{1:^2}{2:{0}^10}{3:{0}^5}{4:{0}^5}{5:{0}^8}".format(chr(12288), "排名", "学校名称", "省市", "总分", "生涯质量"))
    38     for i in range(num):
    39         text = tabel_list[i]
    40         print("{1:{0}^2}{2:{0}^10}{3:{0}^5}{4:{0}^8}{5:{0}^10}".format(chr(12288), *text))
    41 
    42 # 4. 将数据存储为csv文件
    43 def saveAsCsv(filename, tabel_list):
    44     FormData = pandas.DataFrame(tabel_list)
    45     FormData.columns = ["排名", "学校名称", "省市", "总分", "生涯质量", "培养结果", "科研规模", "科研质量", "顶尖成果", "顶尖人才", "科技服务", "产学研合作", "成果转化"]
    46     FormData.to_csv(filename, encoding='utf-8', index=False)
    47 
    48 if __name__ == "__main__":
    49     url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html"
    50     html = getHTMLText(url)
    51     soup = BeautifulSoup(html, features="html.parser")
    52     data = fillTabelList(soup)
    53     #print(data)
    54     PrintTableList(data, 10)   # 输出前10行数据
    55     saveAsCsv("D:\University_Rank.csv", data)
    复制代码
    复制代码

     运行结果:

  • 相关阅读:
    外挂方法.md
    第三章.md
    沙箱逃逸.md
    flex
    flash builder 关联svn
    flash 动画
    Flash移动开发的一本好书Android&IOS
    Foundation ActionScript 3.0.With Flash CS3 And Flex ..
    flash移动基础开发(PDF)
    O'Reilly.HTML5.Up.and.Running HTML5的一本好书
  • 原文地址:https://www.cnblogs.com/luorunsb/p/10929726.html
Copyright © 2020-2023  润新知