题目:
编程爬取每日最新的疫情统计数据。
并将爬取结果导入到数据库中。
将可视化结果与统计数据结合,实时显示当前最新数据。
这次的作业与上周的可视化作业可以整合成一份完整的代码,只需要在这次加上 Python 爬取数据的部分即可。
本次爬取的是丁香医生网站的数据,网址为:https://ncov.dxy.cn/ncovh5/view/pneumonia
爬取的代码如下:
"""Scrape the daily COVID-19 statistics from ncov.dxy.cn into MySQL.

The page embeds its data as a JavaScript assignment inside
``<script id="getAreaStat">``.  This script downloads the page, decodes that
embedded JSON array, flattens it into per-province and per-city rows, and
inserts the rows into the ``info_copy`` table together with the local time
at which the scrape ran.
"""
from os import path
import json
import time
from _ast import Try

# Request address.
URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'
# Browser-like header sent with the request.
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}

# The scrape timestamp is bound as the last parameter instead of being
# concatenated into the statement text.
SQL_CITY = "insert into info_copy (Province,City,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s,%s)"
SQL_PROVINCE = "insert into info_copy (Province,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s)"


def fetch_page(url=URL, headers=HEADERS, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Raises ``requests.HTTPError`` on a non-2xx response and
    ``requests.Timeout`` if the server does not answer within *timeout*
    seconds.
    """
    # Third-party imports live at function scope so the pure helpers below
    # can be imported without the scraping/database packages installed.
    import requests

    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.content.decode('utf-8')


def extract_json_array(script_text):
    """Return the first JSON array that follows the ``=`` in *script_text*.

    The text is expected to look like ``try { window.X = [...]}catch(e){}``.
    Decoding starts at the first ``[`` after the assignment and stops at the
    end of that array, so whatever JavaScript trails it is ignored.

    Raises ``ValueError`` if no array is present or it is not valid JSON
    (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # str.find returns -1 when there is no '=', so the search starts at 0.
    start = script_text.find('[', script_text.find('=') + 1)
    if start == -1:
        raise ValueError('no JSON array found in script text')
    data, _end = json.JSONDecoder().raw_decode(script_text, start)
    return data


def load_embedded_json(soup, script_id):
    """Decode the JSON array embedded in ``<script id=script_id>`` of *soup*.

    Raises ``ValueError`` if the script tag is missing or holds no array.
    """
    tag = soup.find(name='script', attrs={'id': script_id})
    if tag is None:
        raise ValueError('script tag %r not found in page' % script_id)
    return extract_json_array(tag.string or '')


def build_rows(provinces, timestamp):
    """Flatten the province list into ``(province_rows, city_rows)``.

    Each province row is ``(provinceName, confirmedCount, curedCount,
    deadCount, locationId, timestamp)``; each city row is ``(provinceName,
    cityName, confirmedCount, curedCount, deadCount, locationId, timestamp)``.
    Missing keys become ``None``.
    """
    province_rows = []
    city_rows = []
    for province in provinces:
        province_name = province.get('provinceName')
        province_rows.append((
            province_name,
            province.get('confirmedCount'),
            province.get('curedCount'),
            province.get('deadCount'),
            province.get('locationId'),
            timestamp,
        ))
        # A province that is not split into cities may carry a missing or
        # null 'cities' entry; treat it as having no city rows.
        for city in province.get('cities') or []:
            city_rows.append((
                province_name,
                city.get('cityName'),
                city.get('confirmedCount'),
                city.get('curedCount'),
                city.get('deadCount'),
                city.get('locationId'),
                timestamp,
            ))
    return province_rows, city_rows


def save_rows(province_rows, city_rows):
    """Insert all rows into ``info_copy`` in a single transaction.

    On a database error the transaction is rolled back and the failure is
    printed; the connection is closed in every case.
    """
    import pymysql

    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         database="payiqing", charset='utf8')
    try:
        with db.cursor() as cursor:
            cursor.executemany(SQL_PROVINCE, province_rows)
            cursor.executemany(SQL_CITY, city_rows)
        db.commit()
    except pymysql.MySQLError as exc:
        print('执行失败,进入回调4')
        print(exc)
        db.rollback()
    finally:
        db.close()


def main():
    """Scrape the page, print the scrape time, and store the rows."""
    from bs4 import BeautifulSoup

    content = fetch_page()
    soup = BeautifulSoup(content, 'html.parser')
    provinces = load_embedded_json(soup, 'getAreaStat')

    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(timestamp)

    province_rows, city_rows = build_rows(provinces, timestamp)
    save_rows(province_rows, city_rows)


if __name__ == '__main__':
    main()
加上上次的代码,效果如下图所示:
psp表格
缺陷记录日志