• python爬取丁香园疫情数据


    丁香园页面中的数据是通过 JS 加载的(内嵌在页面源码里形如 window.getAreaStat = ... 的赋值语句中),因此需要先用正则表达式截取出数据,再转成 JSON 或其他数据形式进行处理

    爬取代码:

    import pymysql
    import requests
    import pprint
    import re
    import json
    
    
    def write_to_file(item):
        """Save *item* to ``yiqing.json`` as indented, UTF-8 encoded JSON.

        The file is created in the current working directory and overwritten
        on every call. Non-ASCII characters are written literally instead of
        being escaped.
        """
        with open('yiqing.json', mode='w', encoding='utf-8') as out:
            serialized = json.dumps(item, ensure_ascii=False, indent=4)
            # Leaving the ``with`` block closes the file.
            out.write(serialized)
    
    def mysql_(host='127.0.0.1', user='root', password='yuanpeng0',
               database='test', port=3306):
        """Open a MySQL connection and return it together with a cursor.

        Every parameter is optional; the defaults are the values that used to
        be hard-coded here, so a bare ``mysql_()`` call connects exactly as
        before.

        Args:
            host: MySQL server address.
            user: Account name used to log in.
            password: Password for ``user``.
            database: Schema selected after connecting.
            port: TCP port of the server.

        Returns:
            A ``(conn, cur)`` tuple: the open connection and a cursor created
            from it with ``pymysql.cursors.DictCursor``. The caller is
            responsible for committing and for closing both objects.
        """
        # NOTE(review): the default password is kept only for backward
        # compatibility; real deployments should pass credentials in from
        # configuration instead of relying on values stored in source.
        conn = pymysql.connect(
            host=host,
            user=user,
            password=password,
            database=database,
            port=port,
            charset='utf8',
            cursorclass=pymysql.cursors.DictCursor,
        )
        return conn, conn.cursor()
    
    # Page whose source embeds the figures as ``window.<name> = <json>``
    # assignments; the payloads are cut out with regular expressions below.
    PAGE_URL = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?scene=2&clicktime=1579583352&enterid=1579583352&from=timeline&isappinstalled=0'


    def _search_page(pattern, page, label):
        """Return the match of *pattern* in *page*, or raise ``ValueError``.

        ``re.S`` is used so ``.`` also spans newlines. *label* only names the
        missing payload in the error message.
        """
        match = re.search(pattern, page, re.S)
        if match is None:
            raise ValueError('could not find %s in the page source' % label)
        return match


    # A timeout keeps the script from hanging forever on a stalled connection,
    # and raise_for_status() stops us from parsing an HTTP error page.
    response = requests.get(PAGE_URL, timeout=10)
    response.raise_for_status()
    url_text = response.content.decode()

    # --- Per-area statistics -> yiqing.json --------------------------------
    # The text looks like ``window.getAreaStat = [ ... }]}catch``; the capture
    # group keeps the JSON array (up to and including ``}]``) and leaves the
    # trailing ``}catch`` out.
    area_match = _search_page(
        r'window\.getAreaStat = (.*?\}\])\}catch', url_text, 'window.getAreaStat')
    area_stats = json.loads(area_match.group(1))
    write_to_file(area_stats)

    # --- Overall counters -> MySQL -----------------------------------------
    # The object is cut off right after the marker text and then closed by
    # hand with ``"}``, so only the fields that precede the marker are parsed.
    # NOTE(review): this depends on that exact marker text being present in
    # the page; confirm it still is if this lookup starts failing.
    stats_match = _search_page(
        r' window\.getStatisticsService = (.*?该字段已替换为说明1)',
        url_text, 'window.getStatisticsService')
    statistics = json.loads(stats_match.group(1) + '"}')

    # Column names in ``yiqingcount`` are identical to the JSON keys, so one
    # tuple drives both the value lookup and the SET clause of the UPDATE.
    count_fields = (
        'currentConfirmedCount',
        'suspectedCount',
        'seriousCount',
        'confirmedCount',
        'deadCount',
        'curedCount',
    )
    counts = tuple(statistics[field] for field in count_fields)
    print([counts])

    update_sql = (
        'update yiqingcount set '
        + ','.join(field + '=%s' for field in count_fields)
        + ' where id = 1'
    )

    conn, cur = mysql_()
    try:
        # Only one row is updated, so a single execute() is enough.
        cur.execute(update_sql, counts)
        conn.commit()
    finally:
        # Release the cursor and the connection even if the update fails.
        cur.close()
        conn.close()

    爬取数据形式如下:

  • 相关阅读:
    浅谈线段树
    浅谈KMP
    20200729线上模拟题解
    20200727线上模拟题解
    声明
    tarjan--割点,缩点
    20201029模拟
    高精模板
    二分图--二分图的几种模型
    树的直径与树的重心
  • 原文地址:https://www.cnblogs.com/yuanxiaochou/p/12318451.html
Copyright © 2020-2023  润新知