• 爬取腾讯疫情数据


    def get_tencent_data():
        """Fetch epidemic statistics from the Tencent news API.

        Two endpoints are queried in order: ``disease_other`` (nationwide
        daily totals and daily increments) and ``disease_h5`` (current
        per-city figures).

        Returns:
            tuple: ``(history, details)`` where

            * ``history`` maps a ``"YYYY-MM-DD"`` date string to a dict with the
              keys ``confirm``, ``suspect``, ``heal``, ``dead`` and, when the
              increment list has an entry for that date, ``confirm_add``,
              ``suspect_add``, ``heal_add``, ``dead_add``.
            * ``details`` is a list of
              ``[update_time, province, city, confirm, confirm_add, heal, dead]``
              rows, one per city.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
        }

        def _fetch(url):
            # ``headers`` must be passed by keyword: the second positional
            # parameter of ``requests.get`` is ``params`` (the query string).
            # The timeout keeps a stalled connection from hanging forever.
            response = requests.get(url, headers=headers, timeout=10)
            # The response is JSON whose 'data' field is itself a JSON string.
            return json.loads(json.loads(response.text)['data'])

        def _to_iso_date(month_day):
            # The 'date' field carries no year; 2020 is assumed, as before.
            parsed = time.strptime("2020." + month_day, "%Y.%m.%d")
            return time.strftime("%Y-%m-%d", parsed)

        data_all = _fetch('https://view.inews.qq.com/g2/getOnsInfo?name=disease_other')

        history = {}
        for day in data_all['chinaDayList']:
            history[_to_iso_date(day['date'])] = {
                'confirm': day['confirm'],
                'suspect': day['suspect'],
                'heal': day['heal'],
                'dead': day['dead'],
            }
        for day in data_all['chinaDayAddList']:
            # setdefault: an increment entry may exist for a date that has no
            # totals entry; indexing directly would raise KeyError.
            history.setdefault(_to_iso_date(day['date']), {}).update({
                "confirm_add": day['confirm'],
                "suspect_add": day['suspect'],
                "heal_add": day['heal'],
                "dead_add": day['dead'],
            })

        data_h5 = _fetch('https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5')
        update_time = data_h5['lastUpdateTime']
        # The first entry of 'areaTree' is the country node whose children
        # are provinces; each province's children are cities.
        data_province = data_h5['areaTree'][0]['children']
        details = [
            [
                update_time,
                pro_infos['name'],
                city_infos['name'],
                city_infos['total']['confirm'],
                city_infos['today']['confirm'],
                city_infos['total']['heal'],
                city_infos['total']['dead'],
            ]
            for pro_infos in data_province
            for city_infos in pro_infos['children']
        ]
        return history, details
    tencent.py
    import traceback
    def update_details():
        """Insert the latest per-city rows into the ``details`` table.

        The freshly fetched ``update_time`` is compared with the
        ``update_time`` of the most recent row (highest ``id``). Rows are
        inserted and committed only when the two differ; otherwise a message
        saying the data is already current is printed.

        Errors are printed with their traceback and not re-raised.
        ``close_conn`` is always called, even when no connection was opened.
        """
        cursor = None
        conn = None
        try:
            li = get_tencent_data()[1]
            conn, cursor = get_conn()
            sql = "insert into details(update_time,province,city,confirm,confirm_add,heal,dead) values(%s,%s,%s,%s,%s,%s,%s)"
            # Evaluates to a truthy value when the newest stored update_time
            # equals the fetched one.
            sql_query = "select %s=(select update_time from details order by id desc limit 1)"
            # Parameters are passed as a sequence, as DB-API specifies.
            cursor.execute(sql_query, (li[0][0],))
            if not cursor.fetchone()[0]:
                print(f"{time.asctime()}开始更新最新数据")
                cursor.executemany(sql, li)
                conn.commit()
                print(f"{time.asctime()}更新最新数据完毕")
            else:
                print(f"{time.asctime()}已是最新数据!")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    def insert_history():
        """Insert every fetched daily record into the ``history`` table.

        Each row is written as ``(ds, confirm, confirm_add, suspect,
        suspect_add, heal, heal_add, dead, dead_add)``; fields missing from a
        day's record are inserted as ``None``. No check for existing rows is
        made.

        Errors are printed with their traceback and not re-raised.
        ``close_conn`` is always called, even when no connection was opened.
        """
        cursor = None
        conn = None
        try:
            dic = get_tencent_data()[0]
            print(f"{time.asctime()}开始插入历史数据")
            conn, cursor = get_conn()
            sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            rows = [
                [k, v.get('confirm'), v.get('confirm_add'), v.get('suspect'),
                 v.get('suspect_add'), v.get('heal'), v.get('heal_add'),
                 v.get('dead'), v.get('dead_add')]
                for k, v in dic.items()
            ]
            if rows:
                cursor.executemany(sql, rows)
            conn.commit()
            print(f"{time.asctime()}插入历史数据完毕")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    insert
    def update_history():
        """Insert daily records whose date is not yet in the ``history`` table.

        For every fetched date the table is queried by ``ds``; a row is
        inserted only when that query returns nothing. Each row is written as
        ``(ds, confirm, confirm_add, suspect, suspect_add, heal, heal_add,
        dead, dead_add)``; missing fields are inserted as ``None``.

        Errors are printed with their traceback and not re-raised.
        ``close_conn`` is always called, even when no connection was opened.
        """
        cursor = None
        conn = None
        try:
            dic = get_tencent_data()[0]
            print(f"{time.asctime()}开始更新历史数据")
            conn, cursor = get_conn()
            sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            sql_query = "select confirm from history where ds=%s"
            for k, v in dic.items():
                cursor.execute(sql_query, (k,))
                # Decide from the fetched rows, not from the return value of
                # ``execute``: DB-API leaves that value undefined, so relying
                # on it only works with drivers that return a row count.
                if not cursor.fetchall():
                    cursor.execute(sql, [k, v.get('confirm'), v.get('confirm_add'), v.get('suspect'),
                                         v.get('suspect_add'), v.get('heal'), v.get('heal_add'),
                                         v.get('dead'), v.get('dead_add')])
            conn.commit()
            print(f"{time.asctime()}历史数据更新完毕")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            traceback.print_exc()
        finally:
            close_conn(conn, cursor)
    update
  • 相关阅读:
    scala入门-03基础知识->表达式
    scala入门-02基础知识->方法
    jetty命令行方式启动jetty-runner.jar 容器
    本地开发spark代码上传spark集群服务并运行(基于spark官网文档)
    Linux下查看进程和线程
    scala入门-01-IDEA安装scala插件
    spark-1.2.0 集群环境搭建
    ubuntu每次登陆都用root账号登陆
    hadoop2.6.0版本集群环境搭建
    spark ssh配置
  • 原文地址:https://www.cnblogs.com/xuqidong/p/13311202.html
Copyright © 2020-2023  润新知