• 爬虫_1



    import urllib.request
    import time,re
    import pandas as pd
    from datetime import datetime, timedelta
    # from tqdm import tqdm

    # def get_data(base_url,tt):
    # html=urllib.request.urlopen(base_url).read().decode('utf-8')
    # html = str(html)
    # key_names =re.compile('.*?"NAME_":"(.*?)",')
    # key_aqi =re.compile('.*?,"AQI_":(.*?),')
    # PRIMARY_POLLUTANTS_ =re.compile('.*?"PRIMARY_POLLUTANTS_":"(.*?)",')
    # names = re.findall( key_names, html)
    # aqi = re.findall( key_aqi, html)
    # PRIMARY_POLLUTANTS_=re.findall(PRIMARY_POLLUTANTS_,html)
    # data = pd.DataFrame()
    # data['市/区/县'] = names
    # data['AQI_'+str(tt)] = aqi
    # data['首污_'+str(tt)]=PRIMARY_POLLUTANTS_
    # return data

    def get_data_chengdu(base_url, tt):
        """Fetch one day's city-level AQI page and parse it into a DataFrame.

        The endpoint returns JSON-like text with records shaped
        {"CITY_":"...", ..., "AQI_":"...", ..., "PRIMARY_POLLUTANTS_":"..."};
        the three fields are pulled out with regexes rather than json.loads
        (preserves the original parsing behavior).

        Args:
            base_url: full request URL, already containing token and date.
            tt: date string "YYYYMMDD"; used to suffix the output column names.

        Returns:
            pandas.DataFrame with columns:
                '市/区/县'      - city name
                'AQI_<tt>'     - AQI value (string, as scraped)
                '首污_<tt>'     - primary pollutant (string, as scraped)
        """
        html = urllib.request.urlopen(base_url).read().decode('utf-8')
        # Raw-string patterns; findall returns every capture-group match.
        names = re.findall(r'{"CITY_":"(.*?)",', html)
        aqi = re.findall(r'"AQI_":"(.*?)",', html)
        pollutants = re.findall(r',"PRIMARY_POLLUTANTS_":"(.*?)"}', html)
        data = pd.DataFrame()
        data['市/区/县'] = names
        data['AQI_' + str(tt)] = aqi
        data['首污_' + str(tt)] = pollutants
        return data


    # def day_chengdu_area(days_2018,data_days):
    # for day_index in range(days_2018-3):
    # tt = datetime.strptime('20180102', "%Y%m%d") + timedelta(days=day_index)
    # tt = tt.strftime("%Y%m%d")
    # base_url = r'http://weixin.cdepb.gov.cn:20005/data/w/%E5%8C%BA%E5%8E%BF%E6%97%A5%E6%8E%92%E5%90%8D?'
    # r'token=78C579FA28C9B1E6C156A806C392458C&date={}&rows=1000&page=1'.format(tt)
    # # print(base_url)
    # data=get_data(base_url,tt)
    # data.index=data['市/区/县']
    # # data.sort_values(by=['市/区/县'])
    # # data.sort(['市/区/县'])
    # # print(data)
    # # data_days=data_days.set_index('市/区/县').join(data.set_index('市/区/县'))
    # data_days=pd.merge(data_days,data,on='市/区/县')
    #
    # # data_days.join(data, lsuffix='_caller', rsuffix='_other')
    # print(data_days)
    # # time.sleep(60)
    # break
    # # data_days.to_excel(r'data_days.xlsx')



    def day_chengdu(days_2018, data_days):
        """Collect daily city AQI tables from 2018-01-02 onward and merge them.

        For each day, fetches the province-wide daily AQI page via
        get_data_chengdu() and inner-merges it into data_days on the city
        column, so data_days grows two columns (AQI_<date>, 首污_<date>) per
        day. The accumulated table is printed and written to data_days.xlsx.

        Args:
            days_2018: day-of-year today; the loop covers days_2018 - 3 days
                       starting at 2018-01-02.
            data_days: seed DataFrame containing at least the '市/区/县' column
                       (typically the result for 2018-01-01).
        """
        for day_index in range(days_2018 - 3):
            day = datetime.strptime('20180102', "%Y%m%d") + timedelta(days=day_index)
            tt = day.strftime("%Y%m%d")
            # NOTE(fix): the original had the URL split across two bare string
            # statements, so the '?token=...&date=...' part (and the .format
            # call) was silently discarded. Parentheses make it one literal.
            base_url = (
                'http://weixin.cdepb.gov.cn:20005/data/w/'
                '%E5%9B%9B%E5%B7%9D%E7%9C%81%E5%90%84%E5%9F%8E%E5%B8%82%E7%A9%'
                'BA%E6%B0%94%E8%B4%A8%E9%87%8F%E6%97%A5%E6%95%B0%E6%8D%AE'
                '?token=78C579FA28C9B1E6C156A806C392458C'
                '&date={}&rows=1000&page=1'
            ).format(tt)
            data = get_data_chengdu(base_url, tt)
            data.index = data['市/区/县']
            # Inner merge: cities missing on any day drop out of the result.
            data_days = pd.merge(data_days, data, on='市/区/县')
        print(data_days)
        data_days.to_excel(r'data_days.xlsx')

    if __name__ == '__main__':
        # Fetch Chengdu/Sichuan overall daily AQI, starting from 2018-01-01,
        # and accumulate one column pair per day via day_chengdu().
        days_2018 = time.localtime(time.time()).tm_yday
        tt = datetime.strptime('20180101', "%Y%m%d").strftime("%Y%m%d")
        # NOTE(fix): originally split across two bare string statements, which
        # discarded the query string and the .format(tt) result entirely;
        # parenthesized implicit concatenation builds the full URL.
        chengdu_url = (
            'http://weixin.cdepb.gov.cn:20005/data/w/'
            '%E5%9B%9B%E5%B7%9D%E7%9C%81%E5%90%84%E5%9F%8E%E5%B8%82%E7%A9%BA'
            '%E6%B0%94%E8%B4%A8%E9%87%8F%E6%97%A5%E6%95%B0%E6%8D%AE'
            '?token=78C579FA28C9B1E6C156A806C392458C'
            '&date={}&rows=1000&page=1'
        ).format(tt)
        # Seed table for 2018-01-01; day_chengdu merges the remaining days in.
        data_chengdu = get_data_chengdu(chengdu_url, tt)
        day_chengdu(days_2018, data_chengdu)


  • 相关阅读:
    BZOJ_2002_[Hnoi2010]Bounce 弹飞绵羊_LCT
    BZOJ_4154_[Ipsc2015]Generating Synergy_KDTree
    BZOJ_2801_[Poi2012]Minimalist Security_dfs树+特判+乱搞
    BZOJ_3123_[Sdoi2013]森林_主席树+启发式合并
    2019集训队作业做题实况[1](1-30):
    牛客挑战赛33 F 淳平的形态形成场(无向图计数,EGF,多项式求逆)
    【NOIP2019模拟2019.10.07】果实摘取 (约瑟夫环、Mobius反演、类欧、Stern-Brocot Tree)
    CodeChef Max-digit Tree(动态规划)
    骚操作:c++如何用goto便捷地写人工栈?
    Comet OJ
  • 原文地址:https://www.cnblogs.com/avivi/p/11354299.html
Copyright © 2020-2023  润新知