• python简单爬数据(这两个成功了)


    这两个做的比较早,也比较幸运,成功做出来了,把代码扔在这里

    老师叫我从两个网站上弄点数据,数据不多,但是要分月份,小时,经度,纬度,虽然不用排列组合还是挺麻烦的

    人生苦短,我用Python

    大半年前只是看了看 Python 的语法,一直没有实践过;这次要抓取数据的网页比较简单,正好拿来练练手


    代码里面已经包含了目标网址,就不具体介绍了,保存下来的是网页,还需要一个脚本来处理一下,这个比较简单也就不放了。

    1

    #!/usr/bin/python
    """Download IRI-2012 model output from NASA OMNIWeb while sweeping one parameter.

    One form parameter (chosen by ``sel``) is varied over a fixed range while
    all other form fields stay constant.  The raw HTML response of every
    request is appended to a single text file next to this script, framed by
    marker lines so that a later script can split the file per swept value.
    """

    import os
    import sys
    import time

    import requests


    # -------- configuration: which parameter to sweep
    # one of: 'hour', 'month', 'latitude', 'longitude'
    sel = 'longitude'
    # --------

    web_url = r'https://omniweb.gsfc.nasa.gov/vitmo/iri2012_vitmo.html'  # IRI2012 form page
    request_url = r'https://omniweb.gsfc.nasa.gov/cgi/vitmo/vitmo_model.cgi'

    # Seconds to wait for the server before a request counts as failed; without
    # a timeout a single stalled connection would hang the whole run.
    REQUEST_TIMEOUT = 60
    # Seconds to pause before retrying a failed request, so that a server that
    # is down is not hammered in a tight loop.
    RETRY_DELAY = 5

    # os.path.join avoids the invalid '\d' escape and the hard-coded Windows
    # separator of the original string concatenation.
    filepath = os.path.join(sys.path[0], 'data_iri2012_raw_' + sel + '.txt')
    print(filepath)

    # Headers copied from a browser session: POST /cgi/vitmo/vitmo_model.cgi HTTP/1.1
    # 'Content-Length' is intentionally not listed: requests computes it from
    # the encoded form body, and a hard-coded value is wrong as soon as the
    # payload changes.
    headers = {
        'Host': 'omniweb.gsfc.nasa.gov',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        # 'br' is not advertised: requests can only decode brotli when an
        # optional brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': web_url,
        'Cookie': '_ga=GA1.4.167527256.1494290145; _gid=GA1.4.2137494148.1494290145; _gat_GSA_ENOR0=1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    # Form fields as submitted by the web page; fields tagged "per run" are
    # overwritten just below.
    payload = {
        'model': 'iri_2012',
        'year': '2016',         # per run
        'month': '12',          # per run
        'day': '01',            # per run
        'time_flag': '1',
        'hour': '8',            # per run
        'geo_flag': '0.',
        'latitude': '50.',      # per run
        'longitude': '40.',     # per run
        'height': '100.',
        'profile': '1',
        'start': '100.',        # profile start
        'stop': '1000.',        # profile stop
        'step': '50.',          # profile step
        'sun_n': '',
        'ion_n': '',
        'radio_f': '',
        # NOTE(review): this key ends with a space; presumably a typo for
        # 'radio_f81' -- verify against the form before changing it.
        'radio_f81 ': '',
        'htec_max': '',
        'ne_top': '0.',
        'imap': '0.',
        'ffof2': '0.',
        'ib0': '2.',
        'probab': '0.',
        'fauroralb': '1.',
        'ffoE': '1.',
        'dreg': '0.',
        'tset': '0.',
        'icomp': '0.',
        'nmf2': '0.',
        'hmf2': '0.',
        'user_nme': '0.',
        'user_hme': '0.',
        'format': '0',
        # Output variables; per the original author's note: Ne, Tn, Ti, Te.
        'vars': ['17', '19', '20', '21'],
        'linestyle': 'solid',
        'charsize': '',
        'symbol': '2',
        'symsize': '',
        'yscale': 'Linear',
        'xscale': 'Linear',
        'imagex': '640',
        'imagey': '480',
    }

    # Fixed values for this run; the entry named by ``sel`` is replaced again
    # on every iteration of the sweep.
    payload['year'] = '2016'
    payload['month'] = '12'
    payload['day'] = '01'
    payload['hour'] = '8'
    payload['longitude'] = '120'
    payload['latitude'] = '60'
    payload['start'] = '60'
    payload['stop'] = '1000'
    payload['step'] = '1'

    # Number of requests attempted so far (retries included).
    count = 0

    # Values visited for each sweepable parameter.
    hours = range(1, 25)
    months = range(1, 13)
    latitudes = range(-90, 100, 10)
    longitudes = range(0, 360, 10)
    dic = {
        'hour': hours,
        'month': months,
        'latitude': latitudes,
        'longitude': longitudes,
    }

    items = dic[sel]
    itemname = sel

    # The context manager closes the file even if the run is interrupted.
    with open(filepath, 'w', encoding='utf-8') as fid:
        for item in items:
            payload[itemname] = str(item)
            fid.write('\n#===================== ' + str(item) + ' =====================\n')

            # Retry until the request succeeds.  Only network-level failures
            # are retried; anything else (e.g. a disk error) propagates.
            while True:
                print('\n===================== ' + str(item) + ' =====================\n')
                count = count + 1
                print('count : ' + str(count))
                try:
                    r = requests.post(request_url, data=payload,
                                      headers=headers, timeout=REQUEST_TIMEOUT)
                except requests.RequestException as e:
                    print(e)
                    time.sleep(RETRY_DELAY)
                    continue
                break

            fid.write(r.text)
            fid.write('\n--------------------- ' + str(item) + ' ---------------------\n')

    2

    #!/usr/bin/python
    """Download NRLMSISE-00 model output from NASA CCMC while sweeping one parameter.

    One form parameter (chosen by ``sel``) is varied over a fixed range while
    all other form fields stay constant.  The raw HTML response of every
    request is appended to a single text file next to this script, framed by
    marker lines so that a later script can split the file per swept value.
    """

    import os
    import sys
    import time

    import requests


    # -------- configuration: which parameter to sweep
    # one of: 'hour', 'month', 'latitude', 'longitude'
    sel = 'longitude'
    # --------

    web_url = r'https://ccmc.gsfc.nasa.gov/modelweb/models/nrlmsise00.php'  # form page
    request_url = r'https://ccmc.gsfc.nasa.gov/cgi-bin/modelweb/models/vitmo_model.cgi'

    # Seconds to wait for the server before a request counts as failed; without
    # a timeout a single stalled connection would hang the whole run.
    REQUEST_TIMEOUT = 60
    # Seconds to pause before retrying a failed request, so that a server that
    # is down is not hammered in a tight loop.
    RETRY_DELAY = 5

    # os.path.join avoids the invalid '\d' escape and the hard-coded Windows
    # separator of the original string concatenation.
    filepath = os.path.join(sys.path[0], 'data_nrmlsise_raw_' + sel + '.txt')
    print(filepath)

    # Headers copied from a browser session:
    # POST /cgi-bin/modelweb/models/vitmo_model.cgi HTTP/1.1
    # 'Content-Length' is intentionally not listed: requests computes it from
    # the encoded form body, and a hard-coded value is wrong as soon as the
    # payload changes.
    headers = {
        'Host': 'ccmc.gsfc.nasa.gov',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        # 'br' is not advertised: requests can only decode brotli when an
        # optional brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': web_url,
        'Cookie': '__utma=35212851.490003371.1494462808.1494462808.1494462808.1; __utmb=35212851.12.10.1494462808; __utmc=35212851; __utmz=35212851.1494462808.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmt=1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0',
    }

    # Form fields as submitted by the web page; several of them are
    # overwritten just below with the values used for this run.
    payload = {
        'model': 'nrlmsise',
        'year': '2016',
        'month': '12',
        'day': '01',
        'time_flag': '1',
        'hour': '8',
        'geo_flag': '0.',
        'latitude': '60',
        'longitude': '120',
        'height': '100.',
        'profile': '1',
        'start': '60.',
        'stop': '1000.',
        'step': '10.',
        'f10_7': '',
        'f10_7_3': '',
        'ap': '',
        'format': '0',
        # Output variables; per the original author's note: O, N2, O2
        # (atomic oxygen, molecular nitrogen, molecular oxygen).
        'vars': ['08', '09', '10'],
        'linestyle': 'solid',
        'charsize': '1.0',
        'symbol': '2',
        'symsize': '1.0',
        'yscale': 'Lin',
        'xscale': 'Lin',
        'imagex': '640',
        'imagey': '480',
    }

    # Fixed values for this run; the entry named by ``sel`` is replaced again
    # on every iteration of the sweep.
    payload['year'] = '2016'
    payload['month'] = '12'
    payload['day'] = '01'
    payload['hour'] = '8'
    payload['longitude'] = '120'
    payload['latitude'] = '60'
    payload['start'] = '60'
    payload['stop'] = '1000'
    payload['step'] = '1'

    # Number of requests attempted so far (retries included).
    count = 0

    # Values visited for each sweepable parameter.
    hours = range(1, 25)
    months = range(1, 13)
    latitudes = range(-90, 100, 10)
    longitudes = range(0, 360, 10)
    dic = {
        'hour': hours,
        'month': months,
        'latitude': latitudes,
        'longitude': longitudes,
    }

    items = dic[sel]
    itemname = sel

    # The context manager closes the file even if the run is interrupted.
    with open(filepath, 'w', encoding='utf-8') as fid:
        for item in items:
            payload[itemname] = str(item)
            fid.write('\n#===================== ' + str(item) + ' =====================\n')

            # Retry until the request succeeds.  Only network-level failures
            # are retried; anything else (e.g. a disk error) propagates.
            while True:
                print('\n===================== ' + str(item) + ' =====================\n')
                count = count + 1
                print('count : ' + str(count))
                try:
                    r = requests.post(request_url, data=payload,
                                      headers=headers, timeout=REQUEST_TIMEOUT)
                except requests.RequestException as e:
                    print(e)
                    time.sleep(RETRY_DELAY)
                    continue
                break

            fid.write(r.text)
            fid.write('\n--------------------- ' + str(item) + ' ---------------------\n')
  • 相关阅读:
    重定向与转发比较
    servlet_5
    servlet_4
    servlet_3
    字符串的操作以及格式化的操作
    2019的Python
    函数2
    函数
    文件操作
    集合 set
  • 原文地址:https://www.cnblogs.com/ippfcox/p/6947165.html
Copyright © 2020-2023  润新知