• 体彩数据爬取


    大乐透

    爬取1(按期号范围逐期爬取)

    # 爬取大乐透的开奖历史数据
    # http://www.lottery.gov.cn/api/lottery_kj_detail_new.jspx?_ltype=4&_term=19026
    import requests
    import re
    import csv
    # Crawl Super Lotto (daletou) draw results one term number at a time and
    # save them to lottery_data.csv.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    headers = {
        'User-Agent': agent,
    }
    proxy = {
        "http": "125.39.9.34:9000",
    }
    url = 'http://www.lottery.gov.cn/api/lottery_kj_detail_new.jspx'

    # Seconds to wait for the server; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10

    # Captures the content of the codeNumber array in the response text.
    # The brackets must be escaped, otherwise "[(.*?)]" is a character class
    # and nothing is captured.
    CODE_NUMBER_RE = re.compile(r'codeNumber":\[(.*?)\],"')

    CSV_HEADER = ['qihao', 'red1', 'red2', 'red3', 'red4', 'red5', 'blue1', 'blue2']


    def parse_draw_numbers(page_text, qihao):
        """Extract one CSV row from the raw API response text.

        Args:
            page_text: Response body returned for a single term.
            qihao: Term number that was requested; becomes the first column.

        Returns:
            ``[qihao, n1, n2, ...]`` with the drawn numbers as strings
            (e.g. ``[19026, '10', '12', '15', '17', '19', '02', '03']``), or
            ``None`` when the response carries no numbers for that term.
        """
        found = CODE_NUMBER_RE.search(page_text)
        if found is None or not found.group(1):
            return None
        # The array items are quoted strings: "10","12",... -> 10,12,...
        numbers = found.group(1).replace('"', '').split(',')
        return [qihao] + [number.strip() for number in numbers]


    def fetch_term(qihao):
        """POST one term number to the detail API and return the response text."""
        data = {
            '_ltype': '4',
            '_term': qihao,
        }
        response = requests.post(url=url, headers=headers, data=data,
                                 proxies=proxy, timeout=REQUEST_TIMEOUT)
        return response.text


    def crawl_by_term():
        """Prompt for a term range, fetch every term in it and write the CSV.

        Terms whose response contains no draw numbers are skipped.
        """
        start = int(input('输入开始期号:'))  # e.g. 18134
        end = int(input('输入结束期号:'))  # e.g. 19029
        lottery_li = []
        for qihao in range(start, end + 1):
            page_text = fetch_term(qihao)
            print(page_text)
            record = parse_draw_numbers(page_text, qihao)
            if record is not None:
                lottery_li.append(record)

        with open('lottery_data.csv', 'w', newline='') as csvf:
            spanwriter = csv.writer(csvf, dialect='excel')
            spanwriter.writerow(CSV_HEADER)
            spanwriter.writerows(lottery_li)
        print('done.....................')


    if __name__ == '__main__':
        crawl_by_term()
    

    爬取2(所有开奖记录)

    # 爬取大乐透的开奖历史数据
    # http://www.lottery.gov.cn/historykj/history.jspx?_ltype=dlt
    import requests
    import re
    import csv
    from lxml import etree
    import random
    import time
    # Crawl the paginated Super Lotto (daletou) history table and save every
    # draw to all_lottery.csv.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    headers = {
        'User-Agent': agent,
    }
    proxies = [
        {"http": "125.39.9.34:9000"},
        {"http": "222.139.125.232:8060"},
    ]
    # One proxy is picked per run and reused for every page.
    proxy = random.choice(proxies)
    params = {
        '_ltype': 'dlt',
    }

    # Seconds to wait for the server; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10

    # One <tr> per draw inside the result table.
    ROW_XPATH = '//div[@class="result"]/table/tbody/tr'
    # Only the first 8 cells are kept: term number, 5 red balls, 2 blue balls.
    CELL_XPATH = './td[position() <= 8]'

    CSV_HEADER = ['qihao', 'red1', 'red2', 'red3', 'red4', 'red5', 'blue1', 'blue2']


    def cells_to_record(cell_texts):
        """Collapse the text nodes of each cell into one stripped string.

        Args:
            cell_texts: One list of text-node strings per table cell.

        Returns:
            A list with exactly one string per cell, so a cell with no text or
            with several text nodes cannot shift the following CSV columns.
        """
        return [''.join(parts).strip() for parts in cell_texts]


    def parse_history_page(page_text):
        """Return one record per table row found in a history page.

        Rows without any <td> cell are skipped instead of producing blank
        CSV lines.
        """
        if not page_text or not page_text.strip():
            return []
        tree = etree.HTML(page_text)
        # NOTE(review): guards against a parser that yields no root element for
        # degenerate input - confirm against the installed lxml version.
        if tree is None:
            return []
        records = []
        for row in tree.xpath(ROW_XPATH):
            # Relative queries touch only this row instead of rescanning the
            # whole document once per column.
            cells = [cell.xpath('.//text()') for cell in row.xpath(CELL_XPATH)]
            if cells:
                records.append(cells_to_record(cells))
        return records


    def crawl_history_pages():
        """Prompt for the last page number, fetch pages 1..N and write the CSV."""
        page = int(input("end page no:"))
        lottery_data = []
        for page_no in range(1, page + 1):
            url = 'http://www.lottery.gov.cn/historykj/history_%s.jspx' % page_no
            page_text = requests.get(url=url, params=params, headers=headers,
                                     proxies=proxy, timeout=REQUEST_TIMEOUT).text
            time.sleep(1)  # pause between page requests
            lottery_data.extend(parse_history_page(page_text))

        with open('all_lottery.csv', 'w', newline='') as csvf:
            spanwriter = csv.writer(csvf, dialect='excel')
            spanwriter.writerow(CSV_HEADER)
            spanwriter.writerows(lottery_data)
        print('done..................................')


    if __name__ == '__main__':
        crawl_history_pages()
    
  • 相关阅读:
    Spring Bean的生命周期
    Java中的Object类
    Java线程池七个参数
    ROS 第五讲 在模拟器中构建第一个机器人
    ROS 第四讲 让小车在RViz与Gazebo模拟器中跑起来
    ROS 第三讲 操控小乌龟
    ROS 第二讲 基本操作
    ROS 第一讲 引入
    自然语言处理(二) 新词发现或非监督词典构建
    递归找到一个复杂对象中的某个值和它的最小层级
  • 原文地址:https://www.cnblogs.com/fmgao-technology/p/10552202.html
Copyright © 2020-2023  润新知