• 【爬虫】抓取xicidaili可用代理ip


    # coding=utf-8
    import requests
    from lxml import etree
    # Module-level accumulator of "protocol://ip:port" strings. run() appends
    # to it and the __main__ block passes it to write_to_txt().
    ips=[]
    def run(page):
        """Scrape one xicidaili listing page and append its proxies to ``ips``.

        Requests ``https://www.xicidaili.com/nn/<page>``, walks the data rows of
        the table whose id is ``ip_list`` and appends one ``protocol://ip:port``
        string per row to the module-level ``ips`` list. The accumulated list is
        printed once the page has been processed.

        Args:
            page: Page number substituted into the listing URL.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
        """
        url = "https://www.xicidaili.com/nn/{}"
        # NOTE(review): these headers look like they were copied from a
        # Postman-generated request (see the Postman-Token / PostmanRuntime
        # entries). The session cookie is hard-coded and has presumably
        # expired -- confirm whether the site still needs it.
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36,PostmanRuntime/7.16.3",
            'Accept': "*/*",
            'Cache-Control': "no-cache",
            'Postman-Token': "e17c0361-c140-4e67-b4d7-1d4297b6876d,2da41bb3-79f5-40fd-a5a7-63c0acbd4442",
            'Host': "www.xicidaili.com",
            'Accept-Encoding': "gzip, deflate",
            'Cookie': "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJWYyNTA3YjBmOWFjNDAxOWJhYWEzNDg4YWQ0OTU5ZjYyBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUkxQnBlMzlsNmR3bExnWHltNklaWjFIdDJyNkdiVzE0cXUwR094TlErczQ9BjsARg%3D%3D--108c1be9a4e23604bde585654cfee79143f53fb6",
            'cache-control': "no-cache"
        }
        # Without a timeout requests waits indefinitely on a stalled server.
        r = requests.get(url.format(page), headers=headers, timeout=10)
        # Fail loudly on 4xx/5xx instead of parsing an error page into nothing.
        r.raise_for_status()

        selector = etree.HTML(r.text)
        # The first <tr> is skipped as the header row; the rest are data rows.
        info_list = selector.xpath('//table[@id="ip_list"]//tr')[1:]
        for info in info_list:
            ip = ''.join(info.xpath('./td[2]/text()')).strip()
            port = ''.join(info.xpath('./td[3]/text()')).strip()
            protocol = ''.join(info.xpath('./td[6]/text()')).strip()
            # Rows lacking any of the three cells would otherwise be stored
            # as malformed entries such as "://:".
            if not (ip and port and protocol):
                continue
            ips.append(protocol + "://" + ip + ":" + port)
        print(ips)

    def write_to_txt(lists, path='ips.txt'):
        """Save the given strings to a text file, separated by single spaces.

        The file is opened in write mode, so any existing content is replaced.

        Args:
            lists: Iterable of strings to store.
            path: Destination file; defaults to ``ips.txt`` in the current
                working directory.
        """
        with open(path, 'w', encoding='utf-8') as f:
            f.write(" ".join(lists))


    if __name__ == "__main__":
        # Crawl listing pages 1 through 4; run() accumulates results in `ips`.
        for i in range(1, 5):
            print("==================同步第{}页=====================".format(i))
            run(i)
        # Persist everything collected, then report how many entries were found.
        write_to_txt(ips)
        print(len(ips))

  • 相关阅读:
    进程和线程
    进程通信、同步与调度
    文件和文件系统
    【nexys3】【verilog】小设计——拆弹游戏
    Qt4开发环境搭建(Qt4.8.7+mingw4.8.2+Qt Creator4.2.0)
    GPL和LGPL
    【rpi】使用putty远程连接rpi(ssh)
    mysql 命令 小结
    安装mysql zip 安装包 Navicat连接
    python虚拟环境 virtualenv工具
  • 原文地址:https://www.cnblogs.com/winstonsias/p/11528021.html
Copyright © 2020-2023  润新知