• 抓取代理IP,然后保存成txt


    #!/usr/bin/env python
    # coding=utf-8
    #针对 www.xicidaili.com
    import re
    import random
    import sys
    import time
    import datetime
    import threading
    from random import choice
    import requests
    import bs4
    import string


    # Output file shared by the whole script: get_ip() writes proxies to it and
    # main() closes it. Append mode keeps entries written by earlier runs.
    # NOTE(review): opened as an import-time side effect, and the name shadows
    # the Python 2 builtin `file`.
    file=open('data.txt','a')  
    def get_ip(str1):
        """Scrape one listing page of www.xicidaili.com and save its proxies.

        Downloads ``http://www.xicidaili.com/wt/<str1>``, extracts every IP
        address and every port from the cells of the first ``<table>`` on the
        page, pairs them in document order as ``ip:port``, prints the
        resulting list, and appends each entry followed by a single space to
        the module-level ``file`` handle.

        Args:
            str1: Page number as a string; appended verbatim to the listing
                URL.

        Raises:
            requests.RequestException: If the page cannot be fetched,
                including when the request times out.
        """
        url = "http://www.xicidaili.com/wt/"+str1
        headers = { "Accept":"text/html,application/xhtml+xml,application/xml;",
                    "Accept-Encoding":"gzip, deflate, sdch",
                    "Accept-Language":"zh-CN,zh;q=0.8,en;q=0.6",
                    "Referer":"http://www.xicidaili.com",
                    "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
                    }
        # Without a timeout a stalled connection would block the crawl forever.
        r = requests.get(url, headers=headers, timeout=10)
        soup = bs4.BeautifulSoup(r.text, 'html.parser')
        table = soup.table
        if table is None:
            # The response holds no <table> at all, so there is nothing to
            # extract from this page.
            return
        # Serialise the cells back to markup so the regexes below can run
        # over one flat string.
        cells = str(table.find_all("td"))
        # A cell whose whole content is a dotted quad is an IP address.
        ip_compile = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td>')
        # A cell whose whole content is digits only is a port.
        port_compile = re.compile(r'<td>(\d+)</td>')
        ip = ip_compile.findall(cells)       # all IPs, in document order
        port = port_compile.findall(cells)   # all ports, in document order
        # Pairing relies on each IP cell having a port cell at the same
        # position in the other list.
        proxies = [":".join(pair) for pair in zip(ip, port)]
        print(proxies)
        for proxy in proxies:
            file.write(proxy + ' ')

    def main():
        """Crawl listing pages 1 through 1999 and then close the output file.

        Calls ``get_ip`` once per page number, pausing 0.1 seconds after each
        page. The module-level ``file`` handle is closed when the loop ends,
        including when a page fetch raises, so buffered entries are flushed
        and the handle is not leaked.
        """
        try:
            for page in range(1, 2000):
                get_ip(str(page))
                # Short pause between requests to avoid hammering the site.
                time.sleep(0.1)
        finally:
            file.close()
    # Start the crawl only when this file is executed as a script, not when it
    # is imported as a module.
    if __name__ == '__main__':
        main()

  • 相关阅读:
    c++ 优化的动态数组 Vector
    C++ 重载赋值运算符
    k8s中引入外部服务
    MySQL----mysql_secure_installation 安全配置向导
    elk参考连接
    限制不同的用户操作k8s的资源
    tcpdump 抓包命令使用教程
    日志管理——rsyslog、logrotate
    lsyncd配置文件详细说明
    Systemd 服务配置文件(转载)
  • 原文地址:https://www.cnblogs.com/wj2ge/p/7009849.html
Copyright © 2020-2023  润新知