免费代理IP地址列表

import time
import requests
import random
from bs4 import BeautifulSoup

# Module-level accumulator of scraped proxy IP strings; get_proxy_list()
# appends to this list and returns it.
IP_POOL = []
def get_max_proxy(timeout=10):
    """Return the highest page number shown in the listing's pagination bar.

    Fetches the first page of the free-proxy listing and scans every link
    inside ``<div id="listnav">`` for an integer label.

    Args:
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises instead of blocking indefinitely.

    Returns:
        The largest integer found among the pagination link labels, or 1
        when no link carries an integer label.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
        "Host": "www.kuaidaili.com",
        "origin": "https://www.kuaidaili.com"
    }
    response = requests.get(
        url="https://www.kuaidaili.com/free/inha/1/",
        headers=headers,
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, "lxml")

    max_page = 1
    for nav in soup.find_all(name="div", attrs={"id": "listnav"}):
        for link in nav.find_all(name="a"):
            try:
                page = int(link.text)
            except ValueError:
                # A link whose label is not an integer (e.g. a "next"
                # style link, if the site renders one) is not a page number.
                continue
            # Plain comparison rather than the builtin max(): this module
            # rebinds the global name ``max`` to an int once the script runs.
            if page > max_page:
                max_page = page
    return max_page

def get_proxy_list(max, page_limit=3, timeout=10):
    """Scrape proxy IPs from listing pages 1..max and add them to IP_POOL.

    Args:
        max: Highest page number to fetch (inclusive). The name shadows the
            builtin but is kept so existing callers keep working.
        page_limit: Upper bound on how many pages are fetched; the default
            of 3 preserves the original debugging cap. Pass ``None`` to
            fetch every page up to ``max``.
        timeout: Seconds to wait for each HTTP response before ``requests``
            gives up and raises instead of blocking indefinitely.

    Returns:
        The module-level ``IP_POOL`` list, extended in place with the text
        of every ``<td data-title="IP">`` cell found on the fetched pages.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
        "Host": "www.kuaidaili.com",
        "origin": "https://www.kuaidaili.com"
    }
    last_page = max if page_limit is None else min(max, page_limit)
    # range() excludes its stop value, so add 1 to include the final page
    # (and to fetch page 1 at all when max == 1).
    for page in range(1, last_page + 1):
        # Random 0.2-1.0 s pause between requests.
        time.sleep(0.2 * random.randint(1, 5))
        html = requests.get(
            url="https://www.kuaidaili.com/free/inha/{}/".format(page),
            headers=headers,
            timeout=timeout,
        ).text
        soup = BeautifulSoup(html, "lxml")
        for body in soup.find_all(name="tbody"):
            IP_POOL.extend(
                cell.text
                for cell in body.find_all(name="td", attrs={"data-title": "IP"})
            )
    return IP_POOL

# Script driver: look up the page count, scrape the pages, then report
# how many IPs were collected followed by the IPs themselves.
max = get_max_proxy()
IP_POOL = get_proxy_list(max)
print(len(IP_POOL), IP_POOL, sep="\n")

本文来自博客园，作者：topass123，转载请注明原文链接：https://www.cnblogs.com/topass123/p/15531608.html

相关阅读:
jsp初识
OAuth2.0
微服务参考案例
3.0技术架构落地
聚合层改进意见-彭泉锋-2018.10.31
图片分步加载（解决图片load函数刷新不加载问题）
IOS iframe宽高问题（来自stackoverflow）
jquery easy-ui 分页插件的运用(给td添加事件,获取汇总内容)
JS 获取当前日期(yy-mm-dd HH-MM-SS)
JQ iframe 子元素找父级的元素

原文地址：https://www.cnblogs.com/topass123/p/15531608.html