• python 爬虫入门1 爬取代理服务器网址


    刚学,只会一点正则,目前还只能爬取第 1 页。以后还会加入测试。

     1 #coding:utf-8
     2 
     3 import urllib
     4 import urllib2
     5 import re
     6 
     7 #抓取代理服务器地址
     8 Key = 1
     9 url = 'http://www.xicidaili.com/nt/%s' %Key
    10 #print url
    11 
    12 user_agent='Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    13 headers={'User-Agent' :user_agent}
    14 
    15 try:
    16     request = urllib2.Request(url,headers=headers)
    17     response=urllib2.urlopen(request)
    18     html=response.read()
    19     pattern=re.compile('<td class="country".*?<td>(d+).(d+).(d+).(d+)</td>.*?<td>(d+)</td>',re.S)
    20     items=re.findall(pattern,html)
    21     for item in items:
    22     #    if item !='HTTP'or'HTTPS':
    23             print "%s.%s.%s.%s:%s" %(item[0],item[1],item[2],item[3],item[4])
    24 except urllib2.URLError,e:
    25     if hasattr(e,'code'):
    26         print e.code
    27     if hasattr(e,'reason'):
    28         print e.reason

    Output

    112.112.95.25:9999
    113.66.236.53:9797
    14.221.165.46:9797
    123.121.79.213:9000
    219.133.10.211:9797
    113.109.248.12:9797
    27.46.48.187:9797
    115.183.11.158:9999
    112.93.208.231:8080
    113.78.254.84:9000
    121.35.243.157:8080
    42.157.5.154:9999
    218.75.144.25:9000
    113.65.8.221:9999
    218.56.132.158:8080
    59.59.144.135:53281
    119.129.96.33:9797
    115.213.60.99:53281
    221.237.154.58:9797
    120.86.180.173:9797
    112.250.65.222:53281
    27.37.22.243:9000
    123.138.89.133:9999
    175.171.184.36:53281
    113.76.96.161:9797
    183.29.130.106:9000
    119.90.63.3:3128
    175.171.186.171:53281
    183.184.194.15:9797
    218.241.234.48:8080
    113.200.159.155:9999
    218.6.145.11:9797
    218.56.132.156:8080
    223.199.175.107:808
    14.221.166.140:9000
    220.249.185.178:9999
    122.72.18.34:80
    139.224.24.26:8888
    122.72.18.60:80
    61.163.139.168:9797
    202.120.46.180:443
    122.72.18.61:80
    125.45.87.12:9999
    116.85.24.26:8080
    222.86.191.44:8080
    112.74.94.142:3128
    61.163.139.168:9797
    114.255.212.17:808
    118.178.228.175:3128
    122.72.18.35:80
    101.37.79.125:3128
    113.89.52.86:9999
    113.118.96.132:9797
    101.81.142.10:9000
    61.155.164.106:3128
    114.115.140.25:3128
    171.37.176.140:9797
    58.252.6.165:9000
    61.163.39.70:9999
    121.8.170.53:9797
    175.174.118.141:8080
    118.119.168.172:9999
    171.37.143.140:9797
    119.39.68.212:808
    124.90.30.103:8118
    59.38.61.23:9797
    1.196.161.163:9999
    113.116.76.212:8088
    122.136.212.132:53281
    203.174.112.13:3128
    221.217.49.196:9000
    14.29.84.50:8080
    175.17.156.139:8080
    175.17.174.218:9000
    114.221.125.161:8118
    123.139.56.238:9999
    113.87.163.152:808
    101.6.33.113:8123
    61.155.164.112:3128
    180.140.161.138:9797
    221.7.49.209:53281
    120.9.75.45:9999
    183.184.112.78:9797
    116.236.151.166:8080
    119.122.2.160:9000
    119.129.96.142:9797
    116.52.195.113:9999
    61.155.164.109:3128
    112.86.248.163:8118
    115.171.47.184:9000
    116.30.218.76:9000
    123.7.38.31:9999
    218.29.111.106:9999
    114.101.35.113:54214
    124.89.33.75:9999
    114.254.4.208:9797
    183.54.192.211:9797
    218.17.8.110:8118
    183.30.201.123:9797
    119.123.244.95:9000
    
    ***Repl Closed***
  • 相关阅读:
    接口与实现分离
    C++的explicit关键字
    C++的类型转换
    使用catch做单元测试简介
    C++可调用对象与函数表
    在sublime中使用cppcheck
    你需要的代码静态检查
    构造析构与拷贝赋值那些事
    c++的关联容器入门(map and set)
    【致敬程序猿】
  • 原文地址:https://www.cnblogs.com/sub2020/p/7988111.html
Copyright © 2020-2023  润新知