# -*- coding:utf-8 -*-
# author : willowj
"""Collect HTTP proxy addresses from youdaili.net and keep the usable ones.

This is a Python 2 script.  When run directly it locates the newest proxy
list page on the site's front page, extracts every ``...@HTTP#`` entry,
probes each one with a test request and stores the working addresses in
``ips.txt``.
"""
import urllib
import urllib2
from bs4 import BeautifulSoup
import re
import bs4

import sys


# Python 2 only: switch the interpreter's implicit str/unicode conversion
# to UTF-8 (reload() restores the setdefaultencoding attribute that site.py
# removes at start-up).
reload(sys)
sys.setdefaultencoding('utf8')


def _fetch(url):
    """Return the body of ``url`` and close the connection afterwards."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def ip_test(ip, url="https://www.baidu.com", timeout=10):
    """Check whether ``url`` can be fetched through the proxy ``ip``.

    Args:
        ip: proxy address without scheme; ``"http://"`` is prepended.
        url: page requested through the proxy.
        timeout: seconds to wait before giving up on the proxy.

    Returns:
        True if the request succeeded, False on any failure.  The outcome
        is also printed ("ok <proxy>" or "failed").
    """
    ip1 = "http://" + ip
    # Proxy mapping keys must be bare scheme names ('http', not 'http:'),
    # and the default test URL is https, so register the proxy for both
    # schemes; otherwise the request silently bypasses the proxy.
    handler = urllib2.ProxyHandler({'http': ip1, 'https': ip1})
    opener = urllib2.build_opener(handler)
    try:
        response = opener.open(url, timeout=timeout)
        try:
            response.read()
        finally:
            response.close()
    except Exception:
        # Deliberately broad: any error at all means the proxy is unusable.
        print("failed")
        return False
    print("ok %s" % ip1)
    return True


def get_iphtml_inyoudaili():
    """Return the URL of the newest HTTP proxy list linked on youdaili.net.

    The front page is searched for an anchor whose text after the ``href``
    contains the site's Chinese caption for the HTTP proxy list, e.g.
    ``href="http://www.youdaili.net/Daili/http/26672.html"``.  The first
    matching link is printed and returned.

    Raises:
        IndexError: if the front page contains no matching link.
    """
    code = _fetch('http://www.youdaili.net')
    regexp = 'href="(.*?)" .*?最新代理http服务器ip地址'
    met = re.findall(regexp, code)
    newest = met[0]
    print(newest)
    return newest


def getIps(url):
    """Extract proxies from the page at ``url`` and keep the working ones.

    Every entry of the form ``<address>@HTTP#`` is printed and probed with
    ``ip_test``.  Addresses that pass are written to ``ips.txt`` (space
    separated, file is overwritten) and returned as a list.
    """
    codeip = _fetch(url)

    # An entry starts with a 1-3 digit group, a literal dot, then any run of
    # non-whitespace up to the "@HTTP#" marker.
    pat_ip = re.compile(r'([1-9][0-9]{0,2}\.\S*?)@HTTP#')

    ips = []
    with open('ips.txt', 'w') as file_open:
        for x in pat_ip.findall(codeip):
            print(x)
            if ip_test(x):
                ips.append(x)
                file_open.write(x + ' ')
    return ips


def saveIps(list):
    """Overwrite ``ips.txt`` with the given addresses, each followed by a space.

    The parameter is named ``list`` (shadowing the builtin) for
    compatibility with existing callers.
    """
    with open('ips.txt', 'w') as file_open:
        file_open.writelines(ip + ' ' for ip in list)


def read_ips(file='ips.txt'):
    """Read the saved addresses from ``file`` and return them as a list.

    Entries may be separated by spaces or newlines, so files written by
    ``saveIps``/``getIps`` are split back into individual addresses.  The
    list is printed before it is returned.
    """
    with open(file) as file_open:
        ips = file_open.read().split()
    print(ips)
    return ips


if __name__ == "__main__":

    ips = getIps(get_iphtml_inyoudaili())

    saveIps(ips)