#-*-coding:gbk-*-
#code by anyun.org
"""Look up every host listed in ``list.txt`` on ip.chinaz.com.

For each non-empty line of ``list.txt`` the script fetches
``http://ip.chinaz.com/?ip=http://<host>``, extracts the result fields with
regular expressions, prints them, and appends them to ``ok.txt``.  Hosts
whose page contains the site's error banner, whose page lacks the expected
fields, or whose request fails are appended to ``err.txt`` instead.

The code runs unchanged on Python 2 and Python 3.
"""
import re
import time
import urllib

try:  # Python 3
    from urllib.request import urlopen as _urlopen
except ImportError:  # Python 2
    _urlopen = urllib.urlopen

# Patterns are compiled once at import time instead of on every call.
_CONTEXT_RE = re.compile(r'<span class="Whwtdhalf w15-0">(.*?)</span>')
_ADDRESS_RE = re.compile(r'<span class="Whwtdhalf w50-0">(.*?)</span>')
_ERROR_RE = re.compile(r'<div class="col-red lh30 fz14 jspu">(.*?)</div>')
_WHITESPACE_RUN_RE = re.compile(r'\s+')

# Minimum number of matches the output line indexes into.
_CONTEXT_FIELDS = 6
_ADDRESS_FIELDS = 2


def getHtml(url):
    """Fetch *url* and return its body with whitespace normalised.

    Every run of whitespace (spaces, tabs, line breaks) is collapsed to a
    single space.  That lets the non-DOTALL ``(.*?)`` groups match across
    what used to be line breaks, while keeping the single spaces inside tags
    that the patterns rely on (``<span class="...">``).

    NOTE(review): the original code chained four ``html.replace(...)`` calls
    whose whitespace-only literals could not be recovered reliably; removing
    every plain space would stop all three patterns from matching.  Confirm
    that collapsing whitespace is the intended clean-up.

    On Python 3 the body arrives as bytes and is decoded using the charset
    from the response headers (UTF-8 when none is given or it is unknown).
    On Python 2 the byte string is returned undecoded, as before.

    Raises:
        IOError: if the URL cannot be opened or read (``OSError`` /
            ``URLError`` on Python 3, which are the same family).
    """
    page = _urlopen(url)
    try:
        html = page.read()
        if not isinstance(html, str):  # Python 3 only: bytes -> str
            charset = page.headers.get_content_charset() or 'utf-8'
            try:
                html = html.decode(charset, 'replace')
            except LookupError:  # server announced an unknown charset
                html = html.decode('utf-8', 'replace')
    finally:
        page.close()
    return _WHITESPACE_RUN_RE.sub(' ', html)


def getcontext(html):
    """Return the contents of every ``Whwtdhalf w15-0`` span in *html*."""
    return _CONTEXT_RE.findall(html)


def getadd(html):
    """Return the contents of every ``Whwtdhalf w50-0`` span in *html*."""
    return _ADDRESS_RE.findall(html)


def geterr(html):
    """Return the contents of every error banner div in *html*.

    An empty list means the page carries no error banner.
    """
    return _ERROR_RE.findall(html)


def _build_record(html):
    """Return the output fields for *html*, or None when it cannot be parsed.

    None is returned when the page shows the error banner or when fewer
    spans were found than the output line needs; previously the latter case
    raised IndexError and stopped the whole run.
    """
    if geterr(html):
        return None
    context = getcontext(html)
    address = getadd(html)
    if len(context) < _CONTEXT_FIELDS or len(address) < _ADDRESS_FIELDS:
        return None
    # Same interleaved order the script has always emitted: 0, 3, 1, 4, 2, 5.
    ordered = [context[0], context[3], context[1], context[4],
               context[2], context[5], address[0], address[1]]
    # Whitespace normalisation can leave one space around a captured value.
    return [field.strip() for field in ordered]


def _append_line(path, line):
    """Append *line* plus a newline to the text file at *path*."""
    with open(path, 'a') as handle:
        handle.write(line + '\n')


def main():
    """Process ``list.txt`` line by line and report the outcome per host."""
    with open('list.txt', 'r') as hosts:
        for raw_line in hosts:
            host = raw_line.strip()
            if not host:
                continue  # a blank line would only trigger a useless request
            url = 'http://ip.chinaz.com/?ip=http://' + host
            try:
                record = _build_record(getHtml(url))
            except IOError:
                # A failed request is recorded like a failed lookup so that
                # one unreachable host no longer aborts the batch.
                print('error')
                record = None
            if record is not None:
                line = ' '.join(record)
                print(line)
                _append_line('ok.txt', line)
            else:
                # The literal means "resolution failed".
                line = host + ' ' + '解析失败'
                print(line)
                _append_line('err.txt', line)
            # Pause between requests.
            # NOTE(review): assumed to apply once per host - confirm.
            time.sleep(0.5)
    print('over')


if __name__ == '__main__':
    main()