import re
import urllib2
def get_html():
    """Fetch the search page and return the price strings found in it.

    Downloads ``http://uland.taobao.com/sem/tbsearch`` and collects the text
    between ``<strong>`` and ``</strong>`` wherever it directly follows
    ``target="_blank">``, a yen sign and a space.

    Returns:
        list: One captured string per match, in page order; empty when the
        page contains no match.
    """
    # NOTE(review): the pattern below contains a non-ASCII character; under
    # Python 2 that requires a source-encoding declaration on line 1 or 2 of
    # the file -- confirm one is present.
    response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
    try:
        html = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    # Non-greedy capture: "." never crosses a newline, but with a greedy
    # group several prices on one line would collapse into a single match.
    pattern = re.compile(r'target="_blank">¥ <strong>(.*?)</strong>')
    return pattern.findall(html)
def get_html1():
    """Fetch the search page and return the title anchors found in it.

    Downloads ``http://uland.taobao.com/sem/tbsearch`` and matches every
    anchor fragment of the form
    `` target="_blank" class="title" title="...">...</a>``.

    Returns:
        list: One ``(title_attribute, link_text)`` tuple per match, in page
        order; empty when the page contains no match.
    """
    response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
    try:
        html = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    # Non-greedy groups: with greedy ones, a line holding several anchors
    # would be captured as one oversized match.
    pattern = re.compile(
        r' target="_blank" class="title" title="(.*?)">(.*?)</a>')
    return pattern.findall(html)
# Scrape prices and titles, then append one "price title" line per item to
# aaa.txt.
price = get_html()
title = get_html1()

# Each get_html1() match is a (title attribute, link text) tuple; only the
# last element (the link text) is written out.
names = [match[-1] for match in title]

# Pair the two result lists positionally; zip() stops at the shorter one.
# The pairs are written directly instead of going through a dict keyed by
# price, which dropped every item sharing a price with another and lost the
# page order. This also avoids rebinding the builtins ``dict`` and ``list``.
with open('aaa.txt', 'a') as fd:
    for cost, name in zip(price, names):
        fd.write(cost + ' ' + name + '\n')
# -*- coding: utf-8 -*-
import urllib2
import re
def geturl():
    """Fetch the search page and return the ``title`` attributes of its anchors.

    Downloads ``http://uland.taobao.com/sem/tbsearch`` and matches every
    anchor fragment of the form
    `` target="_blank" class="title" title="...">...</a>``, keeping only the
    value of the ``title`` attribute.

    Returns:
        list: One string per match, in page order; empty when the page
        contains no match.
    """
    response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
    try:
        html = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    pattern = re.compile(r' target="_blank" class="title" title="(.*?)">.*?</a>')
    return pattern.findall(html)
def geturl1():
    """Fetch the search page and return the contents of its ``<strong>`` tags.

    Downloads ``http://uland.taobao.com/sem/tbsearch`` and captures the text
    between ``<strong>`` and ``</strong>`` for every ``<strong>`` tag that is
    preceded by a space.

    Returns:
        list: One captured string per match, in page order; empty when the
        page contains no match.
    """
    response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
    try:
        html = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    pattern = re.compile(r' <strong>(.*?)</strong>')
    return pattern.findall(html)
# Write one "title strong-text" line per item to baobiao.txt, overwriting any
# previous report. The two result lists are paired positionally; zip() stops
# at the shorter one.
with open('baobiao.txt', 'wb') as fd:
    # The context manager closes the file even if a fetch or write fails.
    for name, cost in zip(geturl(), geturl1()):
        fd.write(name + ' ' + cost + '\n')