"""Download the .jpg images referenced by a web page into a local folder.

The script fetches one page, extracts every ``src="....jpg" pic_ext`` image
URL from its HTML and stores the images as ``1.jpg``, ``2.jpg``, ... in the
target directory.
"""

import contextlib
import os
import re
import urllib

try:  # Python 3 keeps urlopen in urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2 exposes it directly on urllib
    urlopen = urllib.urlopen

DEFAULT_URL = "http://tieba.baidu.com/p/3553148164"
# Raw string: same value as the original "E:\pic" without relying on "\p"
# being an unrecognised (and deprecated) escape sequence.
DEFAULT_SAVE_DIR = r"E:\pic"

# [^"] keeps a match inside a single src="..." attribute, and \. requires a
# literal dot before "jpg"; the original '.+?.jpg' did neither.
_IMG_RE = re.compile(r'src="([^"]+?\.jpg)" pic_ext')


def rename(name):
    """Return *name* with the ``.jpg`` extension appended."""
    return name + '.jpg'


def getHtml(url):
    """Fetch *url* and return the raw response body.

    The connection is closed before returning, even if reading fails.
    """
    # contextlib.closing works on both Python 2 and 3 response objects.
    with contextlib.closing(urlopen(url)) as page:
        return page.read()


def getImg(html, save_dir=DEFAULT_SAVE_DIR):
    """Download every image referenced in *html* into *save_dir*.

    Args:
        html: Page source, as text or as the raw bytes from ``getHtml``.
        save_dir: Folder that receives the images. It is created when
            missing. The process working directory is left untouched.

    Returns:
        The paths of the files written, in download order. Files are
        named ``1.jpg``, ``2.jpg``, ... following the order in which the
        image URLs appear in *html*.
    """
    # On Python 3 the body read from the network is bytes, which cannot be
    # searched with a str pattern. On Python 2 bytes is str, so this is a
    # no-op there.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    saved = []
    for index, imgurl in enumerate(_IMG_RE.findall(html), 1):
        with contextlib.closing(urlopen(imgurl)) as img:
            data = img.read()

        name = rename(str(index))
        print(name)

        target = os.path.join(save_dir, name)
        with open(target, 'wb') as f:
            f.write(data)
        saved.append(target)
    return saved


def main():
    """Scrape the default page and store its images in the default folder."""
    html = getHtml(DEFAULT_URL)
    getImg(html)
    print('pic save!')


if __name__ == "__main__":
    main()
爬取的网页是 http://tieba.baidu.com/p/3553148164
图片保存在E盘pic文件夹下
爬取结果如下: