python-爬某页面图
注意:Python 3 与 Python 2 存在一些差异(例如 urllib 的接口改为 urllib.request,read() 返回的是 bytes,需要先 decode 才能做字符串匹配),编写时需多加留意。
#!/usr/bin/env python3
# coding=utf-8
# Download the JPEG images embedded in a Baidu Tieba thread page.

import re
import urllib.request

# Matches image tags of the form  src="<url>.jpg" pic_ext...
# - The dot before "jpg" is escaped so only a literal ".jpg" suffix matches.
# - [^"]+ keeps the capture inside a single attribute value, so an earlier
#   non-jpg src="..." on the same line cannot be glued into the URL.
IMG_PATTERN = re.compile(r'src="([^"]+\.jpg)" pic_ext')


def get_html(url):
    """Fetch *url* and return the raw response body as bytes.

    The response is closed as soon as the body has been read.
    """
    with urllib.request.urlopen(url) as page:
        return page.read()


def find_img_urls(htmlb):
    """Return the list of ``.jpg`` image URLs found in *htmlb*.

    *htmlb* may be ``bytes`` (as returned by ``get_html``) or an already
    decoded ``str``.  Bytes are decoded as UTF-8; undecodable bytes are
    replaced rather than raising, so one bad byte does not abort the run.
    """
    if isinstance(htmlb, bytes):
        # In Python 3 the response body is bytes and must be decoded
        # before it can be searched with a str pattern.
        htmlb = htmlb.decode('utf-8', errors='replace')
    return IMG_PATTERN.findall(htmlb)


def get_img(htmlb):
    """Download every image referenced in *htmlb* to the working directory.

    Images are saved as ``0.jpg``, ``1.jpg``, ... in the order they appear.

    Returns:
        The list of file names written (empty when no image matched).
    """
    saved = []
    for index, imgurl in enumerate(find_img_urls(htmlb)):
        filename = '%s.jpg' % index
        urllib.request.urlretrieve(imgurl, filename)
        saved.append(filename)
    return saved


def main():
    """Fetch the sample Tieba thread and download its images."""
    htmlb = get_html("http://tieba.baidu.com/p/2460150866")
    print(get_img(htmlb))


# NOTE: keep a trailing newline at the end of the file.
if __name__ == "__main__":
    main()