"""Download every ``.jpg`` image referenced by a web page.

Run as a script, this fetches one gallery page and saves each linked JPEG
into the current working directory as ``1.jpg``, ``2.jpg``, ... in the order
the links appear in the page.
"""
import gzip
import io
import os
import re
import sys
import urllib.request
import zlib

# Request headers sent with every download.  Because 'Accept-Encoding'
# advertises gzip/deflate, responses may arrive compressed; download_page()
# undoes that before returning the body.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4,en-GB;q=0.2'
}

# An absolute http:// URL ending in ".jpg".  The character class stops at
# whitespace, quotes and angle brackets so a match cannot run past the end of
# an HTML attribute; the lazy quantifier stops at the first ".jpg".
_JPG_URL_PATTERN = re.compile(r'http://[^\s"\'<>]+?\.jpg')


def _decode_body(body, content_encoding):
    """Return *body* with the given HTTP Content-Encoding removed.

    Unknown or missing encodings leave the body untouched.
    """
    encoding = (content_encoding or '').strip().lower()
    if encoding in ('gzip', 'x-gzip'):
        return gzip.decompress(body)
    if encoding == 'deflate':
        try:
            return zlib.decompress(body)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            return zlib.decompress(body, -zlib.MAX_WBITS)
    return body


def download_page(url):
    """Fetch *url* using the module-level ``headers`` and return the body.

    Args:
        url: Address to fetch; any scheme ``urllib.request`` can open.

    Returns:
        The response body as ``bytes``, decompressed when the server applied
        gzip or deflate content encoding.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    request = urllib.request.Request(url, headers=headers)
    # Open the Request object (not the bare URL) so the headers are actually
    # sent, and close the response as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        body = response.read()
        content_encoding = response.headers.get('Content-Encoding')
    return _decode_body(body, content_encoding)


def extract_image_urls(html):
    """Return every ``http://...jpg`` URL found in *html*, in page order.

    Args:
        html: Page content as ``bytes`` or ``str``.

    Returns:
        A list of URL strings; duplicates are kept.
    """
    if isinstance(html, (bytes, bytearray)):
        # latin-1 maps every byte to one character, so decoding cannot fail
        # and the ASCII URLs come through unchanged whatever the page charset.
        html = bytes(html).decode('latin-1')
    return _JPG_URL_PATTERN.findall(html)


def get_image(html):
    """Download every JPEG linked from *html* into the working directory.

    Images are written as ``1.jpg``, ``2.jpg``, ... in the order their URLs
    appear, and a progress line is printed before each download.

    Args:
        html: Page content as ``bytes`` or ``str``.
    """
    for num, img in enumerate(extract_image_urls(html), start=1):
        # Announce the image about to be fetched, using its own number.
        print(u'正在下载第%s张图片' % num)
        image = download_page(img)
        with open('%s.jpg' % num, 'wb') as fp:
            fp.write(image)
    return


def main():
    """Fetch the gallery page and save all JPEGs it links to."""
    # Re-wrap stdout so the progress messages are encoded as GB2312.
    # NOTE(review): presumably targets a Chinese-locale console - confirm.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb2312')
    url = 'http://pic.yesky.com/180/99839180_2.shtml'
    html = download_page(url)
    get_image(html)


if __name__ == '__main__':
    main()