#!/usr/local/bin/python3.7
"""
@File    : qiushibaike.py
@Time    : 2020/04/06
@Author  : Mozili

Download the images shown on a user-chosen range of Qiushibaike
"imgrank" pages into a local directory.
"""
import os
import re
import time
import urllib.parse
import urllib.request

# Root used to resolve the image links found in a page into absolute URLs.
_SITE_ROOT = 'https://www.qiushibaike.com/'

# Directory (relative to the current working directory) that receives images.
_IMAGE_DIR = 'Reptile/images'

# Captures the ``src`` of the <img> nested in each <div class="thumb">.
# re.S lets ``.`` span newlines, since the markup is spread over several lines.
_THUMB_IMAGE_PATTERN = re.compile(
    r'<div class="thumb">.*?<img src="(.*?)" .*?">.*?</div>', re.S
)


def handler_request(url):
    """Fetch *url* with a browser-like User-Agent and return the decoded body.

    Args:
        url: Address of the page to request.

    Returns:
        The response body decoded with ``bytes.decode()``'s default (UTF-8).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15'
    }
    # Build the request.
    req = urllib.request.Request(url=url, headers=headers)
    # Send it; the context manager closes the connection once the body is read.
    with urllib.request.urlopen(req) as rep:
        return rep.read().decode()


def _extract_image_urls(content):
    """Return the absolute URLs of all thumbnail images found in *content*."""
    # urljoin resolves protocol-relative ("//host/x.jpg"), root-relative and
    # already-absolute links alike, where a plain 'https:' prefix only
    # handles the first form.
    return [
        urllib.parse.urljoin(_SITE_ROOT, src)
        for src in _THUMB_IMAGE_PATTERN.findall(content)
    ]


def download_image(content):
    """Download every thumbnail image referenced in *content*.

    Images are saved under ``Reptile/images`` using the last component of
    the URL path as the file name. Nothing is created on disk when
    *content* contains no matching image.

    Args:
        content: HTML text of a listing page.

    Raises:
        urllib.error.URLError: Propagated from ``urlretrieve`` when a
            download fails.
        OSError: If the target directory or a file cannot be written.
    """
    image_urls = _extract_image_urls(content)
    if not image_urls:
        return

    # Create the target directory once, including missing parent directories.
    os.makedirs(_IMAGE_DIR, exist_ok=True)

    for image_url in image_urls:
        # Name the file after the URL path only, so a query string or
        # fragment never ends up in the file name.
        image_name = os.path.basename(urllib.parse.urlsplit(image_url).path)
        if not image_name:
            # No usable file name in the link; there is nothing sensible
            # to save it as.
            continue
        image_path = os.path.join(_IMAGE_DIR, image_name)

        print('图片{}开始下载....'.format(image_name))
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)
        urllib.request.urlretrieve(image_url, image_path)


def main():
    """Ask for a start and end page, then download the images of each page.

    Raises:
        ValueError: If either page number entered is not an integer.
    """
    # Ask which page range to crawl (both ends inclusive).
    start_page = int(input('请输入起始页码:'))
    end_page = int(input('请输入结束页码:'))
    for i in range(start_page, end_page + 1):
        url = 'https://www.qiushibaike.com/imgrank/page/' + str(i) + '/'
        # Fetch the listing page.
        content = handler_request(url)
        # Pull the image links out of the page and download them.
        download_image(content)
        print('第%s页下载结束...' % i)
        time.sleep(1)
        # Blank lines separate the output of consecutive pages.
        print()
        print()


if __name__ == "__main__":
    main()