"""Download every image listed on a Yestone gallery page into ``./photo``.

The script fetches the gallery page, looks up the ``<div class="images-list">``
container, and saves each ``<img>`` it contains as ``<n>.jpg`` (1-based, in
document order) inside ``folder_path``.
"""
import os
import sys
import time
from io import BytesIO
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image

url = "http://www.yestone.com/gallery/1501754333627"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}
folder_path = './photo'

# Seconds to wait for a server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

# Pause between downloads, in seconds (user-adjustable delay).
DOWNLOAD_DELAY = 1

# Image modes that can be written as JPEG directly; anything else (for example
# RGBA or palette images) is converted to RGB before saving.
_JPEG_MODES = ("L", "RGB", "CMYK")


def _save_image(session, image_url, destination):
    """Fetch ``image_url`` with ``session`` and write it to ``destination``.

    Raises:
        requests.RequestException: the download failed or returned an HTTP
            error status.
        OSError: the payload is not a readable image or cannot be written.
    """
    response = session.get(image_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with Image.open(BytesIO(response.content)) as image:
        writable = image if image.mode in _JPEG_MODES else image.convert("RGB")
        writable.save(destination)


def main():
    """Scrape the gallery at ``url`` and store its images in ``folder_path``."""
    with requests.Session() as session:
        # Send the same browser-like User-Agent on every request.
        session.headers.update(headers)

        page = session.get(url, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()

        soup = BeautifulSoup(page.content, 'html.parser')
        items = soup.find('div', class_='images-list')
        if items is None:
            sys.exit("No <div class=\"images-list\"> found at %s" % url)

        os.makedirs(folder_path, exist_ok=True)

        for index, item in enumerate(items.find_all('img')):
            number = index + 1
            src = item.get('src')
            if not src:
                print("Image %d has no src attribute; skipped" % number,
                      file=sys.stderr)
                continue

            # src may be site-relative (e.g. '/_nuxt/img/...'), so resolve it
            # against the page address before requesting it.
            image_url = urljoin(page.url, src)
            destination = os.path.join(folder_path, str(number) + '.jpg')
            try:
                _save_image(session, image_url, destination)
            except (requests.RequestException, OSError) as exc:
                # One broken image should not abort the remaining downloads.
                print("Image %d (%s) failed: %s" % (number, image_url, exc),
                      file=sys.stderr)
            else:
                print('第%d张图片下载完成' % number)
            time.sleep(DOWNLOAD_DELAY)

    print('抓取完成')


if __name__ == "__main__":
    main()
# Dependencies (install before running):
#   pip install BeautifulSoup4   # HTML parsing library
#   pip install pillow           # image processing library
#   pip install requests
# Reposted from: https://blog.csdn.net/enter89/article/details/89555795?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-1.nonecase&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-1.nonecase