# -*- coding: utf-8 -*-
"""Download every image embedded in the <article> of a Tuchong photo page.

The script prompts for a page URL (for example
https://wangxu.tuchong.com/23892889/), collects the ``src`` attribute of each
``<img>`` element found inside ``<article>``, and saves the images into a
folder in the current working directory.  The folder is named after the first
label of the page's host name, and the files are numbered from 1 in page order.

Notes:
    * Files are always written with a ``.png`` extension, whatever the real
      image format is.
    * This source file must be saved as UTF-8 (the Python 3 default), because
      it contains non-ASCII string literals.
"""
import os
import re
import urllib.parse

import requests
from fake_useragent import UserAgent
from lxml import etree

# XPath selecting the source URL of every image inside the page's <article>.
IMG_XPATH = '//article//img/@src'

# Captures the first host label of an http(s) URL, e.g. "wangxu" in
# "https://wangxu.tuchong.com/...".  A character class is used instead of
# ".+?" followed by "." so the capture cannot stop after a single character.
ALBUM_NAME_RE = re.compile(r'https?://([^./:]+)')

# Upper bound, in seconds, for each HTTP request; without one a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30


def album_name(page_url: str) -> str:
    """Return the first host-name label of *page_url*.

    Args:
        page_url: The page address, expected to start with http:// or https://.

    Returns:
        The text between the scheme and the first ``.``, ``/`` or ``:``.

    Raises:
        ValueError: If *page_url* does not contain an http(s) URL.
    """
    match = ALBUM_NAME_RE.search(page_url)
    if match is None:
        raise ValueError('not an http(s) URL: {!r}'.format(page_url))
    return match.group(1)


def find_image_urls(page_url: str, headers: dict) -> list:
    """Fetch *page_url* and return the absolute URLs of its article images.

    Args:
        page_url: Address of the page to scan.
        headers: HTTP headers sent with the request.

    Returns:
        One URL per ``<img>`` inside ``<article>``, in document order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    tree = etree.HTML(response.text)
    # urljoin turns protocol-relative ("//host/x.jpg") and relative src values
    # into absolute URLs; already-absolute ones pass through unchanged.
    return [urllib.parse.urljoin(page_url, src) for src in tree.xpath(IMG_XPATH)]


def download_images(img_urls: list, folder: str, headers: dict) -> None:
    """Save each URL in *img_urls* to *folder*, numbering files from 1.

    Args:
        img_urls: Absolute image URLs, in the order they should be numbered.
        folder: Destination directory; created if it does not exist.
        headers: HTTP headers sent with every image request.

    Raises:
        requests.HTTPError: If any image request returns an error status.
    """
    if not img_urls:
        # Nothing to save: do not leave an empty folder behind.
        return
    os.makedirs(folder, exist_ok=True)
    for num, img_url in enumerate(img_urls, start=1):
        # Each image is fetched exactly once and the same response is written
        # to disk, so the User-Agent header applies to the actual download.
        response = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        target = os.path.join(folder, '图{}.png'.format(num))
        with open(target, 'wb') as image_file:
            image_file.write(response.content)
        print("第{}张图片下载完毕".format(num))


def main() -> None:
    """Prompt for a Tuchong page URL and download its article images."""
    url = input("请输入图虫网图片地址:").strip()
    headers = {
        'User-Agent': UserAgent().chrome
    }
    # Derive the folder name first so a malformed URL fails before any
    # network traffic happens.
    name = album_name(url)
    img_urls = find_image_urls(url, headers)
    download_images(img_urls, '图虫_{}'.format(name), headers)


if __name__ == '__main__':
    main()

# 2020-07-15