import requests
from lxml import etree
from furl import furl

# XPath quick reference kept from the original notes:
#   //div/img/@src                                    attribute of a child node
#   //li[contains(@title, 'text')]                    substring match on an attribute
#   [@href and @lmv='value']                          both conditions must hold
#   [@href|@lmv]                                      union of two attributes
#   item[@attrA='x' and @attrB='0']                   two attribute equality tests
#   //div[contains(@class,"a") and contains(@class,"b")]
#   //div[contains(concat(' ', @class, ' '), 'demo')] match one class token

url = 'https://dsd.com'
html = requests.get(url).text
# An earlier regex-based approach, kept for reference:
#   re.findall('"objURL":"(.*?)",', html, re.S)
element = etree.HTML(html)
# NOTE(review): `imgs` is not read again in this file; it looks like a
# leftover experiment against the live page -- confirm before removing.
imgs = [
    img.xpath('./text()')
    for img in element.xpath('//div[@class="reader-container"]/div//img')
]

# Sample markup used instead of the live page from here on. Images expose
# their address either through `src` or through `data-src`.
html = '''<div class="mod flow-ppt-mod">
<div class="page-1 ppt-page-item batch-50-1" id="pageNo-1">
<div class="ppt-image-wrap ppt-16-9">
<img src="https://sdsd.com?pn=1" alt="">
</div>
</div>
<div class="page-2 ppt-page-item batch-50-1" id="pageNo-2">
<div class="ppt-image-wrap ppt-16-9">
<img data-src="https://sdsd.com?pn=2">
</div>
</div>'''
element = etree.HTML(html)
# Collect every `src` and `data-src` value of every <img> nested in a <div>.
urls = [
    url
    for img in element.xpath('//div//img')
    for url in img.xpath('./@src') + img.xpath('./@data-src')
]


def download(url, save_dir=r'C:\Users\Semi-Luy\Desktop\ppt'):
    """Fetch one image and store it as ``<save_dir>\\<pn>.jpg``.

    The file name is taken from the ``pn`` query parameter of *url*
    (presumably the page number -- confirm against the target site), so
    the URL must carry that parameter.

    Args:
        url: Address of the image to fetch.
        save_dir: Directory the image is written into. Defaults to the
            location the script has always used.

    Returns:
        None. When the request fails with a connection error or a timeout,
        a message is printed and nothing is written.
    """
    try:
        pic = requests.get(url, timeout=5)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        print('图片无法下载')
        # Without this return the code below would hit an unbound `pic`.
        return

    # Build the destination path. furl parses the query string, replacing
    # the manual split on '?', '&' and '=' that was used before.
    f = furl(url)
    path = save_dir + '\\' + f.args['pn'] + '.jpg'
    # `with` guarantees the handle is closed even if the write fails.
    with open(path, 'wb') as fp:
        fp.write(pic.content)


print("开始下载图片:\r\n")
for url in urls:
    print(url)
    download(url)