• 抓取 URL 中的图片并保存到本地(demo)


    import requests
    from lxml import etree
    from furl import furl
    
    # Page to scrape.
    url = 'https://dsd.com'
    # Always pass a timeout: without one, requests may block indefinitely on an
    # unresponsive server. 5 seconds matches the timeout used by download().
    html = requests.get(url, timeout=5).text
    
    # Parse the fetched markup into an lxml tree so it can be queried with XPath.
    element = etree.HTML(html)
    # Reference notes -- alternative extraction patterns:
    #   regex:  re.findall('"objURL":"(.*?)",', html, re.S)
    #   //div/img/@src
    #   li[contains(@title, 'Province')]
    #   [@href and @lmv='TV series']
    #   [@href|@lmv]
    #   item[@company_name='" + strArray[0] + "' and @already_sent='0']
    #   xpath('//div[contains(@class,"a") and contains(@class,"b")]')
    #   //div[contains(concat(' ', @class, ' '), 'demo')]
    #
    # Collect the text nodes of every <img> nested under the reader container.
    # NOTE(review): <img> is a void element, so './text()' presumably yields an
    # empty list for each match; './@src' may have been intended -- confirm.
    imgs = [img.xpath('./text()')
            for img in element.xpath('//div[@class="reader-container"]/div//img')]
    
    
    # Inline sample markup used in place of a fetched page. The first image
    # carries its address in `src`, the second one in `data-src`.
    html = '''<div class="mod flow-ppt-mod">
    <div class="page-1 ppt-page-item  batch-50-1" id="pageNo-1">
    <div class="ppt-image-wrap ppt-16-9">
    <img src="https://sdsd.com?pn=1" alt="">
    </div>
    </div>
    <div class="page-2 ppt-page-item  batch-50-1" id="pageNo-2">
    <div class="ppt-image-wrap ppt-16-9">
    <img data-src="https://sdsd.com?pn=2">
    </div>
    </div>'''
    
    
    # Re-parse: from here on `element` refers to the sample markup above.
    element = etree.HTML(html)
    
    # For every <img> below a <div>, take its `src` values first and then its
    # `data-src` values, flattening everything into a single list of URLs.
    urls = [
        link
        for node in element.xpath('//div//img')
        for link in node.xpath('./@src') + node.xpath('./@data-src')
    ]
    
    
    def download(url):
        """Download the image at *url* and save it as ``<pn>.jpg``.

        The file name is taken from the ``pn`` query parameter of *url*, and the
        file is written into a hard-coded folder on the author's desktop.

        If the request fails with a connection error or a timeout, a message is
        printed and nothing is written. The URL must carry a ``pn`` query
        parameter; the lookup is not guarded.
        """
        try:
            pic = requests.get(url, timeout=5)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            print('图片无法下载')
            # Nothing was fetched: bail out instead of falling through to
            # `pic.content`, where `pic` would be unbound.
            return
        # Build the destination path from the `pn` query parameter. furl parses
        # the query string, which replaces splitting on '?', '&' and '=' by hand.
        f = furl(url)
        path = r'C:\Users\Semi-Luy\Desktop\ppt' + '\\' + f.args['pn'] + '.jpg'
        # The context manager closes the file even if the write raises.
        with open(path, 'wb') as fp:
            fp.write(pic.content)
    
    # Announce the start of the run, then process the collected URLs one at a
    # time, echoing each URL before handing it to download().
    print("开始下载图片:\r\n")
    for url in urls:
        print(url)
        download(url)
    

      

  • 相关阅读:
    摊还分析
    web端手机方向传感器闲谈
    研一一年论文总结(下)
    Jupyter自定义设置详解
    HAProxy实现动静分离和负载均衡
    欧拉项目 323题
    mysql基本操作
    以后的IT路还很长(1)
    【翻译】在Ext JS集成第三方库
    吴裕雄--天生自然JAVA SPRING框架开发学习笔记:Spring IoC容器BeanFactory和ApplicationContext
  • 原文地址:https://www.cnblogs.com/iupoint/p/15624274.html
Copyright © 2020-2023  润新知