• 宝马5系图片分类下载:自动创建分类文件夹并保存图片


    import os
    import requests
    from lxml import etree
    from urllib import request
    
    
    # Browser-like HTTP headers; parse() passes them to requests.get.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/79.0.3945.88 Safari/537.36"
        ),
    }
    
    def parse(url, timeout=10):
        """Fetch a gallery page and return its image URLs grouped by category.

        Args:
            url: Address of the page to download and parse.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a stalled connection would block indefinitely.

        Returns:
            A list of dicts, one per category box found on the page, each of
            the form ``{'category': <title text>, 'img_urls': [<url>, ...]}``.
            Boxes without a title link are skipped.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of silently parsing an error page.
        response.raise_for_status()

        html = etree.HTML(response.text)
        if html is None:
            # Guard for the case where the parser produced no tree at all.
            return []

        items = []
        # NOTE(review): the first 'uibox' is skipped, as in the original code;
        # presumably it is not a category box - confirm against the page markup.
        for uibox in html.xpath("//div[@class='uibox']")[1:]:
            titles = uibox.xpath("./div[@class='uibox-title']/a/text()")
            if not titles:
                # No title link means there is no category name to file under.
                continue

            # Only add the scheme when the src does not already carry one,
            # so URLs that are already absolute are left untouched.
            img_urls = [
                src if src.startswith(("http://", "https://")) else "https:" + src
                for src in uibox.xpath(".//li//img/@src")
            ]
            items.append({'category': titles[0], 'img_urls': img_urls})
        return items
    
    def pipeline(url):
        """Download every image listed on the page into per-category folders.

        Images are saved under ``images/<category>/`` next to this script;
        folders that do not exist yet are created.

        Args:
            url: Address of the gallery page; it is handed to ``parse``.
        """
        # abspath keeps the target stable even when __file__ is a relative path.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        imgpath = os.path.join(base_dir, "images")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(imgpath, exist_ok=True)

        for item in parse(url):
            category_path = os.path.join(imgpath, item['category'])
            os.makedirs(category_path, exist_ok=True)

            for img_url in item['img_urls']:
                # File name is the text after the last underscore. Any path
                # segment left in it is dropped so the name can never point
                # into a sub-folder that does not exist.
                img_name = img_url.split('_')[-1].rsplit('/', 1)[-1]
                savepath = os.path.join(category_path, img_name)
                request.urlretrieve(img_url, savepath)
                print(img_name, "下载完成")
    
    
    if __name__ == "__main__":
        # Script entry point: download the image gallery of this series page.
        pipeline("https://car.autohome.com.cn/pic/series/65.html#pvareaid=3454507")
    
    
    
    
    
    
    
  • 相关阅读:
    MVC模式简介
    UEditor插入表格没有边框但有间距
    MVC准备前基础知识
    如何关闭ie9烦人的提示信息?
    javaScript中利用ActiveXObject来创建FileSystemObject操作文件
    win7下IIS安装与配置运行网站
    javascript函数
    减小SSN影响
    EMC (电磁兼容性)
    电源完整性设计
  • 原文地址:https://www.cnblogs.com/zxfei/p/12148817.html
Copyright © 2020-2023  润新知