• 批量下载某网的美女图片


    #!/usr/bin/python3.6
    # -*- coding: utf-8 -*-
    
    import requests
    from lxml import etree
    import time
    import os
    
    # Running totals for the whole crawl; mutated via `global` in get_content.
    sum_page = 0
    sum_images = 0
    
    # Desktop Firefox User-Agent — presumably to avoid the site rejecting the
    # default python-requests UA (TODO confirm the site actually requires it).
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
    def download_page(url):
        """Fetch *url* and return its body text, or None on any HTTP/network failure.

        Returns:
            str | None: decoded response body, or None when the request fails
            or the server answers with a non-2xx status.
        """
        try:
            r = requests.get(url, headers=headers, timeout=10)
            # Turn 4xx/5xx answers into exceptions so they hit the same path
            # as connection errors and timeouts.
            r.raise_for_status()
            return r.text
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; RequestException covers every
        # requests failure (timeout, DNS, HTTPError from raise_for_status).
        except requests.RequestException:
            print('页面访问失败,', url)
            return None
    
    def download_pic(imgUrl):
        """Request the image at *imgUrl* and hand back the raw Response object.

        The caller is responsible for streaming the body (via iter_content)
        and for handling any exception the request raises.
        """
        response = requests.get(imgUrl, headers=headers, timeout=10)
        return response
    
    def mkdir(path):
        """Ensure *path* exists as a directory and return it unchanged.

        Uses os.makedirs(exist_ok=True) instead of an exists()/mkdir() pair:
        that removes the check-then-create race (another process could create
        the directory between the two calls) and also creates any missing
        intermediate directories.

        Args:
            path: directory path to create if absent.
        Returns:
            The same *path*, for convenient chaining by callers.
        """
        os.makedirs(path, exist_ok=True)
        return path
    
    def get_content(html):
        """Parse a listing page, then download every image from each linked gallery.

        Side effects: increments the module-level sum_page/sum_images counters,
        creates ./pictures if needed, and writes one file per image into it.

        Args:
            html: HTML text of an archive listing page (as returned by
                download_page); gallery links are taken from its
                archive-row thumbnails.
        """
        global sum_page, sum_images
        selector = etree.HTML(html)
        # URLs of the individual gallery pages listed on this page.
        pages = selector.xpath('//div[@class="archive-row"]//ul/li//a[@class="thumb-link"]/@href')

        for page_url in pages:
            sum_page += 1
            print('这是下载的第%s个页面,%s' % (sum_page, page_url))
            page_html = download_page(page_url)
            if page_html is None:
                # download_page failed; skip instead of crashing inside
                # etree.HTML(None).
                continue
            page_selector = etree.HTML(page_html)
            # All image sources inside the gallery's entry-content block.
            img_lists = page_selector.xpath('//div[@class="entry-content"]//img/@src')
            # Images are saved flat under ./pictures.
            pictures = mkdir(os.path.join(os.path.curdir, 'pictures'))

            for img_url in img_lists:
                # Keep the remote file name; files with the same basename
                # from different galleries will overwrite each other.
                file_name = os.path.basename(img_url)
                img_name = os.path.join(pictures, file_name)
                response = download_pic(img_url)
                sum_images += 1
                # The original code issued a second, unused requests.get()
                # here, downloading every image twice; removed.
                with open(img_name, 'wb') as f:
                    # Stream the already-fetched response to disk in 1 KiB chunks.
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
    
    def get_content_pages(url, html):
        """Iterate over every listing page of the archive and crawl each one.

        Args:
            url: base archive URL; page N is fetched from url + '/page/' + N.
            html: HTML text of the first (landing) page, used only to read
                the total page count from the pagination buttons.
        """
        selector = etree.HTML(html)
        # NOTE(review): debug dump of the landing page, kept for behavioral
        # parity with the original script — consider removing.
        with open('temp.html', 'w', encoding='utf-8') as f:
            f.write(html)
        # The last pagination button is expected to carry the total page count.
        start_page = 1
        end_page = ''.join(
            selector.xpath('//div[@class="btn-group"]//button[last()]//text()')
        ).strip()
        if not end_page.isdigit():
            # Missing OR non-numeric button text (e.g. "下一页") falls back to
            # 30 pages; the original int() call crashed on non-numeric text.
            end_page = '30'
        last_page = int(end_page)
        # Build and crawl each listing-page URL in turn.
        while start_page <= last_page:
            page_url = url + '/page/' + str(start_page)
            html = download_page(page_url)
            if html:
                get_content(html)
            start_page += 1
    
    def main():
        """Entry point: fetch the archive landing page, crawl it, report totals."""
        url = 'https://www.jder.net/mx'
        html = download_page(url)
        # download_page returns None on failure; the original passed that
        # straight into get_content_pages, where etree.HTML(None) raises.
        if html:
            get_content_pages(url, html)
        print('共下载图片数为:', sum_images)
    
    # Run the crawler only when executed as a script, not on import.
    if __name__ == '__main__':
        main()
  • 相关阅读:
    软件工程第四次作业
    软件工程第三次作业
    图片
    软件工程第二次作业
    软件工程第一次作业
    我的大学生活-3-35-任延勇
    我的未来只有我知道
    cpu占用率高排查知识点
    LeetCode字符串题目
    hashmap
  • 原文地址:https://www.cnblogs.com/xiaojiaocx/p/15956482.html
Copyright © 2020-2023  润新知