• aiohttp 异步http请求3.异步批量下载图片 上海


    前言

    当我们需要批量下载图片的时候,用 requests 库一个个同步下载会比较慢,一旦某个请求阻塞,后面的请求都会被卡住,程序呈现假死状态。当然,用多线程也能提高效率。
    这里介绍用aiohttp 异步批量下载图片

    异步批量下载图片

    话不多说,直接看代码

    import aiohttp
    import asyncio
    from pathlib import Path
    
    
    async def down_img(session, url):
        """Download one image and save it under ``./down_img/``.

        Args:
            session: An open ``aiohttp.ClientSession`` used to issue the GET
                request.
            url: Image URL; its last ``/``-separated segment becomes the local
                file name.

        Returns:
            The requested URL as a ``str``.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status (raised by ``raise_for_status()``).
            OSError: If the target file cannot be opened or written, e.g.
                when the ``./down_img/`` directory does not exist.
        """
        name = url.split('/')[-1]  # last path segment is used as the file name
        # ``async with`` guarantees the response (and its pooled connection)
        # is released even when reading fails or the task is cancelled.
        async with session.get(url) as img:
            # Do not store an HTTP error page as if it were an image.
            img.raise_for_status()
            # Awaiting hands control back to the event loop until the whole
            # body has been received.
            content = await img.read()
        with open('./down_img/'+str(name), 'wb') as f:
            f.write(content)
        print(f'{name} 下载完成!')
        return str(url)
    
    
    async def main(URL):
        """Download every image in ``URL`` concurrently and print the results.

        Args:
            URL: Iterable of image URLs; one download task is created per URL.

        Raises:
            Exception: Whatever a download task raised is re-raised when its
                result is collected, after all tasks have finished.
        """
        # NOTE(review): ssl=False switches off TLS certificate verification to
        # avoid SSL errors; acceptable for a demo, insecure otherwise.
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            # Schedule one task per URL so the downloads overlap.
            tasks = [asyncio.create_task(down_img(session, img_url)) for img_url in URL]
            # asyncio.wait() raises ValueError for an empty collection, so
            # only wait when there is something to download.
            if tasks:
                await asyncio.wait(tasks)
            # Collect from ``tasks`` rather than the unordered ``done`` set so
            # the results come back in the same order as the input URLs.
            all_results = [task.result() for task in tasks]
            print("ALL RESULT:"+str(all_results))
    
    
    # Sample image URLs downloaded by this script.
    URL = [
        'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg',
        'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg',
        'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg',
        'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg',
        'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg',
        'https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg',
        'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg',
        'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg'
    ]

    # Create the output directory; exist_ok avoids the check-then-create race
    # of a separate exists() test.
    fp = Path('./down_img')
    fp.mkdir(exist_ok=True)


    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; calling get_event_loop() without a running
    # loop is deprecated.
    asyncio.run(main(URL))
    

    运行结果

    mountain-477832_960_720.jpg 下载完成!
    railroad-163518_960_720.jpg 下载完成!
    maldives-3220702_960_720.jpg 下载完成!
    dolomites-2580866_960_720.jpg 下载完成!
    pier-1467984_960_720.jpg 下载完成!
    plane-513641_960_720.jpg 下载完成!
    iceberg-404966_960_720.jpg 下载完成!
    sea-1014710_960_720.jpg 下载完成!
    ALL RESULT:['https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg', 'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg', 'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg', 'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg', 'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg', 'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg', 'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg', 'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg']
    

    Semaphore控制并发

    上面的代码是把8个url一起加入到并发任务,当url数量很多的时候,我们希望可以控制并发量,于是可以用到Semaphore控制并发。

    semaphore = asyncio.Semaphore(2)  # limit concurrency to 2
    

    优化后的代码

    import aiohttp
    import asyncio
    from pathlib import Path
    
    
    async def down_img(session, url, semaphore):
        """Download one image under a concurrency limit and save it.

        The whole download, including the file write, runs while holding
        ``semaphore``, so at most as many downloads as the semaphore allows
        are in progress at once.

        Args:
            session: An open ``aiohttp.ClientSession`` used to issue the GET
                request.
            url: Image URL; its last ``/``-separated segment becomes the local
                file name.
            semaphore: ``asyncio.Semaphore`` shared by all download tasks.

        Returns:
            The requested URL as a ``str``.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status (raised by ``raise_for_status()``).
            OSError: If the target file cannot be opened or written, e.g.
                when the ``./down_img/`` directory does not exist.
        """
        name = url.split('/')[-1]  # last path segment is used as the file name
        async with semaphore:
            # ``async with`` guarantees the response (and its pooled
            # connection) is released even when reading fails or the task is
            # cancelled.
            async with session.get(url) as img:
                # Do not store an HTTP error page as if it were an image.
                img.raise_for_status()
                # Awaiting hands control back to the event loop until the
                # whole body has been received.
                content = await img.read()
            with open('./down_img/'+str(name), 'wb') as f:
                f.write(content)
            print(f'{name} 下载完成!')
        return str(url)
    
    
    async def main(URL):
        """Download every image in ``URL`` with at most two at a time.

        Args:
            URL: Iterable of image URLs; one download task is created per URL.

        Raises:
            Exception: Whatever a download task raised is re-raised when its
                result is collected, after all tasks have finished.
        """
        # Shared by all tasks; only two may hold it at once.
        semaphore = asyncio.Semaphore(2)
        # NOTE(review): ssl=False switches off TLS certificate verification to
        # avoid SSL errors; acceptable for a demo, insecure otherwise.
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            # Schedule one task per URL; the semaphore throttles them.
            tasks = [asyncio.create_task(down_img(session, img_url, semaphore)) for img_url in URL]
            # asyncio.wait() raises ValueError for an empty collection, so
            # only wait when there is something to download.
            if tasks:
                await asyncio.wait(tasks)
            # Collect from ``tasks`` rather than the unordered ``done`` set so
            # the results come back in the same order as the input URLs.
            all_results = [task.result() for task in tasks]
            print("ALL RESULT:"+str(all_results))
    
    
    # Sample image URLs downloaded by this script.
    URL = [
        'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg',
        'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg',
        'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg',
        'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg',
        'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg',
        'https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg',
        'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg',
        'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg'
    ]

    # Create the output directory; exist_ok avoids the check-then-create race
    # of a separate exists() test.
    fp = Path('./down_img')
    fp.mkdir(exist_ok=True)

    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; calling get_event_loop() without a running
    # loop is deprecated.
    asyncio.run(main(URL))
    
    

    使用TCPConnector控制并发

    TCPConnector 使用 limit 参数限制同时打开的连接数(默认为 100),从而控制并发数

        conn = aiohttp.TCPConnector(limit=2, ssl=False)
        # 建立会话session
        async with aiohttp.ClientSession(connector=conn) as session:
    

    完整代码如下

    import aiohttp
    import asyncio
    from pathlib import Path
    
    
    async def down_img(session, url):
        """Download one image and save it under ``./down_img/``.

        Args:
            session: An open ``aiohttp.ClientSession`` used to issue the GET
                request.
            url: Image URL; its last ``/``-separated segment becomes the local
                file name.

        Returns:
            The requested URL as a ``str``.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status (raised by ``raise_for_status()``).
            OSError: If the target file cannot be opened or written, e.g.
                when the ``./down_img/`` directory does not exist.
        """
        name = url.split('/')[-1]  # last path segment is used as the file name
        # ``async with`` guarantees the response is released even when reading
        # fails or the task is cancelled, so its connection goes back to the
        # pool.
        async with session.get(url) as img:
            # Do not store an HTTP error page as if it were an image.
            img.raise_for_status()
            # Awaiting hands control back to the event loop until the whole
            # body has been received.
            content = await img.read()
        with open('./down_img/'+str(name), 'wb') as f:
            f.write(content)
        print(f'{name} 下载完成!')
        return str(url)
    
    
    async def main(URL):
        """Download every image in ``URL`` over at most two connections.

        Args:
            URL: Iterable of image URLs; one download task is created per URL.

        Raises:
            Exception: Whatever a download task raised is re-raised when its
                result is collected, after all tasks have finished.
        """
        # limit=2 caps the connector at two simultaneous connections.
        # NOTE(review): ssl=False switches off TLS certificate verification;
        # acceptable for a demo, insecure otherwise.
        conn = aiohttp.TCPConnector(limit=2, ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            # Schedule one task per URL; the connector throttles the requests.
            tasks = [asyncio.create_task(down_img(session, img_url)) for img_url in URL]
            # asyncio.wait() raises ValueError for an empty collection, so
            # only wait when there is something to download.
            if tasks:
                await asyncio.wait(tasks)
            # Collect from ``tasks`` rather than the unordered ``done`` set so
            # the results come back in the same order as the input URLs.
            all_results = [task.result() for task in tasks]
            print("ALL RESULT:"+str(all_results))
    
    
    # Sample image URLs downloaded by this script.
    URL = [
        'https://cdn.pixabay.com/photo/2014/10/07/13/48/mountain-477832_960_720.jpg',
        'https://cdn.pixabay.com/photo/2013/07/18/10/56/railroad-163518_960_720.jpg',
        'https://cdn.pixabay.com/photo/2018/03/12/20/07/maldives-3220702_960_720.jpg',
        'https://cdn.pixabay.com/photo/2017/08/04/17/56/dolomites-2580866_960_720.jpg',
        'https://cdn.pixabay.com/photo/2016/06/20/03/15/pier-1467984_960_720.jpg',
        'https://cdn.pixabay.com/photo/2014/07/30/02/00/iceberg-404966_960_720.jpg',
        'https://cdn.pixabay.com/photo/2014/11/02/10/41/plane-513641_960_720.jpg',
        'https://cdn.pixabay.com/photo/2015/10/30/20/13/sea-1014710_960_720.jpg'
    ]

    # Create the output directory; exist_ok avoids the check-then-create race
    # of a separate exists() test.
    fp = Path('./down_img')
    fp.mkdir(exist_ok=True)

    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; calling get_event_loop() without a running
    # loop is deprecated.
    asyncio.run(main(URL))
    
    
  • 相关阅读:
    Delphi 农历算法
    Installing Custom Maps for L4D
    双网卡,上网走外网网卡,内网走内网网卡设置
    L4D的指令合集
    两个RGB的颜色半透明混色算法
    中国省级行政区划变动情况
    Win7编程:在按钮中加入管理员权限运行
    教你快速识别手机质量的好坏
    如何利用预编译指令来判断Delphi 的版本?
    在.NET中读写INI文件 ——兼谈正则表达式的应用
  • 原文地址:https://www.cnblogs.com/yoyoketang/p/16170421.html
Copyright © 2020-2023  润新知