• requests_html使用asyncio


    import asyncio
    import functools
    from concurrent.futures.thread import ThreadPoolExecutor
    from requests_html import HTMLSession
    import sys
    # One HTMLSession shared by every request issued through get_response.
    # NOTE(review): session.get is run on the executor passed to get_response
    # (main() passes a thread pool) — confirm sharing one session across
    # threads is safe for this use.
    session = HTMLSession()
    
    
    async def get_response(executor, *, url, loop: asyncio.AbstractEventLoop = None, ):
        """Schedule a blocking ``session.get(url)`` call on *executor*.

        The coroutine does not wait for the HTTP request itself: it hands back
        the future created by ``run_in_executor`` so that the caller can await
        or gather it later.

        Args:
            executor: Executor passed straight to ``loop.run_in_executor``.
            url: Address given to ``session.get``.
            loop: Event loop to schedule on; when falsy, the currently running
                loop is used.

        Returns:
            The (not yet awaited) future returned by ``loop.run_in_executor``.
        """
        target_loop = loop if loop else asyncio.get_running_loop()
        # Bind the URL now so the executor receives a zero-argument callable.
        fetch = functools.partial(session.get, url)
        return target_loop.run_in_executor(executor, fetch)
    
    
    async def bulk_requests(executor, *,
                            urls,
                            loop: asyncio.AbstractEventLoop = None, ):
        """Asynchronously yield one scheduled request per entry of *urls*.

        Each yielded item is whatever ``await get_response(...)`` produces for
        that URL, in the same order as *urls*.

        Args:
            executor: Executor forwarded to ``get_response``.
            urls: Iterable of addresses to request.
            loop: Optional event loop forwarded to ``get_response``.
        """
        for address in urls:
            scheduled = await get_response(executor, url=address, loop=loop)
            yield scheduled
    
    
    def filter_unsuccesful_requests(responses_and_exceptions):
        """Filter out failed requests, keeping only the successful pairs.

        Despite what the name may suggest, the returned iterator contains the
        entries whose value is NOT an exception.

        Args:
            responses_and_exceptions: Mapping of URL to either a response
                object or the exception captured for that URL.

        Returns:
            A lazy iterator of ``(url, response)`` pairs for the entries whose
            value is not an exception instance.
        """
        # BaseException rather than Exception: asyncio.gather(...,
        # return_exceptions=True) can hand back asyncio.CancelledError, which
        # derives from BaseException on Python 3.8+, and it must not be
        # mistaken for a successful response.
        return filter(
            lambda url_and_response: not isinstance(url_and_response[1], BaseException),
            responses_and_exceptions.items()
        )
    
    
    async def main():
        """Fetch a fixed list of pages concurrently and print each page title.

        Requests are scheduled on a 10-worker thread pool through
        ``bulk_requests`` and collected with ``asyncio.gather`` using
        ``return_exceptions=True``, so one failing URL does not abort the
        others. Titles of successful pages go to stdout; URLs that failed, or
        pages without a title, are reported on stderr.
        """
        urls = [
            "https://baidu.com",
            "https://cnblogs.com",
            "https://163.com",
        ]
        # The context manager shuts the pool down once the requests have
        # finished, instead of leaving worker threads around until exit.
        with ThreadPoolExecutor(10) as executor:
            pending = [request async for request in bulk_requests(executor, urls=urls, )]
            responses_and_exceptions = dict(zip(urls, await asyncio.gather(*pending, return_exceptions=True)))
        responses = {url: resp.html for (url, resp) in filter_unsuccesful_requests(responses_and_exceptions)}

        for url, html in responses.items():
            titles = html.xpath("//head//title//text()")
            if titles:
                print(titles[0])
            else:
                # A page without a <title> would otherwise raise IndexError.
                print(f"No title found for {url}", file=sys.stderr)

        for url in urls:
            if url not in responses:
                print(f"No successful request could be made to {url}. Reason: {responses_and_exceptions[url]}",
                      file=sys.stderr)
    
    
    # Script entry point: run the main() coroutine to completion.
    asyncio.run(main())
    
    
  • 相关阅读:
    第十六节,基本数据类型,字典dict
    第十五节,基本数据类型,元组tuple
    第十四节,基本数据类型,列表list
    linux rm 命令修改
    linux 创建用户和密码
    linux 权限
    system
    一个tomcat上部署多个项目,并通过不同端口号访问不同的项目
    Java 清理和垃圾回收
    static 方法
  • 原文地址:https://www.cnblogs.com/c-x-a/p/11028356.html
Copyright © 2020-2023  润新知