• 爬虫-- 初级


    普通同步代码 耗时

    import requests
    from functools import wraps
    import time
    
    def time_count(func):
        """Decorator that prints the wall-clock duration of each call to ``func``.

        The wrapped function's return value is passed through unchanged.
        """
        @wraps(func)
        def timed(*positional, **named):
            began = time.time()
            outcome = func(*positional, **named)
            elapsed = time.time() - began
            print('func {} cost {:.2f} s'.format(func.__name__, elapsed))
            return outcome
        return timed
    
    
    @time_count
    def normal():
        """Fetch the module-level ``URL`` twice, one request after the other.

        Prints the final URL of each response. The requests run sequentially,
        so the total time is the sum of both round trips.
        """
        for _ in range(2):
            # Requests applies no timeout by default; without one a stalled
            # server would block this call indefinitely.
            response = requests.get(URL, timeout=10)
            print(response.url)
            
    if __name__ == "__main__":
        # normal() reads URL as a module-level name, so it must be bound
        # before the call.
        URL = "https://morvanzhou.github.io"
        normal()
    

    异步 IO(asyncio、aiohttp 等):对于 IO 密集型任务,使用异步 IO 来处理;对于计算密集型(也就是依赖 CPU 的)任务,则采用多进程。

    import asyncio
    import aiohttp
    from functools import wraps
    import time
    
    def time_count(func):
        """Decorator that prints how long each call to ``func`` takes.

        The first positional argument (``None`` when there is none) is printed
        next to the function name as a tag. Coroutine functions get an
        ``async`` wrapper that awaits the call, so the reported time covers
        the awaited work rather than just the creation of the coroutine
        object. The wrapped function's result is returned unchanged.
        """
        # Function-scope import keeps this snippet self-contained.
        import inspect

        def _report(tag, start):
            elapsed = time.perf_counter() - start
            print('func {}-{} cost {:.2f} s'.format(func.__name__, tag, elapsed))

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def inner_func(*args, **kw):
                start = time.perf_counter()
                # Guarded lookup: a bare args[0] raises IndexError on
                # keyword-only calls.
                tag = args[0] if args else None
                result = await func(*args, **kw)
                _report(tag, start)
                return result
        else:
            @wraps(func)
            def inner_func(*args, **kw):
                start = time.perf_counter()
                tag = args[0] if args else None
                result = func(*args, **kw)
                _report(tag, start)
                return result
        return inner_func
    
    @time_count
    async def job(session):
        """Request the module-level ``URL`` through ``session``.

        Returns the final URL of the response as a string.
        """
        # Using the response as an async context manager releases the
        # connection once we are done with it instead of leaving it open.
        async with session.get(URL) as response:
            return str(response.url)
    
    # @time_count()
    async def main(loop):
        """Run two ``job`` requests concurrently on ``loop`` and print their results.

        A single ``aiohttp.ClientSession`` is shared by both requests and is
        closed when the ``async with`` block exits.
        """
        async with aiohttp.ClientSession() as session:
            tasks = [loop.create_task(job(session)) for _ in range(2)]
            # gather() returns results in the same order as ``tasks``;
            # asyncio.wait() hands back unordered sets.
            all_results = await asyncio.gather(*tasks)
            print(all_results)
    
    if __name__ == '__main__':
        URL = 'https://morvanzhou.github.io'
        # asyncio.get_event_loop() is deprecated when no loop is running, so
        # create the loop explicitly and close it once the work is done.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(main(loop))
        finally:
            loop.close()
    
    

    另一个 异步 举例

    import asyncio
    from functools import wraps
    import time
    
    def time_count(func):
        """Decorator that prints how long each call to ``func`` takes.

        The first positional argument (``None`` when there is none) is printed
        next to the function name as a tag. Coroutine functions get an
        ``async`` wrapper that awaits the call, so the reported time covers
        the awaited work rather than just the creation of the coroutine
        object. The wrapped function's result is returned unchanged.
        """
        # Function-scope import keeps this snippet self-contained.
        import inspect

        def _report(tag, start):
            elapsed = time.perf_counter() - start
            print('func {}-{} cost {:.2f} s'.format(func.__name__, tag, elapsed))

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def inner_func(*args, **kw):
                start = time.perf_counter()
                # Guarded lookup: a bare args[0] raises IndexError on
                # keyword-only calls.
                tag = args[0] if args else None
                result = await func(*args, **kw)
                _report(tag, start)
                return result
        else:
            @wraps(func)
            def inner_func(*args, **kw):
                start = time.perf_counter()
                tag = args[0] if args else None
                result = func(*args, **kw)
                _report(tag, start)
                return result
        return inner_func
    
    @time_count
    async def job(t):
        """Pause this coroutine for ``t`` seconds via ``asyncio.sleep``."""
        await asyncio.sleep(delay=t)
    
    @time_count
    async def main(loop):
        """Schedule ``job(0)``, ``job(1)`` and ``job(2)`` on ``loop`` and wait for all of them."""
        scheduled = []
        for delay in range(3):
            scheduled.append(loop.create_task(job(delay)))
        await asyncio.wait(scheduled)
        
    if __name__ == '__main__':
        # asyncio.get_event_loop() is deprecated when no loop is running, so
        # create the loop explicitly and close it once the work is done.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(main(loop))
        finally:
            loop.close()
    

    同步举例

    from functools import wraps
    import time
    
    def time_count(func):
        """Wrap ``func`` so that every call prints its elapsed wall-clock time.

        The report is printed after ``func`` returns; the return value is
        handed back to the caller untouched.
        """
        @wraps(func)
        def measured(*call_args, **call_kwargs):
            t0 = time.time()
            value = func(*call_args, **call_kwargs)
            t1 = time.time()
            message = 'func {} cost {:.2f} s'.format(func.__name__, t1 - t0)
            print(message)
            return value
        return measured
    
    @time_count
    def job(t):
        """Block the calling thread for ``t`` seconds via ``time.sleep``."""
        time.sleep(t)
    
    @time_count
    def main():
        """Run ``job(0)``, ``job(1)`` and ``job(2)`` one after another."""
        # A plain loop: the calls are made only for their side effects, so
        # there is no reason to build a throwaway list of their results.
        for seconds in range(3):
            job(seconds)
        
    
    if __name__ == "__main__":
        # Script entry point: run the sequential demo.
        main()
        
        
    
  • 相关阅读:
    VB与SQL Server实现文件上传下载
    rszl数据表和crjsj数据表的关联查询
    网吧忘关QQ的后果
    走 近 WSH
    形容长得丑的30句经典句子
    关机VBS脚本
    C51单片机中断定义
    .NET架构的核心技术
    SQL SERVER的命令行工具Osql的用法
    七七情人节
  • 原文地址:https://www.cnblogs.com/Frank99/p/10397334.html
Copyright © 2020-2023  润新知