• 协程


    协程

    1.1 我的理解

    协程就是在函数之间切换:某段代码需要等待 IO 时,立刻切换去运行其他函数。协程最重要的是事件循环,它不断检查哪些 IO 已经就绪,就绪的就继续执行,未就绪的就先去执行其他函数,遇到 IO 就来回切换。切换发生在单线程内的函数之间,开销远小于线程切换。
    

    1.2 asyncio

    1.包含各种特定系统实现的模块化事件循环
    2.传输层和协议抽象  TCP、UDP等
    3.模仿futures模块但适用于事件循环使用的Future类
    4.基于协程(async/await),用同步代码的写法编写并发代码
    5.当必须使用一个会产生阻塞 IO 的调用时,有接口可以把这个调用提交给线程池执行。
    asyncio 是解决异步的一整套方案
    其他常见的异步框架:tornado(实现了web服务器)、gevent、twisted、sanic 等。
    

    1.2.1 asyncio 简单使用

    import asyncio
    import time
    # 事件循环+回调(驱动生成器、协程)+ epoll
    async def test():
        """Pretend to fetch a URL: pause two seconds without blocking the loop."""
        print('start get url')
        # A blocking time.sleep(2) would stall the whole thread here;
        # asyncio.sleep() suspends only this coroutine so the others can run.
        await asyncio.sleep(2)
        print('end get url')

    start_time = time.time()
    loop = asyncio.get_event_loop()
    # asyncio.wait() no longer accepts bare coroutine objects (deprecated in
    # Python 3.8, TypeError since 3.11), so wrap each coroutine in a Task first.
    tasks = [loop.create_task(test()) for _ in range(10)]
    loop.run_until_complete(asyncio.wait(tasks))
    # The ten sleeps overlap, so this prints about 2 seconds rather than 20.
    print(time.time() - start_time)
    
    # `done` is the set of tasks that have finished, `pending` the ones that have not.
    # wait() also accepts a timeout; tasks still running when it expires end up in `pending`.
    done,pending = await asyncio.wait(task_list)
    

    1.2.2 获取协程返回值

    # v1
    async def test():
        """Simulated fetch: finishes after two seconds and returns 1."""
        print('start get url')
        await asyncio.sleep(2)
        print('end get url')
        return 1

    loop = asyncio.get_event_loop()
    # Bind the future to the loop that will actually run it instead of relying
    # on the implicit "current loop" lookup, which is deprecated when no loop
    # is running.
    future = asyncio.ensure_future(test(), loop=loop)
    loop.run_until_complete(future)
    # result() returns the coroutine's return value once the future is done.
    print(future.result())
    # v2 
    async def test():
        """Stand-in for a two-second download; the coroutine's result is 1."""
        print('start get url')
        await asyncio.sleep(2)  # non-blocking pause
        print('end get url')
        return 1

    loop = asyncio.get_event_loop()
    # loop.create_task() schedules the coroutine on this loop and returns its
    # Task; it is used here in place of asyncio.ensure_future().
    task = loop.create_task(test())
    loop.run_until_complete(task)

    # The Task carries the coroutine's return value.
    print(task.result())
    

    1.2.3 callback 回调

    from functools import partial # 偏函数
    
    async def test():
        """Simulated two-second fetch whose result is 1."""
        print('start get url')
        await asyncio.sleep(2)
        print('end get url')
        return 1


    def callback(name, future):
        """Done-callback: print the pre-bound label and the finished task's result."""
        print(name, future.result())


    loop = asyncio.get_event_loop()
    task = loop.create_task(test())
    # add_done_callback() calls its callback with the finished future only,
    # so partial() pre-binds the extra leading "name" argument.
    task.add_done_callback(partial(callback, "lqw"))
    loop.run_until_complete(task)
    

    1.2.4 wait 和 gather

    1.2.4.1 wait
    import time
    # A bare "import concurrent" does not load the "futures" submodule by itself.
    import concurrent.futures


    async def test(time):
        """Sleep for ``time`` seconds, then return 1.

        NOTE: the parameter shadows the ``time`` module inside this coroutine.
        """
        print('start get url')
        await asyncio.sleep(time)
        print('end get url')
        return 1


    def callback(name, future):
        """Done-callback: print a label followed by the finished future's result."""
        print(name, future.result())


    start_time = time.time()
    loop = asyncio.get_event_loop()
    # asyncio.wait() raises TypeError for bare coroutines since Python 3.11,
    # so schedule them as Tasks first.
    tasks = [loop.create_task(test(i)) for i in range(1, 3)]
    # FIRST_COMPLETED makes wait() return as soon as the 1-second task is done.
    loop.run_until_complete(
        asyncio.wait(tasks, return_when=concurrent.futures.FIRST_COMPLETED))
    print(time.time() - start_time)  # a little over 1 second
    
    1.2.4.2 gather
    import time
    import concurrent


    async def test(time):
        """Wait ``time`` seconds without blocking, then return 1.

        NOTE: the parameter shadows the ``time`` module inside this coroutine.
        """
        print('start get url')
        await asyncio.sleep(time)
        print('end get url')
        return 1


    def callback(name, future):
        """Print a label and the result of a finished future."""
        print(name, future.result())


    start_time = time.time()
    loop = asyncio.get_event_loop()
    tasks = [test(delay) for delay in range(1, 3)]
    # gather() takes its awaitables as positional arguments, hence the * unpacking.
    loop.run_until_complete(asyncio.gather(*tasks))
    print(time.time() - start_time)
    
    gather 比 wait 更加高层,还可以对任务进行分组:
    # NOTE: task1..task4 are not defined in this snippet; they stand for
    # awaitables created elsewhere.
    group1 = [task1,task2]
    group2 = [task3,task4]
    # Each gather() call bundles one group into a single awaitable.
    group1 = asyncio.gather(*group1)
    group2 = asyncio.gather(*group2)
    # The two group awaitables are themselves passed to gather() and run together.
    loop.run_until_complete(asyncio.gather(group1,group2))
    
    1.2.4.3 取消 task(future)
    async def get_html(time):
        """Simulate a download that takes ``time`` seconds."""
        print('get ')
        await asyncio.sleep(time)
        print(' end')


    loop = asyncio.get_event_loop()
    # Schedule Tasks up front: asyncio.wait() rejects bare coroutines since 3.11.
    task1 = loop.create_task(get_html(2))
    task2 = loop.create_task(get_html(3))
    task3 = loop.create_task(get_html(3))
    tasks = [task1, task2, task3]
    try:
        loop.run_until_complete(asyncio.wait(tasks))
    except KeyboardInterrupt:
        # Ctrl+C: cancel everything still scheduled on this loop.
        # asyncio.Task.all_tasks() was removed in Python 3.9; asyncio.all_tasks()
        # is the replacement.
        for task in asyncio.all_tasks(loop):
            task.cancel()
        # stop() followed by run_forever() runs the loop for one more iteration,
        # which is required for the cancellations to actually be delivered.
        loop.stop()
        loop.run_forever()
    finally:
        loop.close()
    

    1.2.5 call_soon、call_later、call_at、call_soon_threadsafe

    def callback(sleep_times):
        """Print the value that was bound when the call was scheduled."""
        print('shijian %s'%sleep_times)


    def stop(loop):
        """Ask the given event loop to stop."""
        loop.stop()


    loop = asyncio.get_event_loop()
    # Named "now" rather than "time": rebinding "time" would shadow the time
    # module for any later code that calls time.time() or time.sleep().
    now = loop.time()
    loop.call_at(now + 1, callback, 2)   # absolute deadline on the loop.time() clock
    loop.call_soon(callback, 2)          # run on the next loop iteration
    # NOTE: stop() is queued for the next iteration as well, so run_forever()
    # returns before the call_at/call_later timers fire. Schedule it with
    # loop.call_later(3, stop, loop) instead to watch the delayed callbacks run.
    loop.call_soon(stop, loop)
    loop.call_later(2, callback, 1)      # run after a relative delay in seconds
    # Thread-safe variant of call_soon(), for scheduling from other threads.
    # It requires a callback too: calling it with no arguments raises TypeError.
    loop.call_soon_threadsafe(callback, 3)
    loop.run_forever()
    

    1.2.6 结合 线程池

    from concurrent.futures import ThreadPoolExecutor
    # 运用多线程
    def get_url(s):
        """Blocking stand-in for a download: sleep ``s`` seconds, then return 111."""
        time.sleep(s)  # blocking on purpose; this function is handed to a thread pool
        print(f'end {s}')
        return 111
    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        executor = ThreadPoolExecutor()
        # run_in_executor() runs the blocking call in the pool and returns an
        # awaitable future for it.
        tasks = [
            loop.run_in_executor(executor, get_url, seconds)
            for seconds in range(1, 3)
        ]
        loop.run_until_complete(asyncio.wait(tasks))
    

    1.2.7 asyncio 模拟 http请求,(as_completed的使用)

    from urllib.parse import urlparse
    async def get_url(url):
        """Fetch ``url`` over plain HTTP/1.1 with asyncio streams.

        Args:
            url: An ``http://`` URL. The port defaults to 80 when not given.

        Returns:
            The complete raw response (status line, headers and body) as text.
        """
        url_obj = urlparse(url)
        # hostname/port rather than netloc: netloc keeps any ":port" suffix,
        # which would break the DNS lookup in open_connection().
        host = url_obj.hostname
        port = url_obj.port or 80
        address = url_obj.path or '/'
        if url_obj.query:
            address = f"{address}?{url_obj.query}"
        host_header = host if url_obj.port is None else f"{host}:{url_obj.port}"

        reader, writer = await asyncio.open_connection(host, port)
        try:
            request = (f"GET {address} HTTP/1.1\r\n"
                       f"Host: {host_header}\r\n"
                       "Connection: close\r\n\r\n")
            writer.write(request.encode('utf8'))
            await writer.drain()  # make sure the request has left the buffer
            chunks = []
            # StreamReader supports "async for" and yields one line at a time;
            # "Connection: close" makes the server end the stream when done.
            async for line in reader:
                chunks.append(line)
            # Alternative: raw = await reader.read() reads everything up to EOF.
        finally:
            writer.close()
            await writer.wait_closed()
        # Each line still carries its own newline, so join without a separator.
        return b''.join(chunks).decode('utf8', errors='replace')
    async def main(loop):
        """Start ten fetches and print each response as soon as it is ready.

        Args:
            loop: The event loop running this coroutine (not used directly).
        """
        tasks = [asyncio.ensure_future(get_url('http://www.baidu.com')) for _ in range(10)]
        # as_completed() yields awaitables in completion order. The loop variable
        # gets its own name so it does not rebind the "tasks" list.
        for finished in asyncio.as_completed(tasks):
            result = await finished
            print(result)


    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(loop))
    

    1.2.8 task 和 future

    Task 是 Future 的子类,它把协程和线程池的 Future 统一到同一套接口下。线程提交后会自行运行,不需要 send(None);而协程必须先被启动(驱动)才会执行,因此抽象出了 Task:由 Task 负责启动并驱动协程,它是协程与 Future 之间的桥梁。从设计角度看,Task 就是用来解决协程和线程之间不一致的地方。
    

    1.2.9 协程锁

    import asyncio
    import aiohttp
    # Maps a URL to the result fetched for it; shared by every coroutine.
    cache = {}
    # Held by get_stuff() around its check-then-fetch sequence.
    lock = asyncio.Lock()
    # An asyncio.Queue gives coroutines a message-passing channel even within a single thread.
    queue = asyncio.Queue()
    async def get_stuff(url):
        """Return the cached result for ``url``, fetching it on first use.

        The module-level lock is held for the whole check-and-fetch, so two
        coroutines asking for the same URL never issue the request twice.

        NOTE(review): recent aiohttp releases document aiohttp.request() as an
        async context manager -- confirm that awaiting it directly still works
        with the installed version.
        """
        async with lock:
            if url not in cache:
                cache[url] = await aiohttp.request('get', url)
            return cache[url]
    async def test1():
        """First caller of get_stuff() for the shared key 'dada'."""
        _ = await get_stuff('dada')


    async def test2():
        """Second caller of get_stuff() for the same key 'dada'."""
        _ = await get_stuff('dada')
    

    1.3.0 异步爬虫

    import aiohttp
    import asyncio
    import re
    import aiomysql
    
    # 爬虫,去重,入库
    
    from pyquery import PyQuery
    # URLs discovered but not fetched yet; the consumer pops from the end.
    waiting_urls = []
    # URLs that have already been handled, used for de-duplication.
    seen_urls = set()
    # Polled by the consumer loop, which keeps running while this is False.
    stopping = False
    async def fetch(url, session):
        """GET ``url`` through ``session`` and return the body text.

        Returns None when the response status is neither 200 nor 201.
        """
        async with session.get(url) as response:
            if response.status not in [200, 201]:
                return None
            return await response.text()
    
    def parse_contene(html):
        """Collect absolute, not-yet-seen links from an HTML document.

        Every accepted link is appended to the module-level ``waiting_urls``
        list and is also included in the returned list.

        Args:
            html: Page source, or None/empty when the fetch produced nothing
                (``fetch`` returns None for responses other than 200/201).

        Returns:
            The list of newly queued URLs; empty when ``html`` is falsy.
        """
        # Guard first: handing None or "" to PyQuery would fail instead of
        # simply yielding no links.
        if not html:
            return []
        urls = []
        pq = PyQuery(html)
        for link in pq.items('a'):
            url = link.attr('href')
            print(url)
            # Only absolute http(s) links that have not been handled yet.
            if url and url.startswith('http') and url not in seen_urls:
                urls.append(url)
                waiting_urls.append(url)
        return urls
    
    async def consumer(pool):
        """Drain ``waiting_urls`` until ``stopping`` is set, one task per URL.

        Article URLs go to ``article_handle``; every other URL goes to
        ``init_urls`` so that its links are harvested.

        Args:
            pool: Database handle passed through to ``article_handle``.
        """
        # Raw string: "\d" in a normal string literal is an invalid escape sequence.
        article_pattern = re.compile(r'http://.*?jobbole.com.*?\d+.html')
        # The event loop keeps only weak references to tasks, so hold strong
        # ones here until each task finishes.
        in_flight = set()

        def spawn(coro):
            task = asyncio.ensure_future(coro)
            in_flight.add(task)
            task.add_done_callback(in_flight.discard)

        async with aiohttp.ClientSession() as session:
            while not stopping:
                if not waiting_urls:
                    # Nothing queued yet: yield to the loop and poll again.
                    await asyncio.sleep(0.5)
                    continue
                url = waiting_urls.pop()
                print('pop 出的',url)
                if url in seen_urls:
                    continue
                # Mark at dispatch time so that neither article pages nor list
                # pages are scheduled twice while a fetch is still in flight.
                seen_urls.add(url)
                if article_pattern.match(url):
                    # Submit one coroutine per article page.
                    spawn(article_handle(url, session, pool))
                else:
                    spawn(init_urls(url, session))
    async def article_handle(url, session, pool):
        """Fetch one article page, queue its links and print its title.

        Args:
            url: Article URL to download.
            session: HTTP session handed to ``fetch``.
            pool: Database handle; only used by the commented-out insert below.
        """
        html = await fetch(url, session)
        seen_urls.add(url)
        if html is None:
            # fetch() returns None for non-200/201 responses: nothing to parse.
            return
        parse_contene(html)
        pq = PyQuery(html)
        title = pq('title').text()
        print('标题',title)
        # async with  pool.acquire() as conn:
        #     async with conn.cursor() as cur:
        #         # insert the article here
        #         await cur.execute('')
    
    
    
    async def init_urls(start_url, session):
        """Fetch a non-article page and queue every new link found on it."""
        html = await fetch(start_url, session)
        # Record the page as handled so that it is not queued again by later parses.
        seen_urls.add(start_url)
        if html is None:
            # fetch() returns None for non-200/201 responses: nothing to parse.
            return
        parse_contene(html)
    
    
    async def main(loop):
        """Seed the crawl from the start page, then launch the consumer.

        Args:
            loop: The event loop; only referenced by the commented-out MySQL setup.
        """
        # The MySQL connection would be awaited here before crawling starts.
        start_url = 'http://www.jobbole.com/'
        #pool = await  aiomysql.connect(host='127.0.0.1', port=3306,
                                           # user='root', password='', db='mysql',
                                           # loop=loop,charset='utf8',autocommit=True)
        pool = 'xx'
        async with aiohttp.ClientSession() as session:
            html = await fetch(start_url, session)
            seen_urls.add(start_url)
            # fetch() returns None for non-200/201 responses; skip parsing then.
            if html is not None:
                parse_contene(html)

        asyncio.ensure_future(consumer(pool))
    
    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        # Schedule main() on the loop; run_forever() keeps the loop alive for
        # the consumer that main() starts.
        loop.create_task(main(loop))
        loop.run_forever()
    

    1.4 set_result 的使用

    import asyncio
    loop = asyncio.get_event_loop()


    def set_res(fur):
        """Complete the given future with the value "test"."""
        fur.set_result("test")


    async def main():
        """Wait on a bare future that a timer completes three seconds later."""
        fur = loop.create_future()
        loop.call_later(3, set_res, fur)
        # Awaiting suspends until someone sets a result; without the call_later()
        # above this would wait forever.
        print(await fur)


    loop.run_until_complete(main())
    

    1.5 future 对象

    asyncio 可以将线程池的 future 对象转换成 asyncio 里的 future 对象。
    import time
    loop = asyncio.get_event_loop()


    def mysleep(s):
        """Blocking sleep of ``s`` seconds; meant to run in an executor."""
        time.sleep(s)


    async def demo():
        """Run the blocking sleep in the default executor and await it."""
        print('start')
        # executor=None selects the loop's default executor (a thread pool); a
        # process pool can be passed instead. The call returns an awaitable future.
        fur = loop.run_in_executor(None, mysleep, 3)
        await fur
        print('end')


    # asyncio.wait() rejects bare coroutines since Python 3.11; wrap them in Tasks.
    tasks = [loop.create_task(demo()) for _ in range(3)]
    loop.run_until_complete(asyncio.wait(tasks))
    

    1.6 异步爬虫

    import asyncio
    import aiofiles
    import aiohttp
    import time
    # Image URLs downloaded by both the async and the synchronous variant below.
    img_urls  =['https://img.lianzhixiu.com/uploads/allimg/202010/9999/bfed842d57.jpg','https://img.lianzhixiu.com/uploads/allimg/202010/9999/7d0c02fd74.jpg','https://img.lianzhixiu.com/uploads/allimg/202010/9999/edb377d1b3.jpg']
    async def get_images(url, session):
        """Download one image and write it to a file named after the URL's last segment.

        Nothing is written when the response status is neither 200 nor 201.
        """
        async with session.get(url) as response:
            if response.status not in [200, 201]:
                return
            content = await response.content.read()
            filename = f'{url.split("/")[-1]}'
            async with aiofiles.open(filename, "wb") as fp:
                await fp.write(content)
    async def main():
        """Download every URL in ``img_urls`` concurrently over one shared session."""
        async with aiohttp.ClientSession() as session:
            tasks = []
            for url in img_urls:
                tasks.append(asyncio.ensure_future(get_images(url, session)))
            # Stay inside the session block until every download has finished.
            await asyncio.wait(tasks)
            
    import requests
    def request_get(url):
        """Synchronous counterpart of ``get_images``: download one image to disk.

        The file is named after the last path segment of ``url``. Nothing is
        written unless the response status is 200 or 201, matching the async
        version.
        """
        # Without a timeout, requests can block indefinitely on a stalled server.
        response = requests.get(url, timeout=10)
        if response.status_code not in (200, 201):
            return
        with open(url.split("/")[-1], "wb") as fp:
            fp.write(response.content)
    
    def main_v1():
        """Download every image in ``img_urls`` one after another (blocking)."""
        for image_url in img_urls:
            request_get(image_url)
    
    if __name__ == '__main__':
        # Async variant, kept for comparison:
        # start_time = time.time()
        # loop = asyncio.get_event_loop()
        # loop.run_until_complete(main())
        # print(time.time()-start_time)
        start_time = time.time()
        main_v1()
        elapsed = time.time() - start_time
        print(elapsed)
    # Author's measurement: async about 1 second,
    # synchronous about 2 seconds.
    

    1.7 异步迭代器

    import random
    import time
    import asyncio
    class MyAsyncIter():
        """Async iterator that yields 1 through 10, pausing 1-3 seconds before each."""

        def __init__(self):
            # Number of __anext__ calls made so far.
            self.count = 0

        def __aiter__(self):
            """Return the iterator itself, as "async for" requires."""
            return self

        async def __anext__(self):
            """Sleep a random 1-3 seconds, then return the next count.

            Raises:
                StopAsyncIteration: once the count goes past 10.
            """
            await asyncio.sleep(random.randint(1,3))
            self.count += 1
            current = self.count
            print(current)
            if current <= 10:
                return current
            raise StopAsyncIteration
    
    async def test():
        """Background coroutine: print 'asda' after 1 second and again 2 seconds later."""
        for pause in (1, 2):
            await asyncio.sleep(pause)
            print('asda')
    
    async def main():
        """Run test() in the background while consuming MyAsyncIter."""
        # Scheduled but not awaited, so it interleaves with the loop below.
        background = asyncio.ensure_future(test())
        async for value in MyAsyncIter():
            print(value)


    if __name__ == '__main__':
        asyncio.run(main())
    

    1.8 异步上下文管理器

    import asyncio
    import random
    class MyEnter():
        """Async context manager whose setup and teardown each take three seconds."""

        def __init__(self):
            """No state to initialise."""

        async def __aenter__(self):
            """Simulate slow setup, then hand back the manager itself."""
            await asyncio.sleep(3)
            print('返回了')
            return self

        async def close(self):
            """Simulate slow teardown."""
            await asyncio.sleep(3)
            print('close')

        async def __aexit__(self, *args):
            """Tear down via close() when the "async with" block is left."""
            await self.close()
    
    async def test():
        """Background coroutine: print 'end' after four seconds."""
        delay = 4
        await asyncio.sleep(delay)
        print('end')
    
    async def main():
        """Run test() in the background while entering and leaving MyEnter."""
        background = asyncio.ensure_future(test())
        async with MyEnter() as manager:
            # NOTE(review): __aexit__ calls close() again on exit, so close()
            # runs twice here -- confirm that this is intended.
            await manager.close()


    if __name__ == '__main__':
        asyncio.run(main())
    
    
    
    永远不要高估自己
  • 相关阅读:
    模板插件aTpl
    KFold交叉验证方式
    XGboost数据比赛实战
    机器学习特征选择方法
    机器学习项目实战
    python二维表格数据的导入与导出
    centos-linux系统如何进入root权限?
    centos7解决yum install mysql-server没有可用包?
    Keras实现简单分类神经网络
    MySQL学习入门总结(非常重要)
  • 原文地址:https://www.cnblogs.com/liqiangwei/p/15819770.html
Copyright © 2020-2023  润新知