### 5.单线程+多任务异步协程

**线程池:**

```python
from multiprocessing.dummy import Pool
import requests
import time

# Synchronous code: the URLs are requested one after another.
start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]
for url in urls:
    page_text = requests.get(url).text
    print(page_text)
print('总耗时:', time.time() - start)

# Asynchronous code: a pool of 3 worker threads handles the URLs.
start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]


def get_request(url):
    """Fetch `url` with requests and return the response body as text."""
    return requests.get(url).text


# Parsing step
def parse(page_text):
    """Print the length of one downloaded page."""
    print(len(page_text))


# The with-block shuts the worker threads down once both map() calls returned.
with Pool(3) as pool:
    response_list = pool.map(get_request, urls)
    print(response_list)
    pool.map(parse, response_list)
print('总耗时:', time.time() - start)
```

**协程对象**

```python
from time import sleep
import asyncio


async def get_request(url):
    print('正在请求:', url)
    sleep(2)
    print('请求结束:', url)


# Calling an async function does not execute its body;
# it only returns a coroutine object.
c = get_request('www.1.com')
print(c)
```

**任务对象**

```python
from time import sleep
import asyncio


# Callback function.
# Its single argument is the task object it was bound to.
def callback(task):
    print('i am callback!!1')
    # result() returns the return value of the coroutine function
    # that the task object wraps.
    print(task.result())


async def get_request(url):
    print('正在请求:', url)
    sleep(2)
    print('请求结束:', url)
    return 'hello bobo'


# Create the event loop object explicitly and install it as the current loop.
# Letting ensure_future()/get_event_loop() create a loop implicitly is
# deprecated on newer Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Create a coroutine object
c = get_request('www.1.com')

# Wrap the coroutine object in a task object
task = asyncio.ensure_future(c, loop=loop)

# Bind a callback to the task object; it runs after the coroutine has finished
task.add_done_callback(callback)

# Register the task object with the event loop and start the event loop
loop.run_until_complete(task)
```

#### 5.1 多任务异步协程

```python
import asyncio
from time import sleep
import time

start = time.time()
urls = [
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
]


# Code from modules that do not support async must not appear in the body
# of the coroutine function.
# Every blocking operation inside this function must be marked with await.
async def get_request(url):
    print('正在请求:', url)
    await asyncio.sleep(2)
    print('请求结束:', url)
    return 'hello bobo'


# Create the event loop first so that every task is bound to it explicitly.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Holds all task objects
tasks = []
for url in urls:
    c = get_request(url)
    task = asyncio.ensure_future(c, loop=loop)
    tasks.append(task)

loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start)
```

**在爬虫中应用多任务异步协程**

```python
import asyncio
import requests
import time

start = time.time()
urls = [
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
]


# No asynchronous effect is achieved here, because requests is a module
# that does not support async.
async def req(url):
    page_text = requests.get(url).text
    return page_text


# Create the event loop explicitly and install it as the current loop.
# Letting ensure_future()/get_event_loop() create a loop implicitly is
# deprecated on newer Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

tasks = []
for url in urls:
    c = req(url)
    task = asyncio.ensure_future(c, loop=loop)
    tasks.append(task)

loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start)
```

#### 5.2 aiohttp(requests不支持异步)

```python
import asyncio
import requests
import time
import aiohttp
from lxml import etree

urls = [
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
]


# aiohttp supports async, so the request is issued with it instead of requests.
async def req(url):
    """Download `url` with aiohttp and return the response body as text."""
    async with aiohttp.ClientSession() as s:
        # `async with` awaits the request itself, so s.get() needs no
        # separate await in front of it.
        async with s.get(url) as response:
            # response.read() returns bytes
            page_text = await response.text()
            return page_text


# Detail: put async in front of every with, and await in front of every
# blocking step.
def parse(task):
    """Done-callback: print the first <p> text of the page the task fetched."""
    page_text = task.result()
    tree = etree.HTML(page_text)
    name = tree.xpath('//p/text()')[0]
    print(name)


if __name__ == '__main__':
    start = time.time()

    # Create the event loop explicitly so that every task is bound to it;
    # implicit loop creation is deprecated on newer Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    tasks = []
    for url in urls:
        c = req(url)
        task = asyncio.ensure_future(c, loop=loop)
        task.add_done_callback(parse)
        tasks.append(task)

    loop.run_until_complete(asyncio.wait(tasks))
    print(time.time() - start)
```