socket发送请求:两种方式
方式一:
import socket
import requests

# Approach 1: let the requests library drive the socket. Each call blocks
# until the full response has arrived, so the keywords are fetched one
# after another.
key_list = ['alex', 'db', 'sb']
for item in key_list:
    # params= URL-encodes the keyword, so keywords containing spaces or
    # non-ASCII characters still produce a valid query string.
    ret = requests.get('https://www.baidu.com/s', params={'wd': item})
方式二:
import socket
from urllib.parse import quote


def get_data(key):
    """Fetch the Baidu search page for *key* over a raw TCP socket and print it.

    Args:
        key: Search keyword (``str`` or ``bytes``) sent as the ``wd`` query
            parameter.

    Raises:
        OSError: If the connection cannot be established or breaks.
    """
    # bytes %-formatting only accepts bytes-like values for %s, and the
    # keyword must be percent-encoded to be legal inside a request line.
    wd = quote(key).encode('ascii')

    # The context manager closes the socket even if send/recv raises.
    with socket.socket() as client:
        client.connect(('www.baidu.com', 80))
        # HTTP requires CRLF line endings and a blank line after the headers.
        client.sendall(b'GET /s?wd=%s HTTP/1.0\r\nhost:www.baidu.com\r\n\r\n' % wd)

        chunk_list = []
        while True:
            chunk = client.recv(1024)
            if not chunk:
                # Empty read: the server closed the connection (HTTP/1.0),
                # so the response is complete.
                break
            chunk_list.append(chunk)

    body = b''.join(chunk_list)
    # errors='replace' keeps a stray non-UTF-8 byte from aborting the demo.
    print(body.decode('utf-8', errors='replace'))


key_list = ['alex', 'db', 'sb']
for item in key_list:
    get_data(item)
解决线程阻塞:
import select
import socket

client = socket.socket()
# Non-blocking mode: connect/send/recv return immediately instead of waiting.
client.setblocking(False)

with client:
    try:
        client.connect(('www.baidu.com', 80))
    except BlockingIOError:
        # Expected: the TCP handshake is still in progress.
        pass

    # The socket becomes writable once the connect attempt has finished;
    # sending before that point would raise.
    select.select([], [client], [])
    client.sendall(b'GET /s?wd=alex HTTP/1.0\r\nhost:www.baidu.com\r\n\r\n')

    chunk_list = []
    while True:
        # Wait until there is something to read instead of spinning on recv.
        select.select([client], [], [])
        try:
            chunk = client.recv(1024)
        except BlockingIOError:
            # Spurious wake-up: nothing to read yet, wait again.
            continue
        if not chunk:
            # Empty read: the server closed the connection, response complete.
            break
        chunk_list.append(chunk)

body = b''.join(chunk_list)
# errors='replace' keeps a stray non-UTF-8 byte from aborting the demo.
print(body.decode('utf8', errors='replace'))
单线程的并发:
import select
import socket


def _start_connect(host, port=80):
    """Return a non-blocking socket whose connect to (host, port) is under way."""
    sk = socket.socket()
    sk.setblocking(False)
    try:
        sk.connect((host, port))
    except BlockingIOError:
        # Expected for a non-blocking connect: the handshake finishes later
        # and select() reports the socket as writable once it is done.
        pass
    return sk


client1 = _start_connect('www.baidu.com')
client2 = _start_connect('www.sogou.com')
client3 = _start_connect('www.oldboyedu.com')

# Request to send on each socket once its connection is established.
request_for = {
    client1: b'GET /s?wd=alex HTTP/1.0\r\nhost:www.baidu.com\r\n\r\n',
    client2: b'GET /web?query=fdf HTTP/1.0\r\nhost:www.sogou.com\r\n\r\n',
    client3: b'GET /s?wd=alex HTTP/1.0\r\nhost:www.oldboyedu.com\r\n\r\n',
}

socket_list = [client1, client2, client3]  # watched for incoming data
conn_list = [client1, client2, client3]    # watched for connect completion
chunks_for = {sk: [] for sk in socket_list}


def _finish(sk):
    """Print everything received on *sk*, close it and stop watching it."""
    print('------>', b''.join(chunks_for.pop(sk)))
    sk.close()
    socket_list.remove(sk)
    if sk in conn_list:
        conn_list.remove(sk)


# One thread serves all three connections: select() reports which sockets
# are ready, and only those are touched on each pass.
while socket_list:
    rlist, wlist, _ = select.select(socket_list, conn_list, [], 0.005)

    for sk in wlist:
        # Writable means the connect attempt finished. Stop watching for
        # writability so the request is sent exactly once.
        conn_list.remove(sk)
        try:
            sk.sendall(request_for[sk])
        except OSError:
            # The connection attempt failed; give up on this socket.
            _finish(sk)

    for sk in rlist:
        if sk not in socket_list:
            continue  # already closed during the write phase above
        while True:
            try:
                chunk = sk.recv(1024)
            except BlockingIOError:
                # Drained for now; the rest arrives on a later pass.
                break
            except OSError:
                _finish(sk)
                break
            if not chunk:
                # Empty read: the server closed the connection, so the
                # response is complete.
                _finish(sk)
                break
            chunks_for[sk].append(chunk)
IO多路复用:
检测多个socket是否发生变化(是否连接成功、是否收到数据)。
操作系统检测socket是否发生变化有三种方法: select:最多检测1024个socket,循环去检测; poll:不限制监听的个数,循环去检测(水平触发); epoll:不限制监听socket个数,回调方式(边缘触发)。
异步非阻塞:
提高并发的方案:三种
---多线程
---多进程
---异步非阻塞模块(Twisted) scrapy框架(单线程完成并发)
非阻塞:创建多个客户端对服务端进行访问时,connect 和 recv 操作默认都会阻塞。设置 setblocking(False) 之后,执行这些操作时不再等待:发起连接后立即返回,接着就可以处理下一个客户端的连接请求,以此类推。但此时 connect 和 recv 在尚未就绪时会抛出 BlockingIOError,用 try/except 捕获即可。
异步: 执行完之后会自动执行回调函数或者自动执行某些操作,比如爬虫中向某个网站发送请求,执行完成以后执行回调函数
协程IO切换:greenlet gevent
import greenlet


def f1():
    """Print 11, hand control to gr2, then print 22 and hand control over again."""
    print(11)
    gr2.switch()
    print(22)
    gr2.switch()


def f2():
    """Print 33, hand control back to gr1, then print 44 when resumed."""
    print(33)
    gr1.switch()
    print(44)


gr1 = greenlet.greenlet(f1)
gr2 = greenlet.greenlet(f2)
# Start f1; from here on the two functions hand control back and forth
# explicitly with switch().
gr1.switch()

# Printed output: 11 33 22 44
from gevent import monkey
# Patch the standard library before requests is imported, so the sockets it
# uses are gevent's cooperative ones.
monkey.patch_all()

import requests
import gevent


def get_page1(url1):
    """Download *url1* and print the URL together with the raw response body."""
    ret = requests.get(url1)
    print(url1, ret.content)


def get_page2(url1):
    """Download *url1* and print the URL together with the raw response body."""
    ret = requests.get(url1)
    print(url1, ret.content)


def get_page3(url1):
    """Download *url1* and print the URL together with the raw response body."""
    ret = requests.get(url1)
    print(url1, ret.content)


# Start the three downloads as greenlets and wait for all of them to finish.
gevent.joinall([
    gevent.spawn(get_page1, 'https://www.python.org/'),  # coroutine 1
    gevent.spawn(get_page2, 'https://www.yahoo.com/'),  # coroutine 2
    gevent.spawn(get_page3, 'https://github.com/'),  # coroutine 3
])
基于yield实现协程:
def f1():
    """Generator that prints 11, 22 and 33, pausing at a yield between prints."""
    print(11)
    yield
    print(22)
    yield
    print(33)


def f2():
    """Generator that prints 55, 66 and 77, pausing at a yield between prints."""
    print(55)
    yield
    print(66)
    yield
    print(77)


v1 = f1()
v2 = f2()

# Alternate between the two generators by hand; each next() runs one of them
# up to its next yield.
next(v1)  # same as v1.send(None); prints 11
next(v2)  # same as v2.send(None); prints 55
next(v1)  # prints 22
next(v2)  # prints 66
# The last step runs each generator off its end. The default argument makes
# next() return None instead of raising StopIteration.
next(v1, None)  # prints 33
next(v2, None)  # prints 77