在Python的Web框架中,Tornado以异步非阻塞而闻名。本文基于非阻塞Socket和I/O多路复用,实现一个异步非阻塞的Web框架。
一、异步非阻塞和io多路复用
出现的原因:
由于进程的执行过程是线性的(也就是顺序执行),当我们调用低速系统I/O(read、write、accept等)时,进程可能阻塞在这个调用上,不能执行其他操作。阻塞本身很正常。接下来考虑这样一个问题:一个服务器进程和一个客户端进程通信,服务器端调用read(sockfd1, buf, bufsize),此时客户端进程没有发送数据,那么read(阻塞调用)将一直阻塞,直到客户端调用write(sockfd, buf, size)发来数据。只有一个客户端和服务器通信时这没什么问题;但当多个客户端与服务器通信时,若服务器阻塞于其中一个客户端的sockfd1,那么当另一个客户端的数据到达套接字sockfd2时,服务器无法处理,仍然阻塞在read(sockfd1, ...)上。此时问题就出现了:不能及时处理另一个客户端的请求。怎么办?用I/O多路复用来解决!
io多路复用:
继续上面的问题:有多个客户端连接sockfd1、sockfd2、sockfd3……sockfdn,用select同时监听这n个连接;当其中任意一个发来消息时,就从select的阻塞中返回,然后调用read读取收到消息的那个sockfd,读完后再循环回到select继续阻塞。这样就不会因为阻塞在其中一个连接上而无法处理其他客户端的消息。原理:对socket对象调用setblocking(False)将其设置为非阻塞,再配合select使用。
Q: 那这样子,在读取socket1的数据时,如果其它socket有数据来,那么也要等到socket1读取完了才能继续读取其它socket的数据吧。那不是也阻塞住了吗?而且读取到的数据也要开启线程处理吧,那这和多线程IO有什么区别呢?
A:
- 单个CPU核心本来就是顺序执行的,不论什么任务都需要依次处理;真正的并行只能依靠多核CPU。
- io多路复用本来就是用来解决对多个I/O监听时,一个I/O阻塞影响其他I/O的问题,跟多线程没关系.
- 与多线程相比:线程切换需要陷入内核,会消耗时间和资源;而I/O多路复用不需要切换线程/进程,效率相对较高,特别适合高并发应用——nginx就是使用I/O多路复用,故而性能极佳。不过多线程的编程逻辑和处理比I/O多路复用简单,I/O多路复用处理起来较为复杂。
二、实现流程
1、sleep异步非阻塞
import tornado.ioloop
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Handler that answers only after a synchronous 10-second sleep.

    ``time.sleep`` blocks the calling thread, so this handler is the
    deliberately blocking variant of the example.
    """

    def get(self):
        """Sleep for 10 seconds, then write the response body."""
        import time

        time.sleep(10)
        self.write("Hello, world")
class IndexHandler(tornado.web.RequestHandler):
    """Handler that responds immediately with a fixed body."""

    def get(self):
        """Write the literal body ``Index``."""
        self.write("Index")
# Routing table: maps each URL pattern to the handler class that serves it.
application = tornado.web.Application([
    (r"/main", MainHandler),
    (r"/index", IndexHandler),
])

if __name__ == "__main__":
    application.listen(8888)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
import tornado.ioloop
import tornado.web
from tornado import gen
from tornado.concurrent import Future
import time
class MainHandler(tornado.web.RequestHandler):
    """Handler that responds about five seconds later via an IOLoop timer."""

    @gen.coroutine
    def get(self):
        """Schedule ``done`` on the IOLoop and suspend on a pending Future."""
        # Yielding a Future suspends this coroutine. Nothing in this handler
        # resolves the Future; the response is produced by ``done`` instead.
        future = Future()
        # Timer-based wait: call ``done`` five seconds from now.
        tornado.ioloop.IOLoop.current().add_timeout(time.time() + 5, self.done)
        yield future

    def done(self, *args, **kwargs):
        """Timer callback: write the body and finish the request."""
        self.write('Main')
        self.finish()
class IndexHandler(tornado.web.RequestHandler):
    """Handler that responds immediately with a fixed body."""

    def get(self):
        """Write the literal body ``Index``."""
        self.write("Index")
# Routing table: maps each URL pattern to the handler class that serves it.
application = tornado.web.Application([
    (r"/main", MainHandler),
    (r"/index", IndexHandler),
])

if __name__ == "__main__":
    application.listen(8888)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
2、requests请求异步非阻塞
import tornado.ioloop
import tornado.web


class MainHandler(tornado.web.RequestHandler):
    """Handler that performs a synchronous HTTP request before answering.

    ``requests.get`` does not return until the remote response has arrived,
    so this is the deliberately blocking variant of the example.
    """

    def get(self):
        """Fetch a remote page synchronously, then write the body."""
        import requests

        requests.get('http://www.google.com')
        self.write('xxxxx')


class IndexHandler(tornado.web.RequestHandler):
    """Handler that responds immediately with a fixed body."""

    def get(self):
        """Write the literal body ``Index``."""
        self.write("Index")


# Routing table: maps each URL pattern to the handler class that serves it.
application = tornado.web.Application([
    (r"/main", MainHandler),
    (r"/index", IndexHandler),
])

if __name__ == "__main__":
    application.listen(8888)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
import tornado.ioloop
import tornado.web
from tornado import gen


class MainHandler(tornado.web.RequestHandler):
    """Handler that fetches a remote page with Tornado's async HTTP client."""

    @gen.coroutine
    def get(self):
        """Fetch the remote page without blocking, then hand over to ``done``."""
        from tornado import httpclient

        http = httpclient.AsyncHTTPClient()
        # fetch() returns a Future; yielding it suspends only this request.
        # The callback argument of fetch() was removed in Tornado 6, so the
        # response is passed to ``done`` explicitly after the yield.
        # raise_error=False keeps ``done`` running for non-2xx responses too.
        response = yield http.fetch("http://www.google.com", raise_error=False)
        self.done(response)

    def done(self, *args, **kwargs):
        """Write the body and finish the request."""
        self.write('Main')
        self.finish()


class IndexHandler(tornado.web.RequestHandler):
    """Handler that responds immediately with a fixed body."""

    def get(self):
        """Write the literal body ``Index``."""
        self.write("Index")


# Routing table: maps each URL pattern to the handler class that serves it.
application = tornado.web.Application([
    (r"/main", MainHandler),
    (r"/index", IndexHandler),
])

if __name__ == "__main__":
    application.listen(8888)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
3、future异步非阻塞
import tornado.ioloop
import tornado.web
from tornado import gen
from tornado.concurrent import Future

# Future shared between the two handlers: /main waits on it, /index resolves it.
future = None


class MainHandler(tornado.web.RequestHandler):
    """Handler that stays pending until another request resolves ``future``."""

    @gen.coroutine
    def get(self):
        """Create the shared Future and suspend until it gets a result."""
        global future
        future = Future()
        future.add_done_callback(self.done)
        yield future

    def done(self, *args, **kwargs):
        """Done-callback of the Future: write the body and finish the request."""
        self.write('Main')
        self.finish()


class IndexHandler(tornado.web.RequestHandler):
    """Handler that resolves the shared Future and answers immediately."""

    def get(self):
        """Release a pending /main request (if any), then write ``Index``."""
        # Without a result the /main request would stay pending forever.
        # Guard against /index arriving before /main (future is still None)
        # and against resolving the same Future twice.
        if future is not None and not future.done():
            future.set_result(None)
        self.write("Index")


# Routing table: maps each URL pattern to the handler class that serves it.
application = tornado.web.Application([
    (r"/main", MainHandler),
    (r"/index", IndexHandler),
])

if __name__ == "__main__":
    application.listen(8888)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
future = Future()
原理:处理函数返回一个Future对象,框架不断检查该Future的result里有没有值;一旦有值,就把结果返回给客户端并断开连接。
4、自定义服务端web框架(不支持异步)
import re
import select
import socket


class HttpRequest(object):
    """Parsed view of one raw HTTP request.

    The constructor fills in ``method``, ``url`` and ``protocol`` from the
    request line, ``header_dict`` from the header lines, and ``body_bytes``
    from whatever follows the first blank line.
    """

    def __init__(self, content):
        """
        :param content: raw bytes sent by the client (headers and body)
        """
        self.content = content
        self.header_bytes = bytes()
        self.body_bytes = bytes()
        self.header_dict = {}
        self.method = ""
        self.url = ""
        self.protocol = ""
        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Split the raw request into header bytes and body bytes."""
        # Headers and body are separated by the first blank line (CRLF CRLF).
        parts = self.content.split(b'\r\n\r\n', 1)
        self.header_bytes += parts[0]
        if len(parts) == 2:
            self.body_bytes += parts[1]

    @property
    def header_str(self):
        """Header section decoded as text; undecodable bytes are replaced."""
        return str(self.header_bytes, encoding='utf-8', errors='replace')

    def initialize_headers(self):
        """Parse the request line and the ``name: value`` header lines."""
        lines = self.header_str.split('\r\n')
        first_line = lines[0].split(' ')
        if len(first_line) == 3:
            self.method, self.url, self.protocol = first_line
        for line in lines[1:]:
            # Split on the first colon only, so values such as
            # "localhost:9999" are kept intact.
            name, sep, value = line.partition(':')
            if sep:
                self.header_dict[name.strip()] = value.strip()


def main(request):
    """View for ``/main/``: return the response body as text."""
    return "main"


def index(request):
    """View for ``/index/``: return the response body as text."""
    return "index"


# Routing table: (regex pattern matched against the URL, view function).
routers = [
    ('/main/', main),
    ('/index/', index),
]


def _recv_available(conn):
    """Read everything currently buffered on a non-blocking socket."""
    chunks = []
    while True:
        try:
            chunk = conn.recv(1024)
        except OSError:
            # Nothing more to read right now (or the connection failed).
            break
        if not chunk:
            break
        chunks.append(chunk)
    return b"".join(chunks)


def _find_handler(url):
    """Return the first view whose pattern matches ``url``, else ``None``."""
    for pattern, func in routers:
        if re.match(pattern, url):
            return func
    return None


def run():
    """Serve requests on 127.0.0.1:9999 with a select-based loop.

    Each request is handled synchronously: read, route, respond, close.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("127.0.0.1", 9999,))
    sock.setblocking(False)
    sock.listen(128)

    inputs = [sock]

    while True:
        readable, _, _ = select.select(inputs, [], [], 0.05)
        for r in readable:
            if r == sock:
                # New connection on the listening socket.
                conn, addr = sock.accept()
                conn.setblocking(False)
                inputs.append(conn)
                continue

            # An existing client sent data (or closed the connection).
            data = _recv_available(r)
            if not data:
                # No request bytes at all: drop the connection.
                inputs.remove(r)
                r.close()
                continue

            request = HttpRequest(data)
            func = _find_handler(request.url)
            if func is not None:
                result = func(request)
                r.sendall(bytes(result, encoding='utf-8'))
            else:
                r.sendall(b"404")
            inputs.remove(r)
            r.close()


if __name__ == '__main__':
    run()
5、支持异步非阻塞的web框架:
import re
import select
import socket
import time


class HttpRequest(object):
    """Parsed view of one raw HTTP request.

    The constructor fills in ``method``, ``url`` and ``protocol`` from the
    request line, ``header_dict`` from the header lines, and ``body_bytes``
    from whatever follows the first blank line.
    """

    def __init__(self, content):
        """
        :param content: raw bytes sent by the client (headers and body)
        """
        self.content = content
        self.header_bytes = bytes()
        self.body_bytes = bytes()
        self.header_dict = {}
        self.method = ""
        self.url = ""
        self.protocol = ""
        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Split the raw request into header bytes and body bytes."""
        # Headers and body are separated by the first blank line (CRLF CRLF).
        parts = self.content.split(b'\r\n\r\n', 1)
        self.header_bytes += parts[0]
        if len(parts) == 2:
            self.body_bytes += parts[1]

    @property
    def header_str(self):
        """Header section decoded as text; undecodable bytes are replaced."""
        return str(self.header_bytes, encoding='utf-8', errors='replace')

    def initialize_headers(self):
        """Parse the request line and the ``name: value`` header lines."""
        lines = self.header_str.split('\r\n')
        first_line = lines[0].split(' ')
        if len(first_line) == 3:
            self.method, self.url, self.protocol = first_line
        for line in lines[1:]:
            # Split on the first colon only, so values such as
            # "127.0.0.1:8080" are kept intact.
            name, sep, value = line.partition(':')
            if sep:
                self.header_dict[name.strip()] = value.strip()


class Future(object):
    """Placeholder for a response that is not available yet.

    ``result`` stays ``None`` until response bytes are assigned to it;
    ``start`` and ``timeout`` let the event loop expire the request.
    """

    def __init__(self, timeout=0):
        """
        :param timeout: seconds after ``start`` at which the loop gives up
        """
        self.result = None
        self.timeout = timeout
        self.start = time.time()


def main(request):
    """View for ``/main/``: defer the response by returning a Future."""
    f = Future(5)
    return f


def index(request):
    """View for ``/index/``: return the response body as text."""
    return "indexasdfasdfasdf"


# Routing table: (regex pattern matched against the URL, view function).
routers = [
    ('/main/', main),
    ('/index/', index),
]


def _recv_available(conn):
    """Read everything currently buffered on a non-blocking socket."""
    chunks = []
    while True:
        try:
            chunk = conn.recv(1024)
        except OSError:
            # Nothing more to read right now (or the connection failed).
            break
        if not chunk:
            break
        chunks.append(chunk)
    return b"".join(chunks)


def _find_handler(url):
    """Return the first view whose pattern matches ``url``, else ``None``."""
    for pattern, func in routers:
        if re.match(pattern, url):
            return func
    return None


def run():
    """Serve requests on 127.0.0.1:8080 with a select-based loop.

    A view may return text, which is sent at once, or a ``Future``, whose
    connection is parked until the Future has a result or times out.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("127.0.0.1", 8080,))
    sock.setblocking(False)
    sock.listen(128)

    inputs = [sock]

    # Parked connections: client socket -> Future it is waiting on.
    async_request_dict = {}

    while True:
        readable, _, _ = select.select(inputs, [], [], 0.05)
        for r in readable:
            if r == sock:
                # New connection on the listening socket.
                conn, addr = sock.accept()
                conn.setblocking(False)
                inputs.append(conn)
                continue

            # An existing client sent data (or closed the connection).
            data = _recv_available(r)
            if not data:
                # No request bytes at all: drop the connection.
                inputs.remove(r)
                r.close()
                continue

            request = HttpRequest(data)
            func = _find_handler(request.url)
            if func is None:
                r.sendall(b"404")
                inputs.remove(r)
                r.close()
                continue

            result = func(request)
            if isinstance(result, Future):
                # Park the connection and stop polling it for reads, so it
                # cannot be dispatched or closed a second time while waiting.
                async_request_dict[r] = result
                inputs.remove(r)
            else:
                r.sendall(bytes(result, encoding='utf-8'))
                inputs.remove(r)
                r.close()

        # Iterate over a snapshot: entries are deleted inside the loop.
        for conn, future in list(async_request_dict.items()):
            if (future.start + future.timeout) <= time.time():
                future.result = b"timeout"
            if future.result:
                conn.sendall(future.result)
                conn.close()
                del async_request_dict[conn]


if __name__ == '__main__':
    run()
想看完整且详细用法请参考:200行自定义异步非阻塞Web框架