1. HTTP协议(超文本传输协议)
浏览器===>服务器发送的请求格式如下:(浏览器告诉服务器,浏览器的信息)
GET / HTTP/1.1
Host: www.baidu.com
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
服务器==>浏览器回送的数据格式如下:(告诉浏览器,服务器的版本,按照什么格式解析)
header:告诉浏览器特殊信息(必须有HTTP/1.1 200 OK)
HTTP/1.1 200 OK
Bdpagetype: 2
Bdqid: 0xb49ba00a00010431
Cache-Control: private
Connection: Keep-Alive
Content-Encoding: gzip
Content-Type: text/html;charset=utf-8
Date: Sat, 09 Mar 2019 14:40:59 GMT
Expires: Sat, 09 Mar 2019 14:40:59 GMT
Server: BWS/1.1
Set-Cookie: BDSVRTM=70; path=/
Set-Cookie: BD_HOME=1; path=/
Set-Cookie: H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; path=/; domain=.baidu.com
Strict-Transport-Security: max-age=172800
X-Ua-Compatible: IE=Edge,chrome=1
Transfer-Encoding: chunked
Cookie: BAIDUID=969EF83E73AFFBF96897E645871A1957:FG=1; BIDUPSID=969EF83E73AFFBF96897E645871A1957; PSTM=1549905544; BD_UPN=12314353; delPer=0; BD_CK_SAM=1; PSINO=1; ___rl__test__cookies=1552141627935; OUTFOX_SEARCH_USER_ID_NCOO=1556761245.5429947; BDRCVFR[QxxZVyx49rf]=I67x6TjHwwYf0; H_WISE_SIDS=125704_114553_129323_106370_128146_128229_120193_123018_129449_118893_118871_118854_118832_118787_107312_129945_129387_129088_129558_117336_129751_117432_128791_128402_129655_128246_124639_129620_129008_128967_129641_129293_128805_129692_129838_129981_129808_127764_129482_129643_129508_124030_130091_110085_129844_123289_128842_127417_128808_129049; FEED_SIDS=231735_0309_22; plus_lsv=393c3756be30db54; BDORZ=AE84CDB3A529C0F8A2B9DCDD1D18B695; plus_cv=1::m:49a3f4a6; Hm_lvt_12423ecbc0e2ca965d84259063d35238=1552141644; SE_LAUNCH=5%3A25869027_0%3A25869027; rsv_i=caa1rmCs0PpQpYzAbKe5ZOe7IPqcdsJjz9yFp5uzkt9iporuXUkXb39N0K1sIreyWXdiYvSq2TEnLzJMu1rSJdPaAoRZgSo; Hm_lpvt_12423ecbc0e2ca965d84259063d35238=1552141679; BDRCVFR[Usf3Hj-5366]=mk3SLVN4HKm; BDUSS=RJTzhiLTA2fkFSRmxiOGZYRVZEbVVMRU1FQmNsbHJDT2xRSHlPT1ZaV2NYS3RjQUFBQUFBJCQAAAAAAAAAAAEAAABEIlRw0LC2uTkwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJzPg1ycz4NcY2; BD_HOME=1; BDRCVFR[4r8LXJfwh-6]=8QV4RScte5tfjRLnjbdnHRsg17xUvNV; H_PS_645EC=1e9fAcA0iG5RIf%2Bi4FlV0onp3XbZL2oEMPgGRb1L2abD%2BGzuKThL6MgB%2Be%2FwIJ6wYVm0C2fUHAUL; H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; sug=3; sugstore=1; ORIGIN=2; bdime=0
body
<!DOCTYPE html>
<!--STATUS OK-->
2. Python模拟返回固定页面的http服务器
import socket


def service_client(new_socket):
    """Answer one browser request with a fixed page, then close the socket.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the HTTP request sent by the browser,
        #    e.g. "GET / HTTP/1.1 ...".
        request = new_socket.recv(1024)
        print(request)

        # 2. Build the HTTP response.
        # 2.1 Header: the status line ends with CRLF, and an empty CRLF line
        #     marks the end of the header section.
        response = "HTTP/1.1 200 OK\r\n"
        response += "\r\n"
        # 2.2 Body.
        response += "haaaaaaaaaaa"

        # sendall() keeps writing until the whole response has been handed
        # to the kernel; send() may stop after a partial write.
        new_socket.sendall(response.encode("utf-8"))
    finally:
        new_socket.close()


def main():
    """Create the listening socket and serve clients one after another."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind to every local address on port 7890.
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()

            # 5. Serve this client.
            service_client(new_socket)
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
返回指定html页面
import re
import socket


def _read_requested_file(request):
    """Return the bytes of the template named in ``request``, or None.

    None is returned when the request line cannot be parsed, when the path
    contains a ``..`` segment (which would escape ``./templates``), or when
    the file cannot be opened.

    Args:
        request: Decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print(request_lines)
    if not request_lines:
        return None

    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if not ret:
        return None
    file_name = ret.group(1)
    print("*" * 50, file_name)

    # A bare "/" names a directory; serve the default page instead.
    if file_name == "/":
        file_name = "/index.html"
    # The path comes from the network: refuse to walk out of ./templates.
    if ".." in file_name.split("/"):
        return None

    try:
        with open("./templates" + file_name, "rb") as f:
            return f.read()
    except OSError:
        return None


def service_client(new_socket):
    """Serve one HTTP request from ``./templates`` and close the connection.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the peer closed without sending anything

        # 2. Build the response: status line, empty CRLF line, then the body.
        html_content = _read_requested_file(request)
        if html_content is None:
            response = b"HTTP/1.1 404 NOT FOUND\r\n\r\n"
            response += b"-----------File not found----------"
        else:
            response = b"HTTP/1.1 200 OK\r\n\r\n" + html_content

        # sendall() writes the complete response; send() may write only part.
        new_socket.sendall(response)
    finally:
        new_socket.close()


def main():
    """Create the listening socket and serve clients one after another."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()

            # 5. Serve this client.
            service_client(new_socket)
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
3. 多进程/线程实现http服务器
3.1 多进程: 需要在主进程调用new_socket.close()
import multiprocessing
import re
import socket


def _read_requested_file(request):
    """Return the bytes of the template named in ``request``, or None.

    None is returned when the request line cannot be parsed, when the path
    contains a ``..`` segment (which would escape ``./templates``), or when
    the file cannot be opened.

    Args:
        request: Decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print(request_lines)
    if not request_lines:
        return None

    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if not ret:
        return None
    file_name = ret.group(1)
    print("*" * 50, file_name)

    # A bare "/" names a directory; serve the default page instead.
    if file_name == "/":
        file_name = "/index.html"
    # The path comes from the network: refuse to walk out of ./templates.
    if ".." in file_name.split("/"):
        return None

    try:
        with open("./templates" + file_name, "rb") as f:
            return f.read()
    except OSError:
        return None


def service_client(new_socket):
    """Serve one HTTP request from ``./templates`` and close the connection.

    Runs in a child process started by ``main``.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the peer closed without sending anything

        # 2. Build the response: status line, empty CRLF line, then the body.
        html_content = _read_requested_file(request)
        if html_content is None:
            response = b"HTTP/1.1 404 NOT FOUND\r\n\r\n"
            response += b"-----------File not found----------"
        else:
            response = b"HTTP/1.1 200 OK\r\n\r\n" + html_content

        # sendall() writes the complete response; send() may write only part.
        new_socket.sendall(response)
    finally:
        new_socket.close()


def main():
    """Accept clients and hand each one to a new child process."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()

            # 5. Serve this client in a child process.
            p = multiprocessing.Process(target=service_client, args=(new_socket, ))
            p.start()

            # The child works on its own copy of the socket, so the parent
            # must close its copy too; otherwise the connection stays open
            # and the browser keeps waiting.
            new_socket.close()
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
主进程里的: new_socket.close()作用
# fd: 文件描述符, 就是一个数字, 对应一个特殊的文件, 例如网络接口
# 到子进程时候, new_socket会被复制一份, 所以要在主进程里 调用 new_socket.close()
# 主进程不调用close时, 浏览器会一直的等待, 四次挥手就不会开始!
3.2 多线程: 无需在主线程调用new_socket.close()--否则报错
import re
import socket
import threading


def _read_requested_file(request):
    """Return the bytes of the template named in ``request``, or None.

    None is returned when the request line cannot be parsed, when the path
    contains a ``..`` segment (which would escape ``./templates``), or when
    the file cannot be opened.

    Args:
        request: Decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print("request_lines:", "*" * 50, request_lines)
    if not request_lines:
        return None

    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if not ret:
        return None
    file_name = ret.group(1)
    print("*" * 50, file_name)

    # A bare "/" names a directory; serve the default page instead.
    if file_name == "/":
        file_name = "/index.html"
    # The path comes from the network: refuse to walk out of ./templates.
    if ".." in file_name.split("/"):
        return None

    try:
        with open("./templates" + file_name, "rb") as f:
            return f.read()
    except OSError:
        return None


def service_client(new_socket):
    """Serve one HTTP request from ``./templates`` and close the connection.

    Runs in a worker thread started by ``main``.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the peer closed without sending anything

        # 2. Build the response: status line, empty CRLF line, then the body.
        html_content = _read_requested_file(request)
        if html_content is None:
            response = b"HTTP/1.1 404 NOT FOUND\r\n\r\n"
            response += b"-----------File not found----------"
        else:
            response = b"HTTP/1.1 200 OK\r\n\r\n" + html_content

        # sendall() writes the complete response; send() may write only part.
        new_socket.sendall(response)
    finally:
        new_socket.close()


def main():
    """Accept clients and hand each one to a new thread."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()

            # 5. Serve this client in a thread. The thread uses this very
            #    socket object, so the main thread must NOT close it here;
            #    service_client closes it when it is done.
            p = threading.Thread(target=service_client, args=(new_socket, ))
            p.start()
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
区别: 线程比进程耗费的资源小, 以下用协程实现, 会更方便
3.3 协程: 使用gevent实现http服务器
效率最高
import socket
import re
import gevent
from gevent import monkey

# Replace blocking standard-library calls (socket operations among them) with
# gevent's cooperative versions, so a coroutine that waits on the network
# yields to the other coroutines instead of blocking the whole process.
monkey.patch_all()


def _read_requested_file(request):
    """Return the bytes of the template named in ``request``, or None.

    None is returned when the request line cannot be parsed, when the path
    contains a ``..`` segment (which would escape ``./templates``), or when
    the file cannot be opened.

    Args:
        request: Decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print("request_lines:", "*" * 50, request_lines)
    if not request_lines:
        return None

    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if not ret:
        return None
    file_name = ret.group(1)
    print("*" * 50, file_name)

    # A bare "/" names a directory; serve the default page instead.
    if file_name == "/":
        file_name = "/index.html"
    # The path comes from the network: refuse to walk out of ./templates.
    if ".." in file_name.split("/"):
        return None

    try:
        with open("./templates" + file_name, "rb") as f:
            return f.read()
    except OSError:
        return None


def service_client(new_socket):
    """Serve one HTTP request from ``./templates`` and close the connection.

    Runs in a coroutine spawned by ``main``.

    Args:
        new_socket: Connected client socket returned by ``accept()``.
    """
    try:
        # 1. Receive the browser's HTTP request.
        request = new_socket.recv(1024).decode("utf-8", errors="replace")
        if not request:
            return  # the peer closed without sending anything

        # 2. Build the response: status line, empty CRLF line, then the body.
        html_content = _read_requested_file(request)
        if html_content is None:
            response = b"HTTP/1.1 404 NOT FOUND\r\n\r\n"
            response += b"-----------File not found----------"
        else:
            response = b"HTTP/1.1 200 OK\r\n\r\n" + html_content

        # sendall() writes the complete response; send() may write only part.
        new_socket.sendall(response)
    finally:
        new_socket.close()


def main():
    """Accept clients and hand each one to a new gevent coroutine."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket.
        tcp_server_socket.listen(128)

        while True:
            # 4. Wait for a new client to connect.
            new_socket, client_addr = tcp_server_socket.accept()

            # 5. Serve this client in a coroutine. The coroutine uses this
            #    very socket object, so it must not be closed here.
            gevent.spawn(service_client, new_socket)
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
4. Web静态服务器--单进程/线程/非堵塞模式
4.1 长连接和短连接
- HTTP/1.1 长连接: 三次握手一次, 不断开的情况下, 通过一个Socket, 可以连续获取数据
- HTTP/1.0 短连接
短连接
import socket
import time

client_socker_list = list()


def poll_clients(client_sockets):
    """Make one non-blocking pass over every socket in ``client_sockets``.

    Each socket is asked for data once. A socket whose peer has closed the
    connection (``recv`` returns empty bytes) or whose connection failed is
    closed and removed from ``client_sockets`` in place.

    Args:
        client_sockets: List of connected sockets in non-blocking mode.
    """
    # Iterate over a copy: closed sockets are removed from the real list
    # inside the loop.
    for client_socket in list(client_sockets):
        try:
            recv_data = client_socket.recv(1024)
        except BlockingIOError as ret:
            # Non-blocking socket with nothing to read yet.
            print(ret)
            print("这个客户端没有发送过来数据")
            continue
        except OSError as ret:
            # For example a connection reset: handle it like a close.
            print(ret)
            recv_data = b""

        if recv_data:
            # The peer sent data.
            print("客户端发送过来了数据")
        else:
            # recv returned because the peer called close().
            client_sockets.remove(client_socket)
            client_socket.close()
            print("客户端已经关闭")


def main():
    """Serve many clients from one thread using non-blocking sockets."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket and make it non-blocking.
        tcp_server_socket.listen(128)
        tcp_server_socket.setblocking(False)

        while True:
            try:
                new_socket, new_addr = tcp_server_socket.accept()
            except BlockingIOError:
                # accept() on a non-blocking socket raises when nobody is
                # waiting to connect.
                print("没有新的客户端到来")
            else:
                print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                new_socket.setblocking(False)
                client_socker_list.append(new_socket)

            poll_clients(client_socker_list)

            time.sleep(1)
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        for client_socket in client_socker_list:
            client_socket.close()
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
- 核心: 之所以要用多线程/多进程, 是因为套接字默认是堵塞的, recv 会一直堵塞到收到数据为止
- 只要设置为非堵塞, 则可以实现 单进程单线程单任务, 还能做到多个客户端一起服务.
4.2 长连接来实现单进程/单线程--非堵塞模式(Content-Length)
上述实现都是基于短连接, 请求一次之后就断开连接了
import re
import socket

client_socker_list = list()


def _read_requested_file(request):
    """Return the bytes of the template named in ``request``, or None.

    None is returned when the request line cannot be parsed, when the path
    contains a ``..`` segment (which would escape ``./templates``), or when
    the file cannot be opened.

    Args:
        request: Decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print("request_lines:", "*" * 50, request_lines)
    if not request_lines:
        return None

    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if not ret:
        return None
    file_name = ret.group(1)
    print("*" * 50, file_name)

    # A bare "/" names a directory; serve the default page instead.
    if file_name == "/":
        file_name = "/index.html"
    # The path comes from the network: refuse to walk out of ./templates.
    if ".." in file_name.split("/"):
        return None

    try:
        with open("./templates" + file_name, "rb") as f:
            return f.read()
    except OSError:
        return None


def service_client(new_socket, request):
    """Send the response for ``request`` and leave the connection open.

    Every response carries a Content-Length header. That tells the browser
    where the body ends, so it can send the next request over the same
    connection without the server having to close it (long connection).

    Args:
        new_socket: Connected client socket.
        request: Decoded text of the HTTP request already read from it.
    """
    html_content = _read_requested_file(request)
    if html_content is None:
        response_header = "HTTP/1.1 404 NOT FOUND\r\n"
        response_body = "-----------File note found----------".encode("utf-8")
    else:
        response_header = "HTTP/1.1 200 OK\r\n"
        response_body = html_content

    response_header += "Content-Length:%d\r\n" % len(response_body)
    response_header += "\r\n"

    # Header and body are both bytes here and go out exactly once; sending
    # the body a second time would break the Content-Length framing.
    # NOTE: on a non-blocking socket sendall() can raise BlockingIOError for
    # bodies larger than the kernel send buffer.
    new_socket.sendall(response_header.encode("utf-8") + response_body)


def main():
    """Serve many keep-alive clients from one thread, without blocking."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket and make it non-blocking.
        tcp_server_socket.listen(128)
        tcp_server_socket.setblocking(False)

        while True:
            try:
                new_socket, new_addr = tcp_server_socket.accept()
            except BlockingIOError:
                # accept() on a non-blocking socket raises when nobody is
                # waiting to connect.
                print("没有新的客户端到来")
            else:
                print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                new_socket.setblocking(False)
                client_socker_list.append(new_socket)

            # Iterate over a copy: closed sockets are removed from the real
            # list inside the loop.
            for client_socket in list(client_socker_list):
                try:
                    recv_data = client_socket.recv(1024).decode(
                        "utf-8", errors="replace")
                except BlockingIOError as ret:
                    # Nothing to read from this client yet.
                    print(ret)
                    print("这个客户端没有发送过来数据")
                    continue
                except OSError as ret:
                    # For example a connection reset: handle it like a close.
                    print(ret)
                    recv_data = ""

                if recv_data:
                    # The peer sent a request.
                    print("客户端发送过来了数据")
                    service_client(client_socket, recv_data)
                else:
                    # recv returned because the peer called close().
                    client_socket.close()
                    client_socker_list.remove(client_socket)
                    print("客户端已经关闭")
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        for client_socket in client_socker_list:
            client_socket.close()
        tcp_server_socket.close()


if __name__ == '__main__':
    main()
5. Web静态服务器--epoll
5.1 IO多路复用
select/epoll的好处: 在于单个process就可以同时处理多个网络连接的IO
它的基本原理就是 select, poll, epoll 这个function会不断的轮询所负责的所有Socket, 当某个Socket有数据到达了, 就通知用户进程
5.2 epoll简单模型
1. 减少了复制的过程
2. 以事件通知的方式(高效率, 注: 轮询的方式效率很低)
3. 有一个特殊的内存, 是应用程序和Kernel共享的, 在这个内存里要添加的,监听的,判断到来的套接字对应的文件描述符, 检测时不是轮询而是事件通知。
4. 使用了内存映射(mmap)技术
5. 采用基于事件的就绪通知方式
5.3 epoll版的http服务器
#!/bin/python3
# -*- encoding=utf-8 -*-
import re
import select
import socket


def _read_requested_file(request):
    """Return the bytes of the template named in ``request``, or None.

    None is returned when the request line cannot be parsed, when the path
    contains a ``..`` segment (which would escape ``./templates``), or when
    the file cannot be opened.

    Args:
        request: Decoded text of the HTTP request.
    """
    request_lines = request.splitlines()
    print("request_lines:", "*" * 50, request_lines)
    if not request_lines:
        return None

    # The request line looks like: GET /page.html HTTP/1.1
    ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
    if not ret:
        return None
    file_name = ret.group(1)
    print("*" * 50, file_name)

    # A bare "/" names a directory; serve the default page instead.
    if file_name == "/":
        file_name = "/index.html"
    # The path comes from the network: refuse to walk out of ./templates.
    if ".." in file_name.split("/"):
        return None

    try:
        with open("./templates" + file_name, "rb") as f:
            return f.read()
    except OSError:
        return None


def service_client(new_socket, request):
    """Send the response for ``request`` and leave the connection open.

    Every response carries a Content-Length header. That tells the browser
    where the body ends, so it can send the next request over the same
    connection without the server having to close it (long connection).

    Args:
        new_socket: Connected client socket.
        request: Decoded text of the HTTP request already read from it.
    """
    html_content = _read_requested_file(request)
    if html_content is None:
        response_header = "HTTP/1.1 404 NOT FOUND\r\n"
        response_body = "-----------File note found----------".encode("utf-8")
    else:
        response_header = "HTTP/1.1 200 OK\r\n"
        response_body = html_content

    response_header += "Content-Length:%d\r\n" % len(response_body)
    response_header += "\r\n"

    # Header and body are both bytes here and go out exactly once; sending
    # the body a second time would break the Content-Length framing.
    new_socket.sendall(response_header.encode("utf-8") + response_body)


def main():
    """Serve many keep-alive clients from one thread using epoll."""
    # 1. Create the socket.
    tcp_server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Create the epoll object.
    epl = select.epoll()
    # Maps file descriptor -> client socket: {fd: socket, fd2: socket2}
    fd_event_dict = dict()
    try:
        # 2. Bind. SO_REUSEADDR lets the server be restarted immediately even
        #    though it was the side that closed the connections first.
        tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp_server_socket.bind(("", 7890))

        # 3. Turn it into a listening socket and make it non-blocking.
        tcp_server_socket.listen(128)
        tcp_server_socket.setblocking(False)

        # Register the listening socket's fd; EPOLLIN means "readable".
        listen_fd = tcp_server_socket.fileno()
        epl.register(listen_fd, select.EPOLLIN)

        while True:
            # poll() blocks until the OS reports an event, then returns a
            # list of (fd, event) pairs.
            fd_event_list = epl.poll()

            for fd, event in fd_event_list:
                # 4. A readable listening socket means a new client.
                if fd == listen_fd:
                    new_socket, client_addr = tcp_server_socket.accept()
                    epl.register(new_socket.fileno(), select.EPOLLIN)
                    fd_event_dict[new_socket.fileno()] = new_socket
                    continue

                client_socket = fd_event_dict[fd]
                recv_data = ""
                # ``event`` is a bit mask, so test the EPOLLIN bit rather than
                # comparing for equality; hang-up and error events arrive
                # without being registered and may be combined with EPOLLIN.
                if event & select.EPOLLIN:
                    try:
                        recv_data = client_socket.recv(1024).decode(
                            "utf-8", errors="replace")
                    except OSError as ret:
                        # For example a connection reset: handle like a close.
                        print(ret)

                if recv_data:
                    service_client(client_socket, recv_data)
                else:
                    # Peer closed (or the connection failed). Unregister
                    # before closing: unregistering an already closed fd
                    # raises EBADF on Python 3.9 and later.
                    epl.unregister(fd)
                    client_socket.close()
                    del fd_event_dict[fd]
    finally:
        # Reached when the loop is interrupted (e.g. Ctrl+C) or fails.
        for client_socket in fd_event_dict.values():
            client_socket.close()
        epl.close()
        tcp_server_socket.close()


if __name__ == '__main__':
    main()