做web开发的你,真的熟悉web服务器处理机制吗?
分析请求数据
下面是一段原始的请求数据:
b'GET / HTTP/1.1\r\nHost: 127.0.0.1:8000\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: zh-CN,zh;q=0.8\r\nCookie: csrftoken=gnaOXtlMlCxgjq62jUzBjv6XedPpVvxotWq2R6KpTv9dK2eHCwXlApyNEl9anzp9\r\n\r\n'
是不是有种似曾相识的感觉?没错,打开 Chrome 开发者工具,随便打开一个网页,就可以看到如上信息,只不过发送到服务器端时就是上面的格式,各行之间以 `\r\n` 分隔。
- 通过分析以上内容,可以发现各项(请求行、各个请求头)之间都是以 `\r\n` 分隔
- 最后以 2 个 `\r\n`(即 `\r\n\r\n`)结束,也就是我们在响应给客户端时,响应头和响应内容中间同样要用 2 个 `\r\n` 分隔
简单的webserver实现
1、静态资源
主要思路如下:
- 开启socket服务监听客户端连接
- 采用多进程方式实现并发连接处理
- 基于报文分析规则,拼接响应内容,输出到客户端
代码如下:
`
# coding:utf-8
import socket
import re
from multiprocessing import Process
HTML_ROOT_DIR = './static'
class WebServer(object):
    """A minimal static-file web server.

    Listens on a TCP socket and answers each accepted connection in its own
    child process with the content of a file below the document root.
    """

    def __init__(self, html_root=None):
        """Create the listening socket.

        Args:
            html_root: Directory the static files are served from. Defaults
                to the module-level ``HTML_ROOT_DIR``.
        """
        self.html_root = HTML_ROOT_DIR if html_root is None else html_root
        self.sock_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # SO_REUSEADDR lets the server rebind its port right after a restart.
        self.sock_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    def bind(self, port):
        """Bind the listening socket to ``port`` on all local interfaces."""
        self.sock_server.bind(('', port))

    def start(self):
        """Accept connections forever, handling each one in a new process."""
        self.sock_server.listen(128)
        while True:
            sock_client, addr = self.sock_server.accept()
            print('[%s,%s]用户已连接......' % addr)
            handle_client_process = Process(target=self.handle_client, args=(sock_client,))
            handle_client_process.start()
            # The child process serves the connection; the parent only needs
            # to release its own handle.
            sock_client.close()

    def handle_client(self, sock_client):
        """Read one request from ``sock_client``, send the response, close it.

        Args:
            sock_client: Connected client socket. It is always closed before
                this method returns, even if building the response fails.
        """
        try:
            recv_data = sock_client.recv(1024)
            resp_data = self._build_response(recv_data)
            if resp_data:
                # sendall() keeps writing until the whole response is sent;
                # send() may stop after a partial write.
                sock_client.sendall(resp_data)
        finally:
            sock_client.close()

    def _build_response(self, recv_data):
        """Return the raw response bytes for the raw request ``recv_data``.

        Returns ``b''`` when the client sent nothing, a 400 response when the
        request line cannot be parsed, a 404 response when the file is missing
        or the path tries to leave the document root, and otherwise a 200
        response carrying the file content unchanged.
        """
        req_lines = recv_data.splitlines()
        if not req_lines:
            # The client closed the connection without sending a request.
            return b''

        req_start_line = req_lines[0].decode('utf-8', 'replace')
        matched = re.match(r'\w+ +(/[^ ]*) ', req_start_line)
        if matched is None:
            return self._make_response('400 Bad Request', b'Bad request!')

        # Only the path selects the file; drop any "?query" part.
        file_name = matched.group(1).split('?', 1)[0]
        if '/' == file_name:
            file_name = '/index.html'

        # The path comes from the network: refuse ".." segments so a request
        # cannot read files outside the document root.
        if '..' in re.split(r'[/\\]', file_name):
            return self._make_response('404 Not Found', b'The file is not found!')

        try:
            with open(self.html_root + file_name, 'rb') as static_file:
                file_data = static_file.read()
        except IOError:
            return self._make_response('404 Not Found', b'The file is not found!')

        # The body stays as bytes so images and other binary files work too.
        return self._make_response('200 OK', file_data)

    @staticmethod
    def _make_response(status, body):
        """Join status line, headers, the blank separator line and ``body``."""
        head = 'HTTP/1.1 %s\r\nServer: My Web Server\r\n\r\n' % status
        return head.encode('utf-8') + body
def main():
    """Create the web server, bind it to port 8000 and serve forever."""
    web_server = WebServer()
    web_server.bind(8000)
    web_server.start()


if __name__ == '__main__':
    main()
`
- 地址栏输入:127.0.0.1:8000
2、动态资源
主要思路如下:
- 开启socket服务监听客户端连接
- 采用多进程方式实现并发连接处理
- 基于报文分析规则,如果是以.py结尾的请求,自动分发到相应的模块做处理
web服务器,代码如下:
`
# coding:utf-8
import socket
import re
import sys
from multiprocessing import Process
HTML_ROOT_DIR = './static'
WSGI_PY = './wsgipy'
class WebServer(object):
    """A minimal web server for static files and ``.py`` applications.

    Each accepted connection is handled in its own child process. A request
    whose path ends in ``.py`` is dispatched to the ``application`` callable
    of the module with that name; every other path is served as a file below
    the document root.
    """

    def __init__(self, html_root=None):
        """Create the listening socket.

        Args:
            html_root: Directory the static files are served from. Defaults
                to the module-level ``HTML_ROOT_DIR``.
        """
        self.html_root = HTML_ROOT_DIR if html_root is None else html_root
        self.sock_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # SO_REUSEADDR lets the server rebind its port right after a restart.
        self.sock_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    def bind(self, port):
        """Bind the listening socket to ``port`` on all local interfaces."""
        self.sock_server.bind(('', port))

    def start(self):
        """Accept connections forever, handling each one in a new process."""
        self.sock_server.listen(128)
        while True:
            sock_client, addr = self.sock_server.accept()
            print('[%s,%s]用户已连接......' % addr)
            handle_client_process = Process(target=self.handle_client, args=(sock_client,))
            handle_client_process.start()
            # The child process serves the connection; the parent only needs
            # to release its own handle.
            sock_client.close()

    def start_response(self, status, headers):
        """Record the status line and headers chosen by the application.

        The result is stored in ``self.resp_headers`` and later combined with
        the body the application returns.

        Args:
            status: Status code and reason phrase, e.g. ``"200 OK"``.
            headers: Iterable of ``(name, value)`` pairs, e.g.
                ``[("Content-Type", "text/plain")]``.
        """
        header_lines = ''.join('%s: %s\r\n' % tuple(header) for header in headers)
        self.resp_headers = 'HTTP/1.1 ' + status + '\r\n' + header_lines

    def handle_client(self, sock_client):
        """Read one request from ``sock_client``, send the response, close it.

        Args:
            sock_client: Connected client socket. It is always closed before
                this method returns, even if building the response fails.
        """
        try:
            recv_data = sock_client.recv(1024)
            resp_data = self._build_response(recv_data)
            if resp_data:
                # sendall() keeps writing until the whole response is sent;
                # send() may stop after a partial write.
                sock_client.sendall(resp_data)
        finally:
            sock_client.close()

    def _build_response(self, recv_data):
        """Return the raw response bytes for the raw request ``recv_data``.

        Returns ``b''`` when the client sent nothing and a 400 response when
        the request line cannot be parsed; otherwise the request goes to the
        dynamic or the static handler depending on the path suffix.
        """
        req_lines = recv_data.splitlines()
        if not req_lines:
            # The client closed the connection without sending a request.
            return b''

        req_start_line = req_lines[0].decode('utf-8', 'replace')
        matched = re.match(r'(\w+) +(/[^ ]*) ', req_start_line)
        if matched is None:
            return self._make_response('400 Bad Request', b'Bad request!')

        method = matched.group(1)
        # Only the path selects the handler; drop any "?query" part.
        file_name = matched.group(2).split('?', 1)[0]

        if file_name.endswith('.py'):
            env = {
                'PATH_INFO': file_name,
                'METHOD': method
            }
            return self._run_application(file_name[1:-3], env)
        return self._serve_static(file_name)

    def _run_application(self, module_name, env):
        """Import ``module_name`` and build the response from its ``application``."""
        app = None
        # NOTE(security): the module name comes straight from the request, so
        # any importable module on sys.path can be loaded. Only plain
        # identifiers are accepted here; a production server should use an
        # explicit allow-list instead.
        if module_name.isidentifier():
            try:
                app = __import__(module_name).application
            except Exception:
                # Importing runs the module's top-level code, which may fail
                # in any way; a module without `application` also lands here.
                app = None

        if app is None:
            self.resp_headers = 'HTTP/1.1 404 Not Found\r\n'
            resp_body = 'not found'
        else:
            # The application reports status and headers via start_response.
            resp_body = app(env, self.start_response)

        if isinstance(resp_body, str):
            resp_body = resp_body.encode('utf-8')
        return self.resp_headers.encode('utf-8') + b'\r\n' + resp_body

    def _serve_static(self, file_name):
        """Build the response for the static file addressed by ``file_name``."""
        if '/' == file_name:
            file_name = '/index.html'

        # The path comes from the network: refuse ".." segments so a request
        # cannot read files outside the document root.
        if '..' in re.split(r'[/\\]', file_name):
            return self._make_response('404 Not Found', b'The file is not found!')

        try:
            with open(self.html_root + file_name, 'rb') as static_file:
                file_data = static_file.read()
        except IOError:
            return self._make_response('404 Not Found', b'The file is not found!')

        # The body stays as bytes so images and other binary files work too.
        return self._make_response('200 OK', file_data)

    @staticmethod
    def _make_response(status, body):
        """Join status line, headers, the blank separator line and ``body``."""
        head = 'HTTP/1.1 %s\r\nServer: My Web Server\r\n\r\n' % status
        return head.encode('utf-8') + body
def main():
    """Make the application modules importable, then serve on port 8000."""
    # Modules requested as "<name>.py" are imported by name, so their
    # directory has to be on the import path.
    sys.path.insert(1, WSGI_PY)
    web_server = WebServer()
    web_server.bind(8000)
    web_server.start()
if __name__ == '__main__':
    main()
`
helloworld.py:
`
# coding:Utf-8
def application(env, start_response):
    """Answer every request with the plain-text body ``hello world``.

    Args:
        env: Request information supplied by the web server (unused here).
        start_response: Callable taking ``(status, headers)``; it is called
            once to hand the status line and headers back to the server.

    Returns:
        The response body as a string.
    """
    status = '200 OK'
    headers = [
        ('Content-Type', 'text/plain'),
    ]
    start_response(status, headers)
    return 'hello world'
`
主要技术点解析:
1、根据请求的 .py 名称自动导入模块,使用了一个小魔法:m = __import__('filename') ,它会返回对应的模块对象,然后通过该模块调用其中的具体处理函数 application。
2、在具体的应用中设置状态码、响应头;application(env,start_response)其中env是request信息,start_response是web服务器中的函数,通过调用start_response(status,headers)把状态码和headers传递给web服务器,这样web服务器就可以拿到status、headers拼接response
3、具体应用中返回响应体response_body,也就是应用中的return 'hello world',结合2中的status、headers得到最终的response data发给客户端
代码已上传到github:python 小程序