简要地分析一下一个正常的GET请求从到达服务器到生成正常的response响应信息的过程。以Python标准库中的BaseHTTPServer模块为例,以下是代码:
from BaseHTTPServer import BaseHTTPRequestHandler
import urlparse
class GetHandler(BaseHTTPRequestHandler):
def do_GET(self):
parsed_path = urlparse.urlparse(self.path)
message_parts = [
'CLIENT VALUES:',
'client_address=%s (%s)' % (self.client_address,
self.address_string()),
'command=%s' % self.command,
'path=%s' % self.path,
'real path=%s' % parsed_path.path,
'query=%s' % parsed_path.query,
'request_version=%s' % self.request_version,
'',
'SERVER VALUES:',
'server_version=%s' % self.server_version,
'sys_version=%s' % self.sys_version,
'protocol_version=%s' % self.protocol_version,
'',
'HEADERS RECEIVED:',
]
for name, value in sorted(self.headers.items()):
message_parts.append('%s=%s' % (name, value.rstrip()))
message_parts.append('')
message = '
'.join(message_parts)
self.send_response(200)
self.end_headers()
self.wfile.write(message)
return
if __name__ == '__main__':
    # Script entry point: serve GetHandler on localhost:8080 until interrupted.
    from BaseHTTPServer import HTTPServer

    server = HTTPServer(('localhost', 8080), GetHandler)
    print('Starting server, use <Ctrl-C> to stop')
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the advertised way to stop; exit without a traceback.
        pass
    finally:
        # Release the listening socket.
        server.server_close()
程序运行后,首先建立一个HTTPServer服务器,将服务器地址和请求处理类绑定到服务器,初始化时完成绑定的代码如下:
class BaseServer:
    """Excerpt of the server base class.

    Stores the server address and the request handler class that the
    server will instantiate for each request.
    """

    # Class-level default; this excerpt only shows it being set to None.
    timeout = None

    def __init__(self, server_address, RequestHandlerClass):
        """Bind the address and the handler class to the server instance.

        Args:
            server_address: Address the server is associated with.
            RequestHandlerClass: Class instantiated to handle each request.
        """
        self.server_address = server_address
        self.RequestHandlerClass = RequestHandlerClass
        # Shutdown bookkeeping: an event object plus the flag that the
        # serving loop tests on every iteration.
        self.__is_shut_down = threading.Event()
        self.__shutdown_request = False
class TCPServer(BaseServer):
    """TCP server excerpt (part of the original code is omitted)."""

    # NOTE(review): __init__ reads these two attributes but the excerpt did
    # not show them; the values below are the usual IPv4/TCP constants --
    # confirm against the SocketServer source.
    address_family = socket.AF_INET
    socket_type = socket.SOCK_STREAM

    def __init__(self, server_address, RequestHandlerClass,
                 bind_and_activate=True):
        """Store the address/handler via BaseServer and create the socket.

        ``bind_and_activate`` is accepted but not used in this excerpt.
        """
        BaseServer.__init__(self, server_address, RequestHandlerClass)
        self.socket = socket.socket(self.address_family,
                                    self.socket_type)
上述类的继承关系是:BaseServer(SocketServer模块)---->TCPServer(SocketServer模块)---->HTTPServer(BaseHTTPServer模块)。实例化时,TCPServer调用父类BaseServer的__init__方法,完成服务器地址和请求处理类的绑定。
接着服务器对象调用serve_forever()方法,等待客户端请求。该方法通过select轮询监听套接字(threading模块在这里只用于创建标记服务器关闭状态的Event对象),当有客户端请求进来时,调用_handle_request_noblock方法,此方法调用get_request获得一个socket对象赋值给request,并得到客户端地址client_address。
def serve_forever(self, poll_interval=0.5):
    """Handle requests one at a time until a shutdown is requested.

    Polls the server socket with select(), passing ``poll_interval`` as the
    timeout, and calls _handle_request_noblock() whenever the server shows
    up in the readable list.
    """
    self.__is_shut_down.clear()
    try:
        while not self.__shutdown_request:
            r, w, e = _eintr_retry(select.select, [self], [], [],
                                   poll_interval)
            if self in r:
                self._handle_request_noblock()
    finally:
        # A try block needs a handler; reset the shutdown state on exit so
        # the loop can be started again.
        self.__shutdown_request = False
        self.__is_shut_down.set()
def _handle_request_noblock(self):
try:
request, client_address = self.get_request() #get_request方法是TCPServer类的方法
except socket.error:
return
if self.verify_request(request, client_address):
try:
self.process_request(request, client_address)
except:
self.handle_error(request, client_address) #对于错误的响应方法,我们不去讨论
self.shutdown_request(request)
def get_request(self):
    """Accept a pending connection on the server socket.

    Returns whatever ``self.socket.accept()`` returns, i.e. the connected
    socket and the client address.  The listening socket itself is created
    in ``TCPServer.__init__``.
    """
    return self.socket.accept()
get_request返回后,接着调用verify_request验证request和client_address,源码中该方法直接return True,所以总是会通过。通过后,调用process_request,process_request方法调用finish_request,finish_request实例化RequestHandlerClass,也就是初始化时传进来的类。
def process_request(self, request, client_address):
    """Handle one request, then shut it down.

    Delegates the actual work to finish_request() and afterwards calls
    shutdown_request() on the same request object.
    """
    self.finish_request(request, client_address)
    self.shutdown_request(request)
def finish_request(self, request, client_address):
    """Instantiate the handler class for this request.

    The handler receives the request, the client address and the server
    itself; constructing it is what triggers the request handling.
    """
    self.RequestHandlerClass(request, client_address, self)
将请求传给RequestHandlerClass后,进入对请求的处理和响应阶段。RequestHandlerClass的实例化:
class BaseRequestHandler:
    """Base class for request handlers.

    Constructing an instance runs the whole request lifecycle:
    setup(), then handle(), then finish() (finish() runs even if handle()
    raises).
    """

    def __init__(self, request, client_address, server):
        """Store the request context and run the handling lifecycle."""
        self.request = request
        self.client_address = client_address
        self.server = server
        self.setup()
        try:
            self.handle()
        finally:
            self.finish()

    # No-op hooks so the base class is usable on its own; subclasses
    # override the ones they need.
    def setup(self):
        """Prepare for handling the request (default: do nothing)."""
        pass

    def handle(self):
        """Handle the request (default: do nothing)."""
        pass

    def finish(self):
        """Clean up after handling the request (default: do nothing)."""
        pass
初始化首先调用setup方法:
def setup(self):
self.connection = self.request
if self.timeout is not None:
self.connection.settimeout(self.timeout)
if self.disable_nagle_algorithm:
self.connection.setsockopt(socket.IPPROTO_TCP,
socket.TCP_NODELAY, True)
#调用makefile方法,返回一个与socket相对应的文件对象赋值给相应的属性, 后面的代码raw_requestline要用到.
self.rfile = self.connection.makefile('rb', self.rbufsize)
self.wfile = self.connection.makefile('wb', self.wbufsize)
setup调用完后,接着调用handle方法,此方法调用handle_one_request方法:
def handle(self):
    """Handle one or more requests on the connection.

    Defined in the BaseHTTPServer module.  Always handles one request, then
    keeps handling further requests for as long as handle_one_request()
    leaves ``close_connection`` false.
    """
    self.close_connection = 1
    self.handle_one_request()
    while not self.close_connection:
        self.handle_one_request()
def handle_one_request(self):
    """Read, parse and dispatch a single HTTP request.

    Reads the request line from ``rfile``, parses it with parse_request()
    and calls the ``do_<COMMAND>`` method matching the parsed command.
    Sends 414 when the request line exceeds 65536 bytes and 501 when no
    matching ``do_*`` method exists.  A socket timeout is logged and marks
    the connection for closing.
    """
    try:
        # rfile is the file object created in setup().
        self.raw_requestline = self.rfile.readline(65537)
        if len(self.raw_requestline) > 65536:
            # Over-long request line: error response, not discussed here.
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return
        if not self.raw_requestline:
            self.close_connection = 1
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        # Dispatch on the parsed command; for a GET request this is do_GET.
        method = getattr(self, mname)
        method()
        # actually send the response if not already done.
        self.wfile.flush()
    except socket.timeout as e:
        # a read or a write timed out.  Discard this connection
        self.log_error("Request timed out: %r", e)
        self.close_connection = 1
        return
上述handle_one_request方法中,首先调用parse_request方法解析请求;解析成功后,根据得到的command值调用对应的do_<command>方法。首先分析parse_request方法:
def parse_request(self):
    """Parse the raw request line and headers into handler attributes.

    Sets ``command``, ``path``, ``request_version``, ``requestline`` and
    ``headers`` on the handler and returns True.  This is an excerpt: parts
    of the original method are omitted where noted.
    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = 1
    # raw_requestline was obtained via
    # self.raw_requestline = self.rfile.readline(65537).
    requestline = self.raw_requestline
    # Strip the line terminator (escaped CRLF, not a raw line break).
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    if len(words) == 3:
        # For a GET of the home page these are mostly
        # 'GET', '/', 'HTTP/1.0'.
        command, path, version = words
    # (part of the code omitted in this excerpt -- presumably the handling
    # of request lines that do not have exactly three words; TODO confirm)
    # Store command, path and request version as instance attributes.
    self.command, self.path, self.request_version = command, path, version
    # Examine the headers and look for a Connection directive
    self.headers = self.MessageClass(self.rfile, 0)
    # (code omitted in this excerpt)
    # Normal return: the caller goes on to invoke the command.
    return True
当parse_request正常返回后,就会调用与command对应的方法,这里就是在GetHandler类中定义的do_GET方法。
def do_GET(self):
parsed_path = urlparse.urlparse(self.path) #解析path信息
message_parts = [
'CLIENT VALUES:',
'client_address=%s (%s)' % (self.client_address,
self.address_string()),
'command=%s' % self.command,
'path=%s' % self.path,
'real path=%s' % parsed_path.path,
'query=%s' % parsed_path.query,
'request_version=%s' % self.request_version,
'',
'SERVER VALUES:',
'server_version=%s' % self.server_version,
'sys_version=%s' % self.sys_version,
'protocol_version=%s' % self.protocol_version,
'',
'HEADERS RECEIVED:',
]
for name, value in sorted(self.headers.items()): #迭代在parse_request中得到的头信息。
message_parts.append('%s=%s' % (name, value.rstrip()))
message_parts.append('')
message = '
'.join(message_parts)
self.send_response(200) #返回响应信息,包含请求中的信息和自己生成的响应头
self.end_headers()
self.wfile.write(message)
return
至此整个请求响应过程完毕。RequestHandlerClass实例化时,先调用self.setup(),再调用self.handle()生成并返回响应,最后调用self.finish()方法,结束对本次请求的处理。
上面粗线条地勾勒了一下整个请求响应过程,整个过程大致是:请求进来,经过BaseServer-->TCPServer-->HTTPServer-->BaseRequestHandler-->StreamRequestHandler-->BaseHTTPRequestHandler-->自定义APP,完成响应。
主要参考:
https://pymotw.com/2/BaseHTTPServer/index.html#module-BaseHTTPServer