• Python复习笔记(十)Http协议--Web服务器-并发服务器


    1. HTTP协议(超文本传输协议)

    浏览器===>服务器发送的请求格式如下:(浏览器告诉服务器,浏览器的信息)

    GET / HTTP/1.1
    Host: www.baidu.com
    Connection: keep-alive
    Cache-Control: max-age=0
    Upgrade-Insecure-Requests: 1
    User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
    Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
    Accept-Encoding: gzip, deflate, br
    Accept-Language: zh-CN,zh;q=0.9

    服务器==>浏览器回送的数据格式如下:(告诉浏览器,服务器的版本,按照什么格式解析)

    header:告诉浏览器特殊信息(必须有HTTP/1.1 200 OK)

    HTTP/1.1 200 OK
    Bdpagetype: 2
    Bdqid: 0xb49ba00a00010431
    Cache-Control: private
    Connection: Keep-Alive
    Content-Encoding: gzip
    Content-Type: text/html;charset=utf-8
    Date: Sat, 09 Mar 2019 14:40:59 GMT
    Expires: Sat, 09 Mar 2019 14:40:59 GMT
    Server: BWS/1.1
    Set-Cookie: BDSVRTM=70; path=/
    Set-Cookie: BD_HOME=1; path=/
    Set-Cookie: H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; path=/; domain=.baidu.com
    Strict-Transport-Security: max-age=172800
    X-Ua-Compatible: IE=Edge,chrome=1
    Transfer-Encoding: chunked
    Cookie: BAIDUID=969EF83E73AFFBF96897E645871A1957:FG=1; BIDUPSID=969EF83E73AFFBF96897E645871A1957; PSTM=1549905544; BD_UPN=12314353; delPer=0; BD_CK_SAM=1; PSINO=1; ___rl__test__cookies=1552141627935; OUTFOX_SEARCH_USER_ID_NCOO=1556761245.5429947; BDRCVFR[QxxZVyx49rf]=I67x6TjHwwYf0; H_WISE_SIDS=125704_114553_129323_106370_128146_128229_120193_123018_129449_118893_118871_118854_118832_118787_107312_129945_129387_129088_129558_117336_129751_117432_128791_128402_129655_128246_124639_129620_129008_128967_129641_129293_128805_129692_129838_129981_129808_127764_129482_129643_129508_124030_130091_110085_129844_123289_128842_127417_128808_129049; FEED_SIDS=231735_0309_22; plus_lsv=393c3756be30db54; BDORZ=AE84CDB3A529C0F8A2B9DCDD1D18B695; plus_cv=1::m:49a3f4a6; Hm_lvt_12423ecbc0e2ca965d84259063d35238=1552141644; SE_LAUNCH=5%3A25869027_0%3A25869027; rsv_i=caa1rmCs0PpQpYzAbKe5ZOe7IPqcdsJjz9yFp5uzkt9iporuXUkXb39N0K1sIreyWXdiYvSq2TEnLzJMu1rSJdPaAoRZgSo; Hm_lpvt_12423ecbc0e2ca965d84259063d35238=1552141679; BDRCVFR[Usf3Hj-5366]=mk3SLVN4HKm; BDUSS=RJTzhiLTA2fkFSRmxiOGZYRVZEbVVMRU1FQmNsbHJDT2xRSHlPT1ZaV2NYS3RjQUFBQUFBJCQAAAAAAAAAAAEAAABEIlRw0LC2uTkwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJzPg1ycz4NcY2; BD_HOME=1; BDRCVFR[4r8LXJfwh-6]=8QV4RScte5tfjRLnjbdnHRsg17xUvNV; H_PS_645EC=1e9fAcA0iG5RIf%2Bi4FlV0onp3XbZL2oEMPgGRb1L2abD%2BGzuKThL6MgB%2Be%2FwIJ6wYVm0C2fUHAUL; H_PS_PSSID=28648_1455_28395_21114_28608_28584_28557_28604_28625_28605; sug=3; sugstore=1; ORIGIN=2; bdime=0

    body

    <!DOCTYPE html>
    <!--STATUS OK-->

     

    2. Python模拟返回固定页面的http服务器

    import socket
    
    def service_client(new_socket):
        """Reply to one browser connection with a fixed page, then close it.

        Args:
            new_socket: Connected client socket returned by ``accept()``.
        """
        try:
            # 1. Receive the browser's HTTP request (e.g. "GET / HTTP/1.1 ...")
            #    and log the raw bytes.
            request = new_socket.recv(1024)
            print(request)

            # 2. Build the HTTP response.
            # 2.1 Header: status line, then an empty line that ends the header.
            response = "HTTP/1.1 200 OK\r\n"
            response += "\r\n"
            # 2.2 Body.
            response += "haaaaaaaaaaa"

            # sendall() keeps writing until every byte is out; send() may stop early.
            new_socket.sendall(response.encode("utf-8"))
        finally:
            # Release the connection even if recv/send failed.
            new_socket.close()
    
    
    def main():
        """Run a blocking HTTP server on port 7890, one client at a time."""
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends (exception, Ctrl-C); a close() placed after `while True`
        #    could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind to port 7890 on every local address.
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            while True:
                # 4. Wait for the next client to connect.
                new_socket, client_addr = tcp_server_socket.accept()

                # 5. Serve this client; the next accept() waits until it is done.
                service_client(new_socket)
    
    
    if __name__ == '__main__':
        main()

     返回指定html页面

    import socket
    import re
    
    def service_client(new_socket):
        """Serve one file from ./templates to a browser, then close the socket.

        The request line (e.g. ``GET /page.html HTTP/1.1``) selects the file.
        A missing or unreadable file, or a request line without a path, gets
        a 404 reply instead of an uncaught exception.

        Args:
            new_socket: Connected client socket returned by ``accept()``.
        """
        try:
            # 1. Receive the browser's HTTP request; undecodable bytes are
            #    replaced rather than raising UnicodeDecodeError.
            request = new_socket.recv(1024).decode("utf-8", errors="replace")
            request_lines = request.splitlines()
            print(request_lines)
            if not request_lines:
                # The peer sent nothing; there is no request to answer.
                return

            # Pull the path out of "GET /page.html HTTP/1.1".
            ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
            file_name = ret.group(1) if ret else None
            if file_name:
                print("*"*50, file_name)

            # 2. Read the body. ".." segments are refused so a request cannot
            #    reach files outside ./templates.
            html_content = None
            if file_name and ".." not in file_name.split("/"):
                try:
                    with open("./templates" + file_name, "rb") as f:
                        html_content = f.read()
                except OSError:
                    html_content = None

            # 2.1 Header: status line, then an empty line that ends the header.
            if html_content is None:
                response = "HTTP/1.1 404 NOT FOUND\r\n"
                html_content = b"-----------File not found----------"
            else:
                response = "HTTP/1.1 200 OK\r\n"
            response += "\r\n"

            # 2.2 Send header and body; sendall() retries until all bytes are written.
            new_socket.sendall(response.encode("utf-8") + html_content)
        finally:
            # Always release the connection, even for a malformed request.
            new_socket.close()
    
    
    def main():
        """Run a blocking HTTP file server on port 7890, one client at a time."""
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends; a close() placed after `while True` could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            while True:
                # 4. Wait for the next client to connect.
                new_socket, client_addr = tcp_server_socket.accept()

                # 5. Serve this client.
                service_client(new_socket)
    
    
    if __name__ == '__main__':
        main()

    3. 多进程/线程实现http服务器

    3.1 多进程: 需要在主进程调用new_socket.close()

    import socket
    import re
    import multiprocessing
    
    def service_client(new_socket):
        """Serve one file from ./templates to a browser, then close the socket.

        Runs in a child process. A missing or unreadable file, or a request
        line without a path, gets a 404 reply instead of an uncaught exception.

        Args:
            new_socket: Connected client socket returned by ``accept()``.
        """
        try:
            # 1. Receive the browser's HTTP request; undecodable bytes are
            #    replaced rather than raising UnicodeDecodeError.
            request = new_socket.recv(1024).decode("utf-8", errors="replace")
            request_lines = request.splitlines()
            print(request_lines)
            if not request_lines:
                # The peer sent nothing; there is no request to answer.
                return

            # Pull the path out of "GET /page.html HTTP/1.1".
            ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
            file_name = ret.group(1) if ret else None
            if file_name:
                print("*"*50, file_name)

            # 2. Read the body. ".." segments are refused so a request cannot
            #    reach files outside ./templates.
            html_content = None
            if file_name and ".." not in file_name.split("/"):
                try:
                    with open("./templates" + file_name, "rb") as f:
                        html_content = f.read()
                except OSError:
                    html_content = None

            # 2.1 Header: status line, then an empty line that ends the header.
            if html_content is None:
                response = "HTTP/1.1 404 NOT FOUND\r\n"
                html_content = b"-----------File not found----------"
            else:
                response = "HTTP/1.1 200 OK\r\n"
            response += "\r\n"

            # 2.2 Send header and body; sendall() retries until all bytes are written.
            new_socket.sendall(response.encode("utf-8") + html_content)
        finally:
            # Always release this process's copy of the connection.
            new_socket.close()
    
    def main():
        """Run an HTTP file server on port 7890, one child process per client."""
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends; a close() placed after `while True` could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            while True:
                # 4. Wait for the next client to connect.
                new_socket, client_addr = tcp_server_socket.accept()

                # 5. Hand the client to a child process.
                p = multiprocessing.Process(target=service_client, args=(new_socket, ))
                p.start()

                # The child got its own copy of the socket. The parent must
                # close its copy too, otherwise the connection stays open and
                # the browser keeps waiting after the child has finished.
                new_socket.close()
    if __name__ == '__main__':
        main()
    主进程里的: new_socket.close()作用
    # fd: 文件描述符, 就是一个数字, 对应一个特殊的文件, 例如网络接口
    # 到子进程时候, new_socket会被复制一份, 所以要在主进程里 调用 new_socket.close()
    # 主进程不调用close时, 浏览器会一直的等待, 四次挥手就不会开始!

    3.2 多线程: 无需在主线程调用new_socket.close()--否则报错

    import socket
    import re
    import threading
    
    def service_client(new_socket):
        """Serve one file from ./templates to a browser, then close the socket.

        Runs in a worker thread. A missing or unreadable file, or a request
        line without a path, gets a 404 reply instead of an uncaught exception.

        Args:
            new_socket: Connected client socket returned by ``accept()``.
        """
        try:
            # 1. Receive the browser's HTTP request; undecodable bytes are
            #    replaced rather than raising UnicodeDecodeError.
            request = new_socket.recv(1024).decode("utf-8", errors="replace")
            request_lines = request.splitlines()
            print("request_lines:", "*"*50, request_lines)
            if not request_lines:
                # The peer sent nothing; there is no request to answer.
                return

            # Pull the path out of "GET /page.html HTTP/1.1".
            ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
            file_name = ret.group(1) if ret else None
            if file_name:
                print("*"*50, file_name)

            # 2. Read the body. ".." segments are refused so a request cannot
            #    reach files outside ./templates.
            html_content = None
            if file_name and ".." not in file_name.split("/"):
                try:
                    with open("./templates" + file_name, "rb") as f:
                        html_content = f.read()
                except OSError:
                    html_content = None

            # 2.1 Header: status line, then an empty line that ends the header.
            if html_content is None:
                response = "HTTP/1.1 404 NOT FOUND\r\n"
                html_content = b"-----------File not found----------"
            else:
                response = "HTTP/1.1 200 OK\r\n"
            response += "\r\n"

            # 2.2 Send header and body; sendall() retries until all bytes are written.
            new_socket.sendall(response.encode("utf-8") + html_content)
        finally:
            # This thread owns the connection, so it is closed here.
            new_socket.close()
    
    def main():
        """Run an HTTP file server on port 7890, one thread per client."""
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends; a close() placed after `while True` could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            while True:
                # 4. Wait for the next client to connect.
                new_socket, client_addr = tcp_server_socket.accept()

                # 5. Hand the client to a worker thread.
                p = threading.Thread(target=service_client, args=(new_socket, ))
                p.start()

                # Do NOT close new_socket here: the thread uses this very
                # socket object, so closing it would break the worker.
        
    if __name__ == '__main__':
        main()

     区别: 线程比进程耗费的资源小, 以下用协程实现, 会更方便

    3.3 协程: 使用gevent实现http服务器

    效率最高

    import socket
    import re
    import gevent
    from gevent import monkey
    
    monkey.patch_all()
    
    def service_client(new_socket):
        """Serve one file from ./templates to a browser, then close the socket.

        Runs as a gevent greenlet. A missing or unreadable file, or a request
        line without a path, gets a 404 reply instead of an uncaught exception.

        Args:
            new_socket: Connected client socket returned by ``accept()``.
        """
        try:
            # 1. Receive the browser's HTTP request; undecodable bytes are
            #    replaced rather than raising UnicodeDecodeError.
            request = new_socket.recv(1024).decode("utf-8", errors="replace")
            request_lines = request.splitlines()
            print("request_lines:", "*"*50, request_lines)
            if not request_lines:
                # The peer sent nothing; there is no request to answer.
                return

            # Pull the path out of "GET /page.html HTTP/1.1".
            ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0])
            file_name = ret.group(1) if ret else None
            if file_name:
                print("*"*50, file_name)

            # 2. Read the body. ".." segments are refused so a request cannot
            #    reach files outside ./templates.
            html_content = None
            if file_name and ".." not in file_name.split("/"):
                try:
                    with open("./templates" + file_name, "rb") as f:
                        html_content = f.read()
                except OSError:
                    html_content = None

            # 2.1 Header: status line, then an empty line that ends the header.
            if html_content is None:
                response = "HTTP/1.1 404 NOT FOUND\r\n"
                html_content = b"-----------File not found----------"
            else:
                response = "HTTP/1.1 200 OK\r\n"
            response += "\r\n"

            # 2.2 Send header and body; sendall() retries until all bytes are written.
            new_socket.sendall(response.encode("utf-8") + html_content)
        finally:
            # Always release the connection, even for a malformed request.
            new_socket.close()
    
    def main():
        """Run an HTTP file server on port 7890, one gevent greenlet per client."""
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends; a close() placed after `while True` could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            while True:
                # 4. Wait for the next client to connect.
                new_socket, client_addr = tcp_server_socket.accept()

                # 5. Serve this client in its own greenlet. The greenlet closes
                #    the socket itself, so it must not be closed here.
                gevent.spawn(service_client, new_socket)
        
    if __name__ == '__main__':
        main()

    4. Web静态服务器--单进程/线程/非堵塞模式

    4.1 长连接和短连接

    • HTTP/1.1 长连接:  三次握手一次, 不断开的情况下, 通过一个Socket, 可以连续获取数据
    • HTTP/1.0 短连接

    短连接

    import socket
    import re
    import gevent
    from gevent import monkey
    import time
    
    client_socker_list = list()
    
    def main():
        """Poll a non-blocking listening socket and all its clients in one thread.

        Every accepted client is stored in the module-level
        ``client_socker_list``; each pass of the loop tries ``accept()`` once
        and then ``recv()`` once per known client, without ever blocking.
        """
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends; a close() placed after `while True` could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            # Non-blocking: accept() raises instead of waiting for a client.
            tcp_server_socket.setblocking(False)

            while True:
                try:
                    new_socket, new_addr = tcp_server_socket.accept()
                except BlockingIOError:
                    print("没有新的客户端到来")
                else:
                    print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                    new_socket.setblocking(False)
                    client_socker_list.append(new_socket)

                # Walk a snapshot, because closed clients are removed from the
                # real list inside the loop.
                for client_socket in list(client_socker_list):
                    try:
                        # Read from the client being visited, not from the
                        # most recently accepted socket.
                        recv_data = client_socket.recv(1024)
                    except BlockingIOError as ret:
                        print(ret)
                        print("这个客户端没有发送过来数据")
                        continue
                    except OSError as ret:
                        # The connection broke; handle it like a close.
                        print(ret)
                        recv_data = b""

                    if recv_data:
                        # The peer sent data.
                        print("客户端发送过来了数据")
                    else:
                        # recv() returned empty: the peer called close().
                        client_socker_list.remove(client_socket)
                        client_socket.close()
                        print("客户端已经关闭")

                time.sleep(1)
            
    if __name__ == '__main__':
        main()
    • 核心: 之所以要用多线程/多进程, 是因为套接字默认是堵塞的, recv会一直堵塞到收到数据为止
    • 只要设置为非堵塞, 则可以实现 单进程单线程单任务, 还能做到多个客户端一起服务.

     4.2 长连接来实现单进程/单线程--非堵塞模式(Content-Length)

    上述实现都是基于短连接, 请求一次之后就断开连接了

    import socket
    import re
    import gevent
    from gevent import monkey
    import time
    
    client_socker_list = list()
    
    def service_client(new_socket, request):
        """Answer one HTTP request on a keep-alive connection (socket stays open).

        Every reply carries ``Content-Length`` so the browser knows where the
        body ends and can send its next request over the same connection.

        Args:
            new_socket: Connected client socket; it is not closed here.
            request: The already-received request, decoded to ``str``.
        """
        # 1. Split the request; the first line looks like "GET /page.html HTTP/1.1".
        request_lines = request.splitlines()
        print("request_lines:", "*"*50, request_lines)

        ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0]) if request_lines else None
        file_name = ret.group(1) if ret else None
        if file_name:
            print("*"*50, file_name)

        # 2. Read the body. ".." segments are refused so a request cannot
        #    reach files outside ./templates.
        response_body = None
        if file_name and ".." not in file_name.split("/"):
            try:
                with open("./templates" + file_name, "rb") as f:
                    response_body = f.read()
            except OSError:
                response_body = None

        # 2.1 Header. The 404 reply needs Content-Length as well, because the
        #     connection is not closed to mark the end of the body.
        if response_body is None:
            response_header = "HTTP/1.1 404 NOT FOUND\r\n"
            response_body = "-----------File note found----------".encode("utf-8")
        else:
            response_header = "HTTP/1.1 200 OK\r\n"
        response_header += "Content-Length:%d\r\n" % len(response_body)
        response_header += "\r\n"

        # 2.2 Header and body are both bytes now; send them exactly once
        #     (a second copy of the body would corrupt the next response).
        # NOTE(review): on a non-blocking socket send() may write only part of
        # a large file; confirm whether callers need buffering.
        response = response_header.encode("utf-8") + response_body
        new_socket.send(response)
        
    
    
    def main():
        """Serve keep-alive HTTP clients from one thread using non-blocking sockets.

        Every accepted client is stored in the module-level
        ``client_socker_list``; each pass of the loop tries ``accept()`` once
        and then ``recv()`` once per known client, answering any request
        through ``service_client``.
        """
        # 1. Create the TCP socket. The with-block closes it however the loop
        #    ends; a close() placed after `while True` could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            # Non-blocking: accept() raises instead of waiting for a client.
            tcp_server_socket.setblocking(False)

            # NOTE(review): this loop never sleeps, so it spins at full speed
            # while idle; confirm that is intended for the demo.
            while True:
                try:
                    new_socket, new_addr = tcp_server_socket.accept()
                except BlockingIOError:
                    print("没有新的客户端到来")
                else:
                    print("只要没产生一次, 那么也就意味着 来了一个新的客户端")
                    new_socket.setblocking(False)
                    client_socker_list.append(new_socket)

                # Walk a snapshot, because closed clients are removed from the
                # real list inside the loop.
                for client_socket in list(client_socker_list):
                    try:
                        # Read from the client being visited, not from the
                        # most recently accepted socket.
                        recv_data = client_socket.recv(1024).decode("utf-8", errors="replace")
                    except BlockingIOError as ret:
                        print(ret)
                        print("这个客户端没有发送过来数据")
                        continue
                    except OSError as ret:
                        # The connection broke; handle it like a close.
                        print(ret)
                        recv_data = ""

                    if recv_data:
                        # The peer sent a request.
                        print("客户端发送过来了数据")
                        service_client(client_socket, recv_data)
                    else:
                        # recv() returned empty: the peer called close().
                        client_socket.close()
                        client_socker_list.remove(client_socket)
                        print("客户端已经关闭")
        
    if __name__ == '__main__':
        main()

     5. Web静态服务器--epoll

    5.1 IO多路复用

    select/epoll的好处: 在于单个process就可以同时处理多个网络连接的IO

    它的基本原理就是 select, poll, epoll 这个function会不断的轮询所负责的所有Socket, 当某个Socket有数据到达了, 就通知用户进程

    5.2 epoll简单模型

    1. 减少了复制的过程

    2. 以事件通知的方式(高效率, 注: 轮询的方式效率很低)

    3. 有一个特殊的内存, 是应用程序和Kernel共享的, 在这个内存里要添加的,监听的,判断到来的套接字对应的文件描述符, 检测时不是轮询而是事件通知。

    4. 使用了内存映射(mmap)技术

    5. 采用基于事件的就绪通知方式

    5.3 epoll版的http服务器

    #!/bin/python3
    # -*- encoding=utf-8 -*-
    
    import socket
    import re
    import time
    import select
    
    
    
    def service_client(new_socket, request):
        """Answer one HTTP request on a keep-alive connection (socket stays open).

        Every reply carries ``Content-Length`` so the browser knows where the
        body ends and can send its next request over the same connection.

        Args:
            new_socket: Connected client socket; it is not closed here.
            request: The already-received request, decoded to ``str``.
        """
        # 1. Split the request; the first line looks like "GET /page.html HTTP/1.1".
        request_lines = request.splitlines()
        print("request_lines:", "*"*50, request_lines)

        ret = re.match(r"[^/]+(/[^ ]*)", request_lines[0]) if request_lines else None
        file_name = ret.group(1) if ret else None
        if file_name:
            print("*"*50, file_name)

        # 2. Read the body. ".." segments are refused so a request cannot
        #    reach files outside ./templates.
        response_body = None
        if file_name and ".." not in file_name.split("/"):
            try:
                with open("./templates" + file_name, "rb") as f:
                    response_body = f.read()
            except OSError:
                response_body = None

        # 2.1 Header. The 404 reply needs Content-Length as well, because the
        #     connection is not closed to mark the end of the body.
        if response_body is None:
            response_header = "HTTP/1.1 404 NOT FOUND\r\n"
            response_body = "-----------File note found----------".encode("utf-8")
        else:
            response_header = "HTTP/1.1 200 OK\r\n"
        response_header += "Content-Length:%d\r\n" % len(response_body)
        response_header += "\r\n"

        # 2.2 Header and body are both bytes now; send them exactly once
        #     (a second copy of the body would corrupt the next response).
        response = response_header.encode("utf-8") + response_body
        new_socket.sendall(response)
        
    
    
    def main():
        """Serve keep-alive HTTP clients from one thread, driven by epoll events."""
        # 1. Create the TCP socket and the epoll object. The with-block closes
        #    both however the loop ends; a close() placed after `while True`
        #    could never be reached.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_server_socket, \
                select.epoll() as epl:
            # 2. Bind. SO_REUSEADDR lets the port be bound again right after the
            #    server closed its side first, so the program can be restarted
            #    immediately.
            tcp_server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            tcp_server_socket.bind(("", 7890))

            # 3. Switch to a listening socket.
            tcp_server_socket.listen(128)

            # Non-blocking: accept() raises instead of waiting for a client.
            tcp_server_socket.setblocking(False)

            # Register the listening socket's fd; EPOLLIN means "input available".
            listen_fd = tcp_server_socket.fileno()
            epl.register(listen_fd, select.EPOLLIN)

            # {fd: socket} for every connected client.
            fd_event_dict = dict()
            try:
                while True:
                    # poll() blocks until the OS reports at least one event and
                    # returns a list of (fd, event mask) pairs.
                    fd_event_list = epl.poll()

                    for fd, event in fd_event_list:
                        if fd == listen_fd:
                            # 4. A new client is waiting to be accepted.
                            try:
                                new_socket, client_addr = tcp_server_socket.accept()
                            except BlockingIOError:
                                # The client went away before accept() ran.
                                continue
                            epl.register(new_socket.fileno(), select.EPOLLIN)
                            fd_event_dict[new_socket.fileno()] = new_socket
                            continue

                        # Any event on a client fd is handled by trying to
                        # read. The mask is a bit field, so comparing it with
                        # `== EPOLLIN` would skip EPOLLIN|EPOLLHUP or a bare
                        # hang-up/error and leave poll() reporting it forever.
                        client_socket = fd_event_dict[fd]
                        try:
                            recv_data = client_socket.recv(1024).decode("utf-8", errors="replace")
                        except OSError:
                            # The connection broke; handle it like a close.
                            recv_data = ""

                        if recv_data:
                            service_client(client_socket, recv_data)
                        else:
                            # Unregister BEFORE closing: unregistering a closed
                            # fd raises OSError (EBADF) on Python 3.9+.
                            epl.unregister(fd)
                            client_socket.close()
                            del fd_event_dict[fd]
            finally:
                # Release every client that is still connected.
                for client_socket in fd_event_dict.values():
                    client_socket.close()
        
    if __name__ == '__main__':
        main()

  • 相关阅读:
    在地址栏里输入一个地址回车会发生哪些事情
    ROS librviz库
    jumpserver运行源码
    django源码剖析(steup、runserver、生命周期)
    QT在linux下编译出现Unable to create a debugging engine错误的解决
    node 服务启动
    查看端口占用
    自定义reducer,pipe,compose函数 码农
    talk 码农
    实现简版redux和中件间 码农
  • 原文地址:https://www.cnblogs.com/douzujun/p/10503602.html
Copyright © 2020-2023  润新知