import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE


def get_url(url):
    """Fetch ``url`` over plain HTTP on a non-blocking socket and print the body.

    The socket is put into non-blocking mode and a selector is used to wait
    until it is writable (connection established / send buffer free) or
    readable (response data available), instead of spinning in a ``while``
    loop and swallowing ``BlockingIOError``/``OSError``.

    The request is a minimal ``GET`` with ``Connection: close``, so the end
    of the response is signalled by the server closing the connection.
    Everything after the first blank line (CRLF CRLF) of the response is
    treated as the body, decoded as UTF-8 (undecodable bytes are replaced)
    and printed to stdout.

    Args:
        url: An ``http://`` URL. An explicit port is honoured; otherwise
            port 80 is used. HTTPS is not supported (no TLS is performed).

    Returns:
        None. The response body is printed.

    Raises:
        ValueError: If ``url`` contains no host.
        OSError: If the TCP connection cannot be established.
    """
    parts = urlparse(url)
    host = parts.hostname
    if not host:
        raise ValueError("no host in URL: {!r}".format(url))
    port = parts.port or 80
    path = parts.path or "/"
    if parts.query:
        path = "{}?{}".format(path, parts.query)
    # The Host header carries the port only when it is not the default one.
    host_header = host if port == 80 else "{}:{}".format(host, port)

    # Request line + headers, each terminated by CRLF, then an empty line.
    request = (
        "GET {} HTTP/1.1\r\n"
        "Host: {}\r\n"
        "Connection: close\r\n"
        "\r\n"
    ).format(path, host_header).encode("utf8")

    chunks = []
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client, \
            DefaultSelector() as selector:
        client.setblocking(False)
        try:
            # A non-blocking connect returns immediately; the handshake
            # finishes in the background, which is reported as
            # BlockingIOError (EINPROGRESS / WinError 10035).
            client.connect((host, port))
        except BlockingIOError:
            pass

        # The socket becomes writable once the connect attempt has finished
        # (successfully or not); SO_ERROR tells the two cases apart.
        selector.register(client, EVENT_WRITE)
        selector.select()
        error = client.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
        if error:
            raise OSError(error, "cannot connect to {}:{}".format(host, port))

        # send() may accept only part of the buffer; keep going until
        # every byte of the request has been handed to the kernel.
        unsent = request
        while unsent:
            selector.select()
            try:
                sent = client.send(unsent)
            except BlockingIOError:
                continue
            unsent = unsent[sent:]

        # Read until the server closes the connection (recv returns b"").
        selector.modify(client, EVENT_READ)
        while True:
            selector.select()
            try:
                chunk = client.recv(4096)
            except BlockingIOError:
                continue
            if not chunk:
                break
            chunks.append(chunk)

    # Status line and headers end at the first empty line; the rest is body.
    _, _, body = b"".join(chunks).partition(b"\r\n\r\n")
    html_data = body.decode("utf8", errors="replace")
    print(html_data)


if __name__ == "__main__":
    get_url('http://www.baidu.com')
响应结果:
响应行
响应头
...........
...........
...........
响应体
"C:\Program Files\python3.5\python.exe" E:/pythoncode/day07/01.py
HTTP/1.1 200 OK
Accept-Ranges: bytes
Cache-Control: no-cache
Content-Length: 14615
Content-Type: text/html
Date: Thu, 16 Aug 2018 08:52:32 GMT
Etag: "5b56b4a8-3917"
Last-Modified: Tue, 24 Jul 2018 05:10:00 GMT
P3p: CP=" OTI DSP COR IVA OUR IND COM "
Pragma: no-cache
Server: BWS/1.1
Set-Cookie: BAIDUID=DCD58464E426E409DBF02803181718DB:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
Set-Cookie: BIDUPSID=DCD58464E426E409DBF02803181718DB; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
Set-Cookie: PSTM=1534409552; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
Vary: Accept-Encoding
X-Ua-Compatible: IE=Edge,chrome=1
Connection: close

<!DOCTYPE html><!--STATUS OK-->
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<link rel="dns-prefetch" href="//s1.bdstatic.com"/>
<link rel="dns-prefetch" href="//t1.baidu.com"/>
<link rel="dns-prefetch" href="//t2.baidu.com"/>
<link rel="dns-prefetch" href="//t3.baidu.com"/>
<link rel="dns-prefetch" href="//t10.baidu.com"/>
<link rel="dns-prefetch" href="//t11.baidu.com"/>
<link rel="dns-prefetch" href="//t12.baidu.com"/>
<link rel="dns-prefetch" href="//b1.bdstatic.com"/>