requests库
# Basic usage of the third-party ``requests`` library.
# Install it first:
#     pip install requests
import requests

url = 'http://www.baidu.com'

# Always pass a timeout: without one, requests.get() can block indefinitely
# when the server accepts the connection but never answers.
response = requests.get(url, timeout=10)

# ``content`` is the raw response body as bytes; decode it explicitly to
# obtain text.
data = response.content.decode('utf-8')

# ``text`` is the response body as str (already decoded by requests).
# This deliberately rebinds ``data`` to show the second way of reading the body.
data = response.text

print(type(data))
带 headers(请求头)的请求
# Sending a request with custom headers and inspecting the exchange.
# Install the third-party ``requests`` module first:
#     pip install requests
import requests


class RequestSpider(object):
    """Fetch a page with a browser-like User-Agent and inspect the exchange.

    The HTTP request is sent when the instance is created and the response
    is stored on ``self.response``; ``run`` then reads metadata from it.
    """

    def __init__(self):
        """Send the GET request and keep the response on the instance."""
        url = 'https://www.baidu.com'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
        }
        # A timeout keeps the constructor from hanging forever on a stalled
        # connection.
        self.response = requests.get(url, headers=headers, timeout=10)

    def run(self):
        """Read the parts of the stored response and print both cookie jars.

        The first four values are bound only to demonstrate where each piece
        of information lives; only the cookies are printed.
        """
        # Raw response body as bytes.
        data = self.response.content

        # 1. Headers of the request that was sent.
        request_headers = self.response.request.headers

        # 2. Headers of the response that came back.
        response_headers = self.response.headers

        # 3. HTTP status code of the response.
        code = self.response.status_code

        # 4. Cookies attached to the request.
        # NOTE(review): ``_cookies`` is an underscore-prefixed (non-public)
        # attribute of the prepared request and may change between versions
        # of requests.
        request_cookie = self.response.request._cookies
        print(request_cookie)

        # 5. Cookies set by the response.
        response_cookie = self.response.cookies
        print(response_cookie)


RequestSpider().run()
参数自动转义(URL 编码)
# Example of a full search URL with percent-encoded query values:
# https://www.baidu.com/s?wd=%E7%BE%8E%E5%A5%B3&rsv_spt=1&rsv_iqid=0xefb8b43600013949&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=1&oq=%25E5%25A4%25B4%25E6%259D%25A1&rsv_t=6e3aSjYtw0WgEg7MAIuUlOc3D5lwFBJUVw3KsdkhkWYhZWcNMn9kLBO12GflHlOeUHxx&inputT=506&rsv_pq=81d8f9470001b348&rsv_sug3=19&rsv_sug1=16&rsv_sug7=100&bs=%E5%A4%B4%E6%9D%A1
import requests

# Query parameters passed through ``params`` are percent-encoded
# automatically, so the query string does not have to be built by hand:
# url = 'https://www.baidu.com/s?wd=美女'
url = 'https://www.baidu.com/s'
params = {
    'wd': "美女"
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=params, timeout=10)

# bytes.decode() defaults to UTF-8.
data = response.content.decode()

# Write with an explicit UTF-8 encoding: the platform default (for example
# GBK or cp1252 on Windows) may be unable to encode the page and would raise
# UnicodeEncodeError or corrupt the saved file.
with open('baidu.html', 'w', encoding='utf-8') as f:
    f.write(data)

# Sending a POST request with a body works the same way:
#     requests.post(url, data={...form fields...}, json=...JSON payload...)
返回json
# Reading a JSON response with requests.
import json  # only needed for the manual json.loads() route shown below

import requests

url = 'https://api.github.com/user'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}

# This URL returns standard JSON rather than HTML.
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)

# Manual route: decode the body to str, then parse it into a dict.
# data = response.content.decode()
# data_dict = json.loads(data)

# response.json() parses the JSON body straight into Python dict/list objects.
data = response.json()

# Use .get() so a payload without a 'message' key prints None instead of
# raising KeyError.
# NOTE(review): presumably 'message' is only present in error payloads
# (e.g. an unauthenticated call) -- confirm against the API.
print(data.get('message'))