1. Requests模块API详解
Requests中文文档地址:http://docs.python-requests.org/zh_CN/latest/
1.1 Requests模块快速入门
import requests

# Quick tour of the Requests API. Every statement below performs real
# network I/O against public demo endpoints.

# 1. HTTP request types
r = requests.get('https://github.com/timeline.json')  # GET
r1 = requests.get(url='http://dict.baidu.com/s', params={'wd': 'python'})  # GET with query parameters
r2 = requests.post("http://m.ctrip.com/post")  # POST
r3 = requests.put("http://m.ctrip.com/put")  # PUT
r4 = requests.delete("http://m.ctrip.com/delete")  # DELETE
r5 = requests.head("http://m.ctrip.com/head")  # HEAD
r6 = requests.options("http://m.ctrip.com/get")  # OPTIONS

# 2. Passing URL parameters
url_params = {'key1': 'value1', 'key2': 'value2'}
# Parameters are passed as a dict. Note that keys whose value is None are
# not added to the URL's query string.
r7 = requests.get("http://httpbin.org/get", params=url_params)
print(r7.url)  # Output: http://httpbin.org/get?key1=value1&key2=value2

# 3. Reading / overriding the page encoding
print(r1.encoding)  # encoding requests will use to decode r1.text
r1.encoding = 'ISO-8859-1'  # override the encoding

# 4. Reading the response body
# .content is the data exactly as fetched from the network, with no decoding
# applied, so it is a bytes object. (Strings stored on disk or sent over the
# network are always bytes.)
print(r1.content)
# .text is .content decoded into a str. Decoding needs an encoding, which
# requests guesses; the guess is sometimes wrong and yields garbled text.
# In that case decode manually with an explicit encoding, e.g.
# response.content.decode('utf-8').
print(r1.text)

# 5. Reading a JSON response body
# Requests ships with a built-in JSON decoder.
# Note: if JSON decoding fails, r.json() raises an exception. For example,
# if the response is a 401 (Unauthorized), calling r.json() raises
# "ValueError: No JSON object could be decoded".
# Also note that a successful r.json() call does NOT mean the request
# succeeded: some servers include a JSON object in a failed response (e.g.
# the error details of an HTTP 500), and that JSON is decoded and returned.
# To check whether a request succeeded, use r.raise_for_status() or compare
# r.status_code with the value you expect.
print(r.json())

# 6. Reading the raw response
# In the rare case that you want the raw socket response from the server,
# access r.raw. Make sure stream=True was set on the initial request:
r8 = requests.get('https://api.github.com/events', stream=True)
print(r8.raw)
print(r8.raw.read(10))
# Only part of the streamed body was read, so release the connection
# explicitly.
r8.close()

# 7. Custom request headers
url = 'http://m.ctrip.com'
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19'}
r9 = requests.post(url, headers=headers)
print(r9.request.headers)

# 8. More complex POST requests
# a) Pass a dict as `data`; it is sent like an HTML form submission.
payload = {'key1': 'value1', 'key2': 'value2'}
r10 = requests.post("http://httpbin.org/post", data=payload)
print(r10.text)
# Output:
# {
#   ...
#   "form": {
#     "key2": "value2",
#     "key1": "value1"
#   },
#   ...
# }

# b) Pass a sequence of (key, value) tuples as `data`; this covers forms in
#    which several elements share the same key.
payload = (('key1', 'value1'), ('key1', 'value2'))
r11 = requests.post('http://httpbin.org/post', data=payload)
print(r11.text)
# Output:
# {
#   ...
#   "form": {
#     "key1": [
#       "value1",
#       "value2"
#     ]
#   },
#   ...
# }

# 9. POST a multipart-encoded file
# Example 1
url = 'http://httpbin.org/post'
# `with` guarantees the file handle is closed once the upload finishes.
with open('report.xls', 'rb') as report:
    files = {'file': report}
    r = requests.post(url, files=files)
print(r.text)

# Example 2: explicitly set the file name, content type and headers
url = 'http://httpbin.org/post'
with open('report.xls', 'rb') as report:
    files = {'file': ('report.xls', report, 'application/vnd.ms-excel', {'Expires': '0'})}
    r = requests.post(url, files=files)

# 10. Response status code
r1 = requests.get('http://httpbin.org/get')
print(r1.status_code)

# 11. Response headers
r2 = requests.get('http://m.ctrip.com')
# a) The server's response headers, presented as a Python dictionary
print(r2.headers)
# b) Two ways to access a single header field
print(r2.headers['Content-Type'])
print(r2.headers.get('content-type'))

# 12. Cookies
# a) Reading cookies
url = 'https://www.baidu.com/'
r3 = requests.get(url)
print(r3.cookies)

# b) Sending cookies to the server
url = 'http://httpbin.org/cookies'
cookies = dict(cookies_are='working')
r4 = requests.get(url, cookies=cookies)
print(r4.text)
# Output:
# {
#   "cookies": {
#     "cookies_are": "working"
#   }
# }

# 13. Setting a timeout
# You can tell requests to stop waiting for a response after the number of
# seconds given by the `timeout` parameter. Nearly all production code
# should use this parameter; without it your program may hang indefinitely.
# A 1 ms timeout is expected to expire, so catch the resulting exception
# instead of letting it abort the rest of the script.
try:
    r = requests.get('http://m.ctrip.com', timeout=0.001)
except requests.exceptions.Timeout as exc:
    print(exc)

# 14. Using a proxy
proxies = {
    "http": "http://10.10.1.10:3128",
    "https": "http://10.10.1.100:4444",
}
r = requests.get('http://m.ctrip.com', proxies=proxies)

# If the proxy requires a username and password, write it like this:
proxies = {
    "http": "http://user:pass@10.10.1.10:3128/",
}
import json

import requests

# 1) GET requests
# a. Without parameters
ret = requests.get('https://github.com/timeline.json')
print(ret.url)
print(ret.text)

# b. With parameters
payload = {'key1': 'value1', 'key2': 'value2'}
ret = requests.get("http://httpbin.org/get", params=payload)
print(ret.url)
print(ret.text)

# 2) POST requests
# a. Basic POST
payload = {'key1': 'value1', 'key2': 'value2'}
ret = requests.post("http://httpbin.org/post", data=payload)
print(ret.text)

# b. Sending request headers together with a body
url = 'https://api.github.com/some/endpoint'
payload = {'some': 'data'}
headers = {'content-type': 'application/json'}
ret = requests.post(url, data=json.dumps(payload), headers=headers)
print(ret.text)
print(ret.cookies)

# 3) Other request helpers. These are signatures shown for reference only,
#    not runnable calls:
#      requests.get(url, params=None, **kwargs)
#      requests.post(url, data=None, json=None, **kwargs)
#      requests.put(url, data=None, **kwargs)
#      requests.head(url, **kwargs)
#      requests.delete(url, **kwargs)
#      requests.patch(url, data=None, **kwargs)
#      requests.options(url, **kwargs)
#    All of the helpers above are built on top of this one:
#      requests.request(method, url, **kwargs)


# 4) Parameters of the request method (signature and docstring reproduced
#    from the library for reference; this stub has no body of its own).
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request <Request>`.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload.
        ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')``
        or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string
        defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers
        to add for the file.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a :ref:`(connect timeout, read
        timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) whether the SSL cert will be verified. A CA_BUNDLE path can also be provided. Defaults to ``True``.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')
      <Response [200]>
    """


# Parameter details

# 1) --param url: the request URL
def param_method_url():
    """Show the ``method`` and ``url`` arguments (examples kept as comments)."""
    # requests.request(method='get', url='http://127.0.0.1:8000/test/')
    # requests.request(method='post', url='http://127.0.0.1:8000/test/')
    pass


# 2) --param params: parameters passed in the URL
def param_param():
    """Show the accepted forms of ``params`` (examples kept as comments)."""
    # - can be a dict
    # - can be a string
    # - can be bytes (ASCII range only)

    # requests.request(method='get',
    #                  url='http://www.oldboyedu.com',
    #                  params={'k1': 'v1', 'k2': 'v2'})
    # The URL actually requested is http://www.oldboyedu.com?k1=v1&k2=v2

    # requests.request(method='get',
    #                  url='http://127.0.0.1:8000/test/',
    #                  params="k1=v1&k2=水电费&k3=v3&k3=vv3")

    # requests.request(method='get',
    #                  url='http://127.0.0.1:8000/test/',
    #                  params=bytes("k1=v1&k2=k2&k3=v3&k3=vv3", encoding='utf8'))

    # Wrong (bytes containing non-ASCII data):
    # requests.request(method='get',
    #                  url='http://127.0.0.1:8000/test/',
    #                  params=bytes("k1=v1&k2=水电费&k3=v3&k3=vv3", encoding='utf8'))
    pass


# 3) --param data: data passed in the request body
def param_data():
    """Show the accepted forms of ``data`` (examples kept as comments)."""
    # can be a dict
    # can be a string
    # can be bytes
    # can be a file object

    # requests.request(method='POST',
    #                  url='http://www.oldboyedu.com',
    #                  params={'k1': 'v1', 'k2': 'v2'},
    #                  data={'use': 'alex', 'pwd': '123', 'x': [11, 2, 3]})
    # In this case the content-type is application/x-www-form-urlencoded.

    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  data="k1=v1& k2=v2& k3=v3&k3=v4"
    #                  )

    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  data="k1=v1&k2=v2&k3=v3&k3=v4",
    #                  headers={'Content-Type': 'application/x-www-form-urlencoded'}
    #                  )

    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  data=open('data_file.py', mode='r', encoding='utf-8'),  # file content: k1=v1;k2=v2;k3=v3;k3=v4
    #                  headers={'Content-Type': 'application/x-www-form-urlencoded'}
    #                  )
    pass


# 4) --param json: data passed in the request body
def param_json():
    """Send a POST whose body is given through the ``json`` argument."""
    # The value given as `json` is serialized to a string (json.dumps(...))
    # and sent to the server in the request body, with the header
    # {'Content-Type': 'application/json'}.
    requests.request(
        method='POST',
        url='http://www.oldboyedu.com',
        params={'k1': 'v1', 'k2': 'v2'},
        json={'use': 'alex', 'pwd': '123'},
    )
    # The request then carries the header content-type: application/json and
    # the body "{'use':'alex','pwd': '123'}". When the payload contains
    # nested dicts, send the body in this JSON form.


# 5) --param headers: request headers
def param_headers():
    """Send a POST with custom request headers."""
    # Send request headers to the server.
    requests.request(
        method='POST',
        url='http://www.oldboyedu.com',
        params={'k1': 'v1', 'k2': 'v2'},
        json={'use': 'alex', 'pwd': '123'},
        headers={
            'Referer': 'http://dig.chouti.com/',
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
        },
    )


# 6) --param cookies
def param_cookies():
    """Send cookies to the server, first as a dict and then as a CookieJar."""
    # Send a cookie to the server.
    requests.request(
        method='POST',
        url='http://127.0.0.1:8000/test/',
        data={'k1': 'v1', 'k2': 'v2'},
        cookies={'cook1': 'value1'},
    )

    # A CookieJar can be used as well (the dict form is a wrapper on top of
    # it).
    from http.cookiejar import Cookie, CookieJar

    obj = CookieJar()
    obj.set_cookie(
        Cookie(
            version=0, name='c1', value='v1', port=None, domain='', path='/',
            secure=False, expires=None, discard=True, comment=None,
            comment_url=None, rest={'HttpOnly': None}, rfc2109=False,
            port_specified=False, domain_specified=False,
            domain_initial_dot=False, path_specified=False,
        )
    )
    requests.request(
        method='POST',
        url='http://127.0.0.1:8000/test/',
        data={'k1': 'v1', 'k2': 'v2'},
        cookies=obj,
    )


# 7) --param files: uploading files
def param_files():
    """Show the accepted forms of ``files`` (examples kept as comments)."""
    # Send a file
    # file_dict = {
    #     'f1': open('readme', 'rb')
    # }
    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  files=file_dict)

    # Send a file with a custom file name
    # file_dict = {
    #     'f1': ('test.txt', open('readme', 'rb'))
    # }
    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  files=file_dict)

    # Send a file with a custom file name (content given as a string)
    # file_dict = {
    #     'f1': ('test.txt', "hahsfaksfa9kasdjflaksdjf")
    # }
    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  files=file_dict)

    # Send a file with a custom file name, content type and headers
    # file_dict = {
    #     'f1': ('test.txt', "hahsfaksfa9kasdjflaksdjf", 'application/text', {'k1': '0'})
    # }
    # requests.request(method='POST',
    #                  url='http://127.0.0.1:8000/test/',
    #                  files=file_dict)
    pass


# 8) --param auth: authentication
def param_auth():
    """Issue a GET with HTTP Basic authentication and print the body."""
    from requests.auth import HTTPBasicAuth, HTTPDigestAuth

    ret = requests.get('https://api.github.com/user', auth=HTTPBasicAuth('wupeiqi', 'sdfasdfasdf'))
    print(ret.text)

    # ret = requests.get('http://192.168.1.1',
    #                    auth=HTTPBasicAuth('admin', 'admin'))
    # ret.encoding = 'gbk'
    # print(ret.text)

    # ret = requests.get('http://httpbin.org/digest-auth/auth/user/pass', auth=HTTPDigestAuth('user', 'pass'))
    # print(ret)


# 9) --param timeout: response timeout
def param_timeout():
    """Show the accepted forms of ``timeout`` (examples kept as comments)."""
    # ret = requests.get('http://google.com/', timeout=1)
    # print(ret)

    # ret = requests.get('http://google.com/', timeout=(5, 1))
    # print(ret)
    pass


# 10) --param allow_redirects: whether redirects are followed
def param_allow_redirects():
    """Issue a GET with redirect following disabled and print the body."""
    ret = requests.get('http://127.0.0.1:8000/test/', allow_redirects=False)
    print(ret.text)


# 11) --param proxies: proxies
def param_proxies():
    """Show the accepted forms of ``proxies`` (examples kept as comments)."""
    # proxies = {
    #     "http": "61.172.249.96:80",
    #     "https": "http://61.185.219.126:3128",
    # }

    # proxies = {'http://10.20.1.128': 'http://10.10.1.10:5323'}

    # ret = requests.get("http://www.proxy360.cn/Proxy", proxies=proxies)
    # print(ret.headers)

    # from requests.auth import HTTPProxyAuth
    #
    # proxyDict = {
    #     'http': '77.75.105.165',
    #     'https': '77.75.105.165'
    # }
    # auth = HTTPProxyAuth('username', 'mypassword')
    #
    # r = requests.get("http://www.google.com", proxies=proxyDict, auth=auth)
    # print(r.text)
    pass


# 12) --param stream
def param_stream():
    """Issue a streamed GET, print the body and release the connection."""
    ret = requests.get('http://127.0.0.1:8000/test/', stream=True)
    try:
        print(ret.content)
    finally:
        # Close the response even if reading the body fails.
        ret.close()

    # from contextlib import closing
    # with closing(requests.get('http://httpbin.org/get', stream=True)) as r:
    #     # Process the response here.
    #     for i in r.iter_content():
    #         print(i)


# 13) --param session: keeps the client's state from earlier requests
def requests_session():
    """Run a three-request flow on one Session so cookies carry over."""
    import requests

    # The context manager closes the session when the flow is done.
    with requests.Session() as session:
        # 1. Visit any page first to obtain a cookie.
        session.get(url="http://dig.chouti.com/help/service")

        # 2. Log in, carrying the cookie from the previous request; the
        #    backend authorizes the `gpsd` value in that cookie.
        session.post(
            url="http://dig.chouti.com/login",
            data={
                'phone': "8615131255089",
                'password': "xxxxxx",
                'oneMonth': "",
            },
        )

        i3 = session.post(
            url="http://dig.chouti.com/link/vote?linksId=8589623",
        )
        print(i3.text)
from contextlib import closing

import requests

response = requests.get('http://www.jianshu.com')

# Attributes of the response object
print(response.text)
print(response.content)
print(response.status_code)
print(response.headers)
print(response.cookies)
print(response.cookies.get_dict())
print(response.cookies.items())
print(response.url)
print(response.history)
print(response.encoding)

# Closing a response: response.close()
# closing() calls response.close() when the with-block exits.
# NOTE: 'xxx' is a placeholder from the tutorial, not a usable URL; replace
# it with a real one before running.
with closing(requests.get('xxx', stream=True)) as response:
    for line in response.iter_content():
        pass
>>>>>待续