Requests:HTTP for humans
HTTP请求
Requests 中HTTP请求的6种方法
import requests
r = requests.get("http://httpbin.org/get") # get 访问资源
r = requests.post("http://httpbin.org/post") # post 向服务器提交数据,通常用于新建资源
r = requests.put("http://httpbin.org/put") # put 按唯一标识修改整个资源
r = requests.delete("http://httpbin.org/delete") # delete 删除指定唯一标识资源
r = requests.head("http://httpbin.org/get") # head 返回资源信息,而不是资源本身
r = requests.options("http://httpbin.org/get") #options 返回资源支持的所有请求方式
GET params
用dict表示参数
import requests
payload = {'page': '1', 'per_page': '10'}
r = requests.get("http://httpbin.org/get", params=payload)
>>>r.url
'http://httpbin.org/get?page=1&per_page=10' #自动编码后的URL
>>>r.status_code
200 #状态码
>>>r.text #服务器响应返回的原数据
{
"args": {
"page": "1",
"per_page": "10"
},
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
"Host": "httpbin.org",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0",
"X-Amzn-Trace-Id": "Root=1-61069277-63437d5e5d48a6ec102f8b92"
},
"origin": "39.149.82.108",
"url": "http://httpbin.org/get?page=1&per_page=10"
}
POST data/json
以表单形式传递
import requests
payload = {'page': 1, 'per_page': 10}
r = requests.post("http://httpbin.org/post", data=payload)
>>>r.text
{
"args": {},
"data": "",
"files": {},
"form": {
"page": "1",
"per_page": "10"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Content-Length": "18",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.25.0",
"X-Amzn-Trace-Id": "Root=1-61069443-38e30b8e7490c5eb75b60c0f"
},
"json": null,
"origin": "36.229.102.208",
"url": "http://httpbin.org/post"
}
用json形式传递
编码后传递
import json
import requests
payload = {'page': 1, 'per_page': 10}
r = requests.post("http://httpbin.org/post", data=json.dumps(payload))
直接json
import requests
payload = {'page': 1, 'per_page': 10}
r = requests.post("http://httpbin.org/post", json=payload)
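这两种做法发送的请求体相同;区别在于 json= 参数会自动把 Content-Type 设为 application/json,而 data=json.dumps(payload) 不会设置该请求头,需要时须通过 headers 手动指定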
headers
import requests
url = 'http://httpbin.org/post'
payload = {'page': 1, 'per_page': 10}
headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}# 修改HTTP头部
r = requests.post("http://httpbin.org/post", json=payload, headers=headers)
>>>r.request.headers #查看请求的头部
>>>r.headers #查看服务器返回的头部
HTTP响应
对于响应的状态码,我们用r.status_code访问
对于响应的正文我们有多种方式读取
r.text 普通
读取unicode形式响应
import requests
r = requests.get("https://github.com/timeline.json")
print r.text
print r.encoding
# 输出
{"message":"Hello there, wayfaring stranger. If you’re reading this then you probably didn’t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"}
utf-8
r.json() JSON
读取json形式响应,并解析成python的对象
import requests
r = requests.get("https://github.com/timeline.json")
if r.status_code == 200:
print r.headers.get('content-type')
print r.json()
# 输出
application/json; charset=utf-8
{u'documentation_url': u'https://developer.github.com/v3/activity/events/#list-public-events', u'message': u'Hello there, wayfaring stranger. If you\u2019re reading this then you probably didn\u2019t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.'}
r.content 二进制
以字节方式访问响应数据
import requests
url = 'https://github.com/reactjs/redux/blob/master/logo/logo.png?raw=true'
r = requests.get(url)
image_data = r.content # 获取二进制数据
with open('/Users/Ethan/Downloads/redux.png', 'wb') as fout:
fout.write(image_data)
r.raw 原始
获取原始的套接字响应
import requests
url = 'https://github.com/reactjs/redux/blob/master/logo/logo.png?raw=true'
r = requests.get(url, stream=True)
print r.raw
r.raw.read(10)
# 输出
<requests.packages.urllib3.response.HTTPResponse object at 0x1113b0a90>
'\x89PNG\r\n\x1a\n\x00\x00'
r.history 重定向
r.history 是一个响应列表
>>> import requests
>>> headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
>>> r = requests.get('https://toutiao.io/k/c32y51', headers=headers)
>>> r.status_code
200
>>> r.url # 发生了重定向,响应对象的 url,跟请求对象不一样
u'http://www.jianshu.com/p/490441391db6?hmsr=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io'
>>> r.history
[<Response [302]>]
>>> r.history[0].text
u'<html><body>You are being <a href="http://www.jianshu.com/p/490441391db6?hmsr=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io">redirected</a>.</body></html>'
allow_redirects=False 禁止重定向
>>> import requests
>>> headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
>>> r = requests.get('https://toutiao.io/k/c32y51', headers=headers, allow_redirects=False)
>>> r.url # 禁止重定向,响应对象的 url 跟请求对象一致
u'https://toutiao.io/k/c32y51'
>>> r.history
[]
>>> r.text
u'<html><body>You are being <a href="http://www.jianshu.com/p/490441391db6?hmsr=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io">redirected</a>.</body></html>'
Cookie
cookies=cookies发送cookie到服务器
>>> import requests
>>> url = 'http://httpbin.org/cookies'
>>> cookies = dict(key1='value1')
>>> r = requests.get(url, cookies=cookies)
>>> r.text
u'{\n  "cookies": {\n    "key1": "value1"\n  }\n}\n'
>>> print r.text
{
"cookies": {
"key1": "value1"
}
}
r.cookies['some_key']读取响应cookie
>>> import requests
>>> url = 'http://example.com/some/cookie/setting/url'
>>> r = requests.get(url)
>>> r.cookies['some_key']
'some_value'
requests.Session() 会话对象
跨请求保持cookie
>>> import requests
>>> s = requests.Session()
>>> s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
<Response [200]>
>>> r = s.get("http://httpbin.org/cookies")
>>> print r.text
{
"cookies": {
"sessioncookie": "123456789"
}
}
为请求方法提供缺省数据
import requests
s = requests.Session()
s.auth = ('user', 'pass')
s.headers.update({'x-test': 'true'})
# x-test 和 x-test2 都会被发送
s.get('http://httpbin.org/headers', headers={'x-test2': 'true'})
HTTP代理 proxies=proxies
为任意请求设置HTTP代理
import requests
proxies = {
"http": "http://10.10.1.10:3128",
"https": "http://10.10.1.10:1080",
}
requests.get("http://example.org", proxies=proxies)
通过设置环境变量 HTTP_PROXY=host:port 和 HTTPS_PROXY=host:port 来配置代理
$ export HTTP_PROXY="http://10.10.1.10:3128"
$ export HTTPS_PROXY="http://10.10.1.10:1080"
$ python
>>> import requests
>>> requests.get("http://example.org")
SOCKS代理
需装第三方库pip install requests[socks]
import requests
proxies = {
"http": "socks5://user:pass@host:port",
"https": "socks5://user:pass@host:port",
}
requests.get("http://example.org", proxies=proxies)
身份认证
Basic Auth 基本身份认证
>>> from requests.auth import HTTPBasicAuth
>>> requests.get('https://api.github.com/user', auth=HTTPBasicAuth('user', 'pass'))
或
requests.get('https://api.github.com/user', auth=('user', 'pass'))
OAuth 2 认证
Web API认证方式,配合requests-oauthlib库使用
>>> # Credentials you get from registering a new application
>>> client_id = '<the id you get from github>'
>>> client_secret = '<the secret you get from github>'
>>> # OAuth endpoints given in the GitHub API documentation
>>> authorization_base_url = 'https://github.com/login/oauth/authorize'
>>> token_url = 'https://github.com/login/oauth/access_token'
>>> from requests_oauthlib import OAuth2Session
>>> github = OAuth2Session(client_id)
>>> # Redirect user to GitHub for authorization
>>> authorization_url, state = github.authorization_url(authorization_base_url)
>>> print 'Please go here and authorize,', authorization_url
>>> # Get the authorization verifier code from the callback url
>>> redirect_response = raw_input('Paste the full redirect URL here:')
>>> # Fetch the access token
>>> github.fetch_token(token_url, client_secret=client_secret,
>>> authorization_response=redirect_response)
>>> # Fetch a protected resource, i.e. user profile
>>> r = github.get('https://api.github.com/user')
>>> print r.content