• Python用法速查@HTTP


    Requests:HTTP for humans

    HTTP请求

    Requests 中HTTP请求的6种方法

    import requests
    
    r = requests.get("http://httpbin.org/get") # get 访问资源
    r = requests.post("http://httpbin.org/post") # post 提交数据,通常用于新建资源
    r = requests.put("http://httpbin.org/put") # put 按唯一标识整体替换(更新)资源
    r = requests.delete("http://httpbin.org/delete") # delete 删除指定唯一标识资源
    r = requests.head("http://httpbin.org/get") # head 返回资源信息,而不是资源本身
    r = requests.options("http://httpbin.org/get") #options 返回资源支持的所有请求方式
    
    

    GET params

    用dict表示参数

    import requests
    
    payload = {'page': '1', 'per_page': '10'}
    r = requests.get("http://httpbin.org/get", params=payload)
    
    >>>r.url 
    'http://httpbin.org/get?page=1&per_page=10' #自动编码后的URL 
    >>>r.status_code
    200  #状态码 
    >>>r.text #服务器响应返回的原数据
    {
      "args": {}, 
      "headers": {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", 
        "Accept-Encoding": "gzip, deflate", 
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2", 
        "Host": "httpbin.org", 
        "Upgrade-Insecure-Requests": "1", 
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0", 
        "X-Amzn-Trace-Id": "Root=1-61069277-63437d5e5d48a6ec102f8b92"
      }, 
      "origin": "39.149.82.108", 
      "url": "http://httpbin.org/get"
    }
    
    

    POST data/json

    以表单形式传递

    import requests
    
    payload = {'page': 1, 'per_page': 10}
    r = requests.post("http://httpbin.org/post", data=payload)
    
    >>>r.text
    {
      "args": {}, 
      "data": "", 
      "files": {}, 
      "form": {
        "page": "1", 
        "per_page": "10"
      }, 
      "headers": {
        "Accept": "*/*", 
        "Accept-Encoding": "gzip, deflate", 
        "Content-Length": "18", 
        "Content-Type": "application/x-www-form-urlencoded", 
        "Host": "httpbin.org", 
        "User-Agent": "python-requests/2.25.0", 
        "X-Amzn-Trace-Id": "Root=1-61069443-38e30b8e7490c5eb75b60c0f"
      }, 
      "json": null, 
      "origin": "36.229.102.208", 
      "url": "http://httpbin.org/post"
    }
    

    用json形式传递
    编码后传递

    import json
    import requests
    
    payload = {'page': 1, 'per_page': 10}
    r = requests.post("http://httpbin.org/post", data=json.dumps(payload))
    

    直接json

    import requests
    
    payload = {'page': 1, 'per_page': 10}
    r = requests.post("http://httpbin.org/post", json=payload)
    
    

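    这两种做法发送的请求体相同;区别在于 json= 参数会自动把请求头 Content-Type 设为 application/json,而 data=json.dumps(...) 不会,需要时要自行在 headers 中设置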

    headers

    import requests
    
    url = 'http://httpbin.org/post'
    payload = {'page': 1, 'per_page': 10}
    headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}# 修改HTTP头部
    
    r = requests.post("http://httpbin.org/post", json=payload, headers=headers)
    
    >>>r.request.headers #查看请求的头部
    >>>r.headers #查看服务器返回的头部
    

    HTTP响应

    对于响应的状态码,我们用r.status_code访问
    对于响应的正文我们有多种方式读取

    r.text 普通

    读取unicode形式响应

    import requests
    
    r = requests.get("https://github.com/timeline.json")
    print r.text
    print r.encoding
    
    # 输出
    {"message":"Hello there, wayfaring stranger. If you’re reading this then you probably didn’t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"}
    utf-8
    
    

    r.json() JSON

    读取json形式响应,并解析成python的对象

    import requests
    
    r = requests.get("https://github.com/timeline.json")
    
    if r.status_code == 200:
        print r.headers.get('content-type')
        print r.json()
        
    # 输出
    application/json; charset=utf-8
    {u'documentation_url': u'https://developer.github.com/v3/activity/events/#list-public-events', u'message': u'Hello there, wayfaring stranger. If you\u2019re reading this then you probably didn\u2019t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.'}
    
    

    r.content 二进制

    以字节方式访问响应数据

    import requests
    
    url = 'https://github.com/reactjs/redux/blob/master/logo/logo.png?raw=true'
    r = requests.get(url)
    image_data = r.content   # 获取二进制数据
    
    with open('/Users/Ethan/Downloads/redux.png', 'wb') as fout:
        fout.write(image_data)
    
    

    r.raw 原始

    获取原始的套接字响应

    import requests
    
    url = 'https://github.com/reactjs/redux/blob/master/logo/logo.png?raw=true'
    r = requests.get(url, stream=True)
    print r.raw
    r.raw.read(10)
    
    # 输出
    <requests.packages.urllib3.response.HTTPResponse object at 0x1113b0a90>
    '\x89PNG\r\n\x1a\n\x00\x00'
    
    

    r.history 重定向

    r.history 是一个响应列表

    >>> import requests
    
    >>> headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    >>> r = requests.get('https://toutiao.io/k/c32y51', headers=headers)
    
    >>> r.status_code
    200
    
    >>> r.url   # 发生了重定向,响应对象的 url,跟请求对象不一样
    u'http://www.jianshu.com/p/490441391db6?hmsr=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io'
    
    >>> r.history
    [<Response [302]>]
    
    >>> r.history[0].text
    u'<html><body>You are being <a href="http://www.jianshu.com/p/490441391db6?hmsr=toutiao.io&amp;utm_medium=toutiao.io&amp;utm_source=toutiao.io">redirected</a>.</body></html>'
    
    

    allow_redirects=False 禁止重定向

    >>> import requests
    
    >>> headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    >>> r = requests.get('https://toutiao.io/k/c32y51', headers=headers, allow_redirects=False)
    >>> r.url    # 禁止重定向,响应对象的 url 跟请求对象一致
    u'https://toutiao.io/k/c32y51'
    >>> r.history
    []
    >>> r.text
    u'<html><body>You are being <a href="http://www.jianshu.com/p/490441391db6?hmsr=toutiao.io&amp;utm_medium=toutiao.io&amp;utm_source=toutiao.io">redirected</a>.</body></html>'
    
    

    Cookie

    cookies=cookies发送cookie到服务器

    >>> import requests
    
    >>> url = 'http://httpbin.org/cookies'
    >>> cookies = dict(key1='value1')
    
    >>> r = requests.get(url, cookies=cookies)
    >>> r.text
    u'{\n  "cookies": {\n    "key1": "value1"\n  }\n}\n'
    >>> print r.text
    {
      "cookies": {
        "key1": "value1"
      }
    }
    
    
    

    r.cookies['some_key']读取响应cookie

    >>> import requests
    
    >>> url = 'http://example.com/some/cookie/setting/url'
    >>> r = requests.get(url)
    
    >>> r.cookies['some_key']
    'some_value'
    

    requests.Session() 会话对象

    跨请求保持cookie

    >>> import requests
    >>> s = requests.Session()
    >>> s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
    <Response [200]>
    >>> r = s.get("http://httpbin.org/cookies")
    >>> print r.text
    {
      "cookies": {
        "sessioncookie": "123456789"
      }
    }
    
    

    为请求方提供缺省数据

    import requests
    
    s = requests.Session()
    s.auth = ('user', 'pass')
    s.headers.update({'x-test': 'true'})
    
    # x-test 和 x-test2 都会被发送
    s.get('http://httpbin.org/headers', headers={'x-test2': 'true'})
    
    

    HTTP代理 proxies=proxies

    为任意请求设置HTTP代理

    import requests
    
    proxies = {
      "http": "http://10.10.1.10:3128",
      "https": "http://10.10.1.10:1080",
    }
    
    requests.get("http://example.org", proxies=proxies)
    
    

    通过设置环境变量 HTTP_PROXY=host:port 和 HTTPS_PROXY=host:port 来配置代理

    $ export HTTP_PROXY="http://10.10.1.10:3128"
    $ export HTTPS_PROXY="http://10.10.1.10:1080"
    
    $ python
    >>> import requests
    >>> requests.get("http://example.org")
    
    

    SOCKS代理

    需先安装 SOCKS 支持的额外依赖:pip install requests[socks]

    import requests
    
    proxies = {
      "http": "socks5://user:pass@host:port",
      "https": "socks5://user:pass@host:port",
    }
    
    requests.get("http://example.org", proxies=proxies)
    
    

    身份认证

    Basic Auth 基本身份认证

    >>> from requests.auth import HTTPBasicAuth
    >>> requests.get('https://api.github.com/user', auth=HTTPBasicAuth('user', 'pass'))
    或
    requests.get('https://api.github.com/user', auth=('user', 'pass'))
    
    

    OAuth 2 认证

    Web API认证方式,配合requests-oauthlib库使用

    >>> # Credentials you get from registering a new application
    >>> client_id = '<the id you get from github>'
    >>> client_secret = '<the secret you get from github>'
    
    >>> # OAuth endpoints given in the GitHub API documentation
    >>> authorization_base_url = 'https://github.com/login/oauth/authorize'
    >>> token_url = 'https://github.com/login/oauth/access_token'
    
    >>> from requests_oauthlib import OAuth2Session
    >>> github = OAuth2Session(client_id)
    
    >>> # Redirect user to GitHub for authorization
    >>> authorization_url, state = github.authorization_url(authorization_base_url)
    >>> print 'Please go here and authorize,', authorization_url
    
    >>> # Get the authorization verifier code from the callback url
    >>> redirect_response = raw_input('Paste the full redirect URL here:')
    
    >>> # Fetch the access token
    >>> github.fetch_token(token_url, client_secret=client_secret,
    ...         authorization_response=redirect_response)
    
    >>> # Fetch a protected resource, i.e. user profile
    >>> r = github.get('https://api.github.com/user')
    >>> print r.content
    
    

    参考

    explore-python/HTTP/Requests


    ________________________________________________________

    Every good deed you do will someday come back to you.

    Love you,love word !
  • 相关阅读:
    python-web 创建一个输入链接生成的网站
    查看端口有没被占用
    bs的过滤器功能例子
    爬图片的方法
    python 下载图片的方法
    request 里面参数设置 (有空瞄下)
    python 面向对象 初始化(类变量 和 函数内变量)
    访问https请求出现警告,去掉警告的方法
    find 和 find_all 用法
    D3的基本设计思路
  • 原文地址:https://www.cnblogs.com/hugboy/p/15087659.html
Copyright © 2020-2023  润新知