• Python crawler: requests


    Using the requests library

    >>> Features

    Keep-Alive & connection pooling

    Internationalized domains and URLs

    Sessions with persistent cookies

    Browser-style SSL verification

    Automatic content decoding

    Basic / Digest authentication

    Elegant key/value cookies

    Automatic decompression

    Unicode response bodies

    HTTP(S) proxy support

    Multipart file uploads

    Streaming downloads (see the sketch after this list)

    Connection timeouts

    Chunked requests

    .netrc support
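
    Two of the features above, streaming downloads and multipart file uploads, do not appear in the numbered sections below. A minimal sketch of both, assuming httpbin.org is reachable:

    import requests

    # streaming download: read the body in chunks instead of loading it all at once
    with requests.get('https://httpbin.org/bytes/1024', stream=True) as response:
        with open('downloaded.bin', 'wb') as f:
            for chunk in response.iter_content(chunk_size=256):
                f.write(chunk)

    # multipart file upload via the files= parameter
    with open('downloaded.bin', 'rb') as f:
        response = requests.post('https://httpbin.org/post', files={'file': f})
    print(response.status_code)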

    2 Request methods

    import requests

    response = requests.get('https://httpbin.org/get')
    response = requests.post('http://httpbin.org/post', data={'key': 'value'})

    3 Passing URL parameters

    params = {'key1': 'value1', 'key2': 'value2'}
    response = requests.get('http://httpbin.org/get', params=params)
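
    The dict is encoded into the query string; a quick way to confirm, assuming the request above succeeded:

    print(response.url)  # e.g. http://httpbin.org/get?key1=value1&key2=value2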

    4 Custom headers

    headers = {'user-agent': 'my-app/0.0.1'}  # custom headers
    response = requests.get(url, headers=headers)

    5 Custom cookies

    co = {'cookies_are': 'working'}
    response = requests.get(url, cookies=co)

    6 Setting proxies

    proxies = {
        'http': 'http://10.10.1.10:3128',
        'https': 'https://10.10.1.10:1080'
    }
    requests.get('http://httpbin.org/ip', proxies=proxies)

    7 Redirects

    response = requests.get('http://github.com', allow_redirects=False)
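
    With redirects disabled the 3xx response itself comes back; a small sketch for inspecting it (the exact status code depends on the server), plus the history kept when redirects are allowed:

    response = requests.get('http://github.com', allow_redirects=False)
    print(response.status_code)               # e.g. 301
    print(response.headers.get('Location'))   # redirect target, if any

    response = requests.get('http://github.com')
    print(response.history)                   # intermediate redirect responses
    print(response.url)                       # final URL after following redirects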

    8 Disabling certificate verification

    response = requests.get('https://httpbin.org/get', verify=False)

    # Disabling verification triggers a rather annoying warning; it can be silenced like this:

    from requests.packages.urllib3.exceptions import InsecureRequestWarning

    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    9 Setting a timeout

    requests.get('http://github.com', timeout=0.01)
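
    A 0.01 s timeout will almost certainly expire; a minimal sketch of catching the resulting exception:

    try:
        requests.get('http://github.com', timeout=0.01)
    except requests.exceptions.Timeout:
        print('request timed out')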

     

    Receiving responses

    >>> Character encoding

    response = requests.get('https://api.github.com/events')
    response.encoding = 'utf-8'
    print(response.text)

    >>> Binary data

    response = requests.get('https://api.github.com/events')
    print(response.content)
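
    response.content holds the raw bytes, which is what you want when writing a downloaded file to disk; a minimal sketch using a placeholder URL:

    # the URL is a placeholder; substitute the binary resource you actually need
    response = requests.get('https://example.com/logo.png')
    with open('logo.png', 'wb') as f:
        f.write(response.content)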

    >>> JSON data

    response = requests.get('https://api.github.com/events')
    print(response.json())

    >>> Status code

    response = requests.get('http://httpbin.org/get')
    print(response.status_code)
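
    Instead of checking the code by hand, raise_for_status() raises an HTTPError for 4xx/5xx responses:

    response = requests.get('http://httpbin.org/status/404')
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print(e)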

    >>> Cookies returned by the server

    response = requests.get(url)
    print(response.cookies['example_cookie_name'])

    >>> Session objects

    session = requests.Session()
    
    session.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
    
    response = session.get('http://httpbin.org/cookies')
    
    print(response.text)
    
    # {"cookies": {"sessioncookie": "123456789"}}
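
    A Session can also carry defaults that every request it makes will reuse; a minimal sketch with a default header:

    session = requests.Session()
    session.headers.update({'user-agent': 'my-app/0.0.1'})

    # the default header is sent with every request made through this session
    response = session.get('http://httpbin.org/headers')
    print(response.json())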

    >>> Using lxml

    from lxml import html
    import requests

    # fetch an article-list page for one journal issue from CNKI
    url = 'http://navi.cnki.net/knavi/JournalDetail/GetArticleList?year=2018&issue=04&pykm=DZXU&pageIdx=0&pcode=CJFD'
    res = requests.get(url)

    # parse the HTML and select every <dd class="row clearfix "> node
    tree = html.fromstring(res.text)
    name = tree.xpath("//dd[@class='row clearfix ']")
    print(name)
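
    tree.xpath() returns Element objects, so print(name) only shows their reprs; a minimal sketch for pulling out the text, assuming the page markup is unchanged:

    # text_content() gathers all text nested inside each matched <dd>
    for dd in name:
        print(dd.text_content().strip())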
  • Original article: https://www.cnblogs.com/person1-0-1/p/11311163.html