Basic Usage of the Requests Library for Python Web Scraping


    import requests
    response = requests.get('http://www.baidu.com/')
    print(type(response))
    print(response.status_code)
    print(type(response.text))
    print(response.text)
    print(response.cookies)

    # Various request methods
    import requests
    requests.post('http://httpbin.org/post')
    requests.put('http://httpbin.org/put')
    requests.delete('http://httpbin.org/delete')
    requests.head('http://httpbin.org/get')
    requests.options('http://httpbin.org/get')

    # Basic GET request
    import requests
    response = requests.get('http://httpbin.org/get')
    print(response.text)

    # GET request with parameters
    import requests
    response = requests.get('http://httpbin.org/get?name=germey&age=22')
    print(response.text)

    import requests
    data = {
        'name': 'germey',
        'age': 22
    }
    response = requests.get('http://httpbin.org/get', params=data)
    print(response.text)

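    # (Added note, not in the original post.) The params dict is URL-encoded
    # into the query string automatically; response.url shows the final URL
    # that was actually requested.
    import requests
    response = requests.get('http://httpbin.org/get', params={'name': 'germey', 'age': 22})
    print(response.url)  # http://httpbin.org/get?name=germey&age=22
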
    # Parse JSON
    import requests
    import json
    response = requests.get('http://httpbin.org/get')
    print(type(response.text))
    print(response.json())
    print(json.loads(response.text))
    print(type(response.json()))

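    # (Added note, not in the original post.) response.json() raises
    # ValueError (json.JSONDecodeError) when the body is not valid JSON,
    # e.g. an HTML error page, so it can be worth guarding the call.
    import requests
    response = requests.get('http://httpbin.org/get')
    try:
        data = response.json()
        print(data['url'])
    except ValueError:
        print('response body is not JSON')
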
    # Fetch binary data
    import requests
    response = requests.get('http://github.com/favicon.ico')
    print(type(response.text), type(response.content))
    print(response.text)
    print(response.content)

    # Save an image (no explicit close needed; the with block closes the file)
    import requests
    response = requests.get('http://github.com/favicon.ico')
    with open('1.ico', 'wb') as f:
        f.write(response.content)

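    # (Added note, not in the original post.) response.content loads the
    # whole body into memory; for large files, stream the download instead:
    # stream=True defers the body and iter_content() reads it in chunks.
    import requests
    response = requests.get('http://github.com/favicon.ico', stream=True)
    with open('favicon_stream.ico', 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
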
    # Adding headers: without a User-Agent, the request below fails
    # (Zhihu returns an error page); the second request adds one and succeeds
    import requests
    response = requests.get('http://www.zhihu.com/explore')
    print(response.text)

    import requests
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    }
    response = requests.get('http://zhihu.com/explore', headers=headers)
    print(response.text)

    # Basic POST request
    import requests
    data = {'name': 'germey', 'age': 22}
    response = requests.post('http://httpbin.org/post', data=data)
    print(response.text)

    import requests
    data = {'name': 'germey', 'age': 22}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
    }
    response = requests.post('http://httpbin.org/post', data=data, headers=headers)
    print(response.json())

    # Response attributes
    import requests
    response = requests.get('http://www.jianshu.com')
    print(type(response.status_code), response.status_code)
    print(type(response.headers), response.headers)
    print(type(response.cookies), response.cookies)
    print(type(response.url), response.url)
    print(type(response.history), response.history)

    # File upload
    import requests
    files = {'file': open('1.ico', 'rb')}
    response = requests.post('http://httpbin.org/post', files=files)
    print(response.text)

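    # (Added note, not in the original post.) A value in the files dict may
    # also be a (filename, fileobj, content_type) tuple, which controls the
    # filename and MIME type sent in the multipart body.
    import requests
    files = {'file': ('favicon.ico', open('1.ico', 'rb'), 'image/x-icon')}
    response = requests.post('http://httpbin.org/post', files=files)
    print(response.text)
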
    # Get cookies
    import requests
    response = requests.get('http://www.baidu.com')
    print(response.cookies)
    for key, value in response.cookies.items():
        print(key + ' = ' + value)

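    # (Added note, not in the original post.) Cookies can also be sent with
    # a single request by passing a dict (or a cookie jar) to the cookies
    # parameter; httpbin.org/cookies echoes back what it received.
    import requests
    response = requests.get('http://httpbin.org/cookies', cookies={'number': '123456789'})
    print(response.text)
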
    # Session persistence / simulating login. In the first example the two
    # calls are independent, like requests made from two different browsers,
    # so the cookie set by the first request is not sent with the second;
    # use a Session object, as in the second example.
    import requests
    requests.get('http://httpbin.org/cookies/set/number/123456789')
    response = requests.get('http://httpbin.org/cookies')
    print(response.text)

    import requests
    s = requests.Session()
    s.get('http://httpbin.org/cookies/set/number/123456789')
    response = s.get('http://httpbin.org/cookies')
    print(response.text)

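    # (Added note, not in the original post.) A Session also works as a
    # context manager, and default headers set on it are merged into every
    # request it makes. The User-Agent string here is just a placeholder.
    import requests
    with requests.Session() as s:
        s.headers.update({'User-Agent': 'my-crawler/0.1'})
        response = s.get('http://httpbin.org/headers')
        print(response.text)
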
    # Certificate verification (this request raises an SSLError when the
    # site's certificate cannot be verified, as 12306's once couldn't)
    import requests
    response = requests.get('https://www.12306.cn')
    print(response.status_code)

    import requests
    from requests.exceptions import ConnectTimeout, HTTPError, ReadTimeout, RequestException
    from requests.auth import HTTPBasicAuth
    import urllib3

    # Certificate verification
    # Suppress the InsecureRequestWarning that urllib3 emits for every
    # unverified HTTPS request below
    urllib3.disable_warnings()
    # verify=False skips certificate verification for HTTPS requests
    # (the default is verify=True)
    response = requests.get('https://www.12306.cn', verify=False)
    print(response.status_code)

    # Proxy settings
    proxies = {
        "http": "http://127.0.0.1:9743",
        "https": "https://127.0.0.1:9743",
    }
    response = requests.get("https://www.taobao.com", proxies=proxies)
    print(response.status_code)

    # Proxy with username and password
    proxies = {
        "http": "http://user:password@127.0.0.1:9743/",
    }
    response = requests.get("https://www.taobao.com", proxies=proxies)
    print(response.status_code)

    # SOCKS proxy (requires the extra dependency: pip install requests[socks])
    proxies = {
        "http": "socks5://127.0.0.1:9742",
        "https": "socks5://127.0.0.1:9742",
    }
    response = requests.get("https://www.taobao.com", proxies=proxies)
    print(response.status_code)

    # Timeout settings (a 0.2s timeout here can fail at either the connect
    # or the read stage, so catch both)
    try:
        response = requests.get("http://httpbin.org/get", timeout=0.2)
        print(response.status_code)
    except (ConnectTimeout, ReadTimeout):
        print("timeout!")

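    # (Added note, not in the original post.) timeout may also be a
    # (connect_timeout, read_timeout) tuple to bound the two phases
    # separately; the values below are illustrative.
    try:
        response = requests.get("http://httpbin.org/get", timeout=(3.05, 27))
        print(response.status_code)
    except (ConnectTimeout, ReadTimeout):
        print("timeout!")
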
    # Authentication; the two calls below are equivalent
    response = requests.get("http://120.27.34.24:9001", auth=HTTPBasicAuth("user", "123"))
    print(response.status_code)

    response = requests.get("http://120.27.34.24:9001", auth=("user", "123"))
    print(response.status_code)

    # Exception handling: catch the more specific exceptions first;
    # ReadTimeout and HTTPError are subclasses of RequestException, so a
    # leading `except RequestException` would swallow them
    try:
        response = requests.get("http://httpbin.org/get", timeout=0.2)
        print(response.status_code)
    except ConnectTimeout:
        print("timeout!")
    except ReadTimeout:
        print("ReadTimeout!")
    except HTTPError:
        print("HTTPError!")
    except RequestException:
        print("RequestException!")
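One detail worth spelling out: requests does not raise HTTPError on its own for 4xx/5xx status codes; you opt in by calling raise_for_status(). A minimal sketch of how the HTTPError branch above can actually fire:

    import requests
    from requests.exceptions import HTTPError

    response = requests.get("http://httpbin.org/status/404")
    try:
        # raise_for_status() turns 4xx/5xx responses into HTTPError
        response.raise_for_status()
    except HTTPError as e:
        print("HTTPError:", e)
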
    import requests
    response = requests.get("http://www.baidu.com")
    print(response.cookies)
    print("----------")
    # Convert the cookie jar to a dict
    d = requests.utils.dict_from_cookiejar(response.cookies)
    print(d)
    print("----------")
    # Convert the dict back to a cookie jar
    print(requests.utils.cookiejar_from_dict(d))
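Both representations can be fed back into later requests: the cookies parameter accepts either a plain dict or a cookie jar. A short round-trip sketch:

    import requests

    response = requests.get("http://www.baidu.com")
    d = requests.utils.dict_from_cookiejar(response.cookies)
    # Either form can be passed back on a later request via cookies=
    r1 = requests.get("http://httpbin.org/cookies", cookies=d)
    r2 = requests.get("http://httpbin.org/cookies", cookies=requests.utils.cookiejar_from_dict(d))
    print(r1.text)
    print(r2.text)
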
    # URL decoding (the query string here is GBK-encoded, hence gb18030)
    import requests
    print(requests.utils.unquote("http://tieba.baidu.com/f?kw=%D2%D7%D3%EF%D1%D4&fr=ala0&tpl=5", encoding="gb18030"))
    # URL encoding
    print(requests.utils.quote("http://tieba.baidu.com/f?kw=%D2%D7%D3%EF%D1%D4&fr=ala0&tpl=5"))
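These helpers are, as far as requests on Python 3 goes, re-exports of the standard library's urllib.parse functions, so the equivalent calls work without requests. The round-trip below assumes the keyword is GBK-encoded, which is why gb18030 is passed:

    from urllib.parse import quote, unquote

    kw = unquote("%D2%D7%D3%EF%D1%D4", encoding="gb18030")
    print(kw)                             # the decoded Tieba keyword
    print(quote(kw, encoding="gb18030"))  # re-encoded: %D2%D7%D3%EF%D1%D4
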
Original post: https://www.cnblogs.com/duxie/p/10024919.html