• urllib基本使用1


    urllib是Python自带的标准库,不需要额外下载安装

     1 # 使用urllib来获取百度首页的源码
     2 import urllib.request
     3 
     4 # 1、定义一个url
     5 url = 'http://www.baidu.com'
     6 
     7 # 2、模拟浏览器向服务器发送请求
     8 response = urllib.request.urlopen(url)
     9 
    10 # 3、获取响应中的页面的源码
    11 # read方法返回的是字节形式的二进制数据
    12 content = response.read().decode('utf-8')
    13 
    14 print(content)
    15 
    16 # print(type(response))
    17 # HTTPResponse类型
    18 
    19 # response.read()
    20 # 不传参数时一次性读取全部响应内容(返回bytes)
    21 # response.read(5)
    22 # 一次读5个字节
    23 
    24 # response.readline()
    25 # 一次读一行
    26 
    27 # response.readlines()
    28 # 读取剩余的所有行,返回由各行(bytes)组成的列表
    29 
    30 # response.getcode()
    31 # 状态码
    32 
    33 # response.geturl()
    34 # 访问的url地址
    35 
    36 # response.getheaders()
    37 # 获取响应头

    urllib下载

    1 import urllib.request
    2 
    3 # 下载网页
    4 url_page = 'http://www.baidu.com'
    5 # 第一个参数url:要下载的资源地址;第二个参数filename:保存到本地的文件名
    6 urllib.request.urlretrieve(url_page,'baidu.html')
    7 
    8 # 下载图片
    9 # 下载视频

    定制对象

     1 import urllib.request
     2 
     3 url = 'https://www.baidu.com'
     4 
     5 # url的组成
     6 # http/https  www.baidu.com  80/443                            #
     7 # 协议            主机        端口号       路径      参数      锚点
     8 
     9 
    10 headers = {
    11     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0'
    12 }
    13 request = urllib.request.Request(url=url, headers=headers)
    14 response = urllib.request.urlopen(request)
    15 content = response.read().decode('utf-8')
    16 print(content)

    编解码

      get请求

     1 import urllib.request
     2 import urllib.parse
     3 
     4 url = 'https://www.baidu.com/s?wd='
     5 name = urllib.parse.quote("周杰伦")
     6 # 多参数 https://www.baidu.com/s?wd=周杰伦&sex=男
     7 data = {
     8     'wd': '周杰伦',
     9     'sex': ''
    10 }
    11 params = urllib.parse.urlencode(data)
    12 print(params)

      post请求

     1 import urllib.request
     2 import urllib.parse
     3 
     4 
     5 url = 'https://fanyi.baidu.com/sug'
     6 headers = {
     7     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0'
     8 }
     9 data = {
    10     'kw': '蜘蛛'
    11 }
    12 # post请求的参数必须先urlencode,再encode成字节串(bytes),不能直接传str
    13 data = urllib.parse.urlencode(data).encode("utf-8")
    14 
    15 request = urllib.request.Request(url=url, data=data, headers=headers)
    16 response = urllib.request.urlopen(request)
    17 content = response.read().decode('utf-8')
    18 print(content)

      cookie

     1 import urllib.request
     2 import urllib.parse
     3 
     4 url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
     5 
     6 headers = {"Accept": "*/*",
     7            # "Accept-Encoding": "gzip, deflate, br",
     8            "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
     9             "Connection": "keep-alive", "Content-Length": "136",
    10             "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    11             "Cookie": "BAIDUID=5157A4D583A89C37A7545DADAB81726C:FG=1; BIDUPSID=36874D3B080FE4D3FE00F130757B1DCB; PSTM=1618275809; __yjs_duid=1_5c8c0adae28380c2c87efcd8b022c3551618281414735; MCITY=-289%3A179%3A; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1648619861,1649214103,1649658988,1649730956; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; APPGUIDE_10_0_2=1; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1649730956; ab_sr=1.0.1_MWVlMDU1ZjZmYWEwNzE5N2UxMzJmMjkwZWQ4Y2U5ZTU3NWQ2NzE1M2YxYWYxYzc3M2Y5NWM5MzQ5YzU2YTRkMTZkNTUyYzQzOWViMjJkMzdiNGQxZjAyNTYxYmRlN2Q5MTcxODg4NDFjYWMxM2I0ZThjZGZmM2YxNTNmNGJlYzNkZDczODgxMzAzMTM1NTlhYTk3ZmYxZGY2ODBkZTMzMw==",
    12              "Host": "fanyi.baidu.com", "Origin": "https://fanyi.baidu.com", "Referer": "https://fanyi.baidu.com/"
    13             , "Sec-Fetch-Dest": "empty", "Sec-Fetch-Mode": "cors", "Sec-Fetch-Site": "same-origin"
    14             , "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0"
    15             , "X-Requested-With": "XMLHttpRequest"
    16          }
    17 
    18 data = {
    19     "from": "en",
    20     "to": "zh",
    21     "query": "spider",
    22     "transtype": "realtime",
    23     "simple_means_flag": "3",
    24     "sign": "63766.268839",
    25     "token": "c45b7821850766d1e62222dc6115e145",
    26     "domain": "common"
    27 }
    28 
    29 data = urllib.parse.urlencode(data).encode("utf-8")
    30 
    31 request = urllib.request.Request(url=url, data=data, headers=headers)
    32 response = urllib.request.urlopen(request)
    33 content = response.read().decode('utf-8')
    34 print(content)
    35 
    36 import json
    37 print(json.loads(content))
  • 相关阅读:
    C# winform 使用FastReport.Net自动打印一维码条码和二维码的解决方法
    C# winform 使用rdlc打印小票其中包含动态显示多条形码的解决方法
    我学习的LIS系统业务
    C# DataTable DataSet DataRow 转实体类集合,实体类和实体类集合转成DataTable 扩展方法分享
    我的自动化设备上位机软件开发设计(一)
    打开操作系统数据执行保护,关闭操作系统数据执行保护
    visualstudio2019 的报表技术rdlc在windows10上出现乱码的问题解决方法
    我带旅游ERP管理系统开发的经历
    C# web程序,winform程序,控制台程序配置log4net,使用log4net
    freemodbus modbus TCP 学习笔记
  • 原文地址:https://www.cnblogs.com/onroad2019/p/16134239.html
Copyright © 2020-2023  润新知