• requests模块的使用


    requests模块

    • 什么是requests模块:requests是python中一个基于网络请求的第三方模块(需要单独安装,并非python原生自带),可以模拟浏览器发起请求。

    requests-get请求

    # Basic GET request: fetch the Sogou homepage and save it to disk.
    import requests

    # Target URL
    url = 'https://www.sogou.com/'

    # requests.get returns a Response object for the request
    response = requests.get(url)
    if response.status_code == 200:
        # fix: pass encoding explicitly — the original relied on the platform
        # default encoding, which can raise UnicodeEncodeError for the
        # non-ASCII characters in the page on some systems (e.g. Windows/GBK).
        with open('sougo.html', 'w', encoding='utf-8') as f:
            f.write(response.text)
    else:
        print('页面获取失败')
    

    response常用属性

    # Demonstrate the commonly used attributes of a Response object.
    import requests

    # Target URL
    url = 'https://www.sogou.com/'

    # Issue the GET request; a Response object is returned.
    response = requests.get(url)
    if response.status_code == 200:
        # print(response.text) # body decoded as text
        print(response.status_code) # the HTTP status code of the response
        print(response.content) # the raw response body as bytes
        print(response.headers) # the response header mapping
        print(response.url) # the URL the request was sent to
    else:
        print('页面获取失败')
    

    携带参数的get请求

    • 方式1
    import requests

    # Query parameters are embedded directly in the URL string; requests
    # percent-encodes the non-ASCII query term automatically, so no manual
    # encoding step is required.
    url = 'https://www.sogou.com/web?query=周杰伦&ie=utf-8'

    # Send the GET request and keep the Response object.
    resp = requests.get(url)
    if resp.status_code == 200:
        # Write the raw bytes, so no text re-encoding is involved.
        with open('jay.html', 'wb') as f:
            f.write(resp.content)
    else:
        print('页面获取失败')
    
    • 方式2
    import requests
    url = 'https://www.sogou.com/web'
    
    params = {
        'query':'周杰伦',
        'ie':'utf-8'
    }
    response = requests.get(url,params=params)
    if response.status_code == 200:
        with open('jay.html','wb') as f:
            f.write(response.content)
    else:
        print('页面获取失败')
    

    get请求自定义请求头信息

    # GET request with custom request headers.
    import requests

    url = 'https://www.sogou.com/web'
    # Custom header fields go in a dict passed via the `headers` argument;
    # here a browser User-Agent is spoofed so the server treats the request
    # as coming from a regular browser.
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    query_args = {
        'query': '林宥嘉',
        'ie': 'utf-8',
    }
    resp = requests.get(url=url, params=query_args, headers=headers)
    print(resp.status_code)
    

    requests-post请求

    # POST request: submit the GitHub sign-in form.
    # fix: the original snippet used requests without importing it.
    import requests

    # Form handler URL
    url = 'https://github.com/session'
    # NOTE(review): credentials and a CSRF token are hard-coded below; the
    # authenticity_token is session-bound, so a stale value means this login
    # will be rejected — a fresh token must be scraped from the login page
    # first. Hard-coding a real password in source is also a security risk.
    data = {
        'commit': 'Sign in',
        'utf8': '✓',
        'authenticity_token': 'IRdX8jflo9hKJAZ9mOzQBNnVnOFD7z9MfKvSYCOvrVN4uWz/LDQ81b6wWWy4d8YrvYobfiuLYS92zoK6XgH/LQ==',
        'login': '1032298871@qq.com',
        'password': '09212427zlh'
    }
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    # Form fields go in the request body via the `data` argument.
    response = requests.post(url=url, data=data, headers=headers)
    with open('github.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
    

    requests模块ajax的get请求

    # AJAX-style GET request against Douban's movie search endpoint.
    import requests

    url = 'https://movie.douban.com/j/new_search_subjects?'
    # Query-string parameters for the search endpoint.
    params = {
        'sort': 'U',
        'range': '0,10',
        'tags': '电影',
        'start': '40'
    }
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    # fix: GET query parameters must be passed via `params=`. The original
    # passed them via `data=`, which puts them in the request body where the
    # server ignores them for a GET request.
    response = requests.get(url=url, params=params, headers=headers)
    # The endpoint responds with a JSON string.
    print(response.text)
    

    requests模块ajax的post请求

    # AJAX-style POST request against Baidu Translate's suggestion endpoint.
    import requests
    import json

    url = 'https://fanyi.baidu.com/sug'
    # The keyword to look up.
    data = {
        'kw': '西瓜'
    }
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    response = requests.post(url=url, headers=headers, data=data)
    # Idiomatic: Response.json() parses the JSON body directly, using the
    # response's declared encoding, instead of json.loads(response.text).
    json_data = response.json()
    print(json_data)
    

    爬取多页数据

    # Crawl the first N result pages of Sogou's Zhihu search and save each
    # page to its own HTML file under ./page/.
    import requests
    import os

    # fix: create the directory idempotently (no exists-then-mkdir race) and
    # use one canonical path string — the original checked './page' but
    # created 'page'.
    os.makedirs('./page', exist_ok=True)

    url = 'https://zhihu.sogou.com/zhihu?'
    work = input('想搜索什么内容')
    page_number = input('想获取前几页的内容')
    for page in range(1, int(page_number) + 1):
        print(page)
        # NOTE(review): 'sut'/'lkt'/'sst0' look like captured session/timing
        # tokens copied from a real browser request — presumably optional,
        # but verify the endpoint still answers without fresh values.
        params = {
            'query': work,
            'sut': '13598',
            'lkt': '1,1546144033954,1546144033954',
            'sst0': '1546144034930',
            'page': page,
            'ie': 'utf8'
        }
        response = requests.get(url=url, params=params)
        page_text = response.text
        # One file per page, named <query><page>.html
        page_file = './page/%s%s.html' % (work, page)
        with open(page_file, 'w', encoding='utf-8') as f:
            f.write(page_text)
    

    requests模块高级:

    cookie作用:服务器端使用cookie来记录客户端的状态信息

    import requests
    
    session = requests.session()
    #1.发起登录请求:将cookie获取,切存储到session对象中
    login_url = 'https://accounts.douban.com/login'
    data = {
        "source": "None",
        "redir": "https://www.douban.com/people/185687620/",
        "form_email": "15027900535",
        "form_password": "bobo@15027900535",
        "login": "登录",
    }
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        }
    #使用session发起post请求
    login_response = session.post(url=login_url,data=data,headers=headers)
    
    #2.对个人主页发起请求(session(cookie)),获取响应页面数据
    url = 'https://www.douban.com/people/185687620/'
    response = session.get(url=url,headers=headers)
    page_text = response.text
    
    with open('./douban110.html','w',encoding='utf-8') as fp:
        fp.write(page_text)
    

    requests使用ip代理

    # Search Baidu for "ip" through an HTTP proxy, so the result page shows
    # the proxy's address instead of our own.
    import requests

    url = 'http://www.baidu.com/s?ie=UTF-8&wd=ip'

    # Proxies are given as a dict: key is the scheme, value is 'ip:port'.
    # NOTE(review): this public proxy address is almost certainly dead —
    # replace it with a live one before running.
    proxy = {
        'http': '115.28.209.249:3128'
    }
    response = requests.get(url=url, proxies=proxy)
    # fix: write with an explicit encoding — the page contains non-ASCII
    # text, and the platform-default codec may fail to encode it.
    with open('daili.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
    
  • 相关阅读:
    c#中使用多线程访问winform中控件的若干问题(转)
    Winform 分页控件(转)
    C#争论:什么时候应该使用var?
    C#的Contains() 值还是引用
    DataTemplate
    DX11_基于GPU_ComputeShader的3D精确拾取
    串行的BitonicSort双调排序
    Directx11_使用Effect框架包装ComputeShader
    Silverlight自适应布局
    poj3626广搜
  • 原文地址:https://www.cnblogs.com/wualin/p/10202916.html
Copyright © 2020-2023  润新知