• requests 后续1


    发送带数据post请求
    import requests

    # NOTE: `url`, `user` and `pwd` are not defined in this snippet; the
    # reader has to supply them before running it.

    # Send a POST request; the form fields to submit go into `data`.
    data = {}
    response = requests.post(url, data=data)

    # Intranet endpoints that require authentication: hand requests a
    # (user, password) pair through the `auth` argument.
    auth = user, pwd
    response = requests.get(url, auth=auth)
    通过代理发送get请求
    import requests

    # 1. Target URL
    url = 'http://www.baidu.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
    }

    # Maps the scheme of the requested URL to the proxy that should carry it.
    # The proxy address is written as a full URL (scheme included), which is
    # the form the requests documentation asks for.
    free_proxy = {'http': 'http://27.17.45.90:43411'}

    # requests has no default timeout; without one an unresponsive proxy
    # would block this call indefinitely.
    response = requests.get(url=url, headers=headers, proxies=free_proxy, timeout=10)

    print(response.status_code)
    发送忽略CA证书验证的get请求
    import requests

    url = 'https://www.12306.cn/mormhweb/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
    }

    # HTTPS normally relies on a certificate issued by a third-party CA.
    # According to the original author, 12306 serves HTTPS with a certificate
    # it issued itself rather than a CA-issued one.
    # Workaround: tell requests to skip certificate verification.
    response = requests.get(url=url, headers=headers, verify=False)
    data = response.content.decode()

    # The body was decoded as UTF-8 above, so write it back as UTF-8 instead
    # of relying on the platform's default file encoding.
    with open('03-ssl.html', 'w', encoding='utf-8') as f:
        f.write(data)

    # Error recorded by the original author (presumably what is raised when
    # verification is left enabled):
    # requests.exceptions.SSLError: HTTPSConnectionPool(host=
    发送带cookie(字符串)的get请求
    import requests

    # Browser-like request headers sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
    }

    # URL of the page whose data is requested.
    member_url = 'https://www.yaozh.com/member/'
    # The cookies as one raw "name=value; name=value" string.
    # NOTE(review): this looks like it was copied from a live browser
    # session - confirm it is not a usable credential before publishing.
    cookies = '_ga=GA1.2.1820447474.1535025127; MEIQIA_EXTRA_TRACK_ID=199Tty9OyANCXtHaSobJs67FU7J; WAF_SESSION_ID=7d88ae0fc48bffa022729657cf09807d; PHPSESSID=70kadg2ahpv7uuc8docd09iat4; _gid=GA1.2.133568065.1540383729; _gat=1; MEIQIA_VISIT_ID=1C1OdtdqpgpGeJ5A2lCKLMGiR4b; yaozh_logintime=1540383753; yaozh_user=381740%09xiaomaoera12; yaozh_userId=381740; db_w_auth=368675%09xiaomaoera12; UtzD_f52b_saltkey=ylH82082; UtzD_f52b_lastvisit=1540380154; UtzD_f52b_lastact=1540383754%09uc.php%09; UtzD_f52b_auth=f958AVKmmdzQ2CWwmr6GMrIS5oKlW%2BkP5dWz3SNLzr%2F1b6tOE6vzf7ssgZDjhuXa2JsO%2FIWtqd%2FZFelWpPHThohKQho; yaozh_uidhas=1; yaozh_mylogin=1540383756; MEIQIA_EXTRA_TRACK_ID=199Tty9OyANCXtHaSobJs67FU7J; WAF_SESSION_ID=7d88ae0fc48bffa022729657cf09807d; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1535025126%2C1535283389%2C1535283401%2C1539351081%2C1539512967%2C1540209934%2C1540383729; MEIQIA_VISIT_ID=1C1OdtdqpgpGeJ5A2lCKLMGiR4b; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1540383761'


    def parse_cookie_string(raw):
        """Convert a ``name=value; name=value`` cookie string into a dict.

        Each pair is split on its first ``=`` only, so a value that itself
        contains ``=`` is kept whole. Empty segments are skipped, a segment
        without ``=`` maps to an empty string, and when a name occurs more
        than once the last occurrence wins.
        """
        pairs = (pair.partition('=') for pair in raw.split('; ') if pair)
        # Dict comprehension over the (name, separator, value) triples.
        return {name: value for name, _sep, value in pairs}


    # The `cookies=` argument of requests takes a dict, not the raw string.
    cook_dict = parse_cookie_string(cookies)
    
    # Request the page, sending the parsed cookies along.
    response = requests.get(member_url, headers=headers, cookies=cook_dict)

    data = response.content.decode()

    # The body was decoded as UTF-8 above, so write it back as UTF-8 instead
    # of relying on the platform's default file encoding.
    with open('05-cookie.html', 'w', encoding='utf-8') as f:
        f.write(data)
    发送post请求(自动携带session)
    import requests

    # URL of the page whose data is requested.
    member_url = 'https://www.yaozh.com/member/'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
    }
    # A session object stores cookies automatically (it plays the role of a
    # cookie jar), so cookies set by one response are sent on later requests.
    session = requests.session()
    # 1. Log in from code.
    login_url = 'https://www.yaozh.com/login'
    # NOTE(review): 'backurl' is already percent-encoded and contains '%3AF',
    # which looks like a typo for '%3A%2F' - confirm against the real form.
    login_form_data = {
        'username':'aoa1',
        'pwd': 'l812',
        'formhash': '54AEE419',
        'backurl': 'https%3AF%2Fwww.yaozh.com%2F',
    }
    login_response = session.post(login_url,data=login_form_data,headers=headers)
    print(login_response.content.decode())
    # 2. After logging in, request the target page through the same session
    #    so the cookies obtained at login are sent with it.
    data = session.get(member_url,headers=headers).content.decode()

    # The body was decoded as UTF-8 above, so write it back as UTF-8 instead
    # of relying on the platform's default file encoding.
    with open('05-cookie2.html', 'w', encoding='utf-8') as f:
        f.write(data)

    正则表达式

    import re

    # Greedy mode (the default): 'm(.*)n' matches as far as it can.
    # Non-greedy mode: 'm(.*?)n' stops at the earliest possible end.
    one = 'mdfsdsfffdsn12345656n'
    # Raw literal: same value as before ('a', backslash, 'd') but without the
    # invalid-escape-sequence warning that "a\d" triggers.
    two = r"a\d"
    # NOTE: in a non-raw literal '\b' is the backspace character, so this
    # pattern is 'a' followed by a backspace - not 'a' at a word boundary,
    # which would be written r'a\b'.
    pattern = re.compile('a\b')
    # pattern = re.compile('m(.*?)n')

    result = pattern.findall(two)

    print(result)
    . 匹配除换行符 \n 之外的任意字符
    import re

    # '.' matches any character except the newline '\n'.
    #   re.S (re.DOTALL) lets '.' match '\n' as well
    #   re.I (re.IGNORECASE) makes the match case-insensitive
    one = """
        msfdsdffdsdfsn
        1234567778888N
    """

    # Combine both flags so the greedy group can run across the line break
    # and the closing 'n' may also be an upper-case 'N'.
    flags = re.DOTALL | re.IGNORECASE
    pattern = re.compile('m(.*)n', flags)
    result = pattern.findall(one)
    print(result)

     匹配数字

    import re


    # Digits-only pattern: \d is a single digit 0-9. Written as a raw string
    # so the backslash reaches the regex engine without relying on Python
    # passing an unknown escape sequence through.
    pattern = re.compile(r'^\d+$')
    one = '234'

    # match(): tests whether the pattern matches, starting at the beginning
    # of the string, and matches once.
    result = pattern.match(one)


    print(result.group())

    范围匹配

    import re


    # Character classes: [123] lists the accepted characters one by one,
    # [1-9] accepts any character in that range.
    one = '7893452'
    pattern = re.compile(r'[1-9]')

    # Collect the text of every match, left to right.
    result = [found.group() for found in pattern.finditer(one)]

    print(result)
    import re

    one = 'abc 123'
    # Raw string so '\d' reaches the regex engine without relying on Python
    # passing an unknown escape sequence through.
    patter = re.compile(r'\d+')

    # Each call below overwrites `result`; only the last one is printed.

    # match(): matches from the start of the string, once.
    result = patter.match(one)

    # search(): matches from any position, once.
    result = patter.search(one)

    # findall(): everything that matches the pattern, as a list.
    result = patter.findall(one)

    # sub(): replace the matched text.
    result = patter.sub('#', one)

    # split(): split the string on the pattern.
    patter = re.compile(' ')
    result = patter.split(one)



    print(result)
  • 相关阅读:
    模糊查询和聚合函数
    数据查询基础
    使用RestSharp请求GBK编码的网站乱码(NetCore环境)
    使用VsCode的Rest Client进行请求测试
    基于c#发送Outlook邮件(仅SMTP版本)
    创建Gitblit本地服务器(For windows )01
    获取本地文件然后批量存储到数据库
    描点的改进:运用chart画图。
    获取ADO连接字符串
    lock(this)
  • 原文地址:https://www.cnblogs.com/sunBinary/p/10624070.html
Copyright © 2020-2023  润新知