• urllib


    urllib

    from urllib.request import Request, urlopen

    # Fetch a page and print its body as text.
    url = "http://www.baidu.com"
    # Send a browser-like User-Agent instead of urllib's default one.
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }

    request = Request(url, headers=header)

    # The context manager closes the connection as soon as the body is read.
    # `response` stays bound afterwards, so its status, URL and headers can
    # still be inspected; only the body can no longer be read.
    with urlopen(request) as response:
        # decode() with no argument treats the body as UTF-8.
        info = response.read().decode()

    print(info)
    

    response方法

    # HTTP status code of the response
    # (replaces getcode(), deprecated since Python 3.9).
    response.status

    # URL the data was actually retrieved from; compare it with the requested
    # URL to detect a redirect (replaces geturl(), deprecated since 3.9).
    response.url

    # Response headers (replaces info(), deprecated since 3.9).
    response.headers
    

    request方法

    # Read back a request header.
    # Request normalises header names with str.capitalize(), so the stored key
    # is 'User-agent'; looking up 'User-Agent' would return None.
    request.get_header('User-agent')
    

    动态UA

    from fake_useragent import UserAgent

    ua = UserAgent()
    # Print one randomly chosen User-Agent string per attribute, in order:
    # an IE version, a Firefox version, a Chrome version, and finally
    # "random", which draws from any browser vendor.
    for browser in ("ie", "firefox", "chrome", "random"):
        print(getattr(ua, browser))
    

    post参数

    from urllib.parse import urlencode

    # Form fields to submit with the request.
    f_data = {
        'pa': 123
    }

    # urlencode() turns the dict into a query string such as "pa=123".
    f_data = urlencode(f_data)
    # Request expects bytes for `data`; supplying `data` makes urllib send a
    # POST instead of a GET.
    request = Request(url, headers=header, data=f_data.encode())
    

    HTTPS:忽略证书验证(用于访问使用自签名证书、而非CA签发证书的站点)

    import ssl

    # WARNING: this context performs no certificate or hostname verification,
    # so the connection is open to man-in-the-middle attacks. Use it only for
    # hosts with self-signed certificates that you already trust.
    # It is built from the public ssl API rather than the private
    # ssl._create_unverified_context() helper.
    context = ssl.create_default_context()
    # check_hostname must be switched off before verify_mode may be CERT_NONE.
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    response = urlopen(request, context=context)
    

    代理

    from urllib.request import Request, urlopen, build_opener, ProxyHandler

    # Fetch a page through an authenticated HTTP proxy and print its body.
    url = "http://www.baidu.com"
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }

    request = Request(url, headers=header)
    # Route http:// requests via the proxy at 127.0.0.1:80, authenticating as
    # user "zx" with password "123".
    handler = ProxyHandler({'http': 'zx:123@127.0.0.1:80'})
    opener = build_opener(handler)
    # The request must go through opener.open(); plain urlopen() would not
    # use this handler. The context manager closes the connection afterwards.
    with opener.open(request) as response:
        info = response.read().decode()

    print(info)
    
    from urllib.request import Request, urlopen
    from fake_useragent import UserAgent
    from urllib.parse import urlencode
    from urllib.request import HTTPCookieProcessor,build_opener
    # Log in by posting the form fields to the login endpoint.
    login_url = "http://www.sxt.cn/index/login/login"
    headers = {
        "User-Agent": UserAgent().chrome,
    }
    form_data = {
        "user": "17703181473",
        "password": "123456"
    }
    f_data = urlencode(form_data).encode()
    request = Request(login_url, headers=headers, data=f_data)

    # HTTPCookieProcessor keeps the cookies set by responses in memory and
    # sends them back on later requests made through the same opener.
    handler = HTTPCookieProcessor()
    opener = build_opener(handler)
    # The login body itself is not used: the cookies are captured while the
    # request is opened, so the response can be closed immediately.
    opener.open(request).close()

    # Fetch the user page through the same opener so the stored cookies are sent.
    info_url = "http://www.sxt.cn/index/user.html"
    request = Request(info_url, headers=headers)

    with opener.open(request) as response:
        print(response.read().decode())
    

    cookiejar

    from urllib.request import Request, build_opener, HTTPCookieProcessor
    from fake_useragent import UserAgent
    from http.cookiejar import MozillaCookieJar
    from urllib.parse import urlencode
    
    
    def get_cookie():
        """Log in and save the resulting cookies to ``cookie.txt``.

        Posts the login form through an opener backed by a MozillaCookieJar,
        then writes the jar to ``cookie.txt`` in the current working directory.
        Returns nothing; errors raised by urllib while opening the URL
        propagate to the caller.
        """
        login_url = "http://www.sxt.cn/index/login/login"
        headers = {
            "User-Agent": UserAgent().chrome
        }
        form_data = {
            "user": "17703181473",
            "password": "123456"
        }
        f_data = urlencode(form_data).encode()
        request = Request(login_url, headers=headers, data=f_data)

        cookie_jar = MozillaCookieJar()
        handler = HTTPCookieProcessor(cookie_jar)
        opener = build_opener(handler)
        # Only the cookies matter here: they are stored in the jar while the
        # request is opened, so the response is closed without being read.
        opener.open(request).close()
        # Write session (discardable) and already-expired cookies as well;
        # by default save() would leave both out of the file.
        cookie_jar.save("cookie.txt", ignore_expires=True, ignore_discard=True)
    
    
    def use_cookie():
        """Fetch the user page with cookies loaded from ``cookie.txt``.

        Loads the cookie file from the current working directory into a
        MozillaCookieJar, requests the user page through an opener that sends
        those cookies, and prints the decoded response body. Returns nothing;
        errors from loading the file or opening the URL propagate.
        """
        info_url = "http://www.sxt.cn/index/user.html"
        headers = {
            "User-Agent": UserAgent().chrome
        }
        request = Request(info_url, headers=headers)
        cookie_jar = MozillaCookieJar()
        # Load the cookies from the local file, including session and
        # expired ones, mirroring the flags used when the file was saved.
        cookie_jar.load("cookie.txt", ignore_discard=True, ignore_expires=True)
        handler = HTTPCookieProcessor(cookie_jar)
        opener = build_opener(handler)
        # The context manager closes the connection once the body is printed.
        with opener.open(request) as response:
            print(response.read().decode())
    
    
    # Script entry point: fetch the user page using cookies read from the file.
    # On a first run there is no cookie.txt yet, so log in to create it rather
    # than failing when use_cookie() tries to load a missing file.
    if __name__ == '__main__':
        import os

        if not os.path.exists("cookie.txt"):
            get_cookie()
        use_cookie()
    

    URLError

    from urllib.request import Request, urlopen
    from fake_useragent import UserAgent
    from urllib.error import HTTPError, URLError

    # Demonstrates error handling: request a URL and report the HTTP status
    # code or the connection error number when it fails.
    url = "http://www.sx123t.cn/index/login/login123"

    headers = {
        "User-Agent": UserAgent().chrome
    }
    try:
        req = Request(url, headers=headers)
        with urlopen(req) as resp:
            print(resp.read().decode())
    except HTTPError as e:
        # The server answered with an error status. HTTPError is a subclass
        # of URLError, so this clause has to come first.
        print(e.code)
    except URLError as e:
        # No HTTP response was received. `reason` is normally an OSError whose
        # errno is printed; if it is a plain message, print the message itself.
        print(getattr(e.reason, "errno", e.reason))
    print("访问完成")
    
    
  • 相关阅读:
    ubuntu 查看cpu核数
    安装pytorch
    杀死用kill id+父进程
    ubuntu 更改目录所有者
    查看cuda版本+安装cuda+安装conda
    挂载新硬盘
    Linux负载过高【10.13】
    C++中c_str()
    为什么C++比python快?
    C++匿名函数
  • 原文地址:https://www.cnblogs.com/zx125/p/12865278.html
Copyright © 2020-2023  润新知