• day02


    
    
    import urllib.request
    import urllib.parse
    import string


    def get_params():
        url = "http://www.baidu.com/s?"

        params = {
            "wd": "中文",
            "key": "zhang",
            "value": "san"
        }
        # Percent-encode the parameter dict into a query string
        str_params = urllib.parse.urlencode(params)
        print(str_params)
        final_url = url + str_params

        # Translate a URL containing Chinese into one the machine can handle
        end_url = urllib.parse.quote(final_url, safe=string.printable)

        response = urllib.request.urlopen(end_url)

        data = response.read().decode("utf-8")
        print(data)


    get_params()
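
For reference, here is what the two encoding steps above actually produce. This is a standalone sketch using only the standard library; `urlencode` already percent-encodes the Chinese characters, so the extra `quote` pass matters mainly when Chinese sits in a hand-assembled URL string:

    import urllib.parse

    # urlencode percent-encodes the whole parameter dict in one step
    print(urllib.parse.urlencode({"wd": "中文"}))  # wd=%E4%B8%AD%E6%96%87

    # quote encodes one string; safe= lists the characters to leave alone
    print(urllib.parse.quote("http://www.baidu.com/s?wd=中文", safe=":/?="))
    # http://www.baidu.com/s?wd=%E4%B8%AD%E6%96%87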
    
    
    import urllib.request

    def load_baidu():
        url = "http://www.baidu.com"
        # (request header information would be added here)

        # Create the request object
        request = urllib.request.Request(url)
        # Request the data over the network
        response = urllib.request.urlopen(request)
        print(response)
        data = response.read().decode("utf-8")

        # Response headers
        # print(response.headers)
        # Get the request header information
        request_headers = request.headers
        print(request_headers)
        with open("02header.html", "w") as f:
            f.write(data)


    load_baidu()
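
The response object returned by `urlopen` carries more than the body. A quick sketch of the common accessors, reusing the same URL:

    import urllib.request

    response = urllib.request.urlopen("http://www.baidu.com")
    print(response.getcode())                    # HTTP status code, e.g. 200
    print(response.geturl())                     # final URL after any redirects
    print(response.headers.get("Content-Type"))  # one specific response header
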
    import urllib.request
    import random

    def load_baidu():

        url = "http://www.baidu.com"
        user_agent_list = [
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50"
        ]
        # Each request goes out with a different browser identity
        random_user_agent = random.choice(user_agent_list)

        request = urllib.request.Request(url)

        # Add the corresponding request header (User-Agent)
        request.add_header("User-Agent", random_user_agent)

        # Request the data
        response = urllib.request.urlopen(request)
        # Print the request header that was set
        print(request.get_header("User-agent"))

    load_baidu()
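
`add_header` is not the only way in: `Request` also accepts a headers dict at construction time. A minimal sketch, using one of the User-Agent strings from the list above:

    import urllib.request

    ua = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0"
    # headers can be passed when the Request is constructed
    request = urllib.request.Request("http://www.baidu.com", headers={"User-Agent": ua})
    print(request.get_header("User-agent"))  # stored internally as "User-agent"
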
    import urllib.request

    def handler_opener():

        # The built-in urlopen has no hook for adding a proxy, so we build that
        # functionality ourselves.
        # SSL (Secure Sockets Layer) relies on third-party CA digital certificates.
        # HTTP uses port 80, HTTPS uses port 443.
        # urlopen can request data because a handler does the work underneath;
        # here we request the data with our own opener.

        # urllib.request.urlopen()
        url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

        # Create our own handler
        handler = urllib.request.HTTPHandler()
        # Create our own opener
        opener = urllib.request.build_opener(handler)
        # Call open on the opener we built to request the data
        response = opener.open(url)
        # data = response.read()
        data = response.read().decode("utf-8")

        with open("02header.html", "w") as f:
            f.write(data)

    handler_opener()
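
If plain `urlopen` should go through our own opener from then on, the opener can be installed globally; `debuglevel=1` is also handy for watching the raw HTTP exchange. A sketch:

    import urllib.request

    # debuglevel=1 makes the handler print the HTTP conversation to stdout
    handler = urllib.request.HTTPHandler(debuglevel=1)
    opener = urllib.request.build_opener(handler)
    # after install_opener, every urllib.request.urlopen call uses this opener
    urllib.request.install_opener(opener)
    response = urllib.request.urlopen("http://www.baidu.com")
    print(response.getcode())
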
    import urllib.request


    def create_proxy_handler():
        url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

        # Add a proxy
        proxy = {
            # Free-proxy style (an empty string means no proxy is applied)
            "http": ""
            # "http": "120.77.249.46:8080"
            # Paid proxy
            # "http":"xiaoming":123@115.
        }
        # Proxy handler
        proxy_handler = urllib.request.ProxyHandler(proxy)

        # Create our own opener
        opener = urllib.request.build_opener(proxy_handler)
        # Send the request through the proxy IP
        response = opener.open(url)
        data = response.read().decode("utf-8")

        with open("03header.html", "w") as f:
            f.write(data)

    create_proxy_handler()
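
The paid-proxy line above is left truncated; the usual URL shape is `http://user:password@host:port`. A sketch with placeholder credentials and a placeholder address (not a working proxy):

    import urllib.request

    # placeholder account and proxy address -- substitute real values
    money_proxy = {"http": "http://xiaoming:123456@proxy.example.com:8080"}
    proxy_handler = urllib.request.ProxyHandler(money_proxy)
    opener = urllib.request.build_opener(proxy_handler)
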
    import urllib.request

    def proxy_user():

        proxy_list = [
            {"https": ""},
            # {"https": "106.75.226.36:808"},
            # {"https": "61.135.217.7:80"},
            # {"https": "125.70.13.77:8080"},
            # {"https": "118.190.95.35:9001"}
        ]
        for proxy in proxy_list:
            print(proxy)
            # Build a handler from each proxy IP in the list
            proxy_handler = urllib.request.ProxyHandler(proxy)
            # Create the opener
            opener = urllib.request.build_opener(proxy_handler)

            try:
                response = opener.open("http://www.baidu.com", timeout=1)
                content = response.read()
                print(content)
            except Exception as e:
                print(e)


    proxy_user()
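
The loop above keeps trying every proxy even after one succeeds. A small variation that stops at the first proxy that answers (the helper name `first_working_proxy` is my own):

    import urllib.request

    def first_working_proxy(proxy_list, test_url="http://www.baidu.com"):
        # Return the first proxy dict that can fetch the test URL, else None
        for proxy in proxy_list:
            opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
            try:
                opener.open(test_url, timeout=1)
                return proxy
            except Exception as e:
                print(proxy, e)
        return None
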
    import urllib.request

    def load_baidu():
        url = "https://www.baidu.com"
        header = {
            # Browser version
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
        }

        # Create the request object
        # (the header dict above could also be passed in directly: Request(url, headers=header))
        request = urllib.request.Request(url)
        # Dynamically add the header information
        request.add_header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36")
        # Request the network data (headers cannot be added in urlopen itself;
        # the method takes no such parameter)
        response = urllib.request.urlopen(request)
        print(response)
        data = response.read().decode("utf-8")

        # Get the full URL
        final_url = request.get_full_url()
        print(final_url)

        # Response headers
        # print(response.headers)
        # (1) Get the request headers (all of them)
        # request_headers = request.headers
        # print(request_headers)
        # (2) A second way to print the header information
        # Note: the first letter must be uppercase, the rest lowercase
        request_headers = request.get_header("User-agent")
        # print(request_headers)
        with open("02header.html", "w") as f:
            f.write(data)


    load_baidu()
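
To dump every header set on a request at once (the first way, commented out above, prints the raw dict), `header_items()` returns them as pairs. A small sketch:

    import urllib.request

    request = urllib.request.Request("https://www.baidu.com")
    request.add_header("User-Agent", "Mozilla/5.0")
    # all request headers as (name, value) pairs; note the stored capitalization
    print(request.header_items())  # [('User-agent', 'Mozilla/5.0')]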