• day02


    import urllib.request
    import urllib.parse
    import string


    def get_params():
        url = "http://www.baidu.com/s?"

        params = {
            "wd": "中文",
            "key": "zhang",
            "value": "san"
        }
        str_params = urllib.parse.urlencode(params)
        print(str_params)
        final_url = url + str_params

        # Translate a url that contains Chinese characters into one the computer can recognize
        end_url = urllib.parse.quote(final_url, safe=string.printable)

        response = urllib.request.urlopen(end_url)

        data = response.read().decode("utf-8")
        print(data)


    get_params()
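As a side note on the two encoders used above, here is a minimal sketch of what each one produces (outputs shown as comments, standard CPython urllib):

    import urllib.parse
    import string

    # urlencode takes a dict and percent-encodes each value
    print(urllib.parse.urlencode({"wd": "中文"}))
    # -> wd=%E4%B8%AD%E6%96%87

    # quote encodes a single string; safe=string.printable leaves plain ASCII
    # such as :/?=& untouched, so only the Chinese text gets escaped
    print(urllib.parse.quote("http://www.baidu.com/s?wd=中文", safe=string.printable))
    # -> http://www.baidu.com/s?wd=%E4%B8%AD%E6%96%87

Note that urlencode has already escaped the Chinese text, so the extra quote pass in get_params is effectively a no-op on final_url; it matters when a url still contains raw non-ASCII characters.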
    import urllib.request

    def load_baidu():
        url = "https://www.baidu.com"
        header = {
            # The browser's version
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
        }
        # Create the request object
        request = urllib.request.Request(url)
        # Dynamically add the header information (reusing the header dict above, which was otherwise unused)
        request.add_header("User-Agent", header["User-Agent"])
        # Request the network data (headers cannot be added in this call: urlopen takes no headers parameter)
        response = urllib.request.urlopen(request)
        print(response)
        data = response.read().decode("utf-8")

        # Get the complete url
        final_url = request.get_full_url()
        print(final_url)

        # Response headers
        # print(response.headers)
        # Get the request header information (all of the headers)
        # request_headers = request.headers
        # print(request_headers)
        # (2) A second way to print the headers information
        # Note: the first letter must be uppercase, all other letters lowercase
        request_headers = request.get_header("User-agent")
        # print(request_headers)
        with open("02header.html", "w") as f:
            f.write(data)


    load_baidu()
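The capitalization note above deserves a closer look: Request.add_header normalizes keys with str.capitalize(), so "User-Agent" is stored as "User-agent", and get_header looks the stored key up verbatim. A minimal sketch:

    import urllib.request

    req = urllib.request.Request("http://www.baidu.com")
    # add_header stores the key as key.capitalize(): "User-Agent" -> "User-agent"
    req.add_header("User-Agent", "test-agent/1.0")

    print(req.get_header("User-agent"))  # test-agent/1.0
    print(req.get_header("User-Agent"))  # None, since the lookup must match the stored key exactly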
    import urllib.request
    import random

    def load_baidu():

        url = "http://www.baidu.com"
        user_agent_list = [
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
            "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50"
        ]
        # The browser identity is different on every request
        random_user_agent = random.choice(user_agent_list)

        request = urllib.request.Request(url)

        # Add the corresponding request header information (user_agent)
        request.add_header("User-Agent", random_user_agent)

        # Request the data
        response = urllib.request.urlopen(request)
        # The request header information
        print(request.get_header("User-agent"))

    load_baidu()
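To check that the rotated header really is what the server receives, here is a quick sketch against a public echo endpoint; it assumes httpbin.org is reachable, and the short user-agent list is just an excerpt of the one above:

    import json
    import random
    import urllib.request

    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    ]

    request = urllib.request.Request("http://httpbin.org/user-agent")
    request.add_header("User-Agent", random.choice(user_agent_list))
    response = urllib.request.urlopen(request)
    # httpbin echoes back the User-Agent header it received, as JSON
    print(json.loads(response.read().decode("utf-8")))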
    import urllib.request

    def handler_openner():

        # The built-in urlopen does not include proxy support, so we have to add that feature ourselves
        # Secure Sockets Layer: ssl relies on third-party CA digital certificates
        # http uses port 80, https uses port 443
        # Why can urlopen request data at all? Because of handler processors
        # Use our own opener to request the data

        # urllib.request.urlopen()
        url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

        # Create our own handler
        handler = urllib.request.HTTPHandler()
        # Create our own opener
        opener = urllib.request.build_opener(handler)
        # Call open on the opener we created to request the data
        response = opener.open(url)
        # data = response.read()
        data = response.read().decode("utf-8")


        with open("02header.html", "w") as f:
            f.write(data)

    handler_openner()
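As a hedged aside on why custom handlers are useful: HTTPHandler accepts a debuglevel argument, and install_opener makes a custom opener the default for plain urlopen calls. A minimal sketch:

    import urllib.request

    # debuglevel=1 prints the raw HTTP request/response exchange to stdout
    debug_handler = urllib.request.HTTPHandler(debuglevel=1)
    opener = urllib.request.build_opener(debug_handler)

    # After install_opener, ordinary urllib.request.urlopen uses this opener too
    urllib.request.install_opener(opener)
    response = urllib.request.urlopen("http://www.baidu.com")
    print(response.status)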
    import urllib.request


    def create_proxy_handler():
        url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

        # Add a proxy
        proxy = {
            # The free-proxy form
            "http": ""
            # "http":"120.77.249.46:8080"
            # A paid proxy
            # "http":"xiaoming":123@115.
        }
        # Proxy handler
        proxy_handler = urllib.request.ProxyHandler(proxy)

        # Create our own opener
        opener = urllib.request.build_opener(proxy_handler)
        # Send the request using the proxy ip
        response = opener.open(url)
        data = response.read().decode("utf-8")


        with open("03header.html", "w") as f:
            f.write(data)

    create_proxy_handler()
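The paid-proxy comment above is truncated in the original. For what it hints at, here is a sketch of the usual user:password@host:port form; every value below is a placeholder, not a real endpoint:

    import urllib.request

    # Hypothetical credentials and address, purely illustrative
    auth_proxy = {"http": "http://xiaoming:123@1.2.3.4:8080"}

    proxy_handler = urllib.request.ProxyHandler(auth_proxy)
    opener = urllib.request.build_opener(proxy_handler)
    # response = opener.open("http://www.baidu.com")  # would route through the authenticated proxy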
Free proxy IP code
    import urllib.request

    def proxy_user():

        proxy_list = [
            {"https": ""},
            # {"https":"106.75.226.36:808"},
            # {"https":"61.135.217.7:80"},
            # {"https":"125.70.13.77:8080"},
            # {"https":"118.190.95.35:9001"}
        ]
        for proxy in proxy_list:
            print(proxy)
            # Create a handler from each ip we iterate over
            proxy_handler = urllib.request.ProxyHandler(proxy)
            # Create the opener
            opener = urllib.request.build_opener(proxy_handler)

            try:
                data = opener.open("http://www.baidu.com", timeout=1)

                haha = data.read()
                print(haha)
            except Exception as e:
                print(e)


    proxy_user()
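A small extension of the loop above, as a sketch: collect the proxies that answered instead of only printing the body. It assumes the same proxy_list shape, and uses httpbin.org/ip (which echoes the caller's apparent address) so a working proxy is easy to spot:

    import urllib.request

    proxy_list = [
        {"https": ""},  # same placeholder shape as the list above
    ]

    working = []
    for proxy in proxy_list:
        opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
        try:
            # httpbin reports the ip address the request arrived from
            body = opener.open("http://httpbin.org/ip", timeout=1).read()
            working.append(proxy)
            print(proxy, body)
        except Exception as e:
            print(proxy, "failed:", e)

    print("usable proxies:", working)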
• Original post: https://www.cnblogs.com/mai1994/p/10644674.html