import urllib.request
import urllib.parse
import string


def get_params():
    url = "http://www.baidu.com/s?"

    params = {
        "wd": "中文",
        "key": "zhang",
        "value": "san"
    }
    str_params = urllib.parse.urlencode(params)
    print(str_params)
    final_url = url + str_params

    # Translate the url containing Chinese characters into a
    # percent-encoded url that the server can recognize
    end_url = urllib.parse.quote(final_url, safe=string.printable)

    response = urllib.request.urlopen(end_url)

    data = response.read().decode("utf-8")
    print(data)


get_params()

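A quick way to see what each helper does on its own: urlencode() turns a dict into key=value pairs and percent-encodes the values, while quote() percent-encodes an existing string, leaving alone any characters listed in safe. A minimal standalone sketch (no network access needed):

import urllib.parse
import string

# urlencode: dict -> "wd=%E4%B8%AD%E6%96%87&key=zhang"
print(urllib.parse.urlencode({"wd": "中文", "key": "zhang"}))

# quote: percent-encode one string; safe=string.printable keeps all
# printable ASCII (":", "/", "?", "=") intact, so only the non-ASCII
# bytes of the Chinese keyword get escaped
print(urllib.parse.quote("http://www.baidu.com/s?wd=中文", safe=string.printable))
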
import urllib.request

def load_baidu():
    url = "https://www.baidu.com"
    header = {
        # The browser version (this dict could also be passed directly
        # as Request(url, headers=header))
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
    }
    # Create the request object
    request = urllib.request.Request(url)
    # Dynamically add the header information
    request.add_header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36")
    # Request the data (the headers cannot be added in urlopen() itself,
    # because that method provides no headers parameter)
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")

    # Get the complete url
    final_url = request.get_full_url()
    print(final_url)

    # Response headers
    # print(response.headers)
    # Get the request headers (all of them)
    # request_headers = request.headers
    # print(request_headers)
    # (2) Second way to print the header information
    # Note: only the first letter is capitalized, the rest are lowercase
    request_headers = request.get_header("User-agent")
    # print(request_headers)
    with open("02header.html", "w") as f:
        f.write(data)


load_baidu()

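The header dict defined above can also be handed to Request() at construction time instead of calling add_header() afterwards; both routes store the header in the same place on the request object. A minimal sketch of that variant:

import urllib.request

url = "https://www.baidu.com"
header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
}
# Pass the whole dict when building the request
request = urllib.request.Request(url, headers=header)
response = urllib.request.urlopen(request)
print(response.status)  # 200 if the request succeeded
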
import urllib.request
import random

def load_baidu():

    url = "http://www.baidu.com"
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50"
    ]
    # Each request identifies itself as a different browser
    random_user_agent = random.choice(user_agent_list)

    request = urllib.request.Request(url)

    # Add the corresponding request header (user_agent)
    request.add_header("User-Agent", random_user_agent)

    # Request the data
    response = urllib.request.urlopen(request)
    # Print the request header that was actually sent
    print(request.get_header("User-agent"))

load_baidu()

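If several scripts need this rotation, the random choice can be wrapped in a small reusable helper; make_request below is a hypothetical name for illustration, not part of the original scripts:

import urllib.request
import random

USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
]

def make_request(url):
    # Attach a randomly chosen User-Agent to every request we build
    request = urllib.request.Request(url)
    request.add_header("User-Agent", random.choice(USER_AGENTS))
    return request

response = urllib.request.urlopen(make_request("http://www.baidu.com"))
print(response.status)
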
import urllib.request

def handler_opener():

    # The built-in urlopen() does not support adding a proxy, so we have to
    # build that feature ourselves
    # The secure sockets layer (ssl) relies on third-party CA digital certificates
    # http uses port 80 and https uses port 443
    # Why can urlopen() request data at all? Because a handler does the work
    # Here we request the data with our own opener instead

    # urllib.request.urlopen()
    url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

    # Create our own handler
    handler = urllib.request.HTTPHandler()
    # Create our own opener
    opener = urllib.request.build_opener(handler)
    # Call open() on the opener we built to request the data
    response = opener.open(url)
    # data = response.read()
    data = response.read().decode("utf-8")


    with open("02header.html", "w") as f:
        f.write(data)

handler_opener()

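One practical reason to build the handler yourself: HTTPHandler and HTTPSHandler both accept a debuglevel argument, and setting it to 1 makes urllib print the raw request and response lines to stdout, which helps when debugging headers. A minimal sketch:

import urllib.request

# debuglevel=1 makes the handlers echo the HTTP conversation to stdout
http_handler = urllib.request.HTTPHandler(debuglevel=1)
https_handler = urllib.request.HTTPSHandler(debuglevel=1)
opener = urllib.request.build_opener(http_handler, https_handler)

response = opener.open("http://www.baidu.com")
print(response.status)
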
import urllib.request


def create_proxy_handler():
    url = "https://blog.csdn.net/m0_37499059/article/details/79003731"

    # Add a proxy
    proxy = {
        # Free-proxy format
        "http": ""
        # "http": "120.77.249.46:8080"
        # Paid-proxy format
        # "http": "xiaoming":123@115.
    }
    # Proxy handler
    proxy_handler = urllib.request.ProxyHandler(proxy)

    # Create our own opener
    opener = urllib.request.build_opener(proxy_handler)
    # Send the request through the proxy ip
    response = opener.open(url)
    data = response.read().decode("utf-8")


    with open("03header.html", "w") as f:
        f.write(data)

create_proxy_handler()

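For reference, an authenticated (paid) proxy is normally written as one URL with the credentials embedded, which is what the truncated comment above is gesturing at. A sketch where the user, password, host and port are all placeholders, not a real proxy:

import urllib.request

# user:password@host:port below are placeholder values for illustration
proxy = {"http": "http://user:password@115.0.0.1:8080"}
proxy_handler = urllib.request.ProxyHandler(proxy)
opener = urllib.request.build_opener(proxy_handler)
# response = opener.open("http://www.baidu.com")  # would route through the proxy
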
import urllib.request

def proxy_user():

    proxy_list = [
        {"https": ""},
        # {"https": "106.75.226.36:808"},
        # {"https": "61.135.217.7:80"},
        # {"https": "125.70.13.77:8080"},
        # {"https": "118.190.95.35:9001"}
    ]
    for proxy in proxy_list:
        print(proxy)
        # Create a handler from each ip we iterate over
        proxy_handler = urllib.request.ProxyHandler(proxy)
        # Create the opener
        opener = urllib.request.build_opener(proxy_handler)

        try:
            response = opener.open("http://www.baidu.com", timeout=1)
            data = response.read()
            print(data)
        except Exception as e:
            print(e)


proxy_user()

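Once a working proxy has been found, urllib.request.install_opener() makes that opener the process-wide default, so later urlopen() calls go through it without the opener being passed around. A minimal sketch:

import urllib.request

proxy_handler = urllib.request.ProxyHandler({"https": ""})
opener = urllib.request.build_opener(proxy_handler)
# After this call, plain urlopen() uses the proxy opener by default
urllib.request.install_opener(opener)

response = urllib.request.urlopen("http://www.baidu.com", timeout=1)
print(response.status)
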