import urllib.request


def load_baidu():
    """Fetch the Baidu home page, print request details, and save the HTML.

    Sends a GET request to ``https://www.baidu.com/`` with a custom
    ``User-Agent`` header (plus a dummy ``haha`` header), prints the
    response object and the headers attached to the request, and writes
    the UTF-8-decoded body to ``headers.html`` in the current directory.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = "https://www.baidu.com/"
    header = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/70.0.3538.25 Safari/537.36 "
            "Core/1.70.3650.400 QQBrowser/10.4.3341.400"
        ),
        "haha": "hehe",
    }

    # Build the request object carrying the custom headers.
    request = urllib.request.Request(url, headers=header)

    # Fetch the data; the context manager closes the connection even if
    # reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        print(response)
        data = response.read().decode("utf-8")

    # Response headers are available via response.headers if needed.

    # Print all headers that were attached to the request.
    request_headers = request.headers
    print(request_headers)

    # Write with an explicit encoding. Without it, open() falls back to the
    # locale's default codec (e.g. GBK), which raises UnicodeEncodeError for
    # characters in the page that the codec cannot represent.
    with open("headers.html", "w", encoding="utf-8") as f:
        f.write(data)


load_baidu()
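如果在 open("headers.html","w") 中不指定编码,上述代码在系统默认编码为 GBK 的环境(例如中文 Windows)下运行时会出现“UnicodeEncodeError: 'gbk' codec can't encode character '\xbb' in position 30633: illegal multibyte sequence”。原因是 with 语句中的 open() 没有指定编码格式,写入文件时使用了系统默认的 gbk 编码,而页面内容中含有 gbk 无法编码的字符。因此在 open() 中将编码格式指定为 utf-8 即可: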
with open("headers.html","w",encoding="utf-8")as f: