爬取有道页面,实现中文翻译成英文:
# -*- coding: utf-8 -*-
"""Translate a piece of text through the Youdao web translation endpoint.

Created on 2018-7-12
@author: sss

Purpose: scrape Youdao Translate. The script prompts for a text, POSTs it as
a url-encoded form to the Youdao endpoint and prints the raw response, the
translated text and the request headers that were used.
"""
import json
import random
import urllib
import urllib.parse
import urllib.request
import urllib.response

# Longer variant of the endpoint, kept for reference:
# http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null
# The web page itself posts to ".../translate_o?smartresult=dict&smartresult=rule";
# the "_o" has to be removed for this script to work.
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

# User-Agent pool; one entry is picked at random for every request.
# NOTE(review): the last two entries look truncated -- confirm before relying
# on them; they are kept exactly as they were.
ua_list = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0)like Gecko",
    "Mozilla/5.0 (Macintosh; U; PPC Mac OS X ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS ",
]

# Seconds to wait for the server before giving up instead of hanging forever.
DEFAULT_TIMEOUT = 10


def build_headers() -> dict:
    """Return the HTTP request headers with a randomly chosen User-Agent."""
    return {
        "Connection": "keep-alive",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": random.choice(ua_list),
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Referer": "http://fanyi.youdao.com/",
    }


def build_form_data(text: str) -> dict:
    """Return the form fields that ask the endpoint to translate *text*."""
    return {
        "i": text,
        # The field name is "from" -- no trailing space.
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        # Presumably a timestamp (fixed value captured from a browser session).
        "salt": "1531403738742",
        # Presumably a verification field derived from the timestamp plus the
        # text to translate (fixed value captured from a browser session).
        "sign": "ffa2b29fe52953208226d97a174bcea7",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }


def fetch_raw_response(text: str, headers: dict, timeout: float = DEFAULT_TIMEOUT) -> bytes:
    """POST *text* to the translation endpoint and return the raw response body.

    Raises:
        urllib.error.URLError: if the request fails or times out.
    """
    # urlencode() yields str; the request body must be bytes, so the encoding
    # has to be given explicitly here.
    data = urllib.parse.urlencode(build_form_data(text)).encode(encoding="UTF8")
    request = urllib.request.Request(url, data=data, headers=headers)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def extract_translation(payload: dict) -> str:
    """Return the translated text of the first segment in a decoded response.

    Raises:
        ValueError: if *payload* does not contain a translation, e.g. when
            the server answered with an error object instead.
    """
    try:
        return payload["translateResult"][0][0]["tgt"]
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(
            "response contains no translation: %r" % (payload,)
        ) from err


def main() -> None:
    """Prompt for a text, translate it and print the result."""
    key = input("请输入需要翻译的文字: ")
    headers = build_headers()
    html = fetch_raw_response(key, headers)
    print(html)
    print('完成!')
    target = json.loads(html)
    print("翻译结果:%s" % (extract_translation(target)))  # read out the result
    print(headers)


if __name__ == "__main__":
    main()