• 有道翻译和百度翻译在线爬取


    import requests
    import time
    import random
    from hashlib import md5
    
    
    def get_salt_sign_ts(word):
        """Build the salt, timestamp and signature sent with a translate request.

        Args:
            word: The text to be translated; it is mixed into the signature.

        Returns:
            A ``(salt, ts, sign)`` tuple of strings. ``ts`` is the current time
            in milliseconds, ``salt`` is ``ts`` followed by one random digit
            (0-9), and ``sign`` is the hex MD5 digest of the literal
            ``"fanyideskweb"``, ``word``, ``salt`` and a fixed key string,
            concatenated in that order.
        """
        timestamp_ms = str(int(time.time() * 1000))
        nonce = timestamp_ms + str(random.randint(0, 9))
        # The concatenation order matters: it is what gets hashed.
        payload = "".join(("fanyideskweb", word, nonce, "n%A-rKaT5fb[Gy?;N5@Tj"))
        digest = md5(payload.encode()).hexdigest()
        return nonce, timestamp_ms, digest
    
    
    def attack_yd(word):
        """Translate ``word`` through the Youdao web translation endpoint.

        Args:
            word: Text to translate. Source and target languages are both sent
                as ``AUTO``.

        Returns:
            The first entry of the response's ``translateResult`` field (the
            sample run in this file shows a dict with ``tgt`` and ``src`` keys).

        Raises:
            requests.exceptions.Timeout: If the server does not respond within
                the timeout.
            KeyError: If the response JSON has no ``translateResult`` key.
        """
        salt, ts, sign = get_salt_sign_ts(word)
        url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
        # Browser-like headers, including a hard-coded cookie string.
        # NOTE(review): the cookie looks like a captured browser session and
        # may have expired - confirm before relying on it.
        headers = {
            'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': 'OUTFOX_SEARCH_USER_ID=1966607151@10.169.0.83; OUTFOX_SEARCH_USER_ID_NCOO=250069037.7227244; JSESSIONID=aaakkyCArmplF4qJhJHWw; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abc9CfcjVp7bS6v1XUIWw; ___rl__test__cookies=1563952124524',
            'Host': 'fanyi.youdao.com',
            'Referer': 'http://fanyi.youdao.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        }
        data = {
            'i': word,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': salt,
            'sign': sign,
            'ts': ts,
            # Fields left disabled in the original script:
            # 'bv': '6cf12640614e68ba598ee58ceccb0605',
            # 'doctype': 'json',
            # 'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTlME',
        }
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.post(url, data=data, headers=headers, timeout=10)
        html_json = response.json()
        return html_json['translateResult'][0][0]
    
    
    if __name__ == '__main__':
        # Prompt for a word, translate it with attack_yd and print the result.
        print(attack_yd(input('请输入要翻译的单词:')))
    
    请输入要翻译的单词:你好
    {'tgt': 'hello', 'src': '你好'}
    import requests
    import re
    import execjs
    
    
    class BaiduTranslateSpider:
        """Client for the Baidu web translation endpoint.

        A translation needs two values besides the text itself: a ``token``
        scraped from the translate home page and a ``sign`` computed by the
        JavaScript function ``e`` stored in ``./node.js``.
        """

        def __init__(self):
            # Page whose HTML contains the token.
            self.get_url = 'https://fanyi.baidu.com/?aldtype=16047'
            # Browser-like headers, including a hard-coded cookie string.
            # NOTE(review): the cookie looks like a captured browser session
            # and may have expired - confirm before relying on it.
            self.headers = {
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
                'accept-encoding': 'gzip, deflate, br',
                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
                'cookie': 'BAIDUID=68E904D92F2C8ACC62C7542C397FBD0B:FG=1; PSTM=1561529234; BIDUPSID=2633580F87BCDFE102C31514DA3EACA6; BDUSS=HdWTDhTajh0ZTd3QmFIbzZjeDdhTTE5Wkd0R1FGcFFSaDFJVVRRSHN3ZjNnbGRkSVFBQUFBJCQAAAAAAAAAAAEAAADE7I5C06LTwrDUxvjLq9fTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPf1L1339S9dM; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=3; H_PS_PSSID=29546_1466_21083_29578_29519_28518_29099_29568_28835_29221_29460_22157; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1563869496,1563869600,1563953532,1564019873; to_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; yjs_js_security_passport=47e142eebb082b8c92ef506657211ad704b97215_1564026801_js; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1564026808',
                'upgrade-insecure-requests': '1',
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
            }

        @staticmethod
        def _extract_token(html):
            """Return the first ``token: '...'`` value found in ``html``.

            Raises:
                ValueError: If ``html`` contains no token.
            """
            match = re.search(r"token: '(.*?)'", html, re.S)
            if match is None:
                raise ValueError('token not found in page HTML')
            return match.group(1)

        # Fetch the token
        def get_token(self):
            """Download the translate home page and return its token string.

            Raises:
                ValueError: If the page contains no token.
            """
            html = requests.get(url=self.get_url, headers=self.headers, timeout=10).text
            # Parse with a regular expression. The token must be returned:
            # get_result sends this method's return value as the form token.
            return self._extract_token(html)

        # Compute the sign
        def get_sign(self, word):
            """Return the sign for ``word`` computed by JS function ``e`` in ./node.js."""
            with open('./node.js', 'r') as f:
                js_data = f.read()
            execjs_obj = execjs.compile(js_data)
            # Pass word as an argument instead of formatting it into JS source,
            # so quotes or backslashes in word cannot break or alter the script.
            sign = execjs_obj.call('e', word)

            return sign

        # Fetch the translation result
        def get_result(self, word, fro, to):
            """Translate ``word`` from language ``fro`` to language ``to``.

            Args:
                word: Text to translate.
                fro: Source language code (the script in this file uses
                    ``'zh'`` and ``'en'``).
                to: Target language code.

            Returns:
                The ``dst`` field of the first entry in the response's
                ``trans_result.data`` list.

            Raises:
                ValueError: If no token can be scraped from the home page.
                KeyError: If the response JSON lacks the expected fields.
            """
            token = self.get_token()
            sign = self.get_sign(word)
            # Form data as a dict
            formdata = {
                'from': fro,
                'to': to,
                'query': word,
                'transtype': 'realtime',
                'simple_means_flag': '3',
                'sign': sign,
                'token': token
            }
            html_json = requests.post(
                url='https://fanyi.baidu.com/v2transapi',
                data=formdata,
                headers=self.headers,
                timeout=10,
            ).json()
            return html_json['trans_result']['data'][0]['dst']
    
    
    if __name__ == '__main__':
        # Ask for a direction and a word, translate with Baidu, print the result.
        spider = BaiduTranslateSpider()
        num = input('1.翻译英语,2.翻译汉语,请选择(1/2):')
        # NOTE(review): option 1 maps to zh -> en and anything else to
        # en -> zh; confirm this matches what the menu text promises.
        if num == '1':
            fro, to = 'zh', 'en'
        else:
            fro, to = 'en', 'zh'
        word = input('请输入要翻译的单词:')
        # Pass the chosen direction; it was previously hard-coded to
        # 'zh' -> 'en', which silently ignored the user's selection.
        result = spider.get_result(word, fro, to)
        print(result)
  • 相关阅读:
    Firefly 介绍
    9秒社团跨平台开发引擎CrossApp宣布正式开源
    页面置换算法
    Selenium
    C++虚函数、虚继承
    链表题目
    二叉树分类
    求连续子数组的最大和
    拓扑排序
    二叉树题目
  • 原文地址:https://www.cnblogs.com/yuxiangyang/p/11242753.html
Copyright © 2020-2023  润新知