• 学习笔记-Python-爬虫2-SSL、js加密、ajax


    - SSL
    - SSL证书就是指遵守SSL安全套接层协议的服务器数字证书(Secure Sockets Layer)
    - 美国网景公司开发
    - CA(Certificate Authority)是数字证书认证中心,是发放、管理、废除数字证书的受信任的第三方机构
    - 遇到不信任的SSL证书,需要单独处理,案例v17

    - js加密
    - 有的反爬虫策略采用js对需要传输的数据进行加密处理(通常是取md5值)
    - 经过加密、传输的就是密文
    - 加密函数或者过程一定是在浏览器完成,也就是一定把代码(js代码)暴露给使用者
    - 通过阅读加密算法,就可以模拟出加密过程,从而达到破解
    - 参看案例v18
    - 破解有道查询单词js加密,案例v19
    '''
    案例v18
    破解有道词典
    '''
    from urllib import request, parse
    
    def youdao(key):
        """POST ``key`` to the Youdao translate endpoint and print the raw reply.

        Case v18: the ``salt`` and ``sign`` form fields are fixed literals, so
        they are not recomputed for the word being looked up.

        :param key: the word to look up; sent as the ``i`` form field.
        :return: None. The decoded response body is printed to stdout.
        """
        url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
        form = {
            "i": key,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": "fanyideskweb",
            # Fixed salt/sign pair; presumably captured from one browser request.
            "salt": "1543290787192",
            "sign": "afb3deca4f9d25f946b6fa54ad9bdef4",
            "doctype": "json",
            "version": 2.1,
            "keyfrom": "fanyi.web",
            "action": "FY_BY_REALTIME",
            "typoResult": "false",
        }
        # urlopen needs the POST body as bytes.
        data = parse.urlencode(form).encode()
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            # Accept-Encoding is deliberately not sent; presumably so the body
            # arrives uncompressed and can be decoded directly.
            "Accept-Language": "zh-CN, zh;q=0.9",
            "Connection": "keep-alive",
            # Derive the length from the actual body instead of a fixed number,
            # which would be wrong whenever the encoded form changes size.
            "Content-Length": len(data),
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "Cookie": '_ntes_nnid=e8766845039ffdc1e3db5ea6f4b8d288,1542789184238; OUTFOX_SEARCH_USER_ID_NCOO=1421081855.16469; OUTFOX_SEARCH_USER_ID="-1158124882@10.169.0.82"; JSESSIONID=aaa1FImCZD0bd-wSfttDw; ___rl__test__cookies=1543290787183',
            "Host": "fanyi.youdao.com",
            "Origin": "http://fanyi.youdao.com",
            "Referer": "http://fanyi.youdao.com/",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest",
        }
        req = request.Request(url=url, headers=headers, data=data)
        # The context manager closes the HTTP response once it has been read.
        with request.urlopen(req) as rsp:
            html = rsp.read().decode()
        print(html)
    # Script entry point for case v18: run one demo lookup when executed directly.
    if __name__ == "__main__":
        youdao("girl")
    '''
    案例v19
    在案例v18基础上进行改造
    破解有道词典
    处理js加密代码
    通过js查找以下代码
    var t = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))
    salt: t
    sign: n.md5("fanyideskweb" + e + t + "sr_3(QOHT)L2dx#uuGR@r")
    md5的输入由4个部分拼接而成,第二个部分e是用户输入的要查找的单词
    '''
    from urllib import request, parse
    def getSalt():
        """Compute the request salt, ported from the site's JavaScript.

        JavaScript original::

            salt = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))

        :return: int, the current Unix time in milliseconds plus a random
            integer in the range 0..9.
        """
        import time, random
        now_ms = int(time.time() * 1000)
        # parseInt(10 * Math.random(), 10) can never produce 10, so draw from
        # 0..9; randint(0, 10) would include the upper bound.
        jitter = random.randrange(10)
        return now_ms + jitter
    def getMd5(v):
        """Return the hexadecimal MD5 digest of the string ``v``.

        ``v`` is encoded as UTF-8 first because hashlib hashes bytes, not str.
        """
        from hashlib import md5

        return md5(v.encode("utf-8")).hexdigest()
    def getSign(key, salt):
        """Build the ``sign`` form field for a lookup.

        The signature is the MD5 hex digest of four pieces joined in order:
        the client name, the looked-up word, the salt, and a fixed secret string.

        :param key: the word being looked up (a str).
        :param salt: the salt value; converted with ``str()`` before joining.
        :return: the hex digest string produced by ``getMd5``.
        """
        pieces = ["fanyideskweb", key, str(salt), "sr_3(QOHT)L2dx#uuGR@r"]
        return getMd5("".join(pieces))
    def youdao(key):
        """POST ``key`` to the Youdao translate endpoint and print the raw reply.

        Case v19: unlike the fixed-value variant, ``salt`` and ``sign`` are
        computed for every call via ``getSalt`` and ``getSign``.

        :param key: the word to look up; sent as the ``i`` form field and
            mixed into the signature.
        :return: None. The form dict and the decoded response body are
            printed to stdout.
        """
        url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
        # Compute the salt once so the "salt" and "sign" fields agree.
        salt = getSalt()
        form = {
            "i": key,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": "fanyideskweb",
            "salt": str(salt),
            "sign": getSign(key, salt),
            "doctype": "json",
            "version": 2.1,
            "keyfrom": "fanyi.web",
            "action": "FY_BY_REALTIME",
            "typoResult": "false",
        }
        print(form)
        # urlopen needs the POST body as bytes.
        data = parse.urlencode(form).encode()
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            # Accept-Encoding is deliberately not sent; presumably so the body
            # arrives uncompressed and can be decoded directly.
            "Accept-Language": "zh-CN, zh;q=0.9",
            "Connection": "keep-alive",
            # Length of the encoded body, which varies with key and salt.
            "Content-Length": len(data),
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "Cookie": '_ntes_nnid=e8766845039ffdc1e3db5ea6f4b8d288,1542789184238; OUTFOX_SEARCH_USER_ID_NCOO=1421081855.16469; OUTFOX_SEARCH_USER_ID="-1158124882@10.169.0.82"; JSESSIONID=aaa1FImCZD0bd-wSfttDw; ___rl__test__cookies=1543290787183',
            "Host": "fanyi.youdao.com",
            "Origin": "http://fanyi.youdao.com",
            "Referer": "http://fanyi.youdao.com/",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest",
        }
        req = request.Request(url=url, headers=headers, data=data)
        # The context manager closes the HTTP response once it has been read.
        with request.urlopen(req) as rsp:
            html = rsp.read().decode()
        print(html)
    # Script entry point for case v19: run one demo lookup when executed directly.
    if __name__ == "__main__":
        youdao("girl")

    - ajax
    - 异步请求
    - 一定会有url、请求方式、可能有数据
    - 一般使用json格式
    - 案例v20,豆瓣电影

     

  • 相关阅读:
    2018年第九届蓝桥杯国赛总结(JavaB组)
    yzm10的小简介
    论文学习笔记
    Tied Block Convolution:一种共享filter的卷积形态
    AI艺术鉴赏挑战赛
    论文学习笔记
    (转)论文学习笔记
    论文学习笔记
    2020 计蒜之道 预赛 第三场 石子游戏(简单)(暴力DP)
    第六周:生成式对抗网络
  • 原文地址:https://www.cnblogs.com/Cloudloong/p/10021785.html
Copyright © 2020-2023  润新知