• 汽车之家反爬


    通过修改字体编码的转换方式,破解汽车之家的字体反爬

    本文仅用于练习字体反爬

    #!/usr/bin/env python  
    # encoding: utf-8  
    from requests_html import HTMLSession
    import re
    import os
    from fontTools.ttLib import TTFont
    
    class QiCheZhiJia():
        """Fetch one Autohome forum thread and replace its custom-font characters.

        The page references a font file whose glyphs are named ``uniXXXX``.
        This class downloads that font, pairs the glyphs (in glyph order,
        skipping the first one) with the characters in ``self.hanzi``, and
        substitutes them in the thread text.

        NOTE(review): the glyph-order -> ``hanzi`` pairing is purely positional;
        it is only correct if the served font keeps the same glyph order as the
        one ``hanzi`` was written for -- confirm against the live site.
        """

        # Directory where downloaded font files are cached between runs.
        FONT_DIR = "./fonts"

        # Matches ``'),url('<font-url>')`` in the page's @font-face rule.  The
        # parentheses are literal text and therefore must be escaped.
        _FONT_URL_RE = re.compile(r"'\),url\('(.*?)'\)")

        # Glyph names of the form ``uni`` + one or more 4-digit hex code units.
        _UNI_GLYPH_RE = re.compile(r"uni((?:[0-9A-Fa-f]{4})+)")

        def __init__(self):
            self.url="https://club.autohome.com.cn/bbs/thread/bb8c36ced93ce182/74203500-1.html"
            # Replacement characters, paired by position with the font's glyphs.
            self.hanzi=['不','了','呢','更','是','四','小','七','三','多','得','一','着','下','十','少','长','二','六','远','左','地','短','九','五','上','坏','很','右','低','高','矮','八','近','大','好','的','和']
            self.session=HTMLSession()
            # Maps an HTML entity prefix such as ``&#xec5a`` to its replacement.
            self.f_dict={}
            # Raw glyph order of the last loaded font (including the first glyph).
            self.uniWordList=[]
            # Characters the page text uses for each glyph after the first one.
            self.utf8WordList=[]

        @staticmethod
        def _glyph_name_to_char(glyph_name):
            """Return the text a ``uniXXXX`` glyph name stands for.

            Names that do not have the ``uni`` + hex form are returned unchanged.
            """
            matched = QiCheZhiJia._UNI_GLYPH_RE.fullmatch(glyph_name)
            if matched is None:
                return glyph_name
            hex_digits = matched.group(1)
            return "".join(
                chr(int(hex_digits[pos:pos + 4], 16))
                for pos in range(0, len(hex_digits), 4)
            )

        def create_font(self,font_url):
            """Load the font at *font_url* and build the glyph lookup tables.

            The font is downloaded into ``FONT_DIR`` unless a file with the same
            name is already there.  Fills ``self.f_dict``, ``self.uniWordList``
            and ``self.utf8WordList``.
            """
            font_file = font_url.split('/')[-1]
            font_path = os.path.join(self.FONT_DIR, font_file)

            os.makedirs(self.FONT_DIR, exist_ok=True)

            if not os.path.exists(font_path):
                # Not cached yet: download it.
                print('不在字体库中, 下载:', font_file)
                response = self.session.get(font_url)
                # Do not cache an error page as if it were a font file.
                response.raise_for_status()
                with open(font_path, 'wb') as f:
                    f.write(response.content)

            font = TTFont(font_path)
            try:
                glyph_order = font.getGlyphOrder()
            finally:
                font.close()

            # The first glyph is skipped, as in the original positional mapping.
            gly_list = glyph_order[1:]
            # zip() stops at the shorter sequence, so a font with more glyphs
            # than ``hanzi`` no longer raises IndexError.
            for gly, hanzi in zip(gly_list, self.hanzi):
                self.f_dict.setdefault(gly.lower().replace('uni','&#x'), hanzi)
            self.uniWordList = glyph_order
            self.utf8WordList = [self._glyph_name_to_char(gly) for gly in gly_list]
            print(self.utf8WordList)

        def run(self):
            """Fetch the thread, load its font and print the substituted text."""
            req=self.session.get(self.url)
            source=req.text
            font_url=self.parse(source)
            self.create_font(font_url)
            info=req.html.xpath("//div[@class='tz-paragraph' and string-length(text())>1]//text()")
            print(info)
            elem = "".join(info)
            # Replace each custom-font character with its paired character.
            for font_char, hanzi in zip(self.utf8WordList, self.hanzi):
                elem = elem.replace(font_char, hanzi)
            print(elem)

        def parse(self,source):
            """Extract the font URL from the page *source*.

            Returns the first URL found after ``'),url('``; ``http:`` is prefixed
            unless the URL already carries an http(s) scheme.

            Raises:
                ValueError: if *source* contains no such font URL.
            """
            matched = self._FONT_URL_RE.search(source)
            if matched is None:
                raise ValueError("font URL not found in page source")
            font_url = matched.group(1)
            if font_url.startswith(("http://", "https://")):
                return font_url
            return "http:" + font_url
    # Script entry point: build the scraper and run it once.
    if __name__ == "__main__":
        scraper = QiCheZhiJia()
        scraper.run()
    
  • 相关阅读:
    Nginx源码编译安装与负载均衡配置实践
    jquery_file_upload in Rails(ajax实现多张图片上传)
    Nokogiri相关网站
    rails中 url和path的区别
    Unique Paths
    Kinect for windows的脸部识别
    .Net给图片添加水印效果
    大型高性能ASP.NET系统架构设计
    如何来提高网站的访问速度,其中包括服务器的配置建议,asp.NET代码的编写及前端页面的优化等等
    URL中#号(井号)的作用
  • 原文地址:https://www.cnblogs.com/c-x-a/p/9288841.html
Copyright © 2020-2023  润新知