• 破解滑动验证码最新版本(97%左右通过率)


    一、简述:

           最近闲来无事,想研究一下极验滑块验证码的破解。调研后发现,直接破解js代码耗时又耗力,而且一旦版本更新,以前的所有努力可能都要推翻重做,通用性不够;最后还是选用selenium + PIL来实现滑块验证码的破解。
           期间也翻阅过很多文章,大多都已经失效,并且缺口位置查找和模拟滑动轨迹成功率很低,很难应用到实际开发项目中,本次是针对最新版本的极验滑块验证码进行破解。

    二、项目环境

    大致需要用到以下模块,各位看官请提前准备好:
    python3.6、selenium、numpy、PIL、chromedriver

    三、分析步骤以及代码编写

    1.首先分析目标网站(本次主要以geetest官网滑块demo为参考)   

      网站大致长这个样子。首先按F12打开开发者工具,选择Elements查看节点,发现最新版本的滑块图片是使用画布(canvas)来呈现的。查阅大量文档后,使用如下代码获得画布中的图片数据,获取到的图片数据是经过base64编码的:

    1 document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png")

    2.通过分析发现这两个画布放的是所需要的背景图和缺口图(其实一眼就看出来的)

    3. 接下来就是代码的编写了

      3.1 首先是获得背景图和缺口图的数据

     1    def get_images(self):
     2         """
     3         获取验证码图片
     4         :return: 图片的location信息
     5         """
     6         time.sleep(1)
     7         self.browser.web_driver_wait_ruishu(10, "class", 'geetest_canvas_slice')
     8         fullgb = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_bg geetest_'
     9                                              'absolute")[0].toDataURL("image/png")')["value"]
    10 
    11         bg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_fullbg geetest_fade'
    12                                          ' geetest_absolute")[0].toDataURL("image/png")')["value"]
    13         return bg, fullgb

      3.2 对数据进行解码操作并保存图片

    1     def get_decode_image(self, filename, location_list):
    2         """
    3         解码base64数据
    4         """
    5         _, img = location_list.split(",")
    6         img = base64.decodebytes(img.encode())
    7         new_im: image.Image = image.open(BytesIO(img))
    8 
    9         return new_im

      3.3 接下来就是计算缺口位置了(这里使用PIL计算两张图片的差值来获得缺口位置)

        def compute_gap(self, img1, img2):
            """Locate the x offset of the puzzle gap by diffing two captcha images.

            Both images are converted to RGB, subtracted pixel by pixel, reduced
            to grayscale and then binarised through the lookup table in
            ``self.table``. The binarised difference is scanned column by column,
            left to right, and the first column holding more than five marked
            pixels is reported as the gap position.

            :param img1: first image to compare (must support ``convert("RGB")``)
            :param img2: second image to compare (must support ``convert("RGB")``)
            :return: x coordinate of the first qualifying column, or ``None`` when
                     no column from the start offset onwards qualifies
            """
            # Normalise both inputs to RGB before taking the per-pixel difference.
            rgb_first = img1.convert("RGB")
            rgb_second = img2.convert("RGB")
            diff = ImageChops.difference(rgb_first, rgb_second)

            # Grayscale, then binarise with the instance lookup table.
            diff = diff.convert("L").point(self.table, '1')

            # Build the pixel accessor once instead of once per pixel.
            pixels = diff.load()
            width, height = diff.size

            # Scanning starts at x = 43.
            # NOTE(review): presumably this skips the area where the slider piece
            # itself is drawn - confirm against the captcha layout.
            left = 43

            # A column only counts once it has more than 5 marked pixels, so a
            # small bump on the edge of the gap is not mistaken for the gap.
            for w in range(left, width):
                marked = 0
                for h in range(height):
                    if pixels[w, h] == 1:
                        marked += 1
                        if marked > 5:
                            return w

            # No column qualified; callers must handle the missing offset.
            return None

      3.4 当滑块的缺口位置找到以后,就需要生成滑动轨迹(其中加20是为了保证滑动时先超过缺口位置,然后再慢慢回退到正确位置)

     1 def ease_out_quart(self, x):
     2         return 1 - pow(1 - x, 4)
     3 
     4     def get_tracks_2(self, distance, seconds, ease_func):
     5         """
     6         根据轨迹离散分布生成的数学 生成  # 参考文档  https://www.jianshu.com/p/3f968958af5a
     7         成功率很高 90% 往上
     8         :param distance: 缺口位置
     9         :param seconds:  时间
    10         :param ease_func: 生成函数
    11         :return: 轨迹数组
    12         """
    13         distance += 20
    14         tracks = [0]
    15         offsets = [0]
    16         for t in np.arange(0.0, seconds, 0.1):
    17             ease = ease_func
    18             offset = round(ease(t / seconds) * distance)
    19             tracks.append(offset - offsets[-1])
    20             offsets.append(offset)
    21         tracks.extend([-3, -2, -3, -2, -2, -2, -2, -1, -0, -1, -1, -1])
    22         return tracks

      3.5 最后也就是滑动滑块到缺口位置

     1    def move_to_gap(self, track):
     2        """移动滑块到缺口处"""
     3        slider = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
     4        ActionChains(self.browser).click_and_hold(slider).perform()
     5 
     6        while track:
     7            x = track.pop(0)
     8            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
     9            time.sleep(0.02)
    10 
    11        ActionChains(self.browser).release().perform()

    贴出完整代码(注意selenium有些方法会被极验检测到所以使用js命令直接运行的方式来达到效果)

    crack.py

      1 # -*-coding:utf-8 -*-
      2 import base64
      3 import time
      4 import functools
      5 import numpy as np
      6 
      7 from tools.selenium_spider import SeleniumSpider
      8 
      9 from selenium.webdriver import ActionChains
     10 from selenium.webdriver.support import expected_conditions as EC
     11 from selenium.webdriver.support.ui import WebDriverWait
     12 from selenium.webdriver.common.by import By
     13 import PIL.Image as image
     14 from PIL import ImageChops, PngImagePlugin
     15 from io import BytesIO
     16 
     17 
     18 class Crack(object):
     19     """
     20     解决三代极验滑块验证码
     21     """
     22     def __init__(self):
     23         self.url = 'https://www.geetest.com'
     24         self.browser = SeleniumSpider(path="/personalwork/personal_tools_project/adbtools/chromedriver", max_window=True)
     25         self.wait = WebDriverWait(self.browser, 100)
     26         self.BORDER = 8
     27         self.table = []
     28 
     29         for i in range(256):
     30             if i < 50:
     31                 self.table.append(0)
     32             else:
     33                 self.table.append(1)
     34 
     35     def open(self):
     36         """
     37         打开浏览器,并输入查询内容
     38         """
     39         self.browser.get(self.url)
     40         self.browser.get(self.url + "/Sensebot/")
     41         self.browser.web_driver_wait_ruishu(10, "class", 'experience--area')
     42         time.sleep(1)
     43         self.browser.execute_js('document.getElementsByClassName("experience--area")[0].getElementsByTagName("div")'
     44                                 '[2].getElementsByTagName("ul")[0].getElementsByTagName("li")[1].click()')
     45 
     46         time.sleep(1)
     47         self.browser.web_driver_wait_ruishu(10, "class", 'geetest_radar_tip')
     48 
     49         self.browser.execute_js('document.getElementsByClassName("geetest_radar_tip")[0].click()')
     50 
     51     def check_status(self):
     52         """
     53         检测是否需要滑块验证码
     54         :return:
     55         """
     56         self.browser.web_driver_wait_ruishu(10, "class", 'geetest_success_radar_tip_content')
     57         try:
     58             time.sleep(0.5)
     59             message = self.browser.find_element_by_class_name("geetest_success_radar_tip_content").text
     60             if message == "验证成功":
     61                 return False
     62             else:
     63                 return True
     64         except Exception as e:
     65             return True
     66 
     67     def get_images(self):
     68         """
     69         获取验证码图片
     70         :return: 图片的location信息
     71         """
     72         time.sleep(1)
     73         self.browser.web_driver_wait_ruishu(10, "class", 'geetest_canvas_slice')
     74         fullgb = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_bg geetest_'
     75                                              'absolute")[0].toDataURL("image/png")')["value"]
     76 
     77         bg = self.browser.execute_js('document.getElementsByClassName("geetest_canvas_fullbg geetest_fade'
     78                                          ' geetest_absolute")[0].toDataURL("image/png")')["value"]
     79         return bg, fullgb
     80 
     81     def get_decode_image(self, filename, location_list):
     82         """
     83         解码base64数据
     84         """
     85         _, img = location_list.split(",")
     86         img = base64.decodebytes(img.encode())
     87         new_im: PngImagePlugin.PngImageFile = image.open(BytesIO(img))
     88         # new_im.convert("RGB")
     89         # new_im.save(filename)
     90 
     91         return new_im
     92 
     93     def compute_gap(self, img1, img2):
     94         """计算缺口偏移 这种方式成功率很高"""
     95         # 将图片修改为RGB模式
     96         img1 = img1.convert("RGB")
     97         img2 = img2.convert("RGB")
     98 
     99         # 计算差值
    100         diff = ImageChops.difference(img1, img2)
    101 
    102         # 灰度图
    103         diff = diff.convert("L")
    104 
    105         # 二值化
    106         diff = diff.point(self.table, '1')
    107 
    108         left = 43
    109 
    110         for w in range(left, diff.size[0]):
    111             lis = []
    112             for h in range(diff.size[1]):
    113                 if diff.load()[w, h] == 1:
    114                     lis.append(w)
    115                 if len(lis) > 5:
    116                     return w
    117 
    118     def ease_out_quad(self, x):
    119         return 1 - (1 - x) * (1 - x)
    120 
    121     def ease_out_quart(self, x):
    122         return 1 - pow(1 - x, 4)
    123 
    124     def ease_out_expo(self, x):
    125         if x == 1:
    126             return 1
    127         else:
    128             return 1 - pow(2, -10 * x)
    129 
    130     def get_tracks_2(self, distance, seconds, ease_func):
    131         """
    132         根据轨迹离散分布生成的数学 生成  # 参考文档  https://www.jianshu.com/p/3f968958af5a
    133         成功率很高 90% 往上
    134         :param distance: 缺口位置
    135         :param seconds:  时间
    136         :param ease_func: 生成函数
    137         :return: 轨迹数组
    138         """
    139         distance += 20
    140         tracks = [0]
    141         offsets = [0]
    142         for t in np.arange(0.0, seconds, 0.1):
    143             ease = ease_func
    144             offset = round(ease(t / seconds) * distance)
    145             tracks.append(offset - offsets[-1])
    146             offsets.append(offset)
    147         tracks.extend([-3, -2, -3, -2, -2, -2, -2, -1, -0, -1, -1, -1])
    148         return tracks
    149 
    150     def get_track(self, distance):
    151         """
    152         根据物理学生成方式   极验不能用 成功率基本为0
    153         :param distance: 偏移量
    154         :return: 移动轨迹
    155         """
    156         distance += 20
    157         # 移动轨迹
    158         track = []
    159         # 当前位移
    160         current = 0
    161         # 减速阈值
    162         mid = distance * 3 / 5
    163         # 计算间隔
    164         t = 0.5
    165         # 初速度
    166         v = 0
    167 
    168         while current < distance:
    169             if current < mid:
    170                 # 加速度为正2
    171                 a = 2
    172             else:
    173                 # 加速度为负3
    174                 a = -3
    175             # 初速度v0
    176             v0 = v
    177             # 当前速度v = v0 + at
    178             v = v0 + a * t
    179             # 移动距离x = v0t + 1/2 * a * t^2
    180             move = v0 * t + 0.5 * a * (t ** 2)
    181             # 当前位移
    182             current += move
    183             # 加入轨迹
    184             track.append(round(move))
    185         track.extend([-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1])
    186         return track
    187 
    188     def move_to_gap(self, track):
    189         """移动滑块到缺口处"""
    190         slider = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_slider_button')))
    191         ActionChains(self.browser).click_and_hold(slider).perform()
    192 
    193         while track:
    194             x = track.pop(0)
    195             ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
    196             time.sleep(0.02)
    197 
    198         ActionChains(self.browser).release().perform()
    199 
    200     def crack(self, n):
    201         # 打开浏览器
    202         self.open()
    203 
    204         if self.check_status():
    205             # 保存的图片名字
    206             bg_filename = 'bg.png'
    207             fullbg_filename = 'fullbg.png'
    208 
    209             # 获取图片
    210             bg_location_base64, fullbg_location_64 = self.get_images()
    211 
    212             # 根据位置对图片进行合并还原
    213             bg_img = self.get_decode_image(bg_filename, bg_location_base64)
    214             fullbg_img = self.get_decode_image(fullbg_filename, fullbg_location_64)
    215             # 获取缺口位置
    216             gap = self.compute_gap(fullbg_img, bg_img)
    217             print('缺口位置', gap)
    218 
    219             track = self.get_tracks_2(gap - self.BORDER, 1, self.ease_out_quart)
    220             print("滑动轨迹", track)
    221             print("滑动距离", functools.reduce(lambda x, y: x+y, track))
    222             self.move_to_gap(track)
    223 
    224             time.sleep(1)
    225             if not self.check_status():
    226                 print('验证成功')
    227                 return True
    228             else:
    229                 print('验证失败')
    230                 # 保存图片方便调试
    231                 bg_img.save(f"bg_img{n}.png")
    232                 fullbg_img.save(f"fullbg{n}.png")
    233                 return False
    234 
    235         else:
    236             print("验证成功")
    237             return True
    238 
    239 
    240 if __name__ == '__main__':
    241     print('开始验证')
    242     crack = Crack()
    243     # crack.crack(0)
    244     count = 0
    245     for i in range(200):
    246         if crack.crack(i):
    247             count += 1
    248     print(f"成功率:{count / 200 * 100}%")

    selenium_spider.py

      1 #!/usr/local/bin/python
      2 # coding:utf-8
      3 
      4 """
      5 @author: Liubing
      6 @software: PyCharm
      7 @file: selenium_spider.py
      8 @time: 2019-03-11 13:46
      9 @describe: 基于selenium版本进一步封装 只针对于谷歌浏览器 其他浏览器需要自己封装
     10 """
     11 import json
     12 import time as time_
     13 
     14 from lxml import etree
     15 from selenium.webdriver.support.wait import WebDriverWait
     16 from selenium.webdriver.support import expected_conditions as EC
     17 from selenium.webdriver.chrome.webdriver import WebDriver
     18 from selenium.webdriver.chrome.options import Options
     19 
     20 
     21 class SeleniumSpider(WebDriver):
     22    """基于selenium进一步封装"""
     23 
     24    def __init__(self, path, params=None, max_window=False, *args, **kwargs):
     25        """
     26        初始化
     27        :param path: str selenium驱动路径
     28        :param params: list driver 附加参数
     29        :param args: tuple
     30        :param kwargs:
     31        """
     32        self.__path = path
     33        self.__params = params
     34        # 初始化
     35        self.__options = Options()
     36        self.__options.add_argument('--dns-prefetch-disable')
     37        self.__options.add_argument('--disable-gpu')  # 谷歌文档提到需要加上这个属性来规避bug
     38        self.__options.add_argument('disable-infobars')  # 隐藏"Chrome正在受到自动软件的控制"
     39        # self.__options.add_argument('--headless')
     40        self.is_maximize_window = max_window  # 是否开启全屏模式
     41 
     42        # 过检测 具体参考文档: https://juejin.im/post/5c62b6d5f265da2dab17ae3c
     43        self.__options.add_experimental_option('excludeSwitches', ['enable-automation'])
     44 
     45        if params:
     46            for i in params:
     47                self.__options.add_argument(i)
     48        super(SeleniumSpider, self).__init__(executable_path=self.__path, options=self.__options, *args, **kwargs)
     56        if self.is_maximize_window:
     57            self.maximize_window()
     58 
     59        # 规则部分
     60        self.ID = "id"
     61        self.XPATH = "xpath"
     62        self.LINK_TEXT = "link text"
     63        self.PARTIAL_LINK_TEXT = "partial link text"
     64        self.NAME = "name"
     65        self.TAG_NAME = "tag name"
     66        self.CLASS_NAME = "class name"
     67        self.CSS_SELECTOR = "css selector"
     68 
     69    def cookies_dict_to_selenium_cookies(self, cookies: dict, domain):
     70        """
     71        requests cookies 转换到 selenium cookies
     72        :param cookies: requests cookies
     73        :return: selenium 支持的cookies
     74        """
     75        temp_cookies = []
     76        for key, value in cookies.items():
     77            # requests 有bug 域区分的不是很清楚 手动区分 只限全国电信接口能用
     78            temp_cookies.append({"name": key, "value": value, "domain": domain})
     79        return temp_cookies
     80 
     81    def get(self, url: str, cookies=None, domain=None):
     82        """
     83        请求数据
     84        :param url: 待请求的url
     85        :param cookies: 添加cookies cookies 格式 [{"name": key, "value": value, "domain": domain},...]
     86        :param domain: cookie作用域
     87        :return:
     88        """
     89        super().get(url)
     90        if cookies:
     91            # 执行
     92            if type(cookies) == list:
     93                for cookie in cookies:
     94                    if "name" in cookie.keys() and "value" in cookie.keys() and "domain" in cookie.keys():
     95                        self.add_cookie(cookie)
     96                    else:
     97                        raise TypeError('cookies错误请传入正确格式[{"name": key, "value": value, "domain": domain},...'
     98                                        '] 或者{key: vale,...}')
     99            elif type(cookies) == dict:
    100                if domain:
    101                    for i in self.cookies_dict_to_selenium_cookies(cookies, domain):
    102                        self.add_cookie(i)
    103                else:
    104                    raise ValueError("{key:vale}格式必须传入doamin参数")
    105            # 刷新页面
    106            self.refresh()
    107 
    108    def web_driver_wait(self, time: int, rule: str, num: str):
    109        """
    110        页面等待  瑞数产品弃用这种方法 不然会400错误
    111        :param time: 等待时间
    112        :param rule: 规则 [id, xpath, link text, partial link text, name, tag name, class name, css selector]
    113        :param num: 根据元素id
    114        :return:
    115        """
    116        WebDriverWait(self, time, 0.5).until(
    117            EC.presence_of_element_located((rule, num)))
    118 
    119    def web_driver_wait_ruishu(self, time: int, rule: str, num: str):
    120        """
    121        笨方法 遍历页面匹配
    122        :param time: 等待时间
    123        :param rule: 规则 [id, class]
    124        :param num: 根据元素id
    125        :return:
    126        """
    127        while time:
    128            response = self.execute_js("document.documentElement.outerHTML")
    129            try:
    130                html = etree.HTML(text=response["value"])
    131                inp = html.xpath("//*[contains(@%s, '%s')]" % (rule, num))
    132                if inp:
    133                    break
    134            except Exception as e:
    135                continue
    136            time_.sleep(1)
    137            time -= 1
    138        if not time:
    139            raise Exception("未找到 %s" % num)
    140 
    141    def execute_chrome_protocol_js(self, protocol, params: dict):
    142        """
    143        Chrome DevTools 协议操作 具体协议请参考 https://chromedevtools.github.io/devtools-protocol/
    144        :param protocol: str 协议名称
    145        :param params: dict 参数
    146        :return:
    147        """
    148        resource = "/session/%s/chromium/send_command_and_get_result" % self.session_id
    149        command_executor = self.command_executor
    150        url = command_executor._url + resource
    151        body = json.dumps({'cmd': protocol, 'params': params})
    152        response = command_executor._request('POST', url, body)
    153        if response['status']:
    154            return response
    155        return response["value"]
    156 
    157    def execute_js(self, js):
    158        """
    159        执行js  过瑞数检测
    160        :param js: str 待执行的js
    161        :return:  {"type": "xxx", value: "xxx"}
    162        """
    163        response = self.executor_chrome_protocol_js('Runtime.evaluate', js)
    164        if response['status']:
    165            return response
    166        return response["value"]["result"]

    问题:对于windows下滑动卡顿导致不成功问题解决办法

    修改源码文件 site-packages/selenium/webdriver/common/actions/pointer_input.py

    将其中 DEFAULT_MOVE_DURATION 的默认值250(毫秒)改为30或者其他值就可以(不能太快)

    最后以极验官网demo为例进行了200次测试,准确率高达99%。

     

  • 相关阅读:
    添加组合索引时,做相等运算字段应该放在最前面
    常用位运算
    redis php扩展简单使用
    mysql优化之简单概念
    mysql优化之sql语句优化
    简单画图 gd库函数
    win下 安装mongodb
    伪静态之实际应用
    tomcat win简单配置
    docker
  • 原文地址:https://www.cnblogs.com/damon-/p/11044402.html
Copyright © 2020-2023  润新知