• 缺口验证码


    # -*-coding:utf-8 -*-
    import random
    import time
    from urllib.request import urlretrieve

    import cv2
    import PIL.Image as image
    from PIL import ImageChops
    from scrapy.http import HtmlResponse
    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException, TimeoutException
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait
    
    
    # options = Options()
    # options.add_argument('--headless')
    # options.add_argument('--no-sandbox')
    # options.add_argument('--disable-dev-shm-usage')
    # driver = webdriver.Chrome(executable_path="d:CaiPanChromechromedriver.exe", chrome_options=options)
    
    class Crack(object):
        def __init__(self, url):
            """Create the Chrome driver used to work on the captcha page.

            :param url: address of the captcha page; stored on ``self.url`` and
                loaded later by ``open``.
            """
            # Each entry is handed to ``Options.add_argument`` unchanged, in
            # exactly this order.
            # NOTE(review): apart from the first two switches these look like
            # HTTP request header names rather than Chrome command-line
            # switches -- confirm that Chrome actually honours them.
            chrome_arguments = (
                '--disable-gpu',
                "--no-sandbox",
                'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"',
                'upgrade-insecure-requests="1"',
                'sec-fetch-user="?1"',
                'sec-fetch-site="none"',
                'sec-fetch-mode="navigate"',
                'pragma="no-cache"',
                'cache-control="no-cache"',
                'accept-language="en-US,en;q=0.9"',
                'accept-encoding="gzip, deflate, br"',
                ':scheme="https"',
                ':method="GET"',
                ':authority="www.xiaohongshu.com"',
                'accept="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"',
            )
            chrome_options = Options()
            for argument in chrome_arguments:
                chrome_options.add_argument(argument)
            self.options = chrome_options

            self.url = url
            # NOTE(review): the driver path contains no separators -- possibly
            # backslashes were lost when the script was published; verify.
            self.browser = webdriver.Chrome('D:CaiPanChromechromedriver.exe', chrome_options=self.options)
            # Explicit-wait helper bound to this browser with a 100 second limit.
            self.wait = WebDriverWait(self.browser, 100)
            # Pixel margin that ``crack`` subtracts from the detected gap.
            self.BORDER = 6
    
        def open(self):
            """
            打开浏览器,并输入查询内容
            """
            cookie1 = {'name': 'xhsTrackerId', 'value': '05e3b581-4bbf-4f7b-cf0c-d0aa80b4151a'}
            cookie2 = {'name': 'ra-user-id-ares', 'value': '5bfe244a9df0a90001b38b2c'}
            cookie3 = {'name': 'Hm_lvt_d0ae755ac51e3c5ff9b1596b0c09c826', 'value': '1565592524,1565593646,1565593678,1565594279'}
            cookie4 = {'name': 'Hm_lpvt_d0ae755ac51e3c5ff9b1596b0c09c826', 'value': '1565595993'}
            cookie5 = {'name': 'Hm_lvt_b344979f0455853bf22b3ef05fa7b4ac', 'value': '1566370861'}
            cookie6 = {'name': 'Hm_lpvt_b344979f0455853bf22b3ef05fa7b4ac', 'value': '1566370861'}
            cookie7 = {'name': 'xhs_spses.6983', 'value': '*'}
            cookie8 = {'name': 'solar.beaker.session.id', 'value': '6bc441db11c89bbade2ee08edcd106efe516723egAJ9cQAoWAwAAABsb2dpbkFjY291bnRxAVgLAAAAMTMxMjIyNTIzMjRxAlgEAAAAcm9sZXEDWAUAAABicmFuZHEEWAoAAABiQWNjb3VudE5vcQVYCgAAADk5NzMyNTg2NTFxBlgJAAAAbG9naW5UeXBlcQdLAVgIAAAAbmlja05hbWVxCFgOAAAAQmVsbGEncyBHYXJkZW5xCVgIAAAAc2hvcE5hbWVxCk5YCQAAAHJlc291cmNlc3ELXXEMKFgYAAAANWMzNTUzZGUxZDk3NzE1OGEzNjc0OTM0cQ1lWAYAAABhdmF0YXJxDlhaAAAAaHR0cHM6Ly9pbWcueGlhb2hvbmdzaHUuY29tL2F2YXRhci81YzM0NTU1NGMxMmZkNDAwMDExMjE3NzYuanBnQDEyMHdfMTIwaF85MnFfMWVfMWNfMXguanBncQ9YDgAAAF9hY2Nlc3NlZF90aW1lcRBHQddXnAhVcKRYBgAAAHVzZXJJZHERWBgAAAA1YmZlMjQ0YTlkZjBhOTAwMDFiMzhiMmNxElgIAAAAYXR0ckxpc3RxE05YDAAAAGJBY2NvdW50VHlwZXEUWAcAAABQUklNQVJZcRVYCwAAAGRlYWN0aXZhdGVkcRaJWAgAAABzZWxsZXJJZHEXWBgAAAA1YzM1NTNkZTFkOTc3MTU4YTM2NzQ5MzRxGFgOAAAAX2NyZWF0aW9uX3RpbWVxGUdB11ecCFVwpFgLAAAAcGVybWlzc2lvbnNxGl1xGyhlWAsAAABzZWxsZXJSb2xlc3EcTlgDAAAAX2lkcR1YIAAAADUxZDNhZWM0NzRmNDQ2MmRhNTM2NDFiNzU5Y2QzYWM2cR5YCgAAAHNlbGxlclR5cGVxH0sEWAUAAABlbWFpbHEgWCEAAAA1YmZlMjQ0YTlkZjBhOTAwMDFiMzhiMmNAeGhzLmZha2VxIXUu'}
            cookie9 = {'name': 'xhs_spid.6983', 'value': 'fe43536f085a4a3f.1565594090.21.1566470205.1566270695.ba61cacc-e97c-41c6-b72f-c6ca1a6b4d9c'}
    
            self.browser.get(self.url)
            # self.browser.add_cookie(cookie1)
            # self.browser.add_cookie(cookie2)
            # self.browser.add_cookie(cookie3)
            # self.browser.add_cookie(cookie4)
            # self.browser.add_cookie(cookie5)
            # self.browser.add_cookie(cookie6)
            # self.browser.add_cookie(cookie7)
            # self.browser.add_cookie(cookie8)
            # self.browser.add_cookie(cookie9)
            # self.browser.get(self.url)
            self.browser.implicitly_wait(30)
    
        def get_size(self):
            screenSize = self.browser.get_window_size()  # 返回个字典
    
            print(f"当前屏幕尺寸为{screenSize}")  # 当前屏幕尺寸为{'width': 1080, 'height': 2280}
    
            # width = screenSize['width']
            # height = screenSize['height']
    
        def get_images(self):
            """Download both captcha images and return a horizontal offset.

            The slider piece (element class ``shumei_captcha_loaded_img_fg``)
            is saved as ``fg.jpg`` and the background (element class
            ``shumei_captcha_loaded_img_bg``) as ``bg.jpg`` in the current
            working directory.

            :return: x coordinate at 80% of the background element's width
                minus the x coordinate at 80% of the slider-piece element's
                width (page coordinates).
            """
            fg_end_x = self._download_captcha_image(
                "shumei_captcha_loaded_img_fg", 'fg.jpg', '缺口图片下载完成')
            bg_end_x = self._download_captcha_image(
                "shumei_captcha_loaded_img_bg", 'bg.jpg', '背景图片下载完成')
            distance = bg_end_x - fg_end_x
            print(distance)
            return distance

        def _download_captcha_image(self, class_name, filename, done_message):
            """Save the image of one captcha element and return its 80%-width x.

            :param class_name: CSS class of the ``<img>`` element to look up
            :param filename: local path the image is written to
            :param done_message: text printed once the download has finished
            :return: ``location.x + 0.8 * width`` of the element
            """
            while True:
                try:
                    element = self.browser.find_element_by_class_name(class_name)
                    # Pause before reading the element's geometry and source.
                    time.sleep(2)
                    location = element.location
                    size = element.size
                    print(location)
                    print(size)
                    start_x = location["x"] + int(size['width']) * 0.2
                    start_y = location["y"] + int(size['height']) * 0.5
                    end_x = location['x'] + int(size['width']) * 0.8
                    end_y = location['y'] + int(size['height']) * 0.5
                    print(start_x, start_y, end_x, end_y)
                    image_url = element.get_attribute("src")
                    # An empty ``src`` means the image is not there yet; look
                    # the element up again.
                    if image_url:
                        print(image_url)
                        urlretrieve(url=image_url, filename=filename)
                        print(done_message)
                        return end_x
                except TimeoutException:
                    # Retry only this lookup instead of re-running the whole
                    # download sequence.
                    continue
    
        def get_gap(self, img1, img2):
            """
            获取缺口偏移量
            :param img1: 不带缺口图片
            :param img2: 带缺口图片
            :return:
            """
            left = 15
            for i in range(left, img1.size[0]):
                for j in range(img1.size[1]):
                    if not self.is_pixel_equal(img1, img2, i, j):
                        left = i
                        return left
            return left
    
        def is_pixel_equal(self, img1, img2, x, y):
            """
            判断两个像素是否相同
            :param image1: 图片1
            :param image2: 图片2
            :param x: 位置x
            :param y: 位置y
            :return: 像素是否相同
            """
            # 取两个图片的像素点
            pix1 = img1.load()[x, y]
            pix2 = img2.load()[x, y]
            threshold = 60
            if (abs(pix1[0] - pix2[0] < threshold) and abs(pix1[1] - pix2[1] < threshold) and abs(
                    pix1[2] - pix2[2] < threshold)):
                return True
            else:
                return False
    
        def crack(self):
            """Open the page, locate the gap and print the drag track.

            ``get_images`` only downloads ``bg.jpg`` / ``fg.jpg`` and returns a
            single number, so the gap is located the same way ``process`` does
            it: both images are resized and the slider piece is template
            matched against the background.  The track is computed for the gap
            minus ``self.BORDER`` and printed; the slider itself is not moved.
            """
            # Open the browser on the captcha page.
            self.open()
            # Download bg.jpg and fg.jpg.
            self.get_images()

            # Bring both images to the sizes used for template matching.
            self.fixed_size('bg.jpg', '1bg.jpg', 400, 200)
            self.fixed_size('fg.jpg', '1fg.png', 60, 200)

            # x position of the best match of the piece inside the background.
            gap = self.FindPic('1bg.jpg', '1fg.png')
            print('缺口位置', gap)

            track = self.get_track(gap - self.BORDER)
            print('滑动滑块')
            print(track)
    
        def get_merge_image(self, filename, location_list):
            """Reassemble an image from 10-pixel-wide vertical strips.

            For every location whose ``y`` is -100 a strip is cut from rows
            100-200 of the source and placed in the top half of the result;
            for every location whose ``y`` is 0 a strip is cut from rows 0-100
            and placed in the bottom half.  Strips are laid out left to right
            in list order.  The result overwrites ``filename``.

            :param filename: path of the source image; also the output path
            :param location_list: dicts with ``'x'`` and ``'y'`` entries; the
                absolute value of ``x`` is the left edge of the strip
            :return: the reassembled 400x200 RGB image
            """
            upper_strips = []
            lower_strips = []
            # The output canvas is fixed at 400 x 200 pixels.
            canvas = image.new('RGB', (400, 200))

            # Keep the source open only while strips are cut and pasted, so
            # the file handle is released before the file is overwritten.
            with image.open(filename) as source:
                for location in location_list:
                    left = abs(location['x'])
                    if location['y'] == -100:
                        upper_strips.append(source.crop((left, 100, left + 10, 200)))
                    if location['y'] == 0:
                        lower_strips.append(source.crop((left, 0, left + 10, 100)))

                for row_top, strips in ((0, upper_strips), (100, lower_strips)):
                    x_offset = 0
                    for strip in strips:
                        canvas.paste(strip, (x_offset, row_top))
                        x_offset += strip.size[0]

            canvas.save(filename)
            print(canvas)
            return canvas
    
        def fixed_size(self, infile, outfile, width, height):
            """Resize an image file to a fixed size and save the result.

            :param infile: path of the image to read, e.g. ``'fg.jpg'``
            :param outfile: path the resized image is written to, e.g.
                ``'new_fg.png'``
            :param width: target width in pixels
            :param height: target height in pixels
            """
            # ``ANTIALIAS`` was removed in Pillow 10; ``LANCZOS`` is the same
            # filter under its current name.  Fall back for very old releases
            # that only know the old name.
            try:
                resample = image.LANCZOS
            except AttributeError:
                resample = image.ANTIALIAS

            # ``resize`` returns a new, fully loaded image, so the source file
            # can be closed before the result is saved.
            with image.open(infile) as im:
                out = im.resize((width, height), resample)
            out.save(outfile)
    
        def FindPic(self, target, template):
            """Find where ``template`` matches best inside ``target``.

            The target is read in colour and converted to grayscale, the
            template is read directly as grayscale, and the two are compared
            with normalised cross-correlation (``cv2.TM_CCOEFF_NORMED``).  The
            full ``cv2.minMaxLoc`` result is printed for inspection.

            :param target: path of the background image to search in
            :param template: path of the image to look for
            :return: x coordinate of the last location in the ``minMaxLoc``
                result, i.e. of the highest-scoring match
            """
            background_gray = cv2.cvtColor(cv2.imread(target), cv2.COLOR_BGR2GRAY)
            piece_gray = cv2.imread(template, 0)
            scores = cv2.matchTemplate(background_gray, piece_gray, cv2.TM_CCOEFF_NORMED)
            extrema = cv2.minMaxLoc(scores)
            print('*****')
            print(extrema)
            best_location = extrema[-1]
            return best_location[0]
    
        def get_slider(self):
            """
            获取滑块
            :return: 滑块对象
            """
            while True:
                try:
                    slider = self.browser.find_element_by_xpath("//div[@class='shumei_captcha_slide_btn']")
                    # print(slider)
                    break
                except:
                    time.sleep(0.5)
            return slider
    
    
        def get_track(self, distance):
            """
            根据偏移量获取移动轨迹
            :param distance: 偏移量
            :return: 移动轨迹
            """
            print("=" * 10, distance)
            # 移动轨迹
            track = []
            # 当前位移
            current = 0
            # 减速阈值
            mid = distance * 4 / 5
            print(mid)
            # 计算间隔
            t = 0.2
            # 初速度
            v = 0
    
            while current < distance:
                if current < mid:
                    # 加速度为正2
                    a = 4
                else:
                    # 加速度为负3
                    a = -3.5
                # 初速度v0
                v0 = v
                # 当前速度v = v0 + at
                v = v0 + a * t
                # 移动距离x = v0t + 1/2 * a * t^2
                move = v0 * t + 1 / 2 * a * t * t
                # 当前位移
                current += move
                # 加入轨迹
                track.append(round(move))
            # print(track)
            return track
    
    
        def move_to_gap(self, slider, track):
            """Drag the slider along the given track and release it.

            The mouse button is pressed on ``slider``, the pointer is moved
            horizontally by each offset in ``track`` (one action chain per
            step), and after a 0.8 second pause the button is released.

            :param slider: slider element to drag
            :param track: iterable of horizontal offsets, one per step
            :return: None
            """
            ActionChains(self.browser).click_and_hold(slider).perform()
            for x in track:
                ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()

            # Hold briefly at the final position before letting go.
            time.sleep(0.8)
            ActionChains(self.browser).release().perform()
    
        def result_html(self):
            """Check the page after the drag and return it on success.

            The current page source is wrapped in an ``HtmlResponse``.  If it
            contains the "verification failed" message, the whole ``process``
            sequence is run again on this instance and nothing is returned;
            otherwise the page text is printed and the response is returned.

            :return: the ``HtmlResponse`` when no failure message is present,
                otherwise ``None``
            """
            response = HtmlResponse(url=self.browser.current_url, body=self.browser.page_source,
                                    encoding='utf-8')
            if '验证失败,请重新再试' in response.text:
                # Retry on this instance rather than on a module-level global,
                # so the class also works when imported.
                self.process()
            else:
                print(response.text)
                return response
    
        def close(self):
            self.browser.close()
    
        def process(self):
            self.get_images()
            self.fixed_size('bg.jpg', '1bg.jpg', 400, 200)
            self.fixed_size('fg.jpg', '1fg.png', 60, 200)
            x = self.FindPic('1bg.jpg', '1fg.png')
            a = self.get_slider()
            r = self.get_track(x)
            self.move_to_gap(a, r)
            time.sleep(2)
            self.result_html()
            self.close()
    
    
    if __name__ == '__main__':
        # Captcha page that redirects to a user profile once it is solved.
        url = 'https://www.xiaohongshu.com/web-login/captcha?redirectPath=http%3A%2F%2Fwww.xiaohongshu.com%2Fuser%2Fprofile%2F590d4d5950c4b4281396ea20'
        # The instance is kept in the module-level name ``c``.
        c = Crack(url)
        c.open()
        c.get_size()
        # range(1, 2) yields a single value, so exactly one attempt is made.
        for i in range(1, 2):
            c.process()

    缺口验证码的验证!!

  • 相关阅读:
    最近花了几个夜晚帮师妹整了一个企业网站
    英文学习网站
    Visual Studio 常用快捷键 (二)
    Visual Studio 常用快捷键
    学习英文之社区,博客及源码
    CodeForces 676D代码 哪里有问题呢?
    线程中调用python win32com
    Python 打包工具cx_freeze 问题记录及解决办法
    HDU1301 Jungle Roads
    HDU 1875 畅通工程再续
  • 原文地址:https://www.cnblogs.com/542684416-qq/p/11909981.html
Copyright © 2020-2023  润新知