• Python——使用打码平台识别验证码


    打码平台介绍

    一般使用超级鹰或打码兔的打码平台。

    超级鹰介绍

    打开http://www.chaojiying.com/contact.html注册用户,生成软件ID

    下载python的demo文件

    查看打码类型

    使用方法

    from chaojiying import Chaojiying
    
    # Chaojiying account settings (placeholder values).
    CHAOJIYING_USERNAME = 'xxxxxx'  # account name
    CHAOJIYING_PASSWORD = '123456'  # account password
    CHAOJIYING_SOFT_ID = 894611  # software ID generated on the Chaojiying site
    CHAOJIYING_KIND = 9004  # captcha type code

    # Build the client from the settings above.
    cjy = Chaojiying(
        CHAOJIYING_USERNAME,
        CHAOJIYING_PASSWORD,
        CHAOJIYING_SOFT_ID,
    )
    # Upload the image bytes for recognition.
    # NOTE(review): bytes_array is not defined in this fragment; presumably a
    # BytesIO holding the captcha image.
    result = cjy.post_pic(
        bytes_array.getvalue(),
        CHAOJIYING_KIND,
    )

    逻辑实现

    1.获取需要识别的图片

    在获取需要识别的图片时,一般需要将图片和文字提示一起截取下来。可以通过selenium的截图方法获取所需的信息。

    # Take a screenshot of the whole browser window; the captcha is cut out of it below.
    self.browser.save_screenshot('aa.png')
    # Locate the <div> that contains the captcha image.
    element = self.browser.find_element_by_xpath('/html/body/div[2]/div/div[2]/div[2]/div[3]/div/div[2]/div[3]/div/div')

    # Crop box as (left, top, right, bottom). The top edge is moved up by 100
    # pixels -- presumably to include the text prompt above the image.
    box = (
        element.location['x'],
        element.location['y'] - 100,
        element.location['x'] + element.size['width'],
        element.location['y'] + element.size['height'],
    )

    # Cut the box out of the full screenshot and store it as its own file.
    captcha = Image.open('aa.png').crop(box)
    captcha.save('captcha.png')

    2.识别需要点击的坐标

     把需要识别的图片和文字提示一起上传,平台会返回需要点击的坐标。

    # Submit the image bytes to the recognition service.
    result = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
    # 'pic_str' is split on '|' into groups, and each group on ',' into integers.
    groups = result.get('pic_str').split('|')
    locations = []
    for group in groups:
        locations.append(list(map(int, group.split(','))))

    3.根据坐标顺序依次在页面上进行点击

    根据x和y坐标依次点击图片当中的文字。self.get_touclick_element() 用于获取图片的位置。

    move_to_element_with_offset 将鼠标移动到距某个元素多少距离的位置

    # Click every recognised point in order, waiting one second after each click.
    for location in locations:
        chain = ActionChains(self.browser)
        target = self.get_touclick_element()
        offset_x, offset_y = location[0], location[1]
        # Move the pointer to the given offset relative to the element, then click.
        chain.move_to_element_with_offset(target, offset_x, offset_y).click().perform()
        time.sleep(1)

    识别案例

    conf 目录

    [DEFAULT]
    CODE_USERNAME = xxxxxxxxx
    CODE_PASSWORD = 1111111
    CODE_SOFT_ID = 894611
    CODE_KIND = 9004
    TRACK_TICKET_USERNAME = uuuuuu
    TRACK_TICKET_PASSWORD = ya1111
    
    [OTHER]
    image=1
    check_config.ini
    import logging
    import os
    from logging import handlers
    
    # Log line formats: c_format for console output, f_format for the log file.
    c_format = '[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)d] [%(message)s]'
    f_format = '[%(asctime)s]-[%(levelname)s]-[%(filename)s:%(lineno)d]-[%(message)s]'
    # Log directory: "<project root>/log", where the project root is the parent
    # of the directory containing this module. os.path.join supplies the path
    # separator that the previous '%slog' format string left out.
    logfile_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'log')
    log_name = 'scrapy.log'

    # Absolute path of the log file.
    logfile_path = os.path.join(logfile_dir, log_name)
    
    
    def get_mylogger(name):
        """
        Return the logger called ``name`` with a console and a file handler.

        The logger level is DEBUG; the console handler emits INFO and above
        using ``c_format`` and the file handler writes WARNING and above to
        ``logfile_path`` using ``f_format``. No log rotation is configured.

        :param name: logger name passed to ``logging.getLogger``
        :return: the configured ``logging.Logger``
        """
        logger = logging.getLogger(name)
        logger.setLevel('DEBUG')

        # logging.getLogger returns the same object for the same name, so a
        # second call must not attach the handlers again (every record would
        # otherwise be written twice).
        if logger.handlers:
            return logger

        console_handler = logging.StreamHandler()
        console_handler.setLevel('INFO')
        console_handler.setFormatter(logging.Formatter(fmt=c_format, datefmt='%Y-%m-%d %H:%M:%S '))

        # FileHandler opens the file immediately and fails if the directory is missing.
        os.makedirs(logfile_dir, exist_ok=True)
        file_handler = logging.FileHandler(logfile_path)
        file_handler.setLevel('WARNING')
        file_handler.setFormatter(logging.Formatter(fmt=f_format))

        # The rotating handlers that used to be created here were never attached
        # to the logger; each only kept an extra open handle on the log file.
        logger.addHandler(console_handler)
        logger.addHandler(file_handler)

        return logger
    
    # Manual check: build a logger when this module is run as a script.
    if __name__ == '__main__':
        log = get_mylogger('test')
    my_logset.py
    import os
    # Absolute path of check_config.ini, which sits next to this settings module.
    # os.path.join replaces the hard-coded '\' so the path is valid on any OS.
    config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'check_config.ini')
    # "<project root>/image": directory the demo saves its screenshots into.
    # The previous '%simage' format string left out the path separator.
    image_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'image')
    settings.py

    lib目录

    #!/usr/local/bin/python3
    # -*- coding: utf-8 -*-
    import configparser
    import os
    
    
    
    class Read_Ini(object):
        """Thin wrapper around ``configparser.ConfigParser`` for one INI file."""

        def __init__(self, config_file):
            """
            Parse ``config_file``.

            ``ConfigParser.read`` silently skips files it cannot open, so a wrong
            path only shows up later as a missing option or section.

            :param config_file: path of the INI file to read
            """
            self.config = configparser.ConfigParser()
            self.config.read(filenames=config_file)

        def get_value(self, name, section='DEFAULT'):
            """
            Return the value of option ``name`` in ``section`` as a string.

            :param name: option name
            :param section: section to look in, ``'DEFAULT'`` if omitted
            :return: the option value
            """
            return self.config.get(section, name)

        def get_section_dict(self, section='DEFAULT'):
            """
            Return the option names available in ``section`` as a list.

            Despite the method name, a list of names (not a dict) is returned;
            for a regular section it also contains the names from the default
            section, as ``ConfigParser.options`` does.

            :param section: section to inspect, ``'DEFAULT'`` if omitted
            :return: list of option names
            """
            # ConfigParser.options() raises NoSectionError for the default
            # section, so the method's own default argument needs this branch.
            if section == self.config.default_section:
                return list(self.config.defaults())
            return self.config.options(section)
    
    if __name__ == '__main__':
        # Manual check: print CODE_USERNAME from <project root>/conf/check_config.ini.
        # os.path.join replaces the '%sconf\%s' format, which had no separator
        # before 'conf' and a Windows-only backslash after it.
        path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'conf', 'check_config.ini')
        print(path)
        a = Read_Ini(path)
        print(a.get_value('CODE_USERNAME'))
    read_ini.py
    import requests
    from hashlib import md5
    
    
    class Chaojiying(object):
        """Small HTTP client for the Chaojiying captcha-recognition service."""

        UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
        REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

        def __init__(self, username, password, soft_id, timeout=60):
            """
            Store the credentials sent with every request.

            :param username: account name
            :param password: plain-text password; only its MD5 hex digest is kept and sent
            :param soft_id: software ID generated for the account
            :param timeout: timeout in seconds handed to ``requests.post``;
                ``None`` waits indefinitely, which was the previous behaviour
            """
            self.username = username
            self.password = md5(password.encode('utf-8')).hexdigest()
            self.soft_id = soft_id
            self.timeout = timeout
            self.base_params = {
                'user': self.username,
                'pass2': self.password,
                'softid': self.soft_id,
            }
            self.headers = {
                'Connection': 'Keep-Alive',
                'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
            }

        def _post(self, url, params, files=None):
            """POST ``params`` plus the account parameters to ``url`` and return the decoded JSON."""
            data = dict(params)
            data.update(self.base_params)
            # Without a timeout a stalled connection would block the caller forever.
            r = requests.post(url, data=data, files=files, headers=self.headers, timeout=self.timeout)
            return r.json()

        def post_pic(self, im, codetype):
            """
            Upload an image for recognition.

            :param im: image content as bytes
            :param codetype: captcha type code, see http://www.chaojiying.com/price.html
            :return: the decoded JSON response
            """
            return self._post(self.UPLOAD_URL, {'codetype': codetype}, files={'userfile': ('ccc.jpg', im)})

        def report_error(self, im_id):
            """
            Report a wrongly recognised image.

            :param im_id: picture ID of the image whose result was wrong
            :return: the decoded JSON response
            """
            return self._post(self.REPORT_URL, {'id': im_id})
    chaojiying.py

    core目录

    #!/usr/local/bin/python3
    # -*- coding: utf-8 -*-
    # @Time    : 2018/4/22 20:16
    # @Author  : hyang
    # @File    : demo.py
    # @Software:
    
    import time
    from io import BytesIO
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait # 等待元素加载的
    from selenium.webdriver.common.action_chains import ActionChains  #拖拽
    from selenium.webdriver.support import expected_conditions as EC
    # from selenium.common.exceptions import TimeoutException, NoSuchElementException
    from selenium.webdriver.common.by import By
    from PIL import Image
    from lib import chaojiying
    from lib import read_ini
    from conf import settings
    from conf import my_logset as mylog
    from collections import namedtuple
    import os
    
    
    
    
    class Check_Code_Click_Demo(object):
        """Log in to the 12306 site by solving its click captcha through Chaojiying."""

        def __init__(self):
            """Start Chrome, load the INI configuration and create the logger and Chaojiying client."""
            self.url = 'https://kyfw.12306.cn/otn/login/init'
            self.driver = webdriver.Chrome()
            self.wait = WebDriverWait(self.driver, 10)
            self.config = read_ini.Read_Ini(settings.config_path)
            self.logger = mylog.get_mylogger('demo')
            # Chaojiying client: user name, password and software ID come from the config file.
            self.chaojiying = chaojiying.Chaojiying(
                self.config.get_value('CODE_USERNAME'),
                self.config.get_value('CODE_PASSWORD'),
                self.config.get_value('CODE_SOFT_ID'),
            )

        def __del__(self):
            """Shut the browser down when the object is destroyed."""
            # __init__ can fail before self.driver exists; quit() (unlike close())
            # ends the whole WebDriver session, including the driver process.
            driver = getattr(self, 'driver', None)
            if driver is not None:
                driver.quit()

        def open_url(self):
            """
            Open the login page, maximise the window and type the user name and password.

            :return: None
            """
            self.driver.get(self.url)
            time.sleep(1)
            self.driver.maximize_window()
            self.logger.info('打开url,输入用户名,密码')
            # find_element(By.ID, ...) replaces find_element_by_id, which newer
            # Selenium releases no longer provide.
            self.driver.find_element(By.ID, 'username').send_keys(self.config.get_value('TRACK_TICKET_USERNAME'))
            time.sleep(0.5)
            self.driver.find_element(By.ID, 'password').send_keys(self.config.get_value('TRACK_TICKET_PASSWORD'))
            time.sleep(0.5)

        def element_screenshot(self, element):
            """
            Screenshot the page and crop it to the rectangle occupied by ``element``.

            The full screenshot is saved as ``full.png`` and the crop as
            ``fullcrop.png``, both under ``settings.image_path``.

            :param element: web element to cut out of the screenshot
            :return: the cropped PIL image
            """
            full_path = os.path.join(settings.image_path, 'full.png')
            self.driver.save_screenshot(full_path)
            # Crop box as (left, top, right, bottom).
            # NOTE(review): assumes screenshot pixels equal page coordinates
            # (device pixel ratio 1) -- confirm on high-DPI displays.
            left = element.location['x']
            top = element.location['y']
            right = left + element.size['width']
            bottom = top + element.size['height']
            box = (left, top, right, bottom)
            self.logger.info('得到验证码位置%s' % str(box))
            picture = Image.open(full_path).crop(box)
            picture.save(os.path.join(settings.image_path, 'fullcrop.png'))

            return picture

        def get_touclick_image(self):
            """
            Wait for the captcha image element and return it.

            :return: the element matching '.touclick-img-par.touclick-bgimg'
            """
            return self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.touclick-img-par.touclick-bgimg'))
            )

        def get_image(self, name='captcha.png', num=100):
            """
            Return a cropped screenshot of the captcha image.

            :param name: currently unused
            :param num: currently unused
            :return: the cropped PIL image
            """
            captcha = self.element_screenshot(self.get_touclick_image())
            self.logger.info('得到要识别的图片')
            return captcha

        def get_click_words(self, location):
            """
            Click each recognised point on the captcha image.

            :param location: iterable of ``[x, y]`` offsets
            :return: None
            """
            for loc in location:
                im_element = self.get_touclick_image()
                # NOTE(review): Selenium 4.3+ measures this offset from the
                # element centre instead of its top-left corner -- confirm the
                # installed version.
                ActionChains(self.driver).move_to_element_with_offset(im_element, loc[0], loc[1]).click().perform()
                self.driver.save_screenshot(os.path.join(settings.image_path, 'check.png'))
                time.sleep(0.5)

        def get_points(self, res):
            """
            Parse the coordinates out of a recognition response.

            :param res: response dict; 'pic_str' looks like 'x1,y1|x2,y2'
            :return: list of ``[x, y]`` integer pairs, empty when 'pic_str' is
                missing or empty
            """
            pic_str = res.get('pic_str')
            # A response without coordinates must not crash in split()/int('').
            if not pic_str:
                return []
            return [[int(num) for num in group.split(',')] for group in pic_str.split('|')]

        def login(self):
            """Wait one second, then click the login button."""
            time.sleep(1)
            login_element = self.driver.find_element(By.ID, 'loginSub')
            self.logger.info('开始点击登录')
            login_element.click()

        def main(self, max_retries=5):
            """
            Solve the captcha, click the returned points and log in.

            When the page reports a failed verification the whole procedure is
            repeated, at most ``max_retries`` more times, so a captcha that
            keeps failing cannot recurse without limit.

            :param max_retries: number of further attempts allowed after a failure
            :return: None
            """
            image = self.get_image()
            byte_array = BytesIO()
            # Store the picture in memory as PNG; format is passed by keyword
            # (the old format('png') only worked because the builtin format()
            # returns the string unchanged).
            image.save(byte_array, format='png')
            try:
                # Send the image bytes to the recognition service.
                res = self.chaojiying.post_pic(byte_array.getvalue(), self.config.get_value('CODE_KIND'))
                self.logger.info('得到打码平台%s' % res)
                location = self.get_points(res)
                if not location:
                    self.logger.error('No click coordinates in response: %s' % res)
                    return
                self.logger.info('得到识别的坐标%s' % location)
                self.get_click_words(location)
                self.login()
                time.sleep(6)

                self.driver.switch_to.window(self.driver.window_handles[-1])
                self.logger.info('url:%s' % self.driver.current_url)

                # After a successful login the URL contains 'initMy'.
                if 'initMy' in self.driver.current_url:
                    self.logger.info('用户登录成功')
                    self.driver.save_screenshot(os.path.join(settings.image_path, 'full_login.png'))
                    return

                check_error = self.driver.find_element(By.ID, 'error_msgmypasscode1')
                check_style = check_error.get_attribute('style')
                self.logger.debug('style=%s' % check_style)
                # The captcha error element is hidden ('none' in its style) when
                # verification passed.
                if 'none' in check_style:
                    self.logger.info('验证成功')
                else:
                    self.logger.error('验证失败!!!')
                    if max_retries > 0:
                        self.main(max_retries - 1)
                    else:
                        self.logger.error('Captcha retry limit reached, giving up')
            except Exception as e:
                # Top-level boundary of the demo: log the failure and stop.
                self.logger.error('返回异常!!!%s' % e)
    
    # Script entry point: open the login page, then solve the captcha and log in.
    if __name__ == '__main__':
        c = Check_Code_Click_Demo()
        c.open_url()
        c.main()
    demo.py

    bin目录

    import sys,os
    
    # Project root: the parent of the directory that contains this script.
    BASE_DIR=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(BASE_DIR)  # put the project root on sys.path so the core/conf packages import
    
    from core import demo
    # NOTE(review): the conf files shown with this project are my_logset.py and
    # settings.py; confirm that conf/my_log_settings.py exists, otherwise this
    # import fails.
    from conf import my_log_settings
    
    # Entry point: open the login page, then solve the captcha and log in.
    if __name__ == '__main__':
        # my_log_settings.load_my_logging_cfg(__name__)
        de = demo.Check_Code_Click_Demo()
        de.open_url()
        de.main()
    start.py
  • 相关阅读:
    SQL的高级属性-
    查询
    SQL语句
    CSS的创建样式
    表单部分属性值
    HTML的语法和基本标签
    网页制作与HTML基本结构
    小程序button 去边框
    关于axios跨域带cookie
    Uncaught Error: code length overflow. (1604>1056)
  • 原文地址:https://www.cnblogs.com/xiao-apple36/p/8911787.html
Copyright © 2020-2023  润新知