解析图形验证码登录系统

windows 10 下pip install 报错【PermissionError: [WinError 32] 另一个程序正在使用此文件，进程无法访问。】

打开 C:\Users\hasee\Python36\Lib\site-packages\pip\compat\__init__.py
将第75行改为 return s.decode('cp936')
原因：
编码问题，虽然py3统一用utf-8了。但win下的终端显示用的还是gbk编码。
CP936其实就是GBK，IBM在发明Code Page的时候将GBK放在第936页，所以叫CP936。

下载whl包后安装

下载对应python、tesseract版本的tesserocr包：
https://github.com/simonflueckiger/tesserocr-windows_build/releases
pip install XXX.whl 安装下载包

# -*- coding: utf-8 -*-
# @Time       : 2018/11/20 13:30
# @Author     : Philly
# @File       : crackImage.py
# @Description: 实时获取并解析图片验证码进行登录系统
import tesserocr
from PIL import Image
from urllib import request
from selenium import webdriver
from time import sleep
import pyautogui
from selenium.webdriver.common.action_chains import ActionChains
import os
import re




# Save the captcha to disk as an image file
def image_save_as():
    """Save the captcha image through the browser's right-click menu.

    Uses the module-level ``driver`` to locate the element with id
    ``valiCode``, opens its context menu, and then sends keyboard input
    through pyautogui. Nothing is returned; the file ends up wherever the
    browser's save dialog writes it.
    """
    captcha = driver.find_element_by_id("valiCode")
    # Right-click the captcha to open the browser context menu.
    ActionChains(driver).context_click(captcha).perform()
    # Walk the context menu down to "save image as" and activate it.
    pyautogui.typewrite(['down', 'down', 'enter', 'enter'])
    sleep(2)
    # Presumably confirms the save dialog that has opened by now -- verify
    # against the browser version in use.
    pyautogui.typewrite(['enter'])
    sleep(2)

def get_newest_image(image_path):
    """Return the path of the most recently modified entry in a directory.

    Args:
        image_path: Directory to scan (the folder the captcha is saved to).

    Returns:
        ``image_path`` joined with the name of the entry whose modification
        time is the latest.

    Raises:
        IndexError: If the directory contains no entries.
        OSError: If the directory cannot be listed or an entry cannot be
            stat'ed (propagated from ``os.listdir`` / ``os.path.getmtime``).
    """
    names = os.listdir(image_path)
    if not names:
        # Same exception type an empty listing always produced, but with a
        # message that says what actually went wrong.
        raise IndexError("no files found in directory: %r" % (image_path,))

    # Build each path with os.path.join instead of a hand-written separator,
    # then order by modification time so the newest entry comes last.
    by_mtime = sorted(
        names,
        key=lambda fn: os.path.getmtime(os.path.join(image_path, fn)),
    )
    return os.path.join(image_path, by_mtime[-1])


# NOTE: The bare string literal below is never assigned or executed; it only
# keeps two alternative ways of obtaining the captcha for reference:
#   1. take a full-page screenshot and crop it to the captcha element's box;
#   2. download the captcha directly from the "/GetValidateCode" URL.
# It refers to names (`image`, `url2`) that are not defined at module level.
"""
driver.save_screenshot("all.png")
print(image.location, image.size)
left = image.location['x']
top = image.location['y']
right = image.location['x'] + image.size['width']
bottom = image.location['y'] + image.size['height']

im = Image.open('all.png')
im = im.crop((left, top, right, bottom))
im.save('code9.png')

imageUrl = url2 + "/GetValidateCode"
request.urlretrieve(imageUrl, "code8.jpg")  # 把图片保存到本地code8.jpg

"""
# Convert the captcha image into text
def image_to_txt(image_new, threshold=80):
    """Run OCR on a captcha image and return the first run of digits found.

    The image is converted to grayscale, binarized with ``threshold``, and
    passed to ``tesserocr.image_to_text``. The raw OCR output is also
    written to ``result.txt`` in the current working directory.

    Args:
        image_new: Path of the captcha image file.
        threshold: Grayscale cut-off in the range 0-255. Intensities below
            it map to 0 and all others to 1. Defaults to 80.

    Returns:
        The first consecutive run of digits in the OCR output, as a string.

    Raises:
        IndexError: If the OCR output contains no digits.
    """
    # Open via a context manager so the source file handle is released as
    # soon as the grayscale copy has been made.
    with Image.open(image_new) as source:
        image = source.convert('L')

    # Lookup table for binarization: one output value per input intensity.
    table = [0 if level < threshold else 1 for level in range(256)]
    image = image.point(table, '1')

    result = tesserocr.image_to_text(image)

    # Keep the raw OCR output on disk; `with` guarantees the file is closed
    # even if the write fails.
    with open('result.txt', 'w') as f:
        f.write(result)

    # Drop all whitespace inside the recognized text.
    result_e = ''.join(result.split())
    # Keep digits only. The pattern must be r'\d+'; a plain 'd+' would match
    # the letter "d" instead of digits.
    digit_runs = re.findall(r'\d+', result_e)
    if not digit_runs:
        raise IndexError(
            "no digits recognized in captcha image %r (OCR output: %r)"
            % (image_new, result)
        )

    return digit_runs[0]

def login(result_w):
    """Fill in the login form and submit it.

    Uses the module-level ``driver``. Each field is typed into and followed
    by a one-second pause; the login button is then clicked and the function
    waits three more seconds.

    Args:
        result_w: Captcha text to type into the ``verifyCode`` field.
    """
    # (element id, text to type), in the order the form is filled in.
    form_fields = (
        ("LoginAccount", "zbsqy"),
        ("Password", "admin"),
        ("verifyCode", result_w),
    )
    for element_id, text in form_fields:
        driver.find_element_by_id(element_id).send_keys(text)
        sleep(1)

    driver.find_element_by_id("btnLogin").click()
    sleep(3)


if __name__ == "__main__":
    # `driver` is deliberately a module-level name: image_save_as() and
    # login() read it as a global.
    driver = webdriver.Chrome()
    driver.maximize_window()
    driver.get("http://192.168.14.38:88/User/Login")
    sleep(2)  # pause after navigation before interacting with the page

    # Directory the captcha is saved to by the "save image as" dialog.
    # Raw string with explicit separators; without the backslashes the
    # value is not a usable directory path.
    image_path = r"C:\Users\hasee\Downloads"

    # Pipeline: save the captcha -> pick the newest file in the download
    # directory -> OCR it -> log in with the recognized code.
    image_save_as()
    image_new = get_newest_image(image_path)
    result_w = image_to_txt(image_new)
    login(result_w)

报错

File "tesserocr.pyx", line 2445, in tesserocr._tesserocr.image_to_text
RuntimeError: Failed to init API, possibly an invalid tessdata path: C:

把对应的 tessdata 文件夹放到报错路径即可

相关阅读:
python开发必备：virtualenv虚拟环境（自用）
JavaScript经典实例
 javascript事件驱动及事件处理
 在HTML网页中嵌入脚本的方式
 JavaScript数据结构
 JavaScript语言调试技巧
 CSS+DIV布局
 在HTML文档中应用CSS
CSS常用属性
 定义CSS
原文地址：https://www.cnblogs.com/liuliu3/p/9988588.html