• OCR识别


    步骤分两步:

    第一步先将图片摆正,输出扫描版(应用透视变换)

    第二步用pytesseract对摆正后的扫描版图片做文字识别即可

    main.py

    import torch
    import numpy as np
    import cv2
    import math
    import test
    
    
    def show(img):
        """Display ``img`` in a window titled 'name' until a key is pressed.

        Every OpenCV window is destroyed once the key press is received.
        """
        window_title = 'name'
        cv2.imshow(window_title, img)
        cv2.waitKey(0)  # 0 means: wait without a timeout
        cv2.destroyAllWindows()
    
    
    def _order_corners(coor):
        """Return four corner points as (top-left, bottom-left, top-right, bottom-right).

        The points are split into a left and a right pair by x coordinate, and
        each pair is then ordered by y (image coordinates: y grows downwards).
        This assumes the quadrilateral is rotated by less than about 45 degrees,
        so that its two left-most points really are its left edge.

        Raises:
            ValueError: if ``coor`` does not contain exactly four 2-D points.
        """
        pts = np.asarray(coor, dtype=np.float32).reshape(-1, 2)
        if pts.shape[0] != 4:
            raise ValueError('expected 4 corner points, got {}'.format(pts.shape[0]))
        by_x = pts[np.argsort(pts[:, 0], kind='stable')]
        left, right = by_x[:2], by_x[2:]
        tl, bl = left[np.argsort(left[:, 1], kind='stable')]
        tr, br = right[np.argsort(right[:, 1], kind='stable')]
        return tl, bl, tr, br


    def pers_transform(img, coor):
        """Warp the quadrilateral ``coor`` of ``img`` onto an upright rectangle.

        Args:
            img: image handed to ``cv2.warpPerspective``.
            coor: four (x, y) corner points of the region to straighten, in any
                order.

        Returns:
            The warped image, ``w`` pixels wide and ``h`` pixels high, where
            ``w`` and ``h`` are the longer of the two opposite edge lengths.

        Raises:
            ValueError: if ``coor`` does not contain exactly four 2-D points.
        """
        tl, bl, tr, br = _order_corners(coor)
        scoor = np.array([tl, bl, tr, br], dtype=np.float32)

        # Take the larger of each pair of opposite edges: the approximated
        # contour is only a general quadrilateral, not necessarily a rectangle.
        top_w = math.hypot(tr[0] - tl[0], tr[1] - tl[1])
        bottom_w = math.hypot(br[0] - bl[0], br[1] - bl[1])
        w = int(max(top_w, bottom_w))
        left_h = math.hypot(bl[0] - tl[0], bl[1] - tl[1])
        right_h = math.hypot(br[0] - tr[0], br[1] - tr[1])
        h = int(max(left_h, right_h))

        # Destination corners, listed in the same order as ``scoor``.
        dcoor = np.array([[0, 0], [0, h], [w, 0], [w, h]], dtype=np.float32)
        trans_m = cv2.getPerspectiveTransform(scoor, dcoor)
        return cv2.warpPerspective(img, trans_m, (w, h))
    
    
    
    
    if __name__ == '__main__':
        # Pipeline: load the photo, find the largest four-cornered contour,
        # straighten it with a perspective transform, binarise, save, then OCR.
        src = 'C:/Users/Dell/PycharmProjects/ExtractText/images/receipt.jpg'
        dst = 'C:/Users/Dell/PycharmProjects/ExtractText/scan.jpg'

        img = cv2.imread(src)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError('cannot read image: {!r}'.format(src))

        # Resize to a fixed height of 1000 px while keeping the aspect ratio.
        scale = img.shape[0] / 1000
        img_k = cv2.resize(img, (int(img.shape[1] / scale), 1000))
        img_gray = cv2.cvtColor(img_k, cv2.COLOR_BGR2GRAY)
        img_canny = cv2.Canny(img_gray, 200, 255)

        # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
        # (contours, hierarchy) on 4.x; index -2 is the contour list in both.
        contours = cv2.findContours(img_canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
        contours = sorted(contours, key=cv2.contourArea, reverse=True)

        # Use the largest contour whose polygon approximation has four vertices.
        approx = None
        for contour in contours:
            peri = cv2.arcLength(contour, True)
            candidate = cv2.approxPolyDP(contour, 0.02 * peri, True)
            if len(candidate) == 4:
                approx = candidate
                break
        if approx is None:
            raise RuntimeError('no four-cornered contour found in {!r}'.format(src))

        # Debug preview of the detected outline; not displayed by default.
        img_k_copy = img_k.copy()
        cv2.drawContours(img_k_copy, [approx], -1, (0, 0, 255), 2)

        # Contour points come as (N, 1, 2); flatten them to four (x, y) rows.
        img_pers = pers_transform(img_gray, approx.reshape(4, 2))
        img_pers_bin = cv2.threshold(img_pers, 150, 255, cv2.THRESH_BINARY)[1]

        # Write to the same path that is handed to the OCR step below.
        if not cv2.imwrite(dst, img_pers_bin):
            raise OSError('cannot write image: {!r}'.format(dst))
        print('Accepted')
        test.scan(dst)

    test.py

    from PIL import Image
    import cv2
    import numpy as np
    import pytesseract
    
    def show(img):
        """Open a window named 'name' showing ``img`` and wait for a key press.

        After the key press all OpenCV windows are closed.
        """
        title = 'name'
        no_timeout = 0  # waitKey(0) waits until a key is pressed
        cv2.imshow(title, img)
        cv2.waitKey(no_timeout)
        cv2.destroyAllWindows()
    
    def scan(dst):
        """Run OCR on the image file ``dst``, print the text and preview the image.

        Args:
            dst: path of the image file to recognise.

        Returns:
            The text returned by ``pytesseract.image_to_string``.

        Raises:
            FileNotFoundError: if OpenCV cannot read ``dst``.
        """
        img = cv2.imread(dst)
        if img is None:
            # cv2.imread returns None on failure; fail here with a clear message
            # instead of letting cv2.imshow raise an opaque error later.
            raise FileNotFoundError('cannot read image: {!r}'.format(dst))

        # The context manager closes the file handle Pillow keeps open.
        with Image.open(dst) as img_k:
            text = pytesseract.image_to_string(img_k)
        print(text)
        show(img)
        return text
    自己选择的路,跪着也要走完。朋友们,虽然这个世界日益浮躁起来,只要能够为了当时纯粹的梦想和感动坚持努力下去,不管其它人怎么样,我们也能够保持自己的本色走下去。
  • 相关阅读:
    ubuntu下配置Apache
    ubuntu 下配置Web服务器
    ubuntu 笔记一
    域名解析
    C# Enum,Int,String的互相转换
    C# 得到本机局域网IP地址
    C# 连接 SQLServer 及操作
    C# OpenFileDialog 使用
    如何解决 IntelliJ Idea 编译 Java 项目时,找不到包或找不到符号的问题?
    阿里巴巴 MySQL 数据库之 SQL 语句规约 (三)
  • 原文地址:https://www.cnblogs.com/WTSRUVF/p/15292218.html
Copyright © 2020-2023  润新知