• opencv实战-文档扫描


    一、文档扫描步骤

    1、原图操作-边缘检测
    2、原图操作-获取轮廓
    3、原图操作-变换方正
    4、OCR识别

    二、原图操作

    import numpy as np
    import cv2
    
    def cv_show(name, img):
        """Display ``img`` in a window titled ``name`` until a key is pressed.

        Args:
            name: Window title passed to ``cv2.imshow``.
            img: Image to display.

        All OpenCV windows are destroyed afterwards, even if waiting for the
        key press is interrupted (e.g. by Ctrl+C), so no window is left open.
        """
        cv2.imshow(name, img)
        try:
            # 0 means "wait indefinitely for a key press".
            cv2.waitKey(0)
        finally:
            cv2.destroyAllWindows()
    
    def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
        """Resize ``image`` to a target width or height, keeping the aspect ratio.

        Args:
            image: Input image; only ``image.shape[:2]`` (rows, columns) is read.
            width: Desired output width in pixels. When given, it takes
                precedence and ``height`` is ignored.
            height: Desired output height in pixels, used only when ``width``
                is ``None``.
            inter: Interpolation flag forwarded to ``cv2.resize``.

        Returns:
            The input image unchanged when neither ``width`` nor ``height`` is
            given; otherwise the result of ``cv2.resize``.
        """
        src_h, src_w = image.shape[:2]

        # Nothing requested: hand the image back untouched.
        if width is None and height is None:
            return image

        if width is not None:
            # Scale the height by the same factor as the width.
            scale = width / float(src_w)
            target_size = (width, int(src_h * scale))
        else:
            # Scale the width by the same factor as the height.
            scale = height / float(src_h)
            target_size = (int(src_w * scale), height)

        # cv2.resize takes the size as (width, height).
        return cv2.resize(image, target_size, interpolation=inter)
    
    def order_points(pts):
        """Order corner points as top-left, top-right, bottom-right, bottom-left.

        Args:
            pts: Corner coordinates as rows of ``(x, y)``. Any array-like whose
                elements can be viewed as rows of two values is accepted, e.g.
                a ``(4, 2)`` array, a list of pairs, or a contour-shaped
                ``(4, 1, 2)`` array.

        Returns:
            A ``(4, 2)`` ``float32`` array holding the selected points in the
            order top-left, top-right, bottom-right, bottom-left.
        """
        # Normalise the input so lists and (4, 1, 2) contour arrays behave the
        # same as a plain (4, 2) array; the axis=1 reductions below rely on it.
        pts = np.asarray(pts).reshape(-1, 2)

        rect = np.zeros((4, 2), dtype="float32")

        # x + y is smallest at the top-left corner and largest at the
        # bottom-right corner.
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]

        # np.diff along axis 1 yields y - x, which is smallest at the top-right
        # corner and largest at the bottom-left corner.
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]
        return rect
    
    def four_point_transform(image, pts):
        """Warp the quadrilateral given by ``pts`` into an axis-aligned rectangle.

        Args:
            image: Source image passed to ``cv2.warpPerspective``.
            pts: Four corner points; they are put into top-left, top-right,
                bottom-right, bottom-left order by ``order_points``.

        Returns:
            The perspective-corrected image. Its width and height are the
            longer of the two opposite edge lengths, truncated to integers.
        """
        corners = order_points(pts)
        top_left, top_right, bottom_right, bottom_left = corners

        def edge_length(p, q):
            # Euclidean distance between two (x, y) points.
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        # Output width: the longer of the bottom and top edges.
        bottom_edge = edge_length(bottom_right, bottom_left)
        top_edge = edge_length(top_right, top_left)
        out_width = max(int(bottom_edge), int(top_edge))

        # Output height: the longer of the right and left edges.
        right_edge = edge_length(top_right, bottom_right)
        left_edge = edge_length(top_left, bottom_left)
        out_height = max(int(right_edge), int(left_edge))

        # Destination corners, in the same order as ``corners``.
        last_col = out_width - 1
        last_row = out_height - 1
        target = np.array(
            [[0, 0], [last_col, 0], [last_col, last_row], [0, last_row]],
            dtype="float32",
        )

        # Map the ordered source corners onto the destination rectangle.
        matrix = cv2.getPerspectiveTransform(corners, target)
        return cv2.warpPerspective(image, matrix, (out_width, out_height))
    
    # Scan pipeline: load 'receipt.jpg', find the document outline on a
    # down-scaled copy, warp the full-resolution original to a flat rectangle,
    # binarise it and save the result as 'scan.jpg'.
    image = cv2.imread('receipt.jpg')
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read image 'receipt.jpg'")

    # Scale factor between the original and the 500-pixel-high working copy;
    # used to map detected corners back onto the original image.
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    # Work on a resized copy of the original.
    image = resize(orig, height=500)

    # Pre-processing: grayscale, then Gaussian blur to suppress noise.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    # Edge detection.
    edged = cv2.Canny(gray, 75, 100)

    # Contour detection. findContours returns (image, contours, hierarchy) on
    # OpenCV 3.x and (contours, hierarchy) on other versions; index -2 is the
    # contour list in both layouts.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Keep the five contours with the largest area, largest first.
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    screenCnt = None
    for c in cnts:
        # Perimeter of the contour, treated as a closed curve.
        peri = cv2.arcLength(c, True)
        # Polygonal approximation; the second argument is the maximum distance
        # allowed between the original contour and its approximation.
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        # Take the first (i.e. largest-area) contour that reduces to 4 points.
        if len(approx) == 4:
            screenCnt = approx
            break

    if screenCnt is None:
        raise RuntimeError(
            "no four-point contour found among the five largest contours")

    # Draw the detected outline on the resized working image.
    cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    # Perspective transform on the original image; the corners are scaled back
    # up from the working copy by ``ratio``.
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
    # Convert to grayscale and apply a fixed binary threshold.
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
    cv2.imwrite('scan.jpg', ref)
    cv2.waitKey(0)

    三、调用OCR识别

    # https://digi.bib.uni-mannheim.de/tesseract/
    # 配置环境变量,如 E:\Program Files (x86)\Tesseract-OCR
    # tesseract -v进行测试
    # tesseract XXX.png 得到结果 
    # pip install pytesseract
    # 打开 Anaconda\Lib\site-packages\pytesseract\pytesseract.py
    # tesseract_cmd 修改为绝对路径即可
    from PIL import Image
    import pytesseract
    import cv2
    import os
    
    # OCR step: load 'scan.jpg', optionally pre-process it, write it to a
    # temporary PNG and run Tesseract on that file.
    preprocess = 'blur' #thresh
    image = cv2.imread('scan.jpg')
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read image 'scan.jpg'")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # The two pre-processing modes are mutually exclusive.
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # Temporary file named after the current process id.
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    try:
        # Close the image handle before the file is deleted.
        with Image.open(filename) as ocr_input:
            text = pytesseract.image_to_string(ocr_input)
    finally:
        # Remove the temporary file even when OCR fails.
        if os.path.exists(filename):
            os.remove(filename)
    print(text)
  • 相关阅读:
    Javascript FP-ramdajs
    微信小程序开发
    SPA for HTML5
    One Liners to Impress Your Friends
    Sass (Syntactically Awesome StyleSheets)
    iOS App Icon Template 5.0
    React Native Life Cycle and Communication
    Meteor framework
    RESTful Mongodb
    Server-sent Events
  • 原文地址:https://www.cnblogs.com/wu-wu/p/14043192.html
Copyright © 2020-2023  润新知