opencv实战-文档扫描

一、文档扫描步骤

1、原图操作-边缘检测
2、原图操作-获取轮廓
3、原图操作-变换方正
4、OCR识别

二、原图操作

import numpy as np
import cv2

def cv_show(name, img):
    """Show ``img`` in a window titled ``name`` until a key is pressed.

    Args:
        name: Title of the window.
        img: Image to display.

    The call blocks on the key press and then closes every OpenCV window.
    """
    wait_forever = 0  # a delay of 0 makes waitKey block until a key press
    cv2.imshow(name, img)
    cv2.waitKey(wait_forever)
    cv2.destroyAllWindows()

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Scale ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: Source image; only ``image.shape[:2]`` (rows, columns) is read.
        width: Desired output width in pixels. Takes precedence over
            ``height`` when both are given.
        height: Desired output height in pixels, used only when ``width``
            is ``None``.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself (not a copy) when neither
        ``width`` nor ``height`` is supplied.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale; height follows proportionally.
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        # Height drives the scale; width follows proportionally.
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, target, interpolation=inter)

def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The primary rule uses image coordinates (x to the right, y downwards):
    the top-left corner has the smallest ``x + y``, the bottom-right the
    largest ``x + y``, the top-right the smallest ``y - x`` and the
    bottom-left the largest ``y - x``.

    That rule can select the same input point for two different corners when
    the quadrilateral is rotated by roughly 45 degrees (for example a diamond
    shape), which would produce a duplicated corner. When that happens for a
    ``(4, 2)`` input, the points are instead ordered by their angle around the
    centroid, starting from the point with the smallest ``x + y``, so that
    every input point appears exactly once. Whenever the primary rule already
    yields four distinct points its result is returned unchanged.

    Args:
        pts: Array-like of ``(x, y)`` points, normally of shape ``(4, 2)``.

    Returns:
        A ``(4, 2)`` ``float32`` array holding the ordered corners.
    """
    pts = np.asarray(pts)
    rect = np.zeros((4, 2), dtype="float32")

    # Primary rule: extremes of x + y and of y - x.
    s = pts.sum(axis=1)
    d = np.diff(pts, axis=1).ravel()
    picks = [
        int(np.argmin(s)),  # top-left
        int(np.argmin(d)),  # top-right
        int(np.argmax(s)),  # bottom-right
        int(np.argmax(d)),  # bottom-left
    ]

    if len(set(picks)) < 4 and pts.shape == (4, 2):
        # Degenerate selection: one point was chosen twice. Sort the points
        # by increasing angle around the centroid; with y pointing down this
        # walks the corners in the tl -> tr -> br -> bl direction.
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        cycle = np.argsort(angles, kind="stable")
        # Rotate the cycle so that it starts at the smallest x + y.
        start = int(np.argmin(s[cycle]))
        picks = np.roll(cycle, -start)

    rect[:] = pts[picks]
    return rect

def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` into an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four ``(x, y)`` corner points in any order; they are sorted with
            ``order_points`` before use.

    Returns:
        The perspective-corrected image. Its width and height are the longer
        of the two opposite edge lengths of the quadrilateral, truncated to
        integers.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Output width: the longer of the bottom and top edges.
    out_w = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_h = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Destination corners, in the same tl, tr, br, bl order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # Map the approximated source quadrilateral onto the target rectangle.
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

# Load the photo of the document; cv2.imread returns None instead of raising
# when the file is missing or unreadable, so check explicitly.
image = cv2.imread('receipt.jpg')
if image is None:
    raise FileNotFoundError("could not read image file 'receipt.jpg'")
# Scale factor between the original image and the 500-px-high working copy;
# used to map the detected contour back onto the original for the warp.
ratio = image.shape[0] / 500.0
orig = image.copy()
# Work on a downscaled copy of the original.
image = resize(orig, height=500)
# Pre-processing: convert to grayscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Gaussian blur to suppress noise.
gray = cv2.GaussianBlur(gray, (5, 5), 0)
# Edge detection.
edged = cv2.Canny(gray, 75, 100)
# Contour detection. findContours returns (contours, hierarchy) in OpenCV 2.x
# and 4.x but (image, contours, hierarchy) in 3.x; index -2 is the contour
# list in every version.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep the five largest contours by area, largest first.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
# Look for the largest contour that approximates to a quadrilateral.
screenCnt = None
for c in cnts:
    # Perimeter of the contour; True marks the contour as closed.
    peri = cv2.arcLength(c, True)
    # Polygonal approximation; the second argument (epsilon) is the maximum
    # distance allowed between the original contour and its approximation.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # Take the first 4-point approximation and stop, so that a smaller
    # candidate later in the list cannot overwrite the largest one.
    if len(approx) == 4:
        screenCnt = approx
        break
if screenCnt is None:
    raise RuntimeError("no four-cornered document contour found in 'receipt.jpg'")
# Draw the detected outline on the working copy.
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
# Perspective transform on the original image; the contour is scaled back
# from working-copy coordinates to original coordinates with ``ratio``.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
# Convert the rectified document to grayscale.
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
# Binarise with a fixed threshold.
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite('scan.jpg', ref)
# Kept from the original script; no window is opened in this section.
cv2.waitKey(0)

三、调用OCR识别

# https://digi.bib.uni-mannheim.de/tesseract/
# 配置环境变量，如 E:\Program Files (x86)\Tesseract-OCR
# tesseract -v进行测试
# tesseract XXX.png result 得到结果（识别文本写入 result.txt）
# pip install pytesseract
# 打开 Anaconda 安装目录下的 Lib\site-packages\pytesseract\pytesseract.py
# 将其中的 tesseract_cmd 修改为 tesseract 可执行文件的绝对路径即可
from PIL import Image
import pytesseract
import cv2
import os

# Pre-processing applied before OCR: 'blur' or 'thresh'.
preprocess = 'blur' #thresh
# cv2.imread returns None instead of raising when the file cannot be read.
image = cv2.imread('scan.jpg')
if image is None:
    raise FileNotFoundError("could not read image file 'scan.jpg'")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# The two modes are mutually exclusive.
if preprocess == "thresh":
    # Otsu's method picks the threshold automatically.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    # Median blur with a 3x3 kernel.
    gray = cv2.medianBlur(gray, 3)
# Write the pre-processed image to a temporary PNG named after the process id.
filename = "{}.png".format(os.getpid())
if not cv2.imwrite(filename, gray):
    raise IOError("could not write temporary file '{}'".format(filename))
try:
    # Close the PIL file handle before the file is deleted.
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    # Always remove the temporary file, even when OCR fails.
    os.remove(filename)
print(text)

相关阅读:
Javascript FP-ramdajs
微信小程序开发
 SPA for HTML5
One Liners to Impress Your Friends
Sass (Syntactically Awesome StyleSheets)
iOS App Icon Template 5.0
React Native Life Cycle and Communication
Meteor framework
RESTful Mongodb
Server-sent Events
原文地址：https://www.cnblogs.com/wu-wu/p/14043192.html