• 图像ocr识别(一)


    最近研究了一下 OCR 识别,本文介绍如何用 OpenCV 找出图像中的字符区域。方法虽然还不完善,但先记录下来;后续会往 CNN+RNN+CTC 方向走,这里就当作练手。

    效果1:

    效果2:

    效果3:

    效果4(识别率不太好,只把大框识别了,字符的分割有问题):

    import cv2
    import imutils
    import numpy as np
    from imageio import imread
    import math
    import matplotlib.pyplot as plt
    
    
    def point_distance(p1, p2):
        """Return the Euclidean distance between two 2-D points.

        Args:
            p1: Indexable pair ``(x, y)``.
            p2: Indexable pair ``(x, y)``.

        Returns:
            The straight-line distance between ``p1`` and ``p2`` as a float.
        """
        # math.hypot does not overflow on the intermediate squares the way
        # math.pow(x, 2) does for very large coordinate differences.
        return math.hypot(p2[0] - p1[0], p2[1] - p1[1])
    
    
    def calc_height_width(box):
        """Measure two adjacent sides of a four-point box.

        Args:
            box: Indexable sequence of at least four ``(x, y)`` points.

        Returns:
            A tuple ``(width, height)`` where ``width`` is the distance between
            ``box[1]`` and ``box[0]`` and ``height`` is the distance between
            ``box[0]`` and ``box[3]``. Note that, despite the function name,
            the width comes first.
        """
        first, second, last = box[0], box[1], box[3]
        side_a = point_distance(second, first)
        side_b = point_distance(first, last)
        return (side_a, side_b)
    
    
    fileName = 'test1'

    # A forward slash is used as the path separator: a trailing backslash
    # inside the quotes would escape the closing quote and break the literal.
    img = imread('imgs/' + fileName + '.jpg')
    img = imutils.resize(img, width=1920, height=2080)

    # Morphological close followed by an erosion, then edge detection.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    eroded = cv2.erode(closed, kernel)

    cannyImg = cv2.Canny(eroded, 200, 200)
    # Very large blur kernel; presumably meant to merge the edges of nearby
    # characters into one region per text line -- TODO confirm.
    blurred = cv2.GaussianBlur(cannyImg, (105, 105), 0)

    # Otsu picks the binarisation threshold automatically.
    _, skin = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, hierarchy = cv2.findContours(skin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Largest regions first.
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # Keep the rotated bounding boxes whose short side is 10-20 % of the long
    # side and whose sides are both at least 20 px.
    boxes = []
    for c in contours:
        rect = cv2.minAreaRect(c)
        # np.int0 was removed in NumPy 2.0; astype(np.intp) is the same cast.
        box = cv2.boxPoints(rect).astype(np.intp)
        (w, h) = calc_height_width(box)
        # Also covers the zero-size case, so the divisions below are safe.
        if w < 20 or h < 20:
            continue
        rate1 = h / w * 100
        rate2 = w / h * 100
        if (10 <= rate1 <= 20) or (10 <= rate2 <= 20):
            print((w, h), '--------', rate1, '%', rate2, '%')
            boxes.append(box)

    img = img.copy()
    i = 0
    
    
    def parse_chars(positions, min_thresh, min_range, max_range):
        """Split a 1-D profile into runs of above-threshold values.

        A run opens at the first index whose value is greater than
        ``min_thresh``. It is closed and reported when either

        * a value below ``min_thresh`` is reached and the run is at least
          ``min_range`` indices long, or
        * the run is still above the threshold and has grown longer than
          ``max_range`` indices.

        A run that is shorter than ``min_range`` when a below-threshold value
        is reached is not closed; it stays open and keeps growing across the
        gap. Values equal to ``min_thresh`` neither open nor close a run. A run
        that is still open when ``positions`` ends is not reported.

        Args:
            positions: Sequence of numeric values, one per index.
            min_thresh: Values strictly above this open/extend a run; values
                strictly below it may close one.
            min_range: Minimum length (``end - begin``) for a run to be closed
                at a below-threshold value.
            max_range: Length above which a still-active run is cut.

        Returns:
            A list of ``{'begin': int, 'end': int}`` dicts in index order,
            where ``end`` is the index at which the run was closed.
        """
        charInfos = []
        # None (not 0) marks "no run open", so a run that really starts at
        # index 0 is tracked correctly.
        begin = None
        for idx, value in enumerate(positions):
            if value > min_thresh:
                if begin is None:
                    begin = idx
                elif idx - begin > max_range:
                    # Run got too long: cut it here and start afresh.
                    charInfos.append({'begin': begin, 'end': idx})
                    begin = None
            elif value < min_thresh and begin is not None:
                if idx - begin >= min_range:
                    charInfos.append({'begin': begin, 'end': idx})
                    begin = None

        return charInfos
    
    
    def process_more(windowName, imgSrc):
        """Segment one text strip into characters and show the result.

        The strip is turned into a binary edge mask, the white pixels of the
        mask are counted per column, and ``parse_chars`` converts that column
        profile into horizontal spans. Every span is drawn as a rectangle on a
        copy of the input, which is then resized to 450 px wide and displayed.

        Args:
            windowName: Title of the ``cv2.imshow`` window.
            imgSrc: Image of a single text strip; it is not modified.

        Returns:
            None. The number of spans found is printed and the annotated
            image is shown in a window.
        """
        ori_imgSrc = imgSrc.copy()

        # Close, erode, detect edges, then thicken the edges so the strokes of
        # one character form a connected mask.
        kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        closed = cv2.morphologyEx(imgSrc, cv2.MORPH_CLOSE, kernel)
        imgSrc = cv2.erode(closed, kernel)

        imgSrc = cv2.Canny(imgSrc, 300, 300)
        kernel = np.ones((5, 5), np.uint8)
        imgSrc = cv2.dilate(imgSrc, kernel, iterations=1)
        _, imgSrc = cv2.threshold(imgSrc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        rows = ori_imgSrc.shape[0]
        # Vertical projection: number of white (255) pixels in each column.
        tags = np.count_nonzero(imgSrc == 255, axis=0)

        char_positions = parse_chars(positions=tags, min_thresh=8, min_range=25, max_range=100)

        print(len(char_positions))
        for p in char_positions:
            leftTop = (p['begin'], 0)
            rightBottom = (p['end'], rows - 2)
            cv2.rectangle(ori_imgSrc, leftTop, rightBottom, (0, 255, 0), 2)
        ori_imgSrc = imutils.resize(ori_imgSrc, width=450)
        cv2.imshow(windowName, ori_imgSrc)


    # Rectify every candidate box into an upright strip (long side horizontal)
    # and run the character segmentation on it.
    # NOTE(review): the corner labels below assume the point order returned by
    # cv2.boxPoints for these boxes -- confirm for your OpenCV version.
    for box_index, box in enumerate(boxes):
        (w, h) = calc_height_width(box)
        if w > h:
            # Source corners: top-left, bottom-left, top-right
            # (box points 3, 2, 4 counting from 1).
            matSrc = np.float32([
                [box[2][0], box[2][1]],
                [box[1][0], box[1][1]],
                [box[3][0], box[3][1]],
            ])
            matDst = np.float32([
                [0, 0],
                [0, h],
                [w, 0],
            ])
            matAffine = cv2.getAffineTransform(matSrc, matDst)
            dst = cv2.warpAffine(img, matAffine, (int(w), int(h)))
        else:
            # Box is taller than wide, so the output is w high and h wide.
            # Source corners: box points 4, 3, 1 counting from 1.
            matSrc = np.float32([
                [box[3][0], box[3][1]],
                [box[2][0], box[2][1]],
                [box[0][0], box[0][1]],
            ])
            matDst = np.float32([
                [0, 0],
                [0, w],
                [h, 0],
            ])
            matAffine = cv2.getAffineTransform(matSrc, matDst)
            dst = cv2.warpAffine(img, matAffine, (int(h), int(w)))

        process_more("ffff222asdfas" + str(box_index), dst.copy())

    img = imutils.resize(img, width=600, height=600)
    cv2.imshow("Frame6", img)
    # Keep the windows open until a key is pressed or the timeout expires.
    cv2.waitKey(100000)
    cv2.destroyAllWindows()

      

  • 相关阅读:
    智能合约初体验
    安装solidity遇见的问题——unused variable 'returned'
    Clojure学习笔记(二)——函数式编程
    《Java虚拟机并发编程》学习笔记
    Clojure学习笔记(一)——介绍、安装和语法
    Ubuntu配置pyethapp
    no leveldbjni64-1.8 in java.library.path
    Merkle Patricia Tree (MPT) 树详解
    Ubuntu下配置和编译cpp-ethereum客户端
    conda安装python库出现ssl error
  • 原文地址:https://www.cnblogs.com/aarond/p/OCR-1.html
Copyright © 2020-2023  润新知