• TensorFlow OCR for fixed-length CAPTCHA images (4 digits, 0-9) - CNN approach


    First, let's generate some CAPTCHA images:

    import cv2 as cv
    import numpy as np
    import os
    
    
    def create_digit_image(dir_path):
        image = np.ones(shape=[24, 72], dtype=np.uint8)
        image = image * 127
        a = np.random.randint(0, 10)
        b = np.random.randint(0, 10)
        c = np.random.randint(0, 10)
        d = np.random.randint(0, 10)
        text = str(a)+str(b)+str(c)+str(d)
        print(text)
        cv.putText(image, text, (6, 20), cv.FONT_HERSHEY_PLAIN, 1.5, (255), 2)
        for i in range(100):
            row = np.random.randint(0, 24)
            col = np.random.randint(0, 72)
            image[row, col] = 0
        full_path = os.path.join(dir_path, text + ".png")
        cv.imwrite(full_path, image)
    
    
    train_dir = os.path.join(os.getcwd(), 'train')
    test_dir = os.path.join(os.getcwd(), 'test')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    
    for i in range(1000):
        create_digit_image(train_dir)
    
    for i in range(100):
        create_digit_image(test_dir)

    This generates 1000 training images and 100 test images.
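
    As a quick sanity check (a minimal sketch, assuming the generator script above has just been run in the current working directory), you can count the files and read one back. Note that because the code text is used as the file name, duplicate random codes overwrite each other, so slightly fewer than 1000 files may remain:

    import os
    import cv2 as cv
    
    train_dir = os.path.join(os.getcwd(), 'train')
    files = os.listdir(train_dir)
    print(len(files))  # close to 1000; duplicate codes overwrite earlier files
    
    sample = files[0]
    img = cv.imread(os.path.join(train_dir, sample), cv.IMREAD_GRAYSCALE)
    print(sample[0:4], img.shape)  # the 4-digit label comes from the file name; shape is (24, 72)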

    One-hot encoding:

    def text2vec(text):
        text_len = len(text)
        if text_len > 4:
            print("text code : ", text)
            raise ValueError('CAPTCHA code is at most 4 characters')
        vector = np.zeros(4 * 10)
    
        def char2pos(c):
            k = ord(c)
            if 48 <= k <= 57:
                return k - 48
        for i, c in enumerate(text):
            idx = i * 10 + char2pos(c)
            vector[idx] = 1
        return vector
    
    
    # convert a one-hot vector back to text
    def vec2text(vec):
        char_pos = vec.nonzero()[0]
        text = []
        for i, c in enumerate(char_pos):
            char_idx = c % 10
            if char_idx < 10:
                char_code = char_idx + ord('0')
            else:
                raise ValueError('error')
            text.append(chr(char_code))
        return "".join(text)
    
    
    
    s=text2vec('1030')
    print(s)
    
    s = vec2text(s)
    print(s)
    

      

    [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
    1030
    

      

    Reshaped into rows of 10, this becomes:

    [
      0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 
      1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
      0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
      1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
    ]
    
    Row 1 encodes 1
    Row 2 encodes 0
    Row 3 encodes 3
    Row 4 encodes 0
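
    In other words, each group of 10 entries is a one-hot code for one character position. A minimal NumPy sketch of the same decoding idea (reshape to (4, 10), then take the argmax of each row), shown only to illustrate what vec2text does:

    import numpy as np
    
    vec = text2vec('1030')
    digits = np.argmax(vec.reshape(4, 10), axis=1)    # array([1, 0, 3, 0])
    print("".join(str(d) for d in digits))            # 1030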
    

      

    Full code:

    import os
    import tensorflow as tf
    from random import choice
    from tfdemo3.data_engine import get_one_image, get_image_files
    
    w = 72
    h = 24
    label_vector_size = 40
    train_dir = os.path.join(os.getcwd(), 'train')
    test_dir = os.path.join(os.getcwd(), 'test')
    train_files = get_image_files(train_dir)
    test_files = get_image_files(test_dir)
    
    # placeholders
    x_image = tf.placeholder(shape=[None, h, w, 1], dtype=tf.float32)
    y = tf.placeholder(shape=[None, label_vector_size], dtype=tf.float32)
    keep_prob = tf.placeholder(dtype=tf.float32)
    
    # convolution layer 1
    conv1_w = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.1, dtype=tf.float32))
    conv1_bias = tf.Variable(tf.random_normal(shape=[32], stddev=0.1))
    conv1_out = tf.nn.conv2d(input=x_image, filter=conv1_w, strides=[1, 1, 1, 1], padding='SAME')
    conv1_relu = tf.nn.relu(tf.add(conv1_out, conv1_bias))
    
    # max pooling 1
    maxpooling_1 = tf.nn.max_pool(conv1_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    
    # convolution layer 2
    conv2_w = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.1, dtype=tf.float32))
    conv2_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
    conv2_out = tf.nn.conv2d(input=maxpooling_1, filter=conv2_w, strides=[1, 1, 1, 1], padding='SAME')
    conv2_relu = tf.nn.relu(tf.add(conv2_out, conv2_bias))
    
    # max pooling 2
    maxpooling_2 = tf.nn.max_pool(conv2_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    
    # convolution layer 3
    conv3_w = tf.Variable(tf.random_normal(shape=[3, 3, 64, 64], stddev=0.1, dtype=tf.float32))
    conv3_bias = tf.Variable(tf.random_normal(shape=[64], stddev=0.1))
    conv3_out = tf.nn.conv2d(input=maxpooling_2, filter=conv3_w, strides=[1, 1, 1, 1], padding='SAME')
    conv3_relu = tf.nn.relu(tf.add(conv3_out, conv3_bias))
    
    # max pooling 3
    maxpooling_3 = tf.nn.max_pool(conv3_relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
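    # after three 2x2 max-pool layers the 24x72 input is reduced to 3x9, hence the flattened fc input size of 3*9*64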
    
    # fc-1
    w_fc1 = tf.Variable(tf.random_normal(shape=[3*9*64, 1024], stddev=0.1, dtype=tf.float32))
    b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))
    h_pool2 = tf.reshape(maxpooling_3, [-1, 3*9*64])
    output_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2, w_fc1), b_fc1))
    
    # dropout
    h2 = tf.nn.dropout(output_fc1, keep_prob=keep_prob)
    
    # fc-2
    w_fc2 = tf.Variable(tf.random_normal(shape=[1024, 40], stddev=0.1, dtype=tf.float32))
    b_fc2 = tf.Variable(tf.constant(0.1, shape=[40]))
    y_conv = tf.add(tf.matmul(h2, w_fc2), b_fc2)
    
    # loss
    cross_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y)
    loss = tf.reduce_mean(cross_loss)
    step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    
    # accuracy
    saver = tf.train.Saver()
    predict = tf.reshape(y_conv, [-1, 4, 10])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(y, [-1, 4, 10]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
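    # note: this is per-character accuracy (each of the 4 positions counted separately), not whole-CAPTCHA accuracy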
    
    
    def get_train_batch(files, batch_size=128):
        images = []
        labels = []
        for _ in range(batch_size):
            image, label = get_one_image(train_dir, choice(files))
            images.append(image)
            labels.append(label)
        return images, labels
    
    
    def get_batch(root_dir, files):
        images = []
        labels = []
        for f in files:
            image, label = get_one_image(root_dir, f)
            images.append(image)
            labels.append(label)
        return images, labels
    
    
    test_images, test_labels = get_batch(test_dir, test_files)
    
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(500):
            batch_xs, batch_ys = get_train_batch(train_files, 100)
            curr_loss, curr_ = sess.run([loss, step], feed_dict={x_image: batch_xs, y: batch_ys, keep_prob: 0.5})
            if (i + 1) % 100 == 0:
                print("run step (%d) ..., loss : (%f)" % (i+1, curr_loss))
                curr_acc = sess.run(accuracy, feed_dict={x_image: test_images, y: test_labels, keep_prob: 1.0})
                print("current test Accuracy : %f" % (curr_acc))
        saver.save(sess, "./ckp/code_break.ckpt", global_step=500)
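
    After training, the checkpoint can be restored and used to decode a single image. A minimal inference sketch, assuming the graph-building code above has already run in the same script and the checkpoint was written to ./ckp as in the saver.save call:

    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint("./ckp"))
        image, label = get_one_image(test_dir, test_files[0])
        pred = sess.run(max_idx_p, feed_dict={x_image: [image], keep_prob: 1.0})
        print("file:", test_files[0], "predicted:", "".join(str(d) for d in pred[0]))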
    

      

    data_engine.py

    import numpy as np
    import cv2 as cv
    import os
    
    
    def text2vec(text):
        text_len = len(text)
        if text_len > 4:
            print("text code : ", text)
            raise ValueError('CAPTCHA code is at most 4 characters')
        vector = np.zeros(4 * 10)
    
        def char2pos(c):
            k = ord(c)
            if 48 <= k <= 57:
                return k - 48
        for i, c in enumerate(text):
            idx = i * 10 + char2pos(c)
            vector[idx] = 1
        return vector
    
    
    # convert a one-hot vector back to text
    def vec2text(vec):
        char_pos = vec.nonzero()[0]
        text = []
        for i, c in enumerate(char_pos):
            char_idx = c % 10
            if char_idx < 10:
                char_code = char_idx + ord('0')
            else:
                raise ValueError('error')
            text.append(chr(char_code))
        return "".join(text)
    
    
    def get_one_image(root_dir, f):
        gray = cv.imread(os.path.join(root_dir, f), cv.IMREAD_GRAYSCALE)
        resize = cv.resize(gray, (72, 24))
        result = np.zeros(resize.shape, dtype=np.float32)
        result = cv.normalize(resize, result, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F)
        image = np.expand_dims(result, axis=2)
        label = text2vec(f[0:4])
        return image, label
    
    
    def get_image_files(root_dir):
        img_list = []
        files = os.listdir(root_dir)
        for f in files:
            if os.path.isfile(os.path.join(root_dir, f)):
                img_list.append(f)
        return img_list
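
    A quick way to exercise data_engine.py on its own (a sketch, assuming the train directory generated earlier sits in the current working directory):

    import os
    from tfdemo3.data_engine import get_image_files, get_one_image, vec2text
    
    train_dir = os.path.join(os.getcwd(), 'train')
    files = get_image_files(train_dir)
    image, label = get_one_image(train_dir, files[0])
    print(files[0], image.shape, vec2text(label))   # e.g. 1030.png (24, 72, 1) 1030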
    

      

    run step (100) ..., loss : (0.023609)
    current test Accuracy : 0.992500
    run step (200) ..., loss : (0.000665)
    current test Accuracy : 1.000000
    run step (300) ..., loss : (0.000046)
    current test Accuracy : 1.000000
    run step (400) ..., loss : (0.000010)
    current test Accuracy : 1.000000
    run step (500) ..., loss : (0.000005)
    current test Accuracy : 1.000000
    

      

    Convolutional networks really do perform well on this task.

  • Original article: https://www.cnblogs.com/aarond/p/ocr_validate_code.html