• 2019年中国软杯-基于深度学习的银行卡号识别系统


      这次比赛,虽然没有挺进决赛,但是整个比赛的过程让自己成长了很多,也明白了很多

      首先这次比赛,名为“基于深度学习的银行卡号识别系统”,所以我们组在每个环节用的都是深度学习的内容(虽然有很多的图像处理技术可以直接做初期的银行卡的处理,但是我们毅然决然地选择了整个过程使用深度学习的技术)。我们这也是第一次这么系统地接触深度学习,在这之前,可以说是对深度学习一无所知、一窍不通。深度学习不仅仅需要计算机知识和敲代码的能力,同时更需要数学知识(这是针对想深层次接触深度学习的情况;如果仅仅是应用深度学习现成的模型,了解模型的原理和代码的含义就行了)

      我们小组对这方面的知识不是很全面,但是通过我们一段时间的学习还是有所突破

      下面就说一下我们小组完成整个比赛的流程

      (尝试过很多的方法,想直接对我们的银行卡进行长字符的识别,但是我们的训练集实在是太小了,所以还是走了把银行卡号切成单字符进行单字符的识别的方法(这种方法就是在时间和空间上比较浪费))

      1.首先就是将整张银行卡图片里面的银行卡号部分识别并切分出来,这一个环节我们用的技术就是Faster R-CNN目标检测的方法

      (详细代码不在这里粘了,有需要的可以在下面的Github进行下载)

    https://github.com/H-Designer/DeepLearning-Bank_Card_OCR

      将目标检测得到的银行卡号部分切分出来,进行保存

      主程序的代码如下:

      

    #!/usr/bin/env python
    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function
    import argparse
    import os
    import cv2
    import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf
    from lib.config import config as cfg
    from lib.utils.nms_wrapper import nms
    from lib.utils.test import im_detect
    from lib.nets.vgg16 import vgg16
    from lib.utils.timer import Timer
    
    # Module-level setup for the detection demo: GPU selection, a configured
    # TensorFlow session, and the lookup tables used by the CLI and the demo.
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'   # expose only the first GPU to this process
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8  # cap this process at 80% of the GPU's memory
    config.gpu_options.allow_growth = True      # allocate GPU memory on demand
    # NOTE(review): the __main__ section below creates a second tf.Session and
    # rebinds `sess`; this session is not referenced by name after that.
    sess = tf.Session(config = config)
    
    # Class names indexed by class id; index 0 is the background class, which
    # demo() skips, leaving 'lb' as the only foreground class.
    CLASSES = ('__background__','lb')
    # Checkpoint file name per backbone; the keys are the accepted --net values.
    NETS = {'vgg16': ('vgg16_faster_rcnn_iter_70000.ckpt',), 'res101': ('res101_faster_rcnn_iter_110000.ckpt',)}
    # Training-set identifier per dataset; the keys are the accepted --dataset values.
    DATASETS = {'pascal_voc': ('voc_2007_trainval',), 'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)}
    
    def vis_detections(im, class_name, dets, thresh=0.5):
        """Crop the best-scoring detection to disk and draw its bounding box.

        Detections scoring below ``thresh`` are ignored. For every remaining
        detection whose score equals the maximum, the box is cut out of the
        source image (widened by 20 px on each horizontal side where the image
        has room), resized to 1000x100 and written under ``cut1/``. The box and
        its score are also drawn on a matplotlib figure.

        Args:
            im: Image array with at least three channels in its last axis. The
                first three channels are reversed for display (presumably
                BGR -> RGB, since the caller loads it with ``cv2.imread``).
            class_name: Label used in the figure caption and title.
            dets: 2-D array, one detection per row: columns 0-3 are the box
                ``x0, y0, x1, y1`` and the last column is the score.
            thresh: Minimum score a detection needs to be considered.

        Returns:
            None. Returns early, without plotting, when nothing reaches
            ``thresh``.

        Raises:
            IOError: If the source image cannot be read from ``data/demo/``.

        Note:
            The image is re-read from ``"data/demo/" + filename`` where
            ``filename`` is a module-level global set by the ``__main__``
            section; it is not a parameter of this function.
        """
        inds = np.where(dets[:, -1] >= thresh)[0]
        if len(inds) == 0:
            return

        im = im[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')

        max_score = dets[inds, -1].max()
        pad = 20  # horizontal margin (pixels) added around the detected box
        path = 'cut1/'
        for i in inds:
            score = dets[i, -1]
            if score != max_score:
                continue
            bbox = dets[i, :4]

            img = cv2.imread("data/demo/" + filename)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Unable to read image data/demo/{}'.format(filename))
            width = img.shape[1]

            # Pad each side independently, and only when the padded edge stays
            # inside the image. This also covers boxes that touch both borders
            # or sit exactly `pad` pixels from one, which would otherwise leave
            # the crop undefined.
            x0 = int(bbox[0] - pad) if bbox[0] > pad else int(bbox[0])
            x1 = int(bbox[2]) + pad if bbox[2] + pad < width else int(bbox[2])
            cropped = img[int(bbox[1]):int(bbox[3]), x0:x1]  # [y0:y1, x0:x1]

            # Normalise the crop to a fixed 1000x100 strip.
            res = cv2.resize(cropped, (1000, 100), interpolation=cv2.INTER_CUBIC)
            # cv2.imwrite does not create missing directories; it just fails.
            if not os.path.isdir(path):
                os.makedirs(path)
            cv2.imwrite(path + str(i) + filename, res)

            ax.add_patch(plt.Rectangle((bbox[0], bbox[1]),
                                       bbox[2] - bbox[0],
                                       bbox[3] - bbox[1], fill=False,
                                       edgecolor='red', linewidth=3.5))
            ax.text(bbox[0], bbox[1] - 2,
                    '{:s} {:.3f}'.format(class_name, score),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14, color='white')
            ax.set_title(('{} detections with '
                          'p({} | box) >= {:.1f}').format(class_name, class_name, thresh),
                         fontsize=14)
        plt.axis('off')
        plt.tight_layout()
        plt.draw()
    
    
    def demo(sess, net, image_name):
        """Run the detector on one demo image and visualise each foreground class.

        The image is loaded from ``<cfg.FLAGS2["data_dir"]>/demo/<image_name>``
        and passed to ``im_detect``. For every class after the background
        entry of ``CLASSES``, that class's boxes and scores are joined into one
        array, filtered with ``nms`` and handed to ``vis_detections``.

        Args:
            sess: TensorFlow session forwarded to ``im_detect``.
            net: Network object forwarded to ``im_detect``.
            image_name: File name of the image inside the ``demo`` directory.
        """
        image_path = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
        im = cv2.imread(image_path)

        # Time the forward pass only.
        stopwatch = Timer()
        stopwatch.tic()
        scores, boxes = im_detect(sess, net, im)
        stopwatch.toc()
        print('Detection took {:.3f}s for {:d} object proposals'.format(stopwatch.total_time, boxes.shape[0]))

        CONF_THRESH = 0.1
        NMS_THRESH = 0.1
        # Start counting at 1 because index 0 of CLASSES is the background class.
        for cls_ind, cls in enumerate(CLASSES[1:], 1):
            first_col = 4 * cls_ind
            class_boxes = boxes[:, first_col:first_col + 4]
            class_scores = scores[:, cls_ind]
            # One row per proposal: four box coordinates followed by the score.
            dets = np.hstack((class_boxes, class_scores.reshape(-1, 1))).astype(np.float32)
            dets = dets[nms(dets, NMS_THRESH), :]

            vis_detections(im, cls, dets, thresh=CONF_THRESH)
    
    def parse_args():
        """Build the demo's command-line parser and parse the process arguments.

        Returns:
            The parsed namespace with ``demo_net`` (one of the ``NETS`` keys,
            default ``'vgg16'``) and ``dataset`` (one of the ``DATASETS`` keys,
            default ``'pascal_voc'``).
        """
        cli = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo')
        option_specs = (
            ('--net', dict(dest='demo_net', help='Network to use [vgg16 res101]',
                           choices=NETS.keys(), default='vgg16')),
            ('--dataset', dict(dest='dataset', help='Trained dataset [pascal_voc pascal_voc_0712]',
                               choices=DATASETS.keys(), default='pascal_voc')),
        )
        for flag, options in option_specs:
            cli.add_argument(flag, **options)
        return cli.parse_args()
    
    
    if __name__ == '__main__':
        # Script entry point: restore the trained Faster R-CNN checkpoint and run
        # the detection demo on the selected image(s).
        args = parse_args()

        # model path
        demonet = args.demo_net
        dataset = args.dataset

        # The checkpoint path is hard-coded; the generic layout would be:
        # tfmodel = os.path.join('output', demonet, DATASETS[dataset][0], 'default', NETS[demonet][0])
        tfmodel = r'./default/voc_2007_trainval/cut1/vgg16_faster_rcnn_iter_8000.ckpt'
        # Fail early, with the offending path, when the checkpoint is missing.
        if not os.path.isfile(tfmodel + '.meta'):
            print(tfmodel)
            raise IOError(('{:s} not found.\nDid you download the proper networks from '
                           'our server and place them properly?').format(tfmodel + '.meta'))

        # set config
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True

        # init session
        sess = tf.Session(config=tfconfig)
        # load network (only the vgg16 backbone is wired up here)
        if demonet == 'vgg16':
            net = vgg16(batch_size=1)
        # elif demonet == 'res101':
            # net = resnetv1(batch_size=1, num_layers=101)
        else:
            raise NotImplementedError
        # The third argument is presumably the class count (background + 'lb').
        net.create_architecture(sess, "TEST", 2,
                                tag='default', anchor_scales=[8, 16, 32])
        saver = tf.train.Saver()
        saver.restore(sess, tfmodel)

        print('Loaded network {:s}'.format(tfmodel))
        # Alternative: run detection on every image in the demo folder.
        # for filename in os.listdir(r'data/demo/'):
        #     im_names = [filename]
        #     for im_name in im_names:
        #         print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        #         print('Demo for data/demo/{}'.format(im_name))
        #         demo(sess, net, im_name)
        #
        #     plt.show()

        # Run detection on a single image.
        # NOTE: vis_detections() reads the module-level name `filename`, so this
        # variable must keep that name.
        filename = '0001.jpg'
        im_names = [filename]
        for im_name in im_names:
            print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
            print('Demo for data/demo/{}'.format(im_name))
            demo(sess, net, im_name)
        plt.show()

      

    效果图如下:

      2.将切分出来的卡号区域图片进行保存,然后再将其切分成单个字符:

      主程序的代码和上面第一步的步骤原理是相同的,不同的就是训练集的不同设置 

      效果图如下:

    3、最后一步就是用一些单字符的图片训练模型并保存,然后对切分出来的单字符进行识别

    import os
    import tensorflow as tf
    from PIL import Image
    from nets2 import nets_factory
    import numpy as np
    import matplotlib.pyplot as plt
    # Number of distinct character classes (presumably the digits 0-9).
    CHAR_SET_LEN = 10
    # Image height.
    # NOTE(review): IMAGE_HEIGHT / IMAGE_WIDTH are not referenced by the code
    # shown in this script, which works on 224x224 tensors.
    IMAGE_HEIGHT = 60
    # Image width.
    IMAGE_WIDTH = 160
    # Batch size.
    BATCH_SIZE = 1
    # Location of the tfrecord file to read. Kept as a raw string so the
    # backslashes of the Windows path are not treated as escape sequences.
    TFRECORD_FILE = r"C:\workspace\Python\Bank_Card_OCR\demo\test_result\tfrecords/1.tfrecords"
    
    # Placeholder fed with batches of 224x224 single-channel images at
    # prediction time (the batch dimension is left open).
    x = tf.placeholder(tf.float32, [None, 224, 224])
    
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'   # expose only the first GPU to this process
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap this process at 50% of the GPU's memory
    config.gpu_options.allow_growth = True      # allocate GPU memory on demand
    # NOTE(review): the `with tf.Session() as sess:` block further down rebinds
    # `sess` to a new session created without this config.
    sess = tf.Session(config = config)
    
    # 从tfrecord读出数据
    def read_and_decode(filename):
        """Build the graph ops that read one example from a tfrecord file.

        Each record is parsed for a byte-string ``'image'`` feature and an
        int64 ``'label0'`` feature. The image bytes are decoded as uint8 and
        reshaped to 224x224.

        Args:
            filename: Path of the tfrecord file to read.

        Returns:
            A tuple ``(image, image_raw, label0)``: ``image`` is the float32
            image rescaled from [0, 255] to [-1, 1], ``image_raw`` is the
            untouched uint8 image, and ``label0`` is the label cast to int32.
        """
        # Queue holding the file name, consumed by the record reader.
        queue = tf.train.string_input_producer([filename])
        _, record = tf.TFRecordReader().read(queue)

        schema = {
            'image': tf.FixedLenFeature([], tf.string),
            'label0': tf.FixedLenFeature([], tf.int64),
        }
        parsed = tf.parse_single_example(record, features=schema)

        pixels = tf.decode_raw(parsed['image'], tf.uint8)
        # Unprocessed grayscale image, kept for display.
        image_raw = tf.reshape(pixels, [224, 224])
        # tf.train.shuffle_batch needs a fully defined shape.
        shaped = tf.reshape(pixels, [224, 224])
        # Preprocessing: scale to [0, 1], then shift and stretch to [-1, 1].
        unit_scaled = tf.cast(shaped, tf.float32) / 255.0
        image = tf.multiply(tf.subtract(unit_scaled, 0.5), 2.0)

        label0 = tf.cast(parsed['label0'], tf.int32)
        return image, image_raw, label0
    
    
    # Build the ops that yield one decoded image, its raw copy and its label.
    image, image_raw, label0 = read_and_decode(TFRECORD_FILE)
    # shuffle_batch groups the examples into randomly shuffled batches.
    image_batch, image_raw_batch, label_batch0 = tf.train.shuffle_batch(
        [image, image_raw, label0], batch_size=BATCH_SIZE,
        capacity=50000, min_after_dequeue=10000, num_threads=1)
    
    
    # Define the network: AlexNet v2 with one output per character class, built
    # in inference mode (is_training=False).
    train_network_fn = nets_factory.get_network_fn(
        'alexnet_v2',
        num_classes=CHAR_SET_LEN * 1,
        weight_decay=0.0005,
        is_training=False)
    
    # Restore the trained single-character classifier and print its prediction
    # next to the true label for six examples drawn from the tfrecord file.
    with tf.Session() as sess:
        # inputs: a tensor of size [batch_size, height, width, channels]
        X = tf.reshape(x, [BATCH_SIZE, 224, 224, 1])
        # Feed the data through the network to get its outputs.
        logits, end_points = train_network_fn(X)
        # Keep the CHAR_SET_LEN class scores and take the highest-scoring class.
        logits0 = tf.slice(logits, [0, 0], [-1, CHAR_SET_LEN])
        predict0 = tf.argmax(logits0, 1)

        # Initialise variables, then overwrite them with the trained weights.
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, '../Cmodels/model/crack_captcha1.model-6000')
        # saver.restore(sess, '../1/crack_captcha1.model-2500')

        # A coordinator manages the input-queue threads.
        coord = tf.train.Coordinator()
        # Start the queue runners so the file-name queue gets filled.
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            for _ in range(6):
                # Fetch one batch of images and labels.
                b_image, b_image_raw, b_label0 = sess.run([image_batch, image_raw_batch, label_batch0])
                # Show the raw image.
                img = Image.fromarray(b_image_raw[0], 'L')
                plt.imshow(img)
                plt.axis('off')
                plt.show()
                # Print the true label.
                print('label:', b_label0)
                # Predict. The result gets its own name so the `label0` tensor
                # defined at module level is not rebound.
                predicted = sess.run([predict0], feed_dict={x: b_image})
                # Print the prediction.
                print('predict:', predicted[0])
        finally:
            # Always stop the queue-runner threads, even if the loop raised;
            # join() returns only after all of them have finished.
            coord.request_stop()
            coord.join(threads)

    这里就是对单字符进行识别

    这一步对单字符进行识别,但是需要知道每个单字符图片的先后顺序,所以在前面切分时就要记录每个字符图片的坐标,按坐标顺序进行保存,然后根据坐标顺序挨个进行识别

    这就是整个银行卡识别的思想和流程,我将项目代码上传到github,有需要的可以进行下载,训练的模型就不上传了,需要的按照文件执行一下,保存的模型的文件夹自己设置,在代码里面引用模型的时候,自己设置就好了

  • 相关阅读:
    【插队问题-线段树-思维巧妙】【poj2828】Buy Tickets
    【线段树成段更新成段查询模板】【POJ3468】A Simple Problem with Integers
    HDU 6156 Palindrome Function 数位DP
    HDU 6154 CaoHaha's staff 思维 找规律
    Educational Codeforces Round 18 A B 码力 比赛
    Codeforces 815 B Karen and Test 杨辉三角 组合数学
    Codeforces 815 A Karen and Game 贪心
    POJ 1006 Biorhythms 中国剩余定理 数论
    Codeforces 818 E Card Game Again 线段树 思维
    Educational Codeforces Round 24-A B C D 思维
  • 原文地址:https://www.cnblogs.com/zhaochunhui/p/11322582.html
Copyright © 2020-2023  润新知