• How to use the VOC dataset generation code


    #split.py: the inputs are the images plus one label txt file per image; each txt line holds eight coordinate values (the four corner points of a text box).
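    # A hypothetical example of one label line (eight comma-separated values,
    # the four corner points of a text box; any extra fields after the first
    # eight values are ignored by the parsing below):
    #   377,117,463,117,465,130,378,130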
    
    import os
    import numpy as np
    import math
    import cv2 as cv
    import imageio
    
    #path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/image'
    #path = '/home/chendali1/Gsj/text-detection-ctpn-master/prepare_training_data/image/image_1000/'
    path='/home/chendali1/Gsj/prepare_training_data/ICDAR/images_train/'
    #gt_path = '/home/chendali1/Gsj/text-detection-ctpn-master/prepare_training_data/label/labelDigit1000/'
    gt_path='/home/chendali1/Gsj/prepare_training_data/ICDAR/result_train/'
    out_path = 're_image'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    files = os.listdir(path)
    files.sort()
    #files=files[:100]
    for file in files:
        _, basename = os.path.split(file)
        if basename.lower().split('.')[-1] not in ['jpg', 'png']:
            continue
        stem, ext = os.path.splitext(basename)
        
        
        #stem=stem0.split('_')[2]
        
        gt_file = os.path.join(gt_path, stem+'.txt')
        img_path = os.path.join(path, file)
        print(img_path)
        #print(gt_file)
        img = cv.imread(img_path)
        if img is None:
            print('****************************')
            print('Image ' + img_path + ' may be a bad picture!')
            print('****************************')
            newname = os.path.join(path,stem+'.gif')
            os.rename(img_path,newname)
            img_path=newname
            print(img_path)
            print('Try read with imageio.')
            gif = imageio.mimread(img_path)
            if gif is None:
                print('****************************')
                print("Image " + img_path + " can't be read!")
                print('****************************')
                continue
            print('Read success!')
            img = cv.cvtColor(gif[0], cv.COLOR_RGB2BGR)
        
        img_size = img.shape
        im_size_min = np.min(img_size[0:2])
        im_size_max = np.max(img_size[0:2])
    
        # Rescale so the shorter side becomes 600 px; if that would push the
        # longer side past 1200 px, scale by 1200 / longer side instead.
        im_scale = float(600) / float(im_size_min)
        if np.round(im_scale * im_size_max) > 1200:
            im_scale = float(1200) / float(im_size_max)
        re_im = cv.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv.INTER_LINEAR)
        re_size = re_im.shape
        cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im)
    
        with open(gt_file, 'r') as f:
            lines = f.readlines()
        for line in lines:
            splitted_line = line.strip().lower().split(',')
            pt_x = np.zeros((4, 1))
            pt_y = np.zeros((4, 1))
            pt_x[0, 0] = int(float(splitted_line[0]) / img_size[1] * re_size[1])
            pt_y[0, 0] = int(float(splitted_line[1]) / img_size[0] * re_size[0])
            pt_x[1, 0] = int(float(splitted_line[2]) / img_size[1] * re_size[1])
            pt_y[1, 0] = int(float(splitted_line[3]) / img_size[0] * re_size[0])
            pt_x[2, 0] = int(float(splitted_line[4]) / img_size[1] * re_size[1])
            pt_y[2, 0] = int(float(splitted_line[5]) / img_size[0] * re_size[0])
            pt_x[3, 0] = int(float(splitted_line[6]) / img_size[1] * re_size[1])
            pt_y[3, 0] = int(float(splitted_line[7]) / img_size[0] * re_size[0])
    
            # Sort the four points by x; pt1/pt3 become the upper/lower of the
            # two leftmost points and pt2/pt4 the upper/lower of the two
            # rightmost, from which xmin/ymin/xmax/ymax are then taken.
            ind_x = np.argsort(pt_x, axis=0)
            pt_x = pt_x[ind_x]
            pt_y = pt_y[ind_x]
    
            if pt_y[0] < pt_y[1]:
                pt1 = (pt_x[0], pt_y[0])
                pt3 = (pt_x[1], pt_y[1])
            else:
                pt1 = (pt_x[1], pt_y[1])
                pt3 = (pt_x[0], pt_y[0])
    
            if pt_y[2] < pt_y[3]:
                pt2 = (pt_x[2], pt_y[2])
                pt4 = (pt_x[3], pt_y[3])
            else:
                pt2 = (pt_x[3], pt_y[3])
                pt4 = (pt_x[2], pt_y[2])
    
            xmin = int(min(pt1[0], pt2[0]))
            ymin = int(min(pt1[1], pt2[1]))
            xmax = int(max(pt2[0], pt4[0]))
            ymax = int(max(pt3[1], pt4[1]))
    
            if xmin < 0:
                xmin = 0
            if xmax > re_size[1] - 1:
                xmax = re_size[1] - 1
            if ymin < 0:
                ymin = 0
            if ymax > re_size[0] - 1:
                ymax = re_size[0] - 1
    
            width = xmax - xmin
            height = ymax - ymin
    
            # Split the box into 16-pixel-wide columns (CTPN-style proposals);
            # note that the columns are computed but not written out below.
            step = 16.0
            x_left = []
            x_right = []
            x_left.append(xmin)
            x_left_start = int(math.ceil(xmin / 16.0) * 16.0)
            if x_left_start == xmin:
                x_left_start = xmin + 16
            for i in np.arange(x_left_start, xmax, 16):
                x_left.append(i)
            x_left = np.array(x_left)
    
            x_right.append(x_left_start - 1)
            for i in range(1, len(x_left) - 1):
                x_right.append(x_left[i] + 15)
            x_right.append(xmax)
            x_right = np.array(x_right)
    
            idx = np.where(x_left == x_right)
            x_left = np.delete(x_left, idx, axis=0)
            x_right = np.delete(x_right, idx, axis=0)
    
            if not os.path.exists('label_tmp'):
                os.makedirs('label_tmp')
            with open(os.path.join('label_tmp', stem) + '.txt', 'a') as f:
                # Write the class name followed by the four (x-sorted) corner
                # points, tab-separated, one box per line.
                coords = [pt_x[0, 0], pt_y[0, 0], pt_x[1, 0], pt_y[1, 0],
                          pt_x[2, 0], pt_y[2, 0], pt_x[3, 0], pt_y[3, 0]]
                f.write('tianchi\t' + '\t'.join(str(int(c)) for c in coords) + '\n')
    #ToVoc.py: after the script above has finished, run this one to generate the VOC-format files.
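    # Each line written to label_tmp/<stem>.txt by split.py has the form
    #   tianchi<TAB>x0<TAB>y0<TAB>x1<TAB>y1<TAB>x2<TAB>y2<TAB>x3<TAB>y3
    # generate_xml() below splits such lines on whitespace and expects the
    # class name followed by eight integer coordinates.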
    from xml.dom.minidom import Document
    import cv2
    import os
    import glob
    import shutil
    import numpy as np
    
    def generate_xml(name, lines, img_size, class_sets, doncateothers=True):
        doc = Document()
    
        def append_xml_node_attr(child, parent=None, text=None):
            ele = doc.createElement(child)
            if not text is None:
                text_node = doc.createTextNode(text)
                ele.appendChild(text_node)
            parent = doc if parent is None else parent
            parent.appendChild(ele)
            return ele
    
        img_name = name + '.jpg'
        # create header
        annotation = append_xml_node_attr('annotation')
        append_xml_node_attr('folder', parent=annotation, text='tianchi')
        append_xml_node_attr('filename', parent=annotation, text=img_name)
        source = append_xml_node_attr('source', parent=annotation)
        append_xml_node_attr('database', parent=source, text='coco_text_database')
        append_xml_node_attr('annotation', parent=source, text='tianchi')
        append_xml_node_attr('image', parent=source, text='tianchi')
        append_xml_node_attr('flickrid', parent=source, text='000000')
        owner = append_xml_node_attr('owner', parent=annotation)
        append_xml_node_attr('name', parent=owner, text='ms')
        size = append_xml_node_attr('size', annotation)
        append_xml_node_attr('width', size, str(img_size[1]))
        append_xml_node_attr('height', size, str(img_size[0]))
        append_xml_node_attr('depth', size, str(img_size[2]))
        append_xml_node_attr('segmented', parent=annotation, text='0')
    
        # create objects
        objs = []
        for line in lines:
            splitted_line = line.strip().lower().split()
            cls = splitted_line[0].lower()
            if not doncateothers and cls not in class_sets:
                continue
            cls = 'dontcare' if cls not in class_sets else cls
            if cls == 'dontcare':
                continue
            obj = append_xml_node_attr('object', parent=annotation)
            occlusion = int(0)
            # fields 1..8 after the class name are the four corner points
            x0, y0, x1, y1, x2, y2, x3, y3 = \
                int(float(splitted_line[1]) + 1), int(float(splitted_line[2]) + 1), \
                int(float(splitted_line[3]) + 1), int(float(splitted_line[4]) + 1), \
                int(float(splitted_line[5]) + 1), int(float(splitted_line[6]) + 1), \
                int(float(splitted_line[7]) + 1), int(float(splitted_line[8]) + 1)
            truncation = float(0)
            difficult = 1 if _is_hard(cls, truncation, occlusion, x1, y1, x2, y2) else 0
            truncted = 0 if truncation < 0.5 else 1
    
            append_xml_node_attr('name', parent=obj, text=cls)
            append_xml_node_attr('pose', parent=obj, text='none')
            append_xml_node_attr('truncated', parent=obj, text=str(truncted))
            append_xml_node_attr('difficult', parent=obj, text=str(int(difficult)))
            bb = append_xml_node_attr('bndbox', parent=obj)
            append_xml_node_attr('x0', parent=bb, text=str(x0))
            append_xml_node_attr('y0', parent=bb, text=str(y0))
            append_xml_node_attr('x1', parent=bb, text=str(x1))
            append_xml_node_attr('y1', parent=bb, text=str(y1))
            append_xml_node_attr('x2', parent=bb, text=str(x2))
            append_xml_node_attr('y2', parent=bb, text=str(y2))
            append_xml_node_attr('x3', parent=bb, text=str(x3))
            append_xml_node_attr('y3', parent=bb, text=str(y3))
    
            o = {'class': cls, 'box': np.asarray([x0, y0,x1,y1, x2, y2,x3,y3], dtype=float), 
                 'truncation': truncation, 'difficult': difficult, 'occlusion': occlusion}
            objs.append(o)
    
        return doc, objs
    
    
    def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2):
        hard = False
        if y2 - y1 < 25 and occlusion >= 2:
            hard = True
            return hard
        if occlusion >= 3:
            hard = True
            return hard
        if truncation > 0.8:
            hard = True
            return hard
        return hard
    
    
    def build_voc_dirs(outdir):
        mkdir = lambda dir: os.makedirs(dir) if not os.path.exists(dir) else None
        mkdir(outdir)
        mkdir(os.path.join(outdir, 'Annotations'))
        mkdir(os.path.join(outdir, 'ImageSets'))
        mkdir(os.path.join(outdir, 'ImageSets', 'Layout'))
        mkdir(os.path.join(outdir, 'ImageSets', 'Main'))
        mkdir(os.path.join(outdir, 'ImageSets', 'Segmentation'))
        mkdir(os.path.join(outdir, 'JPEGImages'))
        mkdir(os.path.join(outdir, 'SegmentationClass'))
        mkdir(os.path.join(outdir, 'SegmentationObject'))
        return os.path.join(outdir, 'Annotations'), os.path.join(outdir, 'JPEGImages'), os.path.join(outdir, 'ImageSets',
                                                                                                     'Main')
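    # Resulting directory skeleton (standard VOC2007 layout):
    #   <outdir>/
    #     Annotations/                          one XML file per image
    #     ImageSets/{Layout,Main,Segmentation}/
    #     JPEGImages/                           resized JPEG images
    #     SegmentationClass/  SegmentationObject/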
    
    
    if __name__ == '__main__':
        _outdir = 'TEXTVOC/VOC2007'
        _draw = bool(0)
        _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir)
        _doncateothers = bool(1)
        for dset in ['train']:
            _labeldir = 'label_tmp'
            _imagedir = 're_image'
            class_sets = ('tianchi', 'dontcare')
            class_sets_dict = dict((k, i) for i, k in enumerate(class_sets))
            allclasses = {}
            fs = [open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w') for cls in class_sets]
            ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w')
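            # train.txt lists every training image stem; <class>_train.txt marks
            # each stem with 1 (class present) or -1 (absent), mirroring the
            # usual VOC ImageSets/Main convention.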
    
            files = glob.glob(os.path.join(_labeldir, '*.txt'))
            files.sort()
            for file in files:
                path, basename = os.path.split(file)
                stem, ext = os.path.splitext(basename)
                with open(file, 'r') as f:
                    lines = f.readlines()
                img_file = os.path.join(_imagedir, stem + '.jpg')
    
                print(img_file)
                img = cv2.imread(img_file)
                img_size = img.shape
    
                doc, objs = generate_xml(stem, lines, img_size, class_sets=class_sets, doncateothers=_doncateothers)
    
                cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img)
                xmlfile = os.path.join(_dest_label_dir, stem + '.xml')
                with open(xmlfile, 'w') as f:
                    f.write(doc.toprettyxml(indent='    '))
    
                ftrain.writelines(stem + '\n')
    
                cls_in_image = set([o['class'] for o in objs])
    
                for obj in objs:
                    cls = obj['class']
                    allclasses[cls] = 0 if cls not in allclasses else allclasses[cls] + 1
    
                for cls in cls_in_image:
                    if cls in class_sets:
                        fs[class_sets_dict[cls]].writelines(stem + ' 1\n')
                for cls in class_sets:
                    if cls not in cls_in_image:
                        fs[class_sets_dict[cls]].writelines(stem + ' -1\n')
    
    
            for f in fs:
                f.close()
            ftrain.close()
    
            print('~~~~~~~~~~~~~~~~~~~')
            print(allclasses)
            print('~~~~~~~~~~~~~~~~~~~')
            shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'val.txt'))
            shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'trainval.txt'))
            for cls in class_sets:
                shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                                os.path.join(_dest_set_dir, cls + '_trainval.txt'))
                shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                                os.path.join(_dest_set_dir, cls + '_val.txt'))
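
An optional sanity check (not part of the original scripts): a minimal sketch that reads a few of the generated annotations back with the Python standard library and prints each object's class and corner coordinates. The TEXTVOC/VOC2007/Annotations path matches the default _outdir used above.

    # check_voc.py -- assumes the default TEXTVOC/VOC2007 output layout
    import glob
    import os
    import xml.etree.ElementTree as ET

    ann_dir = 'TEXTVOC/VOC2007/Annotations'
    for xml_file in sorted(glob.glob(os.path.join(ann_dir, '*.xml')))[:5]:
        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        print(root.find('filename').text,
              size.find('width').text + 'x' + size.find('height').text)
        for obj in root.findall('object'):
            bb = obj.find('bndbox')
            coords = [bb.find(tag).text
                      for tag in ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')]
            print(' ', obj.find('name').text, coords)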

  • Original post: https://www.cnblogs.com/fourmi/p/8947342.html