• 将caltech数据集做成VOC格式


    一、先下载caltech数据集

    二、格式转换代码将 ".seq" 转换为 ".jpg" 文件 ( https://github.com/mitmul/caltech-pedestrian-dataset-converter.git)

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    import os
    import glob
    import cv2 as cv
    
    
    def save_img(dname, fn, i, frame):
        """Write ``frame`` to ``out_dir`` as ``<dir name>_<seq name>_<i>.png``.

        ``dname`` is the directory the sequence came from and ``fn`` the
        sequence file; only their base names are used for the file name.
        """
        set_name = os.path.basename(dname)
        seq_name = os.path.basename(fn).split('.')[0]
        target = '{}/{}_{}_{}.png'.format(out_dir, set_name, seq_name, i)
        cv.imwrite(target, frame)
    
    out_dir = 'data/images'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # Walk every "data/set*" directory and dump each frame of each .seq file
    # as an individual image via save_img().
    for dname in sorted(glob.glob('data/set*')):
        for fn in sorted(glob.glob('{}/*.seq'.format(dname))):
            cap = cv.VideoCapture(fn)
            try:
                i = 0
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        # No more frames (or the file could not be decoded).
                        break
                    save_img(dname, fn, i, frame)
                    i += 1
            finally:
                # Release the capture handle even if saving a frame fails, so
                # handles do not pile up across the many .seq files.
                cap.release()
            print(fn)
    

    三、将 ".vbb" 文件转化为文本文件(需要依赖code3.2.1)

    % Walk <maindir>/set*/<entry>/ and convert the annotation of every entry
    % to a text file with vbb_to_txtt.
    maindir = '/home/user/Downloads/caltech_data_set/data-USA/';
    subdir  = dir( maindir );
    for i = 1 : length( subdir )
        setName = subdir( i ).name;
        % Only directories whose name starts with 'set' are processed.
        % strncmp is safe for names shorter than three characters (where
        % name(1:3) would raise an index error) and also rejects '.' and '..'.
        if ~subdir( i ).isdir || ~strncmp( setName, 'set', 3 )
            continue;
        end

        ssdir = dir( fullfile( maindir, setName ) );
        for j = 1 : length( ssdir )
            entryName = ssdir( j ).name;
            if ~ssdir( j ).isdir || ...
                isequal( entryName, '.' ) || ...
                isequal( entryName, '..' )
                continue;
            end
            vName1 = fullfile( setName, entryName );  % e.g. 'set00/V000'
            fnm    = [ setName, entryName ];          % e.g. 'set00V000'
            % Print through an explicit format so that characters such as
            % '%' or '\' in a path are never interpreted by fprintf.
            fprintf( '%s -> %s\n', vName1, fnm );
            vbb_to_txtt( vName1, fnm );
        end
    end
    
    function vbb_to_txtt(vName, fnm)
        % VBB_TO_TXTT  Dump the boxes of one .vbb annotation into a text file.
        %   vName - annotation name relative to the annotations directory,
        %           e.g. 'set01/V000'
        %   fnm   - suffix of the output file name, e.g. 'set01V000'
        % Every output line has the form
        %   <frame index> <pos(1)> <pos(2)> <pos(3)> <pos(4)>
        % NOTE(review): vbb and dbInfo are not defined in this file; they
        % presumably come from the code3.2.1 dependency - confirm.
        A = vbb( 'vbbLoad', [dbInfo '/annotations/' vName] );
        % Named outPrefix (not "path") to avoid shadowing MATLAB's path().
        outPrefix = '/home/user/Downloads/caltech_data_set/data/annotations';
        % The output file is "<outPrefix>-<fnm>": the prefix and the suffix are
        % joined with '-', not with a file separator.
        outFile = [outPrefix '-' fnm];
        c = fopen(outFile, 'w');
        if c == -1
            error('vbb_to_txtt:openFailed', 'Cannot open %s for writing.', outFile);
        end
        for i = 1:A.nFrame
            iframe = A.objLists(1,i);
            iframe_data = iframe{1,1};
            for j = 1:length(iframe_data)
                iframe_dataj = iframe_data(j);
                % Objects whose first pos component is 0 are skipped.
                if iframe_dataj.pos(1) ~= 0  % uses pos (not posv)
                    fprintf(c, '%d %f %f %f %f\n', i, ...
                        iframe_dataj.pos(1), iframe_dataj.pos(2), ...
                        iframe_dataj.pos(3), iframe_dataj.pos(4));
                end
            end
        end
        fclose(c);
    end
    

     四、将生成的文本文件中的标注框写成VOC格式的XML文件

    #!/usr/bin/env python
    # coding:utf-8
    
    #from xml.etree.ElementTree import Element, SubElement, tostring
    from lxml.etree import Element, SubElement, tostring
    import pprint
    from xml.dom.minidom import parseString
    import os
    
    def mkdir(path):
        """Create directory ``path`` (including parents) if it does not exist.

        Surrounding whitespace and trailing backslashes are stripped from
        ``path`` before it is used.

        Returns:
            True if the directory was created, False if the path already
            existed (nothing is changed in that case).
        """
        path = path.strip().rstrip("\\")

        if os.path.exists(path):
            print(path + 'failed!')
            return False

        os.makedirs(path)
        print(path + 'ok')
        return True
    
    def generate_xml(file_info, obj,
                     file_root='/home/user/Downloads/caltech_data_set/data_test/'):
        """Write one VOC-style annotation XML file for a single image.

        Args:
            file_info: two-item sequence ``[folder, filename]``, for example
                ``['set00/V000', '1.jpg']``.
            obj: iterable of dicts, one per bounding box, holding string
                values under the keys ``xmin``, ``ymin``, ``xmax``, ``ymax``.
            file_root: directory prefix under which
                ``<folder>/<filename without extension>.xml`` is written.
                It is concatenated as-is, so it must end with a separator.
        """
        node_root = Element('annotation')
        SubElement(node_root, 'folder').text = file_info[0]
        SubElement(node_root, 'filename').text = file_info[1]

        # The image size is hard-coded: 640x480 with 3 channels.
        node_size = SubElement(node_root, 'size')
        for tag, value in (('width', '640'), ('height', '480'), ('depth', '3')):
            SubElement(node_size, tag).text = value

        for obj_i in obj:
            print(obj_i)
            node_object = SubElement(node_root, 'object')
            # Every box is labelled with the single class 'person'.
            SubElement(node_object, 'name').text = 'person'
            node_bndbox = SubElement(node_object, 'bndbox')
            for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
                SubElement(node_bndbox, tag).text = obj_i[tag]

        # pretty_print adds line breaks and indentation to the output.
        xml = tostring(node_root, pretty_print=True)

        out_dir = file_root + file_info[0]
        mkdir(out_dir)
        out_path = out_dir + "/" + file_info[1].split('.')[0] + ".xml"
        # tostring() yields bytes, so the file is opened in binary mode.
        # 'wb' (instead of the former 'a+') ensures that re-running the
        # conversion replaces the file rather than appending a second XML
        # document to it.
        with open(out_path, 'wb') as fw:
            fw.write(xml)
        print("xml _ ok")
    
        #for debug
        #print xml
    
    def _parse_annotation_line(line):
        """Parse one ``"<frame> <x> <y> <w> <h>"`` line into ``(frame, box)``.

        ``box`` maps ``xmin``/``ymin``/``xmax``/``ymax`` to integer strings,
        with ``xmax = x + w`` and ``ymax = y + h`` (fractions are dropped).
        Raises ValueError or IndexError when the line is malformed.
        """
        fields = line.split()
        frame = int(fields[0])
        left, top, width, height = [float(v) for v in fields[1:5]]
        box = {
            "xmin": str(int(left)),
            "ymin": str(int(top)),
            "xmax": str(int(left + width)),
            "ymax": str(int(top + height)),
        }
        return frame, box

    def _group_boxes_by_frame(lines):
        """Group consecutive annotation lines that share a frame number.

        Returns a list of ``(frame, [box, ...])`` tuples in file order.
        Blank lines are ignored.
        """
        groups = []
        for line in lines:
            if not line.strip():
                continue
            frame, box = _parse_annotation_line(line)
            if groups and groups[-1][0] == frame:
                groups[-1][1].append(box)
            elif not groups or frame > 0:
                # After the first line, a new group is only started for a
                # positive frame number; other lines are dropped.
                groups.append((frame, [box]))
        return groups

    def printPath(level, path, handler=None):
        """Walk ``path`` recursively and emit the boxes of every annotation file.

        Non-hidden sub-directories are visited first. Every regular file in
        ``path`` is then read as an annotation text file with lines of the
        form ``<frame> <x> <y> <w> <h>``; for each frame ``handler`` is called
        once with ``[folder, '<frame>.jpg']`` and the list of that frame's
        boxes. All frames are emitted, including the last one of each file.
        Files that cannot be parsed are skipped with a message.

        Args:
            level: depth of ``path``; only used to indent printed directory
                names.
            path: directory to scan.
            handler: callable ``handler(file_info, boxes)``; defaults to
                ``generate_xml``.
        """
        if handler is None:
            handler = generate_xml
        level = int(level)

        dir_names = []
        file_names = []
        for entry in os.listdir(path):
            full = path + '/' + entry
            # Hidden directories are excluded from the walk.
            if os.path.isdir(full) and not entry.startswith('.'):
                dir_names.append(entry)
            if os.path.isfile(full):
                file_names.append(entry)

        for dl in dir_names:
            print('{} {}'.format('-' * level, dl))
            printPath(level + 1, path + '/' + dl, handler)

        print(file_names)
        for fl in file_names:
            # The folder is cut out of fixed positions of the file name:
            # characters 12-16 and 17-20 (e.g. "annotations-set00V000"
            # yields "set00/V000").
            set_name, video_name = fl[12:17], fl[17:21]
            print('{} {}'.format(set_name, video_name))
            folder = set_name + '/' + video_name
            print([folder])
            print(path)

            file_name = path + "/" + fl
            try:
                with open(file_name, 'r') as fh:
                    line_content = fh.readlines()
                print(line_content)
                # Parse the whole file before emitting anything, so that a
                # malformed file never produces partial output.
                frames = _group_boxes_by_frame(line_content)
            except (ValueError, IndexError):
                print('skip ' + file_name + ': not a parsable annotation file')
                continue

            for frame, boxes in frames:
                handler([folder, str(frame) + '.jpg'], boxes)
    
    def read_annotations_generate_fileinfo_obj(file_path):
        """Placeholder that is not implemented: ignores ``file_path``, returns None."""
        return None
    
    if __name__=="__main__":
    
        # Disabled manual example: build one XML file from two hand-written
        # boxes by calling generate_xml directly.
        #
        # file_info = ['set00/V000', '1.jpg']
        #
        # obj = []
        # obj1 = {"xmin":"1", "ymin":"1", "xmax":"5", "ymax":"5"}
        # obj2 = {"xmin":"2", "ymin":"2", "xmax":"6", "ymax":"6"}
        # obj.append(obj1)
        # obj.append(obj2)
        #
        # generate_xml(file_info, obj)
        #
    
        # Entry point: walk the annotation directory starting at level 1.
        printPath(1, "/home/user/Downloads/caltech_data_set/data_old")
    

    五、使用脚本,利用xml文件,生成train.txt trainval.txt test.txt 等文件

    import os
    import random
    
    def folder_struct(level, path):
        """Walk ``path`` recursively and call ``generate_txt`` for every file.

        Non-hidden sub-directories are visited first. For each regular file in
        ``path`` a folder name is cut out of fixed positions of the file name
        (characters 12-16 and 17-20, joined with '/') and passed to
        ``generate_txt``.

        Args:
            level: depth of ``path``; it is printed as the first item of the
                directory list.
            path: directory to scan.
        """
        level = int(level)
        # The first entry is the level; the sub-directory names follow it.
        dirList = [str(level)]
        fileList = []
        for f in os.listdir(path):
            full = path + '/' + f
            if os.path.isdir(full) and not f.startswith('.'):
                dirList.append(f)
            if os.path.isfile(full):
                fileList.append(f)

        for dl in dirList[1:]:
            folder_struct(level + 1, path + '/' + dl)
        print(dirList)

        for fl in fileList:
            file_info = fl[12:17] + '/' + fl[17:21]
            print(file_info)
            generate_txt(file_info)
    
    
    def generate_txt(xml_folder, folder_root='/home/user/Desktop/VOC/',
                     trainval_percent=0.66, train_percent=0.5):
        """Append the files of one annotation folder to the image-set lists.

        The entries of ``<folder_root>/Annotations/<xml_folder>`` are split at
        random: ``trainval_percent`` of them go to ``trainval.txt`` and the
        rest to ``test.txt``; ``train_percent`` of the trainval part goes to
        ``train.txt`` and the remainder to ``val.txt``. Each written line is
        ``<xml_folder>/<file name without extension>``. The four list files
        live in ``<folder_root>/ImageSets/Main`` and are opened in append
        mode, so repeated calls accumulate entries.

        Args:
            xml_folder: annotation sub-folder, e.g. ``'set00/V000'``.
            folder_root: root directory of the VOC-style tree.
            trainval_percent: fraction of files put into trainval.
            train_percent: fraction of trainval put into train.
        """
        xmlfilepath = os.path.join(folder_root, 'Annotations/' + xml_folder)
        txtsavepath = os.path.join(folder_root, 'ImageSets/Main')
        try:
            total_xml = sorted(os.listdir(xmlfilepath))
        except OSError:
            # A folder that cannot be listed is skipped (previously any error
            # was swallowed silently); only this specific failure is tolerated.
            print('skip ' + xmlfilepath + ': cannot list directory')
            return

        num = len(total_xml)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        trainval_ids = random.sample(range(num), tv)
        train_ids = random.sample(trainval_ids, tr)
        # Sets give constant-time membership tests in the loop below.
        trainval = set(trainval_ids)
        train = set(train_ids)

        if not os.path.isdir(txtsavepath):
            os.makedirs(txtsavepath)

        folder_name = xml_folder + '/'
        print(folder_name)

        lines = {'trainval': [], 'train': [], 'val': [], 'test': []}
        for i, xml_name in enumerate(total_xml):
            name = folder_name + os.path.splitext(xml_name)[0] + '\n'
            if i in trainval:
                lines['trainval'].append(name)
                lines['train' if i in train else 'val'].append(name)
            else:
                lines['test'].append(name)

        # 'a' replaces the former 'aw' mode, which Python 3 rejects.
        for split, names in lines.items():
            with open(os.path.join(txtsavepath, split + '.txt'), 'a') as fh:
                fh.writelines(names)
    # Walks all folder names. The folders are not listed directly; instead the
    # folder names are derived from the names of the annotation files that
    # were generated earlier.
    folder_struct(1, "/home/user/Downloads/caltech_data_set/data_old")
    

      

    在具体训练的时候,需要用0-5作为训练集,然后用6-10作为测试集

    在跑的过程中,可能会有标注越界的问题,在voc_eval.py 和pascal_voc.py中进行更改就好了。推荐博客 

  • 相关阅读:
    游戏修改器编写原理
    欲练 CSS ,必先宫 IE
    HTML结构化:CSS布局入门指南
    用css来定义table的样式
    HTML表格无空隙嵌套方法
    CSS布局学习笔记
    绽放最后的一丝美丽
    这种感觉真爽
    诗人,是否还有生存的空间
    提升人气的秘密武器
  • 原文地址:https://www.cnblogs.com/ya-cpp/p/8099135.html
Copyright © 2020-2023  润新知