• 通过DarkLabel构建DeepSort标注格式和ReID数据集


    上一篇文章推荐了DarkLabel标注软件,承诺会附上配套的代码,本文主要分享的是格式转换的几个脚本。

    先附上脚本地址: https://github.com/pprp/SimpleCVReproduction/tree/master/DarkLabel

    先来了解一下为何DarkLabel能生成这么多格式的数据集,来看看DarkLabel的格式:

    frame(从0开始计), 数量, id(从0开始), box(x1,y1,x2,y2), class=null
    0,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null
    1,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null
    2,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null
    

    每一帧,每张图片上的目标都可以提取到,并且每个目标有bbox、分配了一个ID、class

    这些信息都可以满足目标检测、ReID、跟踪数据集。

    ps:说明一下,以下脚本都是笔者自己写的,专用于单类的检测、跟踪、重识别的代码,如果有需要多类的,还需要自己修改多类部分的代码。

    1. DarkLabel转Detection

    这里笔者写了一个脚本转成VOC2007中的xml格式的标注,代码如下:

    import cv2
    import os
    import shutil
    import tqdm
    import sys
    
    root_path = r"I:DatasetVideoAnnotation"
    
    def print_flush(str):
        print(str, end='
    ')
        sys.stdout.flush()
    
    def genXML(xml_dir, outname, bboxes, width, height):
        xml_file = open((xml_dir + '/' + outname + '.xml'), 'w')
        xml_file.write('<annotation>
    ')
        xml_file.write('    <folder>VOC2007</folder>
    ')
        xml_file.write('    <filename>' + outname + '.jpg' + '</filename>
    ')
        xml_file.write('    <size>
    ')
        xml_file.write('        <width>' + str(width) + '</width>
    ')
        xml_file.write('        <height>' + str(height) + '</height>
    ')
        xml_file.write('        <depth>3</depth>
    ')
        xml_file.write('    </size>
    ')
    
        for bbox in bboxes:
            x1, y1, x2, y2 = bbox
            xml_file.write('    <object>
    ')
            xml_file.write('        <name>' + 'cow' + '</name>
    ')
            xml_file.write('        <pose>Unspecified</pose>
    ')
            xml_file.write('        <truncated>0</truncated>
    ')
            xml_file.write('        <difficult>0</difficult>
    ')
            xml_file.write('        <bndbox>
    ')
            xml_file.write('            <xmin>' + str(x1) + '</xmin>
    ')
            xml_file.write('            <ymin>' + str(y1) + '</ymin>
    ')
            xml_file.write('            <xmax>' + str(x2) + '</xmax>
    ')
            xml_file.write('            <ymax>' + str(y2) + '</ymax>
    ')
            xml_file.write('        </bndbox>
    ')
            xml_file.write('    </object>
    ')
    
        xml_file.write('</annotation>')
    
    
    def gen_empty_xml(xml_dir, outname, width, height):
        xml_file = open((xml_dir + '/' + outname + '.xml'), 'w')
        xml_file.write('<annotation>
    ')
        xml_file.write('    <folder>VOC2007</folder>
    ')
        xml_file.write('    <filename>' + outname + '.png' + '</filename>
    ')
        xml_file.write('    <size>
    ')
        xml_file.write('        <width>' + str(width) + '</width>
    ')
        xml_file.write('        <height>' + str(height) + '</height>
    ')
        xml_file.write('        <depth>3</depth>
    ')
        xml_file.write('    </size>
    ')
        xml_file.write('</annotation>')
    
    
    def getJPG(src_video_file, tmp_video_frame_save_dir):
        # gen jpg from video
        cap = cv2.VideoCapture(src_video_file)
        if not os.path.exists(tmp_video_frame_save_dir):
            os.makedirs(tmp_video_frame_save_dir)
    
        frame_cnt = 0
        isrun, frame = cap.read()
        width, height = frame.shape[1], frame.shape[0]
    
        while (isrun):
            save_name = append_name + "_" + str(frame_cnt) + ".jpg"
            cv2.imwrite(os.path.join(tmp_video_frame_save_dir, save_name), frame)
            frame_cnt += 1
            print_flush("Extracting frame :%d" % frame_cnt)
            isrun, frame = cap.read()
        return width, height
    
    def delTmpFrame(tmp_video_frame_save_dir):
        if os.path.exists(tmp_video_frame_save_dir):
            shutil.rmtree(tmp_video_frame_save_dir)
        print('delete %s success!' % tmp_video_frame_save_dir)
    
    def assign_jpgAndAnnot(src_annot_file, dst_annot_dir, dst_jpg_dir, tmp_video_frame_save_dir, width, height):
        # get coords from annotations files
        txt_file = open(src_annot_file, "r")
    
        content = txt_file.readlines()
    
        for line in content:
            item = line[:-1]
            items = item.split(',')
            frame_id, num_of_cow = items[0], items[1]
            print_flush("Assign jpg and annotion : %s" % frame_id)
    
            bboxes = []
    
            for i in range(int(num_of_cow)):
                obj_id = items[1 + i * 6 + 1]
                obj_x1, obj_y1 = int(items[1 + i * 6 + 2]), int(items[1 + i * 6 + 3])
                obj_x2, obj_y2 = int(items[1 + i * 6 + 4]), int(items[1 + i * 6 + 5])
                # preprocess the coords
                obj_x1 = max(1, obj_x1)
                obj_y1 = max(1, obj_y1)
                obj_x2 = min(width, obj_x2)
                obj_y2 = min(height, obj_y2)
                bboxes.append([obj_x1, obj_y1, obj_x2, obj_y2])
    
            genXML(dst_annot_dir, append_name + "_" + str(frame_id), bboxes, width,
                height)
            shutil.copy(
                os.path.join(tmp_video_frame_save_dir,
                            append_name + "_" + str(frame_id) + ".jpg"),
                os.path.join(dst_jpg_dir, append_name + "_" + str(frame_id) + ".jpg"))
    
        txt_file.close()
    
    
    if __name__ == "__main__":
        append_names = ["cutout%d" % i for i in range(19, 66)]
    
        for append_name in append_names:
            print("processing",append_name)
            src_video_file = os.path.join(root_path, append_name + ".mp4")
    
            if not os.path.exists(src_video_file):
                continue
    
            src_annot_file = os.path.join(root_path, append_name + "_gt.txt")
    
            dst_annot_dir = os.path.join(root_path, "Annotations")
            dst_jpg_dir = os.path.join(root_path, "JPEGImages")
    
            tmp_video_frame_save_dir = os.path.join(root_path, append_name)
    
            width, height = getJPG(src_video_file, tmp_video_frame_save_dir)
    
            assign_jpgAndAnnot(src_annot_file, dst_annot_dir, dst_jpg_dir, tmp_video_frame_save_dir, width, height)
    
            delTmpFrame(tmp_video_frame_save_dir)
    

    如果想转成U版yolo需要的格式可以点击 https://github.com/pprp/voc2007_for_yolo_torch 使用这里的脚本。

    2. DarkLabel转ReID数据集

    ReID数据集其实与分类数据集很相似,最出名的是Market1501数据集,对这个数据集不熟悉的可以先百度一下。简单来说ReID数据集只比分类中多了query, gallery的概念,也很简单。转换代码如下:

    import os
    import shutil
    import cv2
    import numpy as np
    import glob
    import sys
    import random
    """[summary]
    根据视频和darklabel得到的标注文件
    """
    
    def preprocessVideo(video_path):
        '''
        预处理,将视频变为一帧一帧的图片
        '''
        if not os.path.exists(video_frame_save_path):
            os.mkdir(video_frame_save_path)
    
        vidcap = cv2.VideoCapture(video_path)
        (cap, frame) = vidcap.read()
    
        height = frame.shape[0]
        width = frame.shape[1]
    
        cnt_frame = 0
    
        while (cap):
            cv2.imwrite(
                os.path.join(video_frame_save_path, "frame_%d.jpg" % (cnt_frame)),
                frame)
            cnt_frame += 1
            print(cnt_frame, end="
    ")
            sys.stdout.flush()
            (cap, frame) = vidcap.read()
        vidcap.release()
        return width, height
    
    
    def postprocess(video_frame_save_path):
        '''
        后处理,删除无用的文件夹
        '''
        if os.path.exists(video_frame_save_path):
            shutil.rmtree(video_frame_save_path)
    
    
    def extractVideoImgs(frame, video_frame_save_path, coords):
        '''
        抠图
        '''
        x1, y1, x2, y2 = coords
        # get image from save path
        img = cv2.imread(
            os.path.join(video_frame_save_path, "frame_%d.jpg" % (frame)))
    
        if img is None:
            return None
        # crop
        save_img = img[y1:y2, x1:x2]
        return save_img
    
    
    def bbox_ious(box1, box2):
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    
        # Intersection area
        inter_area = (min(b1_x2, b2_x2) - max(b1_x1, b2_x1)) * 
                     (min(b1_y2, b2_y2) - max(b1_y1, b2_y1))
    
        # Union Area
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
        union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
    
        return inter_area / union_area
    
    
    def bbox_iou(box1, box2):
        # format box1: x1,y1,x2,y2
        # format box2: a1,b1,a2,b2
        x1, y1, x2, y2 = box1
        a1, b1, a2, b2 = box2
    
        i_left_top_x = max(a1, x1)
        i_left_top_y = max(b1, y1)
    
        i_bottom_right_x = min(a2, x2)
        i_bottom_right_y = min(b2, y2)
    
        intersection = (i_bottom_right_x - i_left_top_x) * (i_bottom_right_y -
                                                            i_left_top_y)
    
        area_two_box = (x2 - x1) * (y2 - y1) + (a2 - a1) * (b2 - b1)
    
        return intersection * 1.0 / (area_two_box - intersection)
    
    
    def restrictCoords(width, height, x, y):
        x = max(1, x)
        y = max(1, y)
        x = min(x, width)
        y = min(y, height)
        return x, y
    
    
    if __name__ == "__main__":
    
        total_cow_num = 0
    
        root_dir = "./data/videoAndLabel"
        reid_dst_path = "./data/reid"
        done_dir = "./data/done"
    
        txt_list = glob.glob(os.path.join(root_dir, "*.txt"))
        video_list = glob.glob(os.path.join(root_dir, "*.mp4"))
    
        for i in range(len(txt_list)):
            txt_path = txt_list[i]
            video_path = video_list[i]
    
            print("processing:", video_path)
    
            if not os.path.exists(txt_path):
                continue
    
            video_name = os.path.basename(video_path).split('.')[0]
            video_frame_save_path = os.path.join(os.path.dirname(video_path),
                                                 video_name)
    
            f_txt = open(txt_path, "r")
    
            width, height = preprocessVideo(video_path)
    
            print("done")
    
            # video_cow_id = video_name + str(total_cow_num)
    
            for line in f_txt.readlines():
                bboxes = line.split(',')
                ids = []
                frame_id = int(bboxes[0])
    
                box_list = []
    
                if frame_id % 30 != 0:
                    continue
    
                num_object = int(bboxes[1])
                for num_obj in range(num_object):
                    # obj = 0, 1, 2
                    obj_id = bboxes[1 + (num_obj) * 6 + 1]
                    obj_x1 = int(bboxes[1 + (num_obj) * 6 + 2])
                    obj_y1 = int(bboxes[1 + (num_obj) * 6 + 3])
                    obj_x2 = int(bboxes[1 + (num_obj) * 6 + 4])
                    obj_y2 = int(bboxes[1 + (num_obj) * 6 + 5])
    
                    box_list.append([obj_x1, obj_y1, obj_x2, obj_y2])
                    # process coord
                    obj_x1, obj_y1 = restrictCoords(width, height, obj_x1, obj_y1)
                    obj_x2, obj_y2 = restrictCoords(width, height, obj_x2, obj_y2)
    
                    specific_object_name = video_name + "_" + obj_id
    
                    # mkdir for reid dataset
                    id_dir = os.path.join(reid_dst_path, specific_object_name)
    
                    if not os.path.exists(id_dir):
                        os.makedirs(id_dir)
    
                    # save pic
                    img = extractVideoImgs(frame_id, video_frame_save_path,
                                           (obj_x1, obj_y1, obj_x2, obj_y2))
                    print(type(img))
    
                    if img is None or img.shape[0] == 0 or img.shape[1] == 0:
                        print(specific_object_name + " is empty")
                        continue
    
                    # print(frame_id)
                    img = cv2.resize(img, (256, 256))
    
                    normalizedImg = np.zeros((256, 256))
                    img = cv2.normalize(img, normalizedImg, 0, 255,
                                        cv2.NORM_MINMAX)
    
                    cv2.imwrite(
                        os.path.join(id_dir, "%s_%d.jpg") %
                        (specific_object_name, frame_id), img)
    
                max_w = width - 256
                max_h = height - 256
    
                # 随机选取左上角坐标
                select_x = random.randint(1, max_w)
                select_y = random.randint(1, max_h)
                rand_box = [select_x, select_y, select_x + 256, select_y + 256]
    
                # 背景图保存位置
                bg_dir = os.path.join(reid_dst_path, "bg")
                if not os.path.exists(bg_dir):
                    os.makedirs(bg_dir)
    
                iou_list = []
    
                for idx in range(len(box_list)):
                    cow_box = box_list[idx]
                    iou = bbox_iou(cow_box, rand_box)
                    iou_list.append(iou)
    
                # print("iou list:" , iou_list)
    
                if np.array(iou_list).all() < 0:
                    img = extractVideoImgs(frame_id, video_frame_save_path,
                                           rand_box)
                    if img is None:
                        print(specific_object_name + "is empty")
                        continue
                    normalizedImg = np.zeros((256, 256))
                    img = cv2.normalize(img, normalizedImg, 0, 255,
                                        cv2.NORM_MINMAX)
                    cv2.imwrite(
                        os.path.join(bg_dir, "bg_%s_%d.jpg") %
                        (video_name, frame_id), img)
    
            f_txt.close()
            postprocess(video_frame_save_path)
            shutil.move(video_path, done_dir)
            shutil.move(txt_path, done_dir)
    

    数据集配套代码在: https://github.com/pprp/reid_for_deepsort

    3. DarkLabel转MOT16格式

    其实DarkLabel标注得到信息和MOT16是几乎一致的,只不过需要转化一下,脚本如下:

    import os
    '''
    gt.txt:
    ---------
    frame(从1开始计), id, box(left top w, h),ignore=1(不忽略), class=1(从1开始),覆盖=1), 
    1,1,1363,569,103,241,1,1,0.86014
    2,1,1362,568,103,241,1,1,0.86173
    3,1,1362,568,103,241,1,1,0.86173
    4,1,1362,568,103,241,1,1,0.86173
    
    cutout24_gt.txt
    ---
    frame(从0开始计), 数量, id(从0开始), box(x1,y1,x2,y2), class=null
    0,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null
    1,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null
    2,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null
    '''
    
    
    def xyxy2xywh(x):
        # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
        # y = torch.zeros_like(x) if isinstance(x,
        #                                       torch.Tensor) else np.zeros_like(x)
        y = [0, 0, 0, 0]
    
        y[0] = (x[0] + x[2]) / 2
        y[1] = (x[1] + x[3]) / 2
        y[2] = x[2] - x[0]
        y[3] = x[3] - x[1]
        return y
    
    def process_darklabel(video_label_path, mot_label_path):
        f = open(video_label_path, "r")
        f_o = open(mot_label_path, "w")
    
        contents = f.readlines()
    
        for line in contents:
            line = line[:-1]
            num_list = [num for num in line.split(',')]
    
            frame_id = int(num_list[0]) + 1
            total_num = int(num_list[1])
    
            base = 2
    
            for i in range(total_num):
    
                print(base, base + i * 6, base + i * 6 + 4)
    
                _id = int(num_list[base + i * 6]) + 1
                _box_x1 = int(num_list[base + i * 6 + 1])
                _box_y1 = int(num_list[base + i * 6 + 2])
                _box_x2 = int(num_list[base + i * 6 + 3])
                _box_y2 = int(num_list[base + i * 6 + 4])
    
                y = xyxy2xywh([_box_x1, _box_y1, _box_x2, _box_y2])
    
                write_line = "%d,%d,%d,%d,%d,%d,1,1,1
    " % (frame_id, _id, y[0],
                                                            y[1], y[2], y[3])
    
                f_o.write(write_line)
    
        f.close()
        f_o.close()
    
    if __name__ == "__main__":
        root_dir = "./data/videosample"
    
        for item in os.listdir(root_dir):
            full_path = os.path.join(root_dir, item)
    
            video_path = os.path.join(full_path, item+".mp4")
            video_label_path = os.path.join(full_path, item + "_gt.txt")
            mot_label_path = os.path.join(full_path, "gt.txt")
            process_darklabel(video_label_path, mot_label_path)
    

    以上就是DarkLabel转各种数据集格式的脚本了,DarkLabel还是非常方便的,可以快速构建自己的数据集。通常两分钟的视频可以生成2880张之多的图片,但是在目标检测中并不推荐将所有的图片都作为训练集,因为前后帧之间差距太小了,几乎是一模一样的。这种数据会导致训练速度很慢、泛化能力变差。

    有两种解决方案:

    • 可以选择隔几帧选取一帧作为数据集,比如每隔10帧作为数据集。具体选择多少作为间隔还是具体问题具体分析,如果视频中变化目标变化较快,可以适当缩短间隔;如果视频中大部分都是静止对象,可以适当增大间隔。
    • 还有一种更好的方案是:对原视频用ffmpeg提取关键帧,将关键帧的内容作为数据集。关键帧和关键帧之间的差距比较大,适合作为目标检测数据集。
  • 相关阅读:
    C#不显示在任务栏
    打开文件,文件夹
    C#文本操作
    C#路径2
    C#当前程序路径获取
    HDU 5155 Harry And Magic Box dp
    POJ 1971 Parallelogram Counting
    CodeForces 479C Exams 贪心
    CodeForces 508E Arthur and Brackets 贪心
    CodeForces 483B 二分答案
  • 原文地址:https://www.cnblogs.com/pprp/p/12807967.html
Copyright © 2020-2023  润新知