CrowdHuman数据集标注格式转换为YOLOv3可以使用的COCO格式

需要了解CrowdHuman的数据标注格式odgt（文件中每一行是一条JSON记录），以及YOLOv3需要的COCO格式（不需要使用json文件，只需要图片路径列表和每张图片对应的txt标注信息）

YOLOv3 github地址：https://github.com/eriklindernoren/PyTorch-YOLOv3

保存每一张图片的路径信息（请将下面的脚本保存为 img2txt.py，后面的标注转换脚本会通过 import img2txt 调用其中的 load_file）

 1 import os
 2 import json
 3 
 4 
 5 def load_file(fpath):  # fpath是具体的文件 ，作用：#str to list
 6     assert os.path.exists(fpath)  # assert() raise-if-not
 7     with open(fpath, 'r') as fid:
 8         lines = fid.readlines()
 9     records = [json.loads(line.strip('
')) for line in lines]  # str to list
10     return records
11 
12 
def img2txt(odgtpath, respath, image_dir="/datasets/crowdhuman/images/val/Image/"):
    """Write the image path of every annotation record to a text file.

    Each record's ``'ID'`` is turned into ``image_dir + ID + '.jpg'`` and the
    resulting paths are written to ``respath``, one per line.

    Args:
        odgtpath: Path of the ``.odgt`` annotation file to read.
        respath: Path of the text file to create (overwritten if present).
        image_dir: Prefix prepended to every image file name. It is joined by
            plain concatenation, so it must end with a path separator.
    """
    records = load_file(odgtpath)
    print(os.getcwd())  # shows which directory a relative ``respath`` resolves against
    with open(respath, 'w') as txt:
        txt.writelines(image_dir + record['ID'] + '.jpg' + '\n' for record in records)
23 
24 
if __name__ == '__main__':
    # Script entry point: dump the image path list for the validation split.
    img2txt(
        odgtpath="/datasets/crowdhuman/annotation_val.odgt",
        respath="/datasets/crowdhuman/val_name.txt",
    )

保存每一张图片标注信息中的全身坐标fbox

 1 import time
 2 import img2txt
 3 from PIL import Image
 4 
 5 
 6 def tonormlabel(odgtpath, storepath):
 7     records = img2txt.load_file(odgtpath)
 8     record_list = len(records)
 9     print(record_list)
10     categories = {}
11     # txt = open(respath, 'w')
12     for i in range(record_list):
13         txt_name = storepath + records[i]['ID'] + '.txt'
14         file_name = records[i]['ID'] + '.jpg'
15         #print(i)
16         im = Image.open("/datasets/crowdhuman/images/train_all/Image/" + file_name)
17         height = im.size[1]
18         width = im.size[0]
19         file = open(txt_name, 'w')
20         gt_box = records[i]['gtboxes']
21         gt_box_len = len(gt_box)  # 每一个字典gtboxes里，也有好几个记录，分别提取记录。
22         for j in range(gt_box_len):
23             category = gt_box[j]['tag']
24             if category not in categories:  # 该类型不在categories，就添加上去
25                 new_id = len(categories) + 1  # ID递增
26                 categories[category] = new_id
27             category_id = categories[category]  # 重新获取它的类别ID
28             fbox = gt_box[j]['fbox']  # 获得全身框
29             norm_x = fbox[0] / width
30             norm_y = fbox[1] / height
31             norm_w = fbox[2] / width
32             norm_h = fbox[3] / height
33             '''
34             norm_x = 0 if norm_x <= 0 else norm_x
35             norm_x = 1 if norm_x >= 1 else norm_x
36             norm_y = 0 if norm_y <= 0 else norm_y
37             norm_y = 1 if norm_y >= 1 else norm_y
38             norm_w = 0 if norm_w <= 0 else norm_w
39             norm_w = 1 if norm_w >= 1 else norm_w
40             norm_h = 0 if norm_h <= 0 else norm_h
41             norm_h = 1 if norm_h >= 1 else norm_h
42             '''
43             blank = ' '
44             if j == gt_box_len-1:
45                 file.write(str(category_id - 1) + blank + '{:.6f}'.format(norm_x) + blank + '{:.6f}'.format(norm_y) + blank
46                            + '{:.6f}'.format(norm_w) + blank + '{:.6f}'.format(norm_h))
47             else:
48                 file.write(str(category_id - 1) + blank + '{:.6f}'.format(norm_x) + blank + '{:.6f}'.format(norm_y) + blank
49                            + '{:.6f}'.format(norm_w) + blank + '{:.6f}'.format(norm_h) + '
')
50 
51 
if __name__ == '__main__':
    # Script entry point: convert the training annotations and report the
    # wall-clock time before and after, plus the elapsed seconds.
    stamp_format = '%Y-%m-%d %H:%M:%S'
    annotation_file = "/datasets/crowdhuman/annotation_train.odgt"
    label_dir = "/datasets/crowdhuman/labels/train_all/Image/"

    print(time.strftime(stamp_format, time.localtime()))
    started_at = time.time()
    tonormlabel(annotation_file, label_dir)
    finished_at = time.time()
    print(time.strftime(stamp_format, time.localtime()))
    print('已完成转换，共耗时{:.5f}s'.format(finished_at - started_at))

相关阅读:
nginx win10 配置启动bat脚本
 linux ctrl + s 导致锁死解决
 linux 执行shell 不小心导致无限死循环解决
 linux vim 意外退出导致下次vim进入报错提示恢复
 vue 自定义组件使用vmodel属性的具体说明，重点说明参数的定义
 echarts 官网首页能进去，但是演示和文档地址进不去的 win10解决办法
 mysql 报错 This function has none of DETERMINISTIC, NO SQL, or READS SQL DATA in its declaration and binary logging is enabled
elementui h5 引入elementui 报错提示没有字体
 .net 中文传参
 ASP.NET， IE6下URL中文乱码问题 ASP.NET程序，当URL后缀包含奇数个中文字符
原文地址：https://www.cnblogs.com/DJames23/p/13395699.html