参考博客:https://blog.csdn.net/qq_41672428/article/details/107451834
xml文件格式如下所示:
<annotation verified="no">
    <folder>JPG</folder>
    <filename>driving_0</filename>
    <path>E:\object detective\Fatigue Driving\Driving_Dataset_Labelimg\JPG\driving_0.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>416</width>
        <height>416</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>mouth_close</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <Difficult>0</Difficult>
        <bndbox>
            <xmin>203</xmin>
            <ymin>168</ymin>
            <xmax>246</xmax>
            <ymax>198</ymax>
        </bndbox>
    </object>
    <object>
        <name>sunglasses</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <Difficult>0</Difficult>
        <bndbox>
            <xmin>188</xmin>
            <ymin>99</ymin>
            <xmax>275</xmax>
            <ymax>144</ymax>
        </bndbox>
    </object>
</annotation>
将 xml 数据集格式转为 json 格式(需利用百度飞桨平台训练),主要获取文件名、标签和框坐标等信息。
主要代码如下:
""" @author : Chenery @Time : 2020/8/19 @Function : 将xml文件转换为json文件(在百度开发平台训练) xml.etree.cElementTree 函数的用法 """ import os import xml.etree.cElementTree as ET import json #解析Xml中标注框的label和bbox def get_bbox(xmlname): sig_xml_box = [] label_name=[] tree = ET.parse(xmlname) root = tree.getroot() for i in root: # 遍历一级节点 if i.tag == 'object': for j in i: if j.tag == 'name': cls_name = j.text label_name.append(cls_name) if j.tag == 'bndbox': bbox = [] xmin = 0 ymin = 0 xmax = 0 ymax = 0 for r in j: if r.tag == 'xmin': xmin = eval(r.text) if r.tag == 'ymin': ymin = eval(r.text) if r.tag == 'xmax': xmax = eval(r.text) if r.tag == 'ymax': ymax = eval(r.text) bbox.append(xmin) bbox.append(ymin) bbox.append(xmax) bbox.append(ymax) sig_xml_box.append(bbox) return label_name, sig_xml_box #获得转换Json文件 def get_json(xml_dir): xml_list = os.listdir(xml_dir) for xml_name in xml_list: json_name = xml_name.split('.')[0]+'.json' json_path = os.path.join(json_dir, json_name) xml_path = os.path.join(xml_dir, xml_name) label_name, sigxml_bbox = get_bbox(xml_path) ann_js = {} annotations = [] for index, box in enumerate(sigxml_bbox): anno = {} anno['name'] = label_name[index] anno['x1'] = box[0] anno['y1'] = box[1] anno['x2'] = box[2] anno['y2'] = box[3] annotations.append(anno) ann_js['labels'] = annotations json.dump(ann_js, open(json_path, 'w'), indent=4) # indent=4 更加美观显示
EasyDL 平台要求数据格式如下: