Python 批量生成 XML 标注文件（基于连通域分割获取目标坐标）

#!/usr/bin/python
# -*- coding=utf-8 -*-
# author : Manuel
# date: 2019-05-15

from xml.etree import ElementTree as ET
import numpy as np
from skimage import data,filters,segmentation,measure,morphology,color
from scipy.misc import imread
import os
from os import getcwd


IMAGES_LIST=os.listdir('ls')  # entry names of the 'ls' image directory (relative to the cwd), listed once at import time

# Connected-component segmentation: return padded bounding boxes of large regions.
def connected_domain_position_get(image, min_area=1000, padding=10):
    """Find large connected foreground regions and return their padded boxes.

    The image is binarised with Otsu's threshold, closed with a 3x3 square
    footprint, stripped of objects touching the image border, and labelled.

    Args:
        image: 2-D grayscale image array.
        min_area: Regions whose pixel area is below this value are ignored.
        padding: Margin, in pixels, added on every side of each bounding box.

    Returns:
        A list of ``[left, upper, right, lower]`` boxes (x/y pixel
        coordinates), each clamped to the image bounds.
    """
    thresh = filters.threshold_otsu(image)  # automatic global threshold
    # Closing fills small holes/gaps in the thresholded (boolean) mask.
    bw = morphology.closing(image > thresh, morphology.square(3))
    # clear_border returns the cleaned array; it does not modify its input
    # by default, so the result must be captured for border objects to be
    # removed at all.
    cleared = segmentation.clear_border(bw)
    # Border-touching objects are now background (0), which regionprops
    # skips, so no additional masking of those pixels is needed.
    label_image = measure.label(cleared)

    height, width = image.shape[:2]
    coordinates_list = []
    for region in measure.regionprops(label_image):
        if region.area < min_area:
            continue  # ignore small regions
        minr, minc, maxr, maxc = region.bbox
        # Pad the box, but never let it leave the image: out-of-range
        # coordinates are invalid in an annotation file.
        coordinates_list.append([
            max(minc - padding, 0),
            max(minr - padding, 0),
            min(maxc + padding, width),
            min(maxr + padding, height),
        ])
    return coordinates_list

# Append one <object> entry (class label plus bounding box) to the annotation root.
def create_object(root, xi, yi, xa, ya, class_name='AreaMissing'):
    """Append an ``<object>`` element describing one bounding box to *root*.

    Args:
        root: Parent element (the ``annotation`` root) to attach to.
        xi: Box ``xmin``.
        yi: Box ``ymin``.
        xa: Box ``xmax``.
        ya: Box ``ymax``.
        class_name: Text of the ``<name>`` tag; defaults to ``'AreaMissing'``.

    Returns:
        The newly created ``<object>`` element.
    """
    _object = ET.SubElement(root, 'object')

    # Fixed descriptive fields, in the order they appear in the output.
    for tag, text in (
        ('name', class_name),
        ('pose', 'Unspecified'),
        ('truncated', '0'),
        ('difficult', '0'),
    ):
        ET.SubElement(_object, tag).text = text

    # Element text must be str for serialisation; str() also copes with
    # values that '%s' % value would mis-handle (e.g. tuples).
    bndbox = ET.SubElement(_object, 'bndbox')
    for tag, value in (('xmin', xi), ('ymin', yi), ('xmax', xa), ('ymax', ya)):
        ET.SubElement(bndbox, tag).text = str(value)

    return _object

# Build the annotation skeleton (everything except <object> entries) for one image.
def create_tree(image_name, width=512, height=384, depth=3):
    """Create the ``<annotation>`` root element for *image_name*.

    The root is stored in the module-level ``annotation`` variable (existing
    callers read it from there) and is also returned.

    Args:
        image_name: Image file name, e.g. ``'foo.jpg'``.
        width: Value written to ``size/width``; defaults to 512.
        height: Value written to ``size/height``; defaults to 384.
        depth: Value written to ``size/depth``; defaults to 3.

    Returns:
        The newly created ``annotation`` root element.
    """
    global annotation
    annotation = ET.Element('annotation')

    ET.SubElement(annotation, 'folder').text = 'ls'

    # Drop the extension with splitext: str.strip('.jpg') removes any of the
    # characters '.', 'j', 'p', 'g' from BOTH ends ('pig.jpg' -> 'i').
    ET.SubElement(annotation, 'filename').text = os.path.splitext(image_name)[0]

    # Absolute path of the image inside the 'ls' folder of the current
    # working directory.
    ET.SubElement(annotation, 'path').text = os.path.join(getcwd(), 'ls', image_name)

    source = ET.SubElement(annotation, 'source')
    ET.SubElement(source, 'database').text = 'Unknown'

    # Image dimensions; element text must be str, hence the conversions.
    size = ET.SubElement(annotation, 'size')
    ET.SubElement(size, 'width').text = str(width)
    ET.SubElement(size, 'height').text = str(height)
    ET.SubElement(size, 'depth').text = str(depth)

    ET.SubElement(annotation, 'segmented').text = '0'

    return annotation




def main():
    """Write one XML annotation file per ``.jpg`` image in the ``ls`` folder.

    Each image is converted to grayscale and segmented into connected regions;
    every returned box becomes an ``<object>`` entry, and the tree is written
    to ``ls/<image stem>.xml``. Images with no qualifying region still get an
    annotation file, just without ``<object>`` entries.
    """
    for image_name in IMAGES_LIST:
        # Only handle JPEG files; require the dot so that names merely ending
        # in the letters "jpg" are not picked up.
        if not image_name.endswith('.jpg'):
            continue

        image = color.rgb2gray(imread(os.path.join(r'./ls', image_name)))
        # List of [left, upper, right, lower] boxes, one per region.
        coordinates_list = connected_domain_position_get(image)

        # create_tree publishes the new root as the module-level `annotation`.
        create_tree(image_name)
        for left, upper, right, lower in coordinates_list:
            create_object(annotation, left, upper, right, lower)

        # splitext, not strip('.jpg'): strip removes the characters
        # '.', 'j', 'p', 'g' from both ends and mangles names like 'pig.jpg'.
        stem = os.path.splitext(image_name)[0]
        ET.ElementTree(annotation).write('ls/%s.xml' % stem)


if __name__ == '__main__':
    main()

注：ElementTree 中各元素的 text 必须是字符串（str）；若直接赋值为 int 等非字符串类型，在 tree.write() 序列化时会抛出 TypeError，因此坐标等数值需先转换为字符串。

相关阅读:
118/119. Pascal's Triangle/II
160. Intersection of Two Linked Lists
168. Excel Sheet Column Title
167. Two Sum II
172. Factorial Trailing Zeroes
169. Majority Element
189. Rotate Array
202. Happy Number
204. Count Primes
MVC之Model元数据
原文地址：https://www.cnblogs.com/Manuel/p/10871188.html

最新文章
6.Hibernate 查询语言
 4.Hibernate O/R 映射
 3.Hibernate 映射类型
 2.Hibernate 会话
 day15
day14
day13
day12
day11
day10