1.命名
文件夹名为VOC2007,图片名为六位数字(例如000001.jpg)。用自己的数据集相应地替换VOC2007中的数据。
2.画目标包围框
由于每张图片需要选取目标框,所需时间较长,需要工具辅助。
下面文字和代码源自wuzuyu365的博文深度学习python图像标记工具labelTool。
深度学习训练需要标记图像中目标的位置和类别。之前使用的工具叫做BBox-Label-Tool-master,遇到大图像时显示不完整,没有自适应缩放;下面是改进后的Python脚本。
目录结构:图片目录名images, 标签目录名labels,图像目录下各类别目录名要以001,002,003,...的格式命名。
1 # -*- coding:utf-8 -*- 2 # ------------------------------------------------------------------------------- 3 # Name: Object bounding box label tool 4 # Purpose: Label object bboxes for ImageNet Detection data 5 # Author: Qiushi 6 # Created: 06/06/2014 7 8 # 9 # ------------------------------------------------------------------------------- 10 from __future__ import division 11 from Tkinter import * 12 import tkMessageBox 13 from PIL import Image, ImageTk 14 import os 15 import glob 16 import random 17 18 w0 = 1; # 图片原始宽度 19 h0 = 1; # 图片原始高度 20 21 # colors for the bboxes 22 COLORS = ['red', 'blue', 'yellow', 'pink', 'cyan', 'green', 'black'] 23 # image sizes for the examples 24 SIZE = 256, 256 25 26 # 指定缩放后的图像大小 27 DEST_SIZE = 500, 500 28 29 30 class LabelTool(): 31 def __init__(self, master): 32 # set up the main frame 33 self.parent = master 34 self.parent.title("LabelTool") 35 self.frame = Frame(self.parent) 36 self.frame.pack(fill=BOTH, expand=1) 37 self.parent.resizable(width=TRUE, height=TRUE) 38 39 # initialize global state 40 self.imageDir = '' 41 self.imageList = [] 42 self.egDir = '' 43 self.egList = [] 44 self.outDir = '' 45 self.cur = 0 46 self.total = 0 47 self.category = 0 48 self.imagename = '' 49 self.labelfilename = '' 50 self.tkimg = None 51 52 # initialize mouse state 53 self.STATE = {} 54 self.STATE['click'] = 0 55 self.STATE['x'], self.STATE['y'] = 0, 0 56 57 # reference to bbox 58 self.bboxIdList = [] 59 self.bboxId = None 60 self.bboxList = [] 61 self.hl = None 62 self.vl = None 63 64 # ----------------- GUI stuff --------------------- 65 # dir entry & load 66 self.label = Label(self.frame, text="Image Dir:") 67 self.label.grid(row=0, column=0, sticky=E) 68 self.entry = Entry(self.frame) 69 self.entry.grid(row=0, column=1, sticky=W + E) 70 self.ldBtn = Button(self.frame, text="Load", command=self.loadDir) 71 self.ldBtn.grid(row=0, column=2, sticky=W + E) 72 73 # main panel for labeling 74 self.mainPanel = Canvas(self.frame, cursor='tcross') 
75 self.mainPanel.bind("<Button-1>", self.mouseClick) 76 self.mainPanel.bind("<Motion>", self.mouseMove) 77 self.parent.bind("<Escape>", self.cancelBBox) # press <Espace> to cancel current bbox 78 self.parent.bind("s", self.cancelBBox) 79 self.parent.bind("a", self.prevImage) # press 'a' to go backforward 80 self.parent.bind("d", self.nextImage) # press 'd' to go forward 81 self.mainPanel.grid(row=1, column=1, rowspan=4, sticky=W + N) 82 83 # showing bbox info & delete bbox 84 self.lb1 = Label(self.frame, text='Bounding boxes:') 85 self.lb1.grid(row=1, column=2, sticky=W + N) 86 87 self.listbox = Listbox(self.frame, width=28, height=12) 88 self.listbox.grid(row=2, column=2, sticky=N) 89 90 self.btnDel = Button(self.frame, text='Delete', command=self.delBBox) 91 self.btnDel.grid(row=3, column=2, sticky=W + E + N) 92 self.btnClear = Button(self.frame, text='ClearAll', command=self.clearBBox) 93 self.btnClear.grid(row=4, column=2, sticky=W + E + N) 94 95 # control panel for image navigation 96 self.ctrPanel = Frame(self.frame) 97 self.ctrPanel.grid(row=5, column=1, columnspan=2, sticky=W + E) 98 self.prevBtn = Button(self.ctrPanel, text='<< Prev', width=10, command=self.prevImage) 99 self.prevBtn.pack(side=LEFT, padx=5, pady=3) 100 self.nextBtn = Button(self.ctrPanel, text='Next >>', width=10, command=self.nextImage) 101 self.nextBtn.pack(side=LEFT, padx=5, pady=3) 102 self.progLabel = Label(self.ctrPanel, text="Progress: / ") 103 self.progLabel.pack(side=LEFT, padx=5) 104 self.tmpLabel = Label(self.ctrPanel, text="Go to Image No.") 105 self.tmpLabel.pack(side=LEFT, padx=5) 106 self.idxEntry = Entry(self.ctrPanel, width=5) 107 self.idxEntry.pack(side=LEFT) 108 self.goBtn = Button(self.ctrPanel, text='Go', command=self.gotoImage) 109 self.goBtn.pack(side=LEFT) 110 111 # example pannel for illustration 112 self.egPanel = Frame(self.frame, border=10) 113 self.egPanel.grid(row=1, column=0, rowspan=5, sticky=N) 114 self.tmpLabel2 = Label(self.egPanel, text="Examples:") 115 
self.tmpLabel2.pack(side=TOP, pady=5) 116 117 self.egLabels = [] 118 for i in range(3): 119 self.egLabels.append(Label(self.egPanel)) 120 self.egLabels[-1].pack(side=TOP) 121 122 # display mouse position 123 self.disp = Label(self.ctrPanel, text='') 124 self.disp.pack(side=RIGHT) 125 126 self.frame.columnconfigure(1, weight=1) 127 self.frame.rowconfigure(4, weight=1) 128 129 # for debugging 130 131 ## self.setImage() 132 ## self.loadDir() 133 134 135 def loadDir(self, dbg=False): 136 if not dbg: 137 s = self.entry.get() 138 self.parent.focus() 139 self.category = int(s) 140 else: 141 s = r'D:workspacepythonlabelGUI' 142 ## if not os.path.isdir(s): 143 ## tkMessageBox.showerror("Error!", message = "The specified dir doesn't exist!") 144 ## return 145 # get image list 146 147 print 'self.category =%d' % (self.category) 148 149 self.imageDir = os.path.join(r'./images', '%03d' % (self.category)) 150 print(self.imageDir) 151 self.imageList = glob.glob(os.path.join(self.imageDir, '*.jpg')) 152 if len(self.imageList) == 0: 153 print 'No .jpg images found in the specified dir!' 
154 return 155 else: 156 print 'num=%d' % (len(self.imageList)) 157 158 # default to the 1st image in the collection 159 self.cur = 1 160 self.total = len(self.imageList) 161 162 # set up output dir 163 self.outDir = os.path.join(r'./labels', '%03d' % (self.category)) 164 if not os.path.exists(self.outDir): 165 os.mkdir(self.outDir) 166 167 # load example bboxes 168 self.egDir = os.path.join(r'./Examples', '%03d' % (self.category)) 169 # if not os.path.exists(self.egDir): 170 # return 171 172 filelist = glob.glob(os.path.join(self.egDir, '*.jpg')) 173 self.tmp = [] 174 self.egList = [] 175 random.shuffle(filelist) 176 for (i, f) in enumerate(filelist): 177 if i == 3: 178 break 179 im = Image.open(f) 180 r = min(SIZE[0] / im.size[0], SIZE[1] / im.size[1]) 181 new_size = int(r * im.size[0]), int(r * im.size[1]) 182 self.tmp.append(im.resize(new_size, Image.ANTIALIAS)) 183 self.egList.append(ImageTk.PhotoImage(self.tmp[-1])) 184 self.egLabels[i].config(image=self.egList[-1], width=SIZE[0], height=SIZE[1]) 185 186 self.loadImage() 187 print '%d images loaded from %s' % (self.total, s) 188 189 def loadImage(self): 190 # load image 191 imagepath = self.imageList[self.cur - 1] 192 pil_image = Image.open(imagepath) 193 194 # get the size of the image 195 # 获取图像的原始大小 196 global w0, h0 197 w0, h0 = pil_image.size 198 199 # 缩放到指定大小 200 pil_image = pil_image.resize((DEST_SIZE[0], DEST_SIZE[1]), Image.ANTIALIAS) 201 202 # pil_image = imgresize(w, h, w_box, h_box, pil_image) 203 self.img = pil_image 204 205 self.tkimg = ImageTk.PhotoImage(pil_image) 206 207 self.mainPanel.config(width=max(self.tkimg.width(), 400), height=max(self.tkimg.height(), 400)) 208 self.mainPanel.create_image(0, 0, image=self.tkimg, anchor=NW) 209 self.progLabel.config(text="%04d/%04d" % (self.cur, self.total)) 210 211 # load labels 212 self.clearBBox() 213 self.imagename = os.path.split(imagepath)[-1].split('.')[0] 214 labelname = self.imagename + '.txt' 215 self.labelfilename = os.path.join(self.outDir, 
labelname) 216 bbox_cnt = 0 217 if os.path.exists(self.labelfilename): 218 with open(self.labelfilename) as f: 219 for (i, line) in enumerate(f): 220 if i == 0: 221 bbox_cnt = int(line.strip()) 222 continue 223 print line 224 tmp = [(t.strip()) for t in line.split()] 225 226 print "********************" 227 print DEST_SIZE 228 # tmp = (0.1, 0.3, 0.5, 0.5) 229 print "tmp[0,1,2,3]===%.2f, %.2f, %.2f, %.2f" % ( 230 float(tmp[0]), float(tmp[1]), float(tmp[2]), float(tmp[3])) 231 # print "%.2f,%.2f,%.2f,%.2f" %(tmp[0] tmp[1] tmp[2] tmp[3] ) 232 233 print "********************" 234 235 # tx = (10, 20, 30, 40) 236 # self.bboxList.append(tuple(tx)) 237 self.bboxList.append(tuple(tmp)) 238 tmp[0] = float(tmp[0]) 239 tmp[1] = float(tmp[1]) 240 tmp[2] = float(tmp[2]) 241 tmp[3] = float(tmp[3]) 242 243 tx0 = int(tmp[0] * DEST_SIZE[0]) 244 ty0 = int(tmp[1] * DEST_SIZE[1]) 245 246 tx1 = int(tmp[2] * DEST_SIZE[0]) 247 ty1 = int(tmp[3] * DEST_SIZE[1]) 248 print "tx0, ty0, tx1, ty1" 249 print tx0, ty0, tx1, ty1 250 251 tmpId = self.mainPanel.create_rectangle(tx0, ty0, tx1, ty1, 252 width=2, 253 outline=COLORS[(len(self.bboxList) - 1) % len(COLORS)]) 254 255 self.bboxIdList.append(tmpId) 256 self.listbox.insert(END, '(%.2f,%.2f)-(%.2f,%.2f)' % (tmp[0], tmp[1], tmp[2], tmp[3])) 257 258 # self.listbox.insert(END, '(%d, %d) -> (%d, %d)' %(tmp[0], tmp[1], tmp[2], tmp[3])) 259 self.listbox.itemconfig(len(self.bboxIdList) - 1, 260 fg=COLORS[(len(self.bboxIdList) - 1) % len(COLORS)]) 261 262 def saveImage(self): 263 # print "-----1--self.bboxList---------" 264 print self.bboxList 265 # print "-----2--self.bboxList---------" 266 267 with open(self.labelfilename, 'w') as f: 268 f.write('%d ' % len(self.bboxList)) 269 for bbox in self.bboxList: 270 f.write(' '.join(map(str, bbox)) + ' ') 271 print 'Image No. 
%d saved' % (self.cur) 272 273 def mouseClick(self, event): 274 if self.STATE['click'] == 0: 275 self.STATE['x'], self.STATE['y'] = event.x, event.y 276 else: 277 x1, x2 = min(self.STATE['x'], event.x), max(self.STATE['x'], event.x) 278 y1, y2 = min(self.STATE['y'], event.y), max(self.STATE['y'], event.y) 279 280 x1, x2 = x1 / DEST_SIZE[0], x2 / DEST_SIZE[0]; 281 y1, y2 = y1 / DEST_SIZE[1], y2 / DEST_SIZE[1]; 282 283 self.bboxList.append((x1, y1, x2, y2)) 284 self.bboxIdList.append(self.bboxId) 285 self.bboxId = None 286 self.listbox.insert(END, '(%.2f, %.2f)-(%.2f, %.2f)' % (x1, y1, x2, y2)) 287 self.listbox.itemconfig(len(self.bboxIdList) - 1, fg=COLORS[(len(self.bboxIdList) - 1) % len(COLORS)]) 288 self.STATE['click'] = 1 - self.STATE['click'] 289 290 def mouseMove(self, event): 291 self.disp.config(text='x: %.2f, y: %.2f' % (event.x / DEST_SIZE[0], event.y / DEST_SIZE[1])) 292 if self.tkimg: 293 if self.hl: 294 self.mainPanel.delete(self.hl) 295 self.hl = self.mainPanel.create_line(0, event.y, self.tkimg.width(), event.y, width=2) 296 if self.vl: 297 self.mainPanel.delete(self.vl) 298 self.vl = self.mainPanel.create_line(event.x, 0, event.x, self.tkimg.height(), width=2) 299 if 1 == self.STATE['click']: 300 if self.bboxId: 301 self.mainPanel.delete(self.bboxId) 302 self.bboxId = self.mainPanel.create_rectangle(self.STATE['x'], self.STATE['y'], 303 event.x, event.y, 304 width=2, 305 outline=COLORS[len(self.bboxList) % len(COLORS)]) 306 307 def cancelBBox(self, event): 308 if 1 == self.STATE['click']: 309 if self.bboxId: 310 self.mainPanel.delete(self.bboxId) 311 self.bboxId = None 312 self.STATE['click'] = 0 313 314 def delBBox(self): 315 sel = self.listbox.curselection() 316 if len(sel) != 1: 317 return 318 idx = int(sel[0]) 319 self.mainPanel.delete(self.bboxIdList[idx]) 320 self.bboxIdList.pop(idx) 321 self.bboxList.pop(idx) 322 self.listbox.delete(idx) 323 324 def clearBBox(self): 325 for idx in range(len(self.bboxIdList)): 326 
self.mainPanel.delete(self.bboxIdList[idx]) 327 self.listbox.delete(0, len(self.bboxList)) 328 self.bboxIdList = [] 329 self.bboxList = [] 330 331 def prevImage(self, event=None): 332 self.saveImage() 333 if self.cur > 1: 334 self.cur -= 1 335 self.loadImage() 336 337 def nextImage(self, event=None): 338 self.saveImage() 339 if self.cur < self.total: 340 self.cur += 1 341 self.loadImage() 342 343 def gotoImage(self): 344 idx = int(self.idxEntry.get()) 345 if 1 <= idx and idx <= self.total: 346 self.saveImage() 347 self.cur = idx 348 self.loadImage() 349 350 ## def setImage(self, imagepath = r'test2.png'): 351 ## self.img = Image.open(imagepath) 352 ## self.tkimg = ImageTk.PhotoImage(self.img) 353 ## self.mainPanel.config(width = self.tkimg.width()) 354 ## self.mainPanel.config(height = self.tkimg.height()) 355 ## self.mainPanel.create_image(0, 0, image = self.tkimg, anchor=NW) 356 357 def imgresize(w, h, w_box, h_box, pil_image): 358 ''' 359 resize a pil_image object so it will fit into 360 a box of size w_box times h_box, but retain aspect ratio 361 ''' 362 f1 = 1.0 * w_box / w # 1.0 forces float division in Python2 363 f2 = 1.0 * h_box / h 364 factor = min([f1, f2]) 365 # print(f1, f2, factor) # test 366 # use best down-sizing filter 367 width = int(w * factor) 368 height = int(h * factor) 369 return pil_image.resize((width, height), Image.ANTIALIAS) 370 371 372 if __name__ == '__main__': 373 root = Tk() 374 tool = LabelTool(root) 375 root.mainloop()
快捷键a,d前后切换图像,esc取消当前框,使用效果如下。
3.制作xml
上一步完成后得到的结果是TXT文件。(我没有先重命名)
需要的XML文件格式如下。
<annotation> <folder>VOC2007</folder> <filename>000002.jpg</filename> <source> <database>My Database</database> <annotation>VOC2007</annotation> <image>flickr</image> <flickrid>NULL</flickrid> </source> <owner> <flickrid>NULL</flickrid> <name>J</name> </owner> <size> <width>256</width> <height>256</height> <depth>3</depth> </size> <segmented>0</segmented> <object> <name>2</name> <pose>Unspecified</pose> <truncated>0</truncated> <difficult>0</difficult> <bndbox> <xmin>31</xmin> <ymin>20</ymin> <xmax>241</xmax> <ymax>250</ymax> </bndbox> </object> </annotation>
生成代码如下。
"""Convert labelTool .txt box files into PASCAL VOC style XML annotations.

Input:  labels/001/*.txt  (first line = box count, then one
        "x1 y1 x2 y2" line per box, coordinates normalised to 0..1)
Output: Annotations/NNNNNN.xml, one file per label file that has boxes.
"""
import glob
import os
import re

LABEL_GLOB = os.path.join('labels', '001', '*.txt')
OUTPUT_DIR = 'Annotations'
CLASS_NAME = 2      # value written into every <object><name>
IMAGE_SIZE = 256    # width and height of the training images, in pixels

OBJECT_TEMPLATE = """    <object>
        <name>{name}</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>{xmin}</xmin>
            <ymin>{ymin}</ymin>
            <xmax>{xmax}</xmax>
            <ymax>{ymax}</ymax>
        </bndbox>
    </object>"""

ANNOTATION_TEMPLATE = """<annotation>
    <folder>VOC2007</folder>
    <filename>{filename}</filename>
    <source>
        <database>My Database</database>
        <annotation>VOC2007</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>J</name>
    </owner>
    <size>
        <width>{width}</width>
        <height>{height}</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
{objects}
</annotation>
"""


def parse_label_lines(lines, width=IMAGE_SIZE, height=IMAGE_SIZE):
    """Return pixel boxes ``(xmin, ymin, xmax, ymax)`` from label file lines.

    The first line (the box count) is skipped. Every other line holds four
    normalised floats; x values are scaled by *width*, y values by *height*.
    Blank or truncated lines are ignored.
    """
    boxes = []
    for line in lines[1:]:
        fields = line.split()
        if len(fields) < 4:
            continue
        x1, y1, x2, y2 = (float(v) for v in fields[:4])
        boxes.append((int(x1 * width), int(y1 * height),
                      int(x2 * width), int(y2 * height)))
    return boxes


def image_index(label_path):
    """Return the image number encoded in the label file name.

    The last run of digits in the base name is used, so both ``000123.txt``
    and a prefixed name such as ``ab123.txt`` give 123, independent of the
    directory part of the path.

    Raises:
        ValueError: if the file name contains no digits.
    """
    stem = os.path.splitext(os.path.basename(label_path))[0]
    digits = re.findall(r'\d+', stem)
    if not digits:
        raise ValueError('no image number in file name: %r' % label_path)
    return int(digits[-1])


def build_annotation(filename, class_name, boxes,
                     width=IMAGE_SIZE, height=IMAGE_SIZE):
    """Return the annotation XML text for one image.

    Args:
        filename: image file name written into ``<filename>``.
        class_name: value written into ``<name>`` of every object.
        boxes: iterable of ``(xmin, ymin, xmax, ymax)`` pixel tuples.
        width, height: image size written into ``<size>``.
    """
    objects = '\n'.join(
        OBJECT_TEMPLATE.format(name=class_name, xmin=box[0], ymin=box[1],
                               xmax=box[2], ymax=box[3])
        for box in boxes)
    return ANNOTATION_TEMPLATE.format(filename=filename, width=width,
                                      height=height, objects=objects)


def main():
    """Write one XML file for every label file that contains boxes."""
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    for label_path in sorted(glob.glob(LABEL_GLOB)):
        with open(label_path, 'r') as label_file:
            lines = label_file.readlines()
        boxes = parse_label_lines(lines)
        if not boxes:
            continue  # no annotation
        stem = '%06d' % image_index(label_path)
        xml_text = build_annotation(stem + '.jpg', CLASS_NAME, boxes)
        with open(os.path.join(OUTPUT_DIR, stem + '.xml'), 'w') as xml_file:
            xml_file.write(xml_text)


if __name__ == '__main__':
    main()
4.ImageSets\Main里的四个txt文件
test.txt是测试集,train.txt是训练集,val.txt是验证集,trainval.txt是训练和验证集。内容为文件名。
VOC2007中,trainval大概是整个数据集的50%,test也大概是整个数据集的50%;train大概是trainval的50%,val大概是trainval的50%。
由于本次数据较少,两类共计约400张,train、val、test各占33%。
"""Split the annotated images into trainval / train / val / test lists.

Reads the XML file names in ``Annotations`` and writes trainval.txt,
train.txt, val.txt and test.txt (one image name per line, no extension)
into ``ImageSets/Main``.
"""
import os
import random

trainval_percent = 0.66   # share of all images that go into trainval
train_percent = 0.5       # share of trainval that goes into train
xmlfilepath = 'Annotations'
txtsavepath = os.path.join('ImageSets', 'Main')


def split_indices(num, trainval_percent, train_percent):
    """Randomly choose the trainval and train index sets out of ``range(num)``.

    Returns:
        ``(trainval, train)`` as two sets with ``train`` a subset of
        ``trainval``; sizes are ``int(num * trainval_percent)`` and
        ``int(len(trainval) * train_percent)``.
    """
    trainval_count = int(num * trainval_percent)
    train_count = int(trainval_count * train_percent)
    trainval = random.sample(range(num), trainval_count)
    train = random.sample(trainval, train_count)
    # sets give constant-time membership tests when partitioning
    return set(trainval), set(train)


def partition_names(names, trainval, train):
    """Distribute *names* over the four subsets by position.

    Index ``i`` goes to ``trainval`` (and then to ``train`` or ``val``) when
    it is in the *trainval* set, otherwise to ``test``.

    Returns:
        dict with the keys ``trainval``, ``train``, ``val`` and ``test``,
        each mapping to a list of names in input order.
    """
    subsets = {'trainval': [], 'train': [], 'val': [], 'test': []}
    for i, name in enumerate(names):
        if i in trainval:
            subsets['trainval'].append(name)
            subsets['train' if i in train else 'val'].append(name)
        else:
            subsets['test'].append(name)
    return subsets


def main():
    """Create the four image-set files from the XML files on disk."""
    # only real annotation files count; strip the extension properly
    names = sorted(os.path.splitext(entry)[0]
                   for entry in os.listdir(xmlfilepath)
                   if entry.lower().endswith('.xml'))
    trainval, train = split_indices(len(names), trainval_percent, train_percent)
    subsets = partition_names(names, trainval, train)

    if not os.path.isdir(txtsavepath):
        os.makedirs(txtsavepath)
    for subset, members in subsets.items():
        with open(os.path.join(txtsavepath, subset + '.txt'), 'w') as out:
            out.writelines(name + '\n' for name in members)


if __name__ == '__main__':
    main()
数据制作就完成了。