1、widerface样本标签处理
输出格式(每张图片一行):图片名 x1 y1 x2 y2 x11 y11 x22 y22 ……(一行可包含多个人脸框,每个框依次为左上角坐标和右下角坐标)
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 21 16:06:05 2019

@author: admin

Flatten the WIDER FACE training annotation file into one text line per image:

    <image name> x1 y1 x2 y2 [x1 y1 x2 y2 ...]

where (x1, y1) is the top-left corner and (x2, y2) = (x1 + w, y1 + h) is the
bottom-right corner of each face box.
"""
import re
import linecache
import os

# Directory holding the ground-truth file; the converted file is written here
# as well.  Forward slashes are used on purpose: the backslash spelling ended
# in \" (which escapes the closing quote) and contained the escape \f.
FILEDIR = "G:/MTCNNTraining/faceData/widerFace/wider_face_split/"
SOURCE_NAME = 'wider_face_train_bbx_gt.txt'
TARGET_NAME = "widerTrainx1x2y1y2Two.txt"


def count_lines(file):
    """Return the number of newline characters in an open text file.

    The file is read in large chunks and is closed before returning, even if
    reading fails.

    Parameters
    ----------
    file : file object
        Readable text-mode file; it is consumed and closed.

    Returns
    -------
    int
        Number of ``'\\n'`` characters found.
    """
    lines_quantity = 0
    try:
        while True:
            buffer = file.read(1024 * 8192)
            if not buffer:
                break
            lines_quantity += buffer.count('\n')
    finally:
        file.close()
    return lines_quantity


def convert_annotations(lines):
    """Convert raw ground-truth lines into one record string per image.

    The input is expected to repeat this pattern: an image path line
    (``folder/name.jpg``), a line with the number of faces, then one line per
    face starting with ``x1 y1 w h``.  Any line that does not contain ``jpg``
    and is not consumed as a count/box line is skipped, which also covers the
    placeholder box line that follows a face count of 0.

    Parameters
    ----------
    lines : list of str
        Lines of the ground-truth file (trailing newlines allowed).

    Returns
    -------
    list of str
        One entry per image, without trailing newline:
        ``"name.jpg x1 y1 x2 y2 ..."``.  The folder part of the path is
        dropped.  Images without faces yield just the image name.

    Raises
    ------
    ValueError
        If the face count is missing or a count/box field is not an integer.
    """
    records = []
    total = len(lines)
    cursor = 0
    while cursor < total:
        header = lines[cursor].strip()
        cursor += 1
        if not re.search('jpg', header):
            continue

        # Keep only the part after the first '/', i.e. drop the folder name.
        _, slash, base_name = header.partition('/')
        if not slash:
            base_name = header

        if cursor >= total:
            raise ValueError("missing face count after %r" % header)
        face_count = int(lines[cursor].strip())
        cursor += 1

        fields = [base_name]
        for box_line in lines[cursor:cursor + face_count]:
            # x1, y1, w, h: (x1, y1) is the top-left corner of the face box.
            x1, y1, w, h = [int(value) for value in box_line.split()[:4]]
            fields.extend(str(value) for value in (x1, y1, x1 + w, y1 + h))
        cursor += face_count

        records.append(" ".join(fields))
    return records


def main():
    """Read the ground-truth file in FILEDIR and write the converted file."""
    with open(os.path.join(FILEDIR, SOURCE_NAME), 'r') as src:
        lines = src.readlines()

    records = convert_annotations(lines)

    # 'w' rather than 'a': running the script twice must not duplicate records.
    with open(os.path.join(FILEDIR, TARGET_NAME), 'w') as dst:
        dst.writelines(record + "\n" for record in records)
    print("%d images written" % len(records))


if __name__ == "__main__":
    main()
2、PNET,制作正样本、负样本、部分样本
IOU:utils.py
import numpy as np


def IoU(box, boxes):
    """Compute IoU between one detection box and ground-truth boxes.

    Coordinates are treated as inclusive pixel indices, hence the ``+ 1`` in
    every width/height computation.

    Parameters
    ----------
    box : array-like, shape (4,) or (5,)
        ``x1, y1, x2, y2[, score]``; only the first four values are used.
    boxes : array-like, shape (n, 4) or (4,)
        Ground-truth boxes ``x1, y1, x2, y2``.  A single 1-D box is treated
        as ``n == 1``.

    Returns
    -------
    numpy.ndarray, shape (n,)
        IoU of ``box`` with each ground-truth box.
    """
    box = np.asarray(box)
    boxes = np.atleast_2d(np.asarray(boxes))

    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)

    # Corners of the intersection rectangle.
    xx1 = np.maximum(box[0], boxes[:, 0])
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])

    # Width and height of the intersection, clamped at 0 for disjoint boxes.
    w = np.maximum(0, xx2 - xx1 + 1)
    h = np.maximum(0, yy2 - yy1 + 1)

    inter = w * h
    ovr = inter / (box_area + area - inter)
    return ovr


def convert_to_square(bbox):
    """Expand each box to a square with the same centre.

    The side of the square is the longer side of the original box.  Columns
    beyond the first four (e.g. a score) are copied unchanged.

    Parameters
    ----------
    bbox : array-like, shape (n, 5)
        Input boxes ``x1, y1, x2, y2, score``.
        NOTE(review): with an integer dtype the half-pixel offsets are
        truncated on assignment - pass floats if that matters.

    Returns
    -------
    numpy.ndarray
        New array of the same shape and dtype holding the square boxes.
    """
    bbox = np.atleast_2d(np.asarray(bbox))
    square_bbox = bbox.copy()

    h = bbox[:, 3] - bbox[:, 1] + 1
    w = bbox[:, 2] - bbox[:, 0] + 1
    max_side = np.maximum(h, w)

    # Shift the top-left corner so the centre stays where it was.
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
    return square_bbox
import sys
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU

# Side length in pixels of every saved training crop.
stdsize = 12
# One line per image: "<image name> x1 y1 x2 y2 [x1 y1 x2 y2 ...]".
anno_file = "G:/MTCNNTraining/widefaceData/widerTrainx1x2y1y2Two.txt"
im_dir = "G:/MTCNNTraining/widefaceData/widerfaceJPG/"
pos_save_dir = str(stdsize) + "/positive"
part_save_dir = str(stdsize) + "/part"
neg_save_dir = str(stdsize) + '/negative'
save_dir = "./" + str(stdsize)


def mkr(dr):
    """Create directory ``dr`` unless it already exists."""
    if not os.path.exists(dr):
        os.mkdir(dr)


def _parse_annotation(annotation):
    """Split one annotation line into (image name, float32 boxes of shape (n, 4)).

    Returns ``(None, None)`` for a blank line.
    """
    fields = annotation.split()
    if not fields:
        return None, None
    coords = list(map(float, fields[1:]))
    boxes = np.array(coords, dtype=np.float32).reshape(-1, 4)
    return fields[0], boxes


def _save_crop(img, x_from, y_from, x_to, y_to, save_file):
    """Crop ``img``, resize the crop to stdsize x stdsize and write it to disk."""
    cropped_im = img[int(y_from):int(y_to), int(x_from):int(x_to), :]
    resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                            interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(save_file, resized_im)


def main():
    """Generate negative, positive and part crops plus their list files.

    For every annotated image:
      * up to 100 random square crops whose IoU with every ground-truth box
        is below 0.3 are saved as negatives (label 0);
      * for every usable face box, 50 random crops around the box are drawn;
        IoU >= 0.65 is saved as positive (label 1), IoU >= 0.4 as part
        (label -1), each with the box offsets normalised by the crop size.
    """
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        mkr(directory)

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    print("%d pics in total" % len(annotations))

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care (part)
    idx = 0    # images visited

    pos_list = os.path.join(save_dir, 'pos_' + str(stdsize) + '.txt')
    neg_list = os.path.join(save_dir, 'neg_' + str(stdsize) + '.txt')
    part_list = os.path.join(save_dir, 'part_' + str(stdsize) + '.txt')

    with open(pos_list, 'w') as f1, open(neg_list, 'w') as f2, \
            open(part_list, 'w') as f3:
        for annotation in annotations:
            im_path, boxes = _parse_annotation(annotation)
            if im_path is None:
                continue

            img = cv2.imread(im_dir + im_path)
            idx += 1
            if img is None:
                # cv2.imread signals a missing/unreadable file with None.
                print("skip unreadable image: %s" % im_path)
                continue
            if idx % 100 == 0:
                print("%d images done" % idx)

            height, width = img.shape[:2]

            # Negative samples.  Crop sizes are drawn from [40, short_side // 2),
            # so images whose short side is <= 80 px cannot provide any.
            neg_limit = min(width, height) // 2
            neg_num = 0
            while neg_limit > 40 and neg_num < 100:
                size = npr.randint(40, neg_limit)
                nx = npr.randint(0, width - size)
                ny = npr.randint(0, height - size)
                crop_box = np.array([nx, ny, nx + size, ny + size])

                # IoU with all gts must be below 0.3; an image without any
                # ground-truth box has nothing to overlap with.
                if boxes.shape[0] == 0 or np.max(IoU(crop_box, boxes)) < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(str(stdsize) + "/negative/%s" % n_idx + ' 0\n')
                    _save_crop(img, nx, ny, nx + size, ny + size, save_file)
                    n_idx += 1
                    neg_num += 1

            # Positive and part samples.
            for box in boxes:
                # box (x_left, y_top, x_right, y_bottom)
                x1, y1, x2, y2 = box
                w = x2 - x1 + 1
                h = y2 - y1 + 1

                # Ignore small faces: their ground-truth boxes may be
                # inaccurate.  Boxes starting outside the image are skipped too.
                if max(w, h) < 12 or x1 < 0 or y1 < 0:
                    continue
                if w < 5 or h < 5:
                    continue

                box_ = box.reshape(1, -1)
                for _ in range(50):
                    # randint needs integer bounds.
                    size = npr.randint(int(min(w, h) * 0.8),
                                       int(np.ceil(1.25 * max(w, h))))
                    # delta here is the offset of the box centre
                    delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                    delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))

                    nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
                    ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
                    nx2 = nx1 + size
                    ny2 = ny1 + size
                    if nx2 > width or ny2 > height:
                        continue

                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    overlap = IoU(crop_box, box_)[0]
                    if overlap < 0.4:
                        continue

                    offsets = ((x1 - nx1) / float(size),
                               (y1 - ny1) / float(size),
                               (x2 - nx1) / float(size),
                               (y2 - ny1) / float(size))

                    if overlap >= 0.65:
                        save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                        f1.write(str(stdsize) + "/positive/%s" % p_idx
                                 + ' 1 %f %f %f %f\n' % offsets)
                        _save_crop(img, nx1, ny1, nx2, ny2, save_file)
                        p_idx += 1
                    else:
                        save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                        f3.write(str(stdsize) + "/part/%s" % d_idx
                                 + ' -1 %f %f %f %f\n' % offsets)
                        _save_crop(img, nx1, ny1, nx2, ny2, save_file)
                        d_idx += 1

            print("%s images done, pos: %s part: %s neg: %s"
                  % (idx, p_idx, d_idx, n_idx))


if __name__ == "__main__":
    main()
生成label-train.txt
import sys
import os

# Directory that holds pos_12.txt / neg_12.txt / part_12.txt and receives
# label-train.txt.
save_dir = "./12"


def format_label_line(line, suffix=""):
    """Turn one sample-list record into a label-train.txt record.

    ``".jpg"`` is appended to the image path (the text before the first
    space), the remaining fields are kept, and ``suffix`` is appended before
    the terminating newline.

    Parameters
    ----------
    line : str
        Record such as ``"12/positive/0 1 0.1 0.2 0.3 0.4\\n"``; the trailing
        newline is optional.
    suffix : str
        Extra text appended after the existing fields (used to pad negative
        records with dummy box offsets).

    Returns
    -------
    str or None
        The formatted record ending in ``"\\n"``, or ``None`` for a blank line.
    """
    record = line.strip()
    if not record:
        return None
    image_stem, _, labels = record.partition(" ")
    return image_stem + ".jpg " + labels + suffix + "\n"


def main():
    """Merge the positive, negative and part lists into label-train.txt."""
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # Negative records carry no box offsets, so four -1 placeholders are added
    # to give every record the same number of fields.
    sources = (
        ('pos_12.txt', ""),
        ('neg_12.txt', " -1 -1 -1 -1"),
        ('part_12.txt', ""),
    )

    # Read every input first so a missing list fails before the output file
    # is created.
    merged = []
    for list_name, suffix in sources:
        with open(os.path.join(save_dir, list_name), 'r') as src:
            for line in src:
                formatted = format_label_line(line, suffix)
                if formatted is not None:
                    merged.append(formatted)

    with open(os.path.join(save_dir, 'label-train.txt'), 'w') as f:
        f.writelines(merged)
    print(len(merged))


if __name__ == "__main__":
    main()
生成lmdb数据
@echo off
rem Rebuild train_lmdb12 from 12/label-train.txt.

rem Remove a previous database so the new one is built from scratch.
if exist train_lmdb12 rd /q /s train_lmdb12

echo create train_lmdb12...
rem The first argument is an empty string; presumably it is the image root
rem prefix, left empty because label-train.txt already stores paths such as
rem 12/positive/0.jpg relative to this directory - confirm against the tool.
"G:/MTCNNTraining/caffe-buildx64-cpu/convert_imageset.exe" "" 12/label-train.txt train_lmdb12 --backend=mtcnn --shuffle=true
echo done.
pause
开始训练
@echo off
rem Train the 12x12 network with solver-12.prototxt, starting from the
rem weights in det1.caffemodel.
rem NOTE(review): the executable path contains "cpu" while solver-12.prototxt
rem sets solver_mode: GPU - confirm this build supports GPU training.
"G:/MTCNNTraining/caffe-buildx64-cpu/caffe.exe" train --solver=solver-12.prototxt --weights=det1.caffemodel
pause
solver-12.prototxt
# Solver settings for the network defined in det1-train.prototxt.
net : "det1-train.prototxt"

# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004

# The learning rate policy: multiply the rate by gamma every stepsize iterations.
lr_policy: "step"
stepsize: 30000
gamma: 0.8

# Log every `display` iterations, stop at `max_iter`.
display: 1000
max_iter: 500000

# Write a snapshot every `snapshot` iterations under this prefix.
snapshot: 10000
snapshot_prefix: "./models-12/"

solver_mode: GPU
det1-train.prototxt
name: "PNet"

# Training data read from the train_lmdb12 database; pixels are normalised as
# (value - 127.5) * 0.0078125.
layer {
  name: "data"
  type: "MTCNNData"
  top: "data"
  top: "label"
  top: "roi"
  transform_param {
    mirror: false
    mean_value: 127.5
    scale: 0.0078125
    #crop_size: 12
  }
  data_param {
    source: "train_lmdb12"
    batch_size: 64
    backend: LMDB
  }
}

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 10
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "PReLU1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 16
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "PReLU2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "conv2"
  top: "conv3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "PReLU3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
}

# Classification branch: 2 outputs.
layer {
  name: "conv4-1"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4-1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 2
    kernel_size: 1
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Samples labelled -1 (the "part" crops) are excluded from the
# classification loss and accuracy.
layer {
  name: "cls_loss"
  type: "SoftmaxWithLoss"
  bottom: "conv4-1"
  bottom: "label"
  top: "cls_loss"
  propagate_down: 1
  propagate_down: 0
  loss_weight: 1
  loss_param{
    ignore_label: -1
  }
}
layer {
  name: "cls_Acc"
  type: "Accuracy"
  bottom: "conv4-1"
  bottom: "label"
  top: "cls_acc"
  include {
    phase: TRAIN
  }
  accuracy_param{
    ignore_label: -1
  }
}

# Box-regression branch: 4 outputs.
layer {
  name: "conv4-2"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4-2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 4
    kernel_size: 1
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Presumably ignore_label: 0 leaves negative samples (label 0) out of the
# regression loss - confirm against the MTCNNEuclideanLoss implementation.
layer {
  name: "roi_loss"
  type: "MTCNNEuclideanLoss"
  bottom: "conv4-2"
  bottom: "roi"
  bottom: "label"
  top: "roi_loss"
  loss_weight: 0.5
  loss_param{
    ignore_label: 0
  }
}
train24.bat
@echo off
rem Train with solver-24.prototxt, starting from the weights in det2.caffemodel.
"caffe/caffe.exe" train --solver=solver-24.prototxt --weights=det2.caffemodel
pause
solver-24.prototxt
# Solver settings for the network defined in det2-train.prototxt.
net : "det2-train.prototxt"

# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004

# The learning rate policy: multiply the rate by gamma every stepsize iterations.
lr_policy: "step"
stepsize: 10000
gamma: 0.8

# Log every `display` iterations, stop at `max_iter`.
display: 500
#500
max_iter: 500000

# Write a snapshot every `snapshot` iterations under this prefix.
snapshot: 10000
snapshot_prefix: "./models-24/"

solver_mode: GPU
det2-train.prototxt
name: "RNet"

# Training data read from the train_lmdb24 database; pixels are normalised as
# (value - 127.5) * 0.0078125.
layer {
  name: "data"
  type: "MTCNNData"
  top: "data"
  top: "label"
  top: "roi"
  transform_param {
    mirror: false
    mean_value: 127.5
    scale: 0.0078125
    #crop_size: 24
  }
  data_param {
    source: "train_lmdb24"
    batch_size: 64
    backend: LMDB
  }
}

################################################
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 28
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 48
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

####################################
##################################
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 64
    kernel_size: 2
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
}

###############################
###############################
layer {
  name: "conv4"
  type: "InnerProduct"
  bottom: "conv3"
  top: "conv4"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  inner_product_param {
    num_output: 128
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu4"
  type: "PReLU"
  bottom: "conv4"
  top: "conv4"
}

# Classification branch: 2 outputs.
layer {
  name: "conv5-1"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5-1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  inner_product_param {
    num_output: 2
    #kernel_size: 1
    #stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Samples labelled -1 are excluded from the classification loss and accuracy.
layer {
  name: "cls_loss"
  type: "SoftmaxWithLoss"
  bottom: "conv5-1"
  bottom: "label"
  top: "cls_loss"
  propagate_down: 1
  propagate_down: 0
  loss_weight: 1
  loss_param{
    ignore_label: -1
  }
}
layer {
  name: "cls_Acc"
  type: "Accuracy"
  bottom: "conv5-1"
  bottom: "label"
  top: "cls_acc"
  include {
    phase: TRAIN
  }
  accuracy_param{
    ignore_label: -1
  }
}

# Box-regression branch: 4 outputs.
layer {
  name: "conv5-2"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5-2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  inner_product_param {
    num_output: 4
    #kernel_size: 1
    #stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Presumably ignore_label: 0 leaves samples labelled 0 out of the regression
# loss - confirm against the MTCNNEuclideanLoss implementation.
layer {
  name: "roi_loss"
  type: "MTCNNEuclideanLoss"
  bottom: "conv5-2"
  bottom: "roi"
  bottom: "label"
  top: "roi_loss"
  loss_weight: 0.5
  loss_param{
    ignore_label: 0
  }
}
train-48.bat
@echo off
rem Train with solver-48.prototxt; no --weights file is passed here.
"caffe/caffe.exe" train --solver=solver-48.prototxt
pause
solver-48.prototxt
# Solver settings for the network defined in det3-train.prototxt.
net : "det3-train.prototxt"

# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.004

# The learning rate policy: multiply the rate by gamma every stepsize iterations.
lr_policy: "step"
stepsize: 30000
gamma: 0.8

# Log every `display` iterations, stop at `max_iter`.
display: 100
#500
max_iter: 600000

# Write a snapshot every `snapshot` iterations under this prefix.
snapshot: 10000
snapshot_prefix: "./models-48/"

solver_mode: GPU
det3-train.prototxt
name: "ONet"

# Training data read from the train_lmdb48 database; pixels are normalised as
# (value - 127.5) * 0.0078125.
layer {
  name: "data"
  type: "MTCNNData"
  top: "data"
  top: "label"
  top: "roi"
  transform_param {
    mirror: false
    mean_value: 127.5
    scale: 0.0078125
    #crop_size: 48
  }
  data_param {
    source: "train_lmdb48"
    batch_size: 64
    backend: LMDB
  }
}

##################################
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu1"
  type: "PReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu2"
  type: "PReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 64
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu3"
  type: "PReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  convolution_param {
    num_output: 128
    kernel_size: 2
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prelu4"
  type: "PReLU"
  bottom: "conv4"
  top: "conv4"
}

layer {
  name: "conv5"
  type: "InnerProduct"
  bottom: "conv4"
  top: "conv5"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  inner_product_param {
    #kernel_size: 3
    num_output: 256
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "drop5"
  type: "Dropout"
  bottom: "conv5"
  top: "conv5"
  dropout_param {
    dropout_ratio: 0.25
  }
}
layer {
  name: "prelu5"
  type: "PReLU"
  bottom: "conv5"
  top: "conv5"
}

# Classification branch: 2 outputs.
layer {
  name: "conv6-1"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  inner_product_param {
    #kernel_size: 1
    num_output: 2
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Samples labelled -1 are excluded from the classification loss and accuracy.
layer {
  name: "cls_loss"
  type: "SoftmaxWithLoss"
  bottom: "conv6-1"
  bottom: "label"
  top: "cls_loss"
  propagate_down: 1
  propagate_down: 0
  loss_weight: 1
  loss_param{
    ignore_label: -1
  }
}
layer {
  name: "cls_Acc"
  type: "Accuracy"
  bottom: "conv6-1"
  bottom: "label"
  top: "cls_acc"
  include {
    phase: TRAIN
  }
  accuracy_param{
    ignore_label: -1
  }
}

# Box-regression branch: 4 outputs.
layer {
  name: "conv6-2"
  type: "InnerProduct"
  bottom: "conv5"
  top: "conv6-2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 1 }
  inner_product_param {
    #kernel_size: 1
    num_output: 4
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Presumably ignore_label: 0 leaves samples labelled 0 out of the regression
# loss - confirm against the MTCNNEuclideanLoss implementation.
# The accuracy_param block that used to sit on this loss layer was dropped so
# the layer matches roi_loss in det1-train.prototxt and det2-train.prototxt.
layer {
  name: "roi_loss"
  type: "MTCNNEuclideanLoss"
  bottom: "conv6-2"
  bottom: "roi"
  bottom: "label"
  top: "roi_loss"
  loss_weight: 0.5
  loss_param{
    ignore_label: 0
  }
}