前言
已完成TensorFlow Object Detection API环境搭建,具体搭建过程请参照:
安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统
或
Ubuntu系统安装配置tensorflow开发环境
准备工作
下载训练数据和验证数据
香港中文大学(Chinese University of Hong Kong)有大量的标注图像数据集。WIDER FACE数据集是一个人脸检测基准数据集。我用labelImg(https://github.com/tzutalin/labelImg)来显示边框。所选的文本是人脸检测注释。
数据集下载地址:http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/
labelImg安装命令
pip install labelImg
其他安装方式请参照上面提供的地址自行尝试
labelImg使用命令
labelImg
效果
结果集中的图片与标注文件XML一一对应
下载模型
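我选择的模型是faster_rcnn_inception_v2_coco,下载地址是:http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz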
编写代码进行相关操作001_down_data.py
# -*- coding: utf-8 -*-
'''
结果集下载与模型下载
'''
import requests
import os
import shutil
# unzip the files
import zipfile
import tarfile
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and store it at ``destination``.

    Args:
        id: Google Drive file id, sent as the ``id`` query parameter.
        destination: Local path; the response body is written there in
            binary mode.

    If the first response carries a cookie whose name starts with
    ``download_warning``, its value is sent back as the ``confirm``
    parameter in a second request, and that second response is the one
    that gets saved.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    chunk_bytes = 32768

    http = requests.Session()
    reply = http.get(endpoint, params={'id': id}, stream=True)

    # Value of the first "download_warning*" cookie, or None when absent.
    confirm = next(
        (val for name, val in reply.cookies.items()
         if name.startswith('download_warning')),
        None,
    )
    if confirm:
        reply = http.get(endpoint, params={'id': id, 'confirm': confirm}, stream=True)

    with open(destination, "wb") as sink:
        for piece in reply.iter_content(chunk_bytes):
            if not piece:
                continue  # skip empty keep-alive chunks
            sink.write(piece)
# Script: fetch the WIDER FACE images, their annotations and the base model
# into ./data. Every step is skipped when its output is already on disk.
curr_path = os.getcwd()
models_path = os.path.join(curr_path, "data")

# Create ./data only when it is missing, so a genuine failure (for example
# a permission error) surfaces here instead of being silently swallowed.
if not os.path.isdir(models_path):
    os.makedirs(models_path)

print("files download start")
train_zip = os.path.join(models_path, "train.zip")
if not os.path.exists(train_zip):
    print("downloading.. train.zip -- 1.47GB")
    download_file_from_google_drive("0B6eKvaijfFUDQUUwd21EckhUbWs", train_zip)
val_zip = os.path.join(models_path, "val.zip")
if not os.path.exists(val_zip):
    print("downloading.. val.zip -- 362.8MB")
    download_file_from_google_drive("0B6eKvaijfFUDd3dIRmpvSk8tLUk", val_zip)
print("files download end")

print("files unzip start")
if not os.path.exists(os.path.join(models_path, "WIDER_train")):
    with zipfile.ZipFile(train_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)
if not os.path.exists(os.path.join(models_path, "WIDER_val")):
    with zipfile.ZipFile(val_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)
print("files unzip end")

print("annotation download start")
url = 'http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip'
split_zip = os.path.join(models_path, "wider_face_split.zip")
# Skip the request when the archive already exists, like the other downloads.
if not os.path.exists(split_zip):
    r = requests.get(url)
    r.raise_for_status()  # never store an HTTP error page as a .zip
    with open(split_zip, "wb") as code:
        code.write(r.content)
if not os.path.exists(os.path.join(models_path, "wider_face_split")):
    with zipfile.ZipFile(split_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)
print("annotation download end")

# Model archive, listed at:
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
url = 'http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz'
filePath = os.path.join(models_path, "faster_rcnn_inception_v2_coco_2018_01_28.tar.gz")
if not os.path.exists(filePath):
    response = requests.get(url, stream=True)
    response.raise_for_status()  # fail before creating a bogus archive
    with open(filePath, 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    response.close()

# extractall() below unpacks relative to the working directory.
# NOTE(review): the archive is fetched over plain HTTP and extracted without
# member filtering; consider validating member paths before extraction.
os.chdir(models_path)
if filePath.endswith("tar.gz"):
    with tarfile.open(filePath, "r:gz") as tar:
        tar.extractall()
elif filePath.endswith("tar"):
    with tarfile.open(filePath, "r:") as tar:
        tar.extractall()
print("done")
数据预处理
将WIDERFace转换为Pascal XML
首先,我们需要将人脸检测数据集转换为Pascal XML。Tensorflow和labelImg使用不同的格式。这些人脸检测图像将下载到WIDER_train文件夹中。我们将使用002_data-to-pascal-xml.py转换WIDERFace数据并且将数据复制到一个不同的子文件夹中。我的电脑需要5分钟处理9263张图片。
002_data-to-pascal-xml.py
#!/usr/bin/env python3
"""
This script crawls over 9263 training images and 1873 items
On my Macbook pro this takes: 4 minutes
"""
import cv2
import os
import numpy as np
from glob import iglob # python 3.5 or newer
from shutil import copyfile
# Base directory: the data paths built further down are joined onto the
# current working directory.
curr_path = os.getcwd()
# NOTE(review): xml.etree.cElementTree was removed in Python 3.9; on newer
# interpreters this import has to become xml.etree.ElementTree.
import xml.etree.cElementTree as ET
# Settings
# Module-level counter; no code shown in this script reads it.
cnt = 0
# HOG descriptor used by readAndWrite(). The positional arguments are
# presumably winSize, blockSize, blockStride, cellSize and nbins - confirm
# against the OpenCV documentation.
hog = cv2.HOGDescriptor((80, 80), (16, 16), (8,8), (8,8), 9)
# data = []
# label = []
def newXMLPASCALfile(imageheight, imagewidth, path, basename):
    """Build the header of a Pascal VOC annotation for one image.

    Args:
        imageheight: Image height, written to <size>/<height>.
        imagewidth: Image width, written to <size>/<width>.
        path: Text stored in the <path> element.
        basename: Text stored in the <filename> element.

    Returns:
        An ElementTree rooted at <annotation verified="yes"> that holds
        folder, filename, path, source, size and segmented, but no
        <object> entries yet.
    """
    root = ET.Element("annotation", verified="yes")

    # Plain text children, in document order.
    for tag, text in (("folder", "images"), ("filename", basename), ("path", path)):
        ET.SubElement(root, tag).text = text

    source_node = ET.SubElement(root, "source")
    ET.SubElement(source_node, "database").text = "test"

    size_node = ET.SubElement(root, "size")
    for tag, text in (("width", str(imagewidth)),
                      ("height", str(imageheight)),
                      ("depth", "3")):
        ET.SubElement(size_node, tag).text = text

    ET.SubElement(root, "segmented").text = "0"
    return ET.ElementTree(root)
def appendXMLPASCAL(curr_et_object,x1, y1, w, h, filename):
    """Append one "face" object to an annotation tree and write it to disk.

    Args:
        curr_et_object: ElementTree whose root receives the new <object>.
        x1, y1: Top-left corner of the box (xmin / ymin).
        w, h: Box width and height; xmax = x1 + w, ymax = y1 + h.
        filename: Path of the image the annotation belongs to. The XML is
            written next to it, with the extension replaced by ".xml".

    Returns:
        The same tree object, now containing the extra <object>.

    The whole tree is rewritten on every call, so the file on disk always
    holds all objects appended so far.
    """
    face = ET.SubElement(curr_et_object.getroot(), "object")
    ET.SubElement(face, "name").text = "face"
    ET.SubElement(face, "pose").text = "Unspecified"
    ET.SubElement(face, "truncated").text = "0"
    ET.SubElement(face, "difficult").text = "0"

    bndbox = ET.SubElement(face, "bndbox")
    ET.SubElement(bndbox, "xmin").text = str(x1)
    ET.SubElement(bndbox, "ymin").text = str(y1)
    ET.SubElement(bndbox, "xmax").text = str(x1+w)
    ET.SubElement(bndbox, "ymax").text = str(y1+h)

    # Swap only the file extension. A plain str.replace(".jpg", ".xml")
    # would also rewrite ".jpg" inside directory names, and would leave a
    # non-.jpg path untouched so that the XML overwrote the image itself.
    stem, _ext = os.path.splitext(filename.strip())
    curr_et_object.write(stem + ".xml")
    return curr_et_object
def readAndWrite(bbx_gttxtPath):
    """Turn a WIDER FACE ground-truth list into per-image Pascal VOC XML files.

    The text file is processed line by line:

    * a line without spaces that is not a number is an image path relative
      to the module-level ``Train_path``; the image is loaded and a fresh
      annotation tree is started for it;
    * a line without spaces made only of digits is skipped;
    * any other line is parsed as ten integers (x1, y1, w, h, blur,
      expression, illumination, invalid, occlusion, pose). Unless the box is
      filtered out, it is appended to the current tree and the XML is
      written next to the image.

    Args:
        bbx_gttxtPath: Path of the ground-truth text file.

    Raises:
        IOError: If an image named in the file cannot be read.

    Relies on the module-level names ``Train_path`` and ``hog`` being
    defined before the call.
    """
    with open(bbx_gttxtPath, 'r') as f:
        curr_filename = ""
        curr_path = ""
        curr_et_object = ET.ElementTree()
        img = np.zeros((80, 80))

        for line in f:
            inp = line.split(' ')

            if len(inp) == 1:
                # Strip just the newline: slicing off the last character
                # would truncate a final line that has no newline.
                curr_img = inp[0].rstrip('\n')
                # Blank lines and digit-only lines carry no image path.
                if not curr_img or curr_img.isdigit():
                    continue

                # Flag 2 is presumably cv2.IMREAD_ANYDEPTH - confirm.
                img = cv2.imread(Train_path + '/' + curr_img, 2) # POSIX only
                if img is None:
                    # imread signals failure by returning None; report the
                    # path rather than failing later on `img.shape`.
                    raise IOError("cannot read image: " + Train_path + '/' + curr_img)

                curr_filename = curr_img.split("/")[1].strip()
                curr_path = os.path.join(Train_path, os.path.dirname(curr_img))
                curr_et_object = newXMLPASCALfile(img.shape[0],img.shape[1],curr_path, curr_filename )
            else:
                # Whitespace split: does not depend on a trailing space
                # before the newline.
                values = [int(tok) for tok in line.split()]
                x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose = values

                n = max(w,h)
                # Keep only valid, non-blurred faces at least 50 px on the
                # longer side.
                if invalid == 1 or blur > 0 or n < 50:
                    continue

                # The HOG vector is only used for its length in the log line.
                img2 = img[y1:y1+n, x1:x1+n]
                img3 = cv2.resize(img2, (80, 80))
                vec = hog.compute(img3)

                fileNow = os.path.join(curr_path,curr_filename)
                print("{}: {} {} {} {}".format(len(vec),x1, y1, w, h) + " " + fileNow)

                curr_et_object = appendXMLPASCAL(curr_et_object,x1, y1, w, h, fileNow )
# ################################ TRAINING DATA 9263 ITEMS ##################################
# Convert the training split: readAndWrite() writes one XML next to each
# annotated image under Train_path (it reads Train_path as a global).
Train_path = os.path.join(curr_path, "data", "WIDER_train", "images" )
bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_train_bbx_gt.txt" )
readAndWrite(bbx_gttxtPath)

# Destination folders
to_xml_folder = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "xmls" )
to_image_folder = os.path.join(curr_path, "data", "tf_wider_train", "images" )

# Create each folder on its own: with both calls inside one try block, an
# already existing xml folder made the image folder creation be skipped.
os.makedirs(to_xml_folder, exist_ok=True)
os.makedirs(to_image_folder, exist_ok=True)

rootdir_glob = Train_path + '/**/*' # recursive pattern: every entry below Train_path
file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

train_annotations_index = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "train.txt" )
# NOTE: opened in append mode, so re-running the script adds the entries again.
with open(train_annotations_index, "a") as indexFile:
    for f in file_list:
        stem, ext = os.path.splitext(f)
        if ext != ".xml":
            continue
        print(f)
        copyfile(f, os.path.join(to_xml_folder, os.path.basename(f) ))
        # The image sits next to its annotation, same name with ".jpg".
        img = stem + ".jpg"
        copyfile(img, os.path.join(to_image_folder, os.path.basename(img) ))
        # One base name (no extension) per line.
        indexFile.write(os.path.basename(stem) + "\n")
################################ VALIDATION DATA 1873 ITEMS ##################################
# Convert the validation split: readAndWrite() writes one XML next to each
# annotated image under Train_path (it reads Train_path as a global, so the
# name is reused here for the validation images).
Train_path = os.path.join(curr_path, "data", "WIDER_val", "images" )
bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_val_bbx_gt.txt" )
readAndWrite(bbx_gttxtPath)

# Destination folders
to_xml_folder = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "xmls" )
to_image_folder = os.path.join(curr_path, "data", "tf_wider_val", "images" )

# Create each folder on its own: with both calls inside one try block, an
# already existing xml folder made the image folder creation be skipped.
os.makedirs(to_xml_folder, exist_ok=True)
os.makedirs(to_image_folder, exist_ok=True)

rootdir_glob = Train_path + '/**/*' # recursive pattern: every entry below Train_path
file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

train_annotations_index = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "val.txt" )
# NOTE: opened in append mode, so re-running the script adds the entries again.
with open(train_annotations_index, "a") as indexFile:
    for f in file_list:
        stem, ext = os.path.splitext(f)
        if ext != ".xml":
            continue
        print(f)
        copyfile(f, os.path.join(to_xml_folder, os.path.basename(f) ))
        # The image sits next to its annotation, same name with ".jpg".
        img = stem + ".jpg"
        copyfile(img, os.path.join(to_image_folder, os.path.basename(img) ))
        # One base name (no extension) per line.
        indexFile.write(os.path.basename(stem) + "\n")
运行
python 002_data-to-pascal-xml.py
效果
创建Pascal XML到Tensorflow CSV的索引
当数据转换为Pascal XML时,索引已经被创建。通过训练和验证数据集,我们将这些文件作为输入来制作TFRecords。也可以用labelImg这样的工具来手动标记图像,并使用这个步骤在这里创建一个索引。
003_xml-to-csv.py
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
# source and credits:
# https://raw.githubusercontent.com/datitran/raccoon_dataset/master/xml_to_csv.py
def xml_to_csv(path):
    """Collect every <object> from the Pascal VOC XML files in ``path``.

    Args:
        path: Directory that directly contains the ``*.xml`` files.

    Returns:
        A DataFrame with one row per <object> and the columns filename,
        width, height, class, xmin, ymin, xmax, ymax. It is empty, with the
        same columns, when no object is found.

    Elements are looked up by tag name rather than by child position, so the
    result does not depend on the order in which a tool wrote the children.
    """
    xml_list = []
    # sorted(): glob order depends on the filesystem; sorting keeps the
    # resulting CSV reproducible.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            continue  # no rows to emit for this file

        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)

        for member in members:
            box = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ))

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(xml_list, columns=column_name)
def train():
    """Export the training annotations to ``data/tf_wider_train/train.csv``.

    Reads the XML files in ``data/tf_wider_train/annotations/xmls`` (relative
    to the current working directory) through ``xml_to_csv``.
    """
    base_dir = os.path.join(os.getcwd(), 'data', 'tf_wider_train')
    frame = xml_to_csv(os.path.join(base_dir, 'annotations', 'xmls'))
    frame.to_csv(os.path.join(base_dir, 'train.csv'), index=None)
    print('> tf_wider_train - Successfully converted xml to csv.')
def val():
    """Export the validation annotations to ``data/tf_wider_val/val.csv``.

    Reads the XML files in ``data/tf_wider_val/annotations/xmls`` (relative
    to the current working directory) through ``xml_to_csv``.
    """
    base_dir = os.path.join(os.getcwd(), 'data', 'tf_wider_val')
    frame = xml_to_csv(os.path.join(base_dir, 'annotations', 'xmls'))
    frame.to_csv(os.path.join(base_dir, 'val.csv'), index=None)
    print('> tf_wider_val - Successfully converted xml to csv.')
# Convert both splits. There is no __main__ guard, so these calls also run
# when the file is imported.
train()
val()
运行
python 003_xml-to-csv.py
效果
创建TFRecord文件
TFRecords文件是一个大型的二进制文件,该文件被读取以训练机器学习模型。在下一步中,该文件将被Tensorflow按顺序读取。训练和验证数据将被转换成二进制文件。
004_generate_tfrecord.py
"""
Usage:
# From tensorflow/models/
# Create train data:
python3 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv --output_path=data/train.record
# creates 847.6MB train.record
# Create test/validation data:
python3 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv --output_path=data/val.record
# creates 213.1MB val.record
source without adjustments: https://raw.githubusercontent.com/datitran/raccoon_dataset/master/generate_tfrecord.py
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util # from path
from collections import namedtuple, OrderedDict # tf slim
# Command-line flags; each one defaults to the empty string.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('images_path', '', 'Path to images_folder')
# Flag values; main() reads csv_input, output_path and images_path from here.
FLAGS = flags.FLAGS
# TO-DO replace this with label map
def class_text_to_int(row_label):
    """Map a class name to its integer label id.

    Only 'face' is known and maps to 1; every other value yields None.
    """
    return 1 if row_label == 'face' else None
def split(df, group):
    """Group ``df`` by ``group`` and return one record per group.

    Args:
        df: DataFrame to partition.
        group: Column name passed to ``DataFrame.groupby``.

    Returns:
        A list of ``data(filename, object)`` namedtuples, where ``filename``
        is the group key and ``object`` is the sub-DataFrame of that group.
    """
    data = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(data(key, grouped.get_group(key)))
    return records
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Record with ``filename`` (image file name) and ``object``
            (DataFrame with the columns class, xmin, ymin, xmax, ymax).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, its size and
        the boxes, with coordinates divided by the image width / height.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to learn the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] / width for row in rows]
    xmaxs = [row['xmax'] / width for row in rows]
    ymins = [row['ymin'] / height for row in rows]
    ymaxs = [row['ymax'] / height for row in rows]
    classes_text = [row['class'].encode('utf8') for row in rows]
    classes = [class_text_to_int(row['class']) for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(_):
    """Write one TFRecord example per image listed in the CSV.

    Reads ``FLAGS.csv_input``, groups its rows by file name, builds one
    example per group from the images under ``FLAGS.images_path`` and writes
    them all to ``FLAGS.output_path``.

    Args:
        _: Unused positional argument supplied by ``tf.app.run``.
    """
    path = os.path.join(os.getcwd(), FLAGS.images_path)
    # Load and group the CSV before opening the writer, so a bad CSV does not
    # leave an open, empty record file behind.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    try:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when building an example fails.
        writer.close()

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
TFRecord的训练数据(847.6 MB)
python 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv --output_path=data/train.record
TFRecord 的验证数据(213.1MB)
python 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv --output_path=data/val.record
至此数据预处理已全部完成,效果如下: