• 【深度学习】使用MTCNN+FaceNet进行人脸识别


    参考以下博客:

    1.https://segmentfault.com/a/1190000015917420

    2.https://blog.csdn.net/chenzhenyu123456/article/details/80751477 (主要参考)

    3.https://zhuanlan.zhihu.com/p/37705980 (MTCNN和FaceNet讲解)

    论文链接:

    MTCNN:https://arxiv.org/ftp/arxiv/papers/1604/1604.02878.pdf

    FaceNet:https://arxiv.org/pdf/1503.03832.pdf

    论文项目地址:

    https://github.com/AITTSMD/MTCNN-Tensorflow

    https://github.com/davidsandberg/facenet

    结合博客1和博客2,实现了对图片的验证、识别、查找,以及对实时视频的验证、识别、查找。

    本来想把2020年全明星的成员都训练一遍再进行识别,后来发现识别率较低,可能是数据量不够,每个文件夹下的图片大概有20-25张。因此最后提交作业时仅训练了三个类别。

    此次项目仅仅是融合了前两篇博客的内容,并对主函数做了一些调整,由于是课程项目作业,所以并未深究一些论文细节。

    完整项目可直接参考博客2里的源码,以下代码是对/face_recognition/test_image.py所做的修改:

      1 # -*- coding: utf-8 -*-
      2 import os
      3 import sys
      4 
      5 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
      6 sys.path.append(BASE_DIR)
      7 sys.path.append(os.path.join(BASE_DIR, '../ui_with_tkinter'))
      8 sys.path.append(os.path.join(BASE_DIR, '../face_recognition'))
      9 import classifier as clf
     10 import numpy as np
     11 import cv2
     12 import tensorflow as tf
     13 import facenet
     14 import detect_face
     15 import align_dlib
     16 import dlib
     17 from PIL import Image, ImageDraw, ImageFont
     18 
     # Expose only the GPU with ID 0 to this process.
     os.environ["CUDA_VISIBLE_DEVICES"] = '0'

     # TensorFlow 1.x session options.
     # NOTE(review): no session created in this listing is given `config`;
     # confirm whether another module relies on it before removing it.
     config = tf.ConfigProto()
     config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap at 50% of the GPU memory
     config.gpu_options.allow_growth = True  # allocate GPU memory on demand
     23 
     24 
     def predict_labels(best_model_list,
                        test_features,
                        all_data=False):
         """Predict labels with every model in ``best_model_list``.

         Inputs:
           - best_model_list: A sequence of entries whose first element
                 (``entry[0]``) provides a ``predict(features)`` method.
           - test_features: The features of the test faces; handed unchanged to
                 each model's ``predict``.
           - all_data: Kept for backward compatibility. Both values follow the
                 same prediction path.

         Return:
           - labels_array: A numpy array holding one row of predictions per
                 model, in the order of ``best_model_list``.
         """
         # The former `if all_data / else` branches were identical, so one pass
         # over the models is equivalent.
         return np.array([model[0].predict(test_features)
                          for model in best_model_list])
     49 
     50 
     def predict_probability(best_probability_model,
                             test_features,
                             face_number,
                             all_data=False):
         """Predict a label and its probability for every face.

         Inputs:
           - best_probability_model: A sequence whose first element provides
                 ``predict(features)`` and ``predict_proba(features)``.
           - test_features: The features of the test faces.
           - face_number: An integer, the number of faces (rows) to read from the
                 probability matrix.
           - all_data: Kept for backward compatibility. Both values follow the
                 same prediction path.

         Returns:
           - labels: The labels returned by ``predict``, or None when
                 ``face_number`` is 0.
           - probability: A numpy array with, for each face, the highest value
                 of its ``predict_proba`` row, or None when ``face_number`` is 0.
         """
         # Compare by value: `is not 0` tests object identity, which is not
         # guaranteed for equal integers (and is always true for numpy ints).
         if face_number == 0:
             return None, None

         classifier = best_probability_model[0]
         labels = classifier.predict(test_features)
         all_probability = classifier.predict_proba(test_features)
         print('all_probability', all_probability)
         print('labels', labels)
         print('all_probability.shape', all_probability.shape)

         # all_probability is indexed as (face, class); pick the winning class
         # probability of each of the first `face_number` rows.
         probability = all_probability[np.arange(face_number),
                                       np.argmax(all_probability, axis=1)]
         print('probability', probability)
         print('probability.shape', probability.shape)

         return labels, probability
     86 
     87 
     def find_people_from_image(best_probability_model, test_features, face_number, all_data=False):
         """Keep, for every predicted label, the face predicted with the highest probability.

         Inputs:
           - best_probability_model: The pre-trained probability model, passed on
                 to predict_probability.
           - test_features: A numpy array contains the features of the test images.
           - face_number: An integer, the number of faces found in the test image.
           - all_data: Passed on to predict_probability.

         Example with test_features = [feature_1, ..., feature_5]:

           predicted (label, probability) pairs:
               [(A, 0.3), (A, 0.2), (B, 0.1), (B, 0.3), (C, 0.1)]
           highest probability per unique label:
               [(A, 0.3), (B, 0.3), (C, 0.1)]
           index of the winning face per unique label:
               [0, 3, 4]

         Returns:
           - labels: [A, A, B, B, C], the label of every face.
           - probability: [0.3, 0.2, 0.1, 0.3, 0.1], the probability of every face.
           - unique_labels: [A, B, C]
           - unique_probability: [0.3, 0.3, 0.1]
           - unique_indices: [0, 3, 4]

         When predict_probability reports no faces (labels is None) the first two
         values are None and the three "unique" arrays are empty.
         """
         labels, probability = predict_probability(
             best_probability_model, test_features, face_number, all_data=all_data)

         if labels is None:
             # Nothing to group; the loop below would fail with an IndexError on
             # a None label set, so return empty selections instead.
             return (labels, probability,
                     np.array([]), np.array([]), np.array([], dtype=int))

         unique_labels = np.unique(labels)
         print('unique_labels', unique_labels)
         unique_probability, unique_indices = [], []

         for label in unique_labels:
             # Indices of all faces predicted as `label`.
             indices = np.argwhere(labels == label)[:, 0]
             candidates = probability[indices]
             unique_probability.append(np.max(candidates))
             unique_indices.append(indices[np.argmax(candidates)])

         unique_probability = np.array(unique_probability)
         unique_indices = np.array(unique_indices)
         print('unique_probability', unique_probability)
         print('unique_indices', unique_indices)

         return labels, probability, unique_labels, unique_probability, unique_indices
    133 
    def check_sf_features(feature, label):
        """Verify a feature against the stored reference features of a label.

        The reference features are read from ``features/{label}.npy`` when a
        ``features`` entry exists in the current working directory, otherwise
        from ``../features/{label}.npy``.

        Inputs:
           - feature: The feature to be verified.
           - label: The label used for verification (selects the .npy file).

        Returns:
           - True or False, verification result. Verification fails when at
             least 4 reference distances exceed 1.0 or at least 3 exceed 1.1.
           - sum_dis, the sum of the Euclidean distances between `feature` and
             every stored reference feature.
        """
        feature_dir = 'features' if os.path.exists('features') else '../features'
        # Give np.load the path so that it opens and closes the file itself;
        # the previous open(...) handle was never closed.
        sf_features = np.load('{}/{}.npy'.format(feature_dir, label))

        # One Euclidean distance per stored reference feature (row).
        sf_dis = np.sqrt(np.sum((sf_features - feature) ** 2, axis=1))
        sum_dis = np.sum(sf_dis)

        if np.sum(sf_dis > 1.0) >= 4 or np.sum(sf_dis > 1.1) >= 3:
            return False, sum_dis
        return True, sum_dis
    162 
    _VALID_STATES = ('verification', 'recognition', 'search')


    def _majority_labels(labels_array):
        """Return, for every face, the label predicted by most of the models."""
        voted = []
        for votes in labels_array.T:  # transposed: one row of model votes per face
            unique, counts = np.unique(votes, return_counts=True)
            voted.append(unique[np.argmax(counts)])
        return voted


    def _name_summary(prefix, names, language):
        """Join names with commas after `prefix`; add the English sentence ending."""
        text = prefix + ','.join(str(name) for name in names)
        if language == 'english':
            # Drop the prefix's trailing space when no name follows it.
            text = text.rstrip(' ') + ' in this image!'
        return text


    def _show_result(image_data, result_path):
        """Show the annotated image; outside video mode also save it and wait for a key."""
        cv2.imshow('camera', image_data)
        if mode != 'video':
            cv2.imwrite(result_path, image_data)
            cv2.waitKey()


    def recognition(image, state, fr=None, all_data=False, language='english'):
        """Implement face verification, face recognition and face search functions.

        Besides its arguments this function reads the module-level names
        `best_classifier_model_list`, `best_probability_model`, `used_labels`
        and `mode`, which the script entry point defines.

        Inputs:
          - image: The image (array) to analyse; boxes and labels are drawn on it.
          - state: A task name or a list of task names, processed in order:
                   'verification', face verification against used_labels[0]
                   'recognition', face recognition
                   'search', face search among used_labels.
          - fr: The object of the UI class, or None. When given, its
                show_information method receives the collected messages.
          - all_data: Passed on to the prediction helpers.
          - language: 'chinese' or 'english', the language of the messages.

        Returns:
          - answer: The concatenated messages of all processed tasks, or 0 when
                no face was detected in the image.

        Raises:
          - ValueError: if `state` contains an unknown task name.
        """
        if isinstance(state, str):
            state = [state]
        for task in state:
            if task not in _VALID_STATES:
                # Report the offending entry rather than the whole list.
                raise ValueError('{} is not a valid argument!'.format(task))

        test_features, face_number, face_boxes, flag = process('test',
                                                               image,
                                                               image_size=144)
        if not flag:
            return 0

        answer = ''
        predict_info = []
        for task in state:
            if task == 'verification':
                print('Start verification')
                labels_array = predict_labels(best_classifier_model_list,
                                              test_features,
                                              all_data=all_data)
                labels = _majority_labels(labels_array)

                target = used_labels[0]
                if target in labels:
                    if language == 'chinese':
                        template = '验证成功!这张图像被认定为{}!'
                    else:
                        template = ('Successful Verification! This image was '
                                    'identified as {}!')
                else:
                    if language == 'chinese':
                        template = '验证失败!这张图像不被认定为{}!'
                    else:
                        template = ('Verification failed! This image is not '
                                    'recognized as {}!')
                info = template.format(target)
                answer = answer + info

                for box in face_boxes:
                    corner = box.astype(int)
                    cv2.rectangle(image,
                                  (int(corner[0]), int(corner[1])),
                                  (int(corner[2]), int(corner[3])),
                                  (0, 255, 0), 2)

                if fr is not None:
                    predict_info.append(info)
                    fr.show_information(predict_info, predict=True)
                _show_result(image, '../result/verification.jpg')

            elif task == 'recognition':
                print('Start recognition')
                labels, _, _, _, unique_indices = find_people_from_image(
                    best_probability_model, test_features, face_number, all_data=all_data)
                prefix = '从图像中检测到' if language == 'chinese' else 'Detected '

                recognized = []
                for index, label in enumerate(labels):
                    # A face keeps its label only if it is the most probable face
                    # for that label and passes the distance check.
                    if index in unique_indices and \
                            check_sf_features(test_features[index], label)[0]:
                        recognized.append(label)
                    else:
                        labels[index] = ''

                info = _name_summary(prefix, recognized, language)
                answer = answer + info

                if fr is not None:
                    predict_info.append(info)
                    fr.show_information(predict_info, predict=True)

                image_data = image
                for index, label in enumerate(labels):
                    face_position = face_boxes[index].astype(int)
                    # NOTE(review): the label is always drawn with
                    # language='english', regardless of `language`.
                    image_data = puttext(image, face_position, label, face_number,
                                         language='english')
                _show_result(image_data, '../result/recognition.jpg')

            elif task == 'search':
                print('Start search')
                _, _, unique_labels, _, unique_indices = find_people_from_image(
                    best_probability_model, test_features, face_number, all_data=all_data)
                prefix = '从图像中找到' if language == 'chinese' else 'Found '

                found = []  # (label, face index) pairs that were searched for and verified
                for label, index in zip(unique_labels, unique_indices):
                    if label not in used_labels:
                        continue
                    if not check_sf_features(test_features[index], label)[0]:
                        continue
                    found.append((label, index))

                info = _name_summary(prefix, [label for label, _ in found], language)
                answer = answer + info

                if fr is not None:
                    predict_info.append(info)
                    fr.show_information(predict_info, predict=True)

                for label, index in found:
                    face_position = face_boxes[index].astype(int)
                    image_data = puttext(image, face_position, label, face_number,
                                         language='english')
                    # Shown after every found face, as in the original listing.
                    _show_result(image_data, '../result/search.jpg')

        # Return only after every requested task has run; the original returned
        # from inside the loop and therefore handled just the first task.
        return answer
    348 
    def process(state, image, image_size=144):
        """Detect the faces in an image and compute one embedding per face.

        Reads the module-level names `minsize`, `p_net`, `r_net`, `o_net`,
        `threshold`, `factor`, `face_aligner`, `sess`, `embeddings`,
        `images_placeholder` and `phase_train_placeholder`, which the script
        entry point defines.

        Inputs:
          - state: Only 'test' is handled; any other value returns None.
          - image: The image array to analyse; it is copied before use.
          - image_size: Edge length requested from the aligner; each aligned face
                is reshaped to (1, image_size, image_size, 3).

        Returns (for state == 'test'):
          - test_features: A numpy array with one embedding per face, or None
                when no face was detected.
          - face_number: The number of detected faces.
          - face_boxes: The boxes returned by the detector, or None when no face
                was detected.
          - flag: 1 if at least one face was detected, otherwise 0.
        """
        if state != 'test':
            return None

        test_image_data = image.copy()
        face_boxes, _ = detect_face.detect_face(
            test_image_data, minsize, p_net, r_net, o_net, threshold, factor)
        face_number = face_boxes.shape[0]

        # Compare by value; `is 0` tests object identity.
        if face_number == 0:
            print('face number is 0')
            return None, face_number, None, 0

        test_features = []
        for face_position in face_boxes:
            face_position = face_position.astype(int)
            face_rect = dlib.rectangle(int(face_position[0]), int(face_position[1]),
                                       int(face_position[2]), int(face_position[3]))

            aligned_data = face_aligner.align(
                image_size, test_image_data, face_rect,
                landmarkIndices=align_dlib.AlignDlib.OUTER_EYES_AND_NOSE)

            aligned_data = facenet.prewhiten(aligned_data)
            last_data = aligned_data.reshape((1, image_size, image_size, 3))
            # Batch of one image; keep the single embedding row.
            test_features.append(sess.run(embeddings, feed_dict={
                images_placeholder: last_data, phase_train_placeholder: False})[0])

        return np.array(test_features), face_number, face_boxes, 1
    387 
    def puttext(image_data, face_position, label, face_number, language='english'):
        """Draw a face box and its label on an image and return the result.

        Inputs:
          - image_data: The image array to annotate.
          - face_position: Indexable box corners (x1, y1, x2, y2) at positions 0-3.
          - label: The text drawn above the box, centred on the box width.
          - face_number: The number of faces in the image. With 4 or more faces
                the English label is assumed to take 10 instead of 20 pixels per
                character when it is centred.
          - language: 'chinese' draws the text with PIL and the STKAITI font,
                any other value uses cv2.putText.

        Returns:
          - image_data: The annotated image.
        """
        rect_size = 2
        line_size = 2
        if language == 'chinese':
            pixels_per_char, font_size, up_offset = 30, 30, 40
        else:
            pixels_per_char = 10 if face_number >= 4 else 20
            font_size, up_offset = 1, 20
        label_pixels = pixels_per_char * len(label)

        # Plain ints, so the drawing calls get the same coordinate type for any
        # integer array input.
        x1, y1, x2, y2 = (int(face_position[k]) for k in range(4))

        # Shift the text left by half of the width difference to centre it.
        dis = (label_pixels - (x2 - x1)) // 2
        text_origin = (x1 - dis, y1 - up_offset)

        if language == 'chinese':
            # The color coded storage order in cv2(BGR) and PIL(RGB) is different
            pilimg = Image.fromarray(cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB))
            # use PIL to display Chinese characters in images
            draw = ImageDraw.Draw(pilimg)
            font = ImageFont.truetype('../resources/STKAITI.TTF',
                                      font_size, encoding="utf-8")
            # NOTE(review): (0, 0, 255) is applied to the RGB image here but to
            # the BGR image in the cv2 branch, so the two branches draw
            # different colours -- confirm which one is intended.
            draw.text(text_origin, label, (0, 0, 255), font=font)
            # convert to cv2
            image_data = cv2.cvtColor(np.array(pilimg), cv2.COLOR_RGB2BGR)
        else:
            cv2.putText(image_data, label, text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, font_size,
                        (0, 0, 255), line_size)

        image_data = cv2.rectangle(image_data, (x1, y1), (x2, y2),
                                   (0, 255, 0), rect_size)
        return image_data
    453 
    if __name__ == '__main__':
        # The names assigned here are module-level globals that the functions of
        # this file read (detector nets, aligner, session, tensors, models,
        # `mode`, `used_labels`), so they must stay at this level.

        # ---- MTCNN face detector ----
        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # thresholds of the three detection steps
        factor = 0.709  # scale factor
        gpu_memory_fraction = 0.6

        with tf.Graph().as_default():
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                    log_device_placement=False))
            with sess.as_default():
                p_net, r_net, o_net = detect_face.create_mtcnn(sess, '../models/mtcnn/')

        # ---- dlib face aligner ----
        predictor_model = '../models/shape_predictor_68_face_landmarks.dat'
        face_aligner = align_dlib.AlignDlib(predictor_model)

        # ---- FaceNet embedding model ----
        model_dir = '../models/20170512-110547/20170512-110547.pb'  # model file
        # A bare `tf.Graph().as_default()` statement used to stand here; without
        # `with` the context is never entered, so it had no effect and was dropped.
        sess = tf.Session()
        facenet.load_model(model_dir)
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

        # ---- classifiers ----
        all_data = True
        best_classifier_model_list = clf.load_best_classifier_model(all_data=all_data)
        best_probability_model = clf.load_best_probability_model(all_data=all_data)
        # All models are loaded at this point.

        fit_all_data = all_data
        mode = 'video'  # 'video' or 'picture'
        state = ['recognition']  # any of 'verification', 'recognition', 'search'
        # verification checks only used_labels[0]; search looks for every label listed here
        used_labels = ['LeBron']
        language = 'english'

        if mode == 'video':
            # Address of the IP camera; change it to your own. Use 0 for a local webcam.
            video = "http://admin:admin@192.168.0.13:8081"
            capture = cv2.VideoCapture(video)
            cv2.namedWindow("camera", 1)
            num = 0  # number of frames saved so far
            try:
                while True:
                    ret, frame = capture.read()
                    if not ret:
                        # No frame was delivered (stream closed or unreachable).
                        print('failed to read a frame from the video source')
                        break
                    cv2.imshow("camera", frame)
                    answer = recognition(frame, state, fr=None,
                                         all_data=fit_all_data, language=language)
                    print(answer)
                    key = cv2.waitKey(3)
                    if key == 27:
                        # Esc quits
                        print("esc break...")
                        break
                    if key == ord(' '):
                        # Space saves the current frame
                        num = num + 1
                        filename = "frames_%s.jpg" % num
                        cv2.imwrite('../result/' + filename, frame)
            finally:
                # When everything is done, release the capture
                capture.release()
                cv2.destroyWindow("camera")
        else:
            image_path = '../test/42.jpg'
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                raise IOError('cannot read image: {}'.format(image_path))
            answer = recognition(image, fr=None, state=state,
                                 all_data=fit_all_data, language=language)
            print(answer)
  • 相关阅读:
    A. Ivan the Fool and the Probability Theory
    C2. Good Numbers (hard version)
    C. p-binary
    H. Happy Birthday
    idea使用goeasy实现webSocket
    idea新建一个maven项目
    FreeMaker入门介绍
    mui预加载
    mui底部选项卡切换实现
    BootStrap 学习笔记一
  • 原文地址:https://www.cnblogs.com/DJames23/p/12404834.html
Copyright © 2020-2023  润新知