from openvino.inference_engine import IECore
import numpy as np
import time
import cv2 as cv

emotions = ['neutral', 'happy', 'sad', 'surprise', 'anger']
genders = ['female', 'male']


def face_landmark_demo():
    ie = IECore()
    for device in ie.available_devices:
        print(device)

    model_xml = "/home/bhc/BHC/model/intel/face-detection-0200/FP16/face-detection-0200.xml"
    model_bin = "/home/bhc/BHC/model/intel/face-detection-0200/FP16/face-detection-0200.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    # face detection model input: (1, 3, 256, 256)
    n, c, h, w = net.input_info[input_blob].input_data.shape
    print(n, c, h, w)

    cap = cv.VideoCapture("1.mp4")
    exec_net = ie.load_network(network=net, device_name="CPU")

    # load the facial landmark model
    em_xml = "/home/bhc/BHC/model/intel/facial-landmarks-35-adas-0002/FP16/facial-landmarks-35-adas-0002.xml"
    em_bin = "/home/bhc/BHC/model/intel/facial-landmarks-35-adas-0002/FP16/facial-landmarks-35-adas-0002.bin"
    em_net = ie.read_network(model=em_xml, weights=em_bin)
    em_input_blob = next(iter(em_net.input_info))
    em_out_blob = next(iter(em_net.outputs))
    # landmark model input: (1, 3, 60, 60)
    en, ec, eh, ew = em_net.input_info[em_input_blob].input_data.shape
    print(en, ec, eh, ew)
    em_exec_net = ie.load_network(network=em_net, device_name="CPU")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        image = cv.resize(frame, (w, h))
        image = image.transpose(2, 0, 1)  # HWC -> CHW
        inf_start = time.time()
        res = exec_net.infer(inputs={input_blob: [image]})
        inf_end = time.time() - inf_start
        # print("infer time(ms):%.3f" % (inf_end * 1000))
        ih, iw, ic = frame.shape
        res = res[out_blob]  # face detection output: (1, 1, N, 7)
        for obj in res[0][0]:
            # each row: [image_id, label, conf, x_min, y_min, x_max, y_max]
            if obj[2] > 0.75:
                xmin = int(obj[3] * iw)
                ymin = int(obj[4] * ih)
                xmax = int(obj[5] * iw)
                ymax = int(obj[6] * ih)
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax >= iw:
                    xmax = iw - 1
                if ymax >= ih:
                    ymax = ih - 1
                # face ROI from the detector output, fed to the landmark model
                roi = frame[ymin:ymax, xmin:xmax, :]
                rh, rw, rc = roi.shape
                roi_img = cv.resize(roi, (ew, eh))
                roi_img = roi_img.transpose(2, 0, 1)
                em_res = em_exec_net.infer(inputs={em_input_blob: [roi_img]})
                # landmark model output: (1, 70) = (x0, y0, x1, y1, ..., x34, y34)
                prob_landmarks = em_res[em_out_blob]
                for index in range(0, len(prob_landmarks[0]), 2):
                    x = int(prob_landmarks[0][index] * rw)
                    y = int(prob_landmarks[0][index + 1] * rh)
                    cv.circle(roi, (x, y), 3, (0, 0, 255), -1, 8, 0)
                cv.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 2, 8)
                cv.putText(frame, "infer time(ms): %.3f" % (inf_end * 1000), (50, 50),
                           cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 255), 2, 8)
        cv.imshow("Face Landmark Detection", frame)
        c = cv.waitKey(1)
        if c == 27:
            break
    cv.waitKey(0)
    cv.destroyAllWindows()
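
# Each demo repeats the same read_network / load_network boilerplate. A
# minimal refactoring sketch, assuming the pre-2022 IECore API used above;
# the helper name `load_model` is my own and is not called by the demos:
def load_model(ie, model_xml, device_name="CPU"):
    # the .bin weights file sits next to the .xml topology file
    model_bin = model_xml.replace(".xml", ".bin")
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape
    exec_net = ie.load_network(network=net, device_name=device_name)
    return exec_net, input_blob, out_blob, (n, c, h, w)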

def face_emotion_demo():
    ie = IECore()
    for device in ie.available_devices:
        print(device)

    model_xml = "/home/bhc/BHC/model/intel/face-detection-0200/FP16/face-detection-0200.xml"
    model_bin = "/home/bhc/BHC/model/intel/face-detection-0200/FP16/face-detection-0200.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape
    print(n, c, h, w)

    cap = cv.VideoCapture("1.mp4")
    exec_net = ie.load_network(network=net, device_name="CPU")

    # load the facial emotion recognition model
    em_xml = "/home/bhc/BHC/model/intel/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.xml"
    em_bin = "/home/bhc/BHC/model/intel/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.bin"
    em_net = ie.read_network(model=em_xml, weights=em_bin)
    em_input_blob = next(iter(em_net.input_info))
    em_out_blob = next(iter(em_net.outputs))
    en, ec, eh, ew = em_net.input_info[em_input_blob].input_data.shape
    print(en, ec, eh, ew)
    em_exec_net = ie.load_network(network=em_net, device_name="CPU")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        image = cv.resize(frame, (w, h))
        image = image.transpose(2, 0, 1)
        inf_start = time.time()
        res = exec_net.infer(inputs={input_blob: [image]})
        inf_end = time.time() - inf_start
        # print("infer time(ms):%.3f" % (inf_end * 1000))
        ih, iw, ic = frame.shape
        res = res[out_blob]
        for obj in res[0][0]:
            if obj[2] > 0.75:
                xmin = int(obj[3] * iw)
                ymin = int(obj[4] * ih)
                xmax = int(obj[5] * iw)
                ymax = int(obj[6] * ih)
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax >= iw:
                    xmax = iw - 1
                if ymax >= ih:
                    ymax = ih - 1
                roi = frame[ymin:ymax, xmin:xmax, :]
                roi_img = cv.resize(roi, (ew, eh))
                roi_img = roi_img.transpose(2, 0, 1)
                # emotion model output: (1, 5, 1, 1)
                em_res = em_exec_net.infer(inputs={em_input_blob: [roi_img]})
                prob_emotion = em_res[em_out_blob].reshape(1, 5)
                # 0 - neutral, 1 - happy, 2 - sad, 3 - surprise, 4 - anger
                label_index = np.argmax(prob_emotion, 1)
                cv.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 2, 8)
                cv.putText(frame, "infer time(ms): %.3f" % (inf_end * 1000), (50, 50),
                           cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 255), 2, 8)
                cv.putText(frame, emotions[int(label_index)], (xmin, ymin),
                           cv.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 255), 2, 8)
        cv.imshow("Face+Emotion Detection", frame)
        c = cv.waitKey(1)
        if c == 27:
            break
    cv.waitKey(0)
    cv.destroyAllWindows()
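
# Each demo clamps the detected box with four separate `if` statements. An
# equivalent compact variant (a sketch of my own, not called by the demos):
def clamp_box(xmin, ymin, xmax, ymax, iw, ih):
    # keep the box inside the frame: [0, iw-1] x [0, ih-1]
    return (max(0, xmin), max(0, ymin),
            min(iw - 1, xmax), min(ih - 1, ymax))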

def face_age_gender_demo():
    ie = IECore()
    for device in ie.available_devices:
        print(device)

    model_xml = "/home/bhc/BHC/model/intel/face-detection-0200/FP16/face-detection-0200.xml"
    model_bin = "/home/bhc/BHC/model/intel/face-detection-0200/FP16/face-detection-0200.bin"
    net = ie.read_network(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape
    print(n, c, h, w)

    cap = cv.VideoCapture("1.mp4")
    exec_net = ie.load_network(network=net, device_name="CPU")

    # load the age/gender recognition model
    em_xml = "/home/bhc/BHC/model/intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml"
    em_bin = "/home/bhc/BHC/model/intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.bin"
    em_net = ie.read_network(model=em_xml, weights=em_bin)
    em_input_blob = next(iter(em_net.input_info))
    # the model has two outputs; iteration order is assumed to yield
    # 'age_conv3' first, then 'prob'
    em_it = iter(em_net.outputs)
    em_out_blob1 = next(em_it)
    em_out_blob2 = next(em_it)
    en, ec, eh, ew = em_net.input_info[em_input_blob].input_data.shape
    print(en, ec, eh, ew)
    em_exec_net = ie.load_network(network=em_net, device_name="CPU")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        image = cv.resize(frame, (w, h))
        image = image.transpose(2, 0, 1)
        inf_start = time.time()
        res = exec_net.infer(inputs={input_blob: [image]})
        inf_end = time.time() - inf_start
        # print("infer time(ms):%.3f" % (inf_end * 1000))
        ih, iw, ic = frame.shape
        res = res[out_blob]
        for obj in res[0][0]:
            if obj[2] > 0.75:
                xmin = int(obj[3] * iw)
                ymin = int(obj[4] * ih)
                xmax = int(obj[5] * iw)
                ymax = int(obj[6] * ih)
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax >= iw:
                    xmax = iw - 1
                if ymax >= ih:
                    ymax = ih - 1
                roi = frame[ymin:ymax, xmin:xmax, :]
                roi_img = cv.resize(roi, (ew, eh))
                roi_img = roi_img.transpose(2, 0, 1)
                em_res = em_exec_net.infer(inputs={em_input_blob: [roi_img]})
                # 'age_conv3' output: (1, 1, 1, 1), age divided by 100
                age_conv3 = em_res[em_out_blob1].reshape(1, 1)[0][0] * 100
                # 'prob' output: (1, 2, 1, 1), 0 - female, 1 - male
                prob_age = em_res[em_out_blob2].reshape(1, 2)
                label_index = int(np.argmax(prob_age, 1))
                age = int(age_conv3)
                cv.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 2, 8)
                cv.putText(frame, "infer time(ms): %.3f" % (inf_end * 1000), (50, 50),
                           cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 255), 2, 8)
                cv.putText(frame, genders[label_index] + ', ' + str(age), (xmin, ymin),
                           cv.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 255), 2, 8)
        cv.imshow("Face+Age/Gender Detection", frame)
        c = cv.waitKey(1)
        if c == 27:
            break
    cv.waitKey(0)
    cv.destroyAllWindows()


if __name__ == "__main__":
    face_landmark_demo()
    # face_emotion_demo()
    # face_age_gender_demo()
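
# Usage note (an assumption about the setup, not stated in the script):
# the .xml/.bin pairs above follow the directory layout produced by the
# Open Model Zoo downloader, e.g.
#   omz_downloader --name face-detection-0200 --precisions FP16 \
#       -o /home/bhc/BHC/model
# and "1.mp4" is expected in the working directory. Press Esc to stop a
# running demo.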