"""YOLOv3 object detection on a video file using the OpenVINO Inference Engine.

Each frame is resized to the network input size, run through the network,
decoded into bounding boxes, filtered for overlaps and drawn in a window.
"""
import math
import os
import sys
import time
from argparse import ArgumentParser

import cv2
import numpy as np
from openvino.inference_engine import IECore

m_input_size = 416   # network input is 416 x 416
yolo_scale_13 = 13   # output grid of 13 x 13 cells
yolo_scale_26 = 26   # output grid of 26 x 26 cells
yolo_scale_52 = 52   # output grid of 52 x 52 cells
classes = 2          # number of object classes
coords = 4           # number of box coordinate entries per prediction
num = 3              # predicted boxes per grid cell
# Prior (anchor) boxes as a flat list of (width, height) pairs.
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]

LABELS = ("operator1", "operator2")  # class id -> label text

label_text_color = (255, 255, 255)
label_background_color = (125, 175, 75)
box_color = (255, 128, 0)
box_thickness = 1

# Offset into ``anchors`` of the first anchor used by each output grid size.
_ANCHOR_OFFSETS_FULL = {yolo_scale_13: 2 * 6, yolo_scale_26: 2 * 3, yolo_scale_52: 2 * 0}
_ANCHOR_OFFSETS_TINY = {yolo_scale_13: 2 * 3, yolo_scale_26: 2 * 0}


def build_argparser():
    """Return the command-line parser (``-d/--device``, default ``CPU``)."""
    parser = ArgumentParser()
    parser.add_argument(
        "-d", "--device",
        help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. "
             "Sample will look for a suitable plugin for device specified (CPU by default)",
        default="CPU", type=str)
    return parser


def EntryIndex(side, lcoords, lclasses, location, entry):
    """Return the index of one entry inside the flattened output blob.

    ``location`` is ``n * side * side + cell`` where ``n`` is the box number
    within the cell; ``entry`` selects which of the ``lcoords + lclasses + 1``
    values of that box is addressed.
    """
    cells = side * side
    n = location // cells
    loc = location % cells
    return int(n * cells * (lcoords + lclasses + 1) + entry * cells + loc)


class DetectionObject():
    """One detected box, stored as integer corners scaled to the original frame."""

    xmin = 0
    ymin = 0
    xmax = 0
    ymax = 0
    class_id = 0
    confidence = 0.0

    def __init__(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
        # (x, y) is the box centre and (h, w) its size, all in resized-image
        # units; h_scale / w_scale convert them to original-frame units.
        self.xmin = int((x - w / 2) * w_scale)
        self.ymin = int((y - h / 2) * h_scale)
        self.xmax = int(self.xmin + w * w_scale)
        self.ymax = int(self.ymin + h * h_scale)
        self.class_id = class_id
        self.confidence = confidence


def IntersectionOverUnion(box_1, box_2):
    """Return the intersection-over-union of two boxes (0.0 if they do not overlap)."""
    overlap_w = min(box_1.xmax, box_2.xmax) - max(box_1.xmin, box_2.xmin)
    overlap_h = min(box_1.ymax, box_2.ymax) - max(box_1.ymin, box_2.ymin)
    if overlap_w < 0.0 or overlap_h < 0.0:
        area_of_overlap = 0.0
    else:
        area_of_overlap = overlap_w * overlap_h
    box_1_area = (box_1.ymax - box_1.ymin) * (box_1.xmax - box_1.xmin)
    box_2_area = (box_2.ymax - box_2.ymin) * (box_2.xmax - box_2.xmin)
    area_of_union = box_1_area + box_2_area - area_of_overlap
    if area_of_union <= 0.0:
        return 0.0
    return area_of_overlap / area_of_union


def ParseYOLOV3Output(blob, resized_im_h, resized_im_w, original_im_h, original_im_w, threshold, objects):
    """Decode one YOLOv3 output blob and append its detections to ``objects``.

    Args:
        blob: network output; ``blob.shape[2]`` is used as the grid side and
            the blob is flattened before indexing.
        resized_im_h, resized_im_w: size of the image fed to the network.
        original_im_h, original_im_w: size of the source frame.
        threshold: minimum box score, and minimum box score * class score,
            for a detection to be kept.
        objects: list that receives the new ``DetectionObject`` instances.

    Returns:
        The same ``objects`` list.
    """
    side = blob.shape[2]

    # An 18-value anchor list is full YOLOv3, a 12-value list is tiny-YOLOv3;
    # any other length falls back to the full YOLOv3 layout.
    offsets = _ANCHOR_OFFSETS_TINY if len(anchors) == 12 else _ANCHOR_OFFSETS_FULL
    anchor_offset = offsets.get(side, 0)

    side_square = side * side
    output_blob = blob.flatten()
    h_scale = original_im_h / resized_im_h
    w_scale = original_im_w / resized_im_w

    for i in range(side_square):
        row = i // side
        col = i % side
        for n in range(num):
            location = n * side_square + i
            obj_index = EntryIndex(side, coords, classes, location, coords)  # box score
            box_index = EntryIndex(side, coords, classes, location, 0)       # first box entry
            scale = output_blob[obj_index]
            if scale < threshold:
                continue
            # Box centre in resized-image pixels.
            x = (col + output_blob[box_index + 0 * side_square]) / side * resized_im_w
            y = (row + output_blob[box_index + 1 * side_square]) / side * resized_im_h
            # Box size: exponential of the raw value times the anchor size.
            height = math.exp(output_blob[box_index + 3 * side_square]) * anchors[anchor_offset + 2 * n + 1]
            width = math.exp(output_blob[box_index + 2 * side_square]) * anchors[anchor_offset + 2 * n]
            for j in range(classes):
                class_index = EntryIndex(side, coords, classes, location, coords + 1 + j)
                prob = scale * output_blob[class_index]
                if prob < threshold:
                    continue
                objects.append(
                    DetectionObject(x, y, height, width, j, prob, h_scale, w_scale))
    return objects


def _suppress_overlaps(objects, iou_threshold):
    """Zero the confidence of boxes that overlap a more confident box.

    The list is sorted in place by descending confidence first, so the box
    that survives an overlap is always the most confident one. Suppression
    ignores class ids.
    """
    objects.sort(key=lambda det: det.confidence, reverse=True)
    for i, keeper in enumerate(objects):
        if keeper.confidence == 0.0:
            continue
        for other in objects[i + 1:]:
            if IntersectionOverUnion(keeper, other) >= iou_threshold:
                other.confidence = 0
    return objects


def _draw_detections(image, objects, min_confidence):
    """Draw a rectangle and a label for every box with enough confidence."""
    for obj in objects:
        if obj.confidence < min_confidence:
            continue
        label_text = LABELS[obj.class_id] + " (" + "{:.1f}".format(obj.confidence * 100) + "%)"
        cv2.rectangle(image, (obj.xmin, obj.ymin), (obj.xmax, obj.ymax), box_color, box_thickness)
        cv2.putText(image, label_text, (obj.xmin, obj.ymin - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, label_text_color, 1)


def _prepare_input(image, new_w, new_h):
    """Resize ``image``, centre it on a grey square canvas and return it as NCHW."""
    resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((m_input_size, m_input_size, 3), 128)
    top = (m_input_size - new_h) // 2
    left = (m_input_size - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized_image
    prepimg = canvas[np.newaxis, :, :, :]      # add the batch axis
    return prepimg.transpose((0, 3, 1, 2))     # NHWC -> NCHW


def main_IE_infer():
    """Run detection on the configured video until it ends or 'q' is pressed.

    Returns:
        0 on normal completion, 1 if the video could not be opened.
    """
    detection_threshold = 0.7   # minimum score for a decoded box
    iou_threshold = 0.4         # overlap at which the weaker box is dropped
    draw_threshold = 0.2        # minimum confidence for a box to be drawn

    # The frame is resized to fill the whole network input.
    new_w = m_input_size
    new_h = m_input_size

    args = build_argparser().parse_args()

    model_xml = "/home/bhc/darknet-master/backup1/16/16.xml"
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    video_path = "/home/bhc/BHC/Q3/1_Astemo_DL/video/D16_operator/D16_20211025135235.mp4"

    # To use a camera instead, open cv2.VideoCapture(0) here.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Could not open video:", video_path, file=sys.stderr)
        cap.release()
        return 1

    try:
        camera_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        camera_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        vidfps = int(cap.get(cv2.CAP_PROP_FPS))
        print("videosFrameCount =", str(frame_count))
        print("videosFPS =", str(vidfps))
        time.sleep(1)

        ie = IECore()
        net = ie.read_network(model=model_xml, weights=model_bin)
        input_blob = next(iter(net.input_info))
        # Honour the --device option instead of always running on the CPU.
        exec_net = ie.load_network(network=net, device_name=args.device)

        fps = ""
        while cap.isOpened():
            t1 = time.time()

            ret, image = cap.read()
            if not ret:
                break

            prepimg = _prepare_input(image, new_w, new_h)
            outputs = exec_net.infer(inputs={input_blob: prepimg})

            objects = []
            for output in outputs.values():
                objects = ParseYOLOV3Output(output, new_h, new_w, camera_height, camera_width,
                                            detection_threshold, objects)

            _suppress_overlaps(objects, iou_threshold)
            _draw_detections(image, objects, draw_threshold)

            cv2.putText(image, fps, (camera_width - 170, 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
            cv2.imshow("Result", image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            elapsedTime = time.time() - t1
            if elapsedTime > 0:  # guard against a zero-length timer interval
                fps = "(Playback) {:.1f} FPS".format(1 / elapsedTime)

        del net
        del exec_net
    finally:
        # Always free the capture device and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
    return 0


if __name__ == '__main__':
    sys.exit(main_IE_infer() or 0)