• 将dlib xml数据转换成YOLOv3 数据


    dlib 的训练数据是一个测试文件夹和一个训练文件夹,分别放着若干图片和一个xml文件,xml文件保存了对应图片的标注信息。

    dlibData:
    +---test
    |       1.jpg
    |       10.jpg
    |       11.jpg
    |       ...
    |       56.jpg
    |       57.jpg
    |       test.xml
    |       
    \---train
            1.jpg
            ...
            95.jpg
            96.jpg
            97.jpg
            98.jpg
            99.jpg
            train.xml
    

    Yolo的训练数据结构如下:

    YOLOData:.
    |   classes.names # 类别名称
    |   test.txt # 验证的图片路径
    |   train.txt # 训练的图片路径
    |   SplitData.py # 脚本文件, 对labels的标注文件**对应的图片**进行划分得到train.txt 和 test.txt (由于xml文件有些图片没有标注,但是labels的标注文件中肯定有标注)
    |   xml2txt.py # 将train(test)下的图片转到JPEGImages中并随机命名, train.xml(test) 转成labels中的标注文件,图片对应标注文件
    |   YOLOData.data # 保存训练和验证的路径。。etc
    |   
    +---JPEGImages
    |       0eQ2ARay.jpg
    |       0HzMbDSE.jpg
    |       0K7SYueV.jpg
    |       ...
    |       0TIf1aij.jpg
    |       10QmnWfi.jpg
    |       1bVJ5Zkl.jpg
    
    |       
    +---labels
    |       0eQ2ARay.txt
    |       0HzMbDSE.txt
    |       0K7SYueV.txt
    |       ...
    |       Zi2Ec8Tt.txt
    |       znvQ045k.txt
    |       zOvPyFtR.txt
    |       ZViHLeBs.txt
    |       
    +---TestYOLOData # 测试训练结果
    |       darknet-yolov3.cfg # yolo配置文件
    |       img2video.py # 将图片转化成视频的工具
    |       object_detection_yolo.py # 从视频中进行目标检测
    |       test.avi # 由图片生成的视频
    |       test_yolo_out_py.avi # 视频输出结果
    |       
    +---weights
    |       darknet-yolov3_final.weights # 训练得到的权重文件
    |       
    +---test # dlib 数据格式的验证集
    |       1.jpg
    |       10.jpg
    |       11.jpg
    |       ...
    |       56.jpg
    |       57.jpg
    |       test.xml
    |       
    \---train # dlib 数据格式的训练集
            1.jpg
            ...
            95.jpg
            96.jpg
            97.jpg
            98.jpg
            99.jpg
            train.xml
           
    

    xml2txt.py

    '''
    dlib .xml file to yolo .txt file 
    
    python xml2txt.py dlib_train_path dlib_test_path 
    
    example:
    	python xml2txt.py /home/hichens/YOLOData/train/ /home/hichens/YOLOData/test/ 
    '''
    
    import cv2
    import os 
    import subprocess
    import sys 
    import random 
    import string
    
    # Command-line arguments: the dlib train and test directories.
    train_path = sys.argv[1]
    test_path = sys.argv[2]

    # Dataset root = parent of the train directory.  It is resolved from an
    # absolute path so the result does not depend on a trailing slash and can
    # never collapse to "" (which would turn the cleanup below into
    # `rm -rf /JPEGImages/`).
    file_path = os.path.dirname(os.path.abspath(train_path))

    # Start from empty output directories.  They are created under the dataset
    # root rather than in whatever the current working directory happens to be.
    for out_dir in ("JPEGImages", "labels"):
        subprocess.run(['rm', '-rf', os.path.join(file_path, out_dir)])
        os.makedirs(os.path.join(file_path, out_dir), exist_ok=True)
    
    def _write_yolo_sample(img_path, boxes, base_path):
        """Copy one annotated image into JPEGImages/ and write its YOLO label file.

        img_path:  path of the source image.
        boxes:     list of (top, left, width, height) tuples in pixels.
        base_path: dataset root containing the JPEGImages/ and labels/ folders.

        Images without boxes, or that OpenCV cannot read, are skipped.
        """
        import shutil  # local import: not part of the script's top-level imports

        if not boxes:
            return
        img = cv2.imread(img_path)
        if img is None:
            print("skip unreadable image:", img_path)
            return
        H, W = img.shape[:2]

        # Random 8-character stem shared by the image copy and its label file;
        # retry on the (unlikely) collision with an already written image.
        while True:
            add_label = ''.join(random.sample(string.ascii_letters + string.digits, 8))
            new_name = os.path.join(base_path, "JPEGImages", add_label + '.jpg')
            if not os.path.exists(new_name):
                break
        shutil.copy(img_path, new_name)

        rows = []
        for top, left, width, height in boxes:
            # YOLO wants the box centre and size as fractions of the image size.
            x_center, y_center = (left + width / 2) / W, (top + height / 2) / H
            w, h = width / W, height / H
            print(x_center, y_center, w, h)
            # Class id is always 0 (single-class data set).
            rows.append(" ".join(str(i) for i in [0, x_center, y_center, w, h]))

        file_name = os.path.join(base_path, "labels", add_label + ".txt")
        with open(file_name, 'w') as file:
            file.write("\n".join(rows))


    def xml2txt(xml_path):
        """Convert a dlib annotation XML file into YOLO images and label files.

        The file is scanned line by line.  A line starting with ``<image``
        supplies the image file name (its first single-quoted value); each
        following line starting with ``<box`` supplies one box whose first four
        single-quoted values are read as top, left, width and height.

        Every image that has at least one box is copied once to
        ``<root>/JPEGImages/<random>.jpg`` and gets a single
        ``<root>/labels/<random>.txt`` holding one line per box, where
        ``<root>`` is the parent of the directory that contains the XML file.
        Previously each box produced its own image copy with a one-box label,
        so multi-box images were trained with missing annotations.
        """
        xml_dir = os.path.dirname(os.path.abspath(xml_path))
        base_path = os.path.dirname(xml_dir)

        img_name = None
        boxes = []
        with open(xml_path, 'r') as f:
            for line in f:
                ss = line.split()
                if not ss:
                    continue
                if ss[0] == "<image":
                    # A new image starts: flush the boxes of the previous one.
                    if img_name is not None:
                        _write_yolo_sample(os.path.join(xml_dir, img_name), boxes, base_path)
                    img_name = line.split("'")[1]
                    boxes = []
                    print(img_name)
                elif ss[0] == "<box" and img_name is not None:
                    # A box that precedes any <image> line has no owner and is ignored.
                    ll = line.split("'")
                    boxes.append((int(ll[1]), int(ll[3]), int(ll[5]), int(ll[7])))

        # Flush the last image of the file.
        if img_name is not None:
            _write_yolo_sample(os.path.join(xml_dir, img_name), boxes, base_path)
                
    if __name__ == "__main__":
        # os.path.join accepts the directory arguments with or without a
        # trailing slash; plain concatenation produced ".../traintrain.xml"
        # when the slash was omitted.
        xml2txt(os.path.join(train_path, "train.xml"))
        xml2txt(os.path.join(test_path, "test.xml"))
            
    

    SplitData.py

    '''
    Split the images that have a label file in labels/ into a training set and a validation set
    
    python SplitData.py /home/hichens/YOLOData/
    
    '''
    
    
    import random
    import os
    import subprocess
    import sys
    
    def split_data_set(base_path):
        """Split the labelled images into a training list and a validation list.

        base_path: dataset root containing the ``labels`` and ``JPEGImages``
                   folders (with or without a trailing slash).

        Every ``*.txt`` file in ``labels`` stands for one image
        ``JPEGImages/<stem>.jpg``.  A random 10% of them (rounded down) is
        written, one path per line, to ``eye_test.txt`` and the rest to
        ``eye_train.txt``; both files are created in the current directory.
        """
        label_dir = os.path.join(base_path, 'labels')
        image_dir = os.path.join(base_path, 'JPEGImages')

        # Only *.txt files are labels; anything else in the folder is ignored
        # and does not count towards the data-set size.
        stems = sorted(
            os.path.splitext(name)[0]
            for name in os.listdir(label_dir)
            if name.endswith('.txt')
        )

        data_test_size = int(0.1 * len(stems))
        # Sampled indices are 0-based, matching enumerate() below; a set keeps
        # the membership test cheap.
        test_indices = set(random.sample(range(len(stems)), k=data_test_size))

        # `with` guarantees both lists are flushed and closed.
        with open("eye_test.txt", 'w') as f_val, open("eye_train.txt", 'w') as f_train:
            for ind, stem in enumerate(stems):
                target = f_val if ind in test_indices else f_train
                target.write(image_dir + '/' + stem + '.jpg' + '\n')
    
    if __name__ == "__main__":
        # The single command-line argument is the dataset root,
        # e.g. /home/hichens/YOLOData/
        dataset_root = sys.argv[1]
        split_data_set(dataset_root)
    
    

    img2video.py

    '''
    combine the images to video 
    
    python img2video.py image_path
    
    example:
    	python img2video.py /home/hichens/YOLOData/test/ 
    '''
    # encoding: UTF-8
    import glob as gb
    import cv2
    import sys
    # Directory prefix of the input images (expected to end with a slash).
    in_path = sys.argv[1]

    # glob returns files in arbitrary order; sort for a reproducible frame order.
    img_path = sorted(gb.glob(in_path + "*"))
    fps  = 4 # the bigger the value is, the faster is the video.
    size =  (640,480) # the frame size every image is resized to
    videoWriter = cv2.VideoWriter('test.avi',
                                  cv2.VideoWriter_fourcc('I','4','2','0'), fps, size)

    # Print a progress mark roughly every 1/30th of the files.  The step is at
    # least 1 so that folders with fewer than 30 files do not cause a modulo
    # by zero.
    step = max(1, len(img_path) // 30)
    print("[", end="")
    for i, path in enumerate(img_path):
        img = cv2.imread(path)
        if img is None:
            # Not a readable image (e.g. the dlib .xml stored next to the pictures).
            continue
        # Every image becomes one frame; previously only every `step`-th image
        # was loaded and then written repeatedly.
        videoWriter.write(cv2.resize(img, size))
        if(i % step == 0):
            print(">", end="")
    # Release the writer so the video file is finalized.
    videoWriter.release()
    print("]")
    print("OK!")
    
    

    object_detection_yolo.py

    '''
    test the training result 
    
    example:  
        python object_detection_yolo.py --video=test.avi
        python object_detection_yolo.py --image=bird.jpg
    
    '''
    
    import cv2 as cv
    import argparse
    import sys
    import numpy as np
    import os.path
    
    # Initialize the parameters
    confThreshold = 0.5  # minimum class score for a detection to be kept
    nmsThreshold = 0.4   # threshold passed to non-maximum suppression

    inpWidth = 416   # width of the network's input image (608 noted as alternative)
    inpHeight = 416  # height of the network's input image (608 noted as alternative)

    parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
    parser.add_argument('--image', help='Path to image file.')
    parser.add_argument('--video', help='Path to video file.')
    args = parser.parse_args()

    # Load names of classes: one class name per line of the file.
    classesFile = "classes.names"

    classes = None
    with open(classesFile, 'rt') as f:
        # Drop the trailing newline first so no empty class name is produced.
        classes = f.read().rstrip('\n').split('\n')

    # Give the configuration and weight files for the model and load the network using them.
    modelConfiguration = "darknet-yolov3.cfg"
    modelWeights = "../weights/darknet-yolov3_800.weights"

    net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
    # Run inference with OpenCV's own backend on the CPU.
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    
    # Get the names of the output layers
    def getOutputsNames(net):
        """Return the names of the layers with unconnected outputs.

        net: a network object exposing ``getLayerNames()`` and
             ``getUnconnectedOutLayers()``.

        The indices from ``getUnconnectedOutLayers()`` are treated as 1-based.
        Depending on the OpenCV build they arrive either as one index per row
        (``[[200], [227]]``) or as a flat array (``[200, 227]``); both shapes
        are accepted by flattening first.
        """
        # Get the names of all the layers in the network
        layersNames = net.getLayerNames()
        out_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
        return [layersNames[int(i) - 1] for i in out_ids]
    
    # Draw the predicted bounding box
    def drawPred(classId, conf, left, top, right, bottom):
        """Draw one detection and its caption on the module-level ``frame``.

        classId: index into the module-level ``classes`` list.
        conf:    confidence shown in the caption with two decimals.
        left, top, right, bottom: box corners in pixels.
        """
        box_color = (0, 255, 0)
        cv.rectangle(frame, (left, top), (right, bottom), box_color, 3)

        # Caption is "<class>:<conf>" when class names are loaded, else just "<conf>".
        caption = '%.2f' % conf
        if classes:
            assert(classId < len(classes))
            caption = '%s:%s' % (classes[classId], caption)

        # Measure the caption so a filled background can be drawn behind it.
        textSize, baseLine = cv.getTextSize(caption, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        text_w, text_h = textSize[0], textSize[1]
        # Keep the caption inside the image when the box touches the top edge.
        top = max(top, text_h)

        bg_top_left = (left, top - round(1.5 * text_h))
        bg_bottom_right = (left + round(1.5 * text_w), top + baseLine)
        cv.rectangle(frame, bg_top_left, bg_bottom_right, (0, 0, 255), cv.FILLED)
        cv.putText(frame, caption, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 2)
    
    # Remove the bounding boxes with low confidence using non-maxima suppression
    def postprocess(frame, outs):
        """Filter the raw network outputs and draw the surviving boxes.

        frame: image array; only its height and width (``shape[0]``,
               ``shape[1]``) are read here.
        outs:  iterable of 2-D arrays, one row per candidate detection.  In a
               row, values 0-3 are used as centre x, centre y, width and height
               relative to the frame size, value 4 is only used for the debug
               print (presumably the objectness score), and values 5+ are the
               per-class scores.

        A candidate is kept when its best class score exceeds ``confThreshold``;
        the kept boxes then go through non-maximum suppression and the
        survivors are drawn with ``drawPred``.
        """
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]

        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.
        classIds = []
        confidences = []
        boxes = []
        for out in outs:
            print("out.shape : ", out.shape)
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if detection[4]>confThreshold:
                    print(detection[4], " - ", scores[classId], " - th : ", confThreshold)
                    print(detection)
                if confidence > confThreshold:
                    # Convert the relative centre/size into a pixel box
                    # given as left, top, width, height.
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])

        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
        # Depending on the OpenCV build the indices come back as [[i], [j]] or
        # as a flat [i, j] (or an empty tuple); flattening handles all of them.
        for i in np.asarray(indices).flatten():
            i = int(i)
            left, top, width, height = boxes[i]
            drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
    
    # Process inputs
    winName = 'Deep learning object detection in OpenCV'
    cv.namedWindow(winName, cv.WINDOW_NORMAL)

    # Default output name, used for webcam input.
    outputFile = "yolo_out_py.avi"
    if (args.image):
        # Open the image file
        if not os.path.isfile(args.image):
            print("Input image file ", args.image, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.image)
        # splitext strips the real extension whatever its length
        # (slicing off four characters mangled names such as "photo.jpeg").
        outputFile = os.path.splitext(args.image)[0]+'_yolo_out_py.jpg'
    elif (args.video):
        # Open the video file
        if not os.path.isfile(args.video):
            print("Input video file ", args.video, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.video)
        outputFile = os.path.splitext(args.video)[0]+'_yolo_out_py.avi'
    else:
        # Webcam input
        cap = cv.VideoCapture(0)

    # Get the video writer initialized to save the output video
    # (MJPG, 4 frames per second, same frame size as the input).
    if (not args.image):
        vid_writer = cv.VideoWriter(outputFile,
                                    cv.VideoWriter_fourcc('M','J','P','G'),
                                    4,
                                    (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))
    
    # Main loop: runs until a key is pressed or the input has no more frames.
    try:
        while cv.waitKey(1) < 0:

            # get frame from the video
            hasFrame, frame = cap.read()

            # Stop the program if reached end of video
            if not hasFrame:
                print("Done processing !!!")
                print("Output file is stored as ", outputFile)
                cv.waitKey(3000)
                break

            # Create a 4D blob from a frame.
            blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)

            # Sets the input to the network
            net.setInput(blob)

            # Runs the forward pass to get output of the output layers
            outs = net.forward(getOutputsNames(net))

            # Remove the bounding boxes with low confidence
            postprocess(frame, outs)

            # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
            t, _ = net.getPerfProfile()
            label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
            #cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

            # Write the frame with the detection boxes
            if (args.image):
                cv.imwrite(outputFile, frame.astype(np.uint8))
            else:
                vid_writer.write(frame.astype(np.uint8))

            cv.imshow(winName, frame)
    finally:
        # Release the capture and the writer even on errors so the output
        # video is finalized, then close the preview window.
        cap.release()
        if (not args.image):
            vid_writer.release()
        cv.destroyAllWindows()
    
    
  • 相关阅读:
    js使用笔记
    rabbit-mq使用官方文档
    tomcat Enabling JMX Remote
    Venom的简单使用
    Random模块
    时间模块
    shulti模块简述
    Python的os模块
    Python压缩及解压文件
    Kali的内网穿透
  • 原文地址:https://www.cnblogs.com/hichens/p/12868154.html
Copyright © 2020-2023  润新知