• pytorch实现yolov3(5) 实现端到端的目标检测


    torch实现yolov3(1)
    torch实现yolov3(2)
    torch实现yolov3(3)
    torch实现yolov3(4)

    前面4篇已经实现了network的forward,并且将network的output已经转换成了易于操作的detection prediction格式.
    本篇把前面四篇实现的功能组织起来,实现端到端的推理过程.

    整体流程如下

    1. 读取图片,对图片前处理,把图片调整到模型的input size及输入顺序(rgb c x h x w).
    2. 加载模型,读取模型权重文件.
    3. 将第一步读到的矩阵送给模型.进行forward运算.得到prediction
    4. 后处理,我们得到的box坐标是相对于调整后的图片的.要处理成原图上的坐标.

    detect.py 实现完整的端到端的图片检测. 用法: python detect.py --images dog-cycle-car.png --det det

    from __future__ import division
    import time
    import torch 
    import torch.nn as nn
    from torch.autograd import Variable
    import numpy as np
    import cv2 
    from util import *
    import argparse
    import os 
    import os.path as osp
    from darknet import Darknet
    import pickle as pkl
    import pandas as pd
    import random
    
    def arg_parse():
        """
        Parse the command-line arguments of the detect module.

        Returns:
            argparse.Namespace with the attributes ``images``, ``det``, ``bs``,
            ``confidence``, ``nms_thresh``, ``cfgfile``, ``weightsfile`` and
            ``reso``.
        """
        parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')

        parser.add_argument("--images", dest='images', type=str, default="imgs",
                            help="Image / Directory containing images to perform detection upon")
        parser.add_argument("--det", dest='det', type=str, default="det",
                            help="Image / Directory to store detections to")
        # The numeric options declare a type so that argparse rejects malformed
        # values up front and CLI values have the same type as the defaults.
        parser.add_argument("--bs", dest="bs", type=int, default=1,
                            help="Batch size")
        parser.add_argument("--confidence", dest="confidence", type=float, default=0.5,
                            help="Object Confidence to filter predictions")
        parser.add_argument("--nms_thresh", dest="nms_thresh", type=float, default=0.4,
                            help="NMS Threshhold")
        parser.add_argument("--cfg", dest='cfgfile', type=str, default="cfg/yolov3.cfg",
                            help="Config file")
        parser.add_argument("--weights", dest='weightsfile', type=str, default="yolov3.weights",
                            help="weightsfile")
        # Kept as a string: the caller stores it in the network info and
        # converts it with int() itself.
        parser.add_argument("--reso", dest='reso', type=str, default="416",
                            help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed")

        return parser.parse_args()
        
    # Read the command-line options once and normalise the numeric ones.
    args = arg_parse()
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    
    # num_classes is passed to write_results; classes maps a class index to
    # its label when detections are printed and drawn.
    num_classes = 80
    classes = load_classes("data/coco.names")
    
    # Set up the neural network from the cfg file and load its weights
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")
    
    # Override the network input size with --reso; inp_dim is the integer
    # side length used by the preprocessing and post-processing code.
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    # The input size must be a multiple of 32 and larger than 32
    # (presumably because of the network's downsampling stride - TODO confirm).
    assert inp_dim % 32 == 0 
    assert inp_dim > 32
    
    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()
    
    
    # Set the model in evaluation mode
    model.eval()
    
    read_dir = time.time()
    # Detection phase: collect the image paths. ``images`` may name a directory
    # (every entry in it is used) or a single image file.
    try:
        imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
    except NotADirectoryError:
        imlist = [osp.join(osp.realpath('.'), images)]
    except FileNotFoundError:
        print ("No file or directory with the name {}".format(images))
        exit()

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()
    loaded_ims = [cv2.imread(x) for x in imlist]

    # cv2.imread returns None (it does not raise) for files it cannot decode,
    # so drop those entries instead of crashing on ``x.shape`` below. imlist
    # and loaded_ims are filtered together so they stay index-aligned.
    readable = [(path, im) for path, im in zip(imlist, loaded_ims) if im is not None]
    if not readable:
        print ("No readable images found in {}".format(images))
        exit()
    imlist = [path for path, _ in readable]
    loaded_ims = [im for _, im in readable]

    # One preprocessed input tensor per image.
    im_batches = [prep_image(im, inp_dim) for im in loaded_ims]
    # Original (width, height) of every image, repeated to (w, h, w, h) so a
    # row lines up with the four box coordinates during post-processing.
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

    # One extra, smaller batch is needed when the image count is not a
    # multiple of the batch size.
    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1

    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        # Concatenate the per-image tensors of each batch along dimension 0.
        im_batches = [torch.cat(im_batches[i*batch_size : min((i + 1)*batch_size, len(im_batches))])
                      for i in range(num_batches)]

    # Flag: becomes 1 once the accumulated ``output`` tensor exists.
    write = 0

    if CUDA:
        im_dim_list = im_dim_list.cuda()
        
    start_det_loop = time.time()
    for i, batch in enumerate(im_batches):
        start = time.time()
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            # Calling the module instance goes through __call__, which runs
            # the model's forward pass.
            prediction = model(Variable(batch), CUDA)

        prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thesh)

        end = time.time()

        # Paths of the images that make up this batch.
        batch_paths = imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]

        # An int result is treated as "nothing detected in this batch"
        # (presumably write_results returns 0 in that case - confirm in util).
        if isinstance(prediction, int):
            for image in batch_paths:
                print("{0:20s} predicted in {1:6.3f} seconds".format(osp.basename(image), (end - start)/batch_size))
                print("{0:20s} {1:s}".format("Objects Detected:", ""))
                print("----------------------------------------------------------")
            continue

        # Turn the index-within-batch in column 0 into an index into imlist.
        prediction[:,0] += i*batch_size

        if not write:
            # First batch with detections: start the accumulated output.
            output = prediction
            write = 1
        else:
            output = torch.cat((output,prediction))

        for im_num, image in enumerate(batch_paths):
            im_id = i*batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(osp.basename(image), (end - start)/batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")

        if CUDA:
            torch.cuda.synchronize()

    # ``write`` is set exactly when ``output`` is first assigned, so it tells
    # us whether any batch produced detections.
    if not write:
        print ("No detections were made")
        exit()
    
    # For every detection row, select the (w, h, w, h) of the image it belongs
    # to; column 0 of output holds the image index.
    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())

    # Per-detection resize factor between the original image and the network
    # input. Uses inp_dim instead of a hard-coded 416 so the mapping stays
    # correct when --reso selects another input resolution.
    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)

    # Remove the horizontal / vertical padding offsets of the resized image.
    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2

    # Undo the resize so the coordinates refer to the original image.
    output[:,1:5] /= scaling_factor

    # Clip every box to the bounds of its image.
    for i in range(output.shape[0]):
        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])

    output_recast = time.time()
    class_load = time.time()
    # NOTE: pickle must only be used on trusted files; "pallete" is expected
    # to be the colour list shipped next to this script.
    with open("pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    draw = time.time()
    
    
    def write(x, results):
        """
        Draw one detection (bounding box plus class label) on its image.

        Args:
            x: one detection row; x[0] is the image index, x[1:5] are the two
               box corners (x1, y1, x2, y2) and x[-1] is the class index.
            results: sequence of images indexed by x[0]; the selected image is
               drawn on in place.

        Returns:
            The image that was drawn on.
        """
        # Convert the corners to plain Python ints: OpenCV's drawing functions
        # expect integer points, not 0-d tensors.
        c1 = tuple(int(v) for v in x[1:3])
        c2 = tuple(int(v) for v in x[3:5])
        img = results[int(x[0])]
        cls = int(x[-1])
        color = random.choice(colors)
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
        # Filled rectangle at the box's top-left corner as label background.
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
        return img
    
    
    # Draw every detection on its image; write() modifies loaded_ims in place.
    for detection in output:
        write(detection, loaded_ims)

    # Destination path per input image: <det dir>/det_<file name>.
    det_names = ["{}/det_{}".format(args.det, path.split("/")[-1]) for path in imlist]

    for det_path, annotated in zip(det_names, loaded_ims):
        cv2.imwrite(det_path, annotated)


    end = time.time()

    # (task label, elapsed seconds) rows of the timing report.
    timings = [
        ("Reading addresses", load_batch - read_dir),
        ("Loading batch", start_det_loop - load_batch),
        ("Detection (" + str(len(imlist)) +  " images)", output_recast - start_det_loop),
        ("Output Processing", class_load - output_recast),
        ("Drawing Boxes", end - draw),
        ("Average time_per_img", (end - load_batch)/len(imlist)),
    ]

    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    for task, seconds in timings:
        print("{:25s}: {:2.3f}".format(task, seconds))
    print("----------------------------------------------------------")


    torch.cuda.empty_cache()
        
    
    

    第一段没啥好说的,我们希望可以通过命令行传参,所以用argparse模块来实现参数解析.

    第二段 模型加载

    # Set up the neural network
    print("Loading network.....")
    # Build the network from the cfg file given by --cfg ...
    model = Darknet(args.cfgfile)
    # ... and load the weights from the file given by --weights.
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")
    

    第三段 图像预处理

    对任意一个图片,要先做预处理,把尺寸处理到model的input size.

    read_dir = time.time()
    # Detection phase: collect the image paths. ``images`` may name a directory
    # (every entry in it is used) or a single image file.
    try:
        imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
    except NotADirectoryError:
        imlist = [osp.join(osp.realpath('.'), images)]
    except FileNotFoundError:
        print ("No file or directory with the name {}".format(images))
        exit()

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()
    loaded_ims = [cv2.imread(x) for x in imlist]

    # cv2.imread returns None (it does not raise) for files it cannot decode,
    # so drop those entries instead of crashing on ``x.shape`` below. imlist
    # and loaded_ims are filtered together so they stay index-aligned.
    readable = [(path, im) for path, im in zip(imlist, loaded_ims) if im is not None]
    if not readable:
        print ("No readable images found in {}".format(images))
        exit()
    imlist = [path for path, _ in readable]
    loaded_ims = [im for _, im in readable]

    # One preprocessed input tensor per image.
    im_batches = [prep_image(im, inp_dim) for im in loaded_ims]
    # Original (width, height) of every image, repeated to (w, h, w, h) so a
    # row lines up with the four box coordinates during post-processing.
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

    # One extra, smaller batch is needed when the image count is not a
    # multiple of the batch size.
    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1

    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        # Concatenate the per-image tensors of each batch along dimension 0.
        im_batches = [torch.cat(im_batches[i*batch_size : min((i + 1)*batch_size, len(im_batches))])
                      for i in range(num_batches)]
    
    

    从某个目录读入n多个图片.假设模型每个batch处理5个图片,每张图片经过预处理后是一个1 x 3 x 320 x 320的tensor(batch x c x h x w),则每次输入模型的tensor为5 x 3 x 320 x 320.即

    # Group the per-image tensors into batches: batch i is the slice
    # [i*batch_size, (i+1)*batch_size) of im_batches (the last one may be
    # shorter), joined with torch.cat along dimension 0.
    im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
                            len(im_batches))]))  for i in range(num_batches)] 
    

    所做的事情.

    图片的前处理所用到的一些工具函数如下.

    def letterbox_image(img, inp_dim):
        """
        Resize an image to fit inside ``inp_dim`` without changing its aspect
        ratio, centred on a canvas filled with the value 128.

        Args:
            img: image array indexed as (height, width, channel).
            inp_dim: (width, height) of the target canvas.

        Returns:
            Array of shape (height, width, 3) holding the resized image with
            the remaining area set to 128.
        """
        src_h, src_w = img.shape[0], img.shape[1]
        target_w, target_h = inp_dim

        # The smaller of the two ratios makes the image fit in both directions.
        scale = min(target_w/src_w, target_h/src_h)
        fit_w = int(src_w * scale)
        fit_h = int(src_h * scale)
        scaled = cv2.resize(img, (fit_w, fit_h), interpolation = cv2.INTER_CUBIC)

        # Top-left corner that centres the resized image on the canvas.
        top = (target_h - fit_h)//2
        left = (target_w - fit_w)//2

        canvas = np.full((target_h, target_w, 3), 128)
        canvas[top:top + fit_h, left:left + fit_w, :] = scaled

        return canvas
    

    保证原有图片的宽高比,其余位置灰度值填充.

    cv读进来的bgr格式,我们转成rgb的.然后transpose 把h x w x c的转成c x h x w的.

    def prep_image(img, inp_dim):
        """
        Prepare image for inputting to the neural network.

        The image is letterboxed to inp_dim x inp_dim (aspect ratio kept, rest
        padded), converted from BGR to RGB, reordered from H x W x C to
        C x H x W, divided by 255 and given a leading batch dimension.

        Args:
            img: image array as read by cv2 (H x W x C, BGR).
            inp_dim: side length of the square network input.

        Returns:
            A float tensor of shape 1 x C x inp_dim x inp_dim.
        """
        # Letterbox instead of a plain resize: the post-processing step removes
        # letterbox padding offsets when mapping boxes back to the original
        # image, so the preprocessing has to add that padding.
        img = letterbox_image(img, (inp_dim, inp_dim))
        # BGR -> RGB, then H x W x C -> C x H x W; copy() makes the array
        # contiguous again so torch.from_numpy accepts it.
        img = img[:,:,::-1].transpose((2,0,1)).copy()
        img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
        return img
    

    参考https://www.cnblogs.com/sdu20112013/p/11216322.html

    4.将矩阵喂给模型,进行forward

    for i, batch in enumerate(im_batches):
        start = time.time()
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            # Calling the module instance goes through __call__, which runs
            # the model's forward pass.
            prediction = model(Variable(batch), CUDA)

        prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thesh)

        end = time.time()

        # Paths of the images that make up this batch.
        batch_paths = imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]

        # An int result is treated as "nothing detected in this batch"
        # (presumably write_results returns 0 in that case - confirm in util).
        if isinstance(prediction, int):
            for image in batch_paths:
                print("{0:20s} predicted in {1:6.3f} seconds".format(osp.basename(image), (end - start)/batch_size))
                print("{0:20s} {1:s}".format("Objects Detected:", ""))
                print("----------------------------------------------------------")
            continue

        # Turn the index-within-batch in column 0 into an index into imlist.
        prediction[:,0] += i*batch_size

        if not write:
            # First batch with detections: start the accumulated output.
            output = prediction
            write = 1
        else:
            output = torch.cat((output,prediction))

        for im_num, image in enumerate(batch_paths):
            im_id = i*batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(osp.basename(image), (end - start)/batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")
    
    

    其中重点就是

    prediction = model(Variable(batch), CUDA) # calling the instance is equivalent to calling the class's __call__() method
    
    # Turn the raw network output into the final detections, using the
    # confidence and NMS thresholds from the command line.
    prediction = write_results(prediction, confidence, num_classes, nms_conf = nms_thesh)
    

    涉及到一个python语法,类实例调用.其实就相当于调用__call__().基类nn.Module的__call__()里调用了forward().所以这一句实际上就相当于调用model.forward(batch, CUDA).

    5.后处理

    # For every detection row, select the (w, h, w, h) of the image it belongs
    # to; column 0 of output holds the image index.
    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())

    # Per-detection resize factor between the original image and the network
    # input. Uses inp_dim instead of a hard-coded 416 so the mapping stays
    # correct when --reso selects another input resolution.
    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)

    # Remove the horizontal / vertical padding offsets of the resized image.
    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2

    # Undo the resize so the coordinates refer to the original image.
    output[:,1:5] /= scaling_factor

    # Clip every box to the bounds of its image.
    for i in range(output.shape[0]):
        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
        
    

    output中的box坐标是相对于模型的输入图片的,将其映射到相对于原始图片的位置.

    图片绘制,涉及python基础语法参考https://www.cnblogs.com/sdu20112013/p/11216584.html

    # Draw every detection on its image; write() modifies loaded_ims in place.
    for detection in output:
        write(detection, loaded_ims)

    # Destination path per input image: <det dir>/det_<file name>.
    det_names = ["{}/det_{}".format(args.det, path.split("/")[-1]) for path in imlist]

    for det_path, annotated in zip(det_names, loaded_ims):
        cv2.imwrite(det_path, annotated)
    
  • 相关阅读:
    静态工具类中使用注解注入service
    赵伟国:芯片不像互联网 不能一招鲜吃遍天(发展芯片制造业已拥有三个纵深:市场纵深、资本纵深、人才纵深)
    将grub写入mbr
    Linux下Qt5.6 Fcitx无法输入中文输入解决办法
    为 Mac Finder 增加右键文件打包压缩(免费)
    Obtaining Directory Change Notifications(微软的例子,使用FindFirstChangeNotification,FindNextChangeNotification,FindCloseChangeNotification API函数)
    C#开发Linux守护进程
    排序算法比较与分析
    Mvc+Dapper+存储过程分页10万条数据
    RabbitMQ
  • 原文地址:https://www.cnblogs.com/sdu20112013/p/11216784.html
Copyright © 2020-2023  润新知