• few-shot-learning for object detection


    github  https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

    train.py

    from __future__ import print_function
    import sys
    
    import time
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim
    import torch.backends.cudnn as cudnn
    from torchvision import datasets, transforms
    from torch.autograd import Variable
    from tqdm import tqdm
    
    import dataset
    import random
    import math
    import os
    from utils import *
    from cfg import parse_cfg, cfg
    from darknet import Darknet
    import pdb
    
    # Training settings
    # (originally taken from the command line; paths are hard-coded below)
    # datacfg = sys.argv[1]
    # darknetcfg = parse_cfg(sys.argv[2])
    # learnetcfg = parse_cfg(sys.argv[3])

    datacfg = "cfg/fewyolov3_voc.data"
    darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
    learnetcfg = parse_cfg("cfg/reweighting_net.cfg")
    # Default checkpoint; an explicit weight file may still be given as argv[4].
    weightfile = "tmp/000050.weights"
    if len(sys.argv) == 5:
        weightfile = sys.argv[4]

    data_options = read_data_cfg(datacfg)
    net_options = darknetcfg[0]
    meta_options = learnetcfg[0]

    # Configure options
    cfg.config_data(data_options)
    cfg.config_meta(meta_options)
    cfg.config_net(net_options)

    # Parameters
    metadict = data_options['meta']
    trainlist = data_options['train']

    testlist = data_options['valid']
    # NOTE(review): this value is overwritten by `backupdir = cfg.backup` below,
    # so the data-cfg 'backup' entry is effectively dead here.
    backupdir = data_options['backup']
    gpus = data_options['gpus']  # e.g. 0,1,2,3
    ngpus = len(gpus.split(','))
    num_workers = int(data_options['num_workers'])

    batch_size = int(net_options['batch'])
    print("batch_size:",batch_size)
    max_batches = int(net_options['max_batches'])
    learning_rate = float(data_options['learning_rate'])
    # momentum/decay are read but unused with the Adam optimizer below.
    momentum = float(net_options['momentum'])
    decay = float(net_options['decay'])
    # LR schedule: batch-count thresholds and the multiplicative factors
    # applied by adjust_learning_rate().
    steps = [float(step) for step in data_options['steps'].split(',')]
    scales = [float(scale) for scale in data_options['scales'].split(',')]

    # Train parameters
    use_cuda = True
    seed = int(time.time())

    ## --------------------------------------------------------------------------
    ## MAIN
    backupdir = cfg.backup
    print('logging to ' + backupdir)
    if not os.path.exists(backupdir):
        os.makedirs(backupdir)

    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    model = Darknet(darknetcfg, learnetcfg)
    region_loss = model.loss

    model.print_network()
    # if len(sys.argv) == 5:
    model.load_weights(weightfile)

    ###################################################
    ### Meta-model parameters
    region_loss.seen = model.seen
    # NOTE(review): `/` is true division under Python 3, so resuming from a
    # checkpoint makes processed_batches/init_epoch floats; init_epoch is
    # int()-clamped right before the epoch loop below.
    processed_batches = 0 if cfg.tuning else model.seen / batch_size
    trainlist = dataset.build_dataset(data_options)
    nsamples = len(trainlist)
    init_width = model.width
    init_height = model.height
    init_epoch = 0 if cfg.tuning else model.seen / nsamples
    max_epochs = max_batches * batch_size / nsamples + 1
    max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
    print(cfg.repeat, nsamples, max_batches, batch_size)
    print(num_workers)

    kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model).cuda()
        else:
            model = model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    
    def adjust_learning_rate(optimizer, processed_batches):
        """Apply the piecewise-constant LR schedule from the data cfg.

        For every threshold in the module-level ``steps`` that
        ``processed_batches`` has reached, the base ``learning_rate`` is
        multiplied by the matching entry of ``scales`` (defaulting to 1 when
        ``scales`` is shorter). The resulting lr is written into every param
        group of ``optimizer`` and returned.

        NOTE: the previous docstring ("decayed by 10 every 30 epochs") did not
        describe this behavior.
        """
        lr = learning_rate
        for i, step in enumerate(steps):
            scale = scales[i] if i < len(scales) else 1
            if processed_batches >= step:
                lr = lr * scale
                # Landing exactly on a boundary stops the walk so later scales
                # are not applied in the same call (darknet convention).
                if processed_batches == step:
                    break
            else:
                break
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return lr
    
    
    def train(epoch):
        """Run one training epoch of the few-shot detector.

        Builds the image loader and the meta (support-set) loader, steps them
        in lockstep, and optimizes the model with the multi-scale region loss.
        Saves a checkpoint every ``cfg.save_interval`` epochs.

        Uses module-level globals: model, optimizer, region_loss, trainlist,
        metadict, batch_size, num_workers, kwargs, use_cuda, ngpus, backupdir,
        init_width/init_height, max_epochs, processed_batches.
        """
        global processed_batches
        t0 = time.time()
        # DataParallel wraps the real model in .module.
        if ngpus > 1:
            cur_model = model.module
        else:
            cur_model = model

        train_loader = torch.utils.data.DataLoader(
            dataset.listDataset(trainlist, shape=(init_width, init_height),
                                shuffle=False,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                ]),
                                train=True,
                                seen=cur_model.seen,
                                batch_size=batch_size,
                                num_workers=num_workers),
            batch_size=batch_size, shuffle=False, **kwargs)

        # Support-set loader, consumed one batch per training batch.
        metaset = dataset.MetaDataset(metafiles=metadict, train=True)
        metaloader = torch.utils.data.DataLoader(
            metaset,
            batch_size=metaset.batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True
        )
        metaloader = iter(metaloader)

        lr = adjust_learning_rate(optimizer, processed_batches)
        logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

        model.train()
        t1 = time.time()
        avg_time = torch.zeros(9)
        with tqdm(total=len(train_loader)) as t:

            for batch_idx, (data, target) in enumerate(train_loader):
                # BUG FIX: `metaloader.next()` is Python 2 only; under Python 3
                # DataLoader iterators must be advanced with next().
                metax, mask = next(metaloader)
                t2 = time.time()
                adjust_learning_rate(optimizer, processed_batches)
                processed_batches = processed_batches + 1
                if use_cuda:
                    data = data.cuda()
                    metax = metax.cuda()
                    mask = mask.cuda()
                    # target= target.cuda()
                t3 = time.time()
                data, target = Variable(data), Variable(target)
                metax, mask = Variable(metax), Variable(mask)
                t4 = time.time()
                optimizer.zero_grad()
                t5 = time.time()
                output = model(data, metax, mask)
                t6 = time.time()
                # Keep the sample counter on both the loss and the model so a
                # resumed run continues the schedule where it left off.
                region_loss.seen = region_loss.seen + data.data.size(0)
                cur_model.seen = region_loss.seen
                region_loss.input_size = (data.data.size(2), data.data.size(3))
                loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)
                t.set_description('Epoch %d' % epoch)
                t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
                              cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
                t.update()

                t7 = time.time()
                loss.backward()
                t8 = time.time()
                optimizer.step()
                t9 = time.time()
                # Dead timing/profiling block, kept (disabled) for debugging.
                if False and batch_idx > 1:
                    avg_time[0] = avg_time[0] + (t2 - t1)
                    avg_time[1] = avg_time[1] + (t3 - t2)
                    avg_time[2] = avg_time[2] + (t4 - t3)
                    avg_time[3] = avg_time[3] + (t5 - t4)
                    avg_time[4] = avg_time[4] + (t6 - t5)
                    avg_time[5] = avg_time[5] + (t7 - t6)
                    avg_time[6] = avg_time[6] + (t8 - t7)
                    avg_time[7] = avg_time[7] + (t9 - t8)
                    avg_time[8] = avg_time[8] + (t9 - t1)
                    print('-------------------------------')
                    print('       load data : %f' % (avg_time[0] / (batch_idx)))
                    print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
                    print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
                    print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
                    print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
                    print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
                    print('        backward : %f' % (avg_time[6] / (batch_idx)))
                    print('            step : %f' % (avg_time[7] / (batch_idx)))
                    print('           total : %f' % (avg_time[8] / (batch_idx)))
                t1 = time.time()
            print('')
            t1 = time.time()
            logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))

            if (epoch + 1) % cfg.save_interval == 0:
                logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
                cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))
    
    # Epoch bounds may be floats (true division during setup); clamp to ints.
    init_epoch, max_epochs = int(init_epoch), int(max_epochs)
    print(f"init_epoch: {init_epoch}")
    print(f"max_epochs: {max_epochs}")
    epoch = init_epoch
    while epoch < max_epochs:
        train(epoch)
        epoch += 1
    

    region_loss.py

    import time
    import torch
    import math
    import torch.nn as nn
    import torch.nn.functional as F
    import numpy as np
    from torch.autograd import Variable
    from utils import *
    from cfg import cfg
    from numbers import Number
    from random import random, randint
    import pdb
    
    
    def neg_filter(pred_boxes, target, withids=False):
        """Randomly subsample negative rows (all-zero targets) of a batch.

        Keeps every positive row and each negative row with probability
        ``cfg.neg_ratio * n_pos / n_neg``, so roughly ``neg_ratio`` negatives
        survive per positive. With ``cfg.neg_ratio == 'full'`` nothing is
        dropped. Returns (pred_boxes, target) and, with ``withids``, also the
        kept row indices.

        Fixes vs. the original (now consistent with neg_filter_v2):
        - no ZeroDivisionError when the batch has no negative rows;
        - at least one row is always kept;
        - np.nonzero instead of np.argwhere(...).squeeze(), whose 0-d result
          broke indexing when exactly one row survived.
        """
        assert pred_boxes.size(0) == target.size(0)
        if cfg.neg_ratio == 'full':
            inds = list(range(pred_boxes.size(0)))
        elif isinstance(cfg.neg_ratio, Number):
            flags = torch.sum(target, 1) != 0
            flags = flags.cpu().data.tolist()
            n_pos = sum(flags)
            n_neg = len(flags) - n_pos
            if n_neg == 0:
                # No negatives to drop; keep the whole batch.
                inds = list(range(pred_boxes.size(0)))
            else:
                ratio = cfg.neg_ratio * n_pos * 1. / n_neg
                if ratio >= 1:
                    inds = list(range(pred_boxes.size(0)))
                else:
                    flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
                    if sum(flags) == 0:
                        # Never return an empty batch.
                        flags[randint(0, len(flags) - 1)] = 1
                    inds = np.nonzero(flags)[0]
                    pred_boxes, target = pred_boxes[inds], target[inds]
        else:
            raise NotImplementedError('neg_ratio not recognized')
        if withids:
            return pred_boxes, target, inds
        else:
            return pred_boxes, target
    
    
    def neg_filter_v2(pred_boxes, target, withids=False):
        """Randomly subsample negative rows (all-zero targets) of a batch.

        Like neg_filter, but guarantees at least one row survives and uses
        np.nonzero for well-formed 1-d indices. Returns (pred_boxes, target)
        and, with ``withids``, also the kept row indices.

        Fix: guard the ratio computation — when every row is positive the
        original divided by zero.
        """
        assert pred_boxes.size(0) == target.size(0)
        if cfg.neg_ratio == 'full':
            inds = list(range(pred_boxes.size(0)))
        elif isinstance(cfg.neg_ratio, Number):
            flags = torch.sum(target, 1) != 0
            flags = flags.cpu().data.tolist()
            n_pos = sum(flags)
            n_neg = len(flags) - n_pos
            if n_neg == 0:
                # No negatives: nothing to drop (previously ZeroDivisionError).
                inds = list(range(pred_boxes.size(0)))
            else:
                ratio = cfg.neg_ratio * n_pos * 1. / n_neg
                if ratio >= 1:
                    inds = list(range(pred_boxes.size(0)))
                else:
                    flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
                    if sum(flags) == 0:
                        # Never return an empty batch.
                        flags[randint(0, len(flags) - 1)] = 1
                    inds = np.nonzero(flags)[0]
                    pred_boxes, target = pred_boxes[inds], target[inds]
        else:
            raise NotImplementedError('neg_ratio not recognized')
        if withids:
            return pred_boxes, target, inds
        else:
            return pred_boxes, target
    
    
    def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
        """Build YOLOv3-style training targets for one feature map.

        pred_boxes : predicted boxes, reshaped below to (nB, nA, H, W, 4),
                     in input-image pixel coordinates.
        target     : (nB, 50*5) padded ground truth; each 5-tuple is
                     (class, cx, cy, w, h) normalized to [0, 1]
                     (assumes at most 50 boxes per image — see the
                     repeat(1, 50) below; TODO confirm against the dataset).
        conf       : predicted objectness, reshaped to (nB, nA, H, W).
        anchors    : (nA, 2) anchor w/h — apparently in input-image pixels,
                     since they are compared against gw/gh scaled by
                     input_size; confirm against the cfg.
        Returns (nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th,
        tconf, tcls, detected50, detected75). Requires CUDA.
        """
        nB = target.size(0)
        nA = num_anchors
        # print('anchor_step: ', anchor_step)
        # obj/noobj masks start as "nothing is an object / everything is background".
        obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)
        noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)
        tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

        # Flatten to one row per (possibly padded) box and drop padding rows
        # (width == 0 marks padding).
        tboxes = target.view(-1, 5)
        nonzero_ind = tboxes[:, 3] > 0
        tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
        # ind_B[i] = batch index of surviving box i (50 slots per image).
        ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()
        ind_B = ind_B[nonzero_ind]
        # Centers in feature-map cells; sizes in input pixels.
        gx = (tboxes[:, 1] * feature_size[1]).float()
        gy = (tboxes[:, 2] * feature_size[0]).float()
        gw = (tboxes[:, 3] * input_size[1]).float()
        gh = (tboxes[:, 4] * input_size[0]).float()
        aw = anchors[:, 0]
        ah = anchors[:, 1]
        nbox = tboxes.size(0)
        # Shape-only IoU (both boxes centered at the origin) to pick the best
        # anchor per ground-truth box.
        gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(), gw.unsqueeze(0), gh.unsqueeze(0)], 0)
        anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(), aw.unsqueeze(0), ah.unsqueeze(0)], 0)
        ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
        best_ious, best_a = ious.max(1)
        # Responsible cell of each ground-truth box.
        gj = gy.long()
        gi = gx.long()
        obj_mask[ind_B, best_a, gj, gi] = 1
        noobj_mask[ind_B, best_a, gj, gi] = 0

        # Anchors whose shape-IoU exceeds the threshold are neither positive
        # nor negative (ignored), as in YOLOv3.
        for i, iou in enumerate(ious):
            if (iou > ignore_thresh).sum():
                noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

        # Regression targets: cell-relative offsets and log-space sizes.
        tx[ind_B, best_a, gj, gi] = gx - gx.floor()
        ty[ind_B, best_a, gj, gi] = gy - gy.floor()
        tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
        th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
        tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
        tconf = obj_mask.float()
        pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()
        conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data
        target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                                  (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                                  gw.unsqueeze(0),
                                  gh.unsqueeze(0)], 0)

        # IoU of the responsible prediction vs. its ground truth, plus
        # "detected" flags (IoU > .5/.75 and confidence > .5) for recall stats.
        iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes, x1y1x2y2=False)
        conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
        detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
        detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

        return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75
    
    
    class RegionLoss(nn.Module):
        """YOLOv2-style region loss over a single feature map.

        NOTE(review): this class looks like legacy/dead code in this module —
        its forward() calls build_targets() with a different signature (and
        unpacks 11 values) than the 12-value build_targets() defined above, so
        calling forward() here would raise. RegionLossV2 is the multi-scale
        loss the training script exercises. Only clear local defects are fixed:
        float anchor_step (Py3 true division), deprecated `.data[0]`,
        `F.sigmoid`, `size_average`, and the mutable default argument.
        """

        def __init__(self, num_classes=0, anchors=None, num_anchors=1):
            super(RegionLoss, self).__init__()
            self.num_classes = num_classes
            # Avoid the shared mutable default argument `anchors=[]`.
            self.anchors = anchors if anchors is not None else []
            self.num_anchors = num_anchors
            # BUG FIX: `/` is true division under Python 3 and yielded a float,
            # which breaks tensor.view(nA, self.anchor_step) in forward().
            self.anchor_step = len(self.anchors) // num_anchors
            self.coord_scale = 1
            self.noobject_scale = 1
            self.object_scale = 5
            self.class_scale = 1
            self.thresh = 0.6
            self.seen = 0

        def forward(self, output, target):
            # output : BxAs*(4+1+num_classes)*H*W
            # target : rows of padded (class, cx, cy, w, h) tuples.
            if target.dim() == 3:
                target = target.view(-1, target.size(-1))
            bef = target.size(0)  # batch size before negative filtering (debug)
            output, target = neg_filter(output, target)
            # print("{}/{}".format(target.size(0), bef))

            t0 = time.time()
            nB = output.data.size(0)
            nA = self.num_anchors
            nC = self.num_classes
            nH = output.data.size(2)
            nW = output.data.size(3)

            # Split the raw output into box offsets, objectness and class scores.
            output = output.view(nB, nA, (5 + nC), nH, nW)
            x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
            y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
            w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
            h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
            conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
            cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
            cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

            t1 = time.time()

            # Decode predictions to absolute (cell-grid) boxes.
            pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
            grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
            grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
            anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
            anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
            anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
            anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
            pred_boxes[0] = x.data + grid_x
            pred_boxes[1] = y.data + grid_y
            pred_boxes[2] = torch.exp(w.data) * anchor_w
            pred_boxes[3] = torch.exp(h.data) * anchor_h
            pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
            t2 = time.time()

            # NOTE(review): this call does not match the build_targets() defined
            # in this file (different parameters, 12 return values) — kept as-is
            # since the original (YOLOv2) build_targets is not in this module.
            nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
                                                                                                        target.data,
                                                                                                        self.anchors, nA,
                                                                                                        nC,
                                                                                                        nH, nW,
                                                                                                        self.noobject_scale,
                                                                                                        self.object_scale,
                                                                                                        self.thresh,
                                                                                                        self.seen)
            cls_mask = (cls_mask == 1)
            if cfg.metayolo:
                tcls.zero_()
            # `.data[0]` is long removed; `.item()` is the modern scalar access.
            nProposals = int((conf > 0.25).float().sum().item())

            tx = Variable(tx.cuda())
            ty = Variable(ty.cuda())
            tw = Variable(tw.cuda())
            th = Variable(th.cuda())
            tconf = Variable(tconf.cuda())
            tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

            coord_mask = Variable(coord_mask.cuda())
            conf_mask = Variable(conf_mask.cuda().sqrt())
            cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
            cls = cls[cls_mask].view(-1, nC)

            t3 = time.time()

            # `size_average=False` is deprecated; reduction='sum' is equivalent.
            loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
            loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
            loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
            loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
            loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
            loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
            t4 = time.time()
            if False:
                print('-----------------------------------')
                print('        activation : %f' % (t1 - t0))
                print(' create pred_boxes : %f' % (t2 - t1))
                print('     build targets : %f' % (t3 - t2))
                print('       create loss : %f' % (t4 - t3))
                print('             total : %f' % (t4 - t0))
            print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
                self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
                loss_conf.item(), loss_cls.item(), loss.item()))
            return loss
    
    
    class RegionLossV2(nn.Module):
        """
        Yolo region loss + Softmax classification across meta-inputs.

        The model emits one prediction map per (image, meta-class) pair; the
        class score of a location is obtained by a softmax across the cs
        meta-inputs of the same image. Box/objectness losses follow YOLOv3
        over three feature scales. Requires CUDA.

        NOTE(review): the pasted source had lost several backslash line
        continuations (the `aw = ...` / `ah = ...` assignments and the big
        tuple-unpack of build_targets), which made it a SyntaxError; they are
        restored here with parentheses.
        """

        def __init__(self, num_classes=0, anchors=None, num_anchors=1, input_size=(832, 832)):
            super(RegionLossV2, self).__init__()
            self.num_classes = num_classes
            # Avoid the shared mutable default argument `anchors=[]`.
            self.anchors = anchors if anchors is not None else []
            self.num_anchors = num_anchors
            self.coord_scale = 1
            self.class_scale = 1
            self.obj_scale = 1
            self.noobj_scale = 100
            self.thresh = 0.5
            self.seen = 0
            self.input_size = input_size
            # Strides of the three YOLOv3 output maps.
            self.feature_scale = [32, 16, 8]
            print('class_scale', self.class_scale)

        def forward(self, output, target):
            # output : (bs*cs, nA*(5+nC), N) with N = sum of H*W over the 3 maps
            # target : (bs, cs, 50*5)
            bs = target.size(0)
            cs = target.size(1)
            nA = self.num_anchors
            nC = self.num_classes
            N = output.data.size(2)

            # Class prediction: softmax over the cs meta-inputs per location.
            cls = output.view(output.size(0), nA, (5 + nC), N)
            cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
            cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
            cls_conf = F.softmax(cls, 1)
            _, cls_max_ids = torch.max(cls_conf, 1)
            cls_max_ids = cls_max_ids.data
            # NOTE: the original also built a `pre_cls_mask` here that was never
            # read afterwards; that dead computation has been removed.

            # Rearrange target and drop a random subset of negative rows.
            target = target.view(-1, target.size(-1))
            output, target, inds = neg_filter_v2(output, target, withids=True)
            # counts[b] = number of kept rows for batch image b (used below to
            # pool the class mask across that image's meta-inputs).
            counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))

            t0 = time.time()
            nB = output.data.size(0)

            output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)
            x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)
            y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
            w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
            h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
            conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))
            t1 = time.time()

            # Decode predictions to input-image pixels, per feature scale.
            pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
            grid_x = []
            grid_y = []
            anchor_w = []
            anchor_h = []
            scale = []
            feature_size = []
            for fs in self.feature_scale:
                feature_h = self.input_size[0] // fs
                feature_w = self.input_size[1] // fs
                feature_size.append([feature_h, feature_w])
                grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                              .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
                grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                              .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
                scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())
            grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
            grid_y = torch.cat(grid_y, 2)
            scale = torch.cat(scale, 2)
            for i in range(3):
                # Anchors come 6 values (3 w/h pairs) per feature map.
                aw = (torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1)
                      .index_select(1, torch.LongTensor([0])).cuda())
                ah = (torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1)
                      .index_select(1, torch.LongTensor([1])).cuda())
                anchor_w.append(aw.repeat(nB, feature_size[i][0] * feature_size[i][1])
                                .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
                anchor_h.append(ah.repeat(nB, feature_size[i][0] * feature_size[i][1])
                                .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
            anchor_w = torch.cat(anchor_w, 2)
            anchor_h = torch.cat(anchor_h, 2)
            pred_boxes[0] = (x.data + grid_x) * scale
            pred_boxes[1] = (y.data + grid_y) * scale
            pred_boxes[2] = torch.exp(w.data) * anchor_w
            pred_boxes[3] = torch.exp(h.data) * anchor_h
            pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)
            t2 = time.time()

            # Build targets per feature map and concatenate along N.
            nGT = 0
            iou_scores = []
            obj_mask = []
            noobj_mask = []
            tx = []
            ty = []
            tw = []
            th = []
            tconf = []
            tcls = []
            start_N = 0
            detected50 = torch.zeros(0)
            detected75 = torch.zeros(0)
            for imap in range(3):
                (nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp,
                 tw_temp, th_temp, tconf_temp, tcls_temp, detected50_temp,
                 detected75_temp) = build_targets(
                    pred_boxes[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1], :],
                    target.data.cuda(),
                    conf[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1]],
                    torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                    nA,
                    feature_size[imap],
                    self.input_size,
                    self.thresh)
                if not len(detected50):
                    detected50 = torch.zeros(nGT).cuda()
                if not len(detected75):
                    detected75 = torch.zeros(nGT).cuda()
                # A ground truth counts as detected if any scale detected it.
                detected50 += detected50_temp
                detected75 += detected75_temp
                start_N += feature_size[imap][0] * feature_size[imap][1]
                iou_scores.append(iou_scores_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                obj_mask.append(obj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                noobj_mask.append(noobj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                tx.append(tx_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                ty.append(ty_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                tw.append(tw_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                th.append(th_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                tconf.append(tconf_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
                tcls.append(tcls_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            iou_scores = torch.cat(iou_scores, 2)
            obj_mask = torch.cat(obj_mask, 2)
            noobj_mask = torch.cat(noobj_mask, 2)
            tx = torch.cat(tx, 2)
            ty = torch.cat(ty, 2)
            tw = torch.cat(tw, 2)
            th = torch.cat(th, 2)
            tconf = torch.cat(tconf, 2)
            tcls = torch.cat(tcls, 2)

            # Pool the object mask over each image's kept meta-input rows; a
            # location trains the classifier only when exactly one row owns it.
            idx_start = 0
            cls_mask_list = []
            tcls_list = []
            for i in range(len(counts)):
                if counts[i] == 0:
                    cur_mask = torch.zeros(nA, N).cuda()
                    cur_tcls = torch.zeros(nA, N).cuda()
                else:
                    cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)
                    cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)
                cls_mask_list.append(cur_mask)
                tcls_list.append(cur_tcls)
                idx_start += counts[i]
            cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
            tcls = torch.stack(tcls_list)

            cls_mask = (cls_mask == 1)
            # Metrics (precision kept for parity with the original code).
            conf50 = (conf > 0.5).float().data
            iou50 = (iou_scores > 0.5).float()
            detected_mask = conf50 * tconf
            precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
            detected50 = (detected50 > 0).float()
            detected75 = (detected75 > 0).float()
            recall50 = detected50.sum() / (nGT + 1e-16)
            recall75 = detected75.sum() / (nGT + 1e-16)
            nProposals = int((conf > 0.25).float().sum().item())
            tx = Variable(tx)
            ty = Variable(ty)
            tw = Variable(tw)
            th = Variable(th)
            tconf = Variable(tconf)

            obj_mask = Variable(obj_mask.bool())
            noobj_mask = Variable(noobj_mask.bool())
            cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
            cls_max_ids = cls_max_ids[cls_mask.view(-1)]
            tcls = Variable(tcls[cls_mask].long())
            cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

            ClassificationLoss = nn.CrossEntropyLoss()
            MseLoss = nn.MSELoss()
            BceLoss = nn.BCELoss()

            t3 = time.time()

            loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
            loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
            loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
            loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
            loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            # Guard: no location may qualify for classification this batch.
            if len(cls):
                loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
            else:
                loss_cls = Variable(torch.Tensor([0]).float().cuda())

            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
            t4 = time.time()
            if False:
                print('-----------------------------------')
                print('        activation : %f' % (t1 - t0))
                print(' create pred_boxes : %f' % (t2 - t1))
                print('     build targets : %f' % (t3 - t2))
                print('       create loss : %f' % (t4 - t3))
                print('             total : %f' % (t4 - t0))
            return loss,loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),loss_conf.item(),loss_cls.item(),cls_acc,recall50.item(),recall75.item(),nProposals
    
    
    def select_classes(pred, tgt, ids):
        """Restrict predictions/targets to the class ids in `ids`.

        pred : (n, C) score tensor; tgt : (n,) class-id tensor.
        Returns (new_pred, new_tgt) where new_pred keeps only the rows whose
        target class is in `ids` and only the columns listed in `ids`, and
        new_tgt is a numpy int array of indices into `ids` (suitable as
        CrossEntropy targets over the reduced column set).

        Fixes vs. the original:
        - `(tgt == d) * i` with a `> 0` filter silently dropped every sample
          of ids[0], whose mapped index is 0;
        - `np.argwhere(...).squeeze()` produced a 0-d index for a single match,
          which broke the subsequent tensor indexing.
        """
        tgt_np = tgt.cpu().data.numpy()
        lookup = {d: i for i, d in enumerate(ids)}
        # 1-d array of row positions whose class is one of `ids`.
        idxes = np.nonzero(np.isin(tgt_np, ids))[0]
        new_tgt = np.array([lookup[int(t)] for t in tgt_np[idxes]], dtype=np.int64)
        new_pred = pred[idxes.tolist()]
        new_pred = new_pred[:, ids]
        return new_pred, new_tgt
    

      

    多思考也是一种努力,做出正确的分析和选择,因为我们的时间和精力都有限,所以把时间花在更有价值的地方。
  • 相关阅读:
    springmvc结合freemarker,非自定义标签
    springmvc的ModelAndView的简单使用
    tomcat无法正常启动的一个原因
    通过springmvc的RequestMapping的headers属性的使用
    springmvc入门demo
    Redis的入门Demo(java)
    Ubuntu18.0.4查看显示器型号
    APS审核经验+审核资料汇总——计算机科学与技术专业上海德语审核
    Java连接GBase并封装增删改查
    SpringMVC源码阅读:异常解析器
  • 原文地址:https://www.cnblogs.com/LiuXinyu12378/p/14821803.html
Copyright © 2020-2023  润新知