GitHub: https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection
train.py
from __future__ import print_function
import sys
import time
import math
import os
import random
import pdb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable
from tqdm import tqdm

import dataset
from utils import *
from cfg import parse_cfg, cfg
from darknet import Darknet

# Training settings
# datacfg = sys.argv[1]
# darknetcfg = parse_cfg(sys.argv[2])
# learnetcfg = parse_cfg(sys.argv[3])
datacfg = "cfg/fewyolov3_voc.data"
darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
learnetcfg = parse_cfg("cfg/reweighting_net.cfg")

weightfile = "tmp/000050.weights"
if len(sys.argv) == 5:
    weightfile = sys.argv[4]

data_options = read_data_cfg(datacfg)
net_options = darknetcfg[0]
meta_options = learnetcfg[0]

# Configure options
cfg.config_data(data_options)
cfg.config_meta(meta_options)
cfg.config_net(net_options)

# Parameters
metadict = data_options['meta']
trainlist = data_options['train']
testlist = data_options['valid']
backupdir = data_options['backup']
gpus = data_options['gpus']  # e.g. 0,1,2,3
ngpus = len(gpus.split(','))
num_workers = int(data_options['num_workers'])

batch_size = int(net_options['batch'])
print("batch_size:", batch_size)
max_batches = int(net_options['max_batches'])
learning_rate = float(data_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
steps = [float(step) for step in data_options['steps'].split(',')]
scales = [float(scale) for scale in data_options['scales'].split(',')]

# Train parameters
use_cuda = True
seed = int(time.time())

## --------------------------------------------------------------------------
## MAIN
backupdir = cfg.backup
print('logging to ' + backupdir)
if not os.path.exists(backupdir):
    os.makedirs(backupdir)

torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

model = Darknet(darknetcfg, learnetcfg)
region_loss = model.loss

model.print_network()
# if len(sys.argv) == 5:
model.load_weights(weightfile)

###################################################
### Meta-model parameters
region_loss.seen = model.seen
processed_batches = 0 if cfg.tuning else model.seen // batch_size
trainlist = dataset.build_dataset(data_options)
nsamples = len(trainlist)
init_width = model.width
init_height = model.height
init_epoch = 0 if cfg.tuning else model.seen // nsamples
max_epochs = max_batches * batch_size // nsamples + 1
max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
print(cfg.repeat, nsamples, max_batches, batch_size)
print(num_workers)

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)


def adjust_learning_rate(optimizer, processed_batches):
    """Step schedule: multiply the base LR by the configured scale each time
    processed_batches passes one of the cfg steps."""
    lr = learning_rate
    for i in range(len(steps)):
        scale = scales[i] if i < len(scales) else 1
        if processed_batches >= steps[i]:
            lr = lr * scale
            if processed_batches == steps[i]:
                break
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr


def train(epoch):
    global processed_batches
    t0 = time.time()
    cur_model = model.module if ngpus > 1 else model

    # Query-image loader
    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(trainlist, shape=(init_width, init_height),
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=True,
                            seen=cur_model.seen,
                            batch_size=batch_size,
                            num_workers=num_workers),
        batch_size=batch_size, shuffle=False, **kwargs)

    # Support-image (meta) loader, consumed in lockstep with the query loader
    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=metaset.batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %e' %
            (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    with tqdm(total=train_loader.__len__()) as t:
        for batch_idx, (data, target) in enumerate(train_loader):
            metax, mask = next(metaloader)  # matching support batch
            t2 = time.time()
            adjust_learning_rate(optimizer, processed_batches)
            processed_batches = processed_batches + 1
            if use_cuda:
                data = data.cuda()
                metax = metax.cuda()
                mask = mask.cuda()
                # target = target.cuda()
            t3 = time.time()
            data, target = Variable(data), Variable(target)
            metax, mask = Variable(metax), Variable(mask)
            t4 = time.time()
            optimizer.zero_grad()
            t5 = time.time()
            output = model(data, metax, mask)
            t6 = time.time()

            region_loss.seen = region_loss.seen + data.data.size(0)
            cur_model.seen = region_loss.seen
            region_loss.input_size = (data.data.size(2), data.data.size(3))
            loss, loss_box, loss_conf, loss_cls, cls_acc, recall50, recall75, nProposals = region_loss(output, target)

            t.set_description('Epoch %d' % epoch)
            t.set_postfix(loss=loss.item(), loss_bbox=loss_box, loss_conf=loss_conf, loss_cls=loss_cls,
                          cls_acc=cls_acc, recall50=recall50, recall75=recall75, Proposals=nProposals)
            t.update()

            t7 = time.time()
            loss.backward()
            t8 = time.time()
            optimizer.step()
            t9 = time.time()

            if False and batch_idx > 1:
                avg_time[0] = avg_time[0] + (t2 - t1)
                avg_time[1] = avg_time[1] + (t3 - t2)
                avg_time[2] = avg_time[2] + (t4 - t3)
                avg_time[3] = avg_time[3] + (t5 - t4)
                avg_time[4] = avg_time[4] + (t6 - t5)
                avg_time[5] = avg_time[5] + (t7 - t6)
                avg_time[6] = avg_time[6] + (t8 - t7)
                avg_time[7] = avg_time[7] + (t9 - t8)
                avg_time[8] = avg_time[8] + (t9 - t1)
                print('-------------------------------')
                print('       load data : %f' % (avg_time[0] / batch_idx))
                print('     cpu to cuda : %f' % (avg_time[1] / batch_idx))
                print('cuda to variable : %f' % (avg_time[2] / batch_idx))
                print('       zero_grad : %f' % (avg_time[3] / batch_idx))
                print(' forward feature : %f' % (avg_time[4] / batch_idx))
                print('    forward loss : %f' % (avg_time[5] / batch_idx))
                print('        backward : %f' % (avg_time[6] / batch_idx))
                print('            step : %f' % (avg_time[7] / batch_idx))
                print('           total : %f' % (avg_time[8] / batch_idx))
            t1 = time.time()
    print('')
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))

    if (epoch + 1) % cfg.save_interval == 0:
        logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
        cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))


init_epoch = int(init_epoch)
max_epochs = int(max_epochs)
print("init_epoch:", init_epoch)
print("max_epochs:", max_epochs)
for epoch in range(init_epoch, max_epochs):
    train(epoch)
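For reference, adjust_learning_rate above walks the steps/scales lists read from the data cfg and multiplies the base learning rate by every scale whose step has already been passed. The standalone sketch below mirrors that behavior without the optimizer side effect; the step and scale values are made up for illustration and are not taken from the repository's cfg files.

def scheduled_lr(base_lr, steps, scales, processed_batches):
    # Mirror of the schedule in adjust_learning_rate (train.py).
    lr = base_lr
    for i, step in enumerate(steps):
        scale = scales[i] if i < len(scales) else 1
        if processed_batches >= step:
            lr = lr * scale
            if processed_batches == step:
                break
        else:
            break
    return lr

# Illustrative values only.
steps, scales = [100, 20000, 30000], [10, 0.1, 0.1]
for pb in (0, 100, 25000, 40000):
    print(pb, scheduled_lr(1e-4, steps, scales, pb))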
region_loss.py
import time
import math
import pdb
from numbers import Number
from random import random, randint

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from utils import *
from cfg import cfg


def neg_filter(pred_boxes, target, withids=False):
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0
        flags = flags.cpu().data.tolist()
        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
        if ratio >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            # Randomly drop negative (all-zero) targets so the kept negatives
            # are roughly neg_ratio times the positives.
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            inds = np.argwhere(flags).squeeze()
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def neg_filter_v2(pred_boxes, target, withids=False):
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0
        flags = flags.cpu().data.tolist()
        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
        if ratio >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            if sum(flags) == 0:
                # Always keep at least one sample.
                flags[randint(0, len(flags) - 1)] = 1
            inds = np.nonzero(flags)[0]
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
    nB = target.size(0)
    nA = num_anchors
    # print('anchor_step: ', anchor_step)
    obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)
    noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)
    tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    # Keep only the non-empty ground-truth rows (cls, cx, cy, w, h per box).
    tboxes = target.view(-1, 5)
    nonzero_ind = tboxes[:, 3] > 0
    tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
    ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()
    ind_B = ind_B[nonzero_ind]

    gx = (tboxes[:, 1] * feature_size[1]).float()
    gy = (tboxes[:, 2] * feature_size[0]).float()
    gw = (tboxes[:, 3] * input_size[1]).float()
    gh = (tboxes[:, 4] * input_size[0]).float()
    aw = anchors[:, 0]
    ah = anchors[:, 1]
    nbox = tboxes.size(0)

    # Match each ground-truth box to the best anchor by (w, h) IoU only.
    gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(),
                        gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(),
                            aw.unsqueeze(0), ah.unsqueeze(0)], 0)
    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA),
                     anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
    best_ious, best_a = ious.max(1)
    gj = gy.long()
    gi = gx.long()

    obj_mask[ind_B, best_a, gj, gi] = 1
    noobj_mask[ind_B, best_a, gj, gi] = 0
    # Anchors above the ignore threshold count as neither positive nor negative.
    for i, iou in enumerate(ious):
        if (iou > ignore_thresh).sum():
            noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1),
                       gj[i:i + 1], gi[i:i + 1]] = 0

    tx[ind_B, best_a, gj, gi] = gx - gx.floor()
    ty[ind_B, best_a, gj, gi] = gy - gy.floor()
    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
    tconf = obj_mask.float()

    pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()
    conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data
    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                              gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(),
                                                  target_boxes, x1y1x2y2=False)
    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50
    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75


class RegionLoss(nn.Module):
    # Legacy single-scale YOLOv2-style loss kept from the original implementation;
    # the training script unpacks the eight values returned by RegionLossV2 below.
    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        # import pdb; pdb.set_trace()
        # output : B x nA*(4+1+num_classes) x H x W
        # if target.dim() == 3:
        #     # target : B * n_cls * l
        #     l = target.size(-1)
        #     target = target.permute(1,0,2).contiguous().view(-1, l)
        if target.dim() == 3:
            target = target.view(-1, target.size(-1))
        bef = target.size(0)
        output, target = neg_filter(output, target)
        # print("{}/{}".format(target.size(0), bef))
        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
        t2 = time.time()

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes, target.data, self.anchors, nA, nC, nH, nW,
            self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        if cfg.metayolo:
            tcls.zero_()
        nProposals = int((conf > 0.25).float().sum().data[0])

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)
        t3 = time.time()

        loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
            loss_conf.data[0], loss_cls.data[0], loss.data[0]))
        return loss


class RegionLossV2(nn.Module):
    """
    YOLO region loss + softmax classification across meta-inputs.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
        super(RegionLossV2, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.coord_scale = 1
        self.class_scale = 1
        self.obj_scale = 1
        self.noobj_scale = 100
        self.thresh = 0.5
        self.seen = 0
        self.input_size = input_size
        self.feature_scale = [32, 16, 8]
        print('class_scale', self.class_scale)

    def forward(self, output, target):
        # output : (bs*cs, nA*(5+1), N) -- one prediction map per (image, meta class) pair
        # target : (bs, cs, 50*5)
        # Get all classification predictions
        # pdb.set_trace()
        bs = target.size(0)
        cs = target.size(1)
        nA = self.num_anchors
        nC = self.num_classes
        N = output.data.size(2)
        # feature_size = [[26, 26], [52, 52], [104, 104]]
        cls = output.view(output.size(0), nA, (5 + nC), N)
        cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
        cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
        cls_conf = F.softmax(cls, 1)
        _, cls_max_ids = torch.max(cls_conf, 1)
        cls_max_ids = cls_max_ids.data
        pre_cls_mask = torch.zeros(bs * nA * N, cs).cuda()
        pre_cls_mask[torch.linspace(0, bs * nA * N - 1, bs * nA * N).long().cuda(), cls_max_ids] = 1
        pre_cls_mask = pre_cls_mask.view(bs, nA * N, cs).transpose(1, 2).contiguous().view(bs * cs, nA, N)

        # Rearrange target and perform the negative-filtering operation
        target = target.view(-1, target.size(-1))
        # bef = target.size(0)
        output, target, inds = neg_filter_v2(output, target, withids=True)
        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))
        # print("{}/{}".format(target.size(0), bef))
        pre_cls_mask = pre_cls_mask[inds]
        t0 = time.time()

        nB = output.data.size(0)
        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, 5+nC, N)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))
        # cls = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))
        # cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
        t1 = time.time()

        # Decode predicted boxes for all three feature scales (32, 16, 8).
        pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
        grid_x = []
        grid_y = []
        anchor_w = []
        anchor_h = []
        scale = []
        feature_size = []
        for fs in self.feature_scale:
            feature_h = self.input_size[0] // fs
            feature_w = self.input_size[1] // fs
            # print("feature_h:", feature_h)
            # print("feature_w:", feature_w)
            feature_size.append([feature_h, feature_w])
            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())
        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
        grid_y = torch.cat(grid_y, 2)
        scale = torch.cat(scale, 2)
        for i in range(3):
            aw = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
                .index_select(1, torch.LongTensor([0])).cuda()
            ah = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
                .index_select(1, torch.LongTensor([1])).cuda()
            anchor_w.append(aw.repeat(nB, feature_size[i][0] * feature_size[i][1])
                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
            anchor_h.append(ah.repeat(nB, feature_size[i][0] * feature_size[i][1])
                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
        anchor_w = torch.cat(anchor_w, 2)
        anchor_h = torch.cat(anchor_h, 2)
        pred_boxes[0] = (x.data + grid_x) * scale
        pred_boxes[1] = (y.data + grid_y) * scale
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)
        t2 = time.time()

        # Build targets per scale, then concatenate along the cell dimension.
        nGT = 0
        iou_scores = []
        obj_mask = []
        noobj_mask = []
        tx = []
        ty = []
        tw = []
        th = []
        tconf = []
        tcls = []
        start_N = 0
        detected50 = torch.zeros(0)
        detected75 = torch.zeros(0)
        for imap in range(3):
            (nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp,
             tconf_temp, tcls_temp, detected50_temp, detected75_temp) = build_targets(
                pred_boxes[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1], :],
                target.data.cuda(),
                conf[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1]],
                torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                nA, feature_size[imap], self.input_size, self.thresh)
            if not len(detected50):
                detected50 = torch.zeros(nGT).cuda()
            if not len(detected75):
                detected75 = torch.zeros(nGT).cuda()
            detected50 += detected50_temp
            detected75 += detected75_temp
            start_N += feature_size[imap][0] * feature_size[imap][1]
            iou_scores.append(iou_scores_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            obj_mask.append(obj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            noobj_mask.append(noobj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tx.append(tx_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            ty.append(ty_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tw.append(tw_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            th.append(th_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tconf.append(tconf_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tcls.append(tcls_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
        iou_scores = torch.cat(iou_scores, 2)
        obj_mask = torch.cat(obj_mask, 2)
        noobj_mask = torch.cat(noobj_mask, 2)
        tx = torch.cat(tx, 2)
        ty = torch.cat(ty, 2)
        tw = torch.cat(tw, 2)
        th = torch.cat(th, 2)
        tconf = torch.cat(tconf, 2)
        tcls = torch.cat(tcls, 2)

        # Take care of the class mask
        idx_start = 0
        cls_mask_list = []
        tcls_list = []
        for i in range(len(counts)):
            if counts[i] == 0:
                cur_mask = torch.zeros(nA, N).cuda()
                cur_tcls = torch.zeros(nA, N).cuda()
            else:
                cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)
                cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)
            cls_mask_list.append(cur_mask)
            tcls_list.append(cur_tcls)
            idx_start += counts[i]
        cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
        tcls = torch.stack(tcls_list)
        cls_mask = (cls_mask == 1)

        # Detection metrics
        conf50 = (conf > 0.5).float().data
        iou50 = (iou_scores > 0.5).float()
        detected_mask = conf50 * tconf
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        detected50 = (detected50 > 0).float()
        detected75 = (detected75 > 0).float()
        recall50 = detected50.sum() / (nGT + 1e-16)
        recall75 = detected75.sum() / (nGT + 1e-16)
        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx)
        ty = Variable(ty)
        tw = Variable(tw)
        th = Variable(th)
        tconf = Variable(tconf)
        obj_mask = Variable(obj_mask.bool())
        noobj_mask = Variable(noobj_mask.bool())
        # cls_mask = Variable(cls_mask.view(-1, 1).repeat(1,cs).cuda())
        cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
        cls_max_ids = cls_max_ids[cls_mask.view(-1)]
        tcls = Variable(tcls[cls_mask].long())
        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()
        MseLoss = nn.MSELoss()
        BceLoss = nn.BCELoss()
        t3 = time.time()
        loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
        loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
        loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
        loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
        loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        if len(cls):
            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
        else:
            loss_cls = Variable(torch.Tensor([0]).float().cuda())
        # # pdb.set_trace()
        # ids = [9,11,12,16]
        # new_cls, new_tcls = select_classes(cls, tcls, ids)
        # new_tcls = Variable(torch.from_numpy(new_tcls).long().cuda())
        # loss_cls_new = self.class_scale * nn.CrossEntropyLoss(size_average=False)(new_cls, new_tcls)
        # loss_cls_new *= 10
        # loss_cls += loss_cls_new
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        # print('%d: nGT %d, precision %f, recall50 %f, recall75 %f, cls_acc %f, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' %
        #       (self.seen, nGT, precision, recall50, recall75, cls_acc, loss_x.item(), loss_y.item(),
        #        loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), loss.item()))
        return (loss,
                loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),
                loss_conf.item(), loss_cls.item(), cls_acc,
                recall50.item(), recall75.item(), nProposals)


def select_classes(pred, tgt, ids):
    # Convert tgt to numpy and keep only the requested class ids.
    tgt = tgt.cpu().data.numpy()
    new_tgt = [(tgt == d) * i for i, d in enumerate(ids)]
    new_tgt = np.max(np.stack(new_tgt), axis=0)
    idxes = np.argwhere(new_tgt > 0).squeeze()
    new_pred = pred[idxes]
    new_pred = new_pred[:, ids]
    new_tgt = new_tgt[idxes]
    return new_pred, new_tgt
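build_targets above assigns each ground-truth box to the anchor whose width and height give the highest IoU, with both boxes placed at the origin (bbox_ious is called on zero-centered boxes before the ignore-threshold pass). Below is a minimal self-contained sketch of that matching step; wh_iou is a hypothetical helper written here for illustration (it is not part of the repository's utils), and the box and anchor sizes are made-up values in input-image pixels.

import torch

def wh_iou(gt_wh, anchor_wh):
    # IoU computed from (w, h) only, i.e. all boxes centered at the origin,
    # mirroring the anchor assignment inside build_targets.
    inter = torch.min(gt_wh[:, None, 0], anchor_wh[None, :, 0]) * torch.min(gt_wh[:, None, 1], anchor_wh[None, :, 1])
    union = gt_wh[:, 0:1] * gt_wh[:, 1:2] + (anchor_wh[:, 0] * anchor_wh[:, 1])[None, :] - inter
    return inter / union

gt_wh = torch.tensor([[50., 80.], [200., 150.]])                 # ground-truth (w, h), illustrative
anchor_wh = torch.tensor([[30., 61.], [62., 45.], [59., 119.]])  # anchor (w, h), illustrative
ious = wh_iou(gt_wh, anchor_wh)       # shape (n_gt, n_anchors)
best_iou, best_a = ious.max(dim=1)    # best anchor index per ground-truth box
print(best_a.tolist(), best_iou.tolist())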