GitHub: https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection
train.py
from __future__ import print_function
import sys
import time
import math
import os
import random
import pdb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable
from tqdm import tqdm

import dataset
from utils import *
from cfg import parse_cfg, cfg
from darknet import Darknet

# Training settings
# datacfg = sys.argv[1]
# darknetcfg = parse_cfg(sys.argv[2])
# learnetcfg = parse_cfg(sys.argv[3])
datacfg = "cfg/fewyolov3_voc.data"
darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
learnetcfg = parse_cfg("cfg/reweighting_net.cfg")

weightfile = "tmp/000050.weights"
if len(sys.argv) == 5:
    weightfile = sys.argv[4]

data_options = read_data_cfg(datacfg)
net_options = darknetcfg[0]
meta_options = learnetcfg[0]

# Configure options
cfg.config_data(data_options)
cfg.config_meta(meta_options)
cfg.config_net(net_options)

# Parameters
metadict = data_options['meta']
trainlist = data_options['train']
testlist = data_options['valid']
backupdir = data_options['backup']
gpus = data_options['gpus']  # e.g. 0,1,2,3
ngpus = len(gpus.split(','))
num_workers = int(data_options['num_workers'])

batch_size = int(net_options['batch'])
print("batch_size:", batch_size)
max_batches = int(net_options['max_batches'])
learning_rate = float(data_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
steps = [float(step) for step in data_options['steps'].split(',')]
scales = [float(scale) for scale in data_options['scales'].split(',')]

# Train parameters
use_cuda = True
seed = int(time.time())

## --------------------------------------------------------------------------
## MAIN
backupdir = cfg.backup
print('logging to ' + backupdir)
if not os.path.exists(backupdir):
    os.makedirs(backupdir)

torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

model = Darknet(darknetcfg, learnetcfg)
region_loss = model.loss

model.print_network()
# if len(sys.argv) == 5:
model.load_weights(weightfile)

###################################################
### Meta-model parameters
region_loss.seen = model.seen
processed_batches = 0 if cfg.tuning else model.seen // batch_size
trainlist = dataset.build_dataset(data_options)
nsamples = len(trainlist)
init_width = model.width
init_height = model.height
init_epoch = 0 if cfg.tuning else model.seen // nsamples
max_epochs = max_batches * batch_size // nsamples + 1
max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
print(cfg.repeat, nsamples, max_batches, batch_size)
print(num_workers)

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)


def adjust_learning_rate(optimizer, processed_batches):
    """Step schedule: multiply the base LR by the configured scale each time
    processed_batches passes one of the cfg steps."""
    lr = learning_rate
    for i in range(len(steps)):
        scale = scales[i] if i < len(scales) else 1
        if processed_batches >= steps[i]:
            lr = lr * scale
            if processed_batches == steps[i]:
                break
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr


def train(epoch):
    global processed_batches
    t0 = time.time()
    cur_model = model.module if ngpus > 1 else model

    # Query-image loader
    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(trainlist, shape=(init_width, init_height),
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=True,
                            seen=cur_model.seen,
                            batch_size=batch_size,
                            num_workers=num_workers),
        batch_size=batch_size, shuffle=False, **kwargs)

    # Support-image (meta) loader, consumed in lockstep with the query loader
    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=metaset.batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %e' %
            (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    with tqdm(total=train_loader.__len__()) as t:
        for batch_idx, (data, target) in enumerate(train_loader):
            metax, mask = next(metaloader)  # matching support batch
            t2 = time.time()
            adjust_learning_rate(optimizer, processed_batches)
            processed_batches = processed_batches + 1
            if use_cuda:
                data = data.cuda()
                metax = metax.cuda()
                mask = mask.cuda()
                # target = target.cuda()
            t3 = time.time()
            data, target = Variable(data), Variable(target)
            metax, mask = Variable(metax), Variable(mask)
            t4 = time.time()
            optimizer.zero_grad()
            t5 = time.time()
            output = model(data, metax, mask)
            t6 = time.time()

            region_loss.seen = region_loss.seen + data.data.size(0)
            cur_model.seen = region_loss.seen
            region_loss.input_size = (data.data.size(2), data.data.size(3))
            loss, loss_box, loss_conf, loss_cls, cls_acc, recall50, recall75, nProposals = region_loss(output, target)

            t.set_description('Epoch %d' % epoch)
            t.set_postfix(loss=loss.item(), loss_bbox=loss_box, loss_conf=loss_conf, loss_cls=loss_cls,
                          cls_acc=cls_acc, recall50=recall50, recall75=recall75, Proposals=nProposals)
            t.update()

            t7 = time.time()
            loss.backward()
            t8 = time.time()
            optimizer.step()
            t9 = time.time()

            if False and batch_idx > 1:
                avg_time[0] = avg_time[0] + (t2 - t1)
                avg_time[1] = avg_time[1] + (t3 - t2)
                avg_time[2] = avg_time[2] + (t4 - t3)
                avg_time[3] = avg_time[3] + (t5 - t4)
                avg_time[4] = avg_time[4] + (t6 - t5)
                avg_time[5] = avg_time[5] + (t7 - t6)
                avg_time[6] = avg_time[6] + (t8 - t7)
                avg_time[7] = avg_time[7] + (t9 - t8)
                avg_time[8] = avg_time[8] + (t9 - t1)
                print('-------------------------------')
                print('       load data : %f' % (avg_time[0] / batch_idx))
                print('     cpu to cuda : %f' % (avg_time[1] / batch_idx))
                print('cuda to variable : %f' % (avg_time[2] / batch_idx))
                print('       zero_grad : %f' % (avg_time[3] / batch_idx))
                print(' forward feature : %f' % (avg_time[4] / batch_idx))
                print('    forward loss : %f' % (avg_time[5] / batch_idx))
                print('        backward : %f' % (avg_time[6] / batch_idx))
                print('            step : %f' % (avg_time[7] / batch_idx))
                print('           total : %f' % (avg_time[8] / batch_idx))
            t1 = time.time()
    print('')
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))

    if (epoch + 1) % cfg.save_interval == 0:
        logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
        cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))


init_epoch = int(init_epoch)
max_epochs = int(max_epochs)
print("init_epoch:", init_epoch)
print("max_epochs:", max_epochs)
for epoch in range(init_epoch, max_epochs):
    train(epoch)
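For reference, adjust_learning_rate above walks the steps/scales lists read from the data cfg and multiplies the base learning rate by every scale whose step has already been passed. The standalone sketch below mirrors that behavior without the optimizer side effect; the step and scale values are made up for illustration and are not taken from the repository's cfg files.

def scheduled_lr(base_lr, steps, scales, processed_batches):
    # Mirror of the schedule in adjust_learning_rate (train.py).
    lr = base_lr
    for i, step in enumerate(steps):
        scale = scales[i] if i < len(scales) else 1
        if processed_batches >= step:
            lr = lr * scale
            if processed_batches == step:
                break
        else:
            break
    return lr

# Illustrative values only.
steps, scales = [100, 20000, 30000], [10, 0.1, 0.1]
for pb in (0, 100, 25000, 40000):
    print(pb, scheduled_lr(1e-4, steps, scales, pb))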
region_loss.py
import time
import math
import pdb
from numbers import Number
from random import random, randint

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from utils import *
from cfg import cfg


def neg_filter(pred_boxes, target, withids=False):
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0
        flags = flags.cpu().data.tolist()
        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
        if ratio >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            # Randomly drop negative (all-zero) targets so the kept negatives
            # are roughly neg_ratio times the positives.
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            inds = np.argwhere(flags).squeeze()
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def neg_filter_v2(pred_boxes, target, withids=False):
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0
        flags = flags.cpu().data.tolist()
        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
        if ratio >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            if sum(flags) == 0:
                # Always keep at least one sample.
                flags[randint(0, len(flags) - 1)] = 1
            inds = np.nonzero(flags)[0]
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
    nB = target.size(0)
    nA = num_anchors
    # print('anchor_step: ', anchor_step)
    obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)
    noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)
    tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    # Keep only the non-empty ground-truth rows (cls, cx, cy, w, h per box).
    tboxes = target.view(-1, 5)
    nonzero_ind = tboxes[:, 3] > 0
    tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
    ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()
    ind_B = ind_B[nonzero_ind]

    gx = (tboxes[:, 1] * feature_size[1]).float()
    gy = (tboxes[:, 2] * feature_size[0]).float()
    gw = (tboxes[:, 3] * input_size[1]).float()
    gh = (tboxes[:, 4] * input_size[0]).float()
    aw = anchors[:, 0]
    ah = anchors[:, 1]
    nbox = tboxes.size(0)

    # Match each ground-truth box to the best anchor by (w, h) IoU only.
    gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(),
                        gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(),
                            aw.unsqueeze(0), ah.unsqueeze(0)], 0)
    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA),
                     anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
    best_ious, best_a = ious.max(1)
    gj = gy.long()
    gi = gx.long()

    obj_mask[ind_B, best_a, gj, gi] = 1
    noobj_mask[ind_B, best_a, gj, gi] = 0
    # Anchors above the ignore threshold count as neither positive nor negative.
    for i, iou in enumerate(ious):
        if (iou > ignore_thresh).sum():
            noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1),
                       gj[i:i + 1], gi[i:i + 1]] = 0

    tx[ind_B, best_a, gj, gi] = gx - gx.floor()
    ty[ind_B, best_a, gj, gi] = gy - gy.floor()
    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
    tconf = obj_mask.float()

    pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()
    conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data
    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                              gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(),
                                                  target_boxes, x1y1x2y2=False)
    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50
    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75


class RegionLoss(nn.Module):
    # Legacy single-scale YOLOv2-style loss kept from the original implementation;
    # the training script unpacks the eight values returned by RegionLossV2 below.
    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        # import pdb; pdb.set_trace()
        # output : B x nA*(4+1+num_classes) x H x W
        # if target.dim() == 3:
        #     # target : B * n_cls * l
        #     l = target.size(-1)
        #     target = target.permute(1,0,2).contiguous().view(-1, l)
        if target.dim() == 3:
            target = target.view(-1, target.size(-1))
        bef = target.size(0)
        output, target = neg_filter(output, target)
        # print("{}/{}".format(target.size(0), bef))
        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
        t2 = time.time()

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes, target.data, self.anchors, nA, nC, nH, nW,
            self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        if cfg.metayolo:
            tcls.zero_()
        nProposals = int((conf > 0.25).float().sum().data[0])

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)
        t3 = time.time()

        loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
            loss_conf.data[0], loss_cls.data[0], loss.data[0]))
        return loss


class RegionLossV2(nn.Module):
    """
    YOLO region loss + softmax classification across meta-inputs.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
        super(RegionLossV2, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.coord_scale = 1
        self.class_scale = 1
        self.obj_scale = 1
        self.noobj_scale = 100
        self.thresh = 0.5
        self.seen = 0
        self.input_size = input_size
        self.feature_scale = [32, 16, 8]
        print('class_scale', self.class_scale)

    def forward(self, output, target):
        # output : (bs*cs, nA*(5+1), N) -- one prediction map per (image, meta class) pair
        # target : (bs, cs, 50*5)
        # Get all classification predictions
        # pdb.set_trace()
        bs = target.size(0)
        cs = target.size(1)
        nA = self.num_anchors
        nC = self.num_classes
        N = output.data.size(2)
        # feature_size = [[26, 26], [52, 52], [104, 104]]
        cls = output.view(output.size(0), nA, (5 + nC), N)
        cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
        cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
        cls_conf = F.softmax(cls, 1)
        _, cls_max_ids = torch.max(cls_conf, 1)
        cls_max_ids = cls_max_ids.data
        pre_cls_mask = torch.zeros(bs * nA * N, cs).cuda()
        pre_cls_mask[torch.linspace(0, bs * nA * N - 1, bs * nA * N).long().cuda(), cls_max_ids] = 1
        pre_cls_mask = pre_cls_mask.view(bs, nA * N, cs).transpose(1, 2).contiguous().view(bs * cs, nA, N)

        # Rearrange target and perform the negative-filtering operation
        target = target.view(-1, target.size(-1))
        # bef = target.size(0)
        output, target, inds = neg_filter_v2(output, target, withids=True)
        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))
        # print("{}/{}".format(target.size(0), bef))
        pre_cls_mask = pre_cls_mask[inds]
        t0 = time.time()

        nB = output.data.size(0)
        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, 5+nC, N)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))
        # cls = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))
        # cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
        t1 = time.time()

        # Decode predicted boxes for all three feature scales (32, 16, 8).
        pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
        grid_x = []
        grid_y = []
        anchor_w = []
        anchor_h = []
        scale = []
        feature_size = []
        for fs in self.feature_scale:
            feature_h = self.input_size[0] // fs
            feature_w = self.input_size[1] // fs
            # print("feature_h:", feature_h)
            # print("feature_w:", feature_w)
            feature_size.append([feature_h, feature_w])
            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())
        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
        grid_y = torch.cat(grid_y, 2)
        scale = torch.cat(scale, 2)
        for i in range(3):
            aw = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
                .index_select(1, torch.LongTensor([0])).cuda()
            ah = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
                .index_select(1, torch.LongTensor([1])).cuda()
            anchor_w.append(aw.repeat(nB, feature_size[i][0] * feature_size[i][1])
                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
            anchor_h.append(ah.repeat(nB, feature_size[i][0] * feature_size[i][1])
                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
        anchor_w = torch.cat(anchor_w, 2)
        anchor_h = torch.cat(anchor_h, 2)
        pred_boxes[0] = (x.data + grid_x) * scale
        pred_boxes[1] = (y.data + grid_y) * scale
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)
        t2 = time.time()

        # Build targets per scale, then concatenate along the cell dimension.
        nGT = 0
        iou_scores = []
        obj_mask = []
        noobj_mask = []
        tx = []
        ty = []
        tw = []
        th = []
        tconf = []
        tcls = []
        start_N = 0
        detected50 = torch.zeros(0)
        detected75 = torch.zeros(0)
        for imap in range(3):
            (nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp,
             tconf_temp, tcls_temp, detected50_temp, detected75_temp) = build_targets(
                pred_boxes[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1], :],
                target.data.cuda(),
                conf[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1]],
                torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                nA, feature_size[imap], self.input_size, self.thresh)
            if not len(detected50):
                detected50 = torch.zeros(nGT).cuda()
            if not len(detected75):
                detected75 = torch.zeros(nGT).cuda()
            detected50 += detected50_temp
            detected75 += detected75_temp
            start_N += feature_size[imap][0] * feature_size[imap][1]
            iou_scores.append(iou_scores_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            obj_mask.append(obj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            noobj_mask.append(noobj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tx.append(tx_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            ty.append(ty_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tw.append(tw_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            th.append(th_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tconf.append(tconf_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tcls.append(tcls_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
        iou_scores = torch.cat(iou_scores, 2)
        obj_mask = torch.cat(obj_mask, 2)
        noobj_mask = torch.cat(noobj_mask, 2)
        tx = torch.cat(tx, 2)
        ty = torch.cat(ty, 2)
        tw = torch.cat(tw, 2)
        th = torch.cat(th, 2)
        tconf = torch.cat(tconf, 2)
        tcls = torch.cat(tcls, 2)

        # Take care of the class mask
        idx_start = 0
        cls_mask_list = []
        tcls_list = []
        for i in range(len(counts)):
            if counts[i] == 0:
                cur_mask = torch.zeros(nA, N).cuda()
                cur_tcls = torch.zeros(nA, N).cuda()
            else:
                cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)
                cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)
            cls_mask_list.append(cur_mask)
            tcls_list.append(cur_tcls)
            idx_start += counts[i]
        cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
        tcls = torch.stack(tcls_list)
        cls_mask = (cls_mask == 1)

        # Detection metrics
        conf50 = (conf > 0.5).float().data
        iou50 = (iou_scores > 0.5).float()
        detected_mask = conf50 * tconf
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        detected50 = (detected50 > 0).float()
        detected75 = (detected75 > 0).float()
        recall50 = detected50.sum() / (nGT + 1e-16)
        recall75 = detected75.sum() / (nGT + 1e-16)
        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx)
        ty = Variable(ty)
        tw = Variable(tw)
        th = Variable(th)
        tconf = Variable(tconf)
        obj_mask = Variable(obj_mask.bool())
        noobj_mask = Variable(noobj_mask.bool())
        # cls_mask = Variable(cls_mask.view(-1, 1).repeat(1,cs).cuda())
        cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
        cls_max_ids = cls_max_ids[cls_mask.view(-1)]
        tcls = Variable(tcls[cls_mask].long())
        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()
        MseLoss = nn.MSELoss()
        BceLoss = nn.BCELoss()
        t3 = time.time()
        loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
        loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
        loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
        loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
        loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        if len(cls):
            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
        else:
            loss_cls = Variable(torch.Tensor([0]).float().cuda())
        # # pdb.set_trace()
        # ids = [9,11,12,16]
        # new_cls, new_tcls = select_classes(cls, tcls, ids)
        # new_tcls = Variable(torch.from_numpy(new_tcls).long().cuda())
        # loss_cls_new = self.class_scale * nn.CrossEntropyLoss(size_average=False)(new_cls, new_tcls)
        # loss_cls_new *= 10
        # loss_cls += loss_cls_new
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        # print('%d: nGT %d, precision %f, recall50 %f, recall75 %f, cls_acc %f, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' %
        #       (self.seen, nGT, precision, recall50, recall75, cls_acc, loss_x.item(), loss_y.item(),
        #        loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), loss.item()))
        return (loss,
                loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),
                loss_conf.item(), loss_cls.item(), cls_acc,
                recall50.item(), recall75.item(), nProposals)


def select_classes(pred, tgt, ids):
    # Convert tgt to numpy and keep only the requested class ids.
    tgt = tgt.cpu().data.numpy()
    new_tgt = [(tgt == d) * i for i, d in enumerate(ids)]
    new_tgt = np.max(np.stack(new_tgt), axis=0)
    idxes = np.argwhere(new_tgt > 0).squeeze()
    new_pred = pred[idxes]
    new_pred = new_pred[:, ids]
    new_tgt = new_tgt[idxes]
    return new_pred, new_tgt
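build_targets above assigns each ground-truth box to the anchor whose width and height give the highest IoU, with both boxes placed at the origin (bbox_ious is called on zero-centered boxes before the ignore-threshold pass). Below is a minimal self-contained sketch of that matching step; wh_iou is a hypothetical helper written here for illustration (it is not part of the repository's utils), and the box and anchor sizes are made-up values in input-image pixels.

import torch

def wh_iou(gt_wh, anchor_wh):
    # IoU computed from (w, h) only, i.e. all boxes centered at the origin,
    # mirroring the anchor assignment inside build_targets.
    inter = torch.min(gt_wh[:, None, 0], anchor_wh[None, :, 0]) * torch.min(gt_wh[:, None, 1], anchor_wh[None, :, 1])
    union = gt_wh[:, 0:1] * gt_wh[:, 1:2] + (anchor_wh[:, 0] * anchor_wh[:, 1])[None, :] - inter
    return inter / union

gt_wh = torch.tensor([[50., 80.], [200., 150.]])                 # ground-truth (w, h), illustrative
anchor_wh = torch.tensor([[30., 61.], [62., 45.], [59., 119.]])  # anchor (w, h), illustrative
ious = wh_iou(gt_wh, anchor_wh)       # shape (n_gt, n_anchors)
best_iou, best_a = ious.max(dim=1)    # best anchor index per ground-truth box
print(best_a.tolist(), best_iou.tolist())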