[Paddle学习笔记][10][基于YOLOv3的昆虫检测-损失函数]

说明：

本例程使用YOLOv3进行昆虫检测。例程分为数据处理、模型设计、损失函数、训练模型、模型预测和测试模型六个部分。本篇为第三部分，设计了物体边框、物体置信度和物体类别的损失函数。物体边框的x、y使用sigmoid_cross_entropy_with_logits损失函数，w、h使用绝对值L1损失函数。物体置信度和物体类别使用sigmoid_cross_entropy_with_logits损失函数。

实验代码：

损失函数输出：

import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.data import single_thread_reader
from source.model import YOLOv3
from source.loss import get_sum_loss

# Demo: run one sample through YOLOv3 in dygraph mode and print the summed loss.
with fluid.dygraph.guard():
    # Load data
    train_set = './dataset/train/'
    
    train_reader = single_thread_reader(train_set, 1, 'train') # single-threaded reader (second argument is presumably the batch size - confirm in source.data)
    image, gtbox, gtcls, image_size = next(train_reader())     # pull one item from the reader
    image = to_variable(image)                                 # wrap the image as a dygraph variable
    
    # Forward pass
    num_classes = 7                                                                              # number of classes
    anchor_size = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] # flattened anchor sizes, read as (w, h) pairs by the loss
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]                                              # anchor indices used by each model output
    ignore_threshold = 0.7                                                                       # IoU above which a prediction is ignored in the objectness loss
    downsample_ratio = 32                                                                        # downsample ratio of the first output; the loss halves it per output
    
    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)
    
    infer = model(image)
    
    # Compute the loss
    loss = get_sum_loss(infer, gtbox, gtcls, 
                        num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio)
    
    print(loss.numpy())

结果：

[8711.687]

loss.py文件

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

def sigmoid(x):
    """
    Evaluate the logistic sigmoid through its tanh identity.

    Uses sigmoid(x) = (1 + tanh(x / 2)) / 2 instead of 1 / (1 + exp(-x)),
    so no exponential of a large magnitude is ever formed.

    Args:
        x: Scalar or NumPy array of input values.

    Returns:
        The sigmoid of x (element-wise for array input).
    """
    half_x = 0.5 * x
    shifted = 1.0 + np.tanh(half_x)
    return 0.5 * shifted

# def sigmoid(x):
#     return 1.0 / (1.0 + np.exp(-x))

def get_box_iou_xywh(box1, box2):
    """
    Compute the intersection-over-union of boxes in centre format.

    Each box is a 4-element sequence (x, y, w, h), where (x, y) is the box
    centre. The elements may be scalars or NumPy arrays; arrays are handled
    element-wise with the usual broadcasting rules.

    Args:
        box1: First box (or boxes) as [x, y, w, h].
        box2: Second box (or boxes) as [x, y, w, h].

    Returns:
        The IoU value(s). When both boxes have zero area the union is zero;
        the result is then 0 rather than NaN.
    """
    # Convert both boxes from centre/size to corner coordinates.
    x1_min, x1_max = box1[0] - box1[2] / 2.0, box1[0] + box1[2] / 2.0
    y1_min, y1_max = box1[1] - box1[3] / 2.0, box1[1] + box1[3] / 2.0

    x2_min, x2_max = box2[0] - box2[2] / 2.0, box2[0] + box2[2] / 2.0
    y2_min, y2_max = box2[1] - box2[3] / 2.0, box2[1] + box2[3] / 2.0

    # Overlap rectangle; a negative extent means the boxes do not overlap.
    w = np.maximum(np.minimum(x1_max, x2_max) - np.maximum(x1_min, x2_min), 0.0)
    h = np.maximum(np.minimum(y1_max, y2_max) - np.maximum(y1_min, y2_min), 0.0)
    intersection = w * h

    # Union = sum of both areas minus the part counted twice.
    union = box1[2] * box1[3] + box2[2] * box2[3] - intersection

    # Floor the denominator so two degenerate boxes give 0 instead of 0/0.
    # Any non-degenerate union is far above this floor and stays untouched.
    return intersection / np.maximum(union, 1e-10)

def get_ignore_label(infer, gtbox, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
    """
    Build the objectness label map that marks predictions to be ignored.

    Every prediction is decoded into a box normalised to [0, 1]; any
    prediction whose IoU with some ground-truth box exceeds
    ``ignore_threshold`` gets label -1 so that it can be excluded from the
    objectness loss. All other entries are 0.

    Args:
        infer: Network output for one scale. Must expose ``.shape`` and
            ``.numpy()``; it is reshaped to
            [batch, num_anchor, 5 + num_classes, rows, cols].
        gtbox: Ground-truth boxes indexed as gtbox[image][box] = (x, y, w, h).
            Assumed to be centre-format and normalised to [0, 1], since they
            are compared directly with the normalised predictions - confirm
            against the data reader.
        num_classes: Number of object classes.
        anchor_size: Flat list of anchor sizes, read as (w, h) pairs.
        anchor_mask: Indices of the anchors used by this output.
        ignore_threshold: IoU above which a prediction is ignored.
        downsample_ratio: Ratio between input image size and this feature map.

    Returns:
        float64 array of shape [batch, num_anchor, rows, cols] holding 0 or -1.
    """
    batch_size = infer.shape[0]
    num_rows = infer.shape[2]
    num_cols = infer.shape[3]
    num_anchor = len(anchor_mask)

    infer = infer.numpy()
    infer = infer.reshape([-1, num_anchor, 5 + num_classes, num_rows, num_cols])

    pdloc = infer[:, :, 0:4, :, :]        # raw (tx, ty, tw, th) predictions
    image_h = num_rows * downsample_ratio # input image height implied by this scale
    image_w = num_cols * downsample_ratio # input image width implied by this scale

    # Grid-cell offsets and anchor priors, shaped to broadcast against
    # [batch, num_anchor, rows, cols] instead of being filled cell by cell.
    cell_x = np.arange(num_cols, dtype='float64').reshape([1, 1, 1, num_cols])
    cell_y = np.arange(num_rows, dtype='float64').reshape([1, 1, num_rows, 1])
    prior_w = np.array([anchor_size[2 * a] for a in anchor_mask],
                       dtype='float64').reshape([1, num_anchor, 1, 1])
    prior_h = np.array([anchor_size[2 * a + 1] for a in anchor_mask],
                       dtype='float64').reshape([1, num_anchor, 1, 1])

    # Decode to normalised centre-format boxes.
    pdbox = np.zeros(pdloc.shape)
    pdbox[:, :, 0, :, :] = (cell_x + sigmoid(pdloc[:, :, 0, :, :])) / num_cols # x = (cx + sigmoid(tx)) / cols
    pdbox[:, :, 1, :, :] = (cell_y + sigmoid(pdloc[:, :, 1, :, :])) / num_rows # y = (cy + sigmoid(ty)) / rows
    pdbox[:, :, 2, :, :] = (prior_w * np.exp(pdloc[:, :, 2, :, :])) / image_w  # w = pw * exp(tw) / image_w
    pdbox[:, :, 3, :, :] = (prior_h * np.exp(pdloc[:, :, 3, :, :])) / image_h  # h = ph * exp(th) / image_h
    pdbox = np.clip(pdbox, 0.0, 1.0)

    lbobj = np.zeros([batch_size, num_anchor, num_rows, num_cols])
    for m in range(batch_size):
        # The predictions of one image are shared by all of its ground-truth boxes.
        predicted = [pdbox[m, :, c, :, :] for c in range(4)]

        for n in range(len(gtbox[m])):
            gtbox_x = gtbox[m][n][0]
            gtbox_y = gtbox[m][n][1]
            gtbox_w = gtbox[m][n][2]
            gtbox_h = gtbox[m][n][3]

            # Near-zero boxes are treated as "no object" entries and skipped.
            if gtbox_w < 1e-3 or gtbox_h < 1e-3:
                continue

            ious = get_box_iou_xywh(predicted, [gtbox_x, gtbox_y, gtbox_w, gtbox_h])

            # Mark every prediction that overlaps this box too well.
            lbobj[m][ious > ignore_threshold] = -1

    return lbobj

def get_predict_label(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
    """
    Build the training targets for one output scale.

    For every valid ground-truth box, the anchor (among those in
    ``anchor_mask``) whose shape has the highest IoU with the box is made
    responsible for it, at the grid cell containing the box centre.

    Args:
        infer: Network output for this scale; only ``.shape`` is read here
            (it is also forwarded to ``get_ignore_label``).
        gtbox: Ground-truth boxes indexed as gtbox[image][box] = (x, y, w, h).
            Assumed to be centre-format and normalised to [0, 1] - confirm
            against the data reader.
        gtcls: Ground-truth class ids indexed as gtcls[image][box]; used
            directly as an array index.
        num_classes: Number of object classes.
        anchor_size: Flat list of anchor sizes, read as (w, h) pairs.
        anchor_mask: Indices of the anchors used by this output.
        ignore_threshold: IoU above which a prediction is ignored.
        downsample_ratio: Ratio between input image size and this feature map.

    Returns:
        Tuple (lbloc, lbobj, lbcls, wtloc) of float32 arrays:
            lbloc - location targets, [batch, num_anchor, 4, rows, cols]
            lbobj - objectness targets (1, 0 or -1 for ignored),
                    [batch, num_anchor, rows, cols]
            lbcls - one-hot class targets,
                    [batch, num_anchor, num_classes, rows, cols]
            wtloc - location-loss weights, [batch, num_anchor, rows, cols]
    """
    batch_size = infer.shape[0]
    num_rows = infer.shape[2]
    num_cols = infer.shape[3]
    num_anchor = len(anchor_mask)

    lbloc = np.zeros([batch_size, num_anchor, 4, num_rows, num_cols])
    lbcls = np.zeros([batch_size, num_anchor, num_classes, num_rows, num_cols])
    # The last axis must be num_cols so the weights line up with the other
    # targets on non-square feature maps.
    wtloc = np.ones([batch_size, num_anchor, num_rows, num_cols])

    # Start from the ignore map (0 / -1); positives are written on top of it.
    lbobj = get_ignore_label(infer, gtbox, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio)

    image_h = num_rows * downsample_ratio # input image height implied by this scale
    image_w = num_cols * downsample_ratio # input image width implied by this scale

    # Anchor dimensions for this scale, looked up once.
    anchor_ws = [anchor_size[2 * a] for a in anchor_mask]
    anchor_hs = [anchor_size[2 * a + 1] for a in anchor_mask]

    for m in range(batch_size):
        for n in range(len(gtbox[m])):
            gtbox_x = gtbox[m][n][0]
            gtbox_y = gtbox[m][n][1]
            gtbox_w = gtbox[m][n][2]
            gtbox_h = gtbox[m][n][3]

            # Near-zero boxes are treated as "no object" entries and skipped.
            if gtbox_w < 1e-3 or gtbox_h < 1e-3:
                continue

            # Compare shapes only: both boxes are centred at the origin.
            iou_list = [
                get_box_iou_xywh([0.0, 0.0, aw / float(image_w), ah / float(image_h)],
                                 [0.0, 0.0, float(gtbox_w), float(gtbox_h)])
                for aw, ah in zip(anchor_ws, anchor_hs)
            ]

            # Last entry of the ascending sort = best-matching anchor.
            k = np.argsort(np.array(iou_list))[-1]

            # Grid cell that contains the box centre.
            i = int(gtbox_y * num_rows)
            j = int(gtbox_x * num_cols)

            # Location targets: offsets inside the cell and log size ratios.
            lbloc[m, k, 0, i, j] = gtbox_x * num_cols - j                  # dx = sigmoid(tx)
            lbloc[m, k, 1, i, j] = gtbox_y * num_rows - i                  # dy = sigmoid(ty)
            lbloc[m, k, 2, i, j] = np.log(gtbox_w * image_w / anchor_ws[k]) # tw = log(gtw / pw)
            lbloc[m, k, 3, i, j] = np.log(gtbox_h * image_h / anchor_hs[k]) # th = log(gth / ph)

            # Objectness target: this anchor/cell holds an object.
            lbobj[m, k, i, j] = 1

            # One-hot class target.
            c = gtcls[m][n]
            lbcls[m, k, c, i, j] = 1.0

            # Weight shrinks as box area grows, so smaller boxes contribute
            # more to the location loss.
            wtloc[m, k, i, j] = 2.0 - gtbox_w * gtbox_h

    # Cast once at the end so the outputs are float32 even when no valid
    # ground-truth box was found.
    return (lbloc.astype('float32'), lbobj.astype('float32'),
            lbcls.astype('float32'), wtloc.astype('float32'))

def get_loss(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
    """
    Compute the per-image loss for a single output scale.

    The loss is the sum of a weighted location loss and a class loss (both
    restricted to positive cells) plus an objectness loss in which labels
    equal to -1 are ignored.

    Args:
        infer: Network output for this scale.
        gtbox: Ground-truth boxes.
        gtcls: Ground-truth class ids.
        num_classes: Number of object classes.
        anchor_size: Flat list of anchor sizes, read as (w, h) pairs.
        anchor_mask: Indices of the anchors used by this output.
        ignore_threshold: IoU above which a prediction is ignored.
        downsample_ratio: Ratio between input image size and this feature map.

    Returns:
        Variable holding one summed loss value per image.
    """
    labels = get_predict_label(infer, gtbox, gtcls,
                               num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio)

    # Wrap the NumPy targets as variables; they are constants for backprop.
    target_loc, target_obj, target_cls, weight_loc = [to_variable(label) for label in labels]
    for target in (target_loc, target_obj, target_cls, weight_loc):
        target.stop_gradient = True

    # Split the channel axis into [anchor, 5 + num_classes].
    feat_rows, feat_cols = infer.shape[2], infer.shape[3]
    infer = fluid.layers.reshape(infer, [-1, len(anchor_mask), 5 + num_classes, feat_rows, feat_cols])

    # 1.0 where a cell/anchor is responsible for an object, else 0.0.
    positive = fluid.layers.cast(target_obj > 0, 'float32')
    positive.stop_gradient = True

    # Location loss: cross entropy on the centre offsets, L1 on the log sizes.
    loss_dx = fluid.layers.sigmoid_cross_entropy_with_logits(infer[:, :, 0, :, :], target_loc[:, :, 0, :, :])
    loss_dy = fluid.layers.sigmoid_cross_entropy_with_logits(infer[:, :, 1, :, :], target_loc[:, :, 1, :, :])
    loss_tw = fluid.layers.abs(infer[:, :, 2, :, :] - target_loc[:, :, 2, :, :])
    loss_th = fluid.layers.abs(infer[:, :, 3, :, :] - target_loc[:, :, 3, :, :])
    loss_loc = (loss_dx + loss_dy + loss_tw + loss_th) * weight_loc * positive

    # Objectness loss; entries labelled -1 are ignored.
    logits_obj = infer[:, :, 4, :, :]
    loss_obj = fluid.layers.sigmoid_cross_entropy_with_logits(logits_obj, target_obj, ignore_index=-1)

    # Class loss, summed over the class axis and kept for positives only.
    logits_cls = infer[:, :, 5:5+num_classes, :, :]
    loss_cls = fluid.layers.sigmoid_cross_entropy_with_logits(logits_cls, target_cls)
    loss_cls = fluid.layers.reduce_sum(loss_cls, dim=2) * positive

    # Collapse anchor, row and column axes to get one value per image.
    total = loss_loc + loss_obj + loss_cls
    return fluid.layers.reduce_sum(total, dim=[1, 2, 3])

def get_sum_loss(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
    """
    Sum the batch-mean losses of all output scales.

    Args:
        infer: List of network outputs, one per scale.
        gtbox: Ground-truth boxes.
        gtcls: Ground-truth class ids.
        num_classes: Number of object classes.
        anchor_size: Flat list of anchor sizes, read as (w, h) pairs.
        anchor_mask: One list of anchor indices per output scale.
        ignore_threshold: IoU above which a prediction is ignored.
        downsample_ratio: Downsample ratio of the first output; it is halved
            for each following output.

    Returns:
        Sum over scales of the mean per-image loss.
    """
    scale_means = []
    stride = downsample_ratio
    for level, feature in enumerate(infer):
        per_image = get_loss(feature, gtbox, gtcls, num_classes, anchor_size, anchor_mask[level], ignore_threshold, stride)
        scale_means.append(fluid.layers.reduce_mean(per_image))

        # The next output has twice the resolution.
        stride = stride // 2

    return sum(scale_means)

# def get_sum_loss(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
#     # 计算平均损失
#     loss_list = [] # 平均损失列表
#     gtbox = to_variable(gtbox)
#     gtcls = to_variable(gtcls)
    
#     for i in range(len(infer)):
#         # 计算平均损失
#         loss = fluid.layers.yolov3_loss(
#             x=infer[i],
#             gt_box=gtbox,
#             gt_label=gtcls,
#             class_num=num_classes,
#             anchors=anchor_size,
#             anchor_mask=anchor_mask[i],
#             ignore_thresh=ignore_threshold,
#             downsample_ratio=downsample_ratio,
#             use_label_smooth=False)
#         loss_list.append(fluid.layers.reduce_mean(loss)) # 添加损失列表
        
#         # 减小下采样率
#         downsample_ratio //= 2 # 减小下采样率
    
#     # 计算损失总和
#     sum_loss = sum(loss_list)
    
#     return sum_loss

参考资料：

https://www.jianshu.com/p/47172eb86b39

https://www.cnblogs.com/houjun/p/10922352.html

https://blog.csdn.net/litt1e/article/details/88814417

https://blog.csdn.net/litt1e/article/details/88852745

https://blog.csdn.net/litt1e/article/details/88907542

https://aistudio.baidu.com/aistudio/projectdetail/742781

https://aistudio.baidu.com/aistudio/projectdetail/672017

https://aistudio.baidu.com/aistudio/projectdetail/868589

https://aistudio.baidu.com/aistudio/projectdetail/122277

相关阅读:
【问题备注】VS2012不能输入代码，文字…
犯错了又
 关于实习…
百度2014校招笔试题目题解（更新了第1题的算法，10.9下午）
人人校招笔试题
 腾讯2014软件开发笔试题目
 阿里巴巴笔试题选解
 批处理文件——多个QQ一键登录
 动态规划---LIS
动态规划--凑硬币问题
原文地址：https://www.cnblogs.com/d442130165/p/13685301.html