• [Paddle学习笔记][10][基于YOLOv3的昆虫检测-损失函数]


    说明:

    本例程使用YOLOv3进行昆虫检测。例程分为数据处理、模型设计、损失函数、训练模型、模型预测和测试模型六个部分。本篇为第三部分,设计了物体边框、物体置信度和物体类别的损失函数。物体边框的x、y使用sigmoid_cross_entropy_with_logits损失函数,w、h使用绝对值L1损失函数。物体置信度和物体类别使用sigmoid_cross_entropy_with_logits损失函数。

    实验代码:

    损失函数输出:

    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable
    
    from source.data import single_thread_reader
    from source.model import YOLOv3
    from source.loss import get_sum_loss
    
    with fluid.dygraph.guard():
        # Detection configuration shared by the model and the loss.
        num_classes = 7  # number of classes
        anchor_size = [
            10, 13, 16, 30, 33, 23,
            30, 61, 62, 45, 59, 119,
            116, 90, 156, 198, 373, 326,
        ]  # flat list of anchor sizes
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices per output level
        ignore_threshold = 0.7  # threshold passed to the loss
        downsample_ratio = 32   # downsampling ratio passed to the loss

        # Read a single sample with the single-threaded reader and wrap the
        # image so it can be fed to the dygraph model.
        train_set = './dataset/train/'
        train_reader = single_thread_reader(train_set, 1, 'train')
        image, gtbox, gtcls, _ = next(train_reader())
        image = to_variable(image)

        # Forward pass.
        model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)
        infer = model(image)

        # Evaluate the summed loss and print its value.
        loss = get_sum_loss(
            infer, gtbox, gtcls,
            num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio,
        )
        print(loss.numpy())

    结果:

    [8711.687]

    loss.py文件

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable
    
    def sigmoid(x):
        """
        Purpose:
            Evaluate the logistic sigmoid using the identity
            sigmoid(x) = 0.5 * (1 + tanh(x / 2)).
        Inputs:
            x - input value (anything np.tanh accepts after scaling by 0.5)
        Outputs:
            y - sigmoid of x
        """
        tanh_half = np.tanh(0.5 * x)
        return 0.5 * (1.0 + tanh_half)
    
    # def sigmoid(x):
    #     return 1.0 / (1.0 + np.exp(-x))
    
    def get_box_iou_xywh(box1, box2):
        """
        Purpose:
            Compute the intersection-over-union of two boxes given in
            (center x, center y, width, height) form.
        Inputs:
            box1 - first box, indexable as [x, y, w, h]
            box2 - second box, indexable as [x, y, w, h]
        Outputs:
            iou  - intersection area divided by union area
        """
        def _to_corners(box):
            # Convert a center/size box into (left, top, right, bottom).
            half_w = box[2] / 2.0
            half_h = box[3] / 2.0
            return box[0] - half_w, box[1] - half_h, box[0] + half_w, box[1] + half_h

        left1, top1, right1, bottom1 = _to_corners(box1)
        left2, top2, right2, bottom2 = _to_corners(box2)

        # Overlap extent along each axis, floored at zero for disjoint boxes.
        overlap_w = np.maximum(np.minimum(right1, right2) - np.maximum(left1, left2), 0.0)
        overlap_h = np.maximum(np.minimum(bottom1, bottom2) - np.maximum(top1, top2), 0.0)
        overlap_area = overlap_w * overlap_h

        # Union = sum of both areas minus the part counted twice.
        total_area = box1[2] * box1[3] + box2[2] * box2[3] - overlap_area

        return overlap_area / total_area
    
    def get_ignore_label(infer, gtbox, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
        """
        Purpose:
            Build the initial objectness labels for one output level. Cells whose
            decoded predicted box has an IoU above the threshold with any
            ground-truth box are set to -1; all other cells stay 0.
        Inputs:
            infer            - feature map of one level; must expose .shape and .numpy()
            gtbox            - ground-truth boxes, indexed as gtbox[image][box] = (x, y, w, h);
                               presumably normalised to [0, 1] like the decoded predictions
            num_classes      - number of classes
            anchor_size      - flat list of anchor sizes [w0, h0, w1, h1, ...]
            anchor_mask      - indices of the anchors used by this level
            ignore_threshold - IoU threshold above which a prediction is marked -1
            downsample_ratio - downsampling ratio of this level
        Outputs:
            lbobj            - array of shape [batch, anchors, rows, cols] holding 0 or -1
        """
        # Feature dimensions.
        batch_size = infer.shape[0]
        num_rows = infer.shape[2]
        num_cols = infer.shape[3]
        num_anchor = len(anchor_mask)

        # Split the channel axis into (anchor, 5 + num_classes).
        infer = infer.numpy()
        infer = infer.reshape([-1, num_anchor, 5 + num_classes, num_rows, num_cols])
        pdloc = infer[:, :, 0:4, :, :]  # raw location outputs tx, ty, tw, th

        image_h = num_rows * downsample_ratio  # input image height implied by this level
        image_w = num_cols * downsample_ratio  # input image width implied by this level

        # Grid offsets and anchor priors, shaped to broadcast against
        # [batch, anchor, rows, cols]; this replaces a per-cell Python loop.
        grid_x = np.arange(num_cols, dtype='float64').reshape([1, 1, 1, num_cols])
        grid_y = np.arange(num_rows, dtype='float64').reshape([1, 1, num_rows, 1])
        prior_w = np.array([anchor_size[2 * a] for a in anchor_mask],
                           dtype='float64').reshape([1, num_anchor, 1, 1])
        prior_h = np.array([anchor_size[2 * a + 1] for a in anchor_mask],
                           dtype='float64').reshape([1, num_anchor, 1, 1])

        # Decode predictions into normalised boxes.
        pdbox = np.zeros(pdloc.shape)
        pdbox[:, :, 0, :, :] = (grid_x + sigmoid(pdloc[:, :, 0, :, :])) / num_cols  # x = (cx + sigmoid(tx)) / cols
        pdbox[:, :, 1, :, :] = (grid_y + sigmoid(pdloc[:, :, 1, :, :])) / num_rows  # y = (cy + sigmoid(ty)) / rows
        pdbox[:, :, 2, :, :] = (prior_w * np.exp(pdloc[:, :, 2, :, :])) / image_w   # w = pw * exp(tw) / image_w
        pdbox[:, :, 3, :, :] = (prior_h * np.exp(pdloc[:, :, 3, :, :])) / image_h   # h = ph * exp(th) / image_h
        pdbox = np.clip(pdbox, 0.0, 1.0)  # keep every coordinate inside [0, 1]

        # Mark predictions that overlap a ground-truth box too much.
        lbobj = np.zeros([batch_size, num_anchor, num_rows, num_cols])
        for m in range(batch_size):
            # The predicted boxes of image m do not depend on the ground-truth
            # box being visited, so slice them once per image.
            box1 = [pdbox[m, :, 0, :, :], pdbox[m, :, 1, :, :],
                    pdbox[m, :, 2, :, :], pdbox[m, :, 3, :, :]]

            for n in range(len(gtbox[m])):
                gtbox_x = gtbox[m][n][0]
                gtbox_y = gtbox[m][n][1]
                gtbox_w = gtbox[m][n][2]
                gtbox_h = gtbox[m][n][3]

                # Boxes with a near-zero side are treated as "no object".
                if gtbox_w < 1e-3 or gtbox_h < 1e-3:
                    continue

                box2 = [gtbox_x, gtbox_y, gtbox_w, gtbox_h]
                ious = get_box_iou_xywh(box1, box2)

                lbobj[m][ious > ignore_threshold] = -1

        return lbobj
    
    def get_predict_label(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
        """
        Purpose:
            Build the training labels of one output level from the ground truth.
        Inputs:
            infer            - feature map of one level
            gtbox            - ground-truth boxes, indexed as gtbox[image][box] = (x, y, w, h)
            gtcls            - ground-truth class indices, indexed as gtcls[image][box]
            num_classes      - number of classes
            anchor_size      - flat list of anchor sizes [w0, h0, w1, h1, ...]
            anchor_mask      - indices of the anchors used by this level
            ignore_threshold - IoU threshold forwarded to get_ignore_label
            downsample_ratio - downsampling ratio of this level
        Outputs:
            lbloc            - location labels,   float32 [batch, anchors, 4, rows, cols]
            lbobj            - objectness labels, float32 [batch, anchors, rows, cols] (1, 0 or -1)
            lbcls            - class labels,      float32 [batch, anchors, num_classes, rows, cols]
            wtloc            - location weights,  float32 [batch, anchors, rows, cols]
        """
        batch_size = infer.shape[0]
        num_rows = infer.shape[2]
        num_cols = infer.shape[3]
        num_anchor = len(anchor_mask)

        # Allocate as float32 up front so the returned dtype is the same even
        # when an image has no valid box, and no per-box array copy is needed.
        lbloc = np.zeros([batch_size, num_anchor, 4, num_rows, num_cols], dtype='float32')
        lbcls = np.zeros([batch_size, num_anchor, num_classes, num_rows, num_cols], dtype='float32')
        # The last axis is the column axis, matching lbobj and the loss maps.
        wtloc = np.ones([batch_size, num_anchor, num_rows, num_cols], dtype='float32')

        # Start from the ignore mask (-1 where a prediction already overlaps a
        # ground-truth box above the threshold, 0 elsewhere).
        lbobj = get_ignore_label(infer, gtbox, num_classes, anchor_size, anchor_mask,
                                 ignore_threshold, downsample_ratio).astype('float32')

        image_h = num_rows * downsample_ratio  # input image height implied by this level
        image_w = num_cols * downsample_ratio  # input image width implied by this level

        # (width, height) of every anchor used by this level.
        anchors = [(anchor_size[2 * a], anchor_size[2 * a + 1]) for a in anchor_mask]

        for m in range(batch_size):
            for n in range(len(gtbox[m])):
                gtbox_x = gtbox[m][n][0]
                gtbox_y = gtbox[m][n][1]
                gtbox_w = gtbox[m][n][2]
                gtbox_h = gtbox[m][n][3]

                # Boxes with a near-zero side are treated as "no object".
                if gtbox_w < 1e-3 or gtbox_h < 1e-3:
                    continue

                # Compare shapes only: both boxes are centred on the origin.
                box2 = [0.0, 0.0, float(gtbox_w), float(gtbox_h)]
                iou_list = np.array([
                    get_box_iou_xywh([0.0, 0.0, anchor_w / float(image_w), anchor_h / float(image_h)], box2)
                    for anchor_w, anchor_h in anchors
                ])
                k = np.argsort(iou_list)[-1]  # anchor with the largest IoU
                anchor_w, anchor_h = anchors[k]

                # Grid cell containing the box centre.
                i = int(gtbox_y * num_rows)
                j = int(gtbox_x * num_cols)

                # Location labels.
                lbloc[m, k, 0, i, j] = gtbox_x * num_cols - j                # dx = sigmoid(tx) = gtx - cx
                lbloc[m, k, 1, i, j] = gtbox_y * num_rows - i                # dy = sigmoid(ty) = gty - cy
                lbloc[m, k, 2, i, j] = np.log(gtbox_w * image_w / anchor_w)  # tw = log(gtw / pw)
                lbloc[m, k, 3, i, j] = np.log(gtbox_h * image_h / anchor_h)  # th = log(gth / ph)

                # Objectness label: this cell/anchor is responsible for an object.
                lbobj[m, k, i, j] = 1

                # One-hot class label.
                c = gtcls[m][n]
                lbcls[m, k, c, i, j] = 1.0

                # Location weight: smaller boxes get a larger weight; it is
                # multiplied with the location loss.
                wtloc[m, k, i, j] = 2.0 - gtbox_w * gtbox_h

        return lbloc, lbobj, lbcls, wtloc
    
    def get_loss(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
        """
        Purpose:
            Compute the summed loss of every image for one output level.
        Inputs:
            infer            - feature map of one level
            gtbox            - ground-truth boxes
            gtcls            - ground-truth classes
            num_classes      - number of classes
            anchor_size      - flat list of anchor sizes
            anchor_mask      - indices of the anchors used by this level
            ignore_threshold - threshold forwarded to get_predict_label
            downsample_ratio - downsampling ratio of this level
        Outputs:
            sum_loss         - loss summed over axes 1, 2 and 3, i.e. one value per image
        """
        # Build the labels and turn them into constants of the graph.
        labels = get_predict_label(infer, gtbox, gtcls,
                                   num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio)
        lbloc, lbobj, lbcls, wtloc = [to_variable(label) for label in labels]
        for label in (lbloc, lbobj, lbcls, wtloc):
            label.stop_gradient = True  # labels take no gradient

        # Split the channel axis into (anchor, 5 + num_classes).
        feature_shape = [-1, len(anchor_mask), 5 + num_classes, infer.shape[2], infer.shape[3]]
        infer = fluid.layers.reshape(infer, feature_shape)

        # Mask that is 1 where the objectness label is positive, 0 elsewhere.
        positive = fluid.layers.cast(lbobj > 0, 'float32')
        positive.stop_gradient = True

        cross_entropy = fluid.layers.sigmoid_cross_entropy_with_logits

        # Location loss: cross entropy on the dx/dy logits, absolute error on tw/th.
        loss_dx = cross_entropy(infer[:, :, 0, :, :], lbloc[:, :, 0, :, :])
        loss_dy = cross_entropy(infer[:, :, 1, :, :], lbloc[:, :, 1, :, :])
        loss_tw = fluid.layers.abs(infer[:, :, 2, :, :] - lbloc[:, :, 2, :, :])
        loss_th = fluid.layers.abs(infer[:, :, 3, :, :] - lbloc[:, :, 3, :, :])
        # Weighted, and kept only at positive positions.
        loss_loc = (loss_dx + loss_dy + loss_tw + loss_th) * wtloc * positive

        # Objectness loss; positions labelled -1 are ignored.
        loss_obj = cross_entropy(infer[:, :, 4, :, :], lbobj, ignore_index=-1)

        # Class loss: summed over the class axis, kept only at positive positions.
        class_terms = cross_entropy(infer[:, :, 5:5+num_classes, :, :], lbcls)
        loss_cls = fluid.layers.reduce_sum(class_terms, dim=2) * positive

        # Total per image.
        return fluid.layers.reduce_sum(loss_loc + loss_obj + loss_cls, dim=[1, 2, 3])
    
    def get_sum_loss(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
        """
        Purpose:
            Add up the batch-mean losses of all output levels.
        Inputs:
            infer            - list of feature maps, one per output level
            gtbox            - ground-truth boxes
            gtcls            - ground-truth classes
            num_classes      - number of classes
            anchor_size      - flat list of anchor sizes
            anchor_mask      - per-level lists of anchor indices
            ignore_threshold - threshold forwarded to get_loss
            downsample_ratio - downsampling ratio of the first level; halved for each following level
        Outputs:
            sum_loss         - sum over levels of the mean per-image loss
        """
        level_losses = []
        ratio = downsample_ratio
        for level, feature in enumerate(infer):
            per_image = get_loss(feature, gtbox, gtcls, num_classes, anchor_size,
                                 anchor_mask[level], ignore_threshold, ratio)
            level_losses.append(fluid.layers.reduce_mean(per_image))

            # The next level uses half the downsampling ratio.
            ratio //= 2

        return sum(level_losses)
    
    # def get_sum_loss(infer, gtbox, gtcls, num_classes, anchor_size, anchor_mask, ignore_threshold, downsample_ratio):
    #     # 计算平均损失
    #     loss_list = [] # 平均损失列表
    #     gtbox = to_variable(gtbox)
    #     gtcls = to_variable(gtcls)
        
    #     for i in range(len(infer)):
    #         # 计算平均损失
    #         loss = fluid.layers.yolov3_loss(
    #             x=infer[i],
    #             gt_box=gtbox,
    #             gt_label=gtcls,
    #             class_num=num_classes,
    #             anchors=anchor_size,
    #             anchor_mask=anchor_mask[i],
    #             ignore_thresh=ignore_threshold,
    #             downsample_ratio=downsample_ratio,
    #             use_label_smooth=False)
    #         loss_list.append(fluid.layers.reduce_mean(loss)) # 添加损失列表
            
    #         # 减小下采样率
    #         downsample_ratio //= 2 # 减小下采样率
        
    #     # 计算损失总和
    #     sum_loss = sum(loss_list)
        
    #     return sum_loss

    参考资料:

    https://www.jianshu.com/p/47172eb86b39

    https://www.cnblogs.com/houjun/p/10922352.html

    https://blog.csdn.net/litt1e/article/details/88814417

    https://blog.csdn.net/litt1e/article/details/88852745

    https://blog.csdn.net/litt1e/article/details/88907542

    https://aistudio.baidu.com/aistudio/projectdetail/742781

    https://aistudio.baidu.com/aistudio/projectdetail/672017

    https://aistudio.baidu.com/aistudio/projectdetail/868589

    https://aistudio.baidu.com/aistudio/projectdetail/122277

  • 相关阅读:
    【问题备注】VS2012不能输入代码,文字…
    犯错了又
    关于实习…
    百度2014校招笔试题目题解(更新了第1题的算法,10.9下午)
    人人校招笔试题
    腾讯2014软件开发笔试题目
    阿里巴巴笔试题选解
    批处理文件——多个QQ一键登录
    动态规划---LIS
    动态规划--凑硬币问题
  • 原文地址:https://www.cnblogs.com/d442130165/p/13685301.html
Copyright © 2020-2023  润新知