• [Paddle学习笔记][09][基于YOLOv3的昆虫检测-模型设计]


    说明:

    本例程使用YOLOv3进行昆虫检测。例程分为数据处理、模型设计、损失函数、训练模型、模型预测和测试模型六个部分。本篇为第二部分,使用Paddle动态图实现了YOLOv3,使用Darknet53骨干网络和YOLOv3的检测头部。

    实验代码:

    Darknet53骨干网络和YOLOv3头部

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable
    
    from source.model import DarkNet53, YOLOHeader
    
    with fluid.dygraph.guard():
        # Input: one random 3-channel 608x608 image wrapped as a dygraph variable.
        x = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

        # Build the backbone first, then one detection head per backbone output.
        backbone = DarkNet53()
        detect_0 = YOLOHeader(num_channels=1024, num_filters=512)
        detect_1 = YOLOHeader(num_channels=512, num_filters=256)
        detect_2 = YOLOHeader(num_channels=256, num_filters=128)

        # Forward pass: backbone features, then (route, tip) from each head.
        c0, c1, c2 = backbone(x)
        c0_r, c0_t = detect_0(c0)
        c1_r, c1_t = detect_1(c1)
        c2_r, c2_t = detect_2(c2)

        # Print the shape of every feature map, one line per backbone output.
        report_rows = (
            ('c0:', 'route:', c0, c0_r, c0_t),
            ('c1:', ' route:', c1, c1_r, c1_t),
            ('c2:', ' route:', c2, c2_r, c2_t),
        )
        for feat_label, route_label, feat, route, tip in report_rows:
            print(feat_label, feat.shape, route_label, route.shape, 'tip:', tip.shape)

    结果:

    c0: [1, 1024, 19, 19] route: [1, 512, 19, 19] tip: [1, 1024, 19, 19]

    c1: [1, 512, 38, 38]  route: [1, 256, 38, 38] tip: [1, 512, 38, 38]

    c2: [1, 256, 76, 76]  route: [1, 128, 76, 76] tip: [1, 256, 76, 76]

    完整的YOLOv3模型

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable
    
    from source.model import YOLOv3
    
    with fluid.dygraph.guard():
        # Input: one random 3-channel 608x608 image wrapped as a dygraph variable.
        x = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

        # Model configuration.
        num_classes = 7                                  # number of object classes
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices, one group per output

        model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)

        # The model returns one prediction map per anchor group.
        p0, p1, p2 = model(x)

        # Print the shape of each prediction map.
        for label, prediction in (('p0:', p0), ('p1:', p1), ('p2:', p2)):
            print(label, prediction.shape)

    结果:

    p0: [1, 36, 19, 19]

    p1: [1, 36, 38, 38]

    p2: [1, 36, 76, 76]

    每个YOLOv3头部的输出特征

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.base import to_variable
    
    from source.model import YOLOv3
    
    with fluid.dygraph.guard():
        # Input: one random 3-channel 608x608 image wrapped as a dygraph variable.
        x = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

        # Model configuration.
        num_classes = 7                                  # number of object classes
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices, one group per output

        model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)

        p0, p1, p2 = model(x)

        # Split the channel axis of p0 into (anchors, 5 + num_classes) while
        # keeping the two trailing dimensions unchanged.
        num_anchors = len(anchor_mask[0])
        grid_h, grid_w = p0.shape[2], p0.shape[3]
        p0 = fluid.layers.reshape(p0, [-1, num_anchors, 5 + num_classes, grid_h, grid_w])

        # Per anchor: entries 0-3 are the box terms, entry 4 the objectness,
        # and the remaining num_classes entries the class scores.
        pdloc = p0[:, :, 0:4, :, :]                                     # box location terms (no activation applied)
        pdobj = fluid.layers.sigmoid(p0[:, :, 4, :, :])                 # objectness probability
        pdcls = fluid.layers.sigmoid(p0[:, :, 5:5 + num_classes, :, :]) # class probabilities

        for label, tensor in (('predict_loc', pdloc), ('predict_obj', pdobj), ('predict_cls', pdcls)):
            print(label, tensor.shape)

    结果:

    predict_loc [1, 3, 4, 19, 19]

    predict_obj [1, 3, 19, 19]

    predict_cls [1, 3, 7, 19, 19]

    model.py文件

    import paddle.fluid as fluid
    from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
    from paddle.fluid.param_attr import ParamAttr
    from paddle.fluid.regularizer import L2Decay
    
    # Convolution + batch-norm layer
    class ConvBN(fluid.dygraph.Layer):
        """Bias-free Conv2D followed by BatchNorm and a leaky ReLU (alpha=0.1).

        Args:
            num_channels: number of input channels of the convolution.
            num_filters: number of output channels of the convolution; also the
                channel count handed to BatchNorm.
            filter_size: convolution kernel size.
            stride: convolution stride.
            padding: convolution padding.
        """

        def __init__(self, num_channels, num_filters, filter_size, stride, padding):
            super(ConvBN, self).__init__()

            # Convolution weights are drawn from Normal(0, 0.02); no bias term.
            conv_weight_attr = ParamAttr(initializer=fluid.initializer.Normal(0, 0.02))
            self.conv = Conv2D(
                num_channels=num_channels,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=stride,
                padding=padding,
                param_attr=conv_weight_attr,
                bias_attr=False,
                act=None)

            # BatchNorm scale ~ Normal(0, 0.02) and offset = 0; both carry
            # L2Decay(0) so that no weight decay is applied to them.
            bn_scale_attr = ParamAttr(
                initializer=fluid.initializer.Normal(0, 0.02),
                regularizer=L2Decay(0))
            bn_offset_attr = ParamAttr(
                initializer=fluid.initializer.Constant(0),
                regularizer=L2Decay(0))
            self.batch_norm = BatchNorm(
                num_channels=num_filters,
                param_attr=bn_scale_attr,
                bias_attr=bn_offset_attr,
                act=None)

        def forward(self, x):
            """Apply conv -> batch norm -> leaky ReLU and return the result."""
            normalized = self.batch_norm(self.conv(x))
            # leaky_relu with alpha=0.1, i.e. out = max(x, 0.1 * x)
            return fluid.layers.leaky_relu(x=normalized, alpha=0.1)
    
    # Downsampling module
    class DownSample(fluid.dygraph.Layer):
        """A single ConvBN whose defaults (3x3 kernel, stride 2, padding 1) shrink the input.

        Args:
            num_channels: number of input channels.
            num_filters: number of output channels.
            filter_size: kernel size passed to ConvBN (default 3).
            stride: stride passed to ConvBN (default 2).
            padding: padding passed to ConvBN (default 1).
        """

        def __init__(self, num_channels, num_filters, filter_size=3, stride=2, padding=1):
            super(DownSample, self).__init__()

            self.conv_bn = ConvBN(
                num_channels=num_channels,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=stride,
                padding=padding)

        def forward(self, x):
            """Return the ConvBN output for ``x``."""
            return self.conv_bn(x)
        
    # Upsampling module
    class UpSample(fluid.dygraph.Layer):
        """Nearest-neighbour enlargement of a feature map.

        Args:
            scale: multiplier applied to dimensions 2 and 3 of the input (default 2).
        """

        def __init__(self, scale=2):
            super(UpSample, self).__init__()

            self.scale = scale

        def forward(self, x):
            """Return ``x`` resized by ``self.scale`` using ``resize_nearest``."""
            # Read dimensions 2 and 3 of the input shape at run time.
            full_shape = fluid.layers.shape(input=x)
            spatial = fluid.layers.slice(input=full_shape, axes=[0], starts=[2], ends=[4])
            spatial = fluid.layers.cast(x=spatial, dtype='int32')
            spatial.stop_gradient = True  # the size tensor takes no part in backprop

            # Target size is the current size multiplied by the scale factor.
            target = spatial * self.scale
            # NOTE(review): both out_shape and scale are passed; the Paddle docs
            # say out_shape takes priority -- confirm against the installed version.
            return fluid.layers.resize_nearest(input=x, out_shape=target, scale=self.scale)
    
    # Basic residual block
    class BasicBlock(fluid.dygraph.Layer):
        """Residual unit: 1x1 ConvBN, then 3x3 ConvBN, added back onto the input.

        The second ConvBN outputs ``num_filters * 2`` channels and the result is
        added element-wise to the input.

        Args:
            num_channels: number of input channels.
            num_filters: output channels of the 1x1 ConvBN.
        """

        def __init__(self, num_channels, num_filters):
            super(BasicBlock, self).__init__()

            # 1x1 ConvBN: num_channels -> num_filters
            self.conv_bn_1 = ConvBN(
                num_channels=num_channels, num_filters=num_filters,
                filter_size=1, stride=1, padding=0)
            # 3x3 ConvBN: num_filters -> num_filters * 2
            self.conv_bn_2 = ConvBN(
                num_channels=num_filters, num_filters=num_filters * 2,
                filter_size=3, stride=1, padding=1)

        def forward(self, x):
            """Return ``x`` plus the output of the two stacked ConvBN layers."""
            squeezed = self.conv_bn_1(x)
            expanded = self.conv_bn_2(squeezed)
            return fluid.layers.elementwise_add(x=x, y=expanded, act=None)
    
    # Group of basic residual blocks
    class BlockGroup(fluid.dygraph.Layer):
        """``num_blocks`` BasicBlocks applied one after another.

        Args:
            num_channels: ``num_channels`` passed to every BasicBlock.
            num_filters: ``num_filters`` passed to every BasicBlock.
            num_blocks: total number of blocks; the first is stored as
                ``basicblock_0``, the rest are registered as ``block_1``,
                ``block_2``, ... and kept in ``block_list``.
        """

        def __init__(self, num_channels, num_filters, num_blocks):
            super(BlockGroup, self).__init__()

            # The first block is always present.
            self.basicblock_0 = BasicBlock(num_channels=num_channels, num_filters=num_filters)

            # Remaining blocks, registered by name so their parameters are tracked.
            self.block_list = [
                self.add_sublayer(
                    'block_' + str(index),
                    BasicBlock(num_channels=num_channels, num_filters=num_filters))
                for index in range(1, num_blocks)
            ]

        def forward(self, x):
            """Run ``x`` through ``basicblock_0`` and then every block in ``block_list``."""
            for block in [self.basicblock_0] + self.block_list:
                x = block(x)

            return x
    
    # Backbone network
    class DarkNet53(fluid.dygraph.Layer):
        """Backbone built from a stem, five residual groups and four downsamplers.

        ``forward`` returns the outputs of the last three residual groups,
        deepest first (named c0, c1, c2 by the callers in this file).
        """

        def __init__(self):
            super(DarkNet53, self).__init__()

            # Stem: 3 -> 32 channels at stride 1, then a downsample to 64 channels.
            self.conv_bn_1 = ConvBN(num_channels=3, num_filters=32, filter_size=3, stride=1, padding=1)
            self.down_sample_1 = DownSample(num_channels=32, num_filters=64)

            # Number of residual blocks in each group.
            self.num_groups = [1, 2, 8, 8, 4]

            # Residual groups: group i works on 64 * 2**i channels with a
            # 32 * 2**i channel bottleneck.
            self.group_list = [
                self.add_sublayer(
                    'group_' + str(i),
                    BlockGroup(num_channels=64 * 2 ** i, num_filters=32 * 2 ** i, num_blocks=num_blocks))
                for i, num_blocks in enumerate(self.num_groups)
            ]

            # One downsampler between consecutive groups, doubling the channels
            # (64 * 2**i -> 128 * 2**i).
            self.downs_list = [
                self.add_sublayer(
                    'downs_' + str(i),
                    DownSample(num_channels=64 * 2 ** i, num_filters=128 * 2 ** i))
                for i in range(len(self.num_groups) - 1)
            ]

        def forward(self, x):
            """Return the last three group outputs as a list, deepest first."""
            # Stem: extract features, then shrink the feature map.
            x = self.down_sample_1(self.conv_bn_1(x))

            num_downs = len(self.num_groups) - 1
            group_outputs = []
            for idx, group in enumerate(self.group_list):
                x = group(x)
                group_outputs.append(x)

                # Every group except the last is followed by a downsampler.
                if idx < num_downs:
                    x = self.downs_list[idx](x)

            # Reverse so the deepest output comes first, then keep three: c0, c1, c2.
            return group_outputs[::-1][:3]
        
    # Detection head module
    class YOLOHeader(fluid.dygraph.Layer):
        """Six ConvBN layers alternating 1x1 (narrow) and 3x3 (wide) convolutions.

        Args:
            num_channels: number of input channels.
            num_filters: channel count of the narrow (1x1) layers; the wide (3x3)
                layers output ``num_filters * 2``. Must be even.

        ``forward`` returns ``(route, tip)``: the output of the fifth layer
        (``num_filters`` channels) and of the sixth layer (``num_filters * 2``).
        """

        def __init__(self, num_channels, num_filters):
            super(YOLOHeader, self).__init__()

            assert num_filters % 2 == 0, "num_filters {} cannot be devided by 2".format(num_filters)

            wide = num_filters * 2

            def narrow_conv(in_channels):
                # 1x1 ConvBN reducing to num_filters channels.
                return ConvBN(
                    num_channels=in_channels, num_filters=num_filters,
                    filter_size=1, stride=1, padding=0)

            def wide_conv():
                # 3x3 ConvBN expanding num_filters -> num_filters * 2 channels.
                return ConvBN(
                    num_channels=num_filters, num_filters=wide,
                    filter_size=3, stride=1, padding=1)

            # Layers are created in the order they are applied in forward().
            self.conv_bn_1 = narrow_conv(num_channels)
            self.conv_bn_2 = wide_conv()
            self.conv_bn_3 = narrow_conv(wide)
            self.conv_bn_4 = wide_conv()

            self.route = narrow_conv(wide)
            self.tip = wide_conv()

        def forward(self, x):
            """Return ``(route, tip)`` for the input feature map ``x``."""
            # Feature extraction through the first four layers.
            for layer in (self.conv_bn_1, self.conv_bn_2, self.conv_bn_3, self.conv_bn_4):
                x = layer(x)

            # route is the narrow output; tip is computed from route.
            route = self.route(x)
            tip = self.tip(route)

            return route, tip
        
    # Object detection module
    class YOLOv3(fluid.dygraph.Layer):
        """DarkNet53 backbone plus one YOLOHeader and 1x1 output conv per anchor group.

        Args:
            num_classes: number of object classes.
            anchor_mask: list of anchor-index groups, one group per output map.
                Each output conv emits ``len(group) * (num_classes + 5)`` channels.

        ``forward`` returns a list with one prediction map per backbone output.
        """

        def __init__(self, num_classes, anchor_mask):
            super(YOLOv3, self).__init__()

            # Backbone network.
            self.backbone = DarkNet53()

            # Detection configuration.
            self.num_classes = num_classes
            self.anchor_mask = anchor_mask

            self.dete_list = []  # detection heads
            self.conv_list = []  # output convolutions
            self.rout_list = []  # route convolutions feeding the next head

            last_index = len(self.anchor_mask) - 1
            for i, mask in enumerate(self.anchor_mask):
                divisor = 2 ** i
                tip_channels = 1024 // divisor    # channels of this head's tip output
                route_channels = 512 // divisor   # channels of this head's route output

                # Detection head. After the first, the input is the backbone
                # feature concatenated with the upsampled route of the previous head.
                if i == 0:
                    head_in_channels = tip_channels
                else:
                    head_in_channels = tip_channels + route_channels
                head = self.add_sublayer(
                    'dete_' + str(i),
                    YOLOHeader(num_channels=head_in_channels, num_filters=route_channels))
                self.dete_list.append(head)

                # 1x1 output convolution: Normal(0, 0.02) weights, zero bias
                # with L2Decay(0) so no weight decay is applied to the bias.
                out_conv = self.add_sublayer(
                    'conv_' + str(i),
                    Conv2D(
                        num_channels=tip_channels,
                        num_filters=len(mask) * (self.num_classes + 5),
                        filter_size=1, stride=1, padding=0,
                        param_attr=ParamAttr(
                            initializer=fluid.initializer.Normal(0, 0.02)),
                        bias_attr=ParamAttr(
                            initializer=fluid.initializer.Constant(0),
                            regularizer=L2Decay(0)),
                        act=None))
                self.conv_list.append(out_conv)

                # Route convolution halving the route channels; not needed
                # after the last head.
                if i < last_index:
                    route_conv = self.add_sublayer(
                        'rout_' + str(i),
                        ConvBN(
                            num_channels=route_channels, num_filters=256 // divisor,
                            filter_size=1, stride=1, padding=0))
                    self.rout_list.append(route_conv)

            # Shared upsampling layer.
            self.upsample = UpSample()

        def forward(self, x):
            """Return the list of prediction maps for input batch ``x``."""
            # Backbone features, deepest first.
            features = self.backbone(x)

            last_index = len(self.anchor_mask) - 1
            predictions = []
            for i, feature in enumerate(features):
                # From the second output on, prepend the upsampled route of the
                # previous head along the channel axis.
                if i > 0:
                    feature = fluid.layers.concat(input=[carried_route, feature], axis=1)

                # Detection head, then the 1x1 output convolution on its tip.
                route, tip = self.dete_list[i](feature)
                predictions.append(self.conv_list[i](tip))

                # Prepare the route for the next head: reduce channels, then enlarge.
                if i < last_index:
                    carried_route = self.upsample(self.rout_list[i](route))

            return predictions

    参考资料:

    https://blog.csdn.net/litt1e/article/details/88814417

    https://blog.csdn.net/litt1e/article/details/88852745

    https://blog.csdn.net/litt1e/article/details/88907542

    https://aistudio.baidu.com/aistudio/projectdetail/742781

    https://aistudio.baidu.com/aistudio/projectdetail/672017

    https://aistudio.baidu.com/aistudio/projectdetail/868589

    https://aistudio.baidu.com/aistudio/projectdetail/122277

  • 相关阅读:
    推荐系统(9)—— 推荐系统重排序-注意力模型小结
    代码题(66)— 二叉树的所有路径、最长同值路径
    推荐系统(8)—— 多目标优化应用总结_1
    推荐系统(7)—— CTR 预估算法综述
    推荐系统(6)—— 特征组合作用
    推荐系统(5)—— 推荐系统多目标优化(ESMM、MMOE、CGC、PLE)
    代码题(65)— 在排序数组中查找元素的第一个和最后一个位置、长度最小的子数组
    机器学习(三十三)— 机器学习中如何利用id类特征
    navicate怎么快速写数据库设计文档
    SpringBoot:整合log4j2
  • 原文地址:https://www.cnblogs.com/d442130165/p/13685048.html
Copyright © 2020-2023  润新知