说明:
本例程使用YOLOv3进行昆虫检测。例程分为数据处理、模型设计、损失函数、训练模型、模型预测和测试模型六个部分。本篇为第二部分,使用Paddle动态图实现了YOLOv3,使用Darknet53骨干网络和YOLOv3的检测头部。
实验代码:
Darknet53骨干网络和YOLOv3头部:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from source.model import DarkNet53, YOLOHeader

# Shape check: run a random batch through the DarkNet53 backbone and one
# YOLOHeader per backbone output, then print the shape of every feature map.
with fluid.dygraph.guard():
    # Random input batch converted to a dygraph variable.
    image = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    # Backbone first, then the three heads (input channels 1024 / 512 / 256,
    # each with half as many filters).
    backbone = DarkNet53()
    heads = [
        YOLOHeader(num_channels=channels, num_filters=channels // 2)
        for channels in (1024, 512, 256)
    ]

    # Backbone features, then (route, tip) from the matching head.
    c0, c1, c2 = backbone(image)
    (c0_r, c0_t), (c1_r, c1_t), (c2_r, c2_t) = (
        head(feature) for head, feature in zip(heads, (c0, c1, c2)))

    # Report shapes.
    print('c0:', c0.shape, 'route:', c0_r.shape, 'tip:', c0_t.shape)
    print('c1:', c1.shape, ' route:', c1_r.shape, 'tip:', c1_t.shape)
    print('c2:', c2.shape, ' route:', c2_r.shape, 'tip:', c2_t.shape)
结果:
c0: [1, 1024, 19, 19] route: [1, 512, 19, 19] tip: [1, 1024, 19, 19]
c1: [1, 512, 38, 38] route: [1, 256, 38, 38] tip: [1, 512, 38, 38]
c2: [1, 256, 76, 76] route: [1, 128, 76, 76] tip: [1, 256, 76, 76]
完整的YOLOv3模型:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from source.model import YOLOv3

# Shape check: run a random batch through the full YOLOv3 model and print the
# shape of each of its three outputs.
with fluid.dygraph.guard():
    # Random input batch converted to a dygraph variable.
    batch = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    num_classes = 7  # number of object classes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices per output

    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)
    p0, p1, p2 = model(batch)

    # Report shapes.
    for label, prediction in (('p0:', p0), ('p1:', p1), ('p2:', p2)):
        print(label, prediction.shape)
结果:
p0: [1, 36, 19, 19]
p1: [1, 36, 38, 38]
p2: [1, 36, 76, 76]
每个YOLOv3头部的输出特征:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from source.model import YOLOv3

# Split the first YOLOv3 output into its location, objectness and class parts
# and print the shape of each.
with fluid.dygraph.guard():
    # Random input batch converted to a dygraph variable.
    batch = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    num_classes = 7  # number of object classes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor indices per output

    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)
    p0, _, _ = model(batch)

    # Separate the channel axis into (anchors, 5 + num_classes).
    num_anchors = len(anchor_mask[0])
    grid_h, grid_w = p0.shape[2], p0.shape[3]
    p0 = fluid.layers.reshape(
        p0, [-1, num_anchors, 5 + num_classes, grid_h, grid_w])

    # Entries 0..3: location terms (no activation applied here).
    pdloc = p0[:, :, 0:4, :, :]
    # Entry 4: objectness, squashed with a sigmoid.
    pdobj = fluid.layers.sigmoid(p0[:, :, 4, :, :])
    # Entries 5..: per-class scores, squashed with a sigmoid.
    pdcls = fluid.layers.sigmoid(p0[:, :, 5:5+num_classes, :, :])

    print('predict_loc', pdloc.shape)
    print('predict_obj', pdobj.shape)
    print('predict_cls', pdcls.shape)
结果:
predict_loc [1, 3, 4, 19, 19]
predict_obj [1, 3, 19, 19]
predict_cls [1, 3, 7, 19, 19]
model.py文件:
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay


class ConvBN(fluid.dygraph.Layer):
    """Convolution (no bias) followed by batch norm and leaky ReLU.

    Args:
        num_channels: Number of input channels of the convolution.
        num_filters: Number of output channels of the convolution.
        filter_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
    """

    def __init__(self, num_channels, num_filters, filter_size, stride, padding):
        super(ConvBN, self).__init__()

        self.conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            # Weights initialised from Normal(0, 0.02).
            param_attr=ParamAttr(initializer=fluid.initializer.Normal(0, 0.02)),
            bias_attr=False,  # no bias parameter
            act=None)

        self.batch_norm = BatchNorm(
            num_channels=num_filters,
            # Normal(0, 0.02) weights; L2Decay(0) disables weight decay.
            param_attr=ParamAttr(
                initializer=fluid.initializer.Normal(0, 0.02),
                regularizer=L2Decay(0)),
            # Zero-initialised bias; L2Decay(0) disables weight decay.
            bias_attr=ParamAttr(
                initializer=fluid.initializer.Constant(0),
                regularizer=L2Decay(0)),
            act=None)

    def forward(self, x):
        """Apply conv -> batch norm -> leaky ReLU and return the result."""
        x = self.conv(x)
        x = self.batch_norm(x)
        # Leaky ReLU with slope 0.1: out = max(x, 0.1 * x).
        x = fluid.layers.leaky_relu(x=x, alpha=0.1)
        return x


class DownSample(fluid.dygraph.Layer):
    """Down-sampling module: a single ConvBN whose default stride is 2.

    Args:
        num_channels: Number of input channels.
        num_filters: Number of output channels.
        filter_size: Kernel size (default 3).
        stride: Stride (default 2).
        padding: Padding (default 1).
    """

    def __init__(self, num_channels, num_filters, filter_size=3, stride=2, padding=1):
        super(DownSample, self).__init__()

        self.conv_bn = ConvBN(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding)

    def forward(self, x):
        """Return the ConvBN output for ``x``."""
        x = self.conv_bn(x)
        return x


class UpSample(fluid.dygraph.Layer):
    """Nearest-neighbour up-sampling by an integer factor.

    Args:
        scale: Multiplier applied to the two trailing dimensions (default 2).
    """

    def __init__(self, scale=2):
        super(UpSample, self).__init__()

        self.scale = scale

    def forward(self, x):
        """Resize ``x`` so that shape entries 2 and 3 are multiplied by ``scale``."""
        # Read the runtime shape and keep entries 2 and 3
        # (assumes NCHW layout, i.e. height and width).
        shape = fluid.layers.shape(input=x)
        sizes = fluid.layers.slice(input=shape, axes=[0], starts=[2], ends=[4])
        sizes = fluid.layers.cast(x=sizes, dtype='int32')
        sizes.stop_gradient = True  # the shape takes no part in back-propagation

        # Target output size.
        sizes = sizes * self.scale

        x = fluid.layers.resize_nearest(input=x, out_shape=sizes, scale=self.scale)
        return x


class BasicBlock(fluid.dygraph.Layer):
    """Residual block: 1x1 ConvBN, 3x3 ConvBN, then add the block input.

    The second convolution emits ``num_filters * 2`` channels; for the
    residual addition to be shape-compatible the caller is expected to pass
    ``num_channels == num_filters * 2``.

    Args:
        num_channels: Number of input channels.
        num_filters: Number of channels after the 1x1 convolution.
    """

    def __init__(self, num_channels, num_filters):
        super(BasicBlock, self).__init__()

        self.conv_bn_1 = ConvBN(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0)
        self.conv_bn_2 = ConvBN(
            num_channels=num_filters,
            num_filters=num_filters * 2,
            filter_size=3,
            stride=1,
            padding=1)

    def forward(self, x):
        """Return ``x + conv_bn_2(conv_bn_1(x))``."""
        t = self.conv_bn_1(x)
        y = self.conv_bn_2(t)
        z = fluid.layers.elementwise_add(x=x, y=y, act=None)
        return z


class BlockGroup(fluid.dygraph.Layer):
    """A chain of ``num_blocks`` BasicBlock modules with identical settings.

    Args:
        num_channels: Input channels of every block.
        num_filters: ``num_filters`` of every block.
        num_blocks: Total number of blocks in the group (at least one block
            is always created).
    """

    def __init__(self, num_channels, num_filters, num_blocks):
        super(BlockGroup, self).__init__()

        # First residual block.
        self.basicblock_0 = BasicBlock(num_channels=num_channels, num_filters=num_filters)

        # Remaining residual blocks, registered as 'block_1', 'block_2', ...
        self.block_list = []
        for i in range(1, num_blocks):
            block_item = self.add_sublayer(
                'block_' + str(i),
                BasicBlock(num_channels=num_channels, num_filters=num_filters))
            self.block_list.append(block_item)

    def forward(self, x):
        """Run ``x`` through every block in order."""
        x = self.basicblock_0(x)
        for block_item in self.block_list:
            x = block_item(x)
        return x


class DarkNet53(fluid.dygraph.Layer):
    """DarkNet53 backbone returning the three deepest feature maps."""

    def __init__(self):
        super(DarkNet53, self).__init__()

        # Stem: 3 -> 32 channels, then a stride-2 down-sample to 64 channels.
        self.conv_bn_1 = ConvBN(num_channels=3, num_filters=32, filter_size=3, stride=1, padding=1)
        self.down_sample_1 = DownSample(num_channels=32, num_filters=64)

        # Residual groups; entry i holds the number of blocks in group i.
        self.num_groups = [1, 2, 8, 8, 4]
        self.group_list = []
        for i, num_blocks in enumerate(self.num_groups):
            # Group i takes 32 * 2**(i + 1) channels; its blocks squeeze to
            # 32 * 2**i channels internally.
            group_item = self.add_sublayer(
                'group_' + str(i),
                BlockGroup(num_channels=32*(2**(i+1)), num_filters=32*(2**i), num_blocks=num_blocks))
            self.group_list.append(group_item)

        # One down-sample between each pair of consecutive groups; each
        # doubles the channel count.
        self.downs_list = []
        for i in range(len(self.num_groups) - 1):
            downs_item = self.add_sublayer(
                'downs_' + str(i),
                DownSample(num_channels=32*(2**(i+1)), num_filters=32*(2**(i+2))))
            self.downs_list.append(downs_item)

    def forward(self, x):
        """Return ``[c0, c1, c2]``: outputs of the last three groups, deepest first."""
        # Stem.
        x = self.conv_bn_1(x)
        x = self.down_sample_1(x)

        # Collect the output of every residual group.
        c_list = []
        for i, group_item in enumerate(self.group_list):
            x = group_item(x)
            c_list.append(x)

            # Down-sample between groups (not after the last one).
            if i < len(self.num_groups) - 1:
                x = self.downs_list[i](x)

        # Last three entries in reverse order: c0, c1, c2.
        return c_list[-1:-4:-1]


class YOLOHeader(fluid.dygraph.Layer):
    """Detection head: alternating 1x1 / 3x3 ConvBN layers.

    Args:
        num_channels: Number of input channels.
        num_filters: Channels after each 1x1 convolution; each 3x3
            convolution emits ``num_filters * 2`` channels. Must be even.
    """

    def __init__(self, num_channels, num_filters):
        super(YOLOHeader, self).__init__()

        assert num_filters % 2 == 0, "num_filters {} cannot be divided by 2".format(num_filters)

        self.conv_bn_1 = ConvBN(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0)
        self.conv_bn_2 = ConvBN(
            num_channels=num_filters,
            num_filters=num_filters * 2,
            filter_size=3,
            stride=1,
            padding=1)
        self.conv_bn_3 = ConvBN(
            num_channels=num_filters * 2,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0)
        self.conv_bn_4 = ConvBN(
            num_channels=num_filters,
            num_filters=num_filters * 2,
            filter_size=3,
            stride=1,
            padding=1)
        self.route = ConvBN(
            num_channels=num_filters * 2,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0)
        self.tip = ConvBN(
            num_channels=num_filters,
            num_filters=num_filters * 2,
            filter_size=3,
            stride=1,
            padding=1)

    def forward(self, x):
        """Return ``(route, tip)``.

        ``route`` has ``num_filters`` channels; ``tip`` is computed from
        ``route`` and has ``num_filters * 2`` channels.
        """
        # Feature extraction.
        x = self.conv_bn_1(x)
        x = self.conv_bn_2(x)
        x = self.conv_bn_3(x)
        x = self.conv_bn_4(x)

        # Outputs.
        route = self.route(x)
        tip = self.tip(route)
        return route, tip


class YOLOv3(fluid.dygraph.Layer):
    """YOLOv3 detector: DarkNet53 backbone plus one head per anchor group.

    Args:
        num_classes: Number of object classes.
        anchor_mask: List of anchor-index lists, one per output scale. The
            backbone supplies three feature maps, so at most the first three
            groups produce an output.
    """

    def __init__(self, num_classes, anchor_mask):
        super(YOLOv3, self).__init__()

        # Backbone.
        self.backbone = DarkNet53()

        # Detection modules.
        self.num_classes = num_classes
        self.anchor_mask = anchor_mask
        self.dete_list = []  # detection heads
        self.conv_list = []  # output convolutions
        self.rout_list = []  # route convolutions feeding the next scale

        for i, mask in enumerate(self.anchor_mask):
            # Detection head. From the second scale on, the input is the
            # backbone feature concatenated with the up-sampled route of the
            # previous scale, hence the extra channels.
            dete_item = self.add_sublayer(
                'dete_' + str(i),
                YOLOHeader(
                    num_channels=1024//(2**i) if i == 0 else 1024//(2**i) + 512//(2**i),
                    num_filters=512//(2**i)))
            self.dete_list.append(dete_item)

            # Output convolution: (num_classes + 5) channels per anchor.
            conv_item = self.add_sublayer(
                'conv_' + str(i),
                Conv2D(
                    num_channels=1024//(2**i),
                    num_filters=len(mask) * (self.num_classes + 5),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    # Weights initialised from Normal(0, 0.02).
                    param_attr=ParamAttr(
                        initializer=fluid.initializer.Normal(0, 0.02)),
                    # Zero-initialised bias; L2Decay(0) disables weight decay.
                    bias_attr=ParamAttr(
                        initializer=fluid.initializer.Constant(0),
                        regularizer=L2Decay(0)),
                    act=None))
            self.conv_list.append(conv_item)

            # Route convolution (not needed after the last scale).
            if i < len(self.anchor_mask) - 1:
                rout_item = self.add_sublayer(
                    'rout_' + str(i),
                    ConvBN(
                        num_channels=512//(2**i),
                        num_filters=256//(2**i),
                        filter_size=1,
                        stride=1,
                        padding=0))
                self.rout_list.append(rout_item)

        # Shared up-sampling layer.
        self.upsample = UpSample()

    def forward(self, x):
        """Return the list of per-scale predictions, deepest scale first.

        Each entry has ``len(anchor_mask[i]) * (num_classes + 5)`` channels.
        """
        # Backbone features: c0, c1, c2.
        c_list = self.backbone(x)

        p_list = []
        route = None
        # Pair features with heads so that fewer than three anchor groups
        # yields fewer outputs instead of indexing past the head lists.
        scales = zip(c_list, self.dete_list, self.conv_list)
        for i, (c_item, dete_item, conv_item) in enumerate(scales):
            # From the second scale on, prepend the up-sampled route of the
            # previous scale along the channel axis.
            if i > 0:
                c_item = fluid.layers.concat(input=[route, c_item], axis=1)

            # Head output and final prediction for this scale.
            route, tip = dete_item(c_item)
            p_item = conv_item(tip)
            p_list.append(p_item)

            # Prepare the route for the next scale.
            if i < len(self.anchor_mask) - 1:
                route = self.rout_list[i](route)
                route = self.upsample(route)

        return p_list
参考资料:
https://blog.csdn.net/litt1e/article/details/88814417
https://blog.csdn.net/litt1e/article/details/88852745
https://blog.csdn.net/litt1e/article/details/88907542
https://aistudio.baidu.com/aistudio/projectdetail/742781
https://aistudio.baidu.com/aistudio/projectdetail/672017
https://aistudio.baidu.com/aistudio/projectdetail/868589
https://aistudio.baidu.com/aistudio/projectdetail/122277