▶ Convolutional Neural Networks
● Code, following the reference at 【https://www.zybuluo.com/hanbingtao/note/485480】. It implements a convolutional layer, ConvLayer, and a max-pooling layer, MaxPoolLayer.
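Before reading the listing, it may help to see the output-size formula that ConvLayer.calculateDSize below implements. A minimal sketch (the helper name out_size is illustrative, not from the listing):

    # Output size of a convolution: (input - filter + 2 * padding) // stride + 1.
    def out_size(input_size, filter_size, zero_pad, stride):
        return (input_size - filter_size + 2 * zero_pad) // stride + 1

    # The convolution test below uses a 5x5 input, 3x3 filters, padding 1, stride 2:
    print(out_size(5, 3, 1, 2))   # -> 3, matching the 3x3 feature maps in the output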
import numpy as np

globalIta = 0.05
globalEpsilon = 1e-3

class ReluActivator(object):            # ReLU activation
    def forward(self, x):
        return max(0, x)

    def backward(self, x):
        return int(x > 0)

class IdentityActivator(object):        # identity activation
    def forward(self, x):
        return x

    def backward(self, x):
        return 1

def subArray(sArray, i, j, fRow, fCol, stride):    # extract the convolution patch
    startI = i * stride
    startJ = j * stride
    if sArray.ndim == 2:
        return sArray[startI : startI + fRow, startJ : startJ + fCol]
    return sArray[:, startI : startI + fRow, startJ : startJ + fCol]

def get_max_index(array):               # index of the maximum of a 2D patch
    maxI = 0
    maxJ = 0
    max_value = array[0, 0]
    for i in range(array.shape[0]):
        for j in range(array.shape[1]):
            if array[i, j] > max_value:
                maxI, maxJ = i, j
                max_value = array[i, j]
    return maxI, maxJ

def conv(sArray, fArray, dArray, stride, fBias):   # cross-correlate sArray with fArray into dArray
    dRow, dCol = dArray.shape
    fRow, fCol = fArray.shape[:2]
    for i in range(dRow):
        for j in range(dCol):
            dArray[i][j] = np.sum(subArray(sArray, i, j, fRow, fCol, stride) * fArray) + fBias

def padding(sArray, zpRow, zpCol = -1): # add zero padding around the array
    if zpCol < 0:
        zpCol = zpRow

    if sArray.ndim == 2:
        sRow, sCol = sArray.shape
        dArray = np.zeros((sRow + 2 * zpRow, sCol + 2 * zpCol))
        dArray[zpRow : zpRow + sRow, zpCol : zpCol + sCol] = sArray
        return dArray

    if sArray.ndim == 3:
        sPage, sRow, sCol = sArray.shape
        dArray = np.zeros((sPage, sRow + 2 * zpRow, sCol + 2 * zpCol))
        dArray[:, zpRow : zpRow + sRow, zpCol : zpCol + sCol] = sArray
        return dArray
    return sArray

def myMap(array, op):                   # iterate over the array, applying op to each element in place
    for i in np.nditer(array, op_flags=['readwrite']):
        i[...] = op(i)

class Filter(object):                   # convolution filter with three dimensions
    def __init__(self, row, col, page):
        self.page = page
        self.row = row
        self.col = col
        self.fArray = np.random.uniform(-1e-4, 1e-4, (self.page, self.row, self.col))
        self.fBias = 0
        self.fArrayGrad = np.zeros(self.fArray.shape)
        self.fBiasGrad = 0

    def __repr__(self):
        return 'filter weights: %s bias: %s' % (repr(self.fArray), repr(self.fBias))

    def update(self, ita):              # gradient-descent update of the filter weights
        self.fArray -= ita * self.fArrayGrad
        self.fBias -= ita * self.fBiasGrad

class ConvLayer(object):
    # A single convolutional layer. The constructor fixes the sizes of the input
    # image, the filters and the output image, and holds the filter and output data.
    def __init__(self, sRow, sCol, nChannel, fRow, fCol, nFilter, zeroPad,
                 stride = 1, activator = IdentityActivator(), ita = globalIta):
        self.sRow = sRow
        self.sCol = sCol
        self.nChannel = nChannel
        self.fRow = fRow
        self.fCol = fCol
        self.nFilter = nFilter
        self.zeroPad = zeroPad
        self.stride = stride
        self.activator = activator
        self.ita = ita
        self.filters = [ Filter(self.fRow, self.fCol, self.nChannel) for i in range(self.nFilter) ]
        self.dRow = ConvLayer.calculateDSize(self.sRow, fRow, self.zeroPad, self.stride)
        self.dCol = ConvLayer.calculateDSize(self.sCol, fCol, self.zeroPad, self.stride)
        self.dArray = np.zeros((self.nFilter, self.dRow, self.dCol))

    @staticmethod                       # size of the output image
    def calculateDSize(input_size, filter_size, zeroPad, stride):
        return (input_size - filter_size + 2 * zeroPad) // stride + 1

    def forward(self, sArray):          # forward pass: convolve, then activate
        self.sArray = sArray
        self.sPadArray = padding(sArray, self.zeroPad)
        for f in range(self.nFilter):
            filter = self.filters[f]
            conv(self.sPadArray, filter.fArray, self.dArray[f], self.stride, filter.fBias)
        myMap(self.dArray, self.activator.forward)

    def backward(self, sArray, deltaArrayNextLayer, activator):
        # Compute the error terms and the filter gradients.
        self.forward(sArray)
        self.bpDeltaAndGrad(deltaArrayNextLayer, activator)

    def update(self):                   # apply the gradients to every filter
        for filter in self.filters:
            filter.update(self.ita)

    def bpDeltaAndGrad(self, deltaArrayNextLayer, activator):
        # Compute the sensitivity map passed back to the previous layer.
        # First expand the next layer's deltas to the stride = 1 case.
        exArray = self.expandDelta(deltaArrayNextLayer)
        # Pad exArray: the border of the next layer's deltas also receives
        # residuals, but these are not propagated further up.
        exPage, exRow, exCol = exArray.shape
        exPadArray = padding(exArray, (self.sRow + self.fRow - 1 - exRow) // 2,
                             (self.sCol + self.fCol - 1 - exCol) // 2)

        self.deltaArray = np.zeros((self.nChannel, self.sRow, self.sCol))  # this layer's deltas
        for f in range(self.nFilter):   # handle each filter in turn
            filter = self.filters[f]
            # Rotate the filter by 180 degrees to perform a convolution in the mathematical sense.
            rotatedFArray = np.array(list(map(lambda i: np.rot90(i, 2), filter.fArray)))
            # With several filters, the final delta is the sum of each filter's contribution.
            temp = np.zeros((self.nChannel, self.sRow, self.sCol))
            for d in range(self.nChannel):
                conv(exPadArray[f], rotatedFArray[d], temp[d], 1, 0)            # error term
                conv(self.sPadArray[d], exArray[f], filter.fArrayGrad[d], 1, 0) # weight gradient
            self.deltaArray += temp
            filter.fBiasGrad = np.sum(exArray[f])   # bias gradient

        temp = np.array(self.sArray)    # multiply the deltas by the derivative of the activation
        myMap(temp, activator.backward)
        self.deltaArray *= temp

    def expandDelta(self, deltaArray):
        # Size the delta array would have if the stride were 1.
        exRow = (self.sRow - self.fRow + 2 * self.zeroPad + 1)
        exCol = (self.sCol - self.fCol + 2 * self.zeroPad + 1)

        exArray = np.zeros((deltaArray.shape[0], exRow, exCol))  # build the new delta array
        for i in range(self.dRow):
            for j in range(self.dCol):
                exArray[:, i * self.stride, j * self.stride] = deltaArray[:, i, j]
        return exArray

class MaxPoolLayer(object):
    # Max-pooling layer. The constructor fixes the sizes of the input image,
    # the pooling window and the output image, and holds the output data.
    def __init__(self, sRow, sCol, nChannel, fRow, fCol, stride):
        self.sRow = sRow
        self.sCol = sCol
        self.nChannel = nChannel
        self.fRow = fRow
        self.fCol = fCol
        self.stride = stride
        self.dRow = (sRow - fRow) // self.stride + 1
        self.dCol = (sCol - fCol) // self.stride + 1
        self.dArray = np.zeros((self.nChannel, self.dRow, self.dCol))

    def forward(self, sArray):          # forward pooling
        for d in range(self.nChannel):
            for i in range(self.dRow):
                for j in range(self.dCol):
                    self.dArray[d, i, j] = subArray(sArray[d], i, j, self.fRow, self.fCol, self.stride).max()

    def backward(self, sArray, deltaArrayNextLayer):   # backward pooling
        self.deltaArray = np.zeros(sArray.shape)
        for d in range(self.nChannel):
            for i in range(self.dRow):
                for j in range(self.dCol):
                    patch_array = subArray(sArray[d], i, j, self.fRow, self.fCol, self.stride)
                    nonZeroRow, nonZeroCol = get_max_index(patch_array)
                    self.deltaArray[d, i * self.stride + nonZeroRow, j * self.stride + nonZeroCol] = deltaArrayNextLayer[d, i, j]

def gradCheck():                        # numerical gradient check
    sArray, deltaNextLayer, convLayer = createTestDataConv()
    convLayer.forward(sArray)
    deltaArrayNextLayer = np.ones(convLayer.dArray.shape, dtype=np.float64)
    convLayer.backward(sArray, deltaArrayNextLayer, IdentityActivator())
    for d in range(convLayer.filters[0].fArrayGrad.shape[0]):
        for i in range(convLayer.filters[0].fArrayGrad.shape[1]):
            for j in range(convLayer.filters[0].fArrayGrad.shape[2]):
                convLayer.filters[0].fArray[d, i, j] += globalEpsilon
                convLayer.forward(sArray)
                err1 = np.sum(convLayer.dArray)
                convLayer.filters[0].fArray[d, i, j] -= 2 * globalEpsilon
                convLayer.forward(sArray)
                err2 = np.sum(convLayer.dArray)
                expect_grad = (err1 - err2) / (2 * globalEpsilon)
                convLayer.filters[0].fArray[d, i, j] += globalEpsilon
                print('weights(%d,%d,%d): expected - actual %f - %f' % (
                    d, i, j, expect_grad, convLayer.filters[0].fArrayGrad[d, i, j]))

def createTestDataConv():               # test data for the convolutional layer
    a = np.array(
        [[[0,1,1,0,2],[2,2,2,2,1],[1,0,0,2,0],[0,1,1,0,0],[1,2,0,0,2]],
         [[1,0,2,2,0],[0,0,0,2,0],[1,2,1,2,1],[1,0,0,0,0],[1,2,1,1,1]],
         [[2,1,2,0,0],[1,0,0,1,0],[0,2,1,0,1],[0,1,2,2,2],[2,1,0,0,1]]]
    )
    b = np.array( [[[0,1,1],[2,2,2],[1,0,0]],[[1,0,2],[0,0,0],[1,2,1]]] )
    c = ConvLayer(5, 5, 3, 3, 3, 2, 1, 2, IdentityActivator(), 0.001)
    c.filters[0].fArray = np.array(
        [[[-1,1,0],[0,1,0],[0,1,1]],[[-1,-1,0],[0,0,0],[0,-1,0]],[[0,0,-1],[0,1,0],[1,-1,-1]]],
        dtype=np.float64 )
    c.filters[0].fBias = 1
    c.filters[1].fArray = np.array(
        [[[1,1,-1],[-1,-1,1],[0,-1,1]],[[0,1,0],[-1,0,-1],[-1,1,0]],[[-1,0,0],[-1,0,1],[-1,0,0]]],
        dtype=np.float64 )
    c.filters[1].fBias = 0
    return a, b, c

def createTestDataPool():               # test data for the pooling layer
    a = np.array( [[[1,1,2,4],[5,6,7,8],[3,2,1,0],[1,2,3,4]],
                   [[0,1,2,3],[4,5,6,7],[8,9,0,1],[3,4,5,6]]], dtype=np.float64 )
    b = np.array( [[[1,2],[2,4]],[[3,5],[8,2]]], dtype=np.float64 )
    c = MaxPoolLayer(4, 4, 2, 2, 2, 2)
    return a, b, c

def testConv():
    print(" testConv")
    sArray, deltaNextLayer, convLayer = createTestDataConv()
    convLayer.forward(sArray)           # forward convolution
    print(convLayer.dArray)
    convLayer.backward(sArray, deltaNextLayer, IdentityActivator())  # backward pass
    convLayer.update()                  # update the filter weights
    print(convLayer.filters[0])
    print(convLayer.filters[1])

def testPool():
    print(" testPool")
    sArray, deltaNextLayer, maxPoolLayer = createTestDataPool()
    maxPoolLayer.forward(sArray)        # forward pooling
    print('input array: %s output array: %s' % (sArray, maxPoolLayer.dArray))
    maxPoolLayer.backward(sArray, deltaNextLayer)   # backward pooling
    print('input array: %s sensitivity array: %s delta array: %s' % (
        sArray, deltaNextLayer, maxPoolLayer.deltaArray))

if __name__ == '__main__':
    testConv()
    testPool()
    gradCheck()
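The least obvious step in the backward pass above is expandDelta, which rewrites a stride-s sensitivity map as the stride-1 map it would have come from, scattering the entries and leaving zeros in between. A minimal standalone sketch of the same idea (the names expand and d are illustrative, not from the listing):

    import numpy as np

    def expand(delta, stride, exRow, exCol):
        # Scatter each stride-s entry to its stride-1 position; every other
        # position stays zero because that output was never computed.
        ex = np.zeros((delta.shape[0], exRow, exCol))
        ex[:, ::stride, ::stride] = delta
        return ex

    d = np.arange(8, dtype=np.float64).reshape(2, 2, 2)   # 2 channels of 2x2 deltas
    print(expand(d, 2, 3, 3))   # each 2x2 map becomes 3x3 with zeros interleaved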
● Output
[[[ 6.  7.  5.] [ 3. -1. -1.] [ 2. -1.  4.]]
 [[ 2. -5. -8.] [ 1. -4. -4.] [ 0. -5. -5.]]]
filter weights: array([[[-1.008,  0.99 , -0.009], [-0.005,  0.994, -0.006], [-0.006,  0.995,  0.996]],
       [[-1.004, -1.001, -0.004], [-0.01 , -0.009, -0.012], [-0.002, -1.002, -0.002]],
       [[-0.002, -0.002, -1.003], [-0.005,  0.992, -0.005], [ 0.993, -1.008, -1.007]]]) bias: 0.991
filter weights: array([[[ 9.980e-01,  9.980e-01, -1.001e+00], [-1.004e+00, -1.007e+00,  9.970e-01], [-4.000e-03, -1.004e+00,  9.980e-01]],
       [[ 0.000e+00,  9.990e-01,  0.000e+00], [-1.009e+00, -5.000e-03, -1.004e+00], [-1.004e+00,  1.000e+00,  0.000e+00]],
       [[-1.004e+00, -6.000e-03, -5.000e-03], [-1.002e+00, -5.000e-03,  9.980e-01], [-1.002e+00, -1.000e-03,  0.000e+00]]]) bias: -0.007
input array: [[[1. 1. 2. 4.] [5. 6. 7. 8.] [3. 2. 1. 0.] [1. 2. 3. 4.]]
 [[0. 1. 2. 3.] [4. 5. 6. 7.] [8. 9. 0. 1.] [3. 4. 5. 6.]]]
output array: [[[6. 8.] [3. 4.]]
 [[5. 7.] [9. 6.]]]
input array: [[[1. 1. 2. 4.] [5. 6. 7. 8.] [3. 2. 1. 0.] [1. 2. 3. 4.]]
 [[0. 1. 2. 3.] [4. 5. 6. 7.] [8. 9. 0. 1.] [3. 4. 5. 6.]]]
sensitivity array: [[[1. 2.] [2. 4.]]
 [[3. 5.] [8. 2.]]]
delta array: [[[0. 0. 0. 0.] [0. 1. 0. 2.] [2. 0. 0. 0.] [0. 0. 0. 4.]]
 [[0. 0. 0. 0.] [0. 3. 0. 5.] [0. 8. 0. 0.] [0. 0. 0. 2.]]]
weights(0,0,0): expected - actual 5.000000 - 5.000000
weights(0,0,1): expected - actual 6.000000 - 6.000000
weights(0,0,2): expected - actual 5.000000 - 5.000000
weights(0,1,0): expected - actual 5.000000 - 5.000000
weights(0,1,1): expected - actual 7.000000 - 7.000000
weights(0,1,2): expected - actual 5.000000 - 5.000000
weights(0,2,0): expected - actual 5.000000 - 5.000000
weights(0,2,1): expected - actual 6.000000 - 6.000000
weights(0,2,2): expected - actual 5.000000 - 5.000000
weights(1,0,0): expected - actual 2.000000 - 2.000000
weights(1,0,1): expected - actual 1.000000 - 1.000000
weights(1,0,2): expected - actual 2.000000 - 2.000000
weights(1,1,0): expected - actual 9.000000 - 9.000000
weights(1,1,1): expected - actual 9.000000 - 9.000000
weights(1,1,2): expected - actual 9.000000 - 9.000000
weights(1,2,0): expected - actual 2.000000 - 2.000000
weights(1,2,1): expected - actual 1.000000 - 1.000000
weights(1,2,2): expected - actual 2.000000 - 2.000000
weights(2,0,0): expected - actual 4.000000 - 4.000000
weights(2,0,1): expected - actual 5.000000 - 5.000000
weights(2,0,2): expected - actual 4.000000 - 4.000000
weights(2,1,0): expected - actual 4.000000 - 4.000000
weights(2,1,1): expected - actual 9.000000 - 9.000000
weights(2,1,2): expected - actual 4.000000 - 4.000000
weights(2,2,0): expected - actual 4.000000 - 4.000000
weights(2,2,1): expected - actual 5.000000 - 5.000000
weights(2,2,2): expected - actual 4.000000 - 4.000000
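Beyond gradCheck, the forward pass can also be sanity-checked against a library implementation. A sketch, assuming scipy is available (scipy is not used by the listing itself) and run in the same module as the code above:

    import numpy as np
    from scipy.signal import correlate2d

    sArray, _, convLayer = createTestDataConv()
    convLayer.forward(sArray)

    f = convLayer.filters[0]
    padded = padding(sArray, convLayer.zeroPad)
    # ConvLayer computes a cross-correlation: sum the per-channel 2D
    # correlations, subsample by the stride, then add the bias. (The test
    # layer uses IdentityActivator, so no activation needs to be applied.)
    full = sum(correlate2d(padded[ch], f.fArray[ch], mode='valid')
               for ch in range(convLayer.nChannel))
    expected = full[::convLayer.stride, ::convLayer.stride] + f.fBias
    assert np.allclose(expected, convLayer.dArray[0])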