▶ Recurrent Neural Networks
● Code, adapted from the reference at https://zybuluo.com/hanbingtao/note/541458. It mainly implements a single-layer recurrent neural network as the class RecurrentLayer.
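● The class implements the standard single-layer RNN equations derived in the referenced tutorial: the forward pass computes the state as s_t = f(U x_t + W s_{t-1}); backpropagation through time carries the error term one step back via delta_k^T = delta_{k+1}^T W diag[f'(net_k)]; and the gradient of W is the sum of the per-step contributions, grad_W = sum_t (delta_t s_{t-1}^T).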
from functools import reduce
import numpy as np

global_epsilon = 10e-4

class ReluActivator(object):            # ReLU activation function
    def forward(self, x):
        return max(0, x)

    def backward(self, x):
        return int(x > 0)

class IdentityActivator(object):        # identity activation function
    def forward(self, x):
        return x

    def backward(self, x):
        return 1

def myMap(array, op):                   # apply op element-wise, in place, to a numpy array
    for i in np.nditer(array, op_flags=['readwrite']):
        i[...] = op(i)

class RecurrentLayer(object):
    def __init__(self, sCol, dCol, activator, ita):
        self.sCol = sCol                # input size
        self.dCol = dCol                # state (output) size
        self.activator = activator
        self.ita = ita                  # learning rate
        self.time = 0
        self.stateTable = []            # state table: one row per time step, holding the states of all neurons
        self.stateTable.append(np.zeros((dCol, 1)))
        self.U = np.random.uniform(-1e-4, 1e-4, (dCol, sCol))   # initialize U
        self.W = np.random.uniform(-1e-4, 1e-4, (dCol, dCol))   # initialize W

    def print(self):
        print("inputSize = %d, outputSize = %d, ita = %f" % (self.sCol, self.dCol, self.ita))
        print("stateTable = ", self.stateTable)
        print("U = ", self.U)
        print("W = ", self.W)

    def forward(self, sArray):          # forward pass: s_t = f(U x_t + W s_{t-1})
        self.time += 1
        state = (np.dot(self.U, sArray) + np.dot(self.W, self.stateTable[-1]))
        myMap(state, self.activator.forward)
        self.stateTable.append(state)

    def backward(self, deltaArrayNextLayer, activator):
        self.bpDelta(deltaArrayNextLayer, activator)
        self.bpGrad()

    def update(self):
        self.W -= self.ita * self.grad

    def bpDelta(self, deltaArrayNextLayer, activator):
        self.deltaTable = []            # holds the error term of every time step
        for i in range(self.time):
            self.deltaTable.append(np.zeros((self.dCol, 1)))
        self.deltaTable.append(deltaArrayNextLayer)
        for k in range(self.time - 1, 0, -1):   # compute delta at time k from delta at time k+1
            state = self.stateTable[k+1].copy()
            # NOTE: stateTable[k+1] is overwritten in place with derivative values here,
            # so printing the layer after backward() shows derivatives, not activations
            myMap(self.stateTable[k+1], activator.backward)
            self.deltaTable[k] = np.dot(np.dot(self.deltaTable[k+1].T, self.W), np.diag(state[:, 0])).T

    def bpGrad(self):
        self.gradTable = []             # weight gradient at each time step
        for t in range(self.time + 1):
            self.gradTable.append(np.zeros((self.dCol, self.dCol)))
        for t in range(self.time, 0, -1):
            self.gradTable[t] = np.dot(self.deltaTable[t], self.stateTable[t-1].T)
        self.grad = reduce(lambda a, b: a + b, self.gradTable, self.gradTable[0])   # sum of the per-step gradients; gradTable[0] is a zero matrix

    def reset_state(self):
        self.time = 0                   # reset the current time to t0
        self.stateTable = []            # state table: one row per time step
        self.stateTable.append(np.zeros((self.dCol, 1)))

def createTestData():
    sArrayTest = [np.array([[1], [2], [3]]), np.array([[2], [3], [4]])]
    dArrayTest = np.array([[1], [2]])
    return sArrayTest, dArrayTest

def test():
    recurrentLayer = RecurrentLayer(3, 2, ReluActivator(), 1e-3)
    sArrayTest, dArrayTest = createTestData()
    recurrentLayer.forward(sArrayTest[0])
    recurrentLayer.forward(sArrayTest[1])
    recurrentLayer.backward(dArrayTest, ReluActivator())
    recurrentLayer.print()

def gradCheck():                        # gradient check
    rl = RecurrentLayer(3, 2, IdentityActivator(), 1e-3)
    sArrayTest, dArrayTest = createTestData()
    rl.forward(sArrayTest[0])
    rl.forward(sArrayTest[1])
    # run backward once with an all-ones matrix as the initial deltaNextLayer, i.e. take E = sum of the final state
    rl.backward(np.ones(rl.stateTable[-1].shape, dtype=np.float64), IdentityActivator())
    for i in range(rl.W.shape[0]):
        for j in range(rl.W.shape[1]):
            rl.W[i, j] += global_epsilon
            rl.reset_state()
            rl.forward(sArrayTest[0])
            rl.forward(sArrayTest[1])
            err1 = np.sum(rl.stateTable[-1])
            rl.W[i, j] -= 2 * global_epsilon
            rl.reset_state()
            rl.forward(sArrayTest[0])
            rl.forward(sArrayTest[1])
            err2 = np.sum(rl.stateTable[-1])
            expect_grad = (err1 - err2) / (2 * global_epsilon)   # central-difference estimate
            rl.W[i, j] += global_epsilon                         # restore the weight
            print('w(%d,%d): expected %f - actual %f' % (i, j, expect_grad, rl.grad[i, j]))

if __name__ == "__main__":
    test()
    gradCheck()
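● Usage sketch (not part of the referenced code): the listing above defines update() but never calls it. A minimal training loop might look as follows, reusing the imports and classes above; the squared-error loss on the final state, the target vector, and the epoch count are assumptions made up for illustration. Note that the class only computes the gradient of W, so U stays at its random initial value.

def trainSketch():                              # hypothetical helper, not in the original listing
    layer = RecurrentLayer(3, 2, IdentityActivator(), 1e-3)
    inputs, _ = createTestData()
    target = np.array([[0.5], [0.5]])           # assumed target for the final state
    for epoch in range(10):
        layer.reset_state()
        for x in inputs:                        # run the whole sequence forward
            layer.forward(x)
        delta = layer.stateTable[-1] - target   # gradient of 0.5*||s_T - target||^2 w.r.t. s_T
        layer.backward(delta, IdentityActivator())
        layer.update()                          # gradient-descent step on W (U is never updated)

With the identity activation the gradient with respect to net equals the gradient with respect to the state, so passing it as deltaArrayNextLayer is consistent with how gradCheck() drives backward().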
● Output
inputSize = 3, outputSize = 2, ita = 0.001000
stateTable =  [array([[0.], [0.]]), array([[8.47915158e-05], [0.00000000e+00]]), array([[1.], [0.]])]
U =  [[ 8.40480937e-05 -6.87332018e-05  4.60699419e-05]
 [ 6.21850646e-06 -8.85631147e-05 -5.18937760e-05]]
W =  [[ 5.08985344e-06 -2.33817326e-05]
 [ 8.48161836e-06  4.77010103e-05]]
w(0,0): expected -0.000272 - actual -0.000272
w(0,1): expected -0.000151 - actual -0.000151
w(1,0): expected -0.000272 - actual -0.000272
w(1,1): expected -0.000151 - actual -0.000151
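● A note on reading the output: the last entry of stateTable prints as [[1.], [0.]] rather than values on the order of 1e-4 because bpDelta overwrites stateTable[k+1] in place with the ReLU derivatives (1 where the state was positive, 0 otherwise) during the backward pass. The matching expected/actual columns show that the analytic gradient from bpGrad agrees with the central-difference estimate (err1 - err2) / (2 * global_epsilon), which is exactly what the gradient check is meant to verify.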