• Neural Networks 5: Recurrent Neural Networks


    ▶ Recurrent Neural Networks

    ● Code, adapted from 【https://zybuluo.com/hanbingtao/note/541458】. It mainly implements a single-layer recurrent-network class, RecurrentLayer.
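
    The class follows the standard single-layer BPTT recurrences from the linked tutorial; in its notation, s_t is the state at step t, net_t = U x_t + W s_{t-1} its pre-activation, \delta_t = \partial E / \partial \mathrm{net}_t, and T the last time step:

      s_t = f(U x_t + W s_{t-1})                                                  % forward
      \delta_k^\top = \delta_{k+1}^\top \, W \, \mathrm{diag}[f'(\mathrm{net}_k)] % bpDelta
      \nabla_W E = \sum_{t=1}^{T} \delta_t \, s_{t-1}^\top                        % bpGrad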

      from functools import reduce
      import numpy as np

      global_epsilon = 10e-4                                          # perturbation for the numerical gradient check

      class ReluActivator(object):                                    # ReLU activation function
          def forward(self, x):
              return max(0, x)

          def backward(self, x):
              return int(x > 0)

      class IdentityActivator(object):                                # identity activation function
          def forward(self, x):
              return x

          def backward(self, x):
              return 1

      def myMap(array, op):                                           # apply op element-wise to a numpy array, in place
          for i in np.nditer(array, op_flags=['readwrite']):
              i[...] = op(i)

      class RecurrentLayer(object):
          def __init__(self, sCol, dCol, activator, ita):
              self.sCol = sCol                                        # input dimension
              self.dCol = dCol                                        # state dimension
              self.activator = activator
              self.ita = ita                                          # learning rate
              self.time = 0
              self.stateTable = []                                    # state table: one state vector per time step
              self.stateTable.append(np.zeros((dCol, 1)))
              self.U = np.random.uniform(-1e-4, 1e-4, (dCol, sCol))   # initialize U (input weights)
              self.W = np.random.uniform(-1e-4, 1e-4, (dCol, dCol))   # initialize W (recurrent weights)

          def print(self):
              print("inputSize = %d, outputSize = %d, ita = %f" % (self.sCol, self.dCol, self.ita))
              print("stateTable = \n", self.stateTable)
              print("U = \n", self.U)
              print("W = \n", self.W)

          def forward(self, sArray):                                  # forward pass: s_t = f(U x_t + W s_{t-1})
              self.time += 1
              state = (np.dot(self.U, sArray) + np.dot(self.W, self.stateTable[-1]))
              myMap(state, self.activator.forward)
              self.stateTable.append(state)

          def backward(self, deltaArrayNextLayer, activator):         # backward pass: deltas, then gradients
              self.bpDelta(deltaArrayNextLayer, activator)
              self.bpGrad()

          def update(self):                                           # gradient-descent step on W
              self.W -= self.ita * self.grad

          def bpDelta(self, deltaArrayNextLayer, activator):
              self.deltaTable = []                                    # error term of each time step
              for i in range(self.time):
                  self.deltaTable.append(np.zeros((self.dCol, 1)))
              self.deltaTable.append(deltaArrayNextLayer)
              for k in range(self.time - 1, 0, -1):                   # delta at step k from the delta at step k+1
                  deriv = self.stateTable[k].copy()                   # copy, so the stored forward states stay intact
                  myMap(deriv, activator.backward)                    # f'(net_k), evaluated element-wise on s_k = f(net_k)
                  self.deltaTable[k] = np.dot(np.dot(self.deltaTable[k+1].T, self.W), np.diag(deriv[:, 0])).T

          def bpGrad(self):
              self.gradTable = []                                     # weight gradient contributed by each time step
              for t in range(self.time + 1):
                  self.gradTable.append(np.zeros((self.dCol, self.dCol)))
              for t in range(self.time, 0, -1):
                  self.gradTable[t] = np.dot(self.deltaTable[t], self.stateTable[t-1].T)
              self.grad = reduce(lambda a, b: a + b, self.gradTable)  # total gradient: sum over all steps (gradTable[0] stays zero)

          def reset_state(self):
              self.time = 0                                           # reset the clock to t0
              self.stateTable = []                                    # state table: one state vector per time step
              self.stateTable.append(np.zeros((self.dCol, 1)))

      def createTestData():
          sArrayTest = [np.array([[1], [2], [3]]), np.array([[2], [3], [4]])]
          dArrayTest = np.array([[1], [2]])
          return sArrayTest, dArrayTest

      def test():
          recurrentLayer = RecurrentLayer(3, 2, ReluActivator(), 1e-3)
          sArrayTest, dArrayTest = createTestData()
          recurrentLayer.forward(sArrayTest[0])
          recurrentLayer.forward(sArrayTest[1])
          recurrentLayer.backward(dArrayTest, ReluActivator())
          recurrentLayer.print()

      def gradCheck():                                                # numerical gradient check on W
          rl = RecurrentLayer(3, 2, IdentityActivator(), 1e-3)
          sArrayTest, dArrayTest = createTestData()
          rl.forward(sArrayTest[0])
          rl.forward(sArrayTest[1])
          rl.backward(np.ones(rl.stateTable[-1].shape, dtype=np.float64), IdentityActivator())    # backward pass seeded with an all-ones deltaNextLayer
          for i in range(rl.W.shape[0]):
              for j in range(rl.W.shape[1]):
                  rl.W[i, j] += global_epsilon
                  rl.reset_state()
                  rl.forward(sArrayTest[0])
                  rl.forward(sArrayTest[1])
                  err1 = np.sum(rl.stateTable[-1])
                  rl.W[i, j] -= 2 * global_epsilon
                  rl.reset_state()
                  rl.forward(sArrayTest[0])
                  rl.forward(sArrayTest[1])
                  err2 = np.sum(rl.stateTable[-1])
                  expect_grad = (err1 - err2) / (2 * global_epsilon)
                  rl.W[i, j] += global_epsilon                        # restore the original weight
                  print('w(%d,%d): expected %f - actual %f' % (i, j, expect_grad, rl.grad[i, j]))

      if __name__ == "__main__":
          test()
          gradCheck()
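
    ● Neither test() nor gradCheck() ever calls update(). As a usage note, here is a minimal sketch of one full training step; trainStep and the squared-error loss on the final state are illustrative assumptions, not part of the referenced tutorial:

      def trainStep(layer, sArrays, dArray):
          # Hypothetical helper: one BPTT step under an assumed loss E = 0.5 * ||s_T - dArray||^2,
          # for which the delta fed into backward is s_T - dArray (identity output layer).
          layer.reset_state()                                       # clear states left over from the previous sequence
          for sArray in sArrays:
              layer.forward(sArray)                                 # run the whole input sequence forward
          delta = layer.stateTable[-1] - dArray                     # dE/dnet_T
          layer.backward(delta, layer.activator)                    # BPTT fills layer.grad
          layer.update()                                            # gradient step on W (U is not trained in this class)

      layer = RecurrentLayer(3, 2, IdentityActivator(), 1e-3)
      sArrayTest, dArrayTest = createTestData()
      for _ in range(10):
          trainStep(layer, sArrayTest, dArrayTest)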

    ● Output (from test() first, then gradCheck())

    inputSize = 3, outputSize = 2, ita = 0.001000
    stateTable =
     [array([[0.],
           [0.]]), array([[8.47915158e-05],
           [0.00000000e+00]]), array([[1.46176781e-04],
           [0.00000000e+00]])]
    U =
     [[ 8.40480937e-05 -6.87332018e-05  4.60699419e-05]
     [ 6.21850646e-06 -8.85631147e-05 -5.18937760e-05]]
    W =
     [[ 5.08985344e-06 -2.33817326e-05]
     [ 8.48161836e-06  4.77010103e-05]]
    w(0,0): expected -0.000272 - actual -0.000272
    w(0,1): expected -0.000151 - actual -0.000151
    w(1,0): expected -0.000272 - actual -0.000272
    w(1,1): expected -0.000151 - actual -0.000151
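
    Note that the two rows of the check repeat: w(0,j) always matches w(1,j). With only two time steps and s_0 = 0, the summed gradient collapses to a single outer product,

      \nabla_W E = \delta_2 \, s_1^\top + \delta_1 \, s_0^\top = \mathbf{1} \, s_1^\top ,

    because the seed delta \delta_2 is all ones, so every row of the gradient equals s_1^\top.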