• An RNN example: walking through forward propagation and backpropagation through time (BPTT)
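  For reference, these are the equations the code below implements (written out from the code itself; x_t is the one-hot input at step t, y_t the one-hot label, and \hat{y}_t the prediction). In LaTeX notation:

    % Forward pass: tanh recurrence, softmax output
    h_t = \tanh(U x_t + W h_{t-1}), \qquad \hat{y}_t = \mathrm{softmax}(V h_t)
    % Cross-entropy loss summed over all time steps
    L = -\sum_t y_t^\top \log \hat{y}_t
    % Backward pass (BPTT): output error, then the delta recursion through time
    \delta^y_t = \hat{y}_t - y_t
    \delta_t = \mathrm{diag}(1 - h_t^2)\,\big(V^\top \delta^y_t + W^\top \delta_{t+1}\big)
    % Gradient accumulation at each step
    \partial L/\partial V \mathrel{+}= \delta^y_t h_t^\top, \quad
    \partial L/\partial W \mathrel{+}= \delta_t h_{t-1}^\top, \quad
    \partial L/\partial U \mathrel{+}= \delta_t x_t^\top

  At the last time step the W^\top \delta_{t+1} term is absent, which is the `if t == T - 1` branch in bptt().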


    import numpy as np
    from datetime import datetime

    class RNN:
        def __init__(self, word_dim, hidden_dim=100, bptt_truncate=4):
            self.word_dim = word_dim
            self.hidden_dim = hidden_dim
            self.bptt_truncate = bptt_truncate
            # Randomly initialize the network parameters.
            # np.random.uniform(low, high, size=(m, n)) -> m x n matrix
            self.U = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim), (hidden_dim, word_dim))
            self.V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (word_dim, hidden_dim))
            self.W = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))

        def softmax(self, x):
            # Shift by the max for numerical stability; the constant cancels in the ratio
            exp_x = np.exp(x - np.max(x))
            return exp_x / np.sum(exp_x)

        def forward_propagation(self, x):
            # hidden states are h, predictions are y_hat
            T = len(x)
            # h has one extra row: h[-1] is the initial hidden state for step 0
            h = np.zeros((T + 1, self.hidden_dim))
            h[-1] = np.zeros(self.hidden_dim)
            y_hat = np.zeros((T, self.word_dim))
            # For each time step...
            for t in np.arange(T):
                x_t = np.array(x[t]).reshape(-1, 1)
                # h_t = tanh(U x_t + W h_{t-1}); tanh is needed here so that the
                # (1 - h^2) factor in bptt() below is the correct derivative
                h[t] = np.tanh(self.U.dot(x_t) + self.W.dot(h[t-1].reshape(-1, 1))).reshape(-1)
                o_t = self.V.dot(h[t])
                y_hat[t] = self.softmax(o_t)
            return y_hat, h

        def predict(self, x):
            # Perform forward propagation and return the index of the highest score at each step
            y_hat, h = self.forward_propagation(x)
            return np.argmax(y_hat, axis=1)

        def calculate_total_loss(self, x, labels):
            total_L = 0
            # For each sentence...
            for i in np.arange(len(labels)):
                y_hat, h = self.forward_propagation(x[i])
                # Cross-entropy: with one-hot labels, y_pred.dot(y_true) picks out
                # the predicted probability of the correct word
                total_L += -1 * sum(np.log(y_pred.dot(y_true)) for y_pred, y_true in zip(y_hat, np.array(labels[i])))
            return total_L

        def calculate_loss(self, x, labels):
            # Divide the total loss by the number of training words
            N = np.sum([len(label_i) for label_i in labels])
            return self.calculate_total_loss(x, labels) / N

        def bptt(self, x, label):
            T = len(label)
            # Perform forward propagation
            y_hat, h = self.forward_propagation(x)
            # We accumulate the gradients in these variables
            dLdU = np.zeros(self.U.shape)
            dLdV = np.zeros(self.V.shape)
            dLdW = np.zeros(self.W.shape)
            # delta_y -> dL/do_t = y_hat_t - y_t (softmax combined with cross-entropy)
            delta_y = np.zeros(y_hat.shape)
            # For each output, moving backwards through time
            # (this simplified version backpropagates through all T steps;
            # bptt_truncate is not applied here)
            for t in np.arange(T - 1, -1, -1):
                delta_y[t] = y_hat[t] - np.array(label[t])
                dLdV += delta_y[t].reshape(-1, 1) @ h[t].reshape(1, -1)
                # diag(1 - h_t^2) is the Jacobian of tanh at step t
                if t == T - 1:
                    # At the last step no gradient flows back from step t + 1
                    delta_t = np.diag(1 - np.power(h[t], 2)) @ self.V.T @ delta_y[t].reshape(-1, 1)
                else:
                    delta_t = np.diag(1 - np.power(h[t], 2)) @ (self.V.T @ delta_y[t].reshape(-1, 1) + self.W.T @ delta_t.reshape(-1, 1))
                dLdW += delta_t @ h[t - 1].reshape(1, -1)
                dLdU += delta_t @ np.array(x[t]).reshape(1, -1)
            return dLdU, dLdV, dLdW

        # Performs one step of SGD.
        def numpy_sgd_step(self, x, label, learning_rate):
            # Calculate the gradients
            dLdU, dLdV, dLdW = self.bptt(x, label)
            # Change parameters according to gradients and learning rate
            self.U -= learning_rate * dLdU
            self.V -= learning_rate * dLdV
            self.W -= learning_rate * dLdW

    # - model: The RNN model instance
    # - X_train: The training data set
    # - y_train: The training data labels
    # - learning_rate: Initial learning rate for SGD
    # - nepoch: Number of times to iterate through the complete dataset
    # - evaluate_loss_after: Evaluate the loss after this many epochs
    def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=100, evaluate_loss_after=5):
        # We keep track of the losses so we can plot them later
        losses = []
        num_examples_seen = 0
        for epoch in range(nepoch):
            # Optionally evaluate the loss
            if epoch % evaluate_loss_after == 0:
                loss = model.calculate_loss(X_train, y_train)
                losses.append((num_examples_seen, loss))
                time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f'{time} Loss after num_examples_seen={num_examples_seen} epoch={epoch}: {loss}')
                # Halve the learning rate if the loss increases
                if len(losses) > 1 and losses[-1][1] > losses[-2][1]:
                    learning_rate = learning_rate * 0.5
                    print('Setting learning rate to %f' % learning_rate)

            # For each training example...
            for i in range(len(y_train)):
                # One SGD step
                model.numpy_sgd_step(X_train[i], y_train[i], learning_rate)
                num_examples_seen += 1

    if __name__ == '__main__':
        # Two five-word "sentences" (the film titles "你好李焕英" and "夏洛特烦恼");
        # each word is a one-hot vector over the 10-word vocabulary
        s1 = '你 好 李 焕 英'
        s2 = '夏 洛 特 烦 恼'
        vocab_size = len(s1.split(' ')) + len(s2.split(' '))
        vocab = [[0] * vocab_size for _ in range(vocab_size)]
        for i in range(vocab_size): vocab[i][i] = 1
        # Labels are the inputs shifted left by one word (next-word prediction)
        x_sample = [vocab[:5]] + [vocab[5:]]
        labels = [vocab[1:6]] + [vocab[6:] + [vocab[0]]]

        rnn = RNN(10)
        train_with_sgd(rnn, x_sample, labels)
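
  As a sanity check on the BPTT gradients, here is a minimal numerical gradient check (a sketch, not part of the original post; the function name gradient_check and the eps/tol values are illustrative). It compares the analytic gradients from bptt() with centered finite differences of calculate_total_loss():

    def gradient_check(model, x, label, eps=1e-4, tol=1e-3):
        # Analytic gradients from backpropagation through time
        analytic = dict(zip(['U', 'V', 'W'], model.bptt(x, label)))
        for name in ['U', 'V', 'W']:
            param = getattr(model, name)
            it = np.nditer(param, flags=['multi_index'])
            while not it.finished:
                ix = it.multi_index
                original = param[ix]
                # Centered difference: (L(theta + eps) - L(theta - eps)) / (2 * eps)
                param[ix] = original + eps
                loss_plus = model.calculate_total_loss([x], [label])
                param[ix] = original - eps
                loss_minus = model.calculate_total_loss([x], [label])
                param[ix] = original  # restore the parameter
                numeric = (loss_plus - loss_minus) / (2 * eps)
                if abs(numeric - analytic[name][ix]) > tol:
                    print(f'{name}[{ix}]: analytic={analytic[name][ix]}, numeric={numeric}')
                it.iternext()
        print('gradient check done')

  Running it on a small model keeps the many loss evaluations cheap, e.g. gradient_check(RNN(10, hidden_dim=5), x_sample[0], labels[0]); the analytic and numeric gradients should agree once forward_propagation applies tanh.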

    Reference blog: https://zhuanlan.zhihu.com/p/371849556

  • Original post: https://www.cnblogs.com/randy-lo/p/15268683.html