• cs224d 作业 problem set3 (一) 实现Recursive Neural Network 递归神经网络


    1、Recursive Neural Networks能够更好地体现每个词与词之间语法上的联系
    这里我们选取的损失函数仍然是交叉熵函数

    2、整个网络的结构如下图所示:

    每个参数更新时的梯度值如何计算,稍后再给大家计算相应的数学公式

    这里先列出节点的合并规则

     

    1、即假设将一句话中的词先两两合并,并通过神经网络计算出合并后的得分情况

    2、然后找出合并后得分最高的两个词进行真正的合并,得到新的节点,其余节点不合并

    3、将得到的新节点加入到下一轮两两合并的计算过程中,直至得到最终节点

    下面是计算的代码:

    '''
    Created on 2017年10月5日
    
    @author: weizhen
    '''
    # 一个简单的递归神经网络的实现,有着一个ReLU层和一个softmax层
    # TODO : 必须要更新前向和后向传递函数
    # 你可以通过执行 python rnn.py 方法来执行一个梯度检验
    # 插入pdb.set_trace()  在你不确定将会发生什么的地方
    
    import numpy as np
    import collections
    import pdb
    import tree as treeM
    import pickle
    
    class RNN:
        """Recursive neural network: one ReLU composition layer plus a softmax
        classifier that is applied at every node of a binary tree.

        Parameters live in ``self.stack`` as ``[L, W, b, Ws, bs]``:
        ``L`` stores one word vector per column, ``W``/``b`` combine the
        activations of a node's two children, and ``Ws``/``bs`` are the
        softmax weights.  Node activations are stored on the tree nodes in the
        attribute ``hActsl`` and class probabilities in ``probs``.
        """

        def __init__(self, wvecDim, outputDim, numWords, mbSize=30, rho=1e-4):
            """Store hyper-parameters; ``initParams`` must be called before use.

            Args:
                wvecDim: length of each word vector / hidden activation.
                outputDim: number of softmax classes.
                numWords: number of columns of the word-vector matrix ``L``.
                mbSize: minibatch size used to scale cost and gradients.
                rho: L2 regularisation strength applied to ``W`` and ``Ws``.
            """
            self.wvecDim = wvecDim
            self.outputDim = outputDim
            self.numWords = numWords
            self.mbSize = mbSize
            # Factory for the sparse word-vector gradient dictionary.
            self.defaultVec = lambda: np.zeros((wvecDim,))
            self.rho = rho

        def initParams(self):
            """Allocate parameters (small Gaussian weights, zero biases) and
            the dense gradient buffers.  Reseeds NumPy's global RNG with a
            fixed seed, so repeated calls produce identical parameters."""
            np.random.seed(12341)

            # Word vectors
            self.L = 0.01 * np.random.randn(self.wvecDim, self.numWords)

            # Hidden layer parameters
            self.W = 0.01 * np.random.randn(self.wvecDim, 2 * self.wvecDim)
            self.b = np.zeros((self.wvecDim))

            # Softmax weights
            # note this is " U "in the notes and the handout...
            # there is a reason for the change in notation
            self.Ws = 0.01 * np.random.randn(self.outputDim, self.wvecDim)
            self.bs = np.zeros((self.outputDim))

            self.stack = [self.L, self.W, self.b, self.Ws, self.bs]

            # Gradients (zeroed at the start of every costAndGrad call)
            self.dW = np.empty(self.W.shape)
            self.db = np.empty((self.wvecDim))
            self.dWs = np.empty(self.Ws.shape)
            self.dbs = np.empty((self.outputDim))

        def costAndGrad(self, mbdata, test=False):
            """Forward-propagate every tree of the minibatch and, unless
            ``test`` is set, back-propagate through every tree.

            Args:
                mbdata: sequence of trees; each must expose ``.root``.
                test: when true, skip back-propagation and regularisation.

            Returns:
                Training mode: ``(cost, [dL, dW, db, dWs, dbs])`` where
                ``dL`` is a dict mapping word index -> gradient vector and
                everything is scaled by ``1 / self.mbSize``.
                Test mode: ``(cost / len(mbdata), correct, guess, total)``.

            Note:
                Only ``backProp`` resets the nodes' ``fprop`` flags, so after
                a test-mode call the flags stay set; clear them before
                evaluating the same trees again.
            """
            cost = 0.0
            correct = []
            guess = []
            total = 0.0

            self.L, self.W, self.b, self.Ws, self.bs = self.stack
            # Reset all gradients to zero
            self.dW[:] = 0
            self.db[:] = 0
            self.dWs[:] = 0
            self.dbs[:] = 0
            self.dL = collections.defaultdict(self.defaultVec)

            # Forward pass over every tree in the batch
            for tree in mbdata:
                c, tot = self.forwardProp(tree.root, correct, guess)
                cost += c
                total += tot
            if test:
                return (1. / len(mbdata)) * cost, correct, guess, total

            # Backward pass over every tree in the batch
            for tree in mbdata:
                self.backProp(tree.root)

            # Scale cost and gradients by the minibatch size
            scale = (1. / self.mbSize)
            for v in self.dL.values():
                v *= scale

            # Add the L2 regularisation term
            cost += (self.rho / 2) * np.sum(self.W ** 2)
            cost += (self.rho / 2) * np.sum(self.Ws ** 2)

            return scale * cost, [self.dL,
                                  scale * (self.dW + self.rho * self.W),
                                  scale * self.db,
                                  scale * (self.dWs + self.rho * self.Ws),
                                  scale * self.dbs]

        def forwardProp(self, node, correct=None, guess=None):
            """Recursively compute activations, softmax probabilities and the
            cross-entropy cost for ``node`` and its not-yet-propagated children.

            Sets ``node.hActsl``, ``node.probs`` and ``node.fprop``.

            Args:
                node: current tree node.
                correct: list that receives the true label of every visited
                    node (a fresh list is used when omitted).
                guess: list that receives the arg-max prediction of every
                    visited node (a fresh list is used when omitted).

            Returns:
                ``(cost, total)``: summed cross-entropy and number of nodes
                visited by this call.
            """
            # None sentinels instead of mutable defaults, so separate calls
            # never share (and silently grow) the same list objects.
            if correct is None:
                correct = []
            if guess is None:
                guess = []

            cost = total = 0.0

            if node.isLeaf:
                # Leaves use their word vector directly (no non-linearity).
                node.hActsl = self.L[:, node.word]
            else:
                for child in (node.left, node.right):
                    if not child.fprop:
                        childCost, childTotal = self.forwardProp(child, correct, guess)
                        cost += childCost
                        total += childTotal

                children = np.hstack((node.left.hActsl, node.right.hActsl))
                node.hActsl = self.W.dot(children) + self.b
                node.hActsl[node.hActsl < 0] = 0  # ReLU

            # Softmax, shifted by the max for numerical stability
            scores = self.Ws.dot(node.hActsl) + self.bs
            scores -= np.max(scores)
            expScores = np.exp(scores)
            node.probs = expScores / np.sum(expScores)

            correct.append(node.label)
            guess.append(np.argmax(node.probs))

            cost -= np.log(node.probs[node.label])

            node.fprop = True

            return cost, total + 1

        def backProp(self, node, error=None):
            """Back-propagate through ``node`` and its subtree, accumulating
            into ``self.dWs``, ``self.dbs``, ``self.dW``, ``self.db`` and
            ``self.dL[node.word]``.

            Args:
                node: current tree node (must have been forward-propagated).
                error: gradient w.r.t. this node's activation coming from its
                    parent, or ``None`` at the root.
            """
            # Mark the node as needing a fresh forward pass
            node.fprop = False

            # Work on a copy so node.probs stays a valid distribution.
            error_this = node.probs.copy()
            error_this[node.label] -= 1.0
            delta = self.Ws.T.dot(error_this)

            self.dWs += np.outer(error_this, node.hActsl)
            self.dbs += error_this

            if error is not None:
                delta += error

            if node.isLeaf:
                # No ReLU was applied to word vectors, so no masking here.
                self.dL[node.word] += delta
                return

            # ReLU derivative: units clamped to zero pass no gradient.
            delta[node.hActsl == 0] = 0

            self.dW += np.outer(delta, np.hstack([node.left.hActsl, node.right.hActsl]))
            self.db += delta

            delta = np.dot(self.W.T, delta)
            self.backProp(node.left, delta[:self.wvecDim])
            self.backProp(node.right, delta[self.wvecDim:])

        def updateParams(self, scale, update, log=False):
            """Apply ``p := p + scale * update`` to every parameter.

            Args:
                scale: step multiplier (the update is *added*, so a descent
                    step needs a negative value).
                update: ``[dL, dW, db, dWs, dbs]`` as returned by
                    ``costAndGrad``; ``dL`` is a dict keyed by word index.
                log: when true, print the RMS of each dense parameter and of
                    its update.
            """
            if log:
                for P, dP in zip(self.stack[1:], update[1:]):
                    pRMS = np.sqrt(np.mean(P ** 2))
                    dpRMS = np.sqrt(np.mean((scale * dP) ** 2))
                    print("weight rms=%f -- update rms=%f" % (pRMS, dpRMS))
            self.stack[1:] = [P + scale * dP for P, dP in zip(self.stack[1:], update[1:])]

            # Sparse update: only the word vectors that received a gradient
            dL = update[0]
            wordVecs = self.stack[0]
            for j in dL:
                wordVecs[:, j] += scale * dL[j]

        def toFile(self, fid):
            """Pickle the parameter stack to the open binary file ``fid``."""
            pickle.dump(self.stack, fid)

        def fromFile(self, fid):
            """Replace the parameter stack with one unpickled from ``fid``."""
            # NOTE: unpickling can execute arbitrary code; only load files
            # that come from a trusted source.
            self.stack = pickle.load(fid)

        def check_grad(self, data, epsilon=1e-6):
            """Compare analytic gradients with forward finite differences and
            print whether the mean absolute error is below 0.001.

            Args:
                data: minibatch of trees passed to ``costAndGrad``.
                epsilon: perturbation added to each parameter entry.
            """
            cost, grad = self.costAndGrad(data)

            err1 = 0.0
            count = 0.0
            print("Checking dW...")
            for W, dW in zip(self.stack[1:], grad[1:]):
                # ndindex walks every entry of 1-D and 2-D parameters alike
                for idx in np.ndindex(W.shape):
                    W[idx] += epsilon
                    costP, _ = self.costAndGrad(data)
                    W[idx] -= epsilon
                    numGrad = (costP - cost) / epsilon
                    err1 += np.abs(dW[idx] - numGrad)
                    count += 1
            meanErr1 = err1 / count if count else 0.0
            if 0.001 > meanErr1:
                print("Grad Check Passed for dW")
            else:
                print("Grad Check Failed for dW:Sum of Error=%.9f" % meanErr1)

            # check dL separately since dict
            dL = grad[0]
            L = self.stack[0]
            err2 = 0.0
            count = 0.0
            print("Checking dL...")
            for j in dL:
                for i in range(L.shape[0]):
                    L[i, j] += epsilon
                    costP, _ = self.costAndGrad(data)
                    L[i, j] -= epsilon
                    numGrad = (costP - cost) / epsilon
                    err2 += np.abs(dL[j][i] - numGrad)
                    count += 1
            meanErr2 = err2 / count if count else 0.0
            if 0.001 > meanErr2:
                print("Grad Check Passed for dL")
            else:
                print("Grad Check Failed for dL: Sum of Error = %.9f" % meanErr2)
    
    if __name__ == "__main__":
        # Run a numerical gradient check on a tiny model using the first
        # few training trees.
        wvecDim = 10
        outputDim = 5
        batch = 4

        train = treeM.loadTrees()
        numW = len(treeM.loadWordMap())

        rnn = RNN(wvecDim, outputDim, numW, mbSize=batch)
        rnn.initParams()

        mbData = train[:batch]
        print("Numerical gradient check...")
        rnn.check_grad(mbData)
            

     下面部分是构造节点的python文件tree.py

    在进行计算时需要先运行tree.py文件进行tree结构的生成,然后进行合并计算

    import collections
    import pickle
    UNK = 'UNK'
    # This file contains the dataset in a useful way. We populate a list of Trees to train/test our Neural Nets such that each Tree contains any number of Node objects.
    
    # The best way to get a feel for how these objects are used in the program is to drop pdb.set_trace() in a few places throughout the codebase
    # to see how the trees are used.. look where loadtrees() is called etc..
    
    
    class Node:
        """A single node of a parse tree."""

        def __init__(self, label, word=None):
            # Payload supplied by the caller.
            self.label = label
            self.word = word  # NOT a word vector, but an index into L, i.e. wvec = L[:, node.word]

            # Tree links; filled in while the tree is being parsed.
            self.parent = None
            self.left = None
            self.right = None

            # State flags.
            self.isLeaf = False  # set to True for leaf nodes
            self.fprop = False   # True once forward propagation has finished for this node

            # Cached results of the forward pass.
            self.hActs1 = None  # h1 from the handout
            self.hActs2 = None  # h2 from the handout (only used for RNN2)
            self.probs = None   # yhat
    
    class Tree:
        """Binary tree parsed from a bracketed string such as
        ``(3 (2 good) (1 movie))``.  The parsed root is stored in ``self.root``.
        """

        def __init__(self, treeString, openChar='(', closeChar=')'):
            """Parse ``treeString`` into a tree of ``Node`` objects.

            Args:
                treeString: bracketed tree; whitespace is discarded.
                openChar: single character that opens a node.
                closeChar: single character that closes a node.
            """
            # Honour the caller-supplied delimiters (they used to be ignored
            # in favour of hard-coded parentheses).
            self.open = openChar
            self.close = closeChar
            # One token per character, with all whitespace removed.
            tokens = list(''.join(treeString.split()))
            self.root = self.parse(tokens)

        def parse(self, tokens, parent=None):
            """Recursively build the node described by ``tokens``.

            ``tokens`` is a list of single characters that starts with the
            open delimiter, followed by a one-character integer label, and
            ends with the close delimiter.

            Args:
                tokens: characters of one bracketed (sub)tree.
                parent: node to record as the new node's parent.

            Returns:
                The ``Node`` for this (sub)tree.

            Raises:
                AssertionError: if the first/last token is not a delimiter.
            """
            assert tokens[0] == self.open, "Malformed tree"
            assert tokens[-1] == self.close, "Malformed tree"

            split = 2  # position after open and label
            countOpen = countClose = 0

            if tokens[split] == self.open:
                countOpen += 1
                split += 1
            # Find where left child and right child split: advance until the
            # brackets of the left child are balanced.
            while countOpen != countClose:
                if tokens[split] == self.open:
                    countOpen += 1
                if tokens[split] == self.close:
                    countClose += 1
                split += 1

            # New node
            node = Node(int(tokens[1]))  # zero index labels

            node.parent = parent

            # Leaf node: no nested bracket followed the label
            if countOpen == 0:
                node.word = ''.join(tokens[2:-1]).lower()
                node.isLeaf = True
                return node

            node.left = self.parse(tokens[2:split], parent=node)
            node.right = self.parse(tokens[split:-1], parent=node)

            return node
    
            
    
    def leftTraverse(root, nodeFn=None, args=None):
        """
        Recursively traverse the tree in pre-order (node, then left subtree,
        then right subtree), calling ``nodeFn(node, args)`` at each node.

        Args:
            root: node to start from; must expose ``left`` and ``right``.
            nodeFn: callback taking ``(node, args)``; when ``None`` the tree
                is walked without calling anything.
            args: opaque value forwarded to every ``nodeFn`` call.
        """
        # The default nodeFn=None used to raise TypeError; treat it as a no-op.
        if nodeFn is not None:
            nodeFn(root, args)
        if root.left is not None:
            leftTraverse(root.left, nodeFn, args)
        if root.right is not None:
            leftTraverse(root.right, nodeFn, args)
    
    
    def countWords(node, words):
        """Increment the tally ``words[node.word]`` when ``node`` is a leaf;
        non-leaf nodes are ignored."""
        if not node.isLeaf:
            return
        words[node.word] = words[node.word] + 1
    
    def clearFprop(node, words):
        """Reset the node's forward-propagation flag; ``words`` is unused and
        only present so the function fits the ``nodeFn(node, args)`` shape."""
        setattr(node, 'fprop', False)
    
    def mapWords(node, wordMap):
        """Replace a leaf's word with its value in ``wordMap``; words missing
        from the map get the value stored under ``UNK``.  Non-leaf nodes are
        left untouched."""
        if not node.isLeaf:
            return
        key = node.word if node.word in wordMap else UNK
        node.word = wordMap[key]
        
    
    def loadWordMap():
        """Return the object pickled in ``wordMap.bin`` (current directory)."""
        # NOTE: pickle.load can execute arbitrary code; only use a
        # wordMap.bin that comes from a trusted source.
        with open('wordMap.bin', 'rb') as handle:
            wordMap = pickle.load(handle)
        return wordMap
    
    def buildWordMap():
        """
        Read the training trees, give every distinct leaf word an integer
        index, add one extra index for UNK, and pickle the resulting dict
        to wordMap.bin.
        """
        trainPath = 'trees/train.txt'
        print("Reading trees to build word map..")
        with open(trainPath, 'r') as fid:
            trees = [Tree(line) for line in fid]

        print("Counting words to give each word an index..")

        counts = collections.defaultdict(int)
        for tree in trees:
            leftTraverse(tree.root, nodeFn=countWords, args=counts)

        # Number the words consecutively, then append the unknown-word entry.
        wordMap = {word: index for index, word in enumerate(counts)}
        wordMap[UNK] = len(counts)

        print("Saving wordMap to wordMap.bin")
        with open('wordMap.bin', 'wb') as fid:
            pickle.dump(wordMap, fid)
    
    def loadTrees(dataSet='train'):
        """
        Parse every line of trees/<dataSet>.txt into a Tree and replace the
        leaf words with their ids from the saved word map.

        Returns the list of trees.
        """
        wordMap = loadWordMap()
        path = 'trees/%s.txt' % dataSet
        print("Loading %sing trees.." % dataSet)
        with open(path, 'r') as fid:
            trees = [Tree(line) for line in fid]
        for parsed in trees:
            leftTraverse(parsed.root, nodeFn=mapWords, args=wordMap)
        return trees
          
    if __name__ == "__main__":
        # Build the word map first: loadTrees reads it back from wordMap.bin.
        buildWordMap()

        train = loadTrees()

        print("Now you can do something with this list of trees!")

    更详细的代码请参考github:

    https://github.com/weizhenzhao/cs224d_problem_set3

  • 相关阅读:
    老鼠走迷宫
    堆栈的应用 问题还未解决
    计数排序与基数排序
    C++类的描述
    选择排序 冒泡排序 插入排序
    计算名次
    多项式求值 n维多项式 Horner解法
    003_sublime如何实时预览markdown
    002_git下载的源码怎么打成jar包呢?
    001_idea常用插件
  • 原文地址:https://www.cnblogs.com/weizhen/p/7633167.html
Copyright © 2020-2023  润新知