• Neural Networks and Deep Learning (Qiu Xipeng), Programming Exercise 6: RNN Addition-Carry Experiment (Jupyter export)


    Addition-Carry Experiment

    This exercise is a fill-in-the-blank problem; the code to fill in:
    def call(self, num1, num2):
        num1_emb = self.embed_layer(num1)  # shape (b_sz, len, emb_sz)
        num2_emb = self.embed_layer(num2)  # shape (b_sz, len, emb_sz)
        inp_emb = tf.concat([num1_emb, num2_emb], axis=-1)  # shape (b_sz, len, 2*emb_sz)
        rnn_out = self.rnn_layer(inp_emb)
        logits = self.dense(rnn_out)  # shape (b_sz, len, 10)
        return logits

    import numpy as np
    import tensorflow as tf
    import collections
    from tensorflow import keras
    from tensorflow.keras import layers, optimizers, datasets
    import os,sys,tqdm
    
    

    Data Generation

    We randomly sample integer pairs (num1, num2) from the interval start -> end and use the sum num1 + num2 as the supervision signal.

    • First, convert each number into a list of its digits with convertNum2Digits
    • Reverse each digit list so the least-significant digit comes first; a carry then always propagates in the direction the RNN reads the sequence
    • Pad the digit lists to a common length with pad2len (a round-trip check of these steps follows the helper code below)
    def gen_data_batch(batch_size, start, end):
        '''Sample one batch of integer pairs from the interval [start, end)
        Args:
            batch_size: batch_size
            start: lower bound (inclusive)
            end: upper bound (exclusive)
        '''
        numbers_1 = np.random.randint(start, end, batch_size)
        numbers_2 = np.random.randint(start, end, batch_size)
        results = numbers_1 + numbers_2
        return numbers_1, numbers_2, results
    
    def convertNum2Digits(Num):
        '''Convert an integer into a list of its digits, e.g. 133412 ==> [1, 3, 3, 4, 1, 2]
        '''
        strNum = str(Num)
        digitNums = [int(o) for o in strNum]
        return digitNums
    
    def convertDigits2Num(Digits):
        '''Convert a list of digits back into an integer, e.g. [1, 3, 3, 4, 1, 2] ==> 133412
        '''
        digitStrs = [str(o) for o in Digits]
        numStr = ''.join(digitStrs)
        Num = int(numStr)
        return Num
    
    def pad2len(lst, length, pad=0):
        '''Pad a list with `pad` up to length `length`, e.g. pad2len([1, 3, 2, 3], 6, pad=0) ==> [1, 3, 2, 3, 0, 0]
        '''
        lst += [pad] * (length - len(lst))
        return lst
    
    def results_converter(res_lst):
        '''Convert a batch of predicted digit lists back into the original integers
        Args:
            res_lst: shape(b_sz, len(digits))
        '''
        res = [reversed(digits) for digits in res_lst]
        return [convertDigits2Num(digits) for digits in res]
    
    def prepare_batch(Nums1, Nums2, results, maxlen):
        '''Prepare one batch: convert each number into a reversed digit list and pad to a fixed length
        Args:
            Nums1: shape(batch_size,)
            Nums2: shape(batch_size,)
            results: shape(batch_size,)
            maxlen: type(int)
        Returns:
            Nums1: shape(batch_size, maxlen)
            Nums2: shape(batch_size, maxlen)
            results: shape(batch_size, maxlen)
        '''
        Nums1 = [convertNum2Digits(o) for o in Nums1]
        Nums2 = [convertNum2Digits(o) for o in Nums2]
        results = [convertNum2Digits(o) for o in results]
        
        Nums1 = [list(reversed(o)) for o in Nums1]
        Nums2 = [list(reversed(o)) for o in Nums2]
        results = [list(reversed(o)) for o in results]
        
        Nums1 = [pad2len(o, maxlen) for o in Nums1]
        Nums2 = [pad2len(o, maxlen) for o in Nums2]
        results = [pad2len(o, maxlen) for o in results]
        
        return Nums1, Nums2, results
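
    A quick round-trip check of the pipeline (a minimal sketch, not part of the original exercise; the variable names are illustrative): sample a tiny batch, transform it, then undo the transformation with results_converter.

    nums1, nums2, sums = gen_data_batch(batch_size=2, start=0, end=1000)
    digits1, digits2, digits_sum = prepare_batch(nums1, nums2, sums, maxlen=4)
    # e.g. 123 ==> digits [1, 2, 3] ==> reversed [3, 2, 1] ==> padded [3, 2, 1, 0]
    print(digits1[0], digits2[0], digits_sum[0])
    # reversing and joining each digit list recovers the original integers
    assert results_converter(digits_sum) == list(sums)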
    

    Model construction: complete the model as specified in the exercise figure

    class myRNNModel(keras.Model):
        def __init__(self):
            super(myRNNModel, self).__init__()
            # 10 possible input digits (0-9), each embedded as a 32-dim vector
            self.embed_layer = tf.keras.layers.Embedding(10, 32, batch_input_shape=[None, None])
            self.rnncell = tf.keras.layers.SimpleRNNCell(64)
            self.rnn_layer = tf.keras.layers.RNN(self.rnncell, return_sequences=True)
            # per-position classifier over the 10 output digits
            self.dense = tf.keras.layers.Dense(10)
            
        @tf.function
        def call(self, num1, num2):
            '''
            Complete the model from the figure here
            '''
            num1_emb = self.embed_layer(num1) # shape(b_sz, len, emb_sz)
            num2_emb = self.embed_layer(num2) # shape(b_sz, len, emb_sz)
            inp_emb = tf.concat([num1_emb, num2_emb], axis=-1)
            rnn_out = self.rnn_layer(inp_emb)
            logits = self.dense(rnn_out)
            
            return logits
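
    Before training, a quick shape check (a sketch; dummy_model and dummy are illustrative names): a dummy batch should yield logits of shape (batch, maxlen, 10), one score vector over the digits 0-9 per output position. Note that wrapping SimpleRNNCell(64) in layers.RNN as above behaves the same as tf.keras.layers.SimpleRNN(64, return_sequences=True).

    dummy_model = myRNNModel()
    dummy = tf.zeros([2, 11], dtype=tf.int32)  # (batch=2, maxlen=11), all-zero digits
    print(dummy_model(dummy, dummy).shape)     # expected: (2, 11, 10)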
    
    @tf.function
    def compute_loss(logits, labels):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)
        return tf.reduce_mean(losses)
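
    Why the training log below starts near 2.31: with 10 digit classes, uniform logits give a cross-entropy of ln 10 ≈ 2.303, i.e. the untrained model guesses at chance. A quick check (not part of the original notebook):

    # all-zero logits = a uniform 10-way prediction ==> loss = ln(10) ≈ 2.3026
    print(compute_loss(tf.zeros([2, 11, 10]), tf.zeros([2, 11], dtype=tf.int32)).numpy())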
    
    @tf.function
    def train_one_step(model, optimizer, x, y, label):
        with tf.GradientTape() as tape:
            logits = model(x, y)
            loss = compute_loss(logits, label)
    
        # compute gradient
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss
    
    def train(steps, model, optimizer):
        loss = 0.0
        accuracy = 0.0
        for step in range(steps):
            # operands have at most 9 digits, so sums have at most 10;
            # maxlen=11 leaves at least one padding position
            datas = gen_data_batch(batch_size=200, start=0, end=555555555)
            Nums1, Nums2, results = prepare_batch(*datas, maxlen=11)
            loss = train_one_step(model, optimizer, tf.constant(Nums1, dtype=tf.int32), 
                                  tf.constant(Nums2, dtype=tf.int32),
                                  tf.constant(results, dtype=tf.int32))
            if step%50 == 0:
                print('step', step, ': loss', loss.numpy())
    
        return loss
    
    def evaluate(model):
        # evaluation operands come from a disjoint, larger range than training,
        # so exact-match accuracy measures generalization to unseen additions
        datas = gen_data_batch(batch_size=2000, start=555555555, end=999999999)
        Nums1, Nums2, results = prepare_batch(*datas, maxlen=11)
        logits = model(tf.constant(Nums1, dtype=tf.int32), tf.constant(Nums2, dtype=tf.int32))
        logits = logits.numpy()
        pred = np.argmax(logits, axis=-1)
        res = results_converter(pred)
        for o in list(zip(datas[2], res))[:20]:
            print(o[0], o[1], o[0]==o[1])
    
        print('accuracy is: %g' % np.mean([o[0]==o[1] for o in zip(datas[2], res)]))
    
    
    optimizer = optimizers.Adam(0.001)
    model = myRNNModel()
    
    train(3000, model, optimizer)
    evaluate(model)
    
    step 0 : loss 2.3128169
    step 50 : loss 1.9332728
    step 100 : loss 1.901959
    step 150 : loss 1.8834128
    step 200 : loss 1.8941866
    step 250 : loss 1.883983
    step 300 : loss 1.8795818
    step 350 : loss 1.8715074
    step 400 : loss 1.8778303
    step 450 : loss 1.8824102
    step 500 : loss 1.8784071
    step 550 : loss 1.8804839
    step 600 : loss 1.8770535
    step 650 : loss 1.8731002
    step 700 : loss 1.883957
    step 750 : loss 1.8866007
    step 800 : loss 1.8686253
    step 850 : loss 1.8691077
    step 900 : loss 1.8789036
    step 950 : loss 1.8719782
    step 1000 : loss 1.8767223
    step 1050 : loss 1.8807548
    step 1100 : loss 1.8698553
    step 1150 : loss 1.863222
    step 1200 : loss 1.8729354
    step 1250 : loss 1.8697383
    step 1300 : loss 1.863727
    step 1350 : loss 1.8565942
    step 1400 : loss 1.823668
    step 1450 : loss 1.7782799
    step 1500 : loss 1.6455835
    step 1550 : loss 1.4902543
    step 1600 : loss 1.3107812
    step 1650 : loss 1.1358132
    step 1700 : loss 0.971002
    step 1750 : loss 0.8325506
    step 1800 : loss 0.7205786
    step 1850 : loss 0.6340592
    step 1900 : loss 0.55104315
    step 1950 : loss 0.49578613
    step 2000 : loss 0.43124878
    step 2050 : loss 0.37480894
    step 2100 : loss 0.32941413
    step 2150 : loss 0.2885746
    step 2200 : loss 0.24742316
    step 2250 : loss 0.21270446
    step 2300 : loss 0.18246596
    step 2350 : loss 0.15749024
    step 2400 : loss 0.1375851
    step 2450 : loss 0.120020166
    step 2500 : loss 0.105072536
    step 2550 : loss 0.092671186
    step 2600 : loss 0.081815556
    step 2650 : loss 0.071697846
    step 2700 : loss 0.06214186
    step 2750 : loss 0.055456445
    step 2800 : loss 0.050086357
    step 2850 : loss 0.04434098
    step 2900 : loss 0.040631484
    step 2950 : loss 0.037673675
    1667667009 1667667009 True
    1789950734 1789950734 True
    1710655485 1710655485 True
    1663521507 1663521507 True
    1896681877 1896681877 True
    1554165075 1554165075 True
    1578702243 1578702243 True
    1645886796 1645886796 True
    1267851483 1267851483 True
    1543259935 1543259935 True
    1621680881 1621680881 True
    1887850516 1887850516 True
    1416221863 1416221863 True
    1293380770 1293380770 True
    1421167341 1421167341 True
    1418403242 1418403242 True
    1327906642 1327906642 True
    1419538600 1419538600 True
    1532859597 1532859597 True
    1531071162 1531071162 True
    accuracy is: 1
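
    Since the model reaches 100% exact-match accuracy on the held-out range, a single addition can be run end-to-end. A minimal sketch using the notebook's own helpers (rnn_add is a hypothetical name, not part of the original):

    def rnn_add(model, a, b, maxlen=11):
        '''Hypothetical helper: add two integers with the trained model.'''
        n1, n2, _ = prepare_batch([a], [b], [a + b], maxlen)
        logits = model(tf.constant(n1, dtype=tf.int32), tf.constant(n2, dtype=tf.int32))
        pred = np.argmax(logits.numpy(), axis=-1)  # (1, maxlen) predicted digits
        return results_converter(pred)[0]

    print(rnn_add(model, 654321987, 123456789))  # should print 777778776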
  • Original post: https://www.cnblogs.com/hbuwyg/p/16354360.html