• keras中seq2seq实现


    • 这里只是简单的一个例子
    输入序列 目标序列
    [13, 28, 18, 7, 9, 5] [18, 28, 13]
    [29, 44, 38, 15, 26, 22] [38, 44, 29]
    [27, 40, 31, 29, 32, 1] [31, 40, 27]

    1.输入序列与目标序列向量化

    • 开启设备分配日志(打印每个运算被放置在哪个设备上,用于确认是否在GPU上计算;该调用本身并不会指定使用GPU)
    import tensorflow as tf
    tf.debugging.set_log_device_placement(True)
    
    • 导入需要的函数和包
    from numpy import array
    from numpy import argmax
    from keras.utils import to_categorical
    import numpy as np
    
    • 定义数据生成函数
    # 随机产生在(1,n_features)区间的整数序列,序列长度为n_steps_in
    def generate_sequence(length, n_unique):
        """Return a list of ``length`` random integers in ``[1, n_unique - 1]``.

        0 is never produced, which leaves it free for ``get_dataset`` to use as
        the first decoder-input value, and every generated value is a valid
        class index when one-hot encoding with ``num_classes=n_unique``.

        Args:
            length: Number of integers to generate.
            n_unique: Exclusive upper bound of the generated values (>= 2).

        Returns:
            A Python list holding ``length`` ints.

        Raises:
            ValueError: Raised by NumPy when ``n_unique <= 1`` (empty range).
        """
        # np.random.randint excludes its upper bound (unlike random.randint),
        # so the bound must be n_unique for n_unique - 1 to be reachable.
        return np.random.randint(1, n_unique, size=length).tolist()
    

    这个函数的目的是产生长度为length,数据范围为(1,n_unique)之间的整数。

    generate_sequence(10,100)
    

    产生10个在(1,100)之间的整数

    [9, 37, 38, 73, 4, 1, 42, 97, 48, 17]
    
    • 构造LSTM模型所需要的数据
    def get_dataset(n_in, n_out, cardinality, n_samples):
        """Build one-hot encoded encoder inputs, decoder inputs and labels.

        Each sample is a random source sequence of ``n_in`` integers. Its target
        is the first ``n_out`` source values in reverse order, and the decoder
        input is that target shifted right by one step with 0 in front.

        Args:
            n_in: Length of each source sequence.
            n_out: Number of leading source values that form the target.
            cardinality: Passed to ``generate_sequence`` as ``n_unique`` and to
                ``to_categorical`` as ``num_classes``.
            n_samples: Number of samples to generate.

        Returns:
            Tuple ``(X1, X2, y)`` of arrays: encoded sources, encoded shifted
            targets (decoder inputs) and encoded targets (labels).
        """
        encoder_inputs, decoder_inputs, labels = [], [], []
        for _ in range(n_samples):
            source = generate_sequence(n_in, cardinality)
            # Target: the first n_out source values, reversed.
            target = source[:n_out][::-1]
            # Decoder input: the target delayed by one step, starting with 0.
            shifted_target = [0] + target[:-1]

            # One-hot encode all three sequences with the same vector length.
            encoder_inputs.append(to_categorical(source, num_classes=cardinality))
            decoder_inputs.append(to_categorical(shifted_target, num_classes=cardinality))
            labels.append(to_categorical(target, num_classes=cardinality))
        return array(encoder_inputs), array(decoder_inputs), array(labels)
    

    to_categorical(list,num)的作用是对一个整数列表进行one_hot编码,其输入的参数有两个:list表示的是一个整数列表,例如[3,2,5,1,2];num是one_hot编码后每个向量的长度(即类别数num_classes),要求num>max(list)。详见Keras的to_categorical文档。

    # one_hot解码,看一下那些位置不为0
    def one_hot_decode(encoded_seq):
        """Return, for every vector in ``encoded_seq``, the index of its largest entry."""
        decoded = []
        for vector in encoded_seq:
            decoded.append(argmax(vector))
        return decoded
    

    这里one_hot解码的主要作用是把编码还原成整数序列:对每个向量取最大值所在的位置(对one_hot向量来说就是值为1的位置)。

    • 输入参数
    # Length of every one-hot vector; passed to get_dataset as its cardinality.
    n_features = 50 + 1
    # Source sequence length (time steps) and target sequence length.
    n_steps_in, n_steps_out = 6, 3
    # Build the encoded source, decoder-input and label arrays for 100 samples.
    X1, X2, y = get_dataset(
        n_in=n_steps_in,
        n_out=n_steps_out,
        cardinality=n_features,
        n_samples=100,
    )
    

    X1作为encoder的输入,X2作为decoder的输入;encoder最后一个时间步的状态(state_h、state_c)作为decoder的初始状态,y是最终的标签。

    one_hot_decode(X1[1])
    one_hot_decode(X2[1])
    one_hot_decode(y[1])
    
    [21, 49, 21, 34, 48, 46]
    [0, 21, 49]
    [21, 49, 21]
    

    2.构造seq2seq模型

    • 导入所需的网络
    from numpy import array
    from numpy import argmax
    from numpy import array_equal
    from keras.utils import to_categorical
    from keras.models import Model
    from keras.layers import Input
    from keras.layers import LSTM
    from keras.layers import Dense
    
    • 构造网络
    # 构造Seq2Seq训练模型model, 以及进行新序列预测时需要的的Encoder模型:encoder_model 与Decoder模型:decoder_model
    def define_models(n_input, n_output, n_units):
        """Build the seq2seq training model and the two models used for inference.

        The inference encoder and decoder are assembled from the same ``LSTM``
        and ``Dense`` layer objects as the training model.

        Args:
            n_input: Size of the last axis of the encoder input.
            n_output: Size of the last axis of the decoder input; also the number
                of units of the softmax ``Dense`` output layer.
            n_units: Number of units of both LSTM layers.

        Returns:
            Tuple ``(model, encoder_model, decoder_model)``.
        """
        # --- training graph: encoder ---
        enc_in = Input(shape=(None, n_input))
        enc_lstm = LSTM(n_units, return_state=True)
        # With return_state=True and no return_sequences, the first output and
        # the hidden state are both the last-step hidden state; only the hidden
        # and cell states are kept.
        _, enc_h, enc_c = enc_lstm(enc_in)
        enc_states = [enc_h, enc_c]

        # --- training graph: decoder, initialised with the encoder states ---
        dec_in = Input(shape=(None, n_output))
        dec_lstm = LSTM(n_units, return_sequences=True, return_state=True)
        # return_sequences=True yields the hidden state of every time step; the
        # two returned final states are not needed while training.
        dec_seq, _, _ = dec_lstm(dec_in, initial_state=enc_states)
        dec_dense = Dense(n_output, activation='softmax')
        model = Model([enc_in, dec_in], dec_dense(dec_seq))

        # --- inference encoder: source sequence -> [hidden state, cell state] ---
        encoder_model = Model(enc_in, enc_states)

        # --- inference decoder: states are fed in and returned explicitly ---
        state_in_h = Input(shape=(n_units,))
        state_in_c = Input(shape=(n_units,))
        state_inputs = [state_in_h, state_in_c]
        step_seq, step_h, step_c = dec_lstm(dec_in, initial_state=state_inputs)
        step_probs = dec_dense(step_seq)
        decoder_model = Model([dec_in] + state_inputs, [step_probs, step_h, step_c])

        return model, encoder_model, decoder_model
    

    其中model是训练用的网络,encoder_model是预测时的encoder模型,decoder_model是预测时的decoder模型。LSTM中return_sequences和return_state两个参数的不同设置会得到不同的输出,详见另一篇博客:https://www.jianshu.com/p/a74bb5a623dd

    model, encoder_model, decoder_model = define_models(10,5,15)
    model.summary()
    
    Model: "model_19"
    __________________________________________________________________________________________________
    Layer (type)                    Output Shape         Param #     Connected to                     
    ==================================================================================================
    input_25 (InputLayer)           (None, None, 10)     0                                            
    __________________________________________________________________________________________________
    input_26 (InputLayer)           (None, None, 5)      0                                            
    __________________________________________________________________________________________________
    lstm_13 (LSTM)                  [(None, 15), (None,  1560        input_25[0][0]                   
    __________________________________________________________________________________________________
    lstm_14 (LSTM)                  [(None, None, 15), ( 1260        input_26[0][0]                   
                                                                     lstm_13[0][1]                    
                                                                     lstm_13[0][2]                    
    __________________________________________________________________________________________________
    dense_7 (Dense)                 (None, None, 5)      80          lstm_14[0][0]                    
    ==================================================================================================
    Total params: 2,900
    Trainable params: 2,900
    Non-trainable params: 0
    __________________________________________________________________________________________________
    

    encoder包含了一个lstm结构,decoder包含一个lstm结构和一个全连接层,encoder的输出作为decoder的输入,在模型中可以看见,lstm_14中与之相关联的为input,lstm_13,其中lstm_13是encoder的输出。

    def predict_sequence(infenc, infdec, source, n_steps, cardinality):
        """Decode ``n_steps`` output vectors for ``source`` with the inference models.

        Args:
            infenc: Encoder inference model (``encoder_model``); its
                ``predict(source)`` must return the list of initial decoder states.
            infdec: Decoder inference model (``decoder_model``); its ``predict``
                receives ``[target_seq] + state`` and returns ``(yhat, h, c)``.
            source: Encoded source sequence handed to ``infenc.predict``.
            n_steps: Number of decoding steps to run.
            cardinality: Length of the one-hot vectors.

        Returns:
            Array stacking ``yhat[0, 0, :]`` of every decoding step.
        """
        # Encode the source sequence into the initial decoder states.
        state = infenc.predict(source)
        # The first decoder input must match training: get_dataset starts every
        # decoder input with to_categorical(0), i.e. a one-hot vector with the 1
        # at index 0, not an all-zero vector.
        start_token = [1.0 if i == 0 else 0.0 for i in range(cardinality)]
        target_seq = array(start_token).reshape(1, 1, cardinality)
        output = []
        for _ in range(n_steps):
            # Predict the next element and the updated states.
            yhat, h, c = infdec.predict([target_seq] + state)
            # Keep the single predicted vector of this step.
            output.append(yhat[0, 0, :])
            # Carry the states and the prediction into the next step.
            state = [h, c]
            target_seq = yhat
        return array(output)
    

    ``

    3 评估模型效果

    total = 100
    correct = 0
    for _ in range(total):
        # Draw one fresh sample and decode it with the inference models.
        X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
        target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
        # A sample counts as correct only when every decoded step matches.
        if array_equal(one_hot_decode(y[0]), one_hot_decode(target)):
            correct = correct + 1
    accuracy = float(correct) / float(total) * 100.0
    print('Accuracy: %.2f%%' % accuracy)
    

    4 完整代码

    from numpy import array
    from numpy import argmax
    from numpy import array_equal
    from keras.utils import to_categorical
    from keras.models import Model
    from keras.layers import Input
    from keras.layers import LSTM
    from keras.layers import Dense
    import numpy
    import tensorflow as tf
    tf.debugging.set_log_device_placement(True)
     
    # 随机产生在(1,n_features)区间的整数序列,序列长度为n_steps_in
    def generate_sequence(length, n_unique):
        """Return a list of ``length`` random integers in ``[1, n_unique - 1]``.

        0 is never produced, which leaves it free for ``get_dataset`` to use as
        the first decoder-input value, and every generated value is a valid
        class index when one-hot encoding with ``num_classes=n_unique``.

        Args:
            length: Number of integers to generate.
            n_unique: Exclusive upper bound of the generated values (>= 2).

        Returns:
            A Python list holding ``length`` ints.

        Raises:
            ValueError: Raised by NumPy when ``n_unique <= 1`` (empty range).
        """
        # np.random.randint excludes its upper bound (unlike random.randint),
        # so the bound must be n_unique for n_unique - 1 to be reachable.
        return np.random.randint(1, n_unique, size=length).tolist()
     
    # 构造LSTM模型输入需要的训练数据
    def get_dataset(n_in, n_out, cardinality, n_samples):
        """Build one-hot encoded encoder inputs, decoder inputs and labels.

        Each sample is a random source sequence of ``n_in`` integers. Its target
        is the first ``n_out`` source values in reverse order, and the decoder
        input is that target shifted right by one step with 0 in front.

        Args:
            n_in: Length of each source sequence.
            n_out: Number of leading source values that form the target.
            cardinality: Passed to ``generate_sequence`` as ``n_unique`` and to
                ``to_categorical`` as ``num_classes``.
            n_samples: Number of samples to generate.

        Returns:
            Tuple ``(X1, X2, y)`` of arrays: encoded sources, encoded shifted
            targets (decoder inputs) and encoded targets (labels).
        """
        sources, shifted_targets, targets = [], [], []
        for _ in range(n_samples):
            src = generate_sequence(n_in, cardinality)
            # Target: the first n_out source values, reversed.
            tgt = src[:n_out][::-1]
            # Decoder input: the target delayed by one step, starting with 0.
            tgt_in = [0] + tgt[:-1]

            sources.append(to_categorical(src, num_classes=cardinality))
            shifted_targets.append(to_categorical(tgt_in, num_classes=cardinality))
            targets.append(to_categorical(tgt, num_classes=cardinality))
        return array(sources), array(shifted_targets), array(targets)
     
    # 构造Seq2Seq训练模型model, 以及进行新序列预测时需要的的Encoder模型:encoder_model 与Decoder模型:decoder_model
    def define_models(n_input, n_output, n_units):
        """Build the seq2seq training model and the two models used for inference.

        The inference encoder and decoder are assembled from the same ``LSTM``
        and ``Dense`` layer objects as the training model.

        Args:
            n_input: Size of the last axis of the encoder input.
            n_output: Size of the last axis of the decoder input; also the number
                of units of the softmax ``Dense`` output layer.
            n_units: Number of units of both LSTM layers.

        Returns:
            Tuple ``(model, encoder_model, decoder_model)``.
        """
        # --- training graph: encoder (only its final states are kept) ---
        enc_in = Input(shape=(None, n_input))
        enc_lstm = LSTM(n_units, return_state=True)
        _, enc_h, enc_c = enc_lstm(enc_in)
        enc_states = [enc_h, enc_c]

        # --- training graph: decoder, initialised with the encoder states ---
        dec_in = Input(shape=(None, n_output))
        dec_lstm = LSTM(n_units, return_sequences=True, return_state=True)
        dec_seq, _, _ = dec_lstm(dec_in, initial_state=enc_states)
        dec_dense = Dense(n_output, activation='softmax')
        model = Model([enc_in, dec_in], dec_dense(dec_seq))

        # --- inference encoder: source sequence -> [hidden state, cell state] ---
        encoder_model = Model(enc_in, enc_states)

        # --- inference decoder: states are fed in and returned explicitly ---
        state_in_h = Input(shape=(n_units,))
        state_in_c = Input(shape=(n_units,))
        state_inputs = [state_in_h, state_in_c]
        step_seq, step_h, step_c = dec_lstm(dec_in, initial_state=state_inputs)
        step_probs = dec_dense(step_seq)
        decoder_model = Model([dec_in] + state_inputs, [step_probs, step_h, step_c])

        return model, encoder_model, decoder_model
     
    def predict_sequence(infenc, infdec, source, n_steps, cardinality):
        """Decode ``n_steps`` output vectors for ``source`` with the inference models.

        Args:
            infenc: Encoder inference model; its ``predict(source)`` must return
                the list of initial decoder states.
            infdec: Decoder inference model; its ``predict`` receives
                ``[target_seq] + state`` and returns ``(yhat, h, c)``.
            source: Encoded source sequence handed to ``infenc.predict``.
            n_steps: Number of decoding steps to run.
            cardinality: Length of the one-hot vectors.

        Returns:
            Array stacking ``yhat[0, 0, :]`` of every decoding step.
        """
        # Encode the source sequence into the initial decoder states.
        state = infenc.predict(source)
        # The first decoder input must match training: get_dataset starts every
        # decoder input with to_categorical(0), i.e. a one-hot vector with the 1
        # at index 0, not an all-zero vector.
        start_token = [1.0 if i == 0 else 0.0 for i in range(cardinality)]
        target_seq = array(start_token).reshape(1, 1, cardinality)
        output = []
        for _ in range(n_steps):
            # Predict the next element and the updated states.
            yhat, h, c = infdec.predict([target_seq] + state)
            # Keep the single predicted vector of this step.
            output.append(yhat[0, 0, :])
            # Carry the states and the prediction into the next step.
            state = [h, c]
            target_seq = yhat
        return array(output)
     
    # one_hot解码
    def one_hot_decode(encoded_seq):
        """Map each vector of ``encoded_seq`` to the index of its largest entry."""
        return list(map(argmax, encoded_seq))
     
    # 参数设置
    import numpy as np

    # Hyper-parameters: one-hot vector length, source length, target length.
    n_features = 10 + 1
    n_steps_in, n_steps_out = 6, 3

    # Build the training model and the two inference models, then compile.
    train, infenc, infdec = define_models(n_features, n_features, 16)
    train.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

    # Generate the training set and report the array shapes.
    X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1000)
    print(X1.shape, X2.shape, y.shape)

    # Fit with X1 as encoder input and X2 as decoder input against labels y.
    train.fit([X1, X2], y, epochs=500)

    # Evaluate: a freshly generated sample is correct only if every decoded
    # step matches the expected target.
    total = 100
    correct = 0
    for _ in range(total):
        X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
        target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
        if array_equal(one_hot_decode(y[0]), one_hot_decode(target)):
            correct = correct + 1
    accuracy = float(correct) / float(total) * 100.0
    print('Accuracy: %.2f%%' % accuracy)

    # Print a few source / expected / predicted triples for inspection.
    for _ in range(10):
        X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
        target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
        decoded = (one_hot_decode(X1[0]), one_hot_decode(y[0]), one_hot_decode(target))
        print('X=%s y=%s, yhat=%s' % decoded)
    
  • 相关阅读:
    Logistic Regression
    如何把日期格式化为指定格式?
    JavaScript的自调用函数
    elementui 在原生方法参数里,添加参数
    原生js实现随着滚动条滚动,导航会自动切换的效果
    微信小程序-canvas绘制文字实现自动换行
    visual studio 和 sql server 的激活密钥序列号
    跨多个服务器访问不同数据库的表的方法
    数据库面试中常问的几个问题
    聚集索引和非聚集索引的区别
  • 原文地址:https://www.cnblogs.com/zhou-lin/p/14033439.html
Copyright © 2020-2023  润新知