• 跟我学算法-match-LSTM(向唐老师看齐)


    对于match-LSTM:在每个时间步,先由 si、hi、qi(上一步的隐藏状态与两段文本的LSTM编码)计算注意力权重,得到加权后的上下文向量;再将该向量与当前词的编码hi拼接,重新输入到LSTM网络中。整个模型以端到端的方式训练。

    参考的博客:https://blog.csdn.net/laddie132/article/details/79159895  #MATCH-LSTM原理

                          https://blog.csdn.net/jdbc/article/details/80755576          # 将SQUAD数据集转换为id

                          https://blog.csdn.net/xbinworld/article/details/54607525   # 注意力机制模型

                         https://blog.csdn.net/appleml/article/details/76607980      # Pointer Network(指针网络)模型

    # !/usr/bin/env python3
    # -*- coding: utf-8 -*-
    
    import tensorflow as tf
    import numpy as np
    import tensorflow.contrib as contrib
    
    # from app.decorator import exe_time
    
    
    class MatchLstm:
        # @exe_time
        def __init__(self, vocab_size, sentence_size, embedding_size,
                     word_embedding, initializer=None,
                     session=None, num_class=3,
                     window_size=4, name='MatchLstm', initial_lr=0.001):
            """Store the hyper-parameters and build the complete training graph.

            :param vocab_size: number of rows of the word-embedding table
            :param sentence_size: fixed (padded) length of every input sentence
            :param embedding_size: width of a word vector; the LSTM cells built
                in ``_inference`` use the same number of units
            :param word_embedding: initial value of the frozen embedding table
            :param initializer: initializer for the trainable weights; ``None``
                selects ``tf.truncated_normal_initializer(stddev=0.1)``
            :param session: session stored on the instance; ``None`` creates a
                fresh ``tf.Session()`` when the model is constructed
            :param num_class: number of output classes
            :param window_size: how many neighbours on each side are averaged
                to stand in for an unknown word (id -1)
            :param name: prefix used for the variable scopes
            :param initial_lr: initial value of the learning-rate variable
            """
            # The defaults are resolved here instead of in the signature.  A
            # default of ``tf.Session()`` is evaluated once, when the class body
            # is executed, which opens a session at import time that is never
            # closed and is shared by every instance.
            if initializer is None:
                initializer = tf.truncated_normal_initializer(stddev=0.1)
            if session is None:
                session = tf.Session()

            # Size of the vocabulary.
            self._vocab_size = vocab_size
            # Padded sentence length.
            self._sentence_size = sentence_size
            # Word-vector width / hidden size.
            self._embedding_size = embedding_size
            # Initial value of the embedding table.
            self._we = word_embedding
            # Initializer for the trainable weights.
            self._initializer = initializer
            # Scope-name prefix.
            self._name = name
            # Number of output classes.
            self._num_class = num_class
            self._sess = session
            # Context window used for unknown words.
            self._window_size = window_size
            # Initial learning rate.
            self._initial_lr = initial_lr
            # Placeholders, learning-rate variable and embedded inputs.
            self._build_inputs_and_vars()
            # Model structure, loss and prediction.
            self._inference()
            # Optimizer and training op.
            self._initial_optimizer()
    
        def _build_inputs_and_vars(self):
            """Create the placeholders, the learning-rate plumbing and the embedded inputs."""
            sentence_shape = [None, self._sentence_size]
            # Token ids of the first sentence of every pair.
            self.premises = tf.placeholder(dtype=tf.int32, shape=sentence_shape,
                                           name='premises')
            # Token ids of the second sentence of every pair.
            self.hypotheses = tf.placeholder(dtype=tf.int32, shape=sentence_shape,
                                             name='hypotheses')
            # One row of class targets per pair.
            self.labels = tf.placeholder(dtype=tf.float32,
                                         shape=[None, self._num_class],
                                         name='labels')
            # The batch size is only known at run time, so it is read from the
            # first dimension of the fed premises.
            self._batch_size = tf.shape(self.premises)[0]

            # Non-trainable scalar holding the current learning rate.
            self.lr = tf.get_variable(name='lr', shape=[], dtype=tf.float32,
                                      initializer=tf.constant_initializer(self._initial_lr),
                                      trainable=False)
            # Feed ``new_lr`` and run ``lr_update_op`` to overwrite ``lr``.
            self.new_lr = tf.placeholder(dtype=tf.float32, shape=[],
                                         name='new_lr')
            self.lr_update_op = tf.assign(self.lr, self.new_lr)

            with tf.variable_scope(self._name):
                # Frozen lookup table initialised from the supplied embedding.
                table_shape = [self._vocab_size, self._embedding_size]
                self._word_embedding = tf.get_variable(
                    name='word_embedding',
                    shape=table_shape,
                    initializer=tf.constant_initializer(self._we),
                    trainable=False)

            # Embedded premises; unknown words (id -1) are replaced by the mean
            # of their neighbours inside ``_embed_inputs``.
            self._embed_pre = self._embed_inputs(self.premises, self._word_embedding)
            # Embedded hypotheses, produced the same way.
            self._embed_hyp = self._embed_inputs(self.hypotheses, self._word_embedding)
    
        def _inference(self):
            """Build the forward pass, the loss and the prediction op.

            Both inputs are encoded by separate LSTMs, every pair is run through
            the match-LSTM (``_match_sent``) and the final hidden state of each
            pair is passed through one fully connected layer.  Sets ``h_s``,
            ``h_t``, ``lstm_m``, ``h_m_tensor``, ``logits``, ``loss_op`` and
            ``predict_op`` on the instance.
            """
            with tf.variable_scope('{}_lstm_s'.format(self._name)):
                # Encode the premises with their own LSTM.
                lstm_s = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
                pre_length = self._length(self.premises)
                h_s, _ = tf.nn.dynamic_rnn(lstm_s, self._embed_pre, sequence_length=pre_length,
                                           dtype=tf.float32)
                self.h_s = h_s

            with tf.variable_scope('{}_lstm_t'.format(self._name)):
                # Encode the hypotheses with a second, independent LSTM.
                lstm_t = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
                hyp_length = self._length(self.hypotheses)
                h_t, _ = tf.nn.dynamic_rnn(lstm_t, self._embed_hyp, sequence_length=hyp_length,
                                           dtype=tf.float32)
                self.h_t = h_t

            # Cell that is stepped manually inside ``_match_attention``.
            self.lstm_m = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size,
                                                    forget_bias=0.0)
            # Collects one final match-LSTM hidden state per example.
            h_m_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)

            i = tf.constant(0)
            # Iterate over the examples of the batch: ``c`` is the loop
            # condition, ``b`` the loop body.
            c = lambda x, y: tf.less(x, self._batch_size)
            b = lambda x, y: self._match_sent(x, y)
            res = tf.while_loop(cond=c, body=b, loop_vars=(i, h_m_arr))
            # Every stored state was computed with batch_size=1, so the stacked
            # tensor carries a singleton axis 1 that is dropped here.
            self.h_m_tensor = tf.squeeze(res[-1].stack(), axis=[1])

            # Fully connected layer mapping the hidden state to class logits.
            with tf.variable_scope('{}_fully_connect'.format(self._name)):
                w_fc = tf.get_variable(shape=[self._embedding_size, self._num_class],
                                       initializer=self._initializer, name='w_fc')
                b_fc = tf.get_variable(shape=[self._num_class],
                                       initializer=self._initializer, name='b_fc')
                self.logits = tf.matmul(self.h_m_tensor, w_fc) + b_fc

            # Softmax cross entropy: one loss value per example (not a scalar).
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.labels,
                                                                    logits=self.logits,
                                                                    name='cross_entropy')
            # Sum over the batch ...
            cross_entropy_sum = tf.reduce_sum(cross_entropy, name='cross_entropy_sum')
            # ... and divide by the batch size to obtain the mean loss.  Plain
            # ``/`` replaces the deprecated ``tf.div``; both operands are float32.
            self.loss_op = cross_entropy_sum / tf.cast(self._batch_size, dtype=tf.float32)
            # Index of the largest logit of every example.  ``tf.argmax`` with
            # ``axis`` replaces the deprecated ``tf.arg_max(..., dimension=1)``.
            self.predict_op = tf.argmax(self.logits, axis=1)
    
        def _match_sent(self, i, h_m_arr):
            """Run the match-LSTM over pair ``i`` and record its final hidden state.

            Used as the body of the per-example ``tf.while_loop`` in
            ``_inference``.

            :param i: scalar index of the example inside the batch
            :param h_m_arr: TensorArray receiving the final hidden state
            :return: ``(i + 1, updated TensorArray)``
            """
            # Encoder outputs of this example.
            premise_states = self.h_s[i]
            hypothesis_states = self.h_t[i]
            # Number of non-padding tokens on each side.
            premise_len = self._length(self.premises[i])
            hypothesis_len = self._length(self.hypotheses[i])

            start_state = self.lstm_m.zero_state(batch_size=1, dtype=tf.float32)
            first_step = tf.constant(0)

            def keep_going(step, *_):
                # One iteration per hypothesis token.
                return tf.less(step, hypothesis_len)

            def advance(step, h_s, h_t, length_s, state):
                return self._match_attention(step, h_s, h_t, length_s, state)

            loop_out = tf.while_loop(
                cond=keep_going, body=advance,
                loop_vars=(first_step, premise_states, hypothesis_states,
                           premise_len, start_state))

            # Only the hidden state after the last step is kept.
            written = h_m_arr.write(i, loop_out[-1].h)
            return tf.add(i, 1), written
    
        def _match_attention(self, k, h_s, h_t, length_s, state):
            """Perform one match-LSTM step for hypothesis position ``k``.

            Attention scores over the first ``length_s`` premise states are
            computed from the premise states, the ``k``-th hypothesis state and
            the previous match-LSTM hidden state.  The attention-weighted
            premise vector is concatenated with the hypothesis state and fed
            to ``self.lstm_m``.

            :param k: scalar step index
            :param h_s: encoder outputs of the premise of one example
            :param h_t: encoder outputs of the hypothesis of one example
            :param length_s: number of premise positions to attend over
            :param state: current state of ``self.lstm_m``
            :return: ``(k + 1, h_s, h_t, length_s, new_state)`` so the tuple can
                be used directly as ``tf.while_loop`` loop variables
            """
            # k-th hypothesis state as a single row.
            h_t_k = tf.reshape(h_t[k], [1, -1])
            # Drop the padded tail of the premise.
            h_s_j = tf.slice(h_s, begin=[0, 0], size=[length_s, self._embedding_size])

            with tf.variable_scope('{}_attention_w'.format(self._name)):
                w_s = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_s')
                w_t = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_t')
                w_m = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_m')
                w_e = tf.get_variable(shape=[self._embedding_size, 1],
                                      initializer=self._initializer, name='w_e')

            last_m_h = state.h
            # The two single-row terms broadcast over the premise positions.
            sum_h = tf.matmul(h_s_j, w_s) + tf.matmul(h_t_k, w_t) + tf.matmul(last_m_h, w_m)
            # One unnormalised score per premise position: a column vector
            # with ``length_s`` rows and one column.
            e_kj = tf.matmul(tf.tanh(sum_h), w_e)
            # Normalise across the premise positions.  ``tf.nn.softmax`` works
            # on the last axis, and the last axis of the column ``e_kj`` has
            # size 1, so applying it directly turns every weight into 1.0.
            # Transposing to a row first gives weights that sum to 1.
            a_kj = tf.nn.softmax(tf.transpose(e_kj))
            # Attention-weighted sum of the premise states (a single row).
            alpha_k = tf.matmul(a_kj, h_s_j)

            alpha_k.set_shape([1, self._embedding_size])
            # Input of the match-LSTM: attended premise vector + hypothesis state.
            m_k = tf.concat([alpha_k, h_t_k], axis=1)

            with tf.variable_scope('{}_lstm_m'.format(self._name)):
                # Advance the match-LSTM by one step; only the state is needed.
                _, new_state = self.lstm_m(inputs=m_k, state=state)

            k = tf.add(k, 1)
            return k, h_s, h_t, length_s, new_state
    
        def _embed_inputs(self, inputs, embeddings):
            """Embed a batch of id sequences, one example per loop iteration.

            :param inputs: integer tensor of token ids, one row per example
            :param embeddings: embedding table used for the lookup
            :return: float tensor reshaped to
                ``[-1, sentence_size, embedding_size]``
            """
            rows = tf.TensorArray(dtype=tf.float32, size=self._batch_size)
            start = tf.constant(0)

            def more_rows(idx, *_):
                # Continue while the row index is below the batch size.
                return tf.less(idx, self._batch_size)

            def embed_row(idx, ids, table, collected):
                return self._embed_line(idx, ids, table, collected)

            loop_out = tf.while_loop(cond=more_rows, body=embed_row,
                                     loop_vars=(start, inputs, embeddings, rows))
            stacked = loop_out[-1].stack()
            return tf.reshape(stacked, [-1, self._sentence_size, self._embedding_size])
    
        def _embed_line(self, i, inputs, embeddings, ndim0_tensor_arr):
            """Embed row ``i`` of ``inputs`` and write it into the TensorArray.

            Used as the ``tf.while_loop`` body of ``_embed_inputs``.  A first
            pass looks every token up in ``embeddings`` and uses a zero vector
            for the unknown-word id -1.  A second pass replaces every -1
            position by the mean of its neighbours (``_ave_vec``).

            :param i: scalar row index
            :param inputs: integer tensor of token ids
            :param embeddings: embedding table
            :param ndim0_tensor_arr: TensorArray collecting the embedded rows
            :return: the loop variables with ``i`` incremented and the
                TensorArray updated
            """
            ndim1_list = []
            # First pass: one vector per position of the sentence.
            for j in range(self._sentence_size):
                # Id of the j-th token of row i.
                word = inputs[i][j]
                unk_word = tf.constant(-1)
                # Regular token: look its vector up in the embedding table.
                # NOTE(review): f1/f2 close over the loop variable ``word``; this
                # relies on tf.case invoking them before the next iteration - confirm.
                f1 = lambda: tf.squeeze(tf.nn.embedding_lookup(params=embeddings, ids=word))
                # Unknown token: placeholder zero vector for now.
                f2 = lambda: tf.zeros(shape=[self._embedding_size])
                # f1 when the id differs from -1, otherwise f2.
                res_tensor = tf.case([(tf.not_equal(word, unk_word), f1)], default=f2)
                # Collect the vector for position j.
                ndim1_list.append(res_tensor)
            # Second pass: fill in the unknown positions.
            for j in range(self._sentence_size):
                word = inputs[i][j]
                unk_word = tf.constant(-1)
                # Id -1: use the average of the surrounding window.  The list is
                # updated in place, so a later window sees earlier replacements.
                f1 = lambda: self._ave_vec(ndim1_list, j)
                # Any other id: keep the vector from the first pass.
                f2 = lambda: ndim1_list[j]
                ndim1_list[j] = tf.case([(tf.not_equal(word, unk_word), f2)],
                                        default=f1)
            # Stack the per-position vectors into one tensor for the sentence.
            ndim1_tensor = tf.stack(ndim1_list)
            ndim0_tensor_arr = ndim0_tensor_arr.write(i, ndim1_tensor)
            i = tf.add(i, 1)
            return i, inputs, embeddings, ndim0_tensor_arr
    
        def _ave_vec(self, embed_list, cur_pos):
            """Return the mean of the vectors surrounding ``cur_pos``.

            Used as the stand-in vector of an unknown word: up to
            ``self._window_size`` entries on each side of ``cur_pos`` are
            averaged; the entry at ``cur_pos`` itself is excluded.

            :param embed_list: Python list with one vector per sentence position
            :param cur_pos: Python int, position of the unknown word
            :return: the averaged vector, or a zero vector of length
                ``self._embedding_size`` when the window holds no neighbour
            """
            # Inclusive bounds of the window, clamped to the sentence.
            left_pos = max(0, cur_pos - self._window_size)
            right_pos = min(cur_pos + self._window_size, self._sentence_size - 1)
            # Neighbours on both sides, without the current position.
            e_list = embed_list[left_pos:cur_pos] + embed_list[cur_pos + 1:right_pos + 1]
            if not e_list:
                # A one-word sentence or a zero-width window leaves nothing to
                # average; stacking an empty list would not produce a vector of
                # the expected length, so fall back to zeros.
                return tf.zeros(shape=[self._embedding_size])
            # Stack the neighbours and average over the stacking axis.
            e_tensor = tf.stack(e_list)
            ave_tensor = tf.reduce_mean(e_tensor, axis=0)
            return ave_tensor
    
        @staticmethod
        def _length(sequence):
            """Count the non-zero entries along the last axis of ``sequence``.

            ``sign(abs(x))`` is 1 for every non-zero id and 0 for id 0, so the
            sum is the number of non-zero tokens (the id -1 is counted as well).
            """
            return tf.reduce_sum(tf.sign(tf.abs(sequence)), axis=-1)
    
        def _initial_optimizer(self):
            """Create the global-step counter, the Adam optimizer and the training op."""
            step_scope = '{}_step'.format(self._name)
            with tf.variable_scope(step_scope):
                # Integer counter that ``minimize`` increments on every update.
                self.global_step = tf.get_variable(name='global_step',
                                                   shape=[],
                                                   dtype=tf.int32,
                                                   initializer=tf.constant_initializer(0))
            # Adam reads its step size from the ``lr`` variable.
            adam = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.999)
            self._optimizer = adam
            # Op that applies one update step minimising ``loss_op``.
            self.train_op = adam.minimize(self.loss_op, global_step=self.global_step)
    
    
    if __name__ == '__main__':
        # Small smoke test: train on three hand-written sentence pairs.
        with tf.Session() as sess:
            vocab_size, sentence_size, embedding_size = 7, 5, 6
            # One embedding row per vocabulary id.  The table must match
            # ``vocab_size``: a smaller array is silently padded by
            # ``tf.constant_initializer``, so the ids 4 and 5 used below would
            # not get rows of their own.
            embedding = np.random.randn(vocab_size, embedding_size)
            # Id 0 is the padding token and maps to the zero vector.
            embedding[0] = 0.0
            model = MatchLstm(vocab_size=vocab_size, sentence_size=sentence_size,
                              embedding_size=embedding_size,
                              word_embedding=embedding, session=sess)
            # -1 marks an unknown word, 0 is padding.
            sent1 = [[3, -1, 2, 1, 0],
                     [4, 5, 1, 0, 0],
                     [2, 1, 0, 0, 0]]

            sent2 = [[2, 1, 0, 0, 0],
                     [3, -1, 2, 1, 0],
                     [4, 5, 1, 0, 0]]

            labels = [[1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]]

            sess.run(tf.global_variables_initializer())
            # Training loop.  The learning rate is not fed: the ``lr`` variable
            # already starts at the default ``initial_lr`` of 0.001 (use
            # ``model.lr_update_op`` with ``model.new_lr`` to change it).
            for epoch in range(300):
                loss, _, step = sess.run([model.loss_op, model.train_op, model.global_step],
                                         feed_dict={model.premises: sent1, model.hypotheses: sent2,
                                                    model.labels: labels})
                print(step, loss)
                # Alternate the roles of the two sentence sets every step.
                sent1, sent2 = sent2, sent1
    # !/usr/bin/env python3
    # -*- coding: utf-8 -*-

    import tensorflow as tf
    import numpy as np
    import tensorflow.contrib as contrib

    # from app.decorator import exe_time


    class MatchLstm:
        """Match-LSTM sentence-pair classifier built as a TensorFlow 1.x graph.

        Two id sequences are embedded, encoded by separate LSTMs and combined
        by an attention-driven match-LSTM.  The final hidden state of every
        pair is classified by one fully connected layer.
        """

        # @exe_time
        def __init__(self, vocab_size, sentence_size, embedding_size,
                     word_embedding, initializer=None,
                     session=None, num_class=3,
                     window_size=4, name='MatchLstm', initial_lr=0.001):
            """Store the hyper-parameters and build the complete training graph.

            :param vocab_size: number of rows of the word-embedding table
            :param sentence_size: fixed (padded) length of every input sentence
            :param embedding_size: width of a word vector; the LSTM cells use
                the same number of units
            :param word_embedding: initial value of the frozen embedding table
            :param initializer: initializer for the trainable weights; ``None``
                selects ``tf.truncated_normal_initializer(stddev=0.1)``
            :param session: session stored on the instance; ``None`` creates a
                fresh ``tf.Session()`` when the model is constructed
            :param num_class: number of output classes
            :param window_size: how many neighbours on each side are averaged
                to stand in for an unknown word (id -1)
            :param name: prefix used for the variable scopes
            :param initial_lr: initial value of the learning-rate variable
            """
            # Resolve the defaults here: a signature default of
            # ``tf.Session()`` is evaluated once, when the class body runs,
            # which opens a session at import time that is never closed and
            # is shared by every instance.
            if initializer is None:
                initializer = tf.truncated_normal_initializer(stddev=0.1)
            if session is None:
                session = tf.Session()

            # Size of the vocabulary.
            self._vocab_size = vocab_size
            # Padded sentence length.
            self._sentence_size = sentence_size
            # Word-vector width / hidden size.
            self._embedding_size = embedding_size
            # Initial value of the embedding table.
            self._we = word_embedding
            # Initializer for the trainable weights.
            self._initializer = initializer
            # Scope-name prefix.
            self._name = name
            # Number of output classes.
            self._num_class = num_class
            self._sess = session
            # Context window used for unknown words.
            self._window_size = window_size
            # Initial learning rate.
            self._initial_lr = initial_lr
            # Placeholders, learning-rate variable and embedded inputs.
            self._build_inputs_and_vars()
            # Model structure, loss and prediction.
            self._inference()
            # Optimizer and training op.
            self._initial_optimizer()

        def _build_inputs_and_vars(self):
            """Create the placeholders, the learning-rate plumbing and the embedded inputs."""
            # Token ids of the first sentence of every pair.
            self.premises = tf.placeholder(shape=[None, self._sentence_size], dtype=tf.int32,
                                           name='premises')
            # Token ids of the second sentence of every pair.
            self.hypotheses = tf.placeholder(shape=[None, self._sentence_size], dtype=tf.int32,
                                             name='hypotheses')
            # One row of class targets per pair.
            self.labels = tf.placeholder(shape=[None, self._num_class], dtype=tf.float32,
                                         name='labels')
            # The batch size is only known at run time.
            self._batch_size = tf.shape(self.premises)[0]
            # Non-trainable scalar holding the current learning rate.
            self.lr = tf.get_variable(shape=[], dtype=tf.float32, trainable=False,
                                      initializer=tf.constant_initializer(self._initial_lr), name='lr')
            # Feed ``new_lr`` and run ``lr_update_op`` to overwrite ``lr``.
            self.new_lr = tf.placeholder(shape=[], dtype=tf.float32,
                                         name='new_lr')
            self.lr_update_op = tf.assign(self.lr, self.new_lr)

            with tf.variable_scope(self._name):
                # Frozen lookup table initialised from the supplied embedding.
                self._word_embedding = tf.get_variable(name='word_embedding',
                                                       shape=[self._vocab_size, self._embedding_size],
                                                       initializer=tf.constant_initializer(self._we),
                                                       trainable=False)
            # Embedded premises; unknown words are replaced by a window average.
            self._embed_pre = self._embed_inputs(self.premises, self._word_embedding)
            # Embedded hypotheses, produced the same way.
            self._embed_hyp = self._embed_inputs(self.hypotheses, self._word_embedding)

        def _inference(self):
            """Build the forward pass, the loss and the prediction op.

            Sets ``h_s``, ``h_t``, ``lstm_m``, ``h_m_tensor``, ``logits``,
            ``loss_op`` and ``predict_op`` on the instance.
            """
            with tf.variable_scope('{}_lstm_s'.format(self._name)):
                # Encode the premises with their own LSTM.
                lstm_s = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
                pre_length = self._length(self.premises)
                h_s, _ = tf.nn.dynamic_rnn(lstm_s, self._embed_pre, sequence_length=pre_length,
                                           dtype=tf.float32)
                self.h_s = h_s

            with tf.variable_scope('{}_lstm_t'.format(self._name)):
                # Encode the hypotheses with a second, independent LSTM.
                lstm_t = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size, forget_bias=0.0)
                hyp_length = self._length(self.hypotheses)
                h_t, _ = tf.nn.dynamic_rnn(lstm_t, self._embed_hyp, sequence_length=hyp_length,
                                           dtype=tf.float32)
                self.h_t = h_t

            # Cell that is stepped manually inside ``_match_attention``.
            self.lstm_m = contrib.rnn.BasicLSTMCell(num_units=self._embedding_size,
                                                    forget_bias=0.0)
            # Collects one final match-LSTM hidden state per example.
            h_m_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)

            i = tf.constant(0)
            # Iterate over the examples: ``c`` is the condition, ``b`` the body.
            c = lambda x, y: tf.less(x, self._batch_size)
            b = lambda x, y: self._match_sent(x, y)
            res = tf.while_loop(cond=c, body=b, loop_vars=(i, h_m_arr))
            # Every stored state was computed with batch_size=1, so the stacked
            # tensor carries a singleton axis 1 that is dropped here.
            self.h_m_tensor = tf.squeeze(res[-1].stack(), axis=[1])

            # Fully connected layer mapping the hidden state to class logits.
            with tf.variable_scope('{}_fully_connect'.format(self._name)):
                w_fc = tf.get_variable(shape=[self._embedding_size, self._num_class],
                                       initializer=self._initializer, name='w_fc')
                b_fc = tf.get_variable(shape=[self._num_class],
                                       initializer=self._initializer, name='b_fc')
                self.logits = tf.matmul(self.h_m_tensor, w_fc) + b_fc

            # Softmax cross entropy: one loss value per example (not a scalar).
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.labels,
                                                                    logits=self.logits,
                                                                    name='cross_entropy')
            # Sum over the batch ...
            cross_entropy_sum = tf.reduce_sum(cross_entropy, name='cross_entropy_sum')
            # ... and divide by the batch size to obtain the mean loss.  Plain
            # ``/`` replaces the deprecated ``tf.div``; both operands are float32.
            self.loss_op = cross_entropy_sum / tf.cast(self._batch_size, dtype=tf.float32)
            # Index of the largest logit of every example.  ``tf.argmax`` with
            # ``axis`` replaces the deprecated ``tf.arg_max(..., dimension=1)``.
            self.predict_op = tf.argmax(self.logits, axis=1)

        def _match_sent(self, i, h_m_arr):
            """Run the match-LSTM over pair ``i`` and record its final hidden state.

            :param i: scalar index of the example inside the batch
            :param h_m_arr: TensorArray receiving the final hidden state
            :return: ``(i + 1, updated TensorArray)``
            """
            # Encoder outputs of this example.
            h_s_i = self.h_s[i]
            h_t_i = self.h_t[i]
            # Number of non-padding tokens on each side.
            length_s_i = self._length(self.premises[i])
            length_t_i = self._length(self.hypotheses[i])

            state = self.lstm_m.zero_state(batch_size=1, dtype=tf.float32)

            k = tf.constant(0)
            # One iteration per hypothesis token.
            c = lambda a, x, y, z, s: tf.less(a, length_t_i)
            b = lambda a, x, y, z, s: self._match_attention(a, x, y, z, s)
            res = tf.while_loop(cond=c, body=b, loop_vars=(k, h_s_i, h_t_i, length_s_i, state))
            # Only the hidden state after the last step is kept.
            final_state_h = res[-1].h
            h_m_arr = h_m_arr.write(i, final_state_h)

            i = tf.add(i, 1)
            return i, h_m_arr

        def _match_attention(self, k, h_s, h_t, length_s, state):
            """Perform one match-LSTM step for hypothesis position ``k``.

            :param k: scalar step index
            :param h_s: encoder outputs of the premise of one example
            :param h_t: encoder outputs of the hypothesis of one example
            :param length_s: number of premise positions to attend over
            :param state: current state of ``self.lstm_m``
            :return: ``(k + 1, h_s, h_t, length_s, new_state)`` so the tuple can
                be used directly as ``tf.while_loop`` loop variables
            """
            # k-th hypothesis state as a single row.
            h_t_k = tf.reshape(h_t[k], [1, -1])
            # Drop the padded tail of the premise.
            h_s_j = tf.slice(h_s, begin=[0, 0], size=[length_s, self._embedding_size])

            with tf.variable_scope('{}_attention_w'.format(self._name)):
                w_s = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_s')
                w_t = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_t')
                w_m = tf.get_variable(shape=[self._embedding_size, self._embedding_size],
                                      initializer=self._initializer, name='w_m')
                w_e = tf.get_variable(shape=[self._embedding_size, 1],
                                      initializer=self._initializer, name='w_e')

            last_m_h = state.h
            # The two single-row terms broadcast over the premise positions.
            sum_h = tf.matmul(h_s_j, w_s) + tf.matmul(h_t_k, w_t) + tf.matmul(last_m_h, w_m)
            # One unnormalised score per premise position (a column vector).
            e_kj = tf.matmul(tf.tanh(sum_h), w_e)
            # Normalise across the premise positions.  ``tf.nn.softmax`` works
            # on the last axis, and the last axis of the column ``e_kj`` has
            # size 1, so applying it directly turns every weight into 1.0.
            # Transposing to a row first gives weights that sum to 1.
            a_kj = tf.nn.softmax(tf.transpose(e_kj))
            # Attention-weighted sum of the premise states (a single row).
            alpha_k = tf.matmul(a_kj, h_s_j)

            alpha_k.set_shape([1, self._embedding_size])
            # Input of the match-LSTM: attended premise vector + hypothesis state.
            m_k = tf.concat([alpha_k, h_t_k], axis=1)

            with tf.variable_scope('{}_lstm_m'.format(self._name)):
                # Advance the match-LSTM by one step; only the state is needed.
                _, new_state = self.lstm_m(inputs=m_k, state=state)

            k = tf.add(k, 1)
            return k, h_s, h_t, length_s, new_state

        def _embed_inputs(self, inputs, embeddings):
            """Embed a batch of id sequences, one example per loop iteration.

            :param inputs: integer tensor of token ids, one row per example
            :param embeddings: embedding table used for the lookup
            :return: float tensor reshaped to
                ``[-1, sentence_size, embedding_size]``
            """
            ndim0_tensor_arr = tf.TensorArray(dtype=tf.float32, size=self._batch_size)
            i = tf.constant(0)
            # Continue while the row index is below the batch size.
            c = lambda x, y, z, n: tf.less(x, self._batch_size)
            b = lambda x, y, z, n: self._embed_line(x, y, z, n)
            res = tf.while_loop(cond=c, body=b,
                                loop_vars=(i, inputs, embeddings, ndim0_tensor_arr))
            ndim0_tensor = res[-1].stack()
            ndim0_tensor = tf.reshape(ndim0_tensor, [-1, self._sentence_size, self._embedding_size])
            return ndim0_tensor

        def _embed_line(self, i, inputs, embeddings, ndim0_tensor_arr):
            """Embed row ``i`` of ``inputs`` and write it into the TensorArray.

            A first pass looks every token up in ``embeddings`` and uses a zero
            vector for the unknown-word id -1.  A second pass replaces every -1
            position by the mean of its neighbours (``_ave_vec``).

            :return: the loop variables with ``i`` incremented and the
                TensorArray updated
            """
            ndim1_list = []
            # First pass: one vector per position of the sentence.
            for j in range(self._sentence_size):
                # Id of the j-th token of row i.
                word = inputs[i][j]
                unk_word = tf.constant(-1)
                # Regular token: look its vector up in the embedding table.
                # NOTE(review): f1/f2 close over the loop variable ``word``; this
                # relies on tf.case invoking them before the next iteration - confirm.
                f1 = lambda: tf.squeeze(tf.nn.embedding_lookup(params=embeddings, ids=word))
                # Unknown token: placeholder zero vector for now.
                f2 = lambda: tf.zeros(shape=[self._embedding_size])
                # f1 when the id differs from -1, otherwise f2.
                res_tensor = tf.case([(tf.not_equal(word, unk_word), f1)], default=f2)
                ndim1_list.append(res_tensor)
            # Second pass: fill in the unknown positions.
            for j in range(self._sentence_size):
                word = inputs[i][j]
                unk_word = tf.constant(-1)
                # Id -1: use the average of the surrounding window.  The list is
                # updated in place, so a later window sees earlier replacements.
                f1 = lambda: self._ave_vec(ndim1_list, j)
                # Any other id: keep the vector from the first pass.
                f2 = lambda: ndim1_list[j]
                ndim1_list[j] = tf.case([(tf.not_equal(word, unk_word), f2)],
                                        default=f1)
            # Stack the per-position vectors into one tensor for the sentence.
            ndim1_tensor = tf.stack(ndim1_list)
            ndim0_tensor_arr = ndim0_tensor_arr.write(i, ndim1_tensor)
            i = tf.add(i, 1)
            return i, inputs, embeddings, ndim0_tensor_arr

        def _ave_vec(self, embed_list, cur_pos):
            """Return the mean of the vectors surrounding ``cur_pos``.

            Up to ``self._window_size`` entries on each side of ``cur_pos`` are
            averaged; the entry at ``cur_pos`` itself is excluded.

            :param embed_list: Python list with one vector per sentence position
            :param cur_pos: Python int, position of the unknown word
            :return: the averaged vector, or a zero vector of length
                ``self._embedding_size`` when the window holds no neighbour
            """
            # Inclusive bounds of the window, clamped to the sentence.
            left_pos = max(0, cur_pos - self._window_size)
            right_pos = min(cur_pos + self._window_size, self._sentence_size - 1)
            # Neighbours on both sides, without the current position.
            e_list = embed_list[left_pos:cur_pos] + embed_list[cur_pos + 1:right_pos + 1]
            if not e_list:
                # Nothing to average (one-word sentence or zero-width window).
                return tf.zeros(shape=[self._embedding_size])
            # Stack the neighbours and average over the stacking axis.
            e_tensor = tf.stack(e_list)
            ave_tensor = tf.reduce_mean(e_tensor, axis=0)
            return ave_tensor

        @staticmethod
        def _length(sequence):
            """Count the non-zero entries along the last axis of ``sequence``."""
            # sign(abs(x)) is 1 for every non-zero id and 0 for id 0.
            mask = tf.sign(tf.abs(sequence))
            length = tf.reduce_sum(mask, axis=-1)
            return length

        def _initial_optimizer(self):
            """Create the global-step counter, the Adam optimizer and the training op."""
            with tf.variable_scope('{}_step'.format(self._name)):
                # Integer counter that ``minimize`` increments on every update.
                self.global_step = tf.get_variable(shape=[],
                                                   initializer=tf.constant_initializer(0),
                                                   dtype=tf.int32,
                                                   name='global_step')
            # Adam reads its step size from the ``lr`` variable.
            self._optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.999)
            # Op that applies one update step minimising ``loss_op``.
            self.train_op = self._optimizer.minimize(self.loss_op, global_step=self.global_step)


    if __name__ == '__main__':
        # Small smoke test: train on three hand-written sentence pairs.
        with tf.Session() as sess:
            vocab_size, sentence_size, embedding_size = 7, 5, 6
            # One embedding row per vocabulary id.  The table must match
            # ``vocab_size``: a smaller array is silently padded by
            # ``tf.constant_initializer``, so the ids 4 and 5 used below would
            # not get rows of their own.
            embedding = np.random.randn(vocab_size, embedding_size)
            # Id 0 is the padding token and maps to the zero vector.
            embedding[0] = 0.0
            model = MatchLstm(vocab_size=vocab_size, sentence_size=sentence_size,
                              embedding_size=embedding_size,
                              word_embedding=embedding, session=sess)
            # -1 marks an unknown word, 0 is padding.
            sent1 = [[3, -1, 2, 1, 0],
                     [4, 5, 1, 0, 0],
                     [2, 1, 0, 0, 0]]

            sent2 = [[2, 1, 0, 0, 0],
                     [3, -1, 2, 1, 0],
                     [4, 5, 1, 0, 0]]

            labels = [[1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]]

            sess.run(tf.global_variables_initializer())
            # Training loop.  The learning rate is not fed: the ``lr`` variable
            # already starts at the default ``initial_lr`` of 0.001 (use
            # ``model.lr_update_op`` with ``model.new_lr`` to change it).
            for epoch in range(300):
                loss, _, step = sess.run([model.loss_op, model.train_op, model.global_step],
                                         feed_dict={model.premises: sent1, model.hypotheses: sent2,
                                                    model.labels: labels})
                print(step, loss)
                # Alternate the roles of the two sentence sets every step.
                sent1, sent2 = sent2, sent1
  • 相关阅读:
    Python for Infomatics 第14章 数据库和SQL应用四(译)
    展望2017
    bing的简单英文字典工具
    自我安慰
    Python for Infomatics 第14章 数据库和SQL应用三(译)
    Python for Infomatics 第14章 数据库和SQL应用二(译)
    Python for Infomatics 第14章 数据库和SQL应用一(译)
    希望父亲早日恢复
    Python for Infomatics 第13章 网页服务四(译)
    Python for Infomatics 第13章 网页服务三(译)
  • 原文地址:https://www.cnblogs.com/my-love-is-python/p/10079876.html
Copyright © 2020-2023  润新知