• cnn for qa


    最近在做QA系统,用tensorflow做了些实验,下面是一个cnn的评分网络。主要参考了《APPLYING DEEP LEARNING TO ANSWER SELECTION: A STUDY AND AN OPEN TASK》这篇论文与wildml博客中的一篇文章。

    import tensorflow as tf
    import numpy as np
    class QaCNN():
        '''CNN scoring network for question/answer ranking.

        One set of convolution filters is shared between the question, the
        right answer and the wrong answer.  Each input is convolved, passed
        through ReLU and max-pooled, the pooled features of all filter sizes
        are concatenated, and the question vector is compared with each answer
        vector by cosine similarity.  Training minimizes the hinge loss
        ``max(0, margin - (cosineQR - cosineQW))``.

        Graph op names that other scripts look up by name ('question',
        'answer_right', 'answer_wrong', 'cosineQR', 'cosineQW',
        'losses/loss', ...) are fixed here and must not be changed casually.
        '''

        # Required gap between the right-answer and wrong-answer cosine.
        _MARGIN = 0.05
        # Lower bound applied to squared norms so that an all-zero feature
        # vector yields a cosine of 0 instead of 0/0 = NaN.
        _NORM_EPS = 1e-12

        def __init__(self, batchsize, sequencesize, vecsize, outsize, filtersizes, num_filters):
            '''Build the whole graph (placeholders, conv layers, loss, summaries).

            Args:
                batchsize: stored on the instance; the graph itself no longer
                    depends on it, so any batch size can be fed.
                sequencesize: number of tokens per (padded) sentence.
                vecsize: dimensionality of one token vector.
                outsize: stored on the instance, not used by the graph.
                filtersizes: iterable of convolution window heights (in tokens).
                num_filters: number of filters per window height.
            '''
            self.vecsize = vecsize
            self.outsize = outsize
            self.batchsize = batchsize
            self.sequencesize = sequencesize

            # Every input is a flattened [sequencesize * vecsize] sentence matrix.
            flat_input = vecsize * sequencesize
            self.question = tf.placeholder(tf.float32, [None, flat_input], name='question')
            # The misspelled attribute name is kept because callers use it.
            self.answer_rigth = tf.placeholder(tf.float32, [None, flat_input], name='answer_right')
            self.answer_wrong = tf.placeholder(tf.float32, [None, flat_input], name='answer_wrong')
            # Correctly spelled alias for the same placeholder.
            self.answer_right = self.answer_rigth

            # Reshape to NHWC "images": height = tokens, width = vector dim, 1 channel.
            image_shape = [-1, self.sequencesize, self.vecsize, 1]
            tenQ = tf.reshape(self.question, image_shape)
            tenR = tf.reshape(self.answer_rigth, image_shape)
            tenW = tf.reshape(self.answer_wrong, image_shape)

            tensorResultQ = []
            tensorResultR = []
            tensorResultW = []
            for filtersize in filtersizes:
                with tf.name_scope("conv-maxpool-%s" % filtersize):
                    # The filter spans the full vector width, so it slides
                    # along the token axis only.
                    filter_shape = [filtersize, self.vecsize, 1, num_filters]
                    W = tf.get_variable(initializer=tf.truncated_normal(filter_shape, stddev=0.1),
                                        name="W-%s" % str(filtersize))
                    b = tf.get_variable(initializer=tf.constant(0.1, shape=[num_filters]),
                                        name="b-%s" % str(filtersize))
                    # Same W and b for all three inputs (shared weights).
                    tensorResultQ.append(self.conv2dPool(tenQ, W, b, filtersize))
                    tensorResultR.append(self.conv2dPool(tenR, W, b, filtersize))
                    tensorResultW.append(self.conv2dPool(tenW, W, b, filtersize))

            # Concatenate along the channel axis and flatten to [batch, features].
            flat_length = len(filtersizes) * num_filters
            tenQ_flat = tf.reshape(tf.concat(tensorResultQ, 3), [-1, flat_length])
            tenR_flat = tf.reshape(tf.concat(tensorResultR, 3), [-1, flat_length])
            tenW_flat = tf.reshape(tf.concat(tensorResultW, 3), [-1, flat_length])

            cosineQR = self._cosine(tenQ_flat, tenR_flat, 'cosineQR')
            cosineQW = self._cosine(tenQ_flat, tenW_flat, 'cosineQW')

            with tf.name_scope('losses'):
                # Scalar constants broadcast against the per-example cosines,
                # so the loss works for any number of rows in the feed.
                zero = tf.constant(0.0, dtype=tf.float32)
                margin = tf.constant(self._MARGIN, dtype=tf.float32)
                self.losses = tf.maximum(zero, tf.subtract(margin, tf.subtract(cosineQR, cosineQW)),
                                         name='loss_tensor')
                self.loss = tf.reduce_sum(self.losses, name='loss')
            with tf.name_scope('acc'):
                # A pair counts as correct when its hinge loss is exactly zero,
                # i.e. cosineQR exceeds cosineQW by at least the margin.
                self.correct = tf.equal(zero, self.losses)
                self.accuracy = tf.reduce_mean(tf.cast(self.correct, 'float'), name='accuracy')
            tf.summary.scalar('loss', self.loss)
            self.variable_summaries(self.accuracy)
            self.merged = tf.summary.merge_all()

        def _cosine(self, a, b, name):
            '''Return the row-wise cosine similarity of two [batch, dim] tensors.

            Squared norms are clamped to ``_NORM_EPS`` before the square root so
            that zero vectors give a finite result; the final op is called
            `name`.
            '''
            dot = tf.reduce_sum(tf.multiply(a, b), 1)
            norm_a = tf.sqrt(tf.maximum(tf.reduce_sum(tf.multiply(a, a), 1), self._NORM_EPS))
            norm_b = tf.sqrt(tf.maximum(tf.reduce_sum(tf.multiply(b, b), 1), self._NORM_EPS))
            return tf.div(dot, tf.multiply(norm_a, norm_b), name=name)

        def variable_summaries(self, var):
            '''Attach a lot of summaries to a Tensor (for TensorBoard visualization).'''
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                with tf.name_scope('stddev'):
                    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                tf.summary.histogram('histogram', var)

        def conv2dPool(self, x, W, b, filtersize):
            '''Apply conv2d + bias + ReLU to `x`, then max-pool along the token axis.

            The pooling window height is ``sequencesize - filtersize + 1``,
            which is the height of the VALID convolution output when `x` has
            `sequencesize` rows, so one value per filter is kept.
            '''
            conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')
            h = tf.nn.relu(tf.nn.bias_add(conv, b))
            pooled = tf.nn.max_pool(h, ksize=[1, self.sequencesize - filtersize + 1, 1, 1],
                                    strides=[1, 1, 1, 1], padding='VALID')
            return pooled
    
    
    
    import numpy as np
    import time
    import os
    import tensorflow as tf
    from qacnn_g import *
    from process import *
    # Training script: builds the QaCNN graph, trains it with Adam on batches
    # produced by DataProcess, logs summaries every 10 steps and saves the
    # final model to ./model/qa.cnn.
    batchsize = 100
    sequencesize = 10
    vecsize = 200
    outsize = 10
    root = './lib/'
    filtersize = [1,2,3,5]
    num_filter = 500

    # Remove the output of a previous normalize() run so it is regenerated below.
    length_file = root + 'corpus.seg.length.out'
    if os.path.exists(length_file):
        os.remove(length_file)

    logfolder = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    cnn = QaCNN(batchsize , sequencesize , vecsize , outsize , filtersize , num_filter)
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cnn.loss)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter('./LOGS/'+logfolder,sess.graph)
    # tf.global_variables() is the non-deprecated name for tf.all_variables().
    saver = tf.train.Saver(tf.global_variables())
    print ('init...')
    dataprocess = DataProcess(root + 'word2vec.bin')
    dataprocess.normalize(root + 'corpus.seg.out', length_file)
    dataprocess.initdata(length_file)
    start = time.time()
    for i in range(120000):
        batch = dataprocess.nextbatch(batchsize)
        feed = {cnn.question: batch[0], cnn.answer_rigth: batch[1], cnn.answer_wrong: batch[2]}
        # Exactly one optimizer update per batch.  The previous version ran
        # train_step once unconditionally and then a second time in either
        # branch below, so every batch was applied twice.
        if i % 10 == 0:
            summary, loss, accuracy, _ = sess.run(
                [cnn.merged, cnn.loss, cnn.accuracy, train_step], feed)
            train_writer.add_summary(summary , i)
            end = time.time()
            elapse = (end - start)
            print ('iterator %d.\tloss=%f\taccuracy=%f\telapse=%f'%(i,loss,accuracy,elapse))
            # Restart the timer so `elapse` covers the last 10 steps only.
            start = time.time()
        else:
            sess.run(train_step, feed_dict=feed)
    train_writer.close()
    # NOTE(review): Saver.save is expected to fail when the parent directory
    # is missing, so make sure it exists first - confirm for your TF version.
    if not os.path.isdir('./model'):
        os.makedirs('./model')
    saver.save(sess , './model/qa.cnn')
    sess.close()
    print ('end...')
    
    

    使用模型进行打分,这里取了cosineQR(即正向答案与问题的cosine值)进行度量。

    # -*- coding: utf-8 -*-
    import tensorflow as tf
    import numpy as np
    import os
    import time
    from process import *
    
    # Scoring script: restores the latest checkpoint, then repeatedly scores a
    # fixed question against batches of candidate answers using the 'cosineQR'
    # op and keeps the best-scoring answer.
    # NOTE(review): the first flag's *name* is a file path and its default is
    # empty; name and default look swapped, and the flag is never read below.
    # Left unchanged to keep the command-line interface as it was - confirm.
    tf.flags.DEFINE_string('./lib/corpus.out','','Data to predict')
    tf.flags.DEFINE_string('checkpoint_dir','./model/','checkpoint directory from training run')
    tf.flags.DEFINE_integer('batch_size',1000,'batch size')
    tf.flags.DEFINE_string('root','./lib','root dir')
    FLAGS = tf.flags.FLAGS
    # NOTE(review): _parse_flags() / __flags are private members of the flags
    # object and may not exist in every TensorFlow release - confirm.
    FLAGS._parse_flags()
    print ('\nParameters:')
    for attr , value in sorted(FLAGS.__flags.items()):
        print ('{}={}'.format(attr.upper() , value))

    print('')

    checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    dataprocess = DataProcess('./lib/word2vec.bin')
    dataprocess.initdata('./lib/corpus.seg.length.out')
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Rebuild the graph from the .meta file and load the trained weights.
            saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
            saver.restore(sess , checkpoint_file)

            # The tensors are fixed once the graph is imported, so look them
            # up a single time instead of on every iteration.
            question = graph.get_operation_by_name('question').outputs[0]
            answer_right = graph.get_operation_by_name('answer_right').outputs[0]
            answer_wrong = graph.get_operation_by_name('answer_wrong').outputs[0]
            cosine_op = graph.get_operation_by_name('cosineQR').outputs[0]

            start = time.time()
            bestAnswer = None
            maxScore = -1.0
            for i in range(10000):
                # NOTE(review): the arguments are constant; this call could
                # probably be hoisted too if getSentenceVec has no side
                # effects - confirm against DataProcess.
                questionbatch = dataprocess.getSentenceVec('你叫什么名字',10,FLAGS.batch_size)
                batchs = dataprocess.nextbatch(FLAGS.batch_size)
                scores = sess.run(cosine_op, {question:questionbatch, answer_right:batchs[1],answer_wrong:batchs[2]})
                ndx = np.argmax(scores)
                score = scores[ndx]
                if maxScore < score:
                    maxScore = score
                    bestAnswer = batchs[3][ndx]
                print ('iterate : %d\tscore:%f\tmaxscore:%f\tanswer:%s'%(i,score,maxScore,(batchs[3][ndx]).strip('\n')))
            end = time.time()
            print('time used:%f'%(end - start))
            print('maxScore:%f'%maxScore)
            print('best answer :%s'%bestAnswer)
    def find(cosineTensor):
        '''Return the index of the largest value in `cosineTensor`.

        The index refers to the flattened input, as computed by np.argmax
        with its default axis.
        '''
        best_index = np.argmax(cosineTensor)
        return best_index
    
    
  • 相关阅读:
    (二分查找 拓展) leetcode 69. Sqrt(x)
    (二分查找 拓展) leetcode 162. Find Peak Element && lintcode 75. Find Peak Element
    (链表) lintcode 219. Insert Node in Sorted Linked List
    (二分查找 拓展) leetcode 34. Find First and Last Position of Element in Sorted Array && lintcode 61. Search for a Range
    (最短路 Floyd) P2910 [USACO08OPEN]寻宝之路Clear And Present Danger 洛谷
    (字符串 数组 递归 双指针) leetcode 344. Reverse String
    (二叉树 DFS 递归) leetcode 112. Path Sum
    (二叉树 DFS 递归) leetcode 101. Symmetric Tree
    (二叉树 递归) leetcode 144. Binary Tree Preorder Traversal
    (二叉树 递归 DFS) leetcode 100. Same Tree
  • 原文地址:https://www.cnblogs.com/nocml/p/6773058.html
Copyright © 2020-2023  润新知