cnn for qa

最近在做QA系统，用TensorFlow做了些实验，下面是一个基于CNN的评分网络。主要参考了论文《APPLYING DEEP LEARNING TO ANSWER SELECTION: A STUDY AND AN OPEN TASK》以及WildML博客中的一篇文章。

import tensorflow as tf
import numpy as np
class QaCNN():
    """CNN scoring network for answer selection (TensorFlow 1.x graph API).

    A question, a correct answer and a wrong answer are each reshaped to
    ``[batch, sequencesize, vecsize, 1]`` and passed through the *same*
    convolution + max-pooling filters (one filter bank per entry of
    ``filtersizes``).  The pooled features are flattened and the question is
    compared with each answer by cosine similarity.  Training minimises the
    hinge loss ``max(0, margin - (cos(Q, right) - cos(Q, wrong)))``.

    Attributes:
        question, answer_rigth, answer_wrong: float32 placeholders of shape
            ``[None, vecsize * sequencesize]``.  Their graph names are
            ``question``, ``answer_right`` and ``answer_wrong``.
        answer_right: correctly spelled alias of ``answer_rigth`` (the
            misspelled attribute is kept for existing callers).
        losses: per-example hinge loss (graph name ``losses/loss_tensor``).
        loss: sum of ``losses`` over the batch (graph name ``losses/loss``).
        correct: per-example boolean, true where the hinge loss is zero.
        accuracy: mean of ``correct`` cast to float.
        merged: all summaries registered so far, merged into one op.
    """

    # Required gap between cos(Q, right) and cos(Q, wrong).
    _MARGIN = 0.05
    # Added under the square root so an all-zero feature vector (possible
    # after ReLU + max-pooling) yields cosine 0 instead of 0/0 = NaN.
    _EPSILON = 1e-8

    def __init__(self, batchsize, sequencesize, vecsize, outsize, filtersizes, num_filters):
        """Build the whole graph: placeholders, shared CNN, loss, summaries.

        Args:
            batchsize: stored on the instance; the graph itself no longer
                depends on it, so any batch size can be fed.
            sequencesize: number of tokens per sentence.
            vecsize: dimensionality of each token vector.
            outsize: stored on the instance; not used when building the graph.
            filtersizes: iterable of convolution heights (in tokens).
            num_filters: number of output channels per filter size.
        """
        self.vecsize = vecsize
        self.outsize = outsize
        self.batchsize = batchsize
        self.sequencesize = sequencesize

        flat_input = vecsize * sequencesize
        self.question = tf.placeholder(tf.float32, [None, flat_input], name='question')
        self.answer_rigth = tf.placeholder(tf.float32, [None, flat_input], name='answer_right')
        self.answer_wrong = tf.placeholder(tf.float32, [None, flat_input], name='answer_wrong')
        # Backward-compatible alias with the intended spelling.
        self.answer_right = self.answer_rigth

        # NHWC "images" with a single channel: height = tokens, width = vector.
        image_shape = [-1, self.sequencesize, self.vecsize, 1]
        tenQ = tf.reshape(self.question, image_shape)
        tenR = tf.reshape(self.answer_rigth, image_shape)
        tenW = tf.reshape(self.answer_wrong, image_shape)

        tensorResultQ = []
        tensorResultR = []
        tensorResultW = []
        for filtersize in filtersizes:
            with tf.name_scope("conv-maxpool-%s" % filtersize):
                # The filter spans the full vector width, so it slides only
                # along the token axis.
                filter_shape = [filtersize, self.vecsize, 1, num_filters]
                W = tf.get_variable(initializer=tf.truncated_normal(filter_shape, stddev=0.1),
                                    name="W-%s" % str(filtersize))
                b = tf.get_variable(initializer=tf.constant(0.1, shape=[num_filters]),
                                    name="b-%s" % str(filtersize))
                # The same W and b are applied to all three inputs.
                tensorResultQ.append(self.conv2dPool(tenQ, W, b, filtersize))
                tensorResultR.append(self.conv2dPool(tenR, W, b, filtersize))
                tensorResultW.append(self.conv2dPool(tenW, W, b, filtersize))

        # Concatenate the pooled outputs on the channel axis and flatten.
        flat_length = len(filtersizes) * num_filters
        tenQ_flat = tf.reshape(tf.concat(tensorResultQ, 3), [-1, flat_length])
        tenR_flat = tf.reshape(tf.concat(tensorResultR, 3), [-1, flat_length])
        tenW_flat = tf.reshape(tf.concat(tensorResultW, 3), [-1, flat_length])

        # The op names 'cosineQR' / 'cosineQW' are looked up by name when the
        # graph is restored for prediction, so they must stay unchanged.
        cosineQR = self._cosine(tenQ_flat, tenR_flat, 'cosineQR')
        cosineQW = self._cosine(tenQ_flat, tenW_flat, 'cosineQW')

        with tf.name_scope('losses'):
            # Scalars broadcast against the per-example vector, so the loss
            # is not tied to a fixed batch size.
            zero = tf.constant(0.0, dtype=tf.float32)
            margin = tf.constant(self._MARGIN, dtype=tf.float32)
            self.losses = tf.maximum(
                zero,
                tf.subtract(margin, tf.subtract(cosineQR, cosineQW)),
                name='loss_tensor')
            self.loss = tf.reduce_sum(self.losses, name='loss')
        with tf.name_scope('acc'):
            # An example counts as correct when its hinge loss is exactly 0.
            self.correct = tf.equal(zero, self.losses)
            self.accuracy = tf.reduce_mean(tf.cast(self.correct, 'float'), name='accuracy')
        tf.summary.scalar('loss', self.loss)
        self.variable_summaries(self.accuracy)
        self.merged = tf.summary.merge_all()

    def _cosine(self, a, b, name):
        """Return the row-wise cosine similarity of two 2-D tensors.

        ``name`` is given to the final division op so it can be fetched from
        a restored graph.
        """
        dot = tf.reduce_sum(tf.multiply(a, b), 1)
        norm_a = tf.sqrt(tf.reduce_sum(tf.square(a), 1) + self._EPSILON)
        norm_b = tf.sqrt(tf.reduce_sum(tf.square(b), 1) + self._EPSILON)
        return tf.div(dot, tf.multiply(norm_a, norm_b), name=name)

    def variable_summaries(self, var):
        '''Attach mean/stddev/max/min scalars and a histogram of ``var`` for TensorBoard.'''
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    def conv2dPool(self, x, W, b, filtersize):
        """Apply convolution, bias, ReLU and max-pooling over the token axis.

        Args:
            x: 4-D input; ``__init__`` passes ``[batch, sequencesize, vecsize, 1]``.
            W: convolution filter of shape ``[filtersize, vecsize, 1, num_filters]``.
            b: bias with one value per output channel.
            filtersize: filter height, used to size the pooling window.

        Returns:
            The pooled tensor.  The pooling window covers all
            ``sequencesize - filtersize + 1`` positions left by the VALID
            convolution, so one value per channel remains.
        """
        conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')
        h = tf.nn.relu(tf.nn.bias_add(conv, b))
        pooled = tf.nn.max_pool(
            h,
            ksize=[1, self.sequencesize - filtersize + 1, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID')
        return pooled

import numpy as np
import time
import os
import tensorflow as tf
from qacnn_g import *
from process import *
# Training script for QaCNN: builds the graph, trains with Adam on triples
# (question, right answer, wrong answer) and saves a checkpoint.

# Hyper-parameters and paths.
batchsize = 100
sequencesize = 10
vecsize = 200
outsize = 10
root = './lib/'
filtersize = [1,2,3,5]
num_filter = 500

# Remove a stale length-normalised corpus; presumably dataprocess.normalize()
# below regenerates it (DataProcess is defined elsewhere - verify).
length_corpus = root + 'corpus.seg.length.out'
if os.path.exists(length_corpus):
    os.remove(length_corpus)

# One TensorBoard log folder per run, named after the start time.
logfolder = time.strftime("%Y%m%d_%H%M%S", time.localtime())
cnn = QaCNN(batchsize , sequencesize , vecsize , outsize , filtersize , num_filter)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cnn.loss)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter('./LOGS/'+logfolder,sess.graph)
# tf.all_variables() is deprecated; tf.global_variables() is its replacement.
saver = tf.train.Saver(tf.global_variables())
print ('init...')
dataprocess = DataProcess(root + 'word2vec.bin')
dataprocess.normalize(root + 'corpus.seg.out', root + 'corpus.seg.length.out')
dataprocess.initdata(root + 'corpus.seg.length.out')
start = time.time()
try:
    for i in range(120000):
        batch = dataprocess.nextbatch(batchsize)
        feed = {cnn.question: batch[0], cnn.answer_rigth: batch[1], cnn.answer_wrong: batch[2]}
        # Exactly one optimizer step per batch.  Every 10th step also fetches
        # the summaries and metrics in the same run call.
        if i % 10 == 0:
            summary, loss, accuracy, _ = sess.run(
                [cnn.merged, cnn.loss, cnn.accuracy, train_step], feed)
            train_writer.add_summary(summary, i)
            end = time.time()
            elapse = (end - start)
            print ('iterator %d.\tloss=%f\taccuracy=%f\telapse=%f'%(i,loss,accuracy,elapse))
            start = time.time()
        else:
            sess.run(train_step, feed_dict=feed)
    # Make sure the checkpoint directory exists before saving into it.
    if not os.path.isdir('./model'):
        os.makedirs('./model')
    saver.save(sess , './model/qa.cnn')
finally:
    # Release the event file and the session even if training fails.
    train_writer.close()
    sess.close()
print ('end...')

使用模型进行打分，这里取了cosineQR（即正向答案与问题之间的cosine值）进行度量。

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import os
import time
from process import *

# Prediction script: restores the trained graph and searches the corpus for
# the answer whose CNN features have the highest cosine with a fixed question.

# The first argument is the flag NAME and the second its default value.
tf.flags.DEFINE_string('data_file','./lib/corpus.out','Data to predict')
tf.flags.DEFINE_string('checkpoint_dir','./model/','checkpoint directory from training run')
tf.flags.DEFINE_integer('batch_size',1000,'batch size')
tf.flags.DEFINE_string('root','./lib','root dir')
FLAGS = tf.flags.FLAGS
# NOTE(review): _parse_flags / __flags are private TensorFlow APIs - confirm
# they exist in the installed TensorFlow version.
FLAGS._parse_flags()
print ('\nParameters:')
for attr , value in sorted(FLAGS.__flags.items()):
    print ('{}={}'.format(attr.upper() , value))

print('')

checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if checkpoint_file is None:
    # latest_checkpoint returns None when nothing has been saved yet; fail
    # here instead of trying to open 'None.meta'.
    raise IOError('no checkpoint found in {}'.format(FLAGS.checkpoint_dir))
dataprocess = DataProcess('./lib/word2vec.bin')
dataprocess.initdata('./lib/corpus.seg.length.out')
graph = tf.Graph()
with graph.as_default():
    # The context manager installs the session as default and closes it on exit.
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
        saver.restore(sess , checkpoint_file)

        start = time.time()
        # Graph lookups do not change between iterations, so do them once.
        question = graph.get_operation_by_name('question').outputs[0]
        answer_right = graph.get_operation_by_name('answer_right').outputs[0]
        answer_wrong = graph.get_operation_by_name('answer_wrong').outputs[0]
        cosineQR_tensor = graph.get_operation_by_name('cosineQR').outputs[0]

        bestAnswer = None
        maxScore = -1.0
        for i in range(10000):
            # NOTE(review): the arguments are constant, so this is probably
            # loop-invariant; kept per iteration because getSentenceVec is
            # defined elsewhere and may have side effects.
            questionbatch = dataprocess.getSentenceVec('你叫什么名字',10,FLAGS.batch_size)
            batchs = dataprocess.nextbatch(FLAGS.batch_size)
            cosineQR = sess.run(
                cosineQR_tensor,
                {question: questionbatch, answer_right: batchs[1], answer_wrong: batchs[2]})
            # Best candidate in this batch, then update the running best.
            ndx = np.argmax(cosineQR)
            score = cosineQR[ndx]
            if maxScore < score:
                maxScore = score
                bestAnswer = batchs[3][ndx]
            print ('iterate : %d\tscore:%f\tmaxscore:%f\tanswer:%s'%(i,score,maxScore,(batchs[3][ndx]).strip('\n')))
        end = time.time()
        print('time used:%f'%(end - start))
        print('maxScore:%f'%maxScore)
        print('best answer :%s'%bestAnswer)
def find(cosineTensor):
    """Return the index of the largest value in ``cosineTensor``.

    The index refers to the flattened input, as with ``np.argmax`` without
    an axis; on ties the first occurrence is returned.
    """
    best_index = np.argmax(cosineTensor)
    return best_index

相关阅读:
(二分查找拓展) leetcode 69. Sqrt(x)
(二分查找拓展) leetcode 162. Find Peak Element && lintcode 75. Find Peak Element
(链表) lintcode 219. Insert Node in Sorted Linked List
(二分查找拓展) leetcode 34. Find First and Last Position of Element in Sorted Array && lintcode 61. Search for a Range
(最短路 Floyd) P2910 [USACO08OPEN]寻宝之路Clear And Present Danger 洛谷
 (字符串数组递归双指针) leetcode 344. Reverse String
(二叉树 DFS 递归) leetcode 112. Path Sum
(二叉树 DFS 递归) leetcode 101. Symmetric Tree
(二叉树递归) leetcode 144. Binary Tree Preorder Traversal
(二叉树递归 DFS) leetcode 100. Same Tree
原文地址：https://www.cnblogs.com/nocml/p/6773058.html