import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
import data_helper
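# TextCNN-style sentence classifier (TensorFlow 1.x graph mode):
# word embedding -> parallel convolutions with window sizes 1/3/5/7
# -> max-over-time pooling -> dropout -> softmax over n_class labels.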
n_class = 2                # number of target classes
learning_rate = 0.001
s_limit_len = 10           # sentences are padded/truncated to this many tokens
word_embedding_size = 100
voc_size = 7000            # vocabulary size
filter_nums = 4            # total pooled features (one per convolution branch)
def get_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def get_bias(shape):
    # honor the requested shape; the original ignored it and relied on scalar broadcasting
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(input_x, W):
    return tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="VALID")

def maxpooling(x, ksize, strides):
    return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding="VALID")
inputs = tf.placeholder(tf.int32, [None, s_limit_len], name="inputs")
# one-hot labels must be float: softmax_cross_entropy_with_logits rejects int labels
labels = tf.placeholder(tf.float32, [None, n_class], name="label_one-hot")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
embedding_w = tf.Variable(tf.truncated_normal([voc_size, word_embedding_size], stddev=0.1, dtype=tf.float32))
# add an extra channel dimension here: conv2d expects [batch, height, width, channels]
embedding_layer = tf.expand_dims(tf.nn.embedding_lookup(embedding_w, inputs), -1)
# convolutional layers
# convolution window heights (n-gram sizes) used by the branches below
filter_sizes = [1, 3, 5, 7]
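# A minimal sketch (commented out, not wired in): the four explicit branches
# below could equivalently be built in a loop over filter_sizes:
#
#   branches = []
#   for k in filter_sizes:
#       W = get_weights([k, word_embedding_size, 1, 1])
#       b = get_bias([1])
#       c = tf.nn.relu(conv2d(embedding_layer, W) + b)
#       branches.append(maxpooling(c, [1, s_limit_len - k + 1, 1, 1], [1, 1, 1, 1]))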
# one convolution branch per window size; VALID padding leaves
# (s_limit_len - k + 1) positions for a window of height k
conv1_W = get_weights([1, word_embedding_size, 1, 1])
conv1_b = get_bias([1])
conv1 = tf.nn.relu(conv2d(embedding_layer, conv1_W) + conv1_b)

conv3_W = get_weights([3, word_embedding_size, 1, 1])
conv3_b = get_bias([1])
conv3 = tf.nn.relu(conv2d(embedding_layer, conv3_W) + conv3_b)

conv5_W = get_weights([5, word_embedding_size, 1, 1])
conv5_b = get_bias([1])
conv5 = tf.nn.relu(conv2d(embedding_layer, conv5_W) + conv5_b)

conv7_W = get_weights([7, word_embedding_size, 1, 1])
conv7_b = get_bias([1])
conv7 = tf.nn.relu(conv2d(embedding_layer, conv7_W) + conv7_b)
# max-over-time pooling: each branch collapses its (s_limit_len - k + 1)
# positions to a single [batch, 1, 1, 1] feature
feature_map_1 = maxpooling(conv1, [1, s_limit_len - 1 + 1, 1, 1], [1, 1, 1, 1])
feature_map_3 = maxpooling(conv3, [1, s_limit_len - 3 + 1, 1, 1], [1, 1, 1, 1])
feature_map_5 = maxpooling(conv5, [1, s_limit_len - 5 + 1, 1, 1], [1, 1, 1, 1])
feature_map_7 = maxpooling(conv7, [1, s_limit_len - 7 + 1, 1, 1], [1, 1, 1, 1])

print("feature_map size:", feature_map_1, feature_map_3, feature_map_5, feature_map_7)
# concatenate along the channel axis -> [batch, 1, 1, filter_nums], then flatten
pool_outs = tf.concat([feature_map_1, feature_map_3, feature_map_5, feature_map_7], 3)
print("pool out:", pool_outs)
pool_flat = tf.reshape(pool_outs, [-1, filter_nums])
print("pool flat:", pool_flat)
# fully connected layer with dropout
h_drop = tf.nn.dropout(pool_flat, keep_prob)

full_W = tf.Variable(tf.truncated_normal([filter_nums, n_class], stddev=0.1, dtype=tf.float32))
full_B = tf.Variable(tf.constant(0.1, shape=[n_class], dtype=tf.float32))

# keep raw logits separate from the softmax: softmax_cross_entropy_with_logits
# applies softmax internally, so feeding it softmax outputs would apply it twice
logits = tf.matmul(h_drop, full_W) + full_B
outputs = tf.nn.softmax(logits)
pred = tf.argmax(outputs, 1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
acc = tf.reduce_mean(tf.cast(tf.equal(pred, tf.argmax(labels, 1)), tf.float32))
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
train_x, train_y, words_dict, labels_dict, all_len = data_helper.load("../data/train.txt", 1000, s_limit_len)
test_x, test_y, test_len = data_helper.load_test_data("../data/test_filter_2.txt", s_limit_len, words_dict, labels_dict)
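# NOTE: data_helper is project-local and not shown here; it is assumed that
# load()/load_test_data() return index sequences already padded to s_limit_len
# plus one-hot label arrays, and that get_batch() yields (batch_x, batch_y, batch_len).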
def test(sess, acc, pred, test_x, test_y):
    y_pred, acc_test = sess.run([pred, acc], feed_dict={inputs: test_x, labels: test_y, keep_prob: 1.0})
    # recover integer class ids from one-hot labels without adding graph ops on every call
    y_true = np.argmax(test_y, 1)
    print(metrics.classification_report(y_true, y_pred))
for epoch in range(1000):
    step = 0
    # evaluate on the test set at the start of every epoch
    test(sess, acc, pred, test_x, test_y)
    batches = data_helper.get_batch(64, train_x, train_y, all_len)
    for batch_x, batch_y, batch_len in batches:
        _, loss_, acc_, pred_list = sess.run([train_op, loss, acc, pred],
                                             feed_dict={inputs: batch_x, labels: batch_y, keep_prob: 0.5})
        if step % 50 == 0:
            print(pred_list[:15])
            print("epoch-{0} step-{1} loss:{2} acc-{3}".format(epoch, step, loss_, acc_))
        step += 1