def do_rnn_wordbag(trainX, testX, trainY, testY):
    """Train an LSTM classifier (TFLearn) on word-bag sequences and report metrics.

    Args:
        trainX, testX: integer-encoded sequences of length 100
                       (token ids < 1000 — the embedding's input_dim).
        trainY, testY: binary class labels (0/1).

    Side effects: trains a model and prints the classification report,
    confusion matrix, and the true/predicted test labels.
    """
    y_test = testY  # keep the raw integer labels; testY is one-hot encoded below
    # trainX = pad_sequences(trainX, maxlen=100, value=0.)
    # testX = pad_sequences(testX, maxlen=100, value=0.)
    # Convert labels to one-hot binary vectors
    trainY = to_categorical(trainY, nb_classes=2)
    testY = to_categorical(testY, nb_classes=2)
    # Network building
    net = tflearn.input_data([None, 100])
    net = tflearn.embedding(net, input_dim=1000, output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.1)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.005,
                             loss='categorical_crossentropy')
    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit(trainX, trainY, validation_set=0.1, show_metric=True,
              batch_size=1, run_id="uba", n_epoch=10)
    y_predict_list = model.predict(testX)
    y_predict = []
    for probs in y_predict_list:
        # probs[0] is the softmax probability of class 0
        if probs[0] >= 0.5:
            y_predict.append(0)
        else:
            y_predict.append(1)
    print(classification_report(y_test, y_predict))
    print(metrics.confusion_matrix(y_test, y_predict))
    # BUG FIX: the original then printed the undefined name `y_train`
    # (the parameter is `trainY`), which raised a NameError; the line is
    # removed.  Also fixed the "ture" typo in the label below.
    print("true")
    print(y_test)
    print("pre")
    print(y_predict)
# Traditional method — Naive Bayes:
def do_nb(x_train, x_test, y_train, y_test):
    """Fit a Gaussian Naive Bayes classifier and print evaluation metrics.

    Args:
        x_train, x_test: feature matrices.
        y_train, y_test: class labels.

    Side effects: prints the classification report and confusion matrix.
    """
    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    y_pred = gnb.predict(x_test)
    print(classification_report(y_test, y_pred))
    # Consistency fix: use the print() call form throughout (the original
    # mixed a Python 2 print statement with print() calls).
    print(metrics.confusion_matrix(y_test, y_pred))
# Traditional method — Hidden Markov Model (HMM):
def do_hmm(trainX, testX, trainY, testY):
    """Train a 2-state Gaussian HMM on the training sequences and classify
    test sequences by log-likelihood threshold.

    A test sequence scoring below T (-580) is labelled 1 (anomalous),
    otherwise 0.  trainY is unused — the HMM fit is unsupervised.

    Args:
        trainX, testX: iterables of numeric sequences (variable length).
        trainY: unused.
        testY: true binary labels for the test sequences.

    Side effects: prints per-sequence scores, the classification report,
    confusion matrix, and the true/predicted labels.
    """
    T = -580  # log-likelihood decision threshold — TODO confirm, tuned by hand
    N = 2     # number of hidden states
    print(len(trainX))
    # Stack all training sequences into one (n_samples, 1) observation
    # matrix with per-sequence lengths, as hmmlearn's fit() expects.
    # BUG FIX: the original seeded X with a dummy [[0]] observation and
    # lengths with a bogus leading 1, so a fake one-step sequence was
    # trained into the model as well.
    columns = []
    lengths = []
    for seq in trainX:
        columns.append(np.asarray(seq).reshape(-1, 1))
        lengths.append(len(seq))
    X = np.concatenate(columns)
    remodel = hmm.GaussianHMM(n_components=N, covariance_type="full",
                              n_iter=100)
    remodel.fit(X, lengths)
    y_predict = []
    for seq in testX:
        obs = [[v] for v in seq]
        score = remodel.score(obs)
        print(score)
        # Low likelihood under the trained model => class 1 (anomaly)
        y_predict.append(1 if score < T else 0)
    y_predict = np.array(y_predict)
    print(classification_report(testY, y_predict))
    print(metrics.confusion_matrix(testY, y_predict))
    print(testY)
    print(y_predict)