• keras—多层感知器MLP—IMDb情感分析


      1 import urllib.request
      2 import os
      3 import tarfile
      4 from keras.datasets import imdb
      5 from keras.preprocessing import sequence
      6 from keras.preprocessing.text import Tokenizer
      7 import re
      def rm_tags(text):
          """Return ``text`` with every ``<...>`` markup tag removed."""
          # A tag is '<', one or more characters other than '>', then '>'.
          return re.sub(r'<[^>]+>', '', text)
     def read_files(filetype, path="C:/Users/admin/.keras/aclImdb/"):
         """Load the reviews of one split of the extracted aclImdb dataset.

         Args:
             filetype: Name of the split sub-directory, e.g. "train" or "test".
             path: Root directory of the extracted dataset. It must contain
                 ``<filetype>/pos`` and ``<filetype>/neg``.

         Returns:
             A tuple ``(all_labels, all_texts)``. ``all_labels`` holds 1 for
             every positive review followed by 0 for every negative review;
             ``all_texts`` holds the matching review texts with markup tags
             stripped by ``rm_tags``.
         """
         positive_path = os.path.join(path, filetype, "pos")
         # The negative reviews live under "neg"; reading "pos" twice here
         # would label positive reviews as negative.
         negative_path = os.path.join(path, filetype, "neg")

         positive_files = [os.path.join(positive_path, f)
                           for f in os.listdir(positive_path)]
         negative_files = [os.path.join(negative_path, f)
                           for f in os.listdir(negative_path)]
         file_list = positive_files + negative_files
         print('read', filetype, 'files:', len(file_list))

         # Derive the labels from the files actually found so that labels and
         # texts always have the same length and order.
         all_labels = [1] * len(positive_files) + [0] * len(negative_files)

         all_texts = []
         for fi in file_list:
             with open(fi, encoding='utf8') as file_input:
                 all_texts.append(rm_tags(" ".join(file_input.readlines())))
         return all_labels, all_texts
     import numpy as np

     # Load both splits; labels are 1 (positive) / 0 (negative).
     y_train, train_text = read_files("train")
     y_test, test_text = read_files("test")
     print(train_text[0])
     print(y_train[0])

     # Build the vocabulary from the training texts only, keeping the
     # 2000 most frequent words.
     token = Tokenizer(num_words=2000)
     token.fit_on_texts(train_text)
     print(token.document_count)
     print(token.word_index)

     # Turn each review into a sequence of word indices.
     x_train_seq = token.texts_to_sequences(train_text)
     x_test_seq = token.texts_to_sequences(test_text)
     print(train_text[0])
     print(x_train_seq[0])

     # Pad or truncate every sequence to exactly 100 entries.
     x_train = sequence.pad_sequences(x_train_seq, maxlen=100)
     x_test = sequence.pad_sequences(x_test_seq, maxlen=100)
     print('before pad_sequences lenfth=', len(x_train_seq[113]))
     print(x_train_seq[113])
     print('after pad_sequences lenfth=', len(x_train[113]))
     print(x_train[113])

     # Keras expects array-like targets, so convert the label lists.
     y_train = np.asarray(y_train)
     y_test = np.asarray(y_test)

     # read_files returns all positive reviews followed by all negative ones,
     # and validation_split holds out the *last* samples without shuffling.
     # Shuffle the training set (fixed seed for reproducibility) so that the
     # held-out part contains both classes.
     # NOTE: after this point x_train / y_train are no longer index-aligned
     # with train_text / x_train_seq.
     shuffled_order = np.random.RandomState(0).permutation(len(x_train))
     x_train = x_train[shuffled_order]
     y_train = y_train[shuffled_order]
     from keras.models import Sequential
     from keras.layers import Dense,Dropout,Flatten,Activation
     # Embedding is exported from keras.layers; the private
     # keras.layers.embeddings module path is not available in newer Keras.
     from keras.layers import Embedding

     # MLP on top of a word embedding:
     # Embedding -> Dropout -> Flatten -> Dense(256, relu) -> Dropout
     # -> Dense(1, sigmoid).
     model = Sequential()
     # input_dim matches Tokenizer(num_words=2000) and input_length matches
     # the maxlen=100 used when padding the sequences.
     model.add(Embedding(output_dim=32,
                         input_dim=2000,
                         input_length=100))
     model.add(Dropout(0.2))
     model.add(Flatten())
     model.add(Dense(units=256,
                     activation='relu'))
     model.add(Dropout(0.35))
     # Single sigmoid unit: output is the probability of the positive class.
     model.add(Dense(units=1,
                     activation='sigmoid'))
     # summary() prints the table itself and returns None, so it must not be
     # wrapped in print().
     model.summary()
     # Binary classification: cross-entropy loss, accuracy as the metric.
     model.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

     # Train for 10 epochs, holding out 20% of the training data for
     # validation.
     train_history = model.fit(x=x_train, y=y_train, batch_size=100,
                               epochs=10, verbose=2,
                               validation_split=0.2)

     # evaluate() returns [loss, accuracy] for the metrics compiled above.
     scores = model.evaluate(x_test, y_test, verbose=1)
     print('accuracy', scores[1])

     # Sequential.predict_classes is not available in newer Keras versions;
     # thresholding the sigmoid output at 0.5 yields the same 0/1 classes.
     predict = (model.predict(x_test) > 0.5).astype('int32')
     print("prediction[:10]", predict[:10])

     # Flatten the (n, 1) predictions to a 1-D vector for easy indexing.
     predict_classes = predict.reshape(-1)
     print(predict_classes[:10])
     # Human-readable names for the two class labels.
     SentimentDict = {1: '正面的', 0: '负面的'}


     def display_test_Sentiment(i):
         """Print test review ``i`` with its true and predicted sentiment."""
         print(test_text[i])
         actual = SentimentDict[y_test[i]]
         predicted = SentimentDict[predict_classes[i]]
         print('label真实值:', actual, '预测结果:', predicted)


     display_test_Sentiment(12502)
     # Step-by-step prediction for one hand-written review.
     input_text = (
         "\n"
         "I saw this film with my 6-year-old a couple weeks ago. "
         "While there's plenty about which to gripe, here's one of \n"
         "my biggest problems: I can't stand this constant CGI-heavy "
         "everything-must-be-a-sequel-or- a- remake era of film\n"
         "making. It's making movie makers lazy.\n"
     )

     # Same preprocessing as the training data: word indices, then
     # pad/truncate to 100 entries.
     input_seq = token.texts_to_sequences([input_text])
     print(input_seq[0])
     pad_input_seq = sequence.pad_sequences(input_seq, maxlen=100)
     print(pad_input_seq[0])

     # Sequential.predict_classes is not available in newer Keras versions;
     # thresholding the sigmoid output at 0.5 yields the same 0/1 classes.
     predict_result = (model.predict(pad_input_seq) > 0.5).astype('int32')
     print(predict_result)
     print(predict_result[0][0])
     print(SentimentDict[predict_result[0][0]])
     def predict_review(input_text):
         """Print the predicted sentiment label for one review text.

         The text is converted with the fitted ``token`` tokenizer, padded or
         truncated to 100 entries, passed through ``model`` and the resulting
         class is printed via ``SentimentDict``.

         Args:
             input_text: The raw review text.
         """
         input_seq = token.texts_to_sequences([input_text])
         pad_input_seq = sequence.pad_sequences(input_seq, maxlen=100)
         # Sequential.predict_classes is not available in newer Keras
         # versions; thresholding the sigmoid output at 0.5 yields the same
         # 0/1 classes.
         predict_result = (model.predict(pad_input_seq) > 0.5).astype('int32')
         print(SentimentDict[predict_result[0][0]])


     predict_review(
         "\n"
         "They poured on the whole \"LeFou is gay\" thing a bit thick for my "
         "taste. It was the only thing that added levity to the movie "
         "(despite how much fun it should have been already), but it seemed "
         "a bit cheap. I'm not going to apologize for wanting more for my "
         "LGBTQ characters than to be just the comic relief.\n"
     )

    验证的准确率为 0,问题待解决。(提示:Keras 的 validation_split 取的是打乱之前数据的最后一部分;训练数据若按类别顺序排列,验证集就只包含单一类别的样本。)

    萍水相逢逢萍水,浮萍之水水浮萍!
  • 相关阅读:
    java——注解Annotation
    java——maven
    sklearn——回归评估指标
    java——单例模式
    java——极简handler机制
    java——为什么要有接口?和抽象类有什么不一样?
    java——cmd命令编译带包名的源程序
    [loj 2478][luogu P4843]「九省联考 2018」林克卡特树
    「线性基」学习小结
    FOI 冬令营 Day6
  • 原文地址:https://www.cnblogs.com/AIBigTruth/p/9773244.html
Copyright © 2020-2023  润新知