• 使用CNN做文本分类——将图像2维卷积换成1维


    使用CNN做文本分类
    
        from __future__ import division, print_function, absolute_import
        import tensorflow as tf
        import tflearn
        from tflearn.layers.core import input_data, dropout, fully_connected
        from tflearn.layers.conv import conv_1d, global_max_pool
        from tflearn.layers.merge_ops import merge
        from tflearn.layers.estimator import regression
        from tflearn.data_utils import to_categorical, pad_sequences
        from tflearn.datasets import imdb
        import pickle
        import numpy as np
        """
        还是加载imdb.pkl数据
        """
        # Sizes shared by data preparation and the network: the loader's word
        # limit is also the embedding's input_dim, and the padded length is also
        # the width of the input layer, so each is defined once here.
        VOCAB_SIZE = 10000
        MAX_LEN = 100
        KERNEL_SIZES = (3, 4, 5)
        # Load the pickled IMDB data. The second split returned by the loader is
        # the one passed to fit() as the validation set further down.
        # NOTE(review): presumably that split is the held-out valid_portion (10%)
        # -- confirm against the tflearn imdb.load_data documentation.
        train, test, _ = imdb.load_data(path='imdb.pkl', n_words=VOCAB_SIZE,
                                        valid_portion=0.1)
        trainX, trainY = train
        testX, testY = test
        # Bring every sequence to the fixed length MAX_LEN, using 0 as filler.
        trainX = pad_sequences(trainX, maxlen=MAX_LEN, value=0.)
        testX = pad_sequences(testX, maxlen=MAX_LEN, value=0.)
        # Turn the labels into two-class categorical vectors.
        trainY = to_categorical(trainY, nb_classes=2)
        testY = to_categorical(testY, nb_classes=2)
        # Build the convolutional network; the convolutions are 1-D (conv_1d)
        # and run over the embedded sequence.
        network = input_data(shape=[None, MAX_LEN], name='input')
        network = tflearn.embedding(network, input_dim=VOCAB_SIZE, output_dim=128)
        # One convolution branch per kernel size, created in the order 3, 4, 5,
        # each with 128 filters, 'valid' padding, ReLU and L2 regularization.
        branches = [
            conv_1d(network, 128, kernel_size, padding='valid',
                    activation='relu', regularizer="L2")
            for kernel_size in KERNEL_SIZES
        ]
        network = merge(branches, mode='concat', axis=1)
        # global_max_pool is applied after inserting an extra axis at index 2.
        network = tf.expand_dims(network, 2)
        network = global_max_pool(network)
        network = dropout(network, 0.5)
        network = fully_connected(network, 2, activation='softmax')
        network = regression(network, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy', name='target')
        # Train for a single epoch, validating on (testX, testY).
        model = tflearn.DNN(network, tensorboard_verbose=0)
        model.fit(trainX, trainY, n_epoch=1, shuffle=True,
                  validation_set=(testX, testY), show_metric=True,
                  batch_size=32)
        # Save the trained model.
        model.save("cnn.model")
        # Smoke-test prediction on one synthetic row of MAX_LEN values. A
        # separate name is used so the `test` dataset split is not overwritten.
        # NOTE(review): linspace(1, 101, 100) yields non-integer values;
        # presumably the embedding layer converts them to integer ids -- confirm.
        sample = np.linspace(1, 101, MAX_LEN).reshape(1, MAX_LEN)
        print("测试结果:", model.predict(sample))
    
    模型训练结果以及模型保存情况:
    
        Training Step: 697  | total loss: 0.40838 | time: 79.960s
        | Adam | epoch: 001 | loss: 0.40838 - acc: 0.8247 -- iter: 22304/22500
        Training Step: 698  | total loss: 0.39128 | time: 80.112s
        | Adam | epoch: 001 | loss: 0.39128 - acc: 0.8329 -- iter: 22336/22500
        Training Step: 699  | total loss: 0.38896 | time: 80.298s
        | Adam | epoch: 001 | loss: 0.38896 - acc: 0.8402 -- iter: 22368/22500
        Training Step: 700  | total loss: 0.39468 | time: 80.456s
        | Adam | epoch: 001 | loss: 0.39468 - acc: 0.8343 -- iter: 22400/22500
        Training Step: 701  | total loss: 0.39380 | time: 80.640s
        | Adam | epoch: 001 | loss: 0.39380 - acc: 0.8353 -- iter: 22432/22500
        Training Step: 702  | total loss: 0.38980 | time: 80.787s
        | Adam | epoch: 001 | loss: 0.38980 - acc: 0.8392 -- iter: 22464/22500
        Training Step: 703  | total loss: 0.39020 | time: 80.970s
        | Adam | epoch: 001 | loss: 0.39020 - acc: 0.8397 -- iter: 22496/22500
        Training Step: 704  | total loss: 0.38543 | time: 82.891s
        | Adam | epoch: 001 | loss: 0.38543 - acc: 0.8370 | val_loss: 0.44625 - val_acc: 0.7880 -- iter: 22500/22500
        --
        测试结果: [[ 0.77064246  0.2293576 ]]
    
    
    
    
    加载模型并做预测:
    
        import tensorflow as tf
        import numpy as np
        import tflearn
        from tflearn.layers.core import input_data, dropout, fully_connected
        from tflearn.layers.conv import conv_1d, global_max_pool
        from tflearn.layers.merge_ops import merge
        from tflearn.layers.estimator import regression
        """
        跟训练模型的网络结构一样
        """
        # Layer sizes for rebuilding the graph; the checkpoint loaded below was
        # saved as "cnn.model", so these have to match the values used when
        # that model was trained.
        SEQ_LEN = 100
        N_WORDS = 10000
        N_FILTERS = 128
        net = input_data(shape=[None, SEQ_LEN], name='input')
        net = tflearn.embedding(net, input_dim=N_WORDS, output_dim=128)
        # Three parallel 1-D convolutions (kernel sizes 3, 4, 5, in that order),
        # concatenated along axis 1.
        conv_branches = [
            conv_1d(net, N_FILTERS, width, padding='valid',
                    activation='relu', regularizer="L2")
            for width in (3, 4, 5)
        ]
        net = merge(conv_branches, mode='concat', axis=1)
        net = tf.expand_dims(net, 2)
        net = global_max_pool(net)
        net = dropout(net, 0.5)
        net = fully_connected(net, 2, activation='softmax')
        net = regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
        # Wrap the graph in a DNN, restore the saved weights and predict on the
        # same synthetic row used at the end of training.
        model = tflearn.DNN(net)
        model.load("cnn.model")
        sample = np.linspace(1, 101, SEQ_LEN).reshape(1, SEQ_LEN)
        # The run recorded with this script printed [[ 0.77064246  0.2293576 ]].
        prediction = model.predict(sample)
        print("模型预测结果", prediction)
    
    
    
    结果:
    
        2017-10-15 19:35:14.940689: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
        模型预测结果 [[ 0.77064246  0.2293576 ]]
        Process finished with exit code 0
    
    
    
    至此,基于 tflearn 高阶 API 做文本分类的基本流程(训练、保存、加载、预测)就完成了。
  • 相关阅读:
    SpringMvc 中绑定 checkbox 标签到form 中的List
    Oracle 误删除 DBF 补救措施
    枚举的作用与场景
    MySql 游标
    IDEA 常用设置
    HDFS源码分析数据块复制之PendingReplicationBlocks
    HDFS源码分析之编辑日志编辑相关双缓冲区EditsDoubleBuffer
    HDFS源码分析EditLog之获取编辑日志输入流
    HDFS源码分析EditLog之读取操作符
    HDFS源码分析之EditLogTailer
  • 原文地址:https://www.cnblogs.com/bonelee/p/7908358.html
Copyright © 2020-2023  润新知