• A simple implementation of the BP algorithm on the MNIST dataset

    Data: http://yann.lecun.com/exdb/mnist/

    References: blog, blog2, blog3, tensorflow

    Derivation: http://www.cnblogs.com/yueshangzuo/p/8025157.html
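
    For quick reference, the weight updates implemented below are, in the code's own notation (sigmoid activations, half squared error; this transcribes the implementation rather than the linked derivation):

        \delta_k = (z_k - t_k)\, z_k (1 - z_k), \qquad w_{jk} \leftarrow w_{jk} - \eta\, y_j \delta_k
        \delta_j = \big(\textstyle\sum_k \delta_k w_{jk}\big)\, y_j (1 - y_j), \qquad w_{ij} \leftarrow w_{ij} - \eta\, x_i \delta_j

    where \eta is the learning rate (study_rate in the code) and t is the one-hot target label.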

    Basic implementation

    import struct
    import numpy as np
    from math import sqrt
    
    class Data:
        def __init__(self):
            print 'parameter initializing...'
            self.num_train= 50000
            self.num_confirm=10000
            self.num_test= 10000
            self.node_in=28*28
            self.node_out=10
            # need to adjust
            #epoch:8 hide_node:39 accuracy:0.9613
            #epoch:8 hide_node:44 accuracy:0.9612
            #epoch:8 hide_node:48 accuracy:0.9624
            #epoch:9 hide_node:48 accuracy:0.9648
            #epoch:10 hide_node:200 accuracy:0.9724
            self.epoch= 15
            self.node_hide= 30
            self.study_rate= 0.05
            self.error_limit= 1e-2
    
        def read_train_image(self,filename):
            print 'reading train-image data...'
            binfile=open(filename,'rb')
            buffer=binfile.read()
            index=0
            magic,num,rows,colums = struct.unpack_from('>IIII',buffer,index)  #>I: big-endian, unsigned int
            index+=struct.calcsize('>IIII')
            for i in range(self.num_train):
                im=struct.unpack_from('784B',buffer,index)  #28*28=784, B: unsigned char
                index+=struct.calcsize('784B')
                im=np.array(im)
                im=im.reshape(1,784)/255.0   # flatten 28x28 to 1x784 and scale to [0,1]
                self.train_imag_list[i,:]=im
            j=0
            for i in range(self.num_train,self.num_train+self.num_confirm):
                im=struct.unpack_from('784B',buffer,index)
                index+=struct.calcsize('784B')
                im=np.array(im)
                im=im.reshape(1,784)/255.0
                self.confirm_imag_list[j,:]=im
                j=j+1
    
        def read_train_label(self,filename):
            print 'reading train-label data...'
            binfile=open(filename,'rb')
            buffer=binfile.read()
            index=0
            magic,num= struct.unpack_from('>II',buffer,index)
            index+=struct.calcsize('>II')
            for i in range(self.num_train):
                lb=struct.unpack_from('B',buffer,index)
                index+=struct.calcsize('B')
                lb=int(lb[0])
                self.train_label_list[i,:]=lb
            j=0
            for i in range(self.num_train,self.num_train+self.num_confirm):
                lb=struct.unpack_from('B',buffer,index)
                index+=struct.calcsize('B')
                lb=int(lb[0])
                self.confirm_label_list[j,:]=lb
                j=j+1
    
    
        def read_test_image(self,filename):
            print 'reading test-image data...'
            binfile=open(filename,'rb')
            buffer=binfile.read()
            index=0
            magic,num,rows,colums = struct.unpack_from('>IIII',buffer,index)
            index+=struct.calcsize('>IIII')
    
            for i in range(self.num_test):
                im=struct.unpack_from('784B',buffer,index)
                index+=struct.calcsize('784B')
                im=np.array(im)
                im=im.reshape(1,784)/255.0   # scale by 255.0, consistent with the training data
                self.test_imag_list[i,:]=im
    
    
        def read_test_label(self,filename):
            print 'reading test-label data...'
            binfile=open(filename,'rb')
            buffer=binfile.read()
            index=0
            magic,num= struct.unpack_from('>II',buffer,index)
            index+=struct.calcsize('>II')
    
            for i in range(self.num_test):
                lb=struct.unpack_from('B',buffer,index)
                index+=struct.calcsize('B')
                lb=int(lb[0])
                self.test_label_list[i,:]=lb
    
    
        def init_network(self):
            print 'network initializing...'
            self.train_imag_list=np.zeros((self.num_train,self.node_in))
            self.train_label_list=np.zeros((self.num_train,1))
            self.confirm_imag_list=np.zeros((self.num_confirm,self.node_in))
            self.confirm_label_list=np.zeros((self.num_confirm,1))
            self.test_imag_list=np.zeros((self.num_test,self.node_in))
            self.test_label_list=np.zeros((self.num_test,1))
    
            self.read_train_image('train-images.idx3-ubyte')
            self.read_train_label('train-labels.idx1-ubyte')
            self.read_test_image('t10k-images.idx3-ubyte')
            self.read_test_label('t10k-labels.idx1-ubyte')
    
            # weights drawn uniformly from [-1/sqrt(fan_in), 1/sqrt(fan_in)]
            self.wjk=(np.random.rand(self.node_hide,self.node_out)-0.5)*2/sqrt(self.node_hide)
            self.wj0=(np.random.rand(self.node_out)-0.5)*2/sqrt(self.node_hide)
            self.wij=(np.random.rand(self.node_in,self.node_hide)-0.5)*2/sqrt(self.node_in)
            self.wi0=(np.random.rand(self.node_hide)-0.5)*2/sqrt(self.node_in)
    
    
        def sigmoid(self,x):
            return 1.0/(1.0+np.exp(-x))

        def calc_yjzk(self,sample_i,imag_list):
            # forward pass: hidden activations yj, then output activations zk
            self.netj=np.dot(imag_list[sample_i],self.wij)+self.wi0
            self.yj=self.sigmoid(self.netj)

            self.netk=np.dot(self.yj,self.wjk)+self.wj0
            self.zk=self.sigmoid(self.netk)
    
        def calc_error(self):
            # half sum-of-squares error over the validation (confirm) set
            ans=0.0
            for sample_i in range(self.num_confirm):
                self.calc_yjzk(sample_i,self.confirm_imag_list)
                label_tmp=np.zeros(self.node_out)
                label_tmp[int(self.confirm_label_list[sample_i])]=1
                ans=ans+sum(np.square(label_tmp-self.zk)/2.0)
            # print ans
            return ans
    
        def training(self):
            print 'training model...'
            for epoch_i in range(self.epoch):
                for circle in range(self.num_train):
                    sample_i=np.random.randint(0,self.num_train)
                    #print 'debug epoch:%d sample:%d' % (epoch_i,sample_i)
                    #calc  error
                    #error_before=self.calc_error()
                    self.calc_yjzk(sample_i,self.train_imag_list)
                    #update hidden->output weights: delta_k = (zk - t) * zk * (1 - zk)
                    tmp_label=np.zeros(self.node_out)
                    tmp_label[int(self.train_label_list[sample_i])]=1
                    delta_k=(self.zk-tmp_label)*self.zk*(1-self.zk)
                    self.yj.shape=(self.node_hide,1)
                    delta_k.shape=(1,self.node_out)
                    self.wjk=self.wjk-self.study_rate*np.dot(self.yj,delta_k)
                    #update input->hidden weights; note this backpropagates through the
                    #already-updated wjk, a common simplification in per-sample SGD code
                    self.yj=self.yj.T
                    delta_j=np.dot(delta_k,self.wjk.T)*self.yj*(1-self.yj)
                    tmp_imag=self.train_imag_list[sample_i]
                    tmp_imag.shape=(self.node_in,1)
                    self.wij=self.wij-self.study_rate*np.dot(tmp_imag,delta_j)
                    # calc error
                    # self.calc_yjzk(sample_i,self.train_imag_list)
                    # error_delta=error_before-self.calc_error()
                    # if np.abs(error_delta)<self.error_limit:
                    #     print 'debug break'
                    #     print error_delta
                    #     break
                #print 'error %d %.2f' % (epoch_i,self.calc_error())
    
        def testing(self):
            print 'testing...'
            num_right=0.0
            for sample_i in range(self.num_test):
                self.calc_yjzk(sample_i,self.test_imag_list)
                ans=self.zk.argmax()
                if ans==int(self.test_label_list[sample_i]):
                    num_right=num_right+1
            self.accuracy=num_right/self.num_test
            print 'accuracy: %.2f%%' % (self.accuracy*100)

    def main():
        data=Data()
        data.init_network()
        data.training()
        data.testing()
    
    if __name__=='__main__':
        main()
    
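    As an aside, the per-sample struct.unpack_from loop above is slow on 60,000 images; numpy.frombuffer can read the same IDX files in one call. A minimal sketch, assuming the files sit in the working directory (the helper names are hypothetical, not part of the code above):

    import numpy as np

    def load_idx_images(filename):
        # IDX image file: 16-byte big-endian header (magic, count, rows, cols),
        # then one unsigned byte per pixel
        with open(filename,'rb') as f:
            buf=f.read()
        magic,num,rows,cols=np.frombuffer(buf,dtype='>u4',count=4)
        pixels=np.frombuffer(buf,dtype=np.uint8,offset=16)
        return pixels.reshape(num,rows*cols)/255.0   # scale to [0,1]

    def load_idx_labels(filename):
        # IDX label file: 8-byte header (magic, count), then one byte per label
        with open(filename,'rb') as f:
            buf=f.read()
        return np.frombuffer(buf,dtype=np.uint8,offset=8)

    For example, train_images=load_idx_images('train-images.idx3-ubyte') replaces the whole read_train_image loop.
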

    Notes

    1. Mind the encoding of the data files, which is documented at the bottom of the dataset page; the page also lists the error rates of classic machine-learning models on MNIST for reference
    2. Initialize the weights sensibly; the number of epochs, the learning rate, and the hidden-node count can follow the empirical values noted in the code comments
    3. Normalize the input data, to keep the sigmoid from overflowing (see the sketch after this list)
    4. Check that row/column shapes are compatible in every matrix multiplication
    5. Pick a sensible number of epochs: a small learning rate tolerates more epochs, a large learning rate calls for fewer
    6. Once a suitable epoch count is confirmed, the validation set can be dropped and the model trained on all the sample data
    7. More hidden nodes are essentially always better
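
    On point 3: without the /255.0 scaling, the net inputs can grow large enough that np.exp overflows float64. A minimal guarded sigmoid, as a sketch (this helper is not in the original code):

    import numpy as np

    def stable_sigmoid(x):
        # clip the pre-activation so np.exp(-x) can never overflow float64
        return 1.0/(1.0+np.exp(-np.clip(x,-500,500)))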

    Parameter tuning script

    import ann
    
    f=open('best_parameter', 'a+')
    for e in range(10,40):
        for node in range(10,50):
            data=ann.Data()
            data.node_hide=node
            data.epoch=e
            data.init_network()
            data.training()
            data.testing()
            ans='circling to get best parameter----->epoch:%d hide_node:%d accuracy:%.4f\n' % (e,node,data.accuracy)
            print ans
            f.write(ans)
    f.close()
    

    The script sweeps the number of epochs and the number of hidden nodes to measure their effect on accuracy. The rough pattern: with a learning rate of 0.05, 10-15 epochs and 30 or more hidden nodes work well.

    Some experimental results are shown below:

    circling to get best parameter----->epoch:14 hide_node:43 accuracy:0.9656
    circling to get best parameter----->epoch:14 hide_node:44 accuracy:0.9651
    circling to get best parameter----->epoch:14 hide_node:45 accuracy:0.9638
    circling to get best parameter----->epoch:14 hide_node:46 accuracy:0.9641
    circling to get best parameter----->epoch:14 hide_node:47 accuracy:0.9649
    circling to get best parameter----->epoch:14 hide_node:48 accuracy:0.9651
    circling to get best parameter----->epoch:14 hide_node:49 accuracy:0.9671
    circling to get best parameter----->epoch:15 hide_node:46 accuracy:0.9661
    circling to get best parameter----->epoch:15 hide_node:47 accuracy:0.9660
    circling to get best parameter----->epoch:15 hide_node:48 accuracy:0.9650
    circling to get best parameter----->epoch:15 hide_node:49 accuracy:0.9655
    circling to get best parameter----->epoch:10 hide_node:100 accuracy:0.9685
    circling to get best parameter----->epoch:10 hide_node:200 accuracy:0.9724
    circling to get best parameter----->epoch:10 hide_node:300 accuracy:0.9718
    circling to get best parameter----->epoch:10 hide_node:1000 accuracy:0.9568
    

    TensorFlow implementation

    import argparse
    
    # Import data
    from tensorflow.examples.tutorials.mnist import input_data
    
    import tensorflow as tf
    
    FLAGS = None
    
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
    
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')
    
    
    def add_layer(inputs, in_size, out_size, activation_function=None):
        # add a fully connected layer
        Weights = weight_variable([in_size, out_size])
        biases = bias_variable([out_size])
        Wx_plus_b = tf.matmul(inputs, Weights) + biases
        if activation_function is None:
            outputs = Wx_plus_b
        else:
            outputs = activation_function(Wx_plus_b)
        return outputs
    
    
    def main(_):
        mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    
    
    
        # reshape the input to have batch size, width, height, channel size
        x = tf.placeholder(tf.float32, [None, 784])
        x_image = tf.reshape(x, [-1, 28, 28, 1])
    
        # 5*5 patch size, input channel is 1, output channel is 32
        W_conv1 = weight_variable([5, 5, 1, 32])
    
        # bias, same size with the output channel
        b_conv1 = bias_variable([32])
    
        # the first convolutional layer with a max pooling layer
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)
    
        #after pooling, we have a tensor with shape[-1, 14, 14, 32]
    
        # the weights and bias for the second layer, we will get 64 channels
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
    
        # the second convolutional layer with a max pooling layer
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
    
        # after pooling, we have a tensor with shape[-1, 7, 7, 64]
    
        # add a fully connected layer with 1024 neurons and use relu as the activation function
        h_pool2_flat = tf.reshape(h_pool2, [-1,7*7*64])
        h_fc1 = add_layer(h_pool2_flat, 7*7*64, 1024, tf.nn.relu)
    
        # we add dropout for the fully connected layer to avoid overfitting
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    
        # finally, the output layer
        y_conv = add_layer(h_fc1_drop, 1024, 10, None)
    
    
    
    
        # loss function and so on
        y_ = tf.placeholder(tf.float32, [None, 10])
        cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
        # start training, and we test our model every 100 steps
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())
        for i in range(10000):
            batch = mnist.train.next_batch(100)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
                test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
                print("step %d, training accuracy %g, test accuracy %g" % (i, train_accuracy, test_accuracy))
    
            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    
    
    
    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
    
        # modify the dir path to your own dataset
        parser.add_argument('--data_dir', type=str, default='/tmp/mnist',
                            help='Directory for storing data')
        FLAGS = parser.parse_args()
        tf.app.run()
    

    Running this requires TensorFlow and a Python 3.x environment.
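
    The script uses the TensorFlow 1.x API (tf.placeholder, InteractiveSession, tf.app.run), so a 1.x install is assumed; a quick sanity check:

    import tensorflow as tf
    print(tf.__version__)   # expect a 1.x version string for this script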

    The results are as follows

    step 0, training accuracy 0.06, test accuracy 0.0892
    step 100, training accuracy 0.86, test accuracy 0.8692
    step 200, training accuracy 0.97, test accuracy 0.9207
    step 300, training accuracy 0.92, test accuracy 0.9403
    step 400, training accuracy 0.95, test accuracy 0.9485
    step 500, training accuracy 0.91, test accuracy 0.9522
    step 600, training accuracy 0.97, test accuracy 0.9565
    step 700, training accuracy 0.97, test accuracy 0.9622
    step 800, training accuracy 0.96, test accuracy 0.9638
    step 900, training accuracy 0.98, test accuracy 0.9687
    step 1000, training accuracy 0.97, test accuracy 0.9703
    

    For any environment-configuration problems, feel free to get in touch; corrections are welcome.
