• 2.5 Batch Normalization


    Batch Normalization (BN) tackles a problem of very deep networks: values cannot propagate forward effectively, because each layer's outputs have a different mean and variance, so the distribution of the data shifts from layer to layer.

    For example, if the pre-activation value X*W is already so large that the tanh activation outputs (almost exactly) 1, then passing it through the next layer changes essentially nothing: the activation is saturated, and the network becomes insensitive to its inputs.
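    A quick numeric check (an illustration added here, not part of the original code) makes the saturation concrete: for large pre-activations tanh is pinned near 1 and its derivative 1 - tanh(x)^2 is effectively zero, so gradients vanish:

    import numpy as np

    x = np.array([0.5, 2.0, 5.0, 10.0])
    y = np.tanh(x)
    grad = 1.0 - y ** 2                # derivative of tanh
    print(y)      # [0.4621 0.964  0.9999 1.    ] -- saturates near 1
    print(grad)   # [7.86e-01 7.07e-02 1.81e-04 8.24e-09] -- gradient vanishes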

    In this situation we can first apply some batch normalization.
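    Conceptually the batch-norm transform is just "standardize, then rescale": normalize each feature over the batch to zero mean and unit variance, then apply a learnable scale (gamma) and shift (beta). A minimal NumPy sketch (for illustration only; it mirrors the TensorFlow code below):

    import numpy as np

    def batch_norm(x, gamma, beta, eps=0.001):
        # x: [batch, features]; statistics are computed over the batch axis
        mean = x.mean(axis=0)
        var = x.var(axis=0)
        x_hat = (x - mean) / np.sqrt(var + eps)   # standardize
        return gamma * x_hat + beta               # learnable rescale and shift

    x = np.random.randn(8, 3) * 5 + 10            # badly scaled activations
    out = batch_norm(x, gamma=np.ones(3), beta=np.zeros(3))
    print(out.mean(axis=0), out.std(axis=0))      # roughly 0 and 1 per feature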

     

    From the layer-by-layer histograms you can see that without BN the activations in every layer quickly collapse to 0; in other words, virtually all of the neurons die. With BN applied before ReLU, every layer keeps a reasonably healthy distribution of values, and most neurons stay alive.

    In the loss comparison under ReLU, the curve for "no BN" is essentially absent: it vanishes almost immediately.

    If the activation is switched to tanh, the loss evolves as follows:

    """
    Build two networks.
    1. Without batch normalization
    2. With batch normalization
    
    Run tests on these two networks.
    """
    
    #  Batch Normalization
    
    import numpy as np
    import tensorflow as tf
    import matplotlib.pyplot as plt
    
    
    ACTIVATION = tf.nn.relu  # switch to tf.nn.tanh for the tanh experiment
    N_LAYERS = 7
    N_HIDDEN_UNITS = 30
    
    
    def fix_seed(seed=1):
        # reproducible
        np.random.seed(seed)
        tf.set_random_seed(seed)
    
    
    def plot_his(inputs, inputs_norm):
        # plot histogram for the inputs of every layer
        for j, all_inputs in enumerate([inputs, inputs_norm]):
            for i, layer_input in enumerate(all_inputs):  # renamed from `input` to avoid shadowing the built-in
                plt.subplot(2, len(all_inputs), j*len(all_inputs)+(i+1))
                plt.cla()
                if i == 0:
                    the_range = (-7, 10)
                else:
                    the_range = (-1, 1)
                plt.hist(layer_input.ravel(), bins=15, range=the_range, color='#FF5733')
                plt.yticks(())
                if j == 1:
                    plt.xticks(the_range)
                else:
                    plt.xticks(())
                ax = plt.gca()
                ax.spines['right'].set_color('none')
                ax.spines['top'].set_color('none')
            plt.title("%s normalizing" % ("Without" if j == 0 else "With"))
        plt.draw()
        plt.pause(0.01)
    
    
    def build_net(xs, ys, norm):
        def add_layer(inputs, in_size, out_size, activation_function=None, norm=False):
            # weights and biases (bad initialization for this case)
            Weights = tf.Variable(tf.random_normal([in_size, out_size], mean=0., stddev=1.))
            biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    
            # fully connected product
            Wx_plus_b = tf.matmul(inputs, Weights) + biases
    
            # normalize fully connected product
            if norm:
                # Batch Normalize
                fc_mean, fc_var = tf.nn.moments(
                    Wx_plus_b,
                    axes=[0],   # the axes to normalize over; [0] means over the batch dimension
                                # for images you would use [0, 1, 2] ([batch, height, width]), but not the channel axis
                )
                scale = tf.Variable(tf.ones([out_size]))
                shift = tf.Variable(tf.zeros([out_size]))
                epsilon = 0.001
    
                # apply moving average for mean and var when train on batch
                ema = tf.train.ExponentialMovingAverage(decay=0.5)
                def mean_var_with_update():
                    ema_apply_op = ema.apply([fc_mean, fc_var])
                    with tf.control_dependencies([ema_apply_op]):
                        return tf.identity(fc_mean), tf.identity(fc_var)
                mean, var = mean_var_with_update()
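                # NOTE: this demo always uses the current batch statistics (and
                # updates the moving averages as a side effect). At inference
                # time one would typically switch to the tracked averages, e.g.
                # via a hypothetical boolean placeholder `on_train`:
                #   mean, var = tf.cond(on_train, mean_var_with_update,
                #                       lambda: (ema.average(fc_mean), ema.average(fc_var)))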
                # batch-normalize using all the parameters above
                Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, mean, var, shift, scale, epsilon)
                # equivalent to these two steps:
                # Wx_plus_b = (Wx_plus_b - fc_mean) / tf.sqrt(fc_var + 0.001)
                # Wx_plus_b = Wx_plus_b * scale + shift
    
            # activation
            if activation_function is None:
                outputs = Wx_plus_b
            else:
                outputs = activation_function(Wx_plus_b)
    
            return outputs
    
        fix_seed(1)
    
        if norm:
            # BN for the first input
            fc_mean, fc_var = tf.nn.moments(
                xs,
                axes=[0],
            )
            scale = tf.Variable(tf.ones([1]))
            shift = tf.Variable(tf.zeros([1]))
            epsilon = 0.001
            # apply moving average for mean and var when train on batch
            ema = tf.train.ExponentialMovingAverage(decay=0.5)
            def mean_var_with_update():
                ema_apply_op = ema.apply([fc_mean, fc_var])
                with tf.control_dependencies([ema_apply_op]):
                    return tf.identity(fc_mean), tf.identity(fc_var)
            mean, var = mean_var_with_update()
            xs = tf.nn.batch_normalization(xs, mean, var, shift, scale, epsilon)
    
        # record inputs for every layer
        layers_inputs = [xs]
    
        # build hidden layers
        for l_n in range(N_LAYERS):
            layer_input = layers_inputs[l_n]
            in_size = layers_inputs[l_n].get_shape()[1].value
    
            output = add_layer(
                layer_input,    # input
                in_size,        # input size
                N_HIDDEN_UNITS, # output size
                ACTIVATION,     # activation function
                norm,           # normalize before activation
            )
            layers_inputs.append(output)    # add output for next run
    
        # build output layer
        prediction = add_layer(layers_inputs[-1], N_HIDDEN_UNITS, 1, activation_function=None)
    
        cost = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))
        train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
        return [train_op, cost, layers_inputs]
    
    
    
    # make up data
    fix_seed(1)
    x_data = np.linspace(-7, 10, 2500)[:, np.newaxis]
    np.random.shuffle(x_data)
    noise = np.random.normal(0, 8, x_data.shape)
    y_data = np.square(x_data) - 5 + noise
    
    # plot input data
    plt.scatter(x_data, y_data)
    plt.show()
    
    xs = tf.placeholder(tf.float32, [None, 1])  # [num_samples, num_features]
    ys = tf.placeholder(tf.float32, [None, 1])
    # create two networks: one without BN and one with BN
    train_op, cost, layers_inputs = build_net(xs, ys, norm=False)   # without BN
    train_op_norm, cost_norm, layers_inputs_norm = build_net(xs, ys, norm=True)  # with BN
    
    sess = tf.Session()
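    # tf.initialize_all_variables() was deprecated in TF 0.12 in favor of global_variables_initializer()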
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    
    # record cost
    cost_his = []
    cost_his_norm = []
    record_step = 5
    
    plt.ion()
    plt.figure(figsize=(7, 3))
    for i in range(250):
        if i % 50 == 0:
            # plot histogram
            all_inputs, all_inputs_norm = sess.run([layers_inputs, layers_inputs_norm], feed_dict={xs: x_data, ys: y_data})
            plot_his(all_inputs, all_inputs_norm)
    
        # train on batch
        sess.run([train_op, train_op_norm], feed_dict={xs: x_data[i*10:i*10+10], ys: y_data[i*10:i*10+10]})
    
        if i % record_step == 0:
            # record cost
            cost_his.append(sess.run(cost, feed_dict={xs: x_data, ys: y_data}))
            cost_his_norm.append(sess.run(cost_norm, feed_dict={xs: x_data, ys: y_data}))
    
    plt.ioff()
    plt.figure()
    plt.plot(np.arange(len(cost_his))*record_step, np.array(cost_his), label='no BN')     # no norm
    plt.plot(np.arange(len(cost_his))*record_step, np.array(cost_his_norm), label='BN')   # norm
    plt.legend()
    plt.show()
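    To reproduce the tanh comparison described earlier, set ACTIVATION = tf.nn.tanh at the top of the script and rerun; the histograms and the loss plot will then show the tanh behaviour.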

    Figure: scatter plot of the generated dataset (a noisy quadratic).

  • Original article: https://www.cnblogs.com/jackchen-Net/p/8126739.html