• tf.layers.batch_normalization


    Source: https://zhuanlan.zhihu.com/p/82354021

    Definition of Batch Normalization (BN)

    Given a $d$-dimensional vector $x = (x^{(1)}, \dots, x^{(d)})$, normalize each feature (i.e., each dimension) independently by subtracting its mean and dividing by its standard deviation:

    $$\hat{x}^{(k)} = \frac{x^{(k)} - \mathrm{E}[x^{(k)}]}{\sqrt{\mathrm{Var}[x^{(k)}]}}$$

    In deep learning the operation is applied per batch: subtract the within-batch sample mean, divide by the within-batch sample standard deviation, and finally apply a scale and shift, where the scale parameter $\gamma$ and the shift parameter $\beta$ are both learnable:

    $$y^{(k)} = \gamma^{(k)} \hat{x}^{(k)} + \beta^{(k)}$$
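
    To make the two formulas concrete, here is a minimal NumPy sketch of the training-time forward pass (the function name and shapes are illustrative, not taken from any library):

    import numpy as np

    def batch_norm_forward(x, gamma, beta, eps=1e-3):
        """Training-time BN for a batch x of shape [N, d]."""
        mu = x.mean(axis=0)                    # per-feature batch mean
        var = x.var(axis=0)                    # per-feature batch variance
        x_hat = (x - mu) / np.sqrt(var + eps)  # normalize each dimension
        return gamma * x_hat + beta            # learnable scale and shift

    x = np.random.randn(64, 4)
    gamma, beta = np.ones(4), np.zeros(4)
    y = batch_norm_forward(x, gamma, beta)
    print(y.mean(axis=0), y.std(axis=0))  # close to 0 and 1 per feature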

    tf.layers.batch_normalization

    Basic parameters

    tf.layers.batch_normalization(
        inputs,
        axis=-1,
        momentum=0.99,
        epsilon=0.001,
        center=True,
        scale=True,
        beta_initializer=tf.zeros_initializer(),
        gamma_initializer=tf.ones_initializer(),
        moving_mean_initializer=tf.zeros_initializer(),
        moving_variance_initializer=tf.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        training=False,
        trainable=True,
        name=None,
        reuse=None,
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        fused=None,
        virtual_batch_size=None,
        adjustment=None
    )
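
    As a sanity check on these parameters (a sketch under TF 1.x), building a single layer shows that gamma and beta are ordinary trainable variables, while moving_mean and moving_variance are non-trainable and are updated only through the ops registered in UPDATE_OPS:

    import tensorflow as tf

    x = tf.placeholder(tf.float32, [None, 8])
    _ = tf.layers.batch_normalization(x, training=True)

    print([v.name for v in tf.trainable_variables()])
    # batch_normalization/gamma:0, batch_normalization/beta:0
    print([v.name for v in tf.global_variables()
           if v not in tf.trainable_variables()])
    # batch_normalization/moving_mean:0, batch_normalization/moving_variance:0
    print(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    # the assign ops that maintain the moving statistics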

    Setting training=True by itself is not enough: the scale parameter $\gamma$ and shift parameter $\beta$ are trained by the optimizer as usual, but the moving mean and moving variance that BN uses at inference time are maintained by update ops that TensorFlow does not run automatically, so they would stay at their initial values. One of the following two settings must be added to the code.

    # placeholder that drives the training argument of tf.layers.batch_normalization
    is_train = tf.placeholder_with_default(False, (), 'is_train')
    
    # Approach 1: run the update ops manually inside sess.run()
    # tf.get_collection(tf.GraphKeys.UPDATE_OPS) returns the ops registered under UPDATE_OPS.
    # UPDATE_OPS maintains the list of operations that must run with every training step.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(EPOCHS):
            for i in range(NUM_BATCHES):
                sess.run(
                    [tf.get_collection(tf.GraphKeys.UPDATE_OPS), optimizer],
                    feed_dict={
                        x: x_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :],
                        y: y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :],
                        is_train: True})  # switches BN between training and inference mode
    
    # Approach 2: use tf.control_dependencies
    # Wrap the definition of the optimizer in tf.control_dependencies().
    # control_dependencies is a mechanism that adds dependencies to any op
    # created inside the with block: it guarantees that the ops passed as its
    # argument are executed before anything defined inside the block runs.
    # Since the train op is created inside the block, every optimization step
    # first runs the BN update ops.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        optimizer = tf.train.AdamOptimizer().minimize(loss)
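
    Either mechanism can be verified by checking that moving_mean actually moves after a training step. Below is a small self-contained sketch (separate from the MNIST examples that follow; the feature dimension 4 and the batch offset of 5 are arbitrary):

    import numpy as np
    import tensorflow as tf

    x = tf.placeholder(tf.float32, [None, 4])
    bn = tf.layers.batch_normalization(x, training=True)
    loss = tf.reduce_mean(tf.square(bn))
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = tf.train.AdamOptimizer().minimize(loss)

    moving_mean = [v for v in tf.global_variables()
                   if 'moving_mean' in v.name][0]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        before = sess.run(moving_mean)
        sess.run(train_op, feed_dict={x: np.random.randn(32, 4) + 5.0})
        after = sess.run(moving_mean)
        print(before, after)  # after has moved toward the batch mean (~5)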

    Complete examples

    The two ways of wiring up the update ops of tf.layers.batch_normalization are demonstrated below with a simple fully connected network on the MNIST dataset.

    Approach 1

    #! /home/lizhongding/anaconda3/envs/tfp/bin/python3.6
    # -*- coding: utf-8 -*-
    """
    7-layer fully connected neural network
    """
    
    __author__ = "lizhongding"
    
    import tensorflow as tf
    import numpy as np
    
    
    def one_hot_encoding(x, depth=10):
        length = len(x)
        coder = np.zeros([length, depth])
        for i in range(length):
            coder[i, x[i]] = 1
        return coder
    
    
    (x_train, y_train), (x_test, y_test) = \
        tf.keras.datasets.mnist.load_data()
    
    x_train = x_train.reshape(x_train.shape[0], -1) / 255
    x_test = x_test.reshape(x_test.shape[0], -1) / 255
    y_train = one_hot_encoding(y_train)
    y_test = one_hot_encoding(y_test)
    
    BATCH_SIZE = 64
    EPOCHS = 50
    NUM_BATCHES = x_train.shape[0] // BATCH_SIZE
    
    x = tf.placeholder(tf.float32, [None, 784], 'input_x')
    y = tf.placeholder(tf.int32, [None, 10], 'input_y')
    
    w1 = tf.Variable(tf.truncated_normal([784, 1024]))
    b1 = tf.Variable(tf.truncated_normal([1, 1024]))
    w2 = tf.Variable(tf.truncated_normal([1024, 512]))
    b2 = tf.Variable(tf.truncated_normal([1, 512]))
    w3 = tf.Variable(tf.truncated_normal([512, 512]))
    b3 = tf.Variable(tf.truncated_normal([1, 512]))
    w4 = tf.Variable(tf.truncated_normal([512, 512]))
    b4 = tf.Variable(tf.truncated_normal([1, 512]))
    w5 = tf.Variable(tf.truncated_normal([512, 256]))
    b5 = tf.Variable(tf.truncated_normal([1, 256]))
    w6 = tf.Variable(tf.truncated_normal([256, 64]))
    b6 = tf.Variable(tf.truncated_normal([1, 64]))
    w7 = tf.Variable(tf.truncated_normal([64, 10]))
    b7 = tf.Variable(tf.truncated_normal([1, 10]))
    
    is_train = tf.placeholder_with_default(False, (), 'is_train')
    
    h1 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(x, w1), b1),
            training=is_train))
    h2 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h1, w2), b2),
            training=is_train))
    h3 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h2, w3), b3),
            training=is_train))
    h4 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h3, w4), b4),
            training=is_train))
    h5 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h4, w5), b5),
            training=is_train))
    h6 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h5, w6), b6),
            training=is_train))
    h7 = tf.nn.leaky_relu(
        tf.add(tf.matmul(h6, w7), b7))
    
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.argmax(y, 1),
            logits=h7
    ))
    
    # with tf.control_dependencies(
    #         tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    
    optimizer = tf.train.AdamOptimizer().minimize(loss)
    
    accuracy = tf.reduce_mean(tf.to_float(
        tf.equal(tf.argmax(y, 1), tf.argmax(h7, 1))))
    
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(EPOCHS):
            for i in range(NUM_BATCHES):
                sess.run(
                    [tf.get_collection(tf.GraphKeys.UPDATE_OPS), optimizer],
                    feed_dict={
                        x: x_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :],
                        y: y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :],
                        is_train: True})  # switches BN between training and inference mode
            print("After Epoch {0:d}, the test accuracy is {1:.4f} ".
                  format(epoch + 1,
                         sess.run(accuracy,
                                  feed_dict={x: x_test, y: y_test})))
        print("Finished!")

    Approach 2

    #! /home/lizhongding/anaconda3/envs/tfp/bin/python3.6
    # -*- coding: utf-8 -*-
    """
    7-layer fully connected neural network
    """
    
    __author__ = "lizhongding"
    
    import tensorflow as tf
    import numpy as np
    
    
    def one_hot_encoding(x, depth=10):
        length = len(x)
        coder = np.zeros([length, depth])
        for i in range(length):
            coder[i, x[i]] = 1
        return coder
    
    
    (x_train, y_train), (x_test, y_test) = \
        tf.keras.datasets.mnist.load_data()
    
    x_train = x_train.reshape(x_train.shape[0], -1) / 255
    x_test = x_test.reshape(x_test.shape[0], -1) / 255
    y_train = one_hot_encoding(y_train)
    y_test = one_hot_encoding(y_test)
    
    BATCH_SIZE = 64
    EPOCHS = 50
    NUM_BATCHES = x_train.shape[0] // BATCH_SIZE
    
    x = tf.placeholder(tf.float32, [None, 784], 'input_x')
    y = tf.placeholder(tf.int32, [None, 10], 'input_y')
    
    w1 = tf.Variable(tf.truncated_normal([784, 1024]))
    b1 = tf.Variable(tf.truncated_normal([1, 1024]))
    w2 = tf.Variable(tf.truncated_normal([1024, 512]))
    b2 = tf.Variable(tf.truncated_normal([1, 512]))
    w3 = tf.Variable(tf.truncated_normal([512, 512]))
    b3 = tf.Variable(tf.truncated_normal([1, 512]))
    w4 = tf.Variable(tf.truncated_normal([512, 512]))
    b4 = tf.Variable(tf.truncated_normal([1, 512]))
    w5 = tf.Variable(tf.truncated_normal([512, 256]))
    b5 = tf.Variable(tf.truncated_normal([1, 256]))
    w6 = tf.Variable(tf.truncated_normal([256, 64]))
    b6 = tf.Variable(tf.truncated_normal([1, 64]))
    w7 = tf.Variable(tf.truncated_normal([64, 10]))
    b7 = tf.Variable(tf.truncated_normal([1, 10]))
    
    is_train = tf.placeholder_with_default(False, (), 'is_train')
    
    h1 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(x, w1), b1),
            training=is_train))
    h2 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h1, w2), b2),
            training=is_train))
    h3 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h2, w3), b3),
            training=is_train))
    h4 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h3, w4), b4),
            training=is_train))
    h5 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h4, w5), b5),
            training=is_train))
    h6 = tf.nn.leaky_relu(
        tf.layers.batch_normalization(
            tf.add(tf.matmul(h5, w6), b6),
            training=is_train))
    h7 = tf.nn.leaky_relu(
        tf.add(tf.matmul(h6, w7), b7))
    
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.argmax(y, 1),
            logits=h7
    ))
    
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        optimizer = tf.train.AdamOptimizer().minimize(loss)
    
    accuracy = tf.reduce_mean(tf.to_float(
        tf.equal(tf.argmax(y, 1), tf.argmax(h7, 1))))
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(EPOCHS):
            for i in range(NUM_BATCHES):
                sess.run(optimizer, feed_dict={
                    x: x_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :],
                    y: y_train[i*BATCH_SIZE:(i+1)*BATCH_SIZE, :],
                    is_train: True})  # switches BN between training and inference mode
            print("After Epoch {0:d}, the test accuracy is {1:.4f} ".
                  format(epoch + 1,
                         sess.run(accuracy,
                                  feed_dict={x: x_test, y: y_test})))
        print("Finished!")

  • Original article: https://www.cnblogs.com/yibeimingyue/p/15110499.html