• A Two-Layer Neural Network in Practice


    1 Overview

    A two-layer fully connected network, trained as an image classifier on CIFAR-10.
    Implementation environment: Python 3

    2 Data Processing

    2.1 Loading the Dataset

    Place the raw dataset files under the "data/cifar10/" folder.
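
    If the dataset has not been downloaded yet, the following sketch fetches and unpacks it into that folder. It is not part of the original workflow; the URL is the official CIFAR-10 python archive, and the paths are assumptions chosen to match the loader below.

    # Hedged helper: download and extract CIFAR-10 into data/cifar10/ (assumed layout)
    import os
    import tarfile
    import urllib.request
    
    url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'   # official archive
    archive = 'cifar-10-python.tar.gz'
    
    os.makedirs('data', exist_ok=True)
    if not os.path.exists('data/cifar10'):
        if not os.path.exists(archive):
            urllib.request.urlretrieve(url, archive)
        with tarfile.open(archive, 'r:gz') as tar:
            tar.extractall('data/')                    # extracts to data/cifar-10-batches-py
        os.rename('data/cifar-10-batches-py', 'data/cifar10')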

    ### Load the CIFAR-10 dataset
    import os
    import pickle
    import random
    import numpy as np
    import matplotlib.pyplot as plt
    
    def load_CIFAR_batch(filename):
        """
        CIFAR-10 is stored in batches; this loads a single batch.
    
        @param filename: path to the CIFAR batch file
        @return: X, Y: the data and labels in the CIFAR batch
        """
    
        with open(filename,'rb') as f:
            datadict=pickle.load(f,encoding='bytes')
    
            X=datadict[b'data']
            Y=datadict[b'labels']
            
            X=X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
            Y=np.array(Y)
            
            return X, Y
    
    def load_CIFAR10(ROOT):
        """
        Load the entire CIFAR-10 dataset.
    
        @param ROOT: root directory of the dataset
        @return: X_train, Y_train: training data and labels
                 X_test, Y_test: test data and labels
        """
    
        xs=[]
        ys=[]
    
        for b in range(1,6):
            f=os.path.join(ROOT, "data_batch_%d" % (b, ))
            X, Y=load_CIFAR_batch(f)
            xs.append(X)
            ys.append(Y)
    
        X_train=np.concatenate(xs)
        Y_train=np.concatenate(ys)
    
        del X, Y
    
        X_test, Y_test=load_CIFAR_batch(os.path.join(ROOT, "test_batch"))
    
        return X_train, Y_train, X_test, Y_test
      
      
    X_train, y_train, X_test, y_test = load_CIFAR10('data/cifar10/') 
    
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print( y_test.shape)
    

    The output is:

    (50000, 32, 32, 3)
    (50000,)
    (10000, 32, 32, 3)
    (10000,)
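
    To sanity-check the loaded data, a few example images per class can be displayed. This is a minimal sketch added here for illustration; the class names follow the standard CIFAR-10 label order.

    # Show a handful of training images for each of the 10 classes
    classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    samples_per_class = 5
    for cls_idx, cls_name in enumerate(classes):
        idxs = np.flatnonzero(y_train == cls_idx)[:samples_per_class]
        for i, idx in enumerate(idxs):
            plt_idx = i * len(classes) + cls_idx + 1
            plt.subplot(samples_per_class, len(classes), plt_idx)
            plt.imshow(X_train[idx].astype('uint8'))
            plt.axis('off')
            if i == 0:
                plt.title(cls_name)
    plt.show()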
    

    2.2 Splitting the Dataset

    Split the loaded data into a training set, a validation set, and a test set.

    ## Split into training, validation, and test sets
    
    num_train = 49000
    num_val = 1000
    num_test = 1000
    
    # Validation set
    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    
    # Train set
    mask = range(num_train)
    X_train = X_train[mask]
    y_train = y_train[mask]
    
    # Test set
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    
    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    
    print('Train data shape: ', X_train.shape)
    print('Validation data shape: ', X_val.shape)
    print('Test data shape: ', X_test.shape)
    

    The output is:

    Train data shape:  (49000, 3072)
    Validation data shape:  (1000, 3072)
    Test data shape:  (1000, 3072)
    

    2.3 Normalization

    Normalize the splits by subtracting the mean image, computed on the training set, from each of them.

    # Preprocessing: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
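
    As a quick check (not in the original code), the training set should now be centered at roughly zero, while the validation and test sets are only approximately centered because they use the training-set mean.

    # Verify the effect of mean subtraction
    print('Train mean after centering:', np.mean(X_train))
    print('Val mean after centering:  ', np.mean(X_val))
    print('Test mean after centering: ', np.mean(X_test))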
    

    3 Two-Layer Neural Network Classifier

    3.1 Defining the Two-Layer Network
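
    For reference, the forward pass and loss implemented by the class below can be written out as follows (a summary added here, using the same symbols as the code):

    Z_1 = X W_1 + b_1, \qquad A_1 = \max(0, Z_1), \qquad S = A_1 W_2 + b_2

    L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{S_{i,y_i}}}{\sum_{j} e^{S_{i,j}}}
        + \frac{\mathrm{reg}}{2}\left(\lVert W_1\rVert^2 + \lVert W_2\rVert^2\right)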

    class TwoLayerNet(object):
        def __init__(self, input_size, hidden_size, output_size, std = 1e-4):
            """
            Initialize the model weights
            W1: First layer weights; has shape (D, H)
            b1: First layer biases; has shape (H,)
            W2: Second layer weights; has shape (H, C)
            b2: Second layer biases; has shape (C,)
            Inputs:
            - input_size: The dimension D of the input data.
            - hidden_size: The number of neurons H in the hidden layer.
            - output_size: The number of classes C.
            """
            self.params = {}
            self.params['W1'] = std * np.random.randn(input_size, hidden_size)
            self.params['b1'] = np.zeros(hidden_size)
            self.params['W2'] = std * np.random.randn(hidden_size, output_size)
            self.params['b2'] = np.zeros(output_size)
            
        def loss(self, X, y, reg = 0.0):
            """
            Two-layer network loss function, vectorized implementation (without loops).
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the training data,
              consisting of num_train samples each of dimension D
            - y: A numpy array of shape (num_train,) containing the training labels,
              where y[i] is the label of X[i]
            - reg: float, regularization strength
            Returns:
            - loss: the loss value between the predictions and the ground truth
            - grads: Dictionary mapping parameter names to gradients of those parameters
              with respect to the loss function; has the same keys as self.params,
              i.e. 'W1', 'b1', 'W2', 'b2'
            """
            N, dim = X.shape
            grads = {}
            
            # input layer ==> hidden layer ==> ReLU ==> output layer ==> Softmax
            W1 = self.params['W1']
            b1 = self.params['b1']
            W2 = self.params['W2']
            b2 = self.params['b2']
            
            # input layer==> hidden layer
            Z1 = np.dot(X, W1) + b1
            # hidden layer ==> ReLU
            A1 = np.maximum(0, Z1)    # ReLU function
            # ReLU ==> output layer
            scores = np.dot(A1, W2) + b2
            # output layer ==> Softmax
            scores_shift = scores - np.max(scores, axis=1).reshape(-1, 1)
            Softmax_output = np.exp(scores_shift) / np.sum(np.exp(scores_shift), axis=1).reshape(-1, 1)
            loss = -np.sum(np.log(Softmax_output[range(N), list(y)]))
            loss /= N
            loss += 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2)
            
            # grads
            dS = Softmax_output.copy()
            dS[range(N), list(y)] += -1
            dS /= N
            dW2 = np.dot(A1.T, dS)
            db2 = np.sum(dS, axis=0)
            dA1 = np.dot(dS, W2.T)
            dZ1 = dA1 * (A1 > 0)
            dW1 = np.dot(X.T, dZ1)
            db1 = np.sum(dZ1, axis=0)
            dW2 += reg * W2
            dW1 += reg * W1
            
            grads['W1'] = dW1
            grads['b1'] = db1
            grads['W2'] = dW2
            grads['b2'] = db2
            
            return loss, grads
        
        def predict(self, X):
            """
            Use the trained weights to predict labels for the data.
            Inputs:
            - X: A numpy array of shape (num_test, D) containing the test data
            Outputs:
            - y_pred: A numpy array of predicted labels for the data in X
            """
            W1 = self.params['W1']
            b1 = self.params['b1']
            W2 = self.params['W2']
            b2 = self.params['b2']
            
            Z1 = np.dot(X, W1) + b1
            A1 = np.maximum(0, Z1)    # ReLU function
            scores = np.dot(A1, W2) + b2
            y_pred = np.argmax(scores, axis=1)
                
            return y_pred
        
        def train(self, X, y, X_val, y_val, learning_rate=1e-3, learning_rate_decay=0.95,
                reg=5e-6, num_iters=100, batch_size=200, print_flag=False):
            """
            Train Two-layer neural network classifier using SGD
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the training data,
              consisting of num_train samples each of dimension D
            - y: A numpy array of shape (num_train,) containing the training labels,
              where y[i] is the label of X[i], y[i] = c, 0 <= c < C
            - X_val: A numpy array of shape (num_val, D) containing the validation data,
              consisting of num_val samples each of dimension D
            - y_val: A numpy array of shape (num_val,) containing the validation labels,
              where y_val[i] is the label of X_val[i], y_val[i] = c, 0 <= c < C
            - learning_rate: (float) learning rate for optimization
            - learning_rate_decay: Scalar giving the factor used to decay the learning rate
              after each epoch.
            - reg: (float) regularization strength
            - num_iters: (integer) number of optimization steps to take
            - batch_size: (integer) number of training examples to use at each step
            - print_flag: (boolean) If true, print progress during optimization
            Outputs:
            - a dictionary containing loss_history, train_accuracy_history and val_accuracy_history
            """
            num_train = X.shape[0]
            iterations_per_epoch = max(num_train // batch_size, 1)    # integer division so the epoch check below is exact
            loss_history = []
            train_accuracy_history = []
            val_accuracy_history = []
            
            for t in range(num_iters):
                idx_batch = np.random.choice(num_train, batch_size, replace=True)
                X_batch = X[idx_batch]
                y_batch = y[idx_batch]
                loss, grads = self.loss(X_batch, y_batch, reg)
                loss_history.append(loss)
                self.params['W1'] += -learning_rate * grads['W1']
                self.params['b1'] += -learning_rate * grads['b1']
                self.params['W2'] += -learning_rate * grads['W2']
                self.params['b2'] += -learning_rate * grads['b2']
                
                # Every epoch, check train and val accuracy and decay learning rate.
                if t % iterations_per_epoch == 0:
                    train_accuracy = np.mean(self.predict(X_batch) == y_batch)
                    val_accuracy = np.mean(self.predict(X_val) == y_val)
                    train_accuracy_history.append(train_accuracy)
                    val_accuracy_history.append(val_accuracy)
                    
                    # Decay learning rate
                    learning_rate *= learning_rate_decay
                
                # print the progress during optimization
                if print_flag and t%100 == 0:
                    print('iteration %d / %d: loss %f' % (t, num_iters, loss))
                
            return {
                'loss_history': loss_history,
                'train_accuracy_history': train_accuracy_history,
                'val_accuracy_history': val_accuracy_history,
            }
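
    Before training on the full dataset, the analytic gradients returned by loss() can be checked against numerical gradients on a tiny toy problem. The centered-difference helper and the toy sizes below are illustrative additions, not part of the original post.

    # Gradient check on a small random problem (assumes the TwoLayerNet class above)
    def numerical_gradient(f, x, h=1e-5):
        """Centered-difference numerical gradient of f at x (x is perturbed in place and restored)."""
        grad = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            ix = it.multi_index
            old_value = x[ix]
            x[ix] = old_value + h
            fxph = f(x)
            x[ix] = old_value - h
            fxmh = f(x)
            x[ix] = old_value
            grad[ix] = (fxph - fxmh) / (2 * h)
            it.iternext()
        return grad
    
    np.random.seed(0)
    toy_net = TwoLayerNet(input_size=4, hidden_size=10, output_size=3, std=1e-1)
    toy_X = 10.0 * np.random.randn(5, 4)
    toy_y = np.array([0, 1, 2, 2, 1])
    
    loss, grads = toy_net.loss(toy_X, toy_y, reg=0.05)
    for name in ['W1', 'b1', 'W2', 'b2']:
        f = lambda p: toy_net.loss(toy_X, toy_y, reg=0.05)[0]
        num_grad = numerical_gradient(f, toy_net.params[name])
        rel_error = np.max(np.abs(num_grad - grads[name]) /
                           np.maximum(1e-8, np.abs(num_grad) + np.abs(grads[name])))
        print('%s max relative error: %e' % (name, rel_error))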
            
    

    Train the model with stochastic gradient descent and evaluate it on the validation set.

    input_size = 32 * 32 * 3
    hidden_size = 50
    num_classes = 10
    net = TwoLayerNet(input_size, hidden_size, num_classes)
    
    # Train the network
    stats = net.train(X_train, y_train, X_val, y_val,
                num_iters=1000, batch_size=200,
                learning_rate=1e-4, learning_rate_decay=0.95,
                reg=0.25, print_flag=True)
    
    # Predict on the validation set
    val_acc = (net.predict(X_val) == y_val).mean()
    print('Validation accuracy: ', val_acc)
    

    The output is:

    iteration 0 / 1000: loss 2.302776
    iteration 100 / 1000: loss 2.302100
    iteration 200 / 1000: loss 2.296930
    iteration 300 / 1000: loss 2.261319
    iteration 400 / 1000: loss 2.174613
    iteration 500 / 1000: loss 2.075674
    iteration 600 / 1000: loss 2.103124
    iteration 700 / 1000: loss 2.036040
    iteration 800 / 1000: loss 1.931442
    iteration 900 / 1000: loss 1.906379
    Validation accuracy:  0.281
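
    The stats dictionary returned by train() can be plotted to monitor optimization. A short plotting sketch added for illustration:

    # Plot the loss per iteration and the per-epoch train/val accuracies
    plt.subplot(2, 1, 1)
    plt.plot(stats['loss_history'])
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    
    plt.subplot(2, 1, 2)
    plt.plot(stats['train_accuracy_history'], label='train')
    plt.plot(stats['val_accuracy_history'], label='val')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()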
    

    3.2 Tuning the Hyperparameters

    # Tune hyperparameters
    # Hyperparameters
    learning_rates = [1e-4, 5e-4, 9e-4, 13e-4, 15e-4]
    regularization_strengths = [0.25, 0.5, 0.75, 1.0]
    num_iters = 3000
    batch_size = 200
    learning_rate_decay = 0.98
    
    # Net structure
    input_size = 32 * 32 * 3
    hidden_size = [50, 100, 150]
    num_classes = 10
    
    # Initialization
    best_net = None
    best_hidden_size = None
    best_val = -1
    best_lr = None
    best_reg = None
    results = {}
    
    # Train the two layers network
    for i in range(len(hidden_size)):
        for lr in learning_rates:
            for reg in regularization_strengths:
                net = TwoLayerNet(input_size, hidden_size[i], num_classes)
                stats = net.train(X_train, y_train, X_val, y_val,
                    num_iters=num_iters, batch_size=batch_size,
                    learning_rate=lr, learning_rate_decay=learning_rate_decay,
                    reg=reg, print_flag=False)
                train_accuracy = stats['train_accuracy_history'][-1]
                val_accuracy = stats['val_accuracy_history'][-1]
                if val_accuracy > best_val:
                    best_lr = lr
                    best_reg = reg
                    best_val = val_accuracy
                    best_net = net
                    best_hidden_size = hidden_size[i]
                results[(lr, reg)] = train_accuracy, val_accuracy
                print('hidden_size: %d lr: %e reg: %e train accuracy: %f val accuracy: %f' %
                      (hidden_size[i], lr, reg, results[(lr, reg)][0], results[(lr, reg)][1]))
    print('Best hidden_size: %d\nBest lr: %e\nBest reg: %e\ntrain accuracy: %f\nval accuracy: %f' %
         (best_hidden_size, best_lr, best_reg, results[(best_lr, best_reg)][0], results[(best_lr, best_reg)][1]))
    

    The output is:

    hidden_size: 50 lr: 1.000000e-04 reg: 2.500000e-01 train accuracy: 0.410000 val accuracy: 0.407000
    hidden_size: 50 lr: 1.000000e-04 reg: 5.000000e-01 train accuracy: 0.415000 val accuracy: 0.399000
    hidden_size: 50 lr: 1.000000e-04 reg: 7.500000e-01 train accuracy: 0.370000 val accuracy: 0.400000
    hidden_size: 50 lr: 1.000000e-04 reg: 1.000000e+00 train accuracy: 0.420000 val accuracy: 0.403000
    hidden_size: 50 lr: 5.000000e-04 reg: 2.500000e-01 train accuracy: 0.595000 val accuracy: 0.494000
    hidden_size: 50 lr: 5.000000e-04 reg: 5.000000e-01 train accuracy: 0.595000 val accuracy: 0.505000
    hidden_size: 50 lr: 5.000000e-04 reg: 7.500000e-01 train accuracy: 0.495000 val accuracy: 0.483000
    hidden_size: 50 lr: 5.000000e-04 reg: 1.000000e+00 train accuracy: 0.600000 val accuracy: 0.475000
    hidden_size: 50 lr: 9.000000e-04 reg: 2.500000e-01 train accuracy: 0.670000 val accuracy: 0.496000
    hidden_size: 50 lr: 9.000000e-04 reg: 5.000000e-01 train accuracy: 0.675000 val accuracy: 0.510000
    hidden_size: 50 lr: 9.000000e-04 reg: 7.500000e-01 train accuracy: 0.610000 val accuracy: 0.485000
    hidden_size: 50 lr: 9.000000e-04 reg: 1.000000e+00 train accuracy: 0.635000 val accuracy: 0.487000
    hidden_size: 50 lr: 1.300000e-03 reg: 2.500000e-01 train accuracy: 0.700000 val accuracy: 0.486000
    hidden_size: 50 lr: 1.300000e-03 reg: 5.000000e-01 train accuracy: 0.590000 val accuracy: 0.500000
    hidden_size: 50 lr: 1.300000e-03 reg: 7.500000e-01 train accuracy: 0.615000 val accuracy: 0.466000
    hidden_size: 50 lr: 1.300000e-03 reg: 1.000000e+00 train accuracy: 0.620000 val accuracy: 0.481000
    hidden_size: 50 lr: 1.500000e-03 reg: 2.500000e-01 train accuracy: 0.660000 val accuracy: 0.485000
    hidden_size: 50 lr: 1.500000e-03 reg: 5.000000e-01 train accuracy: 0.585000 val accuracy: 0.466000
    hidden_size: 50 lr: 1.500000e-03 reg: 7.500000e-01 train accuracy: 0.655000 val accuracy: 0.483000
    hidden_size: 50 lr: 1.500000e-03 reg: 1.000000e+00 train accuracy: 0.630000 val accuracy: 0.484000
    hidden_size: 100 lr: 1.000000e-04 reg: 2.500000e-01 train accuracy: 0.325000 val accuracy: 0.413000
    hidden_size: 100 lr: 1.000000e-04 reg: 5.000000e-01 train accuracy: 0.340000 val accuracy: 0.416000
    hidden_size: 100 lr: 1.000000e-04 reg: 7.500000e-01 train accuracy: 0.375000 val accuracy: 0.421000
    hidden_size: 100 lr: 1.000000e-04 reg: 1.000000e+00 train accuracy: 0.480000 val accuracy: 0.409000
    hidden_size: 100 lr: 5.000000e-04 reg: 2.500000e-01 train accuracy: 0.605000 val accuracy: 0.496000
    hidden_size: 100 lr: 5.000000e-04 reg: 5.000000e-01 train accuracy: 0.580000 val accuracy: 0.510000
    hidden_size: 100 lr: 5.000000e-04 reg: 7.500000e-01 train accuracy: 0.605000 val accuracy: 0.496000
    hidden_size: 100 lr: 5.000000e-04 reg: 1.000000e+00 train accuracy: 0.540000 val accuracy: 0.508000
    hidden_size: 100 lr: 9.000000e-04 reg: 2.500000e-01 train accuracy: 0.720000 val accuracy: 0.509000
    hidden_size: 100 lr: 9.000000e-04 reg: 5.000000e-01 train accuracy: 0.665000 val accuracy: 0.507000
    hidden_size: 100 lr: 9.000000e-04 reg: 7.500000e-01 train accuracy: 0.630000 val accuracy: 0.512000
    hidden_size: 100 lr: 9.000000e-04 reg: 1.000000e+00 train accuracy: 0.610000 val accuracy: 0.497000
    hidden_size: 100 lr: 1.300000e-03 reg: 2.500000e-01 train accuracy: 0.720000 val accuracy: 0.495000
    hidden_size: 100 lr: 1.300000e-03 reg: 5.000000e-01 train accuracy: 0.775000 val accuracy: 0.524000
    hidden_size: 100 lr: 1.300000e-03 reg: 7.500000e-01 train accuracy: 0.640000 val accuracy: 0.503000
    hidden_size: 100 lr: 1.300000e-03 reg: 1.000000e+00 train accuracy: 0.665000 val accuracy: 0.478000
    hidden_size: 100 lr: 1.500000e-03 reg: 2.500000e-01 train accuracy: 0.650000 val accuracy: 0.516000
    hidden_size: 100 lr: 1.500000e-03 reg: 5.000000e-01 train accuracy: 0.675000 val accuracy: 0.499000
    hidden_size: 100 lr: 1.500000e-03 reg: 7.500000e-01 train accuracy: 0.635000 val accuracy: 0.493000
    hidden_size: 100 lr: 1.500000e-03 reg: 1.000000e+00 train accuracy: 0.600000 val accuracy: 0.493000
    hidden_size: 150 lr: 1.000000e-04 reg: 2.500000e-01 train accuracy: 0.410000 val accuracy: 0.420000
    hidden_size: 150 lr: 1.000000e-04 reg: 5.000000e-01 train accuracy: 0.475000 val accuracy: 0.415000
    hidden_size: 150 lr: 1.000000e-04 reg: 7.500000e-01 train accuracy: 0.385000 val accuracy: 0.422000
    hidden_size: 150 lr: 1.000000e-04 reg: 1.000000e+00 train accuracy: 0.375000 val accuracy: 0.425000
    hidden_size: 150 lr: 5.000000e-04 reg: 2.500000e-01 train accuracy: 0.605000 val accuracy: 0.524000
    hidden_size: 150 lr: 5.000000e-04 reg: 5.000000e-01 train accuracy: 0.550000 val accuracy: 0.512000
    hidden_size: 150 lr: 5.000000e-04 reg: 7.500000e-01 train accuracy: 0.565000 val accuracy: 0.511000
    hidden_size: 150 lr: 5.000000e-04 reg: 1.000000e+00 train accuracy: 0.580000 val accuracy: 0.501000
    hidden_size: 150 lr: 9.000000e-04 reg: 2.500000e-01 train accuracy: 0.635000 val accuracy: 0.526000
    hidden_size: 150 lr: 9.000000e-04 reg: 5.000000e-01 train accuracy: 0.615000 val accuracy: 0.512000
    hidden_size: 150 lr: 9.000000e-04 reg: 7.500000e-01 train accuracy: 0.655000 val accuracy: 0.523000
    hidden_size: 150 lr: 9.000000e-04 reg: 1.000000e+00 train accuracy: 0.585000 val accuracy: 0.507000
    hidden_size: 150 lr: 1.300000e-03 reg: 2.500000e-01 train accuracy: 0.745000 val accuracy: 0.519000
    hidden_size: 150 lr: 1.300000e-03 reg: 5.000000e-01 train accuracy: 0.655000 val accuracy: 0.501000
    hidden_size: 150 lr: 1.300000e-03 reg: 7.500000e-01 train accuracy: 0.625000 val accuracy: 0.530000
    hidden_size: 150 lr: 1.300000e-03 reg: 1.000000e+00 train accuracy: 0.645000 val accuracy: 0.506000
    hidden_size: 150 lr: 1.500000e-03 reg: 2.500000e-01 train accuracy: 0.660000 val accuracy: 0.478000
    hidden_size: 150 lr: 1.500000e-03 reg: 5.000000e-01 train accuracy: 0.675000 val accuracy: 0.485000
    hidden_size: 150 lr: 1.500000e-03 reg: 7.500000e-01 train accuracy: 0.670000 val accuracy: 0.512000
    hidden_size: 150 lr: 1.500000e-03 reg: 1.000000e+00 train accuracy: 0.655000 val accuracy: 0.495000
    Best hidden_size: 150
    Best lr: 1.300000e-03
    Best reg: 7.500000e-01
    train accuracy: 0.625000
    val accuracy: 0.530000
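
    The results dictionary collected above can be visualized to see which region of the grid works best. This sketch is an added illustration; note that results is keyed only by (lr, reg), so each entry reflects the last hidden size tried.

    # Scatter of validation accuracy over the (learning rate, regularization) grid
    lrs = [lr for (lr, reg) in results]
    regs = [reg for (lr, reg) in results]
    val_accs = [results[key][1] for key in results]
    plt.scatter(np.log10(lrs), regs, c=val_accs, cmap='viridis')
    plt.colorbar(label='val accuracy')
    plt.xlabel('log10(learning rate)')
    plt.ylabel('regularization strength')
    plt.show()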
    

    Evaluate its performance on the test set:

    test_acc = (best_net.predict(X_test) == y_test).mean()
    print('Test accuracy: ', test_acc)
    

    The output is:

    Test accuracy:  0.504
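
    Finally, the first-layer weights of the best network can be viewed as images; a reasonably trained network usually shows blobby color and edge templates. This visualization is an added sketch and assumes the inputs were flattened from 32x32x3 images, as done above.

    # Reshape each hidden unit's incoming weights back into a 32x32x3 image and display them
    W1 = best_net.params['W1'].reshape(32, 32, 3, -1).transpose(3, 0, 1, 2)
    num_units = W1.shape[0]
    cols = 10
    rows = int(np.ceil(num_units / cols))
    for i in range(num_units):
        plt.subplot(rows, cols, i + 1)
        w = W1[i]
        w_img = 255.0 * (w - w.min()) / (w.max() - w.min() + 1e-8)   # rescale for display
        plt.imshow(w_img.astype('uint8'))
        plt.axis('off')
    plt.show()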
    