• Linear Softmax Classifier in Practice


    1 Overview

    For the underlying theory, see the post on linear SVM and Softmax classifiers.

    Implementation environment: Python 3

    2 Data Preprocessing

    2.1 Loading the Data

    Place the raw CIFAR-10 dataset files in the "data/cifar10/" folder.

    ### Load the CIFAR-10 dataset
    import os
    import pickle
    import random
    import numpy as np
    import matplotlib.pyplot as plt
    
    def load_CIFAR_batch(filename):
        """
        The CIFAR-10 dataset is stored in batches; this loads a single batch.

        @param filename: path to a CIFAR batch file
        @return X, Y: the data and labels of the CIFAR batch
        """
    
        with open(filename,'rb') as f:
            datadict=pickle.load(f,encoding='bytes')
    
            X=datadict[b'data']
            Y=datadict[b'labels']
            
            X=X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
            Y=np.array(Y)
            
            return X, Y
    
    def load_CIFAR10(ROOT):
        """
        Load the entire CIFAR-10 dataset.

        @param ROOT: root directory of the dataset
        @return: X_train, Y_train: training-set data and labels
                 X_test, Y_test: test-set data and labels
        """
    
        xs=[]
        ys=[]
    
        for b in range(1,6):
            f=os.path.join(ROOT, "data_batch_%d" % (b, ))
            X, Y=load_CIFAR_batch(f)
            xs.append(X)
            ys.append(Y)
    
        X_train=np.concatenate(xs)
        Y_train=np.concatenate(ys)
    
        del X, Y
    
        X_test, Y_test=load_CIFAR_batch(os.path.join(ROOT, "test_batch"))
    
        return X_train, Y_train, X_test, Y_test
      
      
    X_train, y_train, X_test, y_test = load_CIFAR10('data/cifar10/') 
    
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    

    The output is:

    (50000, 32, 32, 3)
    (50000,)
    (10000, 32, 32, 3)
    (10000,)
    

    2.2 Splitting the Dataset

    Split the loaded data into a training set, a validation set, and a test set; a small development set is also drawn from the training data for quick experiments.

    # Split into training, validation, and test sets
    num_train = 49000
    num_val = 1000
    num_test = 1000
    num_dev = 500  # a small development subset, also used when tuning hyperparameters
    
    # Validation set
    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    
    # Train set
    mask = range(num_train)
    X_train = X_train[mask]
    y_train = y_train[mask]
    
    # Test set
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    
    # Development set
    mask = np.random.choice(num_train, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]
    
    #Reshape the images data into rows
    
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))
    
    print('Train data shape: ', X_train.shape)
    print('Validation data shape: ', X_val.shape)
    print('Test data shape: ', X_test.shape)
    print('Development data shape: ', X_dev.shape)
    

    The output is:

    Train data shape:  (49000, 3072)
    Validation data shape:  (1000, 3072)
    Test data shape:  (1000, 3072)
    Development data shape:  (500, 3072)
    

    2.3 Normalization

    Normalize every split by subtracting the mean image computed from the training set.

    # Processing: subtract the mean images
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image
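
    As a quick sanity check, the mean image can be reshaped back to 32x32x3 and displayed. The short snippet below is illustrative only (it just reuses matplotlib, which was imported in section 2.1):

    # Visualize the mean image (a blurry average over all training images)
    plt.figure(figsize=(4, 4))
    plt.imshow(mean_image.reshape(32, 32, 3).astype('uint8'))
    plt.title('CIFAR-10 mean image')
    plt.axis('off')
    plt.show()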
    

    3 Linear Softmax Classifier

    3.1 Defining the Linear Softmax Classifier

    #Define a linear Softmax classifier
    
    class Softmax(object):
        def __init__(self):
            self.W = None
        
        def loss_vectorized(self, X, y, reg):
            """
            Structured Softmax loss function, vectorized implementation (without loops).
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the training data,
              consisting of num_train samples each of dimension D
            - y: A numpy array of shape (num_train,) containing the training labels,
              where y[i] is the label of X[i]
            - reg: (float) regularization strength
            Returns:
            - loss: the loss value between the predictions and the ground truth
            - dW: gradient of W
            """
            
            # Initialize loss and dW
            loss = 0.0
            dW = np.zeros(self.W.shape)
            
            # Compute the loss and dW
            num_train = X.shape[0]
            num_classes = self.W.shape[1]
            
            # loss
            scores = np.dot(X, self.W)
            scores -= np.max(scores, axis=1).reshape(-1, 1)
            softmax_output = np.exp(scores) / np.sum(np.exp(scores), axis=1).reshape(-1, 1)
            loss = np.sum(-np.log(softmax_output[range(softmax_output.shape[0]), list(y)]))
            loss /= num_train
            loss += 0.5 * reg * np.sum(self.W * self.W)
            
            # dW
            dS = softmax_output
            dS[range(dS.shape[0]), list(y)] += -1
            dW = np.dot(X.T, dS)
            dW /= num_train
            dW += reg * self.W
            
            return loss, dW
        
        def train(self, X, y, learning_rate = 1e-3, reg = 1e-5, num_iters = 100, 
                 batch_size = 200, print_flag = False):
            """
            Train Softmax classifier using SGD
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the training data,
              consisting of num_train samples each of dimension D
            - y: A numpy array of shape (num_train,) containing the training labels,
              where y[i] is the label of X[i], y[i] = c, 0 <= c < C
            - learning_rate: (float) learning rate for optimization
            - reg: (float) regularization strength
            - num_iters: (integer) number of steps to take during optimization
            - batch_size: (integer) number of training examples to use at each step
            - print_flag: (boolean) if True, print the progress during optimization
            Outputs:
            - loss_history: A list containing the loss at each training iteration
            """
            
            loss_history = []
            num_train = X.shape[0]
            dim = X.shape[1]
            num_classes = np.max(y) + 1
            
            # Initialize W
            if self.W is None:
                self.W = 0.001 * np.random.randn(dim, num_classes)
            
            # iteration and optimization
            for t in range(num_iters):
                idx_batch = np.random.choice(num_train, batch_size, replace=True)
                X_batch = X[idx_batch]
                y_batch = y[idx_batch]
                loss, dW = self.loss_vectorized(X_batch, y_batch, reg)
                loss_history.append(loss)
                self.W += -learning_rate * dW
                
                if print_flag and t%100 == 0:
                    print('iteration %d / %d: loss %f' % (t, num_iters, loss))
            
            return loss_history
        
        def predict(self, X):
            """
            Use the trained weights of Softmax to predict data labels
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the data to classify
            Outputs:
            - y_pred: A numpy array, predicted labels for the data in X
            """
            
            y_pred = np.zeros(X.shape[0])
            scores = np.dot(X, self.W)
            y_pred = np.argmax(scores, axis=1)
            
            return y_pred        
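
    loss_vectorized implements the cross-entropy loss L_i = -log( e^{s_{y_i}} / sum_j e^{s_j} ) plus the regularization term 0.5 * reg * sum(W * W), and returns its analytic gradient. Before training, it is worth sanity-checking that gradient against a numerical estimate on the small development split; the block below is a minimal sketch (the sampled coordinates and the choice of reg = 0.0 are illustrative, not part of the original code):

    # Sanity check: compare the analytic gradient with a numerical estimate
    softmax_check = Softmax()
    softmax_check.W = 0.001 * np.random.randn(X_dev.shape[1], 10)
    loss, dW = softmax_check.loss_vectorized(X_dev, y_dev, reg=0.0)

    h = 1e-5
    for _ in range(5):                          # check a few random coordinates of W
        i = np.random.randint(softmax_check.W.shape[0])
        j = np.random.randint(softmax_check.W.shape[1])
        old = softmax_check.W[i, j]
        softmax_check.W[i, j] = old + h
        loss_plus, _ = softmax_check.loss_vectorized(X_dev, y_dev, reg=0.0)
        softmax_check.W[i, j] = old - h
        loss_minus, _ = softmax_check.loss_vectorized(X_dev, y_dev, reg=0.0)
        softmax_check.W[i, j] = old             # restore the weight
        grad_numerical = (loss_plus - loss_minus) / (2 * h)
        print('numerical: %f  analytic: %f' % (grad_numerical, dW[i, j]))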
    

    3.2 Without Cross-Validation

    3.2.1 Training the Model

    # Train the Softmax classifier
    softmax = Softmax()
    loss_history = softmax.train(X_train, y_train, learning_rate = 1e-7, reg = 2.5e4, num_iters = 1500, 
                 batch_size = 200, print_flag = True)
    

    The output is:

    iteration 0 / 1500: loss 386.819945
    iteration 100 / 1500: loss 233.345487
    iteration 200 / 1500: loss 141.912560
    iteration 300 / 1500: loss 86.616391
    iteration 400 / 1500: loss 53.114667
    iteration 500 / 1500: loss 32.912990
    iteration 600 / 1500: loss 20.637937
    iteration 700 / 1500: loss 13.341617
    iteration 800 / 1500: loss 8.934886
    iteration 900 / 1500: loss 6.200619
    iteration 1000 / 1500: loss 4.516009
    iteration 1100 / 1500: loss 3.514955
    iteration 1200 / 1500: loss 2.883086
    iteration 1300 / 1500: loss 2.538239
    iteration 1400 / 1500: loss 2.365773
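
    The loss falls quickly at first and then flattens out. Plotting loss_history makes this easy to see; the short snippet below is illustrative only (it reuses the matplotlib import from section 2.1):

    # Plot the loss recorded at each training iteration
    plt.plot(loss_history)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()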
    

    3.2.2 Prediction

    # Training set
    y_pred = softmax.predict(X_train)
    num_correct = np.sum(y_pred == y_train)
    accuracy = np.mean(y_pred == y_train)
    print('Training correct %d/%d: The accuracy is %f' % (num_correct, X_train.shape[0], accuracy))
    
    # Test set
    y_pred = softmax.predict(X_test)
    num_correct = np.sum(y_pred == y_test)
    accuracy = np.mean(y_pred == y_test)
    print('Test correct %d/%d: The accuracy is %f' % (num_correct, X_test.shape[0], accuracy))
    

    The output is:

    Training correct 17246/49000: The accuracy is 0.351959
    Test correct 358/1000: The accuracy is 0.358000
    

    3.3 With Cross-Validation

    3.3.1 Training the Model

    
    learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
    regularization_strengths = [8000.0, 9000.0, 10000.0, 11000.0, 18000.0, 19000.0, 20000.0, 21000.0]
    
    results = {}
    best_lr = None
    best_reg = None
    best_val = -1   # The highest validation accuracy that we have seen so far.
    best_softmax = None # The Softmax object that achieved the highest validation accuracy.
    
    for lr in learning_rates:
        for reg in regularization_strengths:
            softmax = Softmax()
            loss_history = softmax.train(X_train, y_train, learning_rate = lr, reg = reg, num_iters = 3000)
            y_train_pred = softmax.predict(X_train)
            accuracy_train = np.mean(y_train_pred == y_train)
            y_val_pred = softmax.predict(X_val)
            accuracy_val = np.mean(y_val_pred == y_val)
            results[(lr, reg)] = accuracy_train, accuracy_val
            if accuracy_val > best_val:
                best_lr = lr
                best_reg = reg
                best_val = accuracy_val
                best_softmax = softmax
            print('lr: %e reg: %e train accuracy: %f val accuracy: %f' %
                  (lr, reg, results[(lr, reg)][0], results[(lr, reg)][1]))
    print('Best validation accuracy during cross-validation:\n'
          'lr = %e, reg = %e, best_val = %f' %
          (best_lr, best_reg, best_val))
    
    

    The output is:

    lr: 1.400000e-07 reg: 8.000000e+03 train accuracy: 0.378184 val accuracy: 0.391000
    lr: 1.400000e-07 reg: 9.000000e+03 train accuracy: 0.374714 val accuracy: 0.387000
    lr: 1.400000e-07 reg: 1.000000e+04 train accuracy: 0.376000 val accuracy: 0.391000
    lr: 1.400000e-07 reg: 1.100000e+04 train accuracy: 0.373898 val accuracy: 0.387000
    lr: 1.400000e-07 reg: 1.800000e+04 train accuracy: 0.360347 val accuracy: 0.373000
    lr: 1.400000e-07 reg: 1.900000e+04 train accuracy: 0.354612 val accuracy: 0.379000
    lr: 1.400000e-07 reg: 2.000000e+04 train accuracy: 0.357184 val accuracy: 0.379000
    lr: 1.400000e-07 reg: 2.100000e+04 train accuracy: 0.357061 val accuracy: 0.380000
    lr: 1.500000e-07 reg: 8.000000e+03 train accuracy: 0.378633 val accuracy: 0.397000
    lr: 1.500000e-07 reg: 9.000000e+03 train accuracy: 0.377918 val accuracy: 0.399000
    lr: 1.500000e-07 reg: 1.000000e+04 train accuracy: 0.376347 val accuracy: 0.383000
    lr: 1.500000e-07 reg: 1.100000e+04 train accuracy: 0.374469 val accuracy: 0.391000
    lr: 1.500000e-07 reg: 1.800000e+04 train accuracy: 0.362714 val accuracy: 0.373000
    lr: 1.500000e-07 reg: 1.900000e+04 train accuracy: 0.358633 val accuracy: 0.370000
    lr: 1.500000e-07 reg: 2.000000e+04 train accuracy: 0.358939 val accuracy: 0.373000
    lr: 1.500000e-07 reg: 2.100000e+04 train accuracy: 0.360367 val accuracy: 0.379000
    lr: 1.600000e-07 reg: 8.000000e+03 train accuracy: 0.378143 val accuracy: 0.397000
    lr: 1.600000e-07 reg: 9.000000e+03 train accuracy: 0.372449 val accuracy: 0.386000
    lr: 1.600000e-07 reg: 1.000000e+04 train accuracy: 0.376184 val accuracy: 0.379000
    lr: 1.600000e-07 reg: 1.100000e+04 train accuracy: 0.369776 val accuracy: 0.377000
    lr: 1.600000e-07 reg: 1.800000e+04 train accuracy: 0.359735 val accuracy: 0.378000
    lr: 1.600000e-07 reg: 1.900000e+04 train accuracy: 0.359653 val accuracy: 0.374000
    lr: 1.600000e-07 reg: 2.000000e+04 train accuracy: 0.356041 val accuracy: 0.370000
    lr: 1.600000e-07 reg: 2.100000e+04 train accuracy: 0.353694 val accuracy: 0.370000
    Best validation accuracy during cross-validation:
    lr = 1.500000e-07, reg = 9.000000e+03, best_val = 0.399000
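
    The results dictionary can also be visualized, for example as a scatter plot of validation accuracy over the (learning rate, regularization) grid. The snippet below is a minimal sketch; the log scaling and plot labels are purely illustrative:

    # Scatter plot of validation accuracy over the hyperparameter grid
    import math
    x_scatter = [math.log10(lr) for (lr, reg) in results]
    y_scatter = [math.log10(reg) for (lr, reg) in results]
    val_acc = [results[(lr, reg)][1] for (lr, reg) in results]
    plt.scatter(x_scatter, y_scatter, c=val_acc)
    plt.colorbar()
    plt.xlabel('log10 learning rate')
    plt.ylabel('log10 regularization strength')
    plt.title('CIFAR-10 validation accuracy')
    plt.show()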
    

    3.3.2 Prediction

    #Use the best softmax to test
    
    y_pred = best_softmax.predict(X_test)
    num_correct = np.sum(y_pred == y_test)
    accuracy = np.mean(y_pred == y_test)
    print('Test correct %d/%d: The accuracy is %f' % (num_correct, num_test, accuracy))
    

    The output is:

    Test correct 375/1000: The accuracy is 0.375000
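
    Each column of best_softmax.W corresponds to one class, so the learned weights can be reshaped into 32x32x3 images and viewed as rough class templates. The block below is a minimal sketch; the class-name list and subplot layout are illustrative:

    # Visualize the learned weights of each class as an image
    w = best_softmax.W.reshape(32, 32, 3, 10)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        # rescale the weights into the 0..255 range for display
        wimg = 255.0 * (w[:, :, :, i] - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()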
    

    Note: the linear SVM classifier and the linear Softmax classifier differ only in their loss functions!
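
    For comparison, a vectorized multiclass hinge (SVM) loss with the same interface as loss_vectorized could look like the sketch below. This is not code from the original post; swapping it in for the softmax loss inside the class would turn the classifier into a linear SVM:

    # Sketch: multiclass SVM (hinge) loss with the same interface as loss_vectorized
    def svm_loss_vectorized(self, X, y, reg):
        num_train = X.shape[0]
        scores = np.dot(X, self.W)                       # shape (num_train, num_classes)
        correct = scores[range(num_train), list(y)].reshape(-1, 1)
        margins = np.maximum(0, scores - correct + 1)    # delta = 1
        margins[range(num_train), list(y)] = 0           # ignore the correct class
        loss = np.sum(margins) / num_train + 0.5 * reg * np.sum(self.W * self.W)

        # Each positive margin contributes +x_i to its class column
        # and -x_i to the correct-class column of the gradient
        binary = (margins > 0).astype(float)
        binary[range(num_train), list(y)] = -np.sum(binary, axis=1)
        dW = np.dot(X.T, binary) / num_train + reg * self.W
        return loss, dW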
