• Improving Deep Neural Networks


    From Andrew Ng's deep learning course videos. If reading the code directly is difficult for you, see: https://blog.csdn.net/u013733326/article/details/79847918#正则化模型
    Note that the blog linked above differs slightly from the assignment and contains an error in the L2 regularization computation; this post is fully correct and consistent with the original assignment.
    This assignment compares the same model under three settings: no regularization, L2 regularization, and dropout.

    Full version: https://github.com/Hongze-Wang/Deep-Learning-Andrew-Ng/tree/master/homework

    # import packages
    import numpy as np
    import matplotlib.pyplot as plt
    from reg_utils import sigmoid, relu, plot_decision_boundary, initialize_parameters, load_2D_dataset, predict_dec
    from reg_utils import compute_cost, predict, forward_propagation, backward_propagation, update_parameters
    import sklearn
    import sklearn.datasets
    import scipy.io
    from testCases import *
    
    %matplotlib inline
    plt.rcParams['figure.figsize'] = (7.0, 4.0) # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'
    

    The following warnings appear; they can safely be ignored:

    C:\Users\wangh\reg_utils.py:85: SyntaxWarning: assertion is always true, perhaps remove parentheses?
      assert(parameters['W' + str(l)].shape == layer_dims[l], layer_dims[l-1])
    C:\Users\wangh\reg_utils.py:86: SyntaxWarning: assertion is always true, perhaps remove parentheses?
      assert(parameters['W' + str(l)].shape == layer_dims[l], 1)
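
    These warnings are raised because passing a second value inside assert's parentheses turns the condition into a two-element tuple, which is always truthy, so the shape checks never actually run. A minimal sketch of what reg_utils.py presumably intends (hypothetical correction; the second check is assumed to be for the bias shape):

    # sketch: inside the loop over layers l in reg_utils.py's initialize_parameters
    assert parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1])
    assert parameters['b' + str(l)].shape == (layer_dims[l], 1)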
    
    train_X, train_Y, test_X, test_Y = load_2D_dataset()
    

    (Figure: the 2D training dataset loaded by load_2D_dataset)

    def model(X, Y, learning_rate = 0.3, num_iterations = 30000, print_cost = True, lambd = 0, keep_prob = 1):
        """
        Implements a three-layer neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.
        
        Arguments:
        X -- input data, of shape (input size, number of examples)
        Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (output size, number of examples)
        learning_rate -- learning rate of the optimization
        num_iterations -- number of iterations of the optimization loop
        print_cost -- If True, print the cost every 10000 iterations
        lambd -- regularization hyperparameter, scalar
        keep_prob - probability of keeping a neuron active during drop-out, scalar.
        
        Returns:
        parameters -- parameters learned by the model. They can then be used to predict.
        """
            
        grads = {}
        costs = []                            # to keep track of the cost
        m = X.shape[1]                        # number of examples
        layers_dims = [X.shape[0], 20, 3, 1]
        
        # Initialize parameters dictionary.
        parameters = initialize_parameters(layers_dims)
    
        # Loop (gradient descent)
    
        for i in range(0, num_iterations):
    
            # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
            if keep_prob == 1:
                a3, cache = forward_propagation(X, parameters)
            elif keep_prob < 1:
                a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)
            
            # Cost function
            if lambd == 0:
                cost = compute_cost(a3, Y)
            else:
                cost = compute_cost_with_regularization(a3, Y, parameters, lambd)
                
            # Backward propagation.
            assert(lambd==0 or keep_prob==1)    # it is possible to use both L2 regularization and dropout, 
                                                # but this assignment will only explore one at a time
            if lambd == 0 and keep_prob == 1:
                grads = backward_propagation(X, Y, cache)
            elif lambd != 0:
                grads = backward_propagation_with_regularization(X, Y, cache, lambd)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)
            
            # Update parameters.
            parameters = update_parameters(parameters, grads, learning_rate)
            
            # Print the loss every 10000 iterations
            if print_cost and i % 10000 == 0:
                print("Cost after iteration {}: {}".format(i, cost))
            if print_cost and i % 1000 == 0:
                costs.append(cost)
        
        # plot the cost
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('iterations (x1,000)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        
        return parameters
    
    parameters = model(train_X, train_Y)
    print ("On the training set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print ("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)
    
    Cost after iteration 0: 0.6557412523481002
    Cost after iteration 10000: 0.1632998752572419
    Cost after iteration 20000: 0.13851642423239133
    

    (Figure: cost curve of the model without regularization)

    On the training set:
    Accuracy: 0.9478672985781991
    On the test set:
    Accuracy: 0.915
    
    plt.title("Model without regularization")
    axes = plt.gca()
    axes.set_xlim([-0.75,0.40])
    axes.set_ylim([-0.75,0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    

    (Figure: decision boundary of the model without regularization)
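
    The next function adds the L2 penalty to the cross-entropy cost. The "formula (2)" referenced in its docstring is the standard L2-regularized cost (restated here because the formula itself is not shown in this post):

    $$J_{regularized} = \underbrace{-\frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)}\log a^{[3](i)} + (1-y^{(i)})\log\left(1-a^{[3](i)}\right)\right)}_{\text{cross-entropy cost}} + \underbrace{\frac{\lambda}{2m}\sum_{l}\sum_{k}\sum_{j}\left(W_{k,j}^{[l]}\right)^2}_{\text{L2 regularization cost}}$$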

    # GRADED FUNCTION: compute_cost_with_regularization
    
    def compute_cost_with_regularization(A3, Y, parameters, lambd):
        """
        Implement the cost function with L2 regularization. See formula (2) above.
        
        Arguments:
        A3 -- post-activation, output of forward propagation, of shape (output size, number of examples)
        Y -- "true" labels vector, of shape (output size, number of examples)
        parameters -- python dictionary containing parameters of the model
        
        Returns:
        cost - value of the regularized loss function (formula (2))
        """
        m = Y.shape[1]
        W1 = parameters["W1"]
        W2 = parameters["W2"]
        W3 = parameters["W3"]
        
        cross_entropy_cost = compute_cost(A3, Y) # This gives you the cross-entropy part of the cost
        
        ### START CODE HERE ### (approx. 1 line)
        L2_regularization_cost = lambd * (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3))) / (m*2)
        ### END CODE HERE ###
        
        cost = cross_entropy_cost + L2_regularization_cost
        
        return cost
    
    A3, Y_assess, parameters = compute_cost_with_regularization_test_case()
    
    print("cost = " + str(compute_cost_with_regularization(A3, Y_assess, parameters, lambd = 0.1)))
    

    cost = 1.7864859451590758

    # GRADED FUNCTION: backward_propagation_with_regularization
    
    def backward_propagation_with_regularization(X, Y, cache, lambd):
        """
        Implements the backward propagation of our baseline model to which we added an L2 regularization.
        
        Arguments:
        X -- input dataset, of shape (input size, number of examples)
        Y -- "true" labels vector, of shape (output size, number of examples)
        cache -- cache output from forward_propagation()
        lambd -- regularization hyperparameter, scalar
        
        Returns:
        gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
        """
        
        m = X.shape[1]
        (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache
        
        dZ3 = A3 - Y
        
        ### START CODE HERE ### (approx. 1 line)
        dW3 = 1./m * np.dot(dZ3, A2.T) + lambd/m*W3
        ### END CODE HERE ###
        db3 = 1./m * np.sum(dZ3, axis=1, keepdims = True)
        
        dA2 = np.dot(W3.T, dZ3)
        dZ2 = np.multiply(dA2, np.int64(A2 > 0))
        ### START CODE HERE ### (approx. 1 line)
        dW2 = 1./m * np.dot(dZ2, A1.T) + lambd/m*W2
        ### END CODE HERE ###
        db2 = 1./m * np.sum(dZ2, axis=1, keepdims = True)
        
        dA1 = np.dot(W2.T, dZ2)
        dZ1 = np.multiply(dA1, np.int64(A1 > 0))
        ### START CODE HERE ### (approx. 1 line)
        dW1 = 1./m * np.dot(dZ1, X.T) + lambd/m*W1
        ### END CODE HERE ###
        db1 = 1./m * np.sum(dZ1, axis=1, keepdims = True)
        
        gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,"dA2": dA2,
                     "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1, 
                     "dZ1": dZ1, "dW1": dW1, "db1": db1}
        
        return gradients
    
    X_assess, Y_assess, cache = backward_propagation_with_regularization_test_case()
    
    grads = backward_propagation_with_regularization(X_assess, Y_assess, cache, lambd = 0.7)
    print ("dW1 = "+ str(grads["dW1"]))
    print ("dW2 = "+ str(grads["dW2"]))
    print ("dW3 = "+ str(grads["dW3"]))
    
    dW1 = [[-0.25604646  0.12298827 -0.28297129]
     [-0.17706303  0.34536094 -0.4410571 ]]
    dW2 = [[ 0.79276486  0.85133918]
     [-0.0957219  -0.01720463]
     [-0.13100772 -0.03750433]]
    dW3 = [[-1.77691347 -0.11832879 -0.09397446]]
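
    The extra lambd/m * W term added to each dW above is simply the derivative of the L2 penalty with respect to the corresponding weight matrix:

    $$\frac{\partial}{\partial W^{[l]}}\left(\frac{\lambda}{2m}\left\|W^{[l]}\right\|_F^2\right) = \frac{\lambda}{m}W^{[l]}$$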
    
    parameters = model(train_X, train_Y, lambd = 0.60) # (I changed lambd from the assignment's 0.7 to 0.6 and got 95.5% accuracy on the test set)
    print ("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print ("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)
    
    Cost after iteration 0: 0.6914902783181227
    Cost after iteration 10000: 0.2616965878421321
    Cost after iteration 20000: 0.2614637394190471
    

    (Figure: cost curve of the model with L2 regularization)

    On the train set:
    Accuracy: 0.9289099526066351
    On the test set:
    Accuracy: 0.955
    
    plt.title("Model with L2-regularization")
    axes = plt.gca()
    axes.set_xlim([-0.75,0.40])
    axes.set_ylim([-0.75,0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    

    (Figure: decision boundary of the model with L2 regularization)

    # GRADED FUNCTION: forward_propagation_with_dropout
    
    def forward_propagation_with_dropout(X, parameters, keep_prob = 0.5):
        """
        Implements the forward propagation: LINEAR -> RELU + DROPOUT -> LINEAR -> RELU + DROPOUT -> LINEAR -> SIGMOID.
        
        Arguments:
        X -- input dataset, of shape (2, number of examples)
        parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
                        W1 -- weight matrix of shape (20, 2)
                        b1 -- bias vector of shape (20, 1)
                        W2 -- weight matrix of shape (3, 20)
                        b2 -- bias vector of shape (3, 1)
                        W3 -- weight matrix of shape (1, 3)
                        b3 -- bias vector of shape (1, 1)
        keep_prob - probability of keeping a neuron active during drop-out, scalar
        
        Returns:
        A3 -- last activation value, output of the forward propagation, of shape (1,1)
        cache -- tuple, information stored for computing the backward propagation
        """
        
        np.random.seed(1)
        
        # retrieve parameters
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]
        W3 = parameters["W3"]
        b3 = parameters["b3"]
        
        # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
        Z1 = np.dot(W1, X) + b1
        A1 = relu(Z1)
        ### START CODE HERE ### (approx. 4 lines)         # Steps 1-4 below correspond to the Steps 1-4 described above. 
        D1 = np.random.rand(A1.shape[0], A1.shape[1])     # Step 1: initialize matrix D1 = np.random.rand(..., ...)
        D1 = D1 < keep_prob                               # Step 2: convert entries of D1 to 0 or 1 (using keep_prob as the threshold)
        A1 = A1 * D1                                      # Step 3: shut down some neurons of A1
        A1 = A1 / keep_prob                               # Step 4: scale the value of neurons that haven't been shut down
        ### END CODE HERE ###
        Z2 = np.dot(W2, A1) + b2
        A2 = relu(Z2)
        ### START CODE HERE ### (approx. 4 lines)
        D2 = np.random.rand(A2.shape[0], A2.shape[1])     # Step 1: initialize matrix D2 = np.random.rand(..., ...)
        D2 = D2 < keep_prob                               # Step 2: convert entries of D2 to 0 or 1 (using keep_prob as the threshold)
        A2 = A2 * D2                                      # Step 3: shut down some neurons of A2
        A2 = A2 / keep_prob                               # Step 4: scale the value of neurons that haven't been shut down
        ### END CODE HERE ###
        Z3 = np.dot(W3, A2) + b3
        A3 = sigmoid(Z3)
        
        cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
        
        return A3, cache
    
    X_assess, parameters = forward_propagation_with_dropout_test_case()
    
    A3, cache = forward_propagation_with_dropout(X_assess, parameters, keep_prob = 0.7)
    print ("A3 = " + str(A3))
    

    A3 = [[0.36974721 0.00305176 0.04565099 0.49683389 0.36974721]]
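
    A side note on Step 4 (inverted dropout): dividing by keep_prob keeps the expected value of the masked activations equal to that of the original activations, which is why no rescaling is needed at test time. A minimal standalone sketch illustrating this (not part of the assignment code):

    import numpy as np

    np.random.seed(0)
    A = np.full((3, 100000), 2.0)              # pretend activations, all equal to 2.0
    keep_prob = 0.7
    D = np.random.rand(*A.shape) < keep_prob   # keep each unit with probability keep_prob
    A_drop = (A * D) / keep_prob               # mask, then scale up (inverted dropout)
    print(A.mean(), A_drop.mean())             # both means are close to 2.0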

    # GRADED FUNCTION: backward_propagation_with_dropout
    
    def backward_propagation_with_dropout(X, Y, cache, keep_prob):
        """
        Implements the backward propagation of our baseline model to which we added dropout.
        
        Arguments:
        X -- input dataset, of shape (2, number of examples)
        Y -- "true" labels vector, of shape (output size, number of examples)
        cache -- cache output from forward_propagation_with_dropout()
        keep_prob - probability of keeping a neuron active during drop-out, scalar
        
        Returns:
        gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
        """
        
        m = X.shape[1]
        (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache
        
        dZ3 = A3 - Y
        dW3 = 1./m * np.dot(dZ3, A2.T)
        db3 = 1./m * np.sum(dZ3, axis=1, keepdims = True)
        dA2 = np.dot(W3.T, dZ3)
        ### START CODE HERE ### (≈ 2 lines of code)
        dA2 = dA2 * D2          # Step 1: Apply mask D2 to shut down the same neurons as during the forward propagation
        dA2 = dA2 / keep_prob   # Step 2: Scale the value of neurons that haven't been shut down
        ### END CODE HERE ###
        dZ2 = np.multiply(dA2, np.int64(A2 > 0))
        dW2 = 1./m * np.dot(dZ2, A1.T)
        db2 = 1./m * np.sum(dZ2, axis=1, keepdims = True)
        
        dA1 = np.dot(W2.T, dZ2)
        ### START CODE HERE ### (≈ 2 lines of code)
        dA1 = dA1 * D1          # Step 1: Apply mask D1 to shut down the same neurons as during the forward propagation
        dA1 = dA1 / keep_prob   # Step 2: Scale the value of neurons that haven't been shut down
        ### END CODE HERE ###
        dZ1 = np.multiply(dA1, np.int64(A1 > 0))
        dW1 = 1./m * np.dot(dZ1, X.T)
        db1 = 1./m * np.sum(dZ1, axis=1, keepdims = True)
        
        gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,"dA2": dA2,
                     "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1, 
                     "dZ1": dZ1, "dW1": dW1, "db1": db1}
        
        return gradients
    
    X_assess, Y_assess, cache = backward_propagation_with_dropout_test_case()
    
    gradients = backward_propagation_with_dropout(X_assess, Y_assess, cache, keep_prob = 0.8)
    
    print ("dA1 = " + str(gradients["dA1"]))
    print ("dA2 = " + str(gradients["dA2"]))
    
    dA1 = [[ 0.36544439  0.         -0.00188233  0.         -0.17408748]
     [ 0.65515713  0.         -0.00337459  0.         -0.        ]]
    dA2 = [[ 0.58180856  0.         -0.00299679  0.         -0.27715731]
     [ 0.          0.53159854 -0.          0.53159854 -0.34089673]
     [ 0.          0.         -0.00292733  0.         -0.        ]]
    
    parameters = model(train_X, train_Y, keep_prob = 0.86, learning_rate = 0.3)
    
    print ("On the train set:")
    predictions_train = predict(train_X, train_Y, parameters)
    print ("On the test set:")
    predictions_test = predict(test_X, test_Y, parameters)
    
    Cost after iteration 0: 0.6543912405149825
    C:\Users\wangh\reg_utils.py:236: RuntimeWarning: divide by zero encountered in log
      logprobs = np.multiply(-np.log(a3),Y) + np.multiply(-np.log(1 - a3), 1 - Y)
    C:\Users\wangh\reg_utils.py:236: RuntimeWarning: invalid value encountered in multiply
      logprobs = np.multiply(-np.log(a3),Y) + np.multiply(-np.log(1 - a3), 1 - Y)
    Cost after iteration 10000: 0.061016986574905605
    Cost after iteration 20000: 0.060582435798513114
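
    These RuntimeWarnings most likely appear because, during training with dropout, some entries of a3 round to exactly 0 or 1, so np.log receives 0 (producing -inf) and the subsequent multiply produces NaN entries where the corresponding label factor is 0; training still proceeds, as the decreasing cost shows. A hedged sketch of one common workaround, clipping the activations before taking the log (this is not how reg_utils.compute_cost is written):

    import numpy as np

    def compute_cost_clipped(a3, Y, epsilon=1e-12):
        # Hypothetical variant of the cross-entropy cost: clip a3 away from
        # exactly 0 and 1 so np.log never receives 0.
        a3 = np.clip(a3, epsilon, 1 - epsilon)
        logprobs = np.multiply(-np.log(a3), Y) + np.multiply(-np.log(1 - a3), 1 - Y)
        return float(np.sum(logprobs) / Y.shape[1])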
    

    (Figure: cost curve of the model with dropout)

    On the train set:
    Accuracy: 0.9289099526066351
    On the test set:
    Accuracy: 0.95
    
    plt.title("Model with dropout")
    axes = plt.gca()
    axes.set_xlim([-0.75,0.40])
    axes.set_ylim([-0.75,0.65])
    plot_decision_boundary(lambda x: predict_dec(parameters, x.T), train_X, train_Y)
    

    (Figure: decision boundary of the model with dropout)

    • 3-layer NN without regularization
      train accuracy 95%, test accuracy 91.5%
    • 3-layer NN with L2-regularization
      train accuracy 94%, test accuracy 93%
      (with lambd changed from 0.7 to 0.6, I obtained 95.5% test accuracy)
    • 3-layer NN with dropout
      train accuracy 93%, test accuracy 95%
  • Original post: https://www.cnblogs.com/wanghongze95/p/13842537.html