• Neural Networks and Deep Learning - Week 4 Programming Assignments (Parts 1 & 2: Building a Deep Neural Network Step by Step, and Its Application)


    The computation process of a deep neural network

    Load the required packages first

    import numpy as np
    import h5py
    import matplotlib.pyplot as plt
    from testCases_v2 import *
    from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
    from scipy import ndimage
    import lr_utils

    Initialize the model parameters

    Two-layer structure:

    def initialize_parameters(n_x, n_h, n_y):
        """
        Argument:
        n_x -- size of the input layer
        n_h -- size of the hidden layer
        n_y -- size of the output layer

        Returns:
        parameters -- python dictionary containing your parameters:
                        W1 -- weight matrix of shape (n_h, n_x)
                        b1 -- bias vector of shape (n_h, 1)
                        W2 -- weight matrix of shape (n_y, n_h)
                        b2 -- bias vector of shape (n_y, 1)
        """
        np.random.seed(1)

        ### START CODE HERE ### (≈ 4 lines of code)
        W1 = np.random.randn(n_h, n_x) * 0.01
        W2 = np.random.randn(n_y, n_h) * 0.01
        b1 = np.zeros((n_h, 1))
        b2 = np.zeros((n_y, 1))
        ### END CODE HERE ###

        assert(W1.shape == (n_h, n_x))
        assert(b1.shape == (n_h, 1))
        assert(W2.shape == (n_y, n_h))
        assert(b2.shape == (n_y, 1))

        parameters = {"W1": W1,
                      "b1": b1,
                      "W2": W2,
                      "b2": b2}

        return parameters
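
    A minimal sanity check (not part of the assignment), using arbitrary layer sizes 3, 2 and 1; the random values depend on the seed, but the shapes must match:

    parameters = initialize_parameters(3, 2, 1)
    print(parameters["W1"].shape, parameters["b1"].shape)   # (2, 3) (2, 1)
    print(parameters["W2"].shape, parameters["b2"].shape)   # (1, 2) (1, 1)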

    L-layer structure:

    def initialize_parameters_deep(layer_dims):
        """
        Arguments:
        layer_dims -- python array (list) containing the dimensions of each layer in our network

        Returns:
        parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                        Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                        bl -- bias vector of shape (layer_dims[l], 1)
        """
        np.random.seed(3)
        parameters = {}
        L = len(layer_dims)            # number of layers in the network

        for l in range(1, L):
            ### START CODE HERE ### (≈ 2 lines of code)
            # Scale the weights by 1/sqrt(size of the previous layer).
            # (Fixed: the argument is layer_dims, not layers_dims.)
            parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])
            parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
            ### END CODE HERE ###

            assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
            assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))
        return parameters
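
    For example, a hypothetical [5, 4, 3, 1] configuration (5 input features, two hidden layers, one output unit) yields weight and bias shapes that follow layer_dims:

    parameters = initialize_parameters_deep([5, 4, 3, 1])
    for l in range(1, 4):
        print(parameters["W" + str(l)].shape, parameters["b" + str(l)].shape)
    # (4, 5) (4, 1)
    # (3, 4) (3, 1)
    # (1, 3) (1, 1)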

    Forward propagation

    Linear part:

    def linear_forward(A, W, b):
        """
        Implement the linear part of a layer's forward propagation.

        Arguments:
        A -- activations from previous layer (or input data): (size of previous layer, number of examples)
        W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
        b -- bias vector, numpy array of shape (size of the current layer, 1)

        Returns:
        Z -- the input of the activation function, also called pre-activation parameter
        cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
        """
        ### START CODE HERE ### (≈ 1 line of code)
        Z = np.dot(W, A) + b
        ### END CODE HERE ###

        assert(Z.shape == (W.shape[0], A.shape[1]))
        cache = (A, W, b)
        return Z, cache
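
    This is the vectorized linear step Z = WA + b. A quick shape check with made-up sizes (3 units in the previous layer, 2 in the current layer, 4 examples):

    A = np.random.randn(3, 4)
    W = np.random.randn(2, 3)
    b = np.random.randn(2, 1)
    Z, cache = linear_forward(A, W, b)
    print(Z.shape)   # (2, 4)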

    Linear-activation part:

    def linear_activation_forward(A_prev, W, b, activation):
        """
        Implement the forward propagation for the LINEAR->ACTIVATION layer

        Arguments:
        A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
        W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
        b -- bias vector, numpy array of shape (size of the current layer, 1)
        activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

        Returns:
        A -- the output of the activation function, also called the post-activation value
        cache -- a python tuple containing "linear_cache" and "activation_cache";
                 stored for computing the backward pass efficiently
        """
        if activation == "sigmoid":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
            ### START CODE HERE ### (≈ 2 lines of code)
            Z, linear_cache = linear_forward(A_prev, W, b)
            A, activation_cache = sigmoid(Z)
            ### END CODE HERE ###

        elif activation == "relu":
            # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
            ### START CODE HERE ### (≈ 2 lines of code)
            Z, linear_cache = linear_forward(A_prev, W, b)
            A, activation_cache = relu(Z)
            ### END CODE HERE ###

        assert(A.shape == (W.shape[0], A_prev.shape[1]))
        cache = (linear_cache, activation_cache)

        return A, cache
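
    A small check with made-up sizes, assuming the sigmoid and relu helpers from dnn_utils return (A, Z) as they are used above:

    A_prev = np.random.randn(3, 4)
    W = np.random.randn(2, 3)
    b = np.zeros((2, 1))
    A_relu, _ = linear_activation_forward(A_prev, W, b, "relu")
    A_sig, _ = linear_activation_forward(A_prev, W, b, "sigmoid")
    print(A_relu.shape, A_sig.shape)   # (2, 4) (2, 4)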

    Forward propagation for the L-layer model:

    def L_model_forward(X, parameters):
        """
        Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

        Arguments:
        X -- data, numpy array of shape (input size, number of examples)
        parameters -- output of initialize_parameters_deep()

        Returns:
        AL -- last post-activation value
        caches -- list of caches containing:
                    every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
                    the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
        """
        caches = []
        A = X
        L = len(parameters) // 2                  # number of layers in the neural network

        # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
        for l in range(1, L):
            A_prev = A
            ### START CODE HERE ### (≈ 2 lines of code)
            A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
            caches.append(cache)
            ### END CODE HERE ###

        # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
        ### START CODE HERE ### (≈ 2 lines of code)
        AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], 'sigmoid')
        caches.append(cache)
        ### END CODE HERE ###

        assert(AL.shape == (1, X.shape[1]))

        return AL, caches
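
    With L layers the function returns L caches: L-1 from the ReLU layers plus one from the final sigmoid layer. A quick check with the hypothetical [5, 4, 3, 1] configuration used earlier:

    parameters = initialize_parameters_deep([5, 4, 3, 1])
    X = np.random.randn(5, 10)                  # 10 made-up examples
    AL, caches = L_model_forward(X, parameters)
    print(AL.shape, len(caches))                # (1, 10) 3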

    Compute the cost

    def compute_cost(AL, Y):
        """
        Implement the cost function defined by equation (7).

        Arguments:
        AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
        Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

        Returns:
        cost -- cross-entropy cost
        """
        m = Y.shape[1]

        # Compute loss from aL and y.
        ### START CODE HERE ### (≈ 1 lines of code)
        cost = -np.sum(np.multiply(np.log(AL), Y) + np.multiply(np.log(1 - AL), 1 - Y)) / m
        ### END CODE HERE ###

        cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
        assert(cost.shape == ())
        return cost
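
    For reference, the cross-entropy cost computed above (equation (7) of the assignment) is, written in LaTeX notation:

    J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log\left(a^{[L](i)}\right) + \left(1 - y^{(i)}\right) \log\left(1 - a^{[L](i)}\right) \right]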

    Backward propagation

    Linear part:

    def linear_backward(dZ, cache):
        """
        Implement the linear portion of backward propagation for a single layer (layer l)

        Arguments:
        dZ -- Gradient of the cost with respect to the linear output (of current layer l)
        cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

        Returns:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
        """
        A_prev, W, b = cache
        m = A_prev.shape[1]

        ### START CODE HERE ### (≈ 3 lines of code)
        dW = np.dot(dZ, A_prev.T) / m
        db = np.sum(dZ, axis=1, keepdims=True) / m
        dA_prev = np.dot(W.T, dZ)
        ### END CODE HERE ###

        assert(dA_prev.shape == A_prev.shape)
        assert(dW.shape == W.shape)
        assert(db.shape == b.shape)

        return dA_prev, dW, db
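
    The three lines inside the code block correspond to the standard gradients of the linear step (LaTeX notation):

    dW^{[l]} = \frac{1}{m} \, dZ^{[l]} A^{[l-1]T}, \qquad
    db^{[l]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}, \qquad
    dA^{[l-1]} = W^{[l]T} dZ^{[l]}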

    Linear-activation part:

    def linear_activation_backward(dA, cache, activation):
        """
        Implement the backward propagation for the LINEAR->ACTIVATION layer.

        Arguments:
        dA -- post-activation gradient for current layer l
        cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
        activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

        Returns:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
        """
        linear_cache, activation_cache = cache

        if activation == "relu":
            ### START CODE HERE ### (≈ 2 lines of code)
            dZ = relu_backward(dA, activation_cache)
            dA_prev, dW, db = linear_backward(dZ, linear_cache)
            ### END CODE HERE ###

        elif activation == "sigmoid":
            ### START CODE HERE ### (≈ 2 lines of code)
            dZ = sigmoid_backward(dA, activation_cache)
            dA_prev, dW, db = linear_backward(dZ, linear_cache)
            ### END CODE HERE ###

        # (The redundant extra call to linear_backward has been removed; each branch already computes the gradients.)
        return dA_prev, dW, db
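
    A shape check with made-up sizes; each returned gradient must match the shape of the quantity it differentiates:

    A_prev = np.random.randn(3, 4)
    W = np.random.randn(2, 3)
    b = np.zeros((2, 1))
    A, cache = linear_activation_forward(A_prev, W, b, "relu")
    dA = np.random.randn(2, 4)
    dA_prev, dW, db = linear_activation_backward(dA, cache, "relu")
    print(dA_prev.shape, dW.shape, db.shape)   # (3, 4) (2, 3) (2, 1)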

    Backward propagation for the L-layer model:

    def L_model_backward(AL, Y, caches):
        """
        Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

        Arguments:
        AL -- probability vector, output of the forward propagation (L_model_forward())
        Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
        caches -- list of caches containing:
                    every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e. l = 0...L-2)
                    the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

        Returns:
        grads -- A dictionary with the gradients
                 grads["dA" + str(l)] = ...
                 grads["dW" + str(l)] = ...
                 grads["db" + str(l)] = ...
        """
        grads = {}
        L = len(caches)         # the number of layers
        m = AL.shape[1]
        Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL

        # Initializing the backpropagation
        ### START CODE HERE ### (1 line of code)
        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
        current_cache = caches[L-1]
        ### END CODE HERE ###

        # Lth layer (SIGMOID -> LINEAR) gradients.
        # Inputs: "dAL, current_cache". Outputs: "grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)]".
        ### START CODE HERE ### (approx. 2 lines)
        grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
        ### END CODE HERE ###

        for l in reversed(range(L-1)):
            # lth layer: (RELU -> LINEAR) gradients.
            # Inputs: "grads["dA" + str(l + 1)], caches". Outputs: "grads["dA" + str(l)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)]".
            ### START CODE HERE ### (approx. 5 lines)
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, "relu")
            grads["dA" + str(l)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp
            ### END CODE HERE ###

        return grads
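
    An end-to-end shape check on the hypothetical [5, 4, 3, 1] network, assuming the dnn_utils helpers behave as in the assignment:

    parameters = initialize_parameters_deep([5, 4, 3, 1])
    X = np.random.randn(5, 10)
    Y = (np.random.rand(1, 10) > 0.5).astype(int)
    AL, caches = L_model_forward(X, parameters)
    grads = L_model_backward(AL, Y, caches)
    print(grads["dW1"].shape, grads["db1"].shape)   # (4, 5) (4, 1)
    print(grads["dW3"].shape, grads["db3"].shape)   # (1, 3) (1, 1)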

    Update the parameters

    def update_parameters(parameters, grads, learning_rate):
        """
        Update parameters using gradient descent

        Arguments:
        parameters -- python dictionary containing your parameters
        grads -- python dictionary containing your gradients, output of L_model_backward
        learning_rate -- the learning rate, a scalar

        Returns:
        parameters -- python dictionary containing your updated parameters
                      parameters["W" + str(l)] = ...
                      parameters["b" + str(l)] = ...
        """
        L = len(parameters) // 2  # number of layers in the neural network

        # Update rule for each parameter. Use a for loop.
        ### START CODE HERE ### (≈ 3 lines of code)
        for l in range(L):
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads['dW' + str(l+1)]
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads['db' + str(l+1)]
        ### END CODE HERE ###

        return parameters
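
    The loop applies one step of gradient descent to every layer (LaTeX notation), where \alpha is the learning rate:

    W^{[l]} := W^{[l]} - \alpha \, dW^{[l]}, \qquad b^{[l]} := b^{[l]} - \alpha \, db^{[l]}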

    Build a two-layer neural network

    def two_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
        """
        Implements a two-layer neural network: LINEAR->RELU->LINEAR->SIGMOID.

        Arguments:
        X -- input data, of shape (n_x, number of examples)
        Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
        layers_dims -- dimensions of the layers (n_x, n_h, n_y)
        num_iterations -- number of iterations of the optimization loop
        learning_rate -- learning rate of the gradient descent update rule
        print_cost -- If set to True, this will print the cost every 100 iterations

        Returns:
        parameters -- a dictionary containing W1, W2, b1, and b2
        """
        np.random.seed(1)
        grads = {}
        costs = []                               # to keep track of the cost
        m = X.shape[1]                           # number of examples
        (n_x, n_h, n_y) = layers_dims

        # Initialize parameters dictionary, by calling one of the functions you'd previously implemented
        ### START CODE HERE ### (≈ 1 line of code)
        parameters = initialize_parameters(n_x, n_h, n_y)
        ### END CODE HERE ###

        # Get W1, b1, W2 and b2 from the dictionary parameters.
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]

        # Loop (gradient descent)
        for i in range(0, num_iterations):

            # Forward propagation: LINEAR -> RELU -> LINEAR -> SIGMOID. Inputs: "X, W1, b1". Output: "A1, cache1, A2, cache2".
            ### START CODE HERE ### (≈ 2 lines of code)
            A1, cache1 = linear_activation_forward(X, W1, b1, 'relu')
            A2, cache2 = linear_activation_forward(A1, W2, b2, 'sigmoid')
            ### END CODE HERE ###

            # Compute cost
            ### START CODE HERE ### (≈ 1 line of code)
            cost = compute_cost(A2, Y)
            ### END CODE HERE ###

            # Initializing backward propagation
            dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))

            # Backward propagation. Inputs: "dA2, cache2, cache1". Outputs: "dA1, dW2, db2; also dA0 (not used), dW1, db1".
            ### START CODE HERE ### (≈ 2 lines of code)
            dA1, dW2, db2 = linear_activation_backward(dA2, cache2, 'sigmoid')
            dA0, dW1, db1 = linear_activation_backward(dA1, cache1, 'relu')
            ### END CODE HERE ###

            # Set grads['dW1'] to dW1, grads['db1'] to db1, grads['dW2'] to dW2, grads['db2'] to db2
            grads['dW1'] = dW1
            grads['db1'] = db1
            grads['dW2'] = dW2
            grads['db2'] = db2

            # Update parameters.
            ### START CODE HERE ### (approx. 1 line of code)
            parameters = update_parameters(parameters, grads, learning_rate)
            ### END CODE HERE ###

            # Retrieve W1, b1, W2, b2 from parameters
            W1 = parameters["W1"]
            b1 = parameters["b1"]
            W2 = parameters["W2"]
            b2 = parameters["b2"]

            # Print the cost every 100 iterations
            if print_cost and i % 100 == 0:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
                costs.append(cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        return parameters

    Build an L-layer neural network

    def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):  # lr was 0.009
        """
        Implements an L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

        Arguments:
        X -- data, numpy array of shape (num_px * num_px * 3, number of examples)
        Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
        layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
        learning_rate -- learning rate of the gradient descent update rule
        num_iterations -- number of iterations of the optimization loop
        print_cost -- if True, it prints the cost every 100 steps

        Returns:
        parameters -- parameters learnt by the model. They can then be used to predict.
        """
        np.random.seed(1)
        costs = []                         # keep track of cost

        # Parameters initialization.
        ### START CODE HERE ###
        parameters = initialize_parameters_deep(layers_dims)
        ### END CODE HERE ###

        # Loop (gradient descent)
        for i in range(0, num_iterations):

            # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
            ### START CODE HERE ### (≈ 1 line of code)
            AL, caches = L_model_forward(X, parameters)
            ### END CODE HERE ###

            # Compute cost.
            ### START CODE HERE ### (≈ 1 line of code)
            cost = compute_cost(AL, Y)
            ### END CODE HERE ###

            # Backward propagation.
            ### START CODE HERE ### (≈ 1 line of code)
            grads = L_model_backward(AL, Y, caches)
            ### END CODE HERE ###

            # Update parameters.
            ### START CODE HERE ### (≈ 1 line of code)
            parameters = update_parameters(parameters, grads, learning_rate)
            ### END CODE HERE ###

            # Print the cost every 100 iterations
            if print_cost and i % 100 == 0:
                print("Cost after iteration %i: %f" % (i, cost))
                costs.append(cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        return parameters

    Prediction

    def predict(X, y, parameters):
        """
        Predict the results of an L-layer neural network (this also covers the two-layer case).

        Arguments:
        X -- test set
        y -- labels
        parameters -- parameters of the trained model

        Returns:
        p -- predictions for the given dataset X
        """
        m = X.shape[1]
        n = len(parameters) // 2  # number of layers in the neural network
        p = np.zeros((1, m))

        # Forward propagation with the trained parameters
        probas, caches = L_model_forward(X, parameters)

        # Convert the probabilities to 0/1 predictions
        for i in range(0, probas.shape[1]):
            p[0, i] = 1 if probas[0, i] > 0.5 else 0

        print("Accuracy: " + str(float(np.sum((p == y)) / m)))
        return p

    Run predictions with the trained networks:

    # Prepare the data
    train_x_orig, train_y, test_x_orig, test_y, classes = lr_utils.load_data()

    # Reshape the training and test examples
    train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
    test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

    # Standardize data to have feature values between 0 and 1.
    train_x = train_x_flatten / 255
    test_x = test_x_flatten / 255

    # Train and evaluate the two-layer neural network
    parameters1 = two_layer_model(train_x, train_y, layers_dims=(12288, 7, 1), num_iterations=2500, print_cost=True)
    pred_test1 = predict(test_x, test_y, parameters1)

    # Train and evaluate the L-layer neural network
    parameters2 = L_layer_model(train_x, train_y, layers_dims=(12288, 20, 7, 5, 1), num_iterations=250, print_cost=True)
    pred_test2 = predict(test_x, test_y, parameters2)

     

    Two-layer neural network: 72% accuracy on the test set

    L-layer neural network: 78% accuracy on the test set

• Original post: https://www.cnblogs.com/cxq1126/p/13080648.html