编程要求:
In this exercise, you will implement the backpropagation algorithm for neural networks and apply it to the task of hand-written digit recognition.
1.总体的思路
1.确定layer的层数,和每层layer的大小,这里确定包含最基本的三层结构(输入,隐含,输出)
2.随机初始化各层参数(权重)的取值
3.计算cost function,和神经网络各层的参数偏导(BP实现的过程)(偏导供梯度下降法使用)
4.利用梯度下降法(Matlab中用fminunc或fmincg(较fminunc快))多次迭代,求出参数
5.利用偏导的数学定义(数值差分)求出的近似梯度与用BP求得的偏导作比较,确保BP的实现是正确的
6.最后计算预测的准确率
2.关键代码段
1.初始化各层的参数
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Draw random initial weights for one network layer.
%   W = RANDINITIALIZEWEIGHTS(L_in, L_out) returns an L_out-by-(L_in + 1)
%   matrix whose entries are uniform random samples scaled from the unit
%   interval produced by rand to the symmetric range around zero with
%   half-width 0.12.
%
%   L_in   number of incoming connections (one extra column is added;
%          presumably for the bias unit -- confirm against the caller)
%   L_out  number of outgoing connections (rows of W)

    half_width = 0.12;

    % Sample on the unit interval first, then stretch to width 2*half_width
    % and shift so the range is centred on zero.
    unit_samples = rand(L_out, 1 + L_in);
    W = unit_samples * 2 * half_width - half_width;
end
2.计算代价函数和利用BP求偏导
function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)
%NNCOSTFUNCTION Regularized cost and gradient of a three-layer neural network.
%   [J, grad] = NNCOSTFUNCTION(nn_params, input_layer_size, ...
%   hidden_layer_size, num_labels, X, y, lambda) evaluates the cross-entropy
%   cost with L2 regularization and its gradient, obtained by
%   backpropagation.
%
%   nn_params          unrolled weights: the entries of Theta1 followed by
%                      the entries of Theta2
%   input_layer_size   number of input features (columns of X)
%   hidden_layer_size  number of hidden units
%   num_labels         number of output classes
%   X                  m-by-input_layer_size matrix, one example per row
%   y                  labels compared against 1..num_labels
%   lambda             regularization strength
%
%   J     scalar cost
%   grad  unrolled gradient, laid out like nn_params
%
%   Relies on the helpers sigmoid and sigmoidGradient being on the path.

    % Recover the two weight matrices from the unrolled parameter vector.
    split_at = hidden_layer_size * (input_layer_size + 1);
    Theta1 = reshape(nn_params(1:split_at), ...
                     hidden_layer_size, (input_layer_size + 1));
    Theta2 = reshape(nn_params((split_at + 1):end), ...
                     num_labels, (hidden_layer_size + 1));

    m = size(X, 1);

    % Forward pass. z2 is kept so the backward pass can reuse it instead of
    % recomputing the product.
    X = [ones(m, 1) X];                   % m x (input_layer_size + 1)
    z2 = X * Theta1';                     % m x hidden_layer_size
    HidenOutput = [ones(m, 1) sigmoid(z2)]; % m x (hidden_layer_size + 1)
    Hx = sigmoid(HidenOutput * Theta2');  % m x num_labels

    % One-hot encode the labels and accumulate the unregularized cost one
    % class at a time. The accumulator must start at zero; without this the
    % first "J = J + ..." reads an undefined variable.
    J = 0;
    Y = zeros(m, num_labels);             % m x num_labels
    for c = 1:num_labels
        y_temp = (y == c);
        Y(:, c) = y_temp;
        J = J + (-y_temp' * log(Hx(:, c)) - (1 - y_temp)' * log(1 - Hx(:, c)));
    end

    % Regularization: the first column of each Theta (bias weights) is
    % excluded from the penalty.
    J = J / m + lambda * (sum(sum(Theta1(:, 2:end) .^ 2)) + ...
                          sum(sum(Theta2(:, 2:end) .^ 2))) / (2 * m);

    % Backpropagation.
    det3 = Hx - Y;                                            % m x num_labels
    % The bias column of Theta2 is skipped because no error is propagated
    % back through the constant bias unit.
    det2 = (det3 * Theta2(:, 2:end)) .* sigmoidGradient(z2);  % m x hidden_layer_size

    Theta1_grad = (det2' * X) / m;            % hidden_layer_size x (input_layer_size + 1)
    Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + lambda * Theta1(:, 2:end) / m;

    Theta2_grad = (det3' * HidenOutput) / m;  % num_labels x (hidden_layer_size + 1)
    Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + lambda * Theta2(:, 2:end) / m;

    % Unroll gradients
    grad = [Theta1_grad(:) ; Theta2_grad(:)];
end
3.利用偏导的定义,检查BP
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Central-difference estimate of the gradient of J.
%   numgrad = COMPUTENUMERICALGRADIENT(J, theta) approximates the partial
%   derivative of J with respect to every element of theta using
%   (J(theta + e) - J(theta - e)) / (2 * e) with e = 1e-4.
%
%   J      function handle; J(theta) must return the scalar cost as its
%          first output
%   theta  parameter array at which the gradient is estimated
%
%   numgrad has the same size as theta. The cost is evaluated twice per
%   element, so this is intended for checking an analytic gradient on small
%   problems rather than for training.

    numgrad = zeros(size(theta));
    perturb = zeros(size(theta));
    e = 1e-4;

    for p = 1:numel(theta)
        % Perturb only the p-th parameter.
        perturb(p) = e;
        loss1 = J(theta - perturb);
        loss2 = J(theta + perturb);

        % Central difference for the p-th partial derivative.
        numgrad(p) = (loss2 - loss1) / (2 * e);

        % Reset so the next iteration perturbs a single element again.
        perturb(p) = 0;
    end
end
4.训练和预测
% Minimize the cost function, starting from the initial parameter vector.
[nn_params, cost] = fmincg(costFunction, initial_nn_params, options);

% Rebuild the weight matrices from the optimized parameter vector so that the
% prediction below uses the trained weights.
% NOTE(review): assumes input_layer_size, hidden_layer_size and num_labels
% are already defined in the workspace -- confirm against the full script.
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));
Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));

% Predict on the training set and report the percentage of matching labels.
pred = predict(Theta1, Theta2, X);
fprintf(' Training Set Accuracy: %f ', mean(double(pred == y)) * 100);