说实话,具体的训练公式我没有自己推导,姑且认为他写的代码是对的。总体上看,训练用的是 BP(反向传播)方法;特殊之处在于,输入层和输出层是完完全全的“同一层”。
void dA::get_corrupted_input ( int *x, // the original input 0-1 vector -- input int *tilde_x, // the resulted 0-1 vector gotten noised -- output double p // the p probability of noise, binomial test -- input ) { for(int i=0; i<n_visible; i++) { if(x[i] == 0) { // if the state is 0, do noghing tilde_x[i] = 0; } else { // if the state is 1, add the noise of p probability on it tilde_x[i] = binomial(1, p); } } } // Encode void dA::get_hidden_values ( int *x, // the input from visible nodes double *y // the output of hidden nodes ) { for(int i=0; i<n_hidden; i++) { // calculated sum_j(vj * wij) + bi y[i] = 0; for(int j=0; j<n_visible; j++) { y[i] += W[i][j] * x[j]; } y[i] += hbias[i]; // sigmod (y) y[i] = sigmoid(y[i]); } } // Decode void dA::get_reconstructed_input ( double *y, // the input from hidden nodes double *z // the output reconstructed of visible nodes ) { for(int i=0; i<n_visible; i++) { // calculated sum_j(hj * wij) + ci z[i] = 0; for(int j=0; j<n_hidden; j++) { z[i] += W[j][i] * y[j]; } z[i] += vbias[i]; // sigmod (z) z[i] = sigmoid(z[i]); } } void dA::train ( int *x, // the input sample from visiable node double lr, // the learning rate double corruption_level // corruption_level is the probability of noise ) { // the auto-encoder networks: // input(visible) layer --> hidden layer --> output(visible) layer // the input layer is the same as the output layer, the two layers are totally same. 
// we train it by the standard bp algorithm, from output layer to the hidden layer, and to the input layer // Here is the whole process: int *tilde_x = new int[n_visible]; // the noise input double *y = new double[n_hidden]; // the output of hidden layer double *z = new double[n_visible]; // the output of output layer, reconstruction double *L_vbias = new double[n_visible]; // temp value for visible bias double *L_hbias = new double[n_hidden]; // temp value for hidden bias double p = 1 - corruption_level; // make the input sample noise by the p probability get_corrupted_input(x, tilde_x, p); // calculate the output of hidden nodes by the noise input, encode get_hidden_values(tilde_x, y); // reconstruct the input sample from visible nodes, decode get_reconstructed_input(y, z); // update the bias of visible nodes for(int i=0; i<n_visible; i++) { // the difference between input sample and the PROBABILITY of reconstructed probability of visible node // it's different from RBM that in RBM we calcualte the difference between input sample and // the 0-1 state of the reconstructed visiable node // here use the standard bp algorithm, from visible layer to hidden layer L_vbias[i] = x[i] - z[i]; // update the value by the learning rate vbias[i] += lr * L_vbias[i] / N; } // update the bias of hidden nodes for(int i=0; i<n_hidden; i++) { // propgate the bias from visible nodes // here use the standard bp algorithm, from visible layer to hidden layer L_hbias[i] = 0; for(int j=0; j<n_visible; j++) { L_hbias[i] += W[i][j] * L_vbias[j]; } L_hbias[i] *= y[i] * (1 - y[i]); hbias[i] += lr * L_hbias[i] / N; } // update the weight of networks for(int i=0; i<n_hidden; i++) { for(int j=0; j<n_visible; j++) { W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N; } } delete[] L_hbias; delete[] L_vbias; delete[] z; delete[] y; delete[] tilde_x; } void dA::reconstruct ( int *x, // the input sample -- input double *z // the reconstructed value -- output ) { double *y = new 
double[n_hidden]; // calculate the output of hidden layer get_hidden_values(x, y); // reconstruct from hidden layer to visible layer get_reconstructed_input(y, z); delete[] y; }