• 【deep learning学习笔记】注释yusugomori的DA代码 --- dA.cpp -- 训练


    说实话,具体的训练公式我没有亲自推导,姑且认为他写的代码是对的。总体上看,训练用的是 BP(反向传播)方法。特殊之处在于,输入层和输出层是完完全全的“同一层”:两者共用同一组可见节点,编码和解码也共用同一个权重矩阵 W(编码时按 W[i][j] 访问,解码时按 W[j][i] 访问,即使用其转置)。

    // Builds the corrupted ("noised") copy of a visible vector.
    //   x        - original visible vector; n_visible entries are read
    //   tilde_x  - receives the corrupted vector; n_visible entries are written
    //   p        - forwarded unchanged as the second argument of binomial(1, p);
    //              train() passes 1 - corruption_level here
    // Entries equal to 0 are copied as 0; every other entry is replaced by the
    // result of one binomial(1, p) call.
    void dA::get_corrupted_input(int *x, int *tilde_x, double p)
    {
        for (int idx = 0; idx < n_visible; ++idx)
        {
            // binomial() is invoked only for non-zero entries, never for zeros.
            tilde_x[idx] = (x[idx] == 0) ? 0 : binomial(1, p);
        }
    }
    
    // Encode
    // Computes the hidden-layer activations for a visible vector:
    //   y[h] = sigmoid( sum_v( W[h][v] * x[v] ) + hbias[h] )
    //   x - visible vector; n_visible entries are read
    //   y - output buffer; n_hidden entries are written
    void dA::get_hidden_values(int *x, double *y)
    {
        for (int h = 0; h < n_hidden; ++h)
        {
            // Weighted sum of the visible units feeding hidden unit h.
            double activation = 0;
            for (int v = 0; v < n_visible; ++v)
            {
                activation += W[h][v] * x[v];
            }
            activation += hbias[h];

            y[h] = sigmoid(activation);
        }
    }
    
    // Decode
    // Maps hidden activations back onto the visible layer:
    //   z[v] = sigmoid( sum_h( W[h][v] * y[h] ) + vbias[v] )
    // The same matrix W used by get_hidden_values() is read here with its
    // indices swapped (W[h][v] walked over h for a fixed v).
    //   y - hidden activations; n_hidden entries are read
    //   z - output buffer; n_visible entries are written
    void dA::get_reconstructed_input(double *y, double *z)
    {
        for (int v = 0; v < n_visible; ++v)
        {
            // Weighted sum of the hidden units feeding visible unit v.
            double activation = 0;
            for (int h = 0; h < n_hidden; ++h)
            {
                activation += W[h][v] * y[h];
            }
            activation += vbias[v];

            z[v] = sigmoid(activation);
        }
    }
    
    void dA::train (
    				int *x,						// the input sample from visiable node
    				double lr,					// the learning rate
    				double corruption_level		// corruption_level is the probability of noise
    				) 
    {
    	// the auto-encoder networks:
    	// input(visible) layer --> hidden layer --> output(visible) layer
    	// the input layer is the same as the output layer, the two layers are totally same.
    	// we train it by the standard bp algorithm, from output layer to the hidden layer, and to the input layer
    	// Here is the whole process: 
    
    
    	int *tilde_x = new int[n_visible];		// the noise input
    	double *y = new double[n_hidden];		// the output of hidden layer
    	double *z = new double[n_visible];		// the output of output layer, reconstruction
    
    	double *L_vbias = new double[n_visible];	// temp value for visible bias
    	double *L_hbias = new double[n_hidden];		// temp value for hidden bias
    
    	double p = 1 - corruption_level;
    
    	// make the input sample noise by the p probability
    	get_corrupted_input(x, tilde_x, p);
    	// calculate the output of hidden nodes by the noise input, encode
    	get_hidden_values(tilde_x, y);
    	// reconstruct the input sample from visible nodes, decode
    	get_reconstructed_input(y, z);
      
    	// update the bias of visible nodes
    	for(int i=0; i<n_visible; i++) 
    	{
    		// the difference between input sample and the PROBABILITY of reconstructed probability of visible node
    		// it's different from RBM that in RBM we calcualte the difference between input sample and 
    		// the 0-1 state of the reconstructed visiable node
    		// here use the standard bp algorithm, from visible layer to hidden layer
    		L_vbias[i] = x[i] - z[i];
    		// update the value by the learning rate
    		vbias[i] += lr * L_vbias[i] / N;
    	}
    
    	// update the bias of hidden nodes
    	for(int i=0; i<n_hidden; i++) 
    	{
    		// propgate the bias from visible nodes
    		// here use the standard bp algorithm, from visible layer to hidden layer
    		L_hbias[i] = 0;
    		for(int j=0; j<n_visible; j++) 
    		{
    			L_hbias[i] += W[i][j] * L_vbias[j];
    		}
    		L_hbias[i] *= y[i] * (1 - y[i]);
    		hbias[i] += lr * L_hbias[i] / N;
    	}
      
    	// update the weight of networks
    	for(int i=0; i<n_hidden; i++) 
    	{
    		for(int j=0; j<n_visible; j++) 
    		{
    			W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
    		}
    	}
    
    	delete[] L_hbias;
    	delete[] L_vbias;
    	delete[] z;
    	delete[] y;
    	delete[] tilde_x;
    }
    
    void dA::reconstruct (
    				int *x,			// the input sample		-- input
    				double *z		// the reconstructed value -- output
    				) 
    {
    	double *y = new double[n_hidden];
    
    	// calculate the output of hidden layer
    	get_hidden_values(x, y);
    	// reconstruct from hidden layer to visible layer
    	get_reconstructed_input(y, z);
    
    	delete[] y;
    }


  • 相关阅读:
    java。多态
    java。构造方法
    java.final修饰符l
    java。this的用法
    数据库:内连接与外连接区别
    Java工具类-设置字符编码
    Java工具类-验证码工具
    Java工具类-加密算法
    java中的object... args参数
    针对MySql封装的JDBC通用框架类(包含增删改查、JavaBean反射原理)
  • 原文地址:https://www.cnblogs.com/xinyuyuanm/p/3206560.html
Copyright © 2020-2023  润新知