• 用java写bp神经网络(一)


    根据前篇博文《神经网络之后向传播算法》,现在用java实现一个bp神经网络。矩阵运算采用jblas库;之后会逐步增加功能:先支持并行计算,再支持输入向量调整,最后支持L-BFGS学习算法。

    上帝说,要有神经网络,于是,便有了一个神经网络。上帝还说,神经网络要有节点,权重,激活函数,输出函数,目标函数,然后也许还要有一个准确率函数,于是,神经网络完成了:

    /**
     * State of a fully connected feed-forward network: one weight matrix, one bias
     * vector and one activation function per layer, plus the factories for the cost
     * function and the optional accuracy function.
     */
    public class Net {
    	/** Per-layer weight matrices; rows == outputs of the layer, columns == inputs. */
    	List<DoubleMatrix> weights = new ArrayList<DoubleMatrix>();
    	/** Per-layer bias column vectors (one row per output of the layer). */
    	List<DoubleMatrix> bs = new ArrayList<>();
    	/** Per-layer activation functions, in the same order as {@link #weights}. */
    	List<ScalarDifferentiableFunction> activations = new ArrayList<>();
    	/** Factory for the per-sample cost function. */
    	CostFunctionFactory costFunc;
    	/** Factory for the per-sample accuracy function; {@code null} when not supplied. */
    	CostFunctionFactory accuracyFunc;
    	/** Node count of every layer, input layer first. */
    	int[] nodesNum;
    	/** Number of weight layers, i.e. {@code nodesNum.length - 1}. */
    	int layersNum;

    	/**
    	 * Creates a network without an accuracy function.
    	 *
    	 * @param nodesNum    node count of each layer, input layer first
    	 * @param activations activation of each non-input layer; expected to hold
    	 *                    {@code nodesNum.length - 1} entries
    	 * @param costFunc    factory for the per-sample cost function
    	 */
    	public Net(int[] nodesNum, ScalarDifferentiableFunction[] activations,CostFunctionFactory costFunc) {
    		this.initNet(nodesNum, activations);
    		this.costFunc=costFunc;
    		this.layersNum=nodesNum.length-1;
    	}

    	/**
    	 * Creates a network that additionally reports accuracy.
    	 *
    	 * @param nodesNum     node count of each layer, input layer first
    	 * @param activations  activation of each non-input layer
    	 * @param costFunc     factory for the per-sample cost function
    	 * @param accuracyFunc factory for the per-sample accuracy function
    	 */
    	public Net(int[] nodesNum, ScalarDifferentiableFunction[] activations,CostFunctionFactory costFunc,CostFunctionFactory accuracyFunc) {
    		this(nodesNum,activations,costFunc);
    		this.accuracyFunc=accuracyFunc;
    	}

    	/**
    	 * Re-initializes weights and biases while keeping the current topology and
    	 * activation functions.
    	 */
    	public void resetNet() {
    		// The no-arg toArray() returns Object[]; casting that to
    		// ScalarDifferentiableFunction[] throws ClassCastException at runtime.
    		// The typed overload returns a correctly typed copy, which is also
    		// required because initNet clears this.activations before reading the array.
    		ScalarDifferentiableFunction[] currentActivations = activations
    				.toArray(new ScalarDifferentiableFunction[activations.size()]);
    		this.initNet(nodesNum, currentActivations);
    	}

    	/**
    	 * Rebuilds weights, biases and the activation list from scratch.
    	 * The argument check is an {@code assert}, so it only fires when assertions
    	 * are enabled.
    	 */
    	private void initNet(int[] nodesNum, ScalarDifferentiableFunction[] activations) {
    		assert (nodesNum != null && activations != null
    				&& nodesNum.length == activations.length + 1 && nodesNum.length > 1);
    		this.nodesNum = nodesNum;
    		this.weights.clear();
    		this.bs.clear();
    		this.activations.clear();
    		for (int i = 0; i < nodesNum.length - 1; i++) {
    			// Columns == inputs of the layer; rows == outputs of the layer.
    			int columns = nodesNum[i];
    			int rows = nodesNum[i + 1];
    			// Half-width of the initialization interval, derived from the layer's fan-in and fan-out.
    			double r1 = Math.sqrt(6) / Math.sqrt(rows + columns + 1);
    			// W: random values rescaled in place by 2*r1 and shifted by -r1.
    			DoubleMatrix weight = DoubleMatrix.rand(rows, columns).muli(2*r1).subi(r1);
    			weights.add(weight);

    			// b: biases start at zero.
    			DoubleMatrix b = DoubleMatrix.zeros(rows, 1);
    			bs.add(b);

    			// Activation of this layer.
    			this.activations.add(activations[i]);
    		}
    	}
    }
    

     上帝造完了神经网络,去休息了。人说,我要使用神经网络,我要利用正向传播计算各层的结果,然后利用反向传播调整网络的状态,最后,我要让它能告诉我猎物在什么方向,花儿为什么这样香。

    /**
     * Runs one forward pass and one backward pass over a {@link Net} for a batch of
     * samples stored column-wise. It only computes results; how they are applied to
     * the network is left to the caller.
     */
    public class Propagation {
    	Net net;

    	public Propagation(Net net) {
    		this.net = net;
    	}

    	/**
    	 * Forward pass over a batch of samples (one sample per column).
    	 *
    	 * @param input batch input matrix
    	 * @return per-layer activations and activation derivatives; the raw
    	 *         pre-activation list is set to {@code null} before returning
    	 */
    	public ForwardResult forward(DoubleMatrix input) {
    		ForwardResult forwardResult = new ForwardResult();
    		forwardResult.input = input;

    		DoubleMatrix signal = input;
    		for (int layer = 0; layer < net.weights.size(); layer++) {
    			DoubleMatrix weight = net.weights.get(layer);
    			DoubleMatrix bias = net.bs.get(layer);
    			ScalarDifferentiableFunction activation = net.activations.get(layer);

    			// Pre-activation: W * x + b, with b added to every column.
    			DoubleMatrix preActivation = weight.mmul(signal).addColumnVector(bias);
    			forwardResult.netResult.add(preActivation);

    			// Activation derivative evaluated at the pre-activation values.
    			DoubleMatrix slope = MatrixUtil.applyNewElements(
    					derivativeOf(activation), preActivation);

    			// Layer output; it becomes the input of the next layer.
    			signal = MatrixUtil.applyNewElements(activation, preActivation);

    			forwardResult.finalResult.add(signal);
    			forwardResult.derivativeResult.add(slope);
    		}

    		// The pre-activations are no longer needed.
    		forwardResult.netResult = null;
    		return forwardResult;
    	}

    	/** Adapts an activation so that evaluating it yields the activation's derivative. */
    	private ScalarFunction derivativeOf(final ScalarDifferentiableFunction activation) {
    		return new ScalarFunction() {
    			@Override
    			public double valueAt(double x) {
    				return activation.derivativeAt(x);
    			}
    		};
    	}

    	/**
    	 * Backward pass: gradients are averaged over all samples of the batch.
    	 *
    	 * @param target        expected outputs, one sample per column
    	 * @param forwardResult result of the matching {@link #forward(DoubleMatrix)} call
    	 * @return weight and bias gradients in layer order, per-sample cost, per-sample
    	 *         accuracy ({@code null} without an accuracy function) and, as the only
    	 *         entry of {@code deltas}, the delta with respect to the input
    	 */
    	public BackwardResult backward(DoubleMatrix target,
    			ForwardResult forwardResult) {
    		final int sampleCount = target.columns;
    		final boolean withAccuracy = net.accuracyFunc != null;

    		BackwardResult backwardResult = new BackwardResult();
    		DoubleMatrix sampleCosts = DoubleMatrix.zeros(1, sampleCount);
    		DoubleMatrix networkOutput = forwardResult.finalResult
    				.get(forwardResult.finalResult.size() - 1);
    		DoubleMatrix outputDelta = DoubleMatrix.zeros(networkOutput.rows,
    				networkOutput.columns);
    		DoubleMatrix outputSlope = forwardResult.derivativeResult
    				.get(forwardResult.derivativeResult.size() - 1);
    		DoubleMatrix sampleAccuracy = withAccuracy
    				? DoubleMatrix.zeros(1, sampleCount)
    				: null;

    		// Output layer: cost, delta and (optionally) accuracy, one sample at a time.
    		for (int sample = 0; sample < sampleCount; sample++) {
    			CostFunction sampleCost = net.costFunc.create(
    					target.getColumn(sample).toArray());
    			sampleCosts.put(sample,
    					sampleCost.valueAt(networkOutput.getColumn(sample).toArray()));

    			// delta = dCost/dOutput (element-wise) * activation derivative
    			DoubleMatrix costGradient = new DoubleMatrix(
    					sampleCost.derivativeAt(networkOutput.getColumn(sample).toArray()));
    			DoubleMatrix slopeColumn = outputSlope.getColumn(sample);
    			outputDelta.putColumn(sample, costGradient.muli(slopeColumn));

    			if (withAccuracy) {
    				CostFunction accuracyFunc = net.accuracyFunc.create(
    						target.getColumn(sample).toArray());
    				sampleAccuracy.put(sample,
    						accuracyFunc.valueAt(networkOutput.getColumn(sample).toArray()));
    			}
    		}
    		backwardResult.cost = sampleCosts;
    		backwardResult.accuracy = sampleAccuracy;

    		// Walk the layers from the output back to the input, carrying the delta along.
    		DoubleMatrix currentDelta = outputDelta;
    		for (int layer = net.layersNum - 1; layer >= 0; layer--) {
    			DoubleMatrix layerInput;
    			if (layer == 0) {
    				layerInput = forwardResult.input;
    			} else {
    				layerInput = forwardResult.finalResult.get(layer - 1);
    			}

    			// Weight gradient, averaged over all samples.
    			DoubleMatrix weightGradient = currentDelta
    					.mmul(layerInput.transpose())
    					.div(sampleCount);
    			backwardResult.gradients.add(weightGradient);

    			// Bias gradient.
    			backwardResult.biasGradients.add(currentDelta.rowMeans());

    			// Delta of the previous layer. For layer 0 this is the error with respect
    			// to the input (the input-adjustment gradient) and is not averaged.
    			DoubleMatrix previousDelta = net.weights.get(layer).transpose()
    					.mmul(currentDelta);
    			if (layer > 0) {
    				previousDelta = previousDelta.muli(
    						forwardResult.derivativeResult.get(layer - 1));
    			}
    			currentDelta = previousDelta;
    		}

    		// Gradients were collected output-first; flip them into layer order.
    		Collections.reverse(backwardResult.gradients);
    		Collections.reverse(backwardResult.biasGradients);

    		// Only the delta with respect to the input is kept.
    		backwardResult.deltas.clear();
    		backwardResult.deltas.add(currentDelta);

    		return backwardResult;
    	}

    	public Net getNet() {
    		return net;
    	}

    }
    

     上面是一次正向/反向传播的具体代码。训练方式为批量训练,即所有样本一起训练。然而我们也可以传入只有一列的input/target样本,实现adapt方式(逐样本)的串行训练;还可以把样本分成很多批传入,实现mini-batch方式的训练。这些都不是Propagation要考虑的事情,它只是忠实地把传入的数据正向过一遍,反向过一遍,然后把计算结果原封不动地返回给你。至于传入什么,以及结果怎么运用,是Trainer和Learner要做的事情。下回分解。

  • 相关阅读:
    linux读写锁
    正则表达式
    C++原型模式和模板模式
    C++外观模式和组合模式
    C++代理模式
    c++桥接模式
    Linux常用命令history/tcpdump/awk/grep
    C++委托模式
    c++ 读写锁
    布衣客
  • 原文地址:https://www.cnblogs.com/wuseguang/p/4124849.html
Copyright © 2020-2023  润新知