package com.excellence.splitsentence; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import org.jblas.ComplexDoubleMatrix; import org.jblas.ComplexFloatMatrix; import org.jblas.DoubleMatrix; import org.jblas.Eigen; import org.jblas.FloatMatrix; import com.mongodb.BasicDBList; import com.mongodb.DB; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.MongoClient; import com.mongodb.MongoCredential; import com.mongodb.ServerAddress; public class PCA { /** * Reduce matrix dimension 减少矩阵维度 * @param source 源矩阵 * @param dimension 目标维度 * @return Target matrix 返回目标矩阵 */ public static FloatMatrix dimensionReduction(FloatMatrix source, int dimension) { //C=X*X^t/m 矩阵*矩阵^异或/列数 FloatMatrix covMatrix = source.mmul(source.transpose()).div(source.columns); ComplexFloatMatrix eigVal = Eigen.eigenvalues(covMatrix); ComplexFloatMatrix[] eigVectorsVal = Eigen.eigenvectors(covMatrix); ComplexFloatMatrix eigVectors = eigVectorsVal[0]; //通过特征值将符号向量从大到小排序 List<PCABean> beans = new ArrayList<PCA.PCABean>(); for (int i = 0; i < eigVectors.columns; i++) { beans.add(new PCABean(eigVal.get(i).real(), eigVectors.getColumn(i))); } Collections.sort(beans); FloatMatrix newVec = new FloatMatrix(dimension, beans.get(0).vector.rows); for (int i = 0; i < dimension; i++) { ComplexFloatMatrix dm = beans.get(i).vector; FloatMatrix real = dm.getReal(); newVec.putRow(i, real); } return newVec.mmul(source); } static class PCABean implements Comparable<PCABean> { float eigenValue; ComplexFloatMatrix vector; public PCABean(Float eigenValue, ComplexFloatMatrix vector) { super(); this.eigenValue = eigenValue; this.vector = vector; } @Override public int compareTo(PCABean o) { return Float.compare(o.eigenValue, eigenValue); } @Override public String toString() { return "PCABean [eigenValue=" + eigenValue + ", vector=" + vector + "]"; } }
}
如何调用?
// Usage example: reduce a document vector to 10 dimensions with PCA.dimensionReduction.
// NOTE(review): docvector is defined outside this excerpt; getElementArray() presumably
// returns its raw float values — confirm against its declaration.
float[] vector = docvector.getElementArray();
// Wrap the raw values in a jblas FloatMatrix, the input type dimensionReduction expects.
// NOTE(review): the float[] constructor presumably yields a single-column matrix — confirm
// against the jblas documentation.
FloatMatrix d = new FloatMatrix(vector);
// The second argument is the target dimension.
FloatMatrix result = PCA.dimensionReduction(d, 10);