• Weka中EM算法详解


     1  private void EM_Init (Instances inst)
     2     throws Exception {
     3     int i, j, k;
     4 
     5     // 由于EM算法对初始值较敏感,故选择run k means 10 times and choose best solution
     6     SimpleKMeans bestK = null;
     7     double bestSqE = Double.MAX_VALUE;
     8     for (i = 0; i < 10; i++) {
     9       SimpleKMeans sk = new SimpleKMeans();
    10       sk.setSeed(m_rr.nextInt());
    11       sk.setNumClusters(m_num_clusters);
    12       sk.setDisplayStdDevs(true);
    13       sk.buildClusterer(inst);
    14       //KMeans中各个cluster的平方误差
    15       if (sk.getSquaredError() < bestSqE) {
    16          
    17           bestSqE = sk.getSquaredError();
    18           bestK = sk;
    19       }
    20     }
    21     
    22     /*************** KMeans Finds the best cluster number *****************/
    23     
    24     
    25     // initialize with best k-means solution
    26     m_num_clusters = bestK.numberOfClusters();
    27     // 每个样本所在各个集群的概率
    28     m_weights = new double[inst.numInstances()][m_num_clusters];
    29     // 评估每个集群所对应的离散型属性的相关取值
    30
    m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs]; 31 // 每个集群所对应的连续性属性数所对应的相关取值(均值,标准偏差,样本权值(进行归一化)) 32 m_modelNormal = new double[m_num_clusters][m_num_attribs][3]; 33 // 每个集群所对应的先验概率 34 m_priors = new double[m_num_clusters]; 35 // 每个集群所对应的中心点 36 Instances centers = bestK.getClusterCentroids(); 37 // 每个集群所对应的标准差 38 Instances stdD = bestK.getClusterStandardDevs(); 39 // ??? Returns for each cluster the frequency counts for the values of each nominal attribute 40 int [][][] nominalCounts = bestK.getClusterNominalCounts(); 41 // 得到每个集群所对应的样本数 42 int [] clusterSizes = bestK.getClusterSizes(); 43 44 for (i = 0; i < m_num_clusters; i++) { 45 Instance center = centers.instance(i); 46 for (j = 0; j < m_num_attribs; j++) { 47 48 // 样本属性是离散型 49 if (inst.attribute(j).isNominal()) 50 { 51 m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues() 52 , true); 53 for (k = 0; k < inst.attribute(j).numValues(); k++) { 54 m_model[i][j].addValue(k, nominalCounts[i][j][k]); 55 } 56 } 57 //// 样本属性是连续型 58 else 59 { 60 double minStdD = (m_minStdDevPerAtt != null)? m_minStdDevPerAtt[j]: m_minStdDev; 61 double mean = (center.isMissing(j))? inst.meanOrMode(j): center.value(j); 62 m_modelNormal[i][j][0] = mean; 63 double stdv = (stdD.instance(i).isMissing(j))? ((m_maxValues[j] - 64 m_minValues[j]) / (2 * m_num_clusters)): stdD.instance(i).value(j); 65 if (stdv < minStdD) 66 { 67 stdv = inst.attributeStats(j).numericStats.stdDev; 68 if (Double.isInfinite(stdv)) { 69 stdv = minStdD; 70 } 71 if (stdv < minStdD) { 72 stdv = minStdD; 73 } 74 } 75 if (stdv <= 0) { 76 stdv = m_minStdDev; 77 } 78 79 m_modelNormal[i][j][1] = stdv; 80 m_modelNormal[i][j][2] = 1.0; 81 } 82 } 83 } 84 85 86 for (j = 0; j < m_num_clusters; j++) { 87 // 计算每个集群的先验概率 88 m_priors[j] = clusterSizes[j]; 89 } 90 Utils.normalize(m_priors); 91 }
  • 相关阅读:
    4G DTU在城市景观照明中的应用解决方案
    物联网在物业管理和智慧楼宇中的应用解决方案
    4G工业路由器等物联网设备在食品安全检测中的应用
    NB-IoT网络在农业和畜牧业中的物联网智能灌溉应用案例
    串口服务器等应用于污水处理厂的自动监控和控制管理
    插卡式双卡4G工业路由器在数控机床远程控制中的应用
    4G工业路由器在水电站远程监控中的应用案例
    4G工业路由器在供水系统和道路交通远程检测中的应用案例
    HDU 6188 Duizi and Shunzi 贪心
    HDU 6185 Covering 矩阵快速幂
  • 原文地址:https://www.cnblogs.com/likai198981/p/3170568.html
Copyright © 2020-2023  润新知