• K-means (PRML) in C++


    原始数据

    #include <iostream>
    #include <fstream>
    #include <sstream>
    #include <vector>
    #include <string>
    #include <algorithm>
    #include <numeric>
    #include <cmath>
    #include <limits>

    // Reads whitespace-separated (x, y) pairs from `filename` and appends each
    // pair to `vv_data` as a two-element row {x, y}.
    //
    // filename: path of the text file to read.
    // vv_data:  output; rows are appended, existing contents are kept.
    //
    // Reading stops at the first extraction that fails (end of input, a token
    // that does not parse as T, or a file that could not be opened), so only
    // complete pairs are ever appended.
    template <class T>
    void ReadDataFromFile(const std::string &filename, std::vector<std::vector<T> > &vv_data) {
        std::ifstream vm_info(filename.c_str());
        T x, y;

        // Drive the loop with the extraction itself instead of eof(): eof() only
        // becomes true after a read has already failed, which made the old loop
        // append one bogus trailing row, and it never becomes true at all when
        // the file failed to open (the old loop then ran forever).
        while (vm_info >> x >> y) {
            std::vector<T> v_data;
            v_data.push_back(x);
            v_data.push_back(y);
            vv_data.push_back(v_data);
        }
        // The stream's destructor closes the file.
    }

    // Prints every element of `vv` to std::cout, each followed by one space,
    // with one extra space after each row, then a summary line reporting the
    // number of rows.
    template <class T>
    void Display2DVector(std::vector<std::vector<T> > &vv) {
        typedef typename std::vector<std::vector<T> >::const_iterator RowIter;
        typedef typename std::vector<T>::const_iterator CellIter;

        for (RowIter row = vv.begin(); row != vv.end(); ++row) {
            for (CellIter cell = row->begin(); cell != row->end(); ++cell) {
                std::cout << *cell << " ";
            }
            // Row separator: a second space after the row's last element.
            std::cout << " ";
        }
        std::cout << "--------the total of the 2DVector is " << vv.size() << std::endl;
    }

    // Appends `k` zero-valued entries to every row of `vv`. The callers in this
    // file use these trailing entries as per-cluster membership indicators.
    template <class T>
    void AddIndicator(std::vector<std::vector<T> > &vv, const int &k) {
        typedef typename std::vector<std::vector<T> >::iterator RowIter;

        // Same int -> size_t conversion the counter comparison performs.
        const size_t slots = k;
        for (RowIter row = vv.begin(); row != vv.end(); ++row) {
            for (size_t added = 0; added < slots; ++added) {
                row->push_back(0);
            }
        }
    }

    // Assignment step: for every row of `vv`, finds the nearest of the `k` means
    // (squared Euclidean distance on columns 0 and 1) and rewrites the row's
    // indicator columns 2 .. k+1 so that only the nearest cluster is set to 1.
    //
    // vv: rows laid out as {x, y, r_0, ..., r_{k-1}}; indicators are overwritten.
    // u:  flattened means {x_0, y_0, x_1, y_1, ...}; must hold at least 2*k values.
    // k:  number of clusters; must be non-negative. With k == 0 nothing is changed.
    //
    // Accesses are bounds-checked with at(), so rows or `u` that are too short
    // raise std::out_of_range.
    template <class T1, class T2>
    void UpdateIndicator(std::vector<std::vector<T1> > &vv, const std::vector<T2> &u, const int &k) {
        const size_t cluster_count = k;
        if (cluster_count == 0) {
            return;  // no clusters to assign to
        }

        for (size_t i = 0; i < vv.size(); ++i) {
            std::vector<T1> &row = vv.at(i);

            double best = std::numeric_limits<double>::max();
            // Integral and initialised: falls back to cluster 0 if no distance
            // compares smaller than `best` (e.g. every distance is NaN).
            size_t nearest = 0;
            for (size_t j = 0; j < cluster_count; ++j) {
                const double dx = row.at(0) - u.at(j * 2);
                const double dy = row.at(1) - u.at(j * 2 + 1);
                const double dist = dx * dx + dy * dy;
                if (dist < best) {
                    best = dist;
                    nearest = j;
                }
            }

            // Clear the previous assignment as well as setting the new one;
            // otherwise a point that moves to another cluster stays flagged in
            // both and corrupts the means and the cost.
            for (size_t j = 0; j < cluster_count; ++j) {
                row.at(j + 2) = (j == nearest) ? T1(1) : T1(0);
            }
        }
    }

    // Update step: recomputes each of the `k` cluster means as the
    // indicator-weighted average of columns 0 and 1 over all rows of `vv`, and
    // stores the result in `u`.
    //
    // vv: rows laid out as {x, y, r_0, ..., r_{k-1}}.
    // u:  flattened means {x_0, y_0, x_1, y_1, ...}; replaced on return. Entries
    //     beyond the first 2*k are reset to 0, as before.
    // k:  number of clusters; must be non-negative.
    //
    // A cluster whose indicators sum to zero (no assigned points) keeps its
    // previous mean instead of being divided by zero.
    // Accesses are bounds-checked with at(), so rows or `u` that are too short
    // raise std::out_of_range.
    template <class T1, class T2>
    void UpdateMeans(const std::vector<std::vector<T1> > &vv, std::vector<T2> &u, const int &k) {
        std::vector<T2> sum_set(u.size(), 0);
        const size_t cluster_count = k;

        for (size_t i = 0; i < cluster_count; ++i) {
            double sum_indi = 0.0;
            for (size_t j = 0; j < vv.size(); ++j) {
                const std::vector<T1> &row = vv.at(j);
                sum_indi += row.at(i + 2);
                sum_set.at(i * 2) += row.at(i + 2) * row.at(0);
                sum_set.at(i * 2 + 1) += row.at(i + 2) * row.at(1);
            }

            if (sum_indi > 0.0) {
                sum_set.at(i * 2) /= sum_indi;
                sum_set.at(i * 2 + 1) /= sum_indi;
            } else {
                // Empty cluster: dividing would yield NaN/inf means that poison
                // every later distance and cost, so carry the old mean forward.
                sum_set.at(i * 2) = u.at(i * 2);
                sum_set.at(i * 2 + 1) = u.at(i * 2 + 1);
            }
        }
        u = sum_set;
    }

    // Returns the distortion: the sum, over every row of `vv` and every cluster
    // j < k, of the row's indicator for cluster j (column j+2) times the squared
    // Euclidean distance between the row's (column 0, column 1) point and mean j
    // taken from the flattened vector `u` ({x_0, y_0, x_1, y_1, ...}).
    template <class T1, class T2>
    double DistortionMeasure(const std::vector<std::vector<T1> > &vv, const std::vector<T2> &u, const int &k) {
        typedef typename std::vector<std::vector<T1> >::const_iterator RowIter;

        const size_t cluster_count = k;
        double total = 0.0;

        for (RowIter row = vv.begin(); row != vv.end(); ++row) {
            for (size_t c = 0; c < cluster_count; ++c) {
                const double squared_distance =
                    pow(row->at(0) - u.at(c * 2), 2.0) + pow(row->at(1) - u.at(c * 2 + 1), 2.0);
                total += row->at(c + 2) * squared_distance;
            }
        }

        return total;
    }

    int main() {
        int k=4;
        double mean[]={39, 42, 70, 2, 230, 10, 190, 85};
        std::vector<double> u(mean, mean+sizeof(mean)/sizeof(mean[0]));

        std::string oridata="kmeans.dat";
        std::vector<std::vector<double> > vv_data;

        ReadDataFromFile(oridata, vv_data);

        AddIndicator(vv_data, k);

        std::cout<<"the original mean: ";
        for(std::vector<double>::const_iterator it=u.begin(); it!=u.end(); ++it) {
            std::cout<<*it<<" ";
        }
        std::cout<<std::endl;

        double cost_old=std::numeric_limits<double>::max();
        while(true) {
            double cost_new=DistortionMeasure(vv_data, u, k);

            if(std::abs(cost_new-cost_old)<0.0000001)
                break;

            UpdateIndicator(vv_data, u, k);

            UpdateMeans(vv_data, u, k);
            cost_old=cost_new;
        }

        std::cout<<"the new mean: ";
        for(std::vector<double>::const_iterator it=u.begin(); it!=u.end(); ++it) {
            std::cout<<*it<<" ";
        }
        std::cout<<std::endl;

        return 0;
    }

    The two phases of re-assigning data points to clusters and re-computing the cluster means are repeated in turn until there is no further change in the assignments (or until some maximum number of iterations is exceeded).

  • 相关阅读:
    body标签相关
    前端基础
    26,进程
    网络编程基础socket 重要中:TCP/UDP/七层协议
    24,内置方法的应用,(实现单利模式)
    23,反射,内置方法。
    22,hashlib(md5,和,sha算法)logging日志模块
    21,钻石继承,多态,封装,几个装饰器函数
    20,序列化模块 json,pickle,shelve
    19,面向对象
  • 原文地址:https://www.cnblogs.com/donggongdechen/p/10439613.html
Copyright © 2020-2023  润新知