• k-means 简单实现


    这是同学很久以前做的 demo。那时候我刚实习,他刚参加工作(他是两年制),那时候开始对数据挖掘感兴趣,他就把自己做的这个 demo 发给了我。记得他要毕业时,我们还一起帮着想 kmeans 的创新点,如今他已经从事数据挖掘工作两年了。
    他的博客地址:http://www.cnblogs.com/niuxiaoha/p/4645989.html

    package neugle.kmeans;
    
    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Iterator;
    
    public class Kmeans {
        private static int k = 3;// 划分簇数目
        private static int dataCount = 150;// 文本数量
        private static int n = 0;// 迭代次数
    
        public static void main(String[] args) {
            ArrayList<IrisModel> irisList = ReadFile();// 取得文本中数据
            ArrayList<IrisModel> beforeRandomPot = new ArrayList<IrisModel>();// 记录上一次质心位置
            ArrayList<IrisModel> randomPot = RandomPot(irisList);// 获得随机数据
            ArrayList<ArrayList<IrisModel>> kMeansList = null;
            while (!CompareRandomPot(beforeRandomPot, randomPot)) {
                kMeansList = KMeans(irisList, randomPot);// 进行n次聚类
                n++;
            }
            Print(kMeansList);
            System.out.println("迭代了" + n + "次");
        }
    
        // 读取文件中数据
        private static ArrayList<IrisModel> ReadFile() {
            FileReader read = null;
            BufferedReader br = null;
            ArrayList<IrisModel> irisList = new ArrayList<IrisModel>();
            try {
                read = new FileReader("D:\iris.data");
                br = new BufferedReader(read);
                String readLine = null;
                while ((readLine = br.readLine()) != null) {
                    IrisModel iris = new IrisModel();
                    String[] agrs = readLine.split(",");
                    iris.Sep_len = Double.parseDouble(agrs[0]);
                    iris.Sep_wid = Double.parseDouble(agrs[1]);
                    iris.Pet_len = Double.parseDouble(agrs[2]);
                    iris.Pet_wid = Double.parseDouble(agrs[3]);
                    iris.Iris_type = agrs[4];
                    irisList.add(iris);
                }
            } catch (FileNotFoundException e) {
                System.out.println("读取文件异常");
                irisList = null;
            } catch (IOException e) {
                System.out.println("读取文件异常");
                irisList = null;
            } finally {
                try {
                    br.close();
                } catch (IOException e) {
                    System.out.println("关闭文件异常");
                }
            }
            return irisList;
        }
    
        // 随机生成初始k个点
        private static ArrayList<IrisModel> RandomPot(ArrayList<IrisModel> irisList) {
            ArrayList<Integer> initCenter = new ArrayList<Integer>();
            ArrayList<IrisModel> randomPot = new ArrayList<IrisModel>();
            for (int i = 0; i < k; i++) {
                int num = (int) (Math.random() * dataCount);
                if (!initCenter.contains(num))
                    initCenter.add(num);
                else
                    i--;
            }
            Iterator<Integer> i = initCenter.iterator();
            while (i.hasNext()) {
                randomPot.add(irisList.get(i.next()));
            }
            return randomPot;
        }
    
        // KMeans主程序
        private static ArrayList<ArrayList<IrisModel>> KMeans(
                ArrayList<IrisModel> irisList, ArrayList<IrisModel> randomPot) {
            ArrayList<ArrayList<IrisModel>> groupNum = new ArrayList<ArrayList<IrisModel>>();
            for (int i = 0; i < randomPot.size(); i++) {
                ArrayList<IrisModel> list = new ArrayList<IrisModel>();
                list.add(randomPot.get(i));
                groupNum.add(list);
            }
            for (int i = 0; i < irisList.size(); i++) {
                double temp = Double.MAX_VALUE;
                int flag = -1;
                for (int j = 0; j < randomPot.size(); j++) {
                    double distance = DistanceOfTwoPoint(irisList.get(i),
                            randomPot.get(j));
                    if (distance < temp) {
                        temp = distance;
                        flag = j;
                    }
                }
                groupNum.get(flag).add(irisList.get(i));
            }
            // 重新计算质心
            ArrayList<IrisModel> tempList = CalcCenter(groupNum);
            randomPot.clear();
            for (int i = 0; i < tempList.size(); i++) {
                randomPot.add(tempList.get(i));
            }
            return groupNum;
        }
    
        // 计算两点欧氏距离
        private static double DistanceOfTwoPoint(IrisModel d1, IrisModel d2) {
            double sum = Math.sqrt(Math.pow((d1.Sep_len - d2.Sep_len), 2)
                    + Math.pow((d1.Sep_wid - d2.Sep_wid), 2)
                    + Math.pow((d1.Pet_len - d2.Pet_len), 2)
                    + Math.pow((d1.Pet_wid - d2.Pet_wid), 2));
            return sum;
        }
    
        // 重新计算k个簇的质心
        private static ArrayList<IrisModel> CalcCenter(
                ArrayList<ArrayList<IrisModel>> c) {
            ArrayList<IrisModel> cIris = new ArrayList<IrisModel>();
            Iterator<ArrayList<IrisModel>> i = c.iterator();
            while (i.hasNext()) {
                ArrayList<IrisModel> irisList = i.next();
                IrisModel eIris = new IrisModel();
                for (int k = 0; k < irisList.size(); k++) {
                    eIris.Sep_len += irisList.get(k).Sep_len;
                    eIris.Sep_wid += irisList.get(k).Sep_wid;
                    eIris.Pet_len += irisList.get(k).Pet_len;
                    eIris.Pet_wid += irisList.get(k).Pet_wid;
                }
                eIris.Sep_len = eIris.Sep_len / irisList.size();
                eIris.Sep_wid = eIris.Sep_wid / irisList.size();
                eIris.Pet_len = eIris.Pet_len / irisList.size();
                eIris.Pet_wid = eIris.Pet_wid / irisList.size();
                cIris.add(eIris);
            }
    
            return cIris;
        }
    
        // 比较前后两次的质心,以确定是否结束
        private static Boolean CompareRandomPot(
                ArrayList<IrisModel> beforeRandomPot, ArrayList<IrisModel> randomPot) {
            boolean flag = true;
            for (int i = 0; i < randomPot.size(); i++) {
                if (beforeRandomPot.size() <= 0
                        || !beforeRandomPot.contains(randomPot.get(i))) {
                    flag = false;
                    break;
                }
            }
            if (flag == false) {
                if (beforeRandomPot.size() > 0) {
                    beforeRandomPot.clear();
                }
                for (int i = 0; i < randomPot.size(); i++) {
                    beforeRandomPot.add(randomPot.get(i));
                }
            }
            return flag;
        }
    
        // 打印
        private static void Print(ArrayList<ArrayList<IrisModel>> kmeansList) {
            System.out.println("------------------------------------");
            Iterator<ArrayList<IrisModel>> i = kmeansList.iterator();
            while (i.hasNext()) {
                Iterator<IrisModel> ii = i.next().iterator();
                int n = 0;
                while (ii.hasNext()) {
                    n++;
                    IrisModel irisModel = ii.next();
                    if (n == 1)
                        continue;
                    System.out.println(irisModel.Sep_len + " " + irisModel.Sep_wid
                            + " " + irisModel.Pet_len + " " + irisModel.Pet_wid
                            + " " + irisModel.Iris_type);
                }
                System.out.println(n - 1);
                System.out.println("------------------------------------");
            }
        }
    }
  • 相关阅读:
    Web API中的消息处理程序(Message Handler)
    Web API中的模型验证
    autoconf配置的项目,编译debug版本
    gcc编译工具生成动态库和静态库之一----介绍
    VMware中四种网络连接模式的区别
    vmware虚拟机三种网络模式的区别
    code block自动生成makefile
    centos7 安装 codeblock(rpm)
    利用cbmakegen导出Code::blocks的Makefile
    C/C++程序CPU问题分析
  • 原文地址:https://www.cnblogs.com/zhangdebin/p/5567898.html
Copyright © 2020-2023  润新知