• 数据挖掘聚类算法--DBSCAN


    数据集如下所示:

    1,1,1
    2,1.5,1
    3,0.5,1
    3,5,-1
    7,0.75,-1
    7,4,2
    8,5,2
    8,5.5,2
    

    数据集有三个属性:前两个是二维坐标中的x和y,第三个是该点所属的类(-1代表孤立点)。坐标系如下图所示:

    源代码如下:

    package neugle.dbscan;
    
    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Random;
    
    /**
     * Minimal DBSCAN density-based clustering over two-dimensional points.
     *
     * <p>Input is a text file with one sample per line in the form
     * {@code x,y,label}. The label is carried along and printed, but it does not
     * influence the clustering.
     *
     * <p>Neighbourhood convention used by this class: the neighbourhood of a point
     * contains every <em>other</em> sample whose Euclidean distance is at most
     * {@code eps}; the point itself is not counted. A point is a core point when
     * that neighbourhood holds at least {@code minPts} samples.
     */
    public class DBScan {
        private List<Point> pointList = new ArrayList<DBScan.Point>();// samples read from the input file

        private List<List<Point>> clusterList = new ArrayList<List<Point>>();// resulting clusters

        private List<Point> noiseList = new ArrayList<DBScan.Point>();// samples classified as noise

        private List<Point> npointList = new ArrayList<DBScan.Point>();// candidate set N of the cluster being expanded

        private List<Integer> unvisitedList = new ArrayList<Integer>();// indices into pointList of unvisited samples

        private final Random random = new Random();// one generator reused for every random pick

        private double eps;// neighbourhood radius
        private int minPts;// minimum neighbour count for a core point

        /** One sample: coordinates, the label read from the file, and bookkeeping flags. */
        class Point {
            public double x;
            public double y;
            public String point_type;
            public boolean isVisited = false;
            // True once the point belongs to some cluster; lets border points that
            // were visited earlier (and marked as noise) still be claimed exactly once.
            public boolean inCluster = false;
        }

        /**
         * @param eps    neighbourhood radius (inclusive)
         * @param minPts minimum number of neighbours (the point itself excluded)
         *               required for a point to seed or extend a cluster
         */
        public DBScan(double eps, int minPts) {
            this.eps = eps;
            this.minPts = minPts;
        }

        /**
         * Reads samples from {@code filePath} and appends them to the internal
         * sample list.
         *
         * <p>Blank lines are skipped. Lines that do not contain at least three
         * comma-separated fields, or whose first two fields are not numbers, are
         * reported on standard error and skipped. An I/O failure is reported with
         * a stack trace; the samples read so far are kept.
         *
         * @param filePath path of the data file
         * @return the internal sample list (not a copy)
         */
        public List<Point> ReadFile(String filePath) {
            BufferedReader br = null;
            try {
                br = new BufferedReader(new FileReader(filePath));
                String line = null;
                while ((line = br.readLine()) != null) {
                    if (line.trim().length() == 0) {
                        continue;// tolerate blank (e.g. trailing) lines
                    }
                    String[] fields = line.split(",");
                    if (fields.length < 3) {
                        System.err.println("Skipping malformed line: " + line);
                        continue;
                    }
                    try {
                        Point point = new Point();
                        point.x = Double.parseDouble(fields[0].trim());
                        point.y = Double.parseDouble(fields[1].trim());
                        point.point_type = fields[2];
                        this.pointList.add(point);
                    } catch (NumberFormatException e) {
                        System.err.println("Skipping malformed line: " + line);
                    }
                }
            } catch (java.io.IOException e) {
                e.printStackTrace();
            } finally {
                // br stays null when the file could not be opened.
                if (br != null) {
                    try {
                        br.close();
                    } catch (java.io.IOException e) {
                        e.printStackTrace();
                    }
                }
            }
            return pointList;
        }

        /**
         * Loads the samples from {@code filePath} and clusters them.
         *
         * <p>Unvisited samples are picked in random order. A picked sample with
         * fewer than {@code minPts} neighbours is provisionally recorded as noise;
         * otherwise it seeds a new cluster that is grown through every core point
         * reachable from it.
         *
         * @param filePath path of the data file
         */
        public void DBScanFun(String filePath) {
            this.ReadFile(filePath);
            while (this.IsOver()) {
                Point p = this.pointList.get(this.RandomNum());// random unvisited sample
                p.isVisited = true;
                List<Point> neighborList = this.GetNeighbors(p);
                if (neighborList.size() < this.minPts) {
                    // Provisional: p may still be claimed later as a border point.
                    this.noiseList.add(p);
                } else {
                    this.clusterList.add(this.expandCluster(p, neighborList));
                }
            }
        }

        /** Grows a new cluster from core point {@code seed} and returns its members. */
        private List<Point> expandCluster(Point seed, List<Point> seedNeighbors) {
            List<Point> clist = new ArrayList<DBScan.Point>();
            seed.inCluster = true;
            clist.add(seed);
            this.npointList = seedNeighbors;
            // npointList grows while it is scanned, hence the index-based loop.
            for (int i = 0; i < this.npointList.size(); i++) {
                Point candidate = this.npointList.get(i);
                if (!candidate.isVisited) {
                    candidate.isVisited = true;
                    List<Point> reachable = this.GetNeighbors(candidate);
                    if (reachable.size() >= this.minPts) {
                        // candidate is itself a core point: extend the frontier.
                        this.npointList.addAll(reachable);
                    }
                }
                if (!candidate.inCluster) {
                    // Also claims points that were visited earlier and marked as
                    // noise; they are border points of this cluster, not noise.
                    candidate.inCluster = true;
                    this.noiseList.remove(candidate);
                    clist.add(candidate);
                }
            }
            return clist;
        }

        /**
         * Returns the {@code pointList} index of a randomly chosen unvisited
         * sample. Must only be called after {@link #IsOver()} returned true, so
         * that {@code unvisitedList} is current and non-empty.
         */
        private int RandomNum() {
            int pick = this.random.nextInt(this.unvisitedList.size());
            return this.unvisitedList.get(pick);
        }

        /**
         * Returns every sample other than {@code p} itself that lies within
         * {@code eps} of {@code p}. Exclusion is by identity, so distinct samples
         * with identical coordinates do count as neighbours of each other.
         */
        private List<Point> GetNeighbors(Point p) {
            List<Point> list = new ArrayList<DBScan.Point>();
            for (int i = 0; i < this.pointList.size(); i++) {
                Point other = this.pointList.get(i);
                if (other != p && this.DistanceCalculate(other, p) <= this.eps) {
                    list.add(other);
                }
            }
            return list;
        }

        /** Euclidean distance between two samples. */
        private double DistanceCalculate(Point iris1, Point iris2) {
            double sum = Math.sqrt(Math.pow((iris1.x - iris2.x), 2)
                    + Math.pow((iris1.y - iris2.y), 2));
            return sum;
        }

        /**
         * Rebuilds {@code unvisitedList} and reports whether work remains.
         *
         * <p>Note the inverted name: the result is {@code true} while at least
         * one sample is still unvisited, and {@code false} once all are visited.
         */
        private boolean IsOver() {
            this.unvisitedList = new ArrayList<Integer>();
            for (int i = 0; i < this.pointList.size(); i++) {
                if (!this.pointList.get(i).isVisited) {
                    this.unvisitedList.add(i);
                }
            }
            return !this.unvisitedList.isEmpty();
        }

        /**
         * Prints the cluster count, then each cluster (one member per line
         * followed by the member count), then the noise samples, to standard
         * output.
         */
        public void Print() {
            System.out.println("聚为" + this.clusterList.size() + "类");
            for (int i = 0; i < this.clusterList.size(); i++) {
                List<Point> c = this.clusterList.get(i);
                System.out.println("------------");
                for (int j = 0; j < c.size(); j++) {
                    System.out.println(c.get(j).x + " " + c.get(j).y + " "
                            + c.get(j).point_type);
                }
                System.out.println(c.size());
                System.out.println("------------");
            }

            System.out.println("噪声点有" + this.noiseList.size() + "个");
            System.out.println("------------");
            for (int i = 0; i < this.noiseList.size(); i++) {
                System.out.println(this.noiseList.get(i).x + " "
                        + this.noiseList.get(i).y + " "
                        + this.noiseList.get(i).point_type);
            }
            System.out.println("------------");
        }

        /**
         * Runs the clustering with eps = 2.5 and minPts = 2.
         *
         * @param args optional; {@code args[0]} overrides the default data file path
         */
        public static void main(String[] args) {
            // Backslashes in a Windows path must be escaped inside a string literal.
            String dataFile = args.length > 0 ? args[0] : "D:\\data\\DBScan\\test.data";
            DBScan c = new DBScan(2.5, 2);
            c.DBScanFun(dataFile);
            c.Print();
        }
    }

    实验结果如下所示:

    聚为2类
    ------------
    8.0 5.5 2
    7.0 4.0 2
    8.0 5.0 2
    3
    ------------
    ------------
    3.0 0.5 1
    1.0 1.0 1
    2.0 1.5 1
    3
    ------------
    噪声点有2个
    ------------
    3.0 5.0 -1
    7.0 0.75 -1
    ------------
    

      

  • 相关阅读:
    15、集合--TreeSet的源码分析(待完成)
    13、集合--HashSet相关方法源码解析(等map更新完成之后在进行补充)
    11、集合--Set接口
    10、集合--Set、AbstractSet、HashSet、TreeSet、SortedSet源码
    9、集合--ArrayList和LinkedList的一些对比
    8、集合--LinkedList的测试以及相关方法的源码分析
    7、集合--ArrayList的测试以及相关方法的源码解析
    6、集合--List接口
    Linux 高可用(HA)集群之keepalived详解
    CentOS7安装配置redis-3.0.0
  • 原文地址:https://www.cnblogs.com/niuxiaoha/p/4661935.html
Copyright © 2020-2023  润新知