• DBSCAN算法的java实现


    DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a data clustering algorithm proposed by Martin Ester, Hans-Peter Kriegel, Jörg Sander and Xiaowei Xu in 1996. It is a density based clustering algorithm because it finds a number of clusters starting from the estimated density distribution of corresponding nodes. DBSCAN is one of the most common clustering algorithms and also most cited in scientific literature.

    DBSCAN是一种基于密度的聚类算法,它的基本原理就是给定两个参数,ξ和minp,其中 ξ可以理解为半径,算法将在这个半径内查找样本,minp是一个以ξ为半径查找到的样本个数n的限制条件,只要n>=minp,查找到的样本点就是核心样本点,算法的具体描述见参考文件1,下边是这个算法的java实现:

    首先定义一个Point类,代表样本点:

      1: package com.sunzhenxing;
    
      2: 
    
      3: public class Point {
    
      4:   private int x;
    
      5:   private int y;
    
      6:   private boolean isKey;
    
      7:   private boolean isClassed;
    
      8:   
    
      9:   public boolean isKey() {
    
     10:     return isKey;
    
     11:   }
    
     12:   public void setKey(boolean isKey) {
    
     13:     this.isKey = isKey;
    
     14:     this.isClassed=true;
    
     15:   }
    
     16:   public boolean isClassed() {
    
     17:     return isClassed;
    
     18:   }
    
     19:   public void setClassed(boolean isClassed) {
    
     20:     this.isClassed = isClassed;
    
     21:   }
    
     22:   public int getX() {
    
     23:     return x;
    
     24:   }
    
     25:   public void setX(int x) {
    
     26:     this.x = x;
    
     27:   }
    
     28:   public int getY() {
    
     29:     return y;
    
     30:   }
    
     31:   public void setY(int y) {
    
     32:     this.y = y;
    
     33:   }
    
     34:   
    
     35:   public Point(){
    
     36:     x=0;
    
     37:     y=0;
    
     38:   }
    
     39:   public Point(int x,int y){
    
     40:     this.x=x;
    
     41:     this.y=y;
    
     42:   }
    
     43:   public Point(String str){
    
     44:     String[] p=str.split(",");
    
     45:     this.x=Integer.parseInt(p[0]);
    
     46:     this.y=Integer.parseInt(p[1]);
    
     47:   }
    
     48:   public String print(){
    
     49:     return "<"+this.x+","+this.y+">";
    
     50:   }
    
     51: }
    
     52: 

    然后定义一个工具类,为算法的实现服务:

      1: package com.sunzhenxing;
    
      2: 
    
      3: import java.io.BufferedReader;
    
      4: import java.io.FileReader;
    
      5: import java.io.IOException;
    
      6: import java.util.*;
    
      7: 
    
      8: public final class Utility {
    
      9:   //计算两点之间的距离
    
     10:   public static double getDistance(Point p,Point q){
    
     11:     int dx=p.getX()-q.getX();
    
     12:     int dy=p.getY()-q.getY();
    
     13:     double distance=Math.sqrt(dx*dx+dy*dy);
    
     14:     return distance;
    
     15:   }
    
     16:   //检测p点是不是核心点,tmpLst存储核心点的直达点
    
     17:   public static List<Point> isKeyPoint(List<Point> lst,Point p,int e,int minp){
    
     18:     int count=0;
    
     19:     List<Point> tmpLst=new ArrayList<Point>();
    
     20:     for(Iterator<Point> it=lst.iterator();it.hasNext();){
    
     21:       Point q=it.next();
    
     22:       if(getDistance(p,q)<=e){
    
     23:         ++count;
    
     24:         if(!tmpLst.contains(q)){
    
     25:           tmpLst.add(q);
    
     26:         }
    
     27:       }
    
     28:     }
    
     29:     if(count>=minp){
    
     30:       p.setKey(true);
    
     31:       return tmpLst;
    
     32:     }
    
     33:     return null;
    
     34:   }
    
     35:   //合并两个链表,前提是b中的核心点包含在a中
    
     36:   public static boolean mergeList(List<Point> a,List<Point> b){
    
     37:     boolean merge=false;
    
     38:     if(a==null || b==null){
    
     39:       return false;
    
     40:     }
    
     41:     for(int index=0;index<b.size();++index){
    
     42:       Point p=b.get(index);
    
     43:       if(p.isKey() && a.contains(p)){
    
     44:         merge=true;
    
     45:         break;
    
     46:       }
    
     47:     }
    
     48:     if(merge){
    
     49:       for(int index=0;index<b.size();++index){
    
     50:         if(!a.contains(b.get(index))){
    
     51:           a.add(b.get(index));
    
     52:         }
    
     53:       }
    
     54:     }
    
     55:     return merge;
    
     56:   }
    
     57:   //获取文本中的样本点集合
    
     58:   public static List<Point> getPointsList() throws IOException{
    
     59:     List<Point> lst=new ArrayList<Point>();
    
     60:     String txtPath="src\\com\\sunzhenxing\\points.txt";
    
     61:     BufferedReader br=new BufferedReader(new FileReader(txtPath));
    
     62:     String str="";
    
     63:     while((str=br.readLine())!=null && str!=""){
    
     64:       lst.add(new Point(str));
    
     65:     }
    
     66:     br.close();
    
     67:     return lst;
    
     68:   }
    
     69:   //显示聚类的结果
    
     70:   public static void display(List<List<Point>> resultList){
    
     71:     int index=1;
    
     72:     for(Iterator<List<Point>> it=resultList.iterator();it.hasNext();){
    
     73:       List<Point> lst=it.next();
    
     74:       if(lst.isEmpty()){
    
     75:         continue;
    
     76:       }
    
     77:       System.out.println("-----第"+index+"个聚类-----");
    
     78:       for(Iterator<Point> it1=lst.iterator();it1.hasNext();){
    
     79:         Point p=it1.next();
    
     80:         System.out.println(p.print());
    
     81:       }
    
     82:       index++;
    
     83:     }
    
     84:   }
    
     85: }
    
     86: 

    最后在主程序中实现算法,如下所示:

      1: package com.sunzhenxing;
    
      2: 
    
      3: import java.io.IOException;
    
      4: import java.util.*;
    
      5: 
    
      6: public class Dbscan {
    
      7:   private final static int e=2;//ε半径
    
      8:   private final static int minp=4;//密度阈值
    
      9:   private static List<Point> pointsList=new ArrayList<Point>();//存储原始样本点
    
     10:   private static List<List<Point>> resultList=new ArrayList<List<Point>>();//存储最后的聚类结果
    
     11:   
    
     12:   private static void applyDbscan() throws IOException{
    
     13:     pointsList=Utility.getPointsList();
    
     14:     for(int index=0;index<pointsList.size();++index){
    
     15:       List<Point> tmpLst=new ArrayList<Point>();
    
     16:       Point p=pointsList.get(index);
    
     17:       if(p.isClassed())
    
     18:         continue;
    
     19:       tmpLst=Utility.isKeyPoint(pointsList, p, e, minp);
    
     20:       if(tmpLst!=null){
    
     21:         resultList.add(tmpLst);
    
     22:       }
    
     23:     }
    
     24:     int length=resultList.size();
    
     25:     for(int i=0;i<length;++i){
    
     26:       for(int j=0;j<length;++j){
    
     27:         if(i!=j){
    
     28:           if(Utility.mergeList(resultList.get(i), resultList.get(j))){
    
     29:             resultList.get(j).clear();
    
     30:           }
    
     31:         }
    
     32:       }
    
     33:     }
    
     34:   }
    
     35:   public static void main(String[] args) {
    
     36:     try {
    
     37:       //调用DBSCAN的实现算法
    
     38:       applyDbscan();
    
     39:       Utility.display(resultList);
    
     40:     } catch (IOException e) {
    
     41:       // TODO Auto-generated catch block
    
     42:       e.printStackTrace();
    
     43:     }
    
     44:     
    
     45:   }
    
     46: 
    
     47: }
    
     48: 

    下边是一个小测试, 即使用src\\com\\sunzhenxing\\points.txt文件的内容进行测试,points.txt的文件内容是:

    0,0
    0,1
    0,2
    0,3
    0,4
    0,5
    12,1
    12,2
    12,3
    12,4
    12,5
    12,6
    0,6
    0,7
    12,7
    0,8
    0,9
    1,1

    最后算法的结果是:

    -----第1个聚类-----
    <0,0>
    <0,1>
    <0,2>
    <1,1>
    <0,3>
    <0,4>
    <0,5>
    <0,6>
    <0,7>
    <0,8>
    <0,9>
    -----第2个聚类-----
    <12,1>
    <12,2>
    <12,3>
    <12,4>
    <12,5>
    <12,6>
    <12,7>
    大家画一下坐标就可以理解实验的结论了。

  • 相关阅读:
    Solr环境配置
    SolrJ解析MoreLikeThis查询结果
    思维导图软件PersonalBrain 6.0.6.4破解版使用
    离散对数-详解
    转:pptp和l2tp的区别
    DiffieHellman Key Exchange (DH)源代码
    磁盘IOPS计算
    转:TCP/IP Network Performance Benchmarks and Tools
    转:弄清楚你的业务类型——OLTP or OLAP
    U8软件的端口
  • 原文地址:https://www.cnblogs.com/sunzhenxing19860608/p/1747452.html
Copyright © 2020-2023  润新知