• DBSCAN 的简单实现


    .NET 的机器学习资料真的很少。聚类算法倒是很多,但实现大多是 Java 或 C++ 的,C# 的很少(也许是我孤陋寡闻了)。我找了一个 DBSCAN 算法的实现,拿出来给大家分享,请大家看了指点一下。其中距离的计算实现了欧氏距离和余弦(cos)距离。这类方法大概一共有十几种,但实现也是 Java 的居多。矩阵算法也多是 Java 的,C# 的少,请大牛不吝赐教。废话不多说,代码如下:


    using System;
    using System.Collections;

    namespace DBSCAN
    {
     /// <summary>
     /// Cluster data using DBSCAN (Density-Based Spatical Clustering of Application with Noise) methed
     /// See "Data Mining" for further information
     /// </summary>
     public sealed class DBSCAN
     {
      public ArrayList DataPoints = new ArrayList(128);
      private ArrayList DP2DP;
      private int m_Core_Num;
      private int m_MinPts;
      private double m_eps;

            //增加数据点
      /// <summary>
      /// Add DataPoint to DBSCAN module to cluster
      /// </summary>
      public void AddDataPoint(DataPoint dp)
      {
       DataPoints.Add(dp);
       m_Core_Num = 0;
       m_MinPts = 0;
       m_eps = 0;
      }

            //清除数据
      public void RemoveAllDataPoints()
      {
       DataPoints.Clear();
       DP2DP.Clear();
       m_Core_Num = 0;
       m_MinPts = 0;
       m_eps = 0;
      }
            //数据初始化
      public void ResetAllDataPointsState()
      {
       foreach(DataPoint dp in DataPoints)
       {
        dp.class_id = 0;
        dp.core_tag = false;
        dp.used_tag = false;
       }
      }
            //数据初始化
      public void PrepareDBSCAN_Table()
      {
       int dp_count = DataPoints.Count;
       DP2DP = new ArrayList(dp_count);
       for(int i=0;i<dp_count;i++)
       {
        // SortedList use DBSCANSort so that can support duplicate key
        // dp_count also include the point itself
        DP2DP.Add(new SortedList(new DBSCANSort(), dp_count));
       }
       SortedList sl;
       DataPoint dp;
       for(int i=0;i<dp_count;i++)
       {
        sl=(SortedList)DP2DP[i];
        dp=(DataPoint)DataPoints[i];
        for(int j=0;j<dp_count;j++)
        {
         double distance = dp.Distance((DataPoint)DataPoints[j]);
         sl.Add(distance, DataPoints[j]);
        }
       }
      }
            //构建 核心
            /// <summary>
            /// eps 距离  minpts  聚类数
            /// </summary>
            /// <param name="eps"></param>
            /// <param name="MinPts"></param>
            /// <returns></returns>
      public int BuildCorePoint(double eps, int MinPts)
      {
       ResetAllDataPointsState();
       int core_num = 0;
       SortedList sl;
       DataPoint src_dp, des_dp;
       for(int i=0;i<DataPoints.Count;i++)
       {
        sl=(SortedList)DP2DP[i];
        des_dp=(DataPoint)sl.GetByIndex(MinPts);
        src_dp=(DataPoint)DataPoints[i];
        if(src_dp.Distance(des_dp)<eps)
        {
         src_dp.core_tag=true;
         core_num++;
        }
       }
       if(core_num>0)
       {
        m_Core_Num = core_num;
        m_MinPts = MinPts;
        m_eps = eps;
       }
       return core_num;
      }
            //聚类
      public void DBSCAN_Cluster()
      {
       DataPoint dp;
       int current_class_id = 1;
       for(int i=0;i<DataPoints.Count;i++)
       {
        dp=(DataPoint)DataPoints[i];
        if(dp.used_tag==false && dp.core_tag==true)
        {
         dp.class_id = current_class_id;
         dp.used_tag = true;
         CorePointCluster(i, current_class_id);
         current_class_id++;
        }
       }  
      }

     
            //核心点聚类
      private void CorePointCluster(int dp_pos, int core_class_id)
      {
       DataPoint src_dp, des_dp;
       SortedList sl=(SortedList)DP2DP[dp_pos];
       src_dp=(DataPoint)sl.GetByIndex(0);
       int i=1;
       des_dp=(DataPoint)sl.GetByIndex(i);
       while(src_dp.Distance(des_dp)<m_eps)
       {
        if(des_dp.used_tag == false)
        {
         des_dp.class_id = core_class_id;
         des_dp.used_tag = true;
         if(des_dp.core_tag == true)
          CorePointCluster(DataPoints.IndexOf(des_dp),core_class_id);
        }
        i++;
        try
        {
         des_dp=(DataPoint)sl.GetByIndex(i);
        }
        catch( ArgumentOutOfRangeException )
        {
         // To avoid eps is too large that out of index
         return;
        }
       }
      }
     }

     /// <summary>
     /// DBSCAN DataPoint
     /// </summary>
     public class DataPoint
     {

            //是否中心
      public bool core_tag = false;
      
            //是否噪音(无关的点 )
            public int class_id = 0; // 0 indicate NOISE
      
            //计算过
            public bool used_tag = false;

      public double d1; // dimension x-axis
      public double d2; // dimension y-axis
      // dimension n (n>=3) can be extend by inherient this class
      // and reimplement following two method.

      public DataPoint(double x, double y)
      {
       d1=x;
       d2=y;
      }


            /// <summary>
            /// 距离 ,可以使用  雅阁比系数等 0-1之间
            /// </summary>
            /// <param name="dp"></param>
            /// <returns></returns>
      public double Distance(DataPoint dp)
      {

                if (this != dp)
                {
                    double d1sq = (d1 - dp.d1) * (d1 - dp.d1);
                    double d2sq = (d2 - dp.d2) * (d2 - dp.d2);
                    return Math.Sqrt(d1sq + d2sq);

                    //计算平方差
                }
                else
                    return 0;//同1个点
      }
            /// <summary>
            /// 距离 ,可以使用  雅阁比系数等 0-1之间
            /// </summary>
            /// <param name="dp"></param>
            /// <returns></returns>
            public double CosineDistance(DataPoint dp)
            {
                if (this != dp)
                {
                    double d1sq = (d1 * dp.d1) + (d1 * dp.d1);
                    double d2sq = Math.Sqrt((d2 * d2) + (dp.d2 + dp.d2));
                     double  temp= d1sq/d2sq;
                     return temp;
                    //计算平方差
                }
                else
                    return 0;//同1个点
            }
     }
        /// <summary>
        /// 比较数据点的大小
        /// </summary>
     public class DBSCANSort:IComparer
     {
      public int Compare(object x, object y)
      {
       int iResult;
       if((double)x > (double)y)
        iResult = 1;
       else
        iResult = -1;
       return iResult;
      }
     }
    }

  • 相关阅读:
    Python3学习之路~8.5 SocketServer实现多并发
    Python3学习之路~8.4 利用socket实现文件传送+MD5校验
    [jzyzoj2021]lca模板题
    [BZOJ4542] [JZYZOJ2014][Hnoi2016] 大数(莫队+离散化)
    BZOJ3289[JZYZOJP2018]: Mato的文件管理 莫队+树状数组+离散化
    离散化的后续
    数据离散化 ( 以及 stl 中的 unique( ) 的用法 )+ bzoj3289:Mato的文件管理
    stl upper_bound()
    [BZOJ 3720][JZYZOJ 2016]gty的妹子树 强制在线 树分块/树套树
    所以学树分块的时候为什么要看vector啊sjb
  • 原文地址:https://www.cnblogs.com/wcLT/p/2300350.html
Copyright © 2020-2023  润新知