• 数据挖掘分类算法--KNN


    实验中使用的数据依然是UCI上的Iris数据集,分为样本数据和测试数据两部分,分别如下:

    样本数据是分别选取iris中三类数据各30个:

    5.1,3.5,1.4,0.2,Iris-setosa
    4.9,3.0,1.4,0.2,Iris-setosa
    4.7,3.2,1.3,0.2,Iris-setosa
    4.6,3.1,1.5,0.2,Iris-setosa
    5.0,3.6,1.4,0.2,Iris-setosa
    5.4,3.9,1.7,0.4,Iris-setosa
    4.6,3.4,1.4,0.3,Iris-setosa
    5.0,3.4,1.5,0.2,Iris-setosa
    4.4,2.9,1.4,0.2,Iris-setosa
    4.9,3.1,1.5,0.1,Iris-setosa
    5.4,3.7,1.5,0.2,Iris-setosa
    4.8,3.4,1.6,0.2,Iris-setosa
    4.8,3.0,1.4,0.1,Iris-setosa
    4.3,3.0,1.1,0.1,Iris-setosa
    5.8,4.0,1.2,0.2,Iris-setosa
    5.7,4.4,1.5,0.4,Iris-setosa
    5.4,3.9,1.3,0.4,Iris-setosa
    5.1,3.5,1.4,0.3,Iris-setosa
    5.7,3.8,1.7,0.3,Iris-setosa
    5.1,3.8,1.5,0.3,Iris-setosa
    5.4,3.4,1.7,0.2,Iris-setosa
    5.1,3.7,1.5,0.4,Iris-setosa
    4.6,3.6,1.0,0.2,Iris-setosa
    5.1,3.3,1.7,0.5,Iris-setosa
    4.8,3.4,1.9,0.2,Iris-setosa
    5.0,3.0,1.6,0.2,Iris-setosa
    5.0,3.4,1.6,0.4,Iris-setosa
    5.2,3.5,1.5,0.2,Iris-setosa
    5.2,3.4,1.4,0.2,Iris-setosa
    4.7,3.2,1.6,0.2,Iris-setosa
    7.0,3.2,4.7,1.4,Iris-versicolor
    6.4,3.2,4.5,1.5,Iris-versicolor
    6.9,3.1,4.9,1.5,Iris-versicolor
    5.5,2.3,4.0,1.3,Iris-versicolor
    6.5,2.8,4.6,1.5,Iris-versicolor
    5.7,2.8,4.5,1.3,Iris-versicolor
    6.3,3.3,4.7,1.6,Iris-versicolor
    4.9,2.4,3.3,1.0,Iris-versicolor
    6.6,2.9,4.6,1.3,Iris-versicolor
    5.2,2.7,3.9,1.4,Iris-versicolor
    5.0,2.0,3.5,1.0,Iris-versicolor
    5.9,3.0,4.2,1.5,Iris-versicolor
    6.0,2.2,4.0,1.0,Iris-versicolor
    6.1,2.9,4.7,1.4,Iris-versicolor
    5.6,2.9,3.6,1.3,Iris-versicolor
    6.7,3.1,4.4,1.4,Iris-versicolor
    5.6,3.0,4.5,1.5,Iris-versicolor
    5.8,2.7,4.1,1.0,Iris-versicolor
    6.2,2.2,4.5,1.5,Iris-versicolor
    5.6,2.5,3.9,1.1,Iris-versicolor
    5.9,3.2,4.8,1.8,Iris-versicolor
    6.1,2.8,4.0,1.3,Iris-versicolor
    6.3,2.5,4.9,1.5,Iris-versicolor
    6.1,2.8,4.7,1.2,Iris-versicolor
    6.4,2.9,4.3,1.3,Iris-versicolor
    6.6,3.0,4.4,1.4,Iris-versicolor
    6.8,2.8,4.8,1.4,Iris-versicolor
    6.7,3.0,5.0,1.7,Iris-versicolor
    6.0,2.9,4.5,1.5,Iris-versicolor
    5.7,2.6,3.5,1.0,Iris-versicolor
    6.3,3.3,6.0,2.5,Iris-virginica
    5.8,2.7,5.1,1.9,Iris-virginica
    7.1,3.0,5.9,2.1,Iris-virginica
    6.3,2.9,5.6,1.8,Iris-virginica
    6.5,3.0,5.8,2.2,Iris-virginica
    7.6,3.0,6.6,2.1,Iris-virginica
    4.9,2.5,4.5,1.7,Iris-virginica
    7.3,2.9,6.3,1.8,Iris-virginica
    6.7,2.5,5.8,1.8,Iris-virginica
    7.2,3.6,6.1,2.5,Iris-virginica
    6.5,3.2,5.1,2.0,Iris-virginica
    6.4,2.7,5.3,1.9,Iris-virginica
    6.8,3.0,5.5,2.1,Iris-virginica
    5.7,2.5,5.0,2.0,Iris-virginica
    5.8,2.8,5.1,2.4,Iris-virginica
    6.4,3.2,5.3,2.3,Iris-virginica
    6.5,3.0,5.5,1.8,Iris-virginica
    7.7,3.8,6.7,2.2,Iris-virginica
    7.7,2.6,6.9,2.3,Iris-virginica
    6.0,2.2,5.0,1.5,Iris-virginica
    6.9,3.2,5.7,2.3,Iris-virginica
    5.6,2.8,4.9,2.0,Iris-virginica
    7.7,2.8,6.7,2.0,Iris-virginica
    6.3,2.7,4.9,1.8,Iris-virginica
    6.7,3.3,5.7,2.1,Iris-virginica
    7.2,3.2,6.0,1.8,Iris-virginica
    6.2,2.8,4.8,1.8,Iris-virginica
    6.1,3.0,4.9,1.8,Iris-virginica
    6.4,2.8,5.6,2.1,Iris-virginica
    7.2,3.0,5.8,1.6,Iris-virginica
    View Code

    测试数据为剩余的60条数据:

    4.8,3.1,1.6,0.2,Iris-setosa
    5.4,3.4,1.5,0.4,Iris-setosa
    5.2,4.1,1.5,0.1,Iris-setosa
    5.5,4.2,1.4,0.2,Iris-setosa
    4.9,3.1,1.5,0.1,Iris-setosa
    5.0,3.2,1.2,0.2,Iris-setosa
    5.5,3.5,1.3,0.2,Iris-setosa
    4.9,3.1,1.5,0.1,Iris-setosa
    4.4,3.0,1.3,0.2,Iris-setosa
    5.1,3.4,1.5,0.2,Iris-setosa
    5.0,3.5,1.3,0.3,Iris-setosa
    4.5,2.3,1.3,0.3,Iris-setosa
    4.4,3.2,1.3,0.2,Iris-setosa
    5.0,3.5,1.6,0.6,Iris-setosa
    5.1,3.8,1.9,0.4,Iris-setosa
    4.8,3.0,1.4,0.3,Iris-setosa
    5.1,3.8,1.6,0.2,Iris-setosa
    4.6,3.2,1.4,0.2,Iris-setosa
    5.3,3.7,1.5,0.2,Iris-setosa
    5.0,3.3,1.4,0.2,Iris-setosa
    5.5,2.4,3.8,1.1,Iris-versicolor
    5.5,2.4,3.7,1.0,Iris-versicolor
    5.8,2.7,3.9,1.2,Iris-versicolor
    6.0,2.7,5.1,1.6,Iris-versicolor
    5.4,3.0,4.5,1.5,Iris-versicolor
    6.0,3.4,4.5,1.6,Iris-versicolor
    6.7,3.1,4.7,1.5,Iris-versicolor
    6.3,2.3,4.4,1.3,Iris-versicolor
    5.6,3.0,4.1,1.3,Iris-versicolor
    5.5,2.5,4.0,1.3,Iris-versicolor
    5.5,2.6,4.4,1.2,Iris-versicolor
    6.1,3.0,4.6,1.4,Iris-versicolor
    5.8,2.6,4.0,1.2,Iris-versicolor
    5.0,2.3,3.3,1.0,Iris-versicolor
    5.6,2.7,4.2,1.3,Iris-versicolor
    5.7,3.0,4.2,1.2,Iris-versicolor
    5.7,2.9,4.2,1.3,Iris-versicolor
    6.2,2.9,4.3,1.3,Iris-versicolor
    5.1,2.5,3.0,1.1,Iris-versicolor
    5.7,2.8,4.1,1.3,Iris-versicolor
    7.4,2.8,6.1,1.9,Iris-virginica
    7.9,3.8,6.4,2.0,Iris-virginica
    6.4,2.8,5.6,2.2,Iris-virginica
    6.3,2.8,5.1,1.5,Iris-virginica
    6.1,2.6,5.6,1.4,Iris-virginica
    7.7,3.0,6.1,2.3,Iris-virginica
    6.3,3.4,5.6,2.4,Iris-virginica
    6.4,3.1,5.5,1.8,Iris-virginica
    6.0,3.0,4.8,1.8,Iris-virginica
    6.9,3.1,5.4,2.1,Iris-virginica
    6.7,3.1,5.6,2.4,Iris-virginica
    6.9,3.1,5.1,2.3,Iris-virginica
    5.8,2.7,5.1,1.9,Iris-virginica
    6.8,3.2,5.9,2.3,Iris-virginica
    6.7,3.3,5.7,2.5,Iris-virginica
    6.7,3.0,5.2,2.3,Iris-virginica
    6.3,2.5,5.0,1.9,Iris-virginica
    6.5,3.0,5.2,2.0,Iris-virginica
    6.2,3.4,5.4,2.3,Iris-virginica
    5.9,3.0,5.1,1.8,Iris-virginica
    View Code

    算法代码如下:

      1 package neugle.knn;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.FileReader;
      5 import java.util.ArrayList;
      6 import java.util.HashMap;
      7 import java.util.LinkedHashMap;
      8 import java.util.List;
      9 import java.util.Map.Entry;
     10 import java.util.Set;
     11 
     12 public class KNN {
     13     private List<Iris> irisList = new ArrayList<KNN.Iris>();
     14 
     15     class Iris {
     16         public double Sep_len;
     17         public double Sep_wid;
     18         public double Pet_len;
     19         public double Pet_wid;
     20         public String Iris_type;
     21     }
     22 
     23     // 读取数据
     24     public List<Iris> ReadFile(String filePath) {
     25         FileReader fr = null;
     26         BufferedReader br = null;
     27         List<Iris> irisList = new ArrayList<KNN.Iris>();
     28         try {
     29             fr = new FileReader(filePath);
     30             br = new BufferedReader(fr);
     31             String line = null;
     32             while ((line = br.readLine()) != null) {
     33                 Iris iris = new Iris();
     34                 String[] agrs = line.split(",");
     35                 iris.Sep_len = Double.parseDouble(agrs[0]);
     36                 iris.Sep_wid = Double.parseDouble(agrs[1]);
     37                 iris.Pet_len = Double.parseDouble(agrs[2]);
     38                 iris.Pet_wid = Double.parseDouble(agrs[3]);
     39                 iris.Iris_type = agrs[4];
     40                 irisList.add(iris);
     41             }
     42         } catch (Exception e) {
     43             e.printStackTrace();
     44         } finally {
     45             try {
     46                 br.close();
     47             } catch (Exception e) {
     48                 e.printStackTrace();
     49             }
     50         }
     51         return irisList;
     52     }
     53 
     54     // 计算测试数据和样本点中每个点的距离
     55     public LinkedHashMap<Integer, Double> GetDistance(Iris iris) {
     56         LinkedHashMap<Integer, Double> irisMap = new LinkedHashMap<Integer, Double>();
     57         for (int i = 0; i < this.irisList.size(); i++) {
     58             double d = this.DistanceCalculate(iris, this.irisList.get(i));
     59             irisMap.put(i, d);
     60         }
     61         return irisMap;
     62     }
     63 
     64     private double DistanceCalculate(Iris iris1, Iris iris2) {
     65         double sum = Math.sqrt(Math.pow((iris1.Sep_len - iris2.Sep_len), 2)
     66                 + Math.pow((iris1.Sep_wid - iris2.Sep_wid), 2)
     67                 + Math.pow((iris1.Pet_len - iris2.Pet_len), 2)
     68                 + Math.pow((iris1.Pet_wid - iris2.Pet_wid), 2));
     69         return sum;
     70     }
     71 
     72     // 找出前k个数据
     73     public List<Iris> FindKData(int k, LinkedHashMap<Integer, Double> irisMap) {
     74         List<Integer> iList = new ArrayList<Integer>();
     75         List<Iris> rList = new ArrayList<KNN.Iris>();
     76         Set<Entry<Integer, Double>> set = irisMap.entrySet();
     77         for (int i = 0; i < k; i++) {
     78             int key = 0;
     79             double value = 0;
     80             boolean flag = true;
     81             for (Entry<Integer, Double> e : set) {
     82                 if (flag == true) {
     83                     key = e.getKey();
     84                     value = e.getValue();
     85                     flag = false;
     86                     continue;
     87                 }
     88                 if (e.getValue() < value) {
     89                     key = e.getKey();
     90                     value = e.getValue();
     91                 }
     92             }
     93             iList.add(key);
     94             irisMap.remove(key);
     95         }
     96 
     97         for (int i = 0; i < iList.size(); i++) {
     98             rList.add(this.irisList.get(iList.get(i)));
     99         }
    100         return rList;
    101     }
    102 
    103     // 找出该测试数据应属于哪一类
    104     public String FindClass(List<Iris> iList) {
    105         HashMap<String, Integer> map = new HashMap<String, Integer>();
    106         for (int i = 0; i < iList.size(); i++) {
    107             String s = iList.get(i).Iris_type;
    108             if (map.containsKey(s)) {
    109                 map.put(s, map.get(s) + 1);
    110             } else {
    111                 map.put(s, 1);
    112             }
    113         }
    114 
    115         String key = null;
    116         int value = 0;
    117         for (Entry<String, Integer> e : map.entrySet()) {
    118             if (e.getValue() > value) {
    119                 value = e.getValue();
    120                 key = e.getKey();
    121             }
    122         }
    123         return key;
    124     }
    125 
    126     // 操控方法
    127     public void Calc(String filePath1, String filePath2, int k) {
    128         this.irisList = this.ReadFile(filePath1);
    129         List<Iris> fList = this.ReadFile(filePath2);
    130         System.out.println("测试数据展示:");
    131         System.out.println("-----------------------");
    132         for (int i = 0; i < fList.size(); i++) {
    133             Iris iris = fList.get(i);
    134             System.out.println(iris.Pet_len + " " + iris.Pet_wid + " "
    135                     + iris.Sep_len + " " + iris.Sep_wid + " " + iris.Iris_type);
    136         }
    137         System.out.println("-----------------------");
    138         System.out.println("测试结果为:");
    139         System.out.println("-----------------------");
    140         for (int i = 0; i < fList.size(); i++) {
    141             Iris iris = fList.get(i);
    142             LinkedHashMap<Integer, Double> dMap = this.GetDistance(iris);
    143             List<Iris> iList = this.FindKData(k, dMap);
    144             String type = this.FindClass(iList);
    145             System.out.println(iris.Pet_len + " " + iris.Pet_wid + " "
    146                     + iris.Sep_len + " " + iris.Sep_wid + " " + type);
    147         }
    148         System.out.println("-----------------------");
    149     }
    150 
    151     public static void main(String[] args) {
    152         KNN knn = new KNN();
    153         String filePath1 = "D:\data\KNN\iris.data";// 样本数据位置
    154         String filePath2 = "D:\data\KNN\firis.data";// 测试数据位置
    155         int k = 3;
    156         knn.Calc(filePath1, filePath2, k);
    157     }
    158 }

    实验结果如下:

    1.6 0.2 4.8 3.1 Iris-setosa
    1.5 0.4 5.4 3.4 Iris-setosa
    1.5 0.1 5.2 4.1 Iris-setosa
    1.4 0.2 5.5 4.2 Iris-setosa
    1.5 0.1 4.9 3.1 Iris-setosa
    1.2 0.2 5.0 3.2 Iris-setosa
    1.3 0.2 5.5 3.5 Iris-setosa
    1.5 0.1 4.9 3.1 Iris-setosa
    1.3 0.2 4.4 3.0 Iris-setosa
    1.5 0.2 5.1 3.4 Iris-setosa
    1.3 0.3 5.0 3.5 Iris-setosa
    1.3 0.3 4.5 2.3 Iris-setosa
    1.3 0.2 4.4 3.2 Iris-setosa
    1.6 0.6 5.0 3.5 Iris-setosa
    1.9 0.4 5.1 3.8 Iris-setosa
    1.4 0.3 4.8 3.0 Iris-setosa
    1.6 0.2 5.1 3.8 Iris-setosa
    1.4 0.2 4.6 3.2 Iris-setosa
    1.5 0.2 5.3 3.7 Iris-setosa
    1.4 0.2 5.0 3.3 Iris-setosa
    3.8 1.1 5.5 2.4 Iris-versicolor
    3.7 1.0 5.5 2.4 Iris-versicolor
    3.9 1.2 5.8 2.7 Iris-versicolor
    5.1 1.6 6.0 2.7 Iris-virginica
    4.5 1.5 5.4 3.0 Iris-versicolor
    4.5 1.6 6.0 3.4 Iris-versicolor
    4.7 1.5 6.7 3.1 Iris-versicolor
    4.4 1.3 6.3 2.3 Iris-versicolor
    4.1 1.3 5.6 3.0 Iris-versicolor
    4.0 1.3 5.5 2.5 Iris-versicolor
    4.4 1.2 5.5 2.6 Iris-versicolor
    4.6 1.4 6.1 3.0 Iris-versicolor
    4.0 1.2 5.8 2.6 Iris-versicolor
    3.3 1.0 5.0 2.3 Iris-versicolor
    4.2 1.3 5.6 2.7 Iris-versicolor
    4.2 1.2 5.7 3.0 Iris-versicolor
    4.2 1.3 5.7 2.9 Iris-versicolor
    4.3 1.3 6.2 2.9 Iris-versicolor
    3.0 1.1 5.1 2.5 Iris-versicolor
    4.1 1.3 5.7 2.8 Iris-versicolor
    6.1 1.9 7.4 2.8 Iris-virginica
    6.4 2.0 7.9 3.8 Iris-virginica
    5.6 2.2 6.4 2.8 Iris-virginica
    5.1 1.5 6.3 2.8 Iris-virginica
    5.6 1.4 6.1 2.6 Iris-virginica
    6.1 2.3 7.7 3.0 Iris-virginica
    5.6 2.4 6.3 3.4 Iris-virginica
    5.5 1.8 6.4 3.1 Iris-virginica
    4.8 1.8 6.0 3.0 Iris-virginica
    5.4 2.1 6.9 3.1 Iris-virginica
    5.6 2.4 6.7 3.1 Iris-virginica
    5.1 2.3 6.9 3.1 Iris-virginica
    5.1 1.9 5.8 2.7 Iris-virginica
    5.9 2.3 6.8 3.2 Iris-virginica
    5.7 2.5 6.7 3.3 Iris-virginica
    5.2 2.3 6.7 3.0 Iris-virginica
    5.0 1.9 6.3 2.5 Iris-virginica
    5.2 2.0 6.5 3.0 Iris-virginica
    5.4 2.3 6.2 3.4 Iris-virginica
    5.1 1.8 5.9 3.0 Iris-virginica
    View Code
  • 相关阅读:
    RDD模型
    python代码实现自动化测试中字符串自动生成
    windows7下python2.6 + mysql5.5(No module named MySQLdb/DLL load failed/from sets import ImmutableSet)
    python import的用法
    linux命令后台运行
    jenkins插件库打开报错There were errors checking the update sites
    python虚拟环境pipenv的安装和使用
    微信支付v3 php回调函数 TP5 签名/验签/下载证书
    hive/spark的RoaringBitmap写入Clickhouse的bitmap
    收集与测试有关的网站
  • 原文地址:https://www.cnblogs.com/niuxiaoha/p/4651018.html
Copyright © 2020-2023  润新知