• INT104-lab11 [聚类] [iris数据集] [K-means Algorithm]


    [K-means Algorithm][3D子图]

    由于 K-means 算法的初始中心点是随机选取的,

    聚类结果可能较差;若某个簇没有分到任何样本,求均值时会除以零,甚至出现运行时错误(RE)!

      1 import numpy as np
      2 import random
      3 import matplotlib.pyplot as plt
      4 from sklearn.manifold import TSNE
      5 
      6 
      def read(path: str) -> list:
          """Load comma-separated rows from a text file.

          The file at ``path`` is opened in text mode and every line is split
          on ``","``.  Only lines that yield exactly five fields are kept, so
          blank or malformed lines are dropped.  Fields are returned as raw
          strings; the line's trailing newline (if any) stays attached to the
          last field.

          Returns:
              A list of five-element lists of strings, in file order.
          """
          with open(path, "r") as f:
              candidates = [line.split(",") for line in f.readlines()]
          return [fields for fields in candidates if len(fields) == 5]
     19 
     20 
     def init(D) -> tuple:
         """Split raw string rows into numeric features and class labels.

         Each row of ``D`` is a list of strings whose last entry is the label
         and whose preceding entries are parsed with ``float``.  Anything from
         the first newline onward is cut off the label.

         Args:
             D: list of rows; the column count is taken from the first row.

         Returns:
             ``(X, Y, n, m)`` where ``X`` is the list of feature vectors, ``Y``
             the list of labels, ``n`` the number of rows and ``m`` the number
             of feature columns.  An empty ``D`` yields ``([], [], 0, 0)``.

         Raises:
             ValueError: if a feature field cannot be parsed as a float.
         """
         if not D:
             # No first row to take the column count from.
             return [], [], 0, 0
         n, m = len(D), len(D[0]) - 1
         X = [[float(row[j]) for j in range(m)] for row in D]
         # The label still carries the line terminator left by readlines().
         Y = [row[m].split("\n")[0] for row in D]
         return X, Y, n, m
     31 
     32 
     def randomDataset(seed: int, D) -> list:
         """Shuffle ``D`` in place and return the same list object.

         ``seed`` is accepted but not applied to the random generator, so the
         resulting order depends on the current state of the global ``random``
         module rather than on this argument.
         """
         random.shuffle(D)
         return D
     38 
     39 
     def setPoints(K: int, X: list, n: int, m: int) -> tuple:
         """Draw ``K`` random starting centroids inside the data's bounding box.

         Args:
             K: number of centroids to draw.
             X: list of feature vectors.
             n: number of rows of ``X`` to scan.
             m: number of feature columns to scan.

         Returns:
             ``(minValues, maxValues, P)``: the per-column minimum and maximum
             over the first ``n`` rows, and ``P``, a list of ``K`` points whose
             ``j``-th coordinate is drawn with ``random.uniform`` between
             ``minValues[j]`` and ``maxValues[j]``.
         """
         minValues, maxValues = [], []
         for j in range(m):
             column = [X[i][j] for i in range(n)]
             # Use the real extrema so data outside [-10000, 10000] is bounded
             # correctly; the old sentinels remain only as the n == 0 fallback.
             minValues.append(min(column, default=10000.0))
             maxValues.append(max(column, default=-10000.0))
         P = [
             [random.uniform(minValues[j], maxValues[j]) for j in range(m)]
             for _ in range(K)
         ]
         return minValues, maxValues, P
     56 
     57 
     def getType(a, b, c):
         """Return the 1-based index of the smallest of three distances.

         Args:
             a, b, c: distances to the first, second and third centroid.

         Returns:
             1, 2 or 3.  Ties go to the lowest index, so a point is never
             assigned to a centroid that is strictly farther than another one.
         """
         if a <= b and a <= c:
             return 1
         # a is not the minimum here, so the winner is whichever of b, c is smaller.
         if b <= c:
             return 2
         return 3
     64 
     65 
     # Largest per-coordinate centroid movement that still counts as "unchanged".
     eps = 1e-3


     def compare(P, newP, K, m):
         """Tell whether two centroid sets agree to within ``eps``.

         Looks at the first ``K`` points and first ``m`` coordinates of ``P``
         and ``newP``; returns ``False`` as soon as one coordinate differs by
         more than ``eps``, otherwise ``True``.
         """
         return not any(
             abs(P[i][j] - newP[i][j]) > eps
             for i in range(K)
             for j in range(m)
         )
     75 
     76 
     def G(A, m):
         """Return the coordinate-wise mean of the points in ``A``.

         Only the first ``m`` coordinates of each point are averaged.  The
         divisor is ``len(A)``, so an empty ``A`` with ``m > 0`` raises
         ``ZeroDivisionError``.
         """
         totals = [0] * m
         for point in A:
             for j in range(m):
                 totals[j] += point[j]
         count = len(A)
         return [total / count for total in totals]
     84 
     85 
     def getNewP(X, dis, n, m):
         """Recompute the three centroids from the current assignments.

         ``dis[i][0]`` is read as the cluster label of ``X[i]``: label 1 goes
         to the first cluster, label 2 to the second, and any other value to
         the third.  Each cluster is then averaged with ``G`` over ``m``
         coordinates.

         Returns:
             A list of three centroids, in cluster order.
         """
         clusters = ([], [], [])
         for i in range(n):
             label = dis[i][0]
             slot = 0 if label == 1 else 1 if label == 2 else 2
             clusters[slot].append(X[i])
         return [G(members, m) for members in clusters]
     96 
     97 
     def K_means_algorithm(K: int, X: list, Y: list, n: int, m: int):
         """Cluster ``X`` into three groups by iterating assign/update steps.

         Starting centroids come from ``setPoints``.  Each pass labels every
         point with the nearest centroid (``euclideanDistance`` + ``getType``),
         recomputes the centroids with ``getNewP`` and stops once ``compare``
         reports that they no longer move.

         Args:
             K: number of clusters; must be 3 because the assignment and update
                 helpers are written for exactly three centroids.
             X: list of feature vectors.
             Y: labels; not used by the algorithm.
             n: number of points.
             m: number of feature columns.

         Returns:
             ``(P, K_distances)``: the final centroids and, for each point, a
             list ``[Type, dis1, dis2, dis3]`` with its cluster label and its
             distance to each centroid.

         Raises:
             ValueError: if ``K`` is not 3.
             RuntimeError: if an empty cluster keeps appearing after repeated
                 random restarts.
         """
         if K != 3:
             raise ValueError("K_means_algorithm supports exactly K == 3")

         minValues, maxValues, P = setPoints(K, X, n, m)

         print(minValues)
         print(maxValues)
         print(np.array(P))

         restarts_left = 100
         while True:
             # Rebuild the assignments on every pass: getNewP reads entries
             # 0..n-1, so appending to one ever-growing list would make it see
             # only the first pass's labels.
             K_distances = []
             for i in range(n):
                 dis1 = euclideanDistance(P[0], X[i], m)
                 dis2 = euclideanDistance(P[1], X[i], m)
                 dis3 = euclideanDistance(P[2], X[i], m)
                 Type = getType(dis1, dis2, dis3)
                 K_distances.append([Type, dis1, dis2, dis3])

             try:
                 newP = getNewP(X, K_distances, n, m)
             except ZeroDivisionError:
                 # A cluster received no points, so its mean is undefined.
                 # Draw fresh random centroids and try again, a bounded
                 # number of times so degenerate data cannot loop forever.
                 if restarts_left == 0:
                     raise RuntimeError(
                         "K-means kept producing an empty cluster"
                     ) from None
                 restarts_left -= 1
                 _, _, P = setPoints(K, X, n, m)
                 continue

             if compare(P, newP, K, m):
                 break
             P = newP
             print(np.array(newP))
             print("Yes")
         return P, K_distances
    123 
    124 
    def similarity(A, B, m) -> float:
        """Cosine similarity of ``A`` and ``B`` over their first ``m`` components.

        Computes the dot product divided by the product of the two Euclidean
        norms (each taken with ``np.sqrt``).
        """
        dot = 0
        norm_a_sq = 0
        norm_b_sq = 0
        for k in range(m):
            a, b = A[k], B[k]
            dot += a * b
            norm_a_sq += a * a
            norm_b_sq += b * b
        norm_product = np.sqrt(norm_a_sq) * np.sqrt(norm_b_sq)
        return dot / norm_product
    134 
    135 
    def euclideanDistance(A, B, m) -> float:
        """Euclidean distance between ``A`` and ``B`` over their first ``m`` components."""
        squared_total = 0
        for k in range(m):
            delta = A[k] - B[k]
            squared_total += delta * delta
        return np.sqrt(squared_total)
    141 
    142 
    def answer(X, Y, dis, n):
        """Plot the clustering result next to the true labels in 3-D.

        ``X`` is embedded into three dimensions with t-SNE.  The left subplot
        ('answer') colours the first ``n`` embedded points by the cluster label
        in ``dis[i][0]`` (1, 2, anything else); the right subplot ('data')
        colours them by ``Y[i]`` ("Iris-setosa", "Iris-versicolor", anything
        else).  The figure is shown with ``plt.show()`` and a confirmation
        line is printed afterwards.
        """
        tsne = TSNE(n_components=3)
        tsne.fit_transform(np.array(X))

        # One (xs, ys, zs) triple of coordinate lists per group.
        by_cluster = [([], [], []) for _ in range(3)]
        by_species = [([], [], []) for _ in range(3)]

        for i in range(n):
            point = tsne.embedding_[i]
            coords = (point[0], point[1], point[2])

            label = dis[i][0]
            cluster_slot = 0 if label == 1 else 1 if label == 2 else 2

            species = Y[i]
            if species == "Iris-setosa":
                species_slot = 0
            elif species == "Iris-versicolor":
                species_slot = 1
            else:
                species_slot = 2

            for axis, value in enumerate(coords):
                by_cluster[cluster_slot][axis].append(value)
                by_species[species_slot][axis].append(value)

        fig = plt.figure(figsize=(12, 6), facecolor='w')

        # Left: clusters found by K-means.
        ax1 = fig.add_subplot(121, projection='3d')
        plt.title('answer')
        for xs, ys, zs in by_cluster:
            ax1.scatter(xs, ys, zs)

        # Right: ground-truth species.
        ax2 = fig.add_subplot(122, projection='3d')
        plt.title('data')
        for xs, ys, zs in by_species:
            ax2.scatter(xs, ys, zs)

        plt.show()
        print("Showing done")
    196 
    197 
    if __name__ == '__main__':
        # Load the rows from "iris.data" and shuffle them.
        dataset = randomDataset(17, read("iris.data"))

        # Separate numeric features from labels, then cluster into 3 groups.
        X, Y, n, m = init(dataset)
        P, dis = K_means_algorithm(3, X, Y, n, m)

        print("Done!")
        print(np.array(P))

        # Draw the clustering next to the true labels.
        answer(X, Y, dis, n)
        plt.show()

    ~~Jason_liu O(∩_∩)O
  • 相关阅读:
    【NOI2000T4】单词查找树-trie树
    【POJ1698】Alice's Chance-二分图多重匹配
    【POJ1698】Alice's Chance-二分图多重匹配
    【POJ3159】Candies-差分约束系统
    【POJ3159】Candies-差分约束系统
    【POJ2914】Minimum Cut-无向图的全局最小割
    【POJ2914】Minimum Cut-无向图的全局最小割
    【HDU3555】Bomb-数位DP入门题
    【HDU3555】Bomb-数位DP入门题
    codevs 2018 反病毒软件
  • 原文地址:https://www.cnblogs.com/JasonCow/p/14819150.html
Copyright © 2020-2023  润新知