# 1. For the underlying theory, see Machine_Learning.
# K-Means demo: generate three synthetic 2-D blobs, cluster them with
# scikit-learn's KMeans, and plot the clusters together with their centroids.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# 150 two-feature samples drawn around 3 centers (std 0.5); random_state
# fixes the generated data so the run is reproducible.
X, y = make_blobs(
    n_samples=150,
    n_features=2,
    centers=3,
    cluster_std=0.5,
    shuffle=True,
    random_state=0,
)

# With n_init=10 the algorithm is run 10 times from different random initial
# centroids, and the run with the smallest SSE is kept as the final model.
# SSE (scikit-learn's "inertia") is the sum of *squared* Euclidean distances
# between each sample and the center of the cluster it is assigned to.
km = KMeans(
    n_clusters=3,
    init='random',
    n_init=10,
    max_iter=300,
    tol=0.0004,
    random_state=0,
)

# y_km stores, for every sample in X, the index of the cluster it belongs to.
y_km = km.fit_predict(X)

# Output recorded by the original author (illustrative only -- the numbering
# of the clusters is arbitrary and may differ in other environments):
# [1, 0, 0, 0, 1, 0, 0, 1, 2, 0, 1, 2, 2, 0, 0, 2, 2, 1, 2, 1, 0, 1, 0, 0, 2,
#  1, 1, 0, 2, 1, 2, 2, 2, 2, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 0, 2, 0,
#  1, 0, 0, 1, 1, 2, 0, 1, 2, 0, 2, 2, 2, 2, 0, 2, 0, 1, 0, 0, 0, 1, 1, 0, 1,
#  0, 0, 2, 2, 0, 1, 1, 0, 0, 1, 1, 1, 2, 2, 1, 1, 0, 1, 0, 1, 0, 2, 2, 1, 1,
#  1, 1, 2, 1, 1, 0, 2, 0, 0, 0, 2, 0, 1, 2, 0, 2, 0, 0, 2, 2, 0, 1, 0, 0, 1,
#  1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 1, 2, 0, 0, 1, 1, 2, 2, 2, 2, 1, 1]

# One (colour, marker) pair per cluster index; drawing in index order keeps
# the legend entries in the order "cluster 1", "cluster 2", "cluster 3".
cluster_styles = [
    ('lightgreen', 's'),
    ('orange', 'o'),
    ('lightblue', 'v'),
]
for cluster_idx, (color, marker) in enumerate(cluster_styles):
    members = y_km == cluster_idx  # boolean mask selecting this cluster's rows
    plt.scatter(
        X[members, 0],
        X[members, 1],
        s=50,
        c=color,
        marker=marker,
        label=f'cluster {cluster_idx + 1}',
    )

# Draw the fitted cluster centers on top as large red stars.
plt.scatter(
    km.cluster_centers_[:, 0],
    km.cluster_centers_[:, 1],
    s=250,
    marker='*',
    c='red',
    label='centroids',
)
plt.legend()
plt.grid()
plt.show()