# 轮廓系数 (silhouette coefficient)
# Silhouette analysis for KMeans.
#
# Reads the first `number1` comma-separated numeric columns of every row in
# "red_nopca_nolabel.txt", fits KMeans for k = 2..24, prints the silhouette
# score for each k, and finally prints all (label, score) pairs ranked from
# the highest score to the lowest.

# NOTE: the import order is intentionally left as-is. `from pylab import *`
# injects many names into this namespace, and the imports that follow it
# (e.g. `from numpy.random import random`) rebind some of them.
import numpy as np
from sklearn.cluster import KMeans
from pylab import *
import codecs
import matplotlib.pyplot as plt
try:
    from sklearn.metrics import calinski_harabaz_score
except ImportError:
    # Newer scikit-learn releases only ship the correctly spelled
    # `calinski_harabasz_score`; keep the old name bound so the import does
    # not abort the whole script.
    from sklearn.metrics import calinski_harabasz_score as calinski_harabaz_score
import pandas as pd
from numpy.random import random
from sklearn import preprocessing
from sklearn import metrics
import operator

data = []
labels = []
number1 = 10  # number of leading columns taken from each row as features

with open("red_nopca_nolabel.txt", "r") as f:
    for line in f:
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing empty line) carry no sample.
            continue
        fields = stripped.split(',')
        # Index explicitly so a row with fewer than `number1` columns still
        # fails loudly (IndexError) instead of producing a ragged matrix.
        data.append([float(fields[i]) for i in range(number1)])

datas = np.array(data)

# Silhouette score for every candidate cluster count k = 2..24.
silhouette_all = []
for k in range(2, 25):
    kmeans_model = KMeans(n_clusters=k, random_state=1).fit(datas)
    labels = kmeans_model.labels_
    score = metrics.silhouette_score(datas, labels, metric='euclidean')
    silhouette_all.append(score)
    print('这个是k={}次时的轮廓系数:'.format(k), score)

# Maps a per-k label to the silhouette score formatted as text.
dic = {
    'k={}时轮廓系数'.format(k): '{}'.format(score)
    for k, score in enumerate(silhouette_all, start=2)
}

# Rank by the numeric score. Sorting the formatted strings directly would
# compare them lexicographically and misorder negative or
# scientific-notation values (e.g. '-0.2' > '-0.1', '1e-05' > '0.5').
rankdata = sorted(dic.items(), key=lambda item: float(item[1]), reverse=True)
print(rankdata)