数据导入+欧式距离计算+互信息计算
# coding=utf-8
"""Rank every pair of CSV rows by normalized mutual information (NMI).

The script loads a numeric CSV file, treats each data row as a label vector,
computes the NMI of every unordered row pair and prints the pairs sorted from
the highest score to the lowest.  A Euclidean-distance helper for the same
row vectors is provided as well.
"""
import itertools
import math
from collections import Counter

import numpy as np

# Upper bound on how many leading CSV rows take part in the pairwise comparison.
num = 32


def distEclud(vecA, vecB):
    """Return the Euclidean distance between two equally shaped vectors.

    Args:
        vecA: First vector (ndarray or any sequence NumPy can convert).
        vecB: Second vector, same shape as ``vecA``.

    Returns:
        The square root of the summed squared differences along the first
        axis.
    """
    diff = np.asarray(vecA) - np.asarray(vecB)
    # Reduce along axis 0 so the result matches what the builtin ``sum`` gave.
    return np.sqrt(np.sum(np.square(diff), axis=0))


def _entropy(counts, total):
    """Return the base-2 Shannon entropy of a histogram holding *total* samples."""
    entropy = 0.0
    for count in counts:
        p = count / total
        entropy -= p * math.log(p, 2)
    return entropy


def NMI(A, B):
    """Return the normalized mutual information of two label vectors.

    The score is ``2 * I(A; B) / (H(A) + H(B))`` with all logarithms taken in
    base 2.

    Args:
        A: One-dimensional sequence or ndarray of labels.
        B: One-dimensional sequence or ndarray of labels, same length as ``A``.

    Returns:
        The normalized mutual information as a float.  When both entropies
        are zero (both inputs are constant, or both are empty) the labelings
        trivially agree and ``1.0`` is returned instead of dividing by zero.

    Raises:
        ValueError: If ``A`` and ``B`` do not have the same length.
    """
    labels_a = np.asarray(A).tolist()
    labels_b = np.asarray(B).tolist()
    if len(labels_a) != len(labels_b):
        raise ValueError('A and B must have the same length')

    total = float(len(labels_a))
    count_a = Counter(labels_a)
    count_b = Counter(labels_b)
    count_ab = Counter(zip(labels_a, labels_b))

    # Only label pairs that actually occur are visited, so every probability
    # is strictly positive and no epsilon is needed to protect the logarithm.
    mutual_info = 0.0
    for (id_a, id_b), joint in count_ab.items():
        pxy = joint / total
        px = count_a[id_a] / total
        py = count_b[id_b] / total
        mutual_info += pxy * math.log(pxy / (px * py), 2)

    entropy_sum = _entropy(count_a.values(), total) + _entropy(count_b.values(), total)
    if entropy_sum == 0:
        return 1.0
    return 2.0 * mutual_info / entropy_sum


def pairwise_nmi(rows, limit=None):
    """Return the NMI of every unordered pair among the leading rows.

    Args:
        rows: Sequence of label vectors.
        limit: Maximum number of leading rows to use; ``None`` uses all rows.
            Values larger than ``len(rows)`` are clamped.

    Returns:
        A list of scores ordered as (0, 1), (0, 2), ..., (1, 2), ...
    """
    count = len(rows) if limit is None else min(limit, len(rows))
    return [
        NMI(rows[i], rows[j])
        for i, j in itertools.combinations(range(count), 2)
    ]


def rank_scores(scores):
    """Label the scores by 1-based position and sort them in descending order.

    Args:
        scores: Iterable of numeric scores.

    Returns:
        A list of ``(label, text)`` tuples where ``text`` is the formatted
        score.  Sorting uses the numeric value, not its text form.
    """
    labelled = [
        ('第{}个互信息'.format(position), value)
        for position, value in enumerate(scores, 1)
    ]
    labelled.sort(key=lambda item: item[1], reverse=True)
    return [(label, '{}'.format(value)) for label, value in labelled]


def main(path='11111111.csv'):
    """Load the CSV at *path*, rank all row pairs by NMI and print the ranking.

    Args:
        path: CSV file whose first line is a header and is skipped.

    Returns:
        The ranking that was printed.
    """
    data = np.genfromtxt(path, delimiter=',', skip_header=1)
    rows = [np.array(row) for row in data]
    rankdata = rank_scores(pairwise_nmi(rows, limit=num))
    print(rankdata)
    return rankdata


if __name__ == '__main__':
    main()