• 数据导入+欧氏距离计算+互信息计算


    数据导入+欧氏距离计算+互信息计算

    # coding=utf-8
    import numpy as np
    import math
    #import pandas as pd
    #from sklearn import preprocessing 
    
    # Read the comma-separated file, skipping its first (header) line.
    nd = np.genfromtxt('11111111.csv', delimiter=',', skip_header=True)

    # Plain nested Python lists, one inner list per row of nd.
    final_list = nd.tolist()

    # Each row as a tuple, then each tuple turned back into its own array;
    # b is the per-row collection the pairwise computations index into.
    a = [tuple(row) for row in final_list]
    b = [np.array(row) for row in a]

    c = []    # collects the pairwise scores
    num = 32  # number of rows to compare pairwise
    k = 0     # running count of processed pairs
    def distEclud(vecA, vecB):
        """Return the Euclidean distance between two vectors.

        Args:
            vecA: Array-like of numbers (NumPy array, list or tuple).
            vecB: Array-like of numbers whose shape is compatible with vecA.

        Returns:
            The square root of the sum of squared element-wise differences,
            reduced along the first axis (a scalar for 1-D inputs).
        """
        # asarray lets plain lists/tuples be subtracted element-wise.
        diff = np.asarray(vecA) - np.asarray(vecB)
        # axis=0 reduces over the same axis the built-in sum() iterated,
        # but does the reduction inside NumPy instead of a Python loop.
        return np.sqrt(np.sum(np.square(diff), axis=0))
    
    
    def NMI(A,B):
        """Return the normalized mutual information of two label sequences.

        The score is 2 * MI(A, B) / (H(A) + H(B)), with all logarithms
        taken in base 2.

        Args:
            A: 1-D array-like of labels.
            B: 1-D array-like of labels, the same length as A.

        Returns:
            The normalized mutual information as a float. When both
            entropies are zero (every element of A is identical and every
            element of B is identical, or the inputs are empty) the ratio is
            undefined and 1.0 is returned.

        Raises:
            ValueError: If A and B differ in length.
        """
        # Element-wise comparison (A == label) needs arrays; with a plain
        # list the comparison collapses to a single False.
        A = np.asarray(A)
        B = np.asarray(B)
        total = len(A)
        if total != len(B):
            raise ValueError(
                'A and B must have the same length, got {} and {}'.format(
                    total, len(B)))
        count = float(total)
        # Tiny offset that keeps log() defined when a joint probability is 0.
        eps = 1.4e-45

        # Positions of every distinct label, looked up once per label.
        A_occur = [np.where(A == idA)[0] for idA in set(A)]
        B_occur = [np.where(B == idB)[0] for idB in set(B)]

        # Mutual information
        MI = 0.0
        for idAOccur in A_occur:
            px = len(idAOccur) / count
            for idBOccur in B_occur:
                py = len(idBOccur) / count
                pxy = len(np.intersect1d(idAOccur, idBOccur)) / count
                MI = MI + pxy * math.log(pxy / (px * py) + eps, 2)

        # Entropies used for the normalization
        Hx = 0.0
        for idAOccur in A_occur:
            px = len(idAOccur) / count
            Hx = Hx - px * math.log(px + eps, 2)
        Hy = 0.0
        for idBOccur in B_occur:
            py = len(idBOccur) / count
            Hy = Hy - py * math.log(py + eps, 2)

        if Hx + Hy == 0:
            # Both inputs carry no information; avoid dividing by zero.
            return 1.0
        return 2.0 * MI / (Hx + Hy)
    
    if __name__ == '__main__':
        # Score every unordered pair of rows (i < j) exactly once.
        # NOTE(review): b must hold at least num rows, otherwise b[j] raises
        # IndexError -- confirm num matches the data file.
        for i in range(num):
            for j in range(i + 1, num):
                k = k + 1
                c.append(NMI(b[i], b[j]))

        # Label each score with its 1-based position in the pair enumeration.
        dic = {}
        for q, score in enumerate(c, 1):
            dic['第{}个互信息'.format(q)] = '{}'.format(score)

        # The scores are stored as strings, so rank them by their float value;
        # comparing the strings themselves would order them lexicographically.
        rankdata = sorted(dic.items(), key=lambda item: float(item[1]), reverse=True)
        print(rankdata)
            
            
  • 相关阅读:
    MTV和MVC的区别
    django权限之二级菜单
    Python PEP8代码书写规范
    form表单
    forms组件
    Django的用户认证组件
    Django的分页
    cookie session
    文件上传
    ORM多表操作上
  • 原文地址:https://www.cnblogs.com/xingnie/p/10335013.html
Copyright © 2020-2023  润新知