• Kmeans_python


    from numpy import *
    import matplotlib.pyplot as plt
    def loadData(filename):
        """Read a tab-separated text file of numbers into a list of float rows.

        Args:
            filename: Path of the text file. Each non-blank line holds
                float literals separated by tab characters.

        Returns:
            A list with one entry per non-blank line; each entry is a list
            of the floats parsed from that line.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a field cannot be parsed as a float.
        """
        data=[]
        # The context manager closes the file even when parsing raises.
        with open(filename) as infile:
            for line in infile:
                stripped=line.strip()
                # Skip blank lines (e.g. a trailing newline at end of file);
                # float('') would otherwise raise ValueError.
                if not stripped:
                    continue
                linth=stripped.split("\t")
                # Build a real list: on Python 3 a bare map() is a lazy
                # iterator and would be stored unevaluated.
                data.append([float(field) for field in linth])
        return data
    # Load the sample points as a plain list of float rows.
    # NOTE(review): nothing later in the visible script reads this module-level
    # `data` (the functions below take their own `data` parameter and the
    # clustering run reloads the file) -- confirm whether this line is needed.
    data=loadData("testSet.txt")
    def randcent(data,k):
        """Create k random starting centroids inside the bounding box of data.

        Args:
            data: 2-D NumPy matrix (or array) with one sample per row and one
                feature per column.
            k: Number of centroids to generate.

        Returns:
            A k-by-n matrix (n = number of columns of data). Column i holds
            values drawn with random.rand between the minimum and maximum of
            column i of data.
        """
        n=shape(data)[1]
        # asmatrix instead of mat: the `mat` alias is gone in NumPy 2.0.
        centdata=asmatrix(zeros((k,n)))
        for i in range(n):
            column=data[:,i]
            # Use the array methods rather than bare min()/max(): whether the
            # bare names are the builtins or NumPy's depends on what the star
            # import exports, and the methods return plain scalars directly.
            mind=float(column.min())
            max2min=float(column.max())-mind
            centdata[:,i]=mind+max2min*random.rand(k,1)
        return centdata
    
    def calcdist(A,B):
        """Return the Euclidean distance between the vectors A and B.

        A and B are NumPy arrays or matrices of the same shape; the result
        is the square root of the sum of their squared element differences.
        """
        delta=A-B
        squared=power(delta,2)
        total=sum(squared)
        return sqrt(total)
    def kmeans(data,k):
        """Cluster the rows of data into k groups with the k-means algorithm.

        Starting from centroids produced by randcent, every point is assigned
        to its nearest centroid (distance measured by calcdist) and each
        centroid is then moved to the mean of its points. This repeats until
        no point changes cluster.

        Args:
            data: 2-D NumPy matrix with one sample per row.
            k: Number of clusters.

        Returns:
            A tuple (cent, mark). cent is a k-by-n matrix of centroids. mark
            is an m-by-2 matrix (m = number of rows of data) whose column 0
            is the cluster index of each point and whose column 1 is that
            point's distance to its centroid.
        """
        m=shape(data)[0]
        # asmatrix instead of mat: the `mat` alias is gone in NumPy 2.0.
        mark=asmatrix(zeros((m,2)))
        # Start with an invalid cluster index. With the zero default, points
        # whose nearest centroid is index 0 would not register as "changed"
        # on the first pass, which could end the loop before it converged.
        mark[:,0]=-1
        cent=randcent(data,k)
        centerchanged=True
        while centerchanged:
            centerchanged=False
            for i in range(m):
                index=-1
                # Infinity instead of a fixed 10000 so that points farther
                # than that from every centroid are still assigned.
                mindata=float("inf")
                for j in range(k):
                    temp=calcdist(cent[j,:],data[i,:])
                    if temp<mindata:
                        mindata=temp
                        index=j
                if mark[i,0]!=index:
                    centerchanged=True
                mark[i,:]=index,mindata
            for t in range(k):
                ar=data[nonzero(mark[:,0].A==t)[0]]
                # Keep the previous centroid when no point chose it; the mean
                # of an empty selection would turn the centroid into NaN.
                if ar.shape[0]>0:
                    cent[t,:]=mean(ar,0)
        return cent,mark
    # Cluster the sample data, then draw every cluster, the centroids, and a
    # circle around each centroid that reaches its farthest member.
    # asmatrix instead of mat: the `mat` alias is gone in NumPy 2.0.
    datamat=asmatrix(loadData("testSet.txt"))
    numclusters=4
    centdata,cluster=kmeans(datamat,numclusters)
    centarr=centdata.A
    clusterarr=cluster.A
    theta=arange(0,2*pi,0.01)
    # Scatter styling per cluster index; an empty dict leaves matplotlib's
    # defaults in place.
    styles=[
        {"c":"blue","s":10,"marker":'s'},
        {"c":"green","marker":'*'},
        {"c":'gray',"marker":'h'},
        {},
    ]
    fig=plt.figure()
    ax=fig.add_subplot(111)
    for t in range(numclusters):
        # Rows of datamat assigned to cluster t, as a plain 2-D array.
        members=datamat[nonzero(cluster[:,0].A==t)[0]].A
        style=styles[t] if t<len(styles) else {}
        ax.scatter(members[:,0],members[:,1],**style)
    ax.scatter(centarr[:,0],centarr[:,1],c='red')
    for t in range(numclusters):
        # Column 1 of clusterarr holds each point's distance to its centroid.
        dists=clusterarr[clusterarr[:,0]==t,1]
        # An empty cluster has no farthest member, so no circle is drawn
        # (taking the max of an empty selection would raise ValueError).
        if dists.size==0:
            continue
        radius=dists.max()
        ax.plot(centarr[t][0]+radius*cos(theta),centarr[t][1]+radius*sin(theta))
    plt.show()
    
    

      

  • 相关阅读:
    01_垂直居中body中的应用
    C++基础知识易错点总结(2)
    辗转相除求最大公约数
    C++基础知识易错点总结(1)
    类对象的建立方式总结
    LeetCode(131)Palindrome Partitioning
    基本套接字编程(7) -- udp篇
    LeetCode(124) Binary Tree Maximum Path Sum
    LeetCode(115) Distinct Subsequences
    LeetCode(97) Interleaving String
  • 原文地址:https://www.cnblogs.com/semen/p/7003967.html
Copyright © 2020-2023  润新知