• python 实现简单的KNN算法


    from numpy import *
    import operator
    
    def createDataSet():
        group = array([[3,104],[2,100],[1,81],[101,10],[99,5],[98,2]])
        labels = ['爱情片','爱情片','爱情片','动作片','动作片','动作片']
        return group, labels
    
    def classify0(inX, dataSet, labels, k):
        dataSetSize = dataSet.shape[0]
        diffMat = tile(inX, (dataSetSize,1)) - dataSet
        sqDiffMat = diffMat ** 2
        sqDistances = sqDiffMat.sum(axis=1)
        distances = sqDistances ** 0.5
        sortedDistIndicies = distances.argsort()
        classCount = {}
        for i in range(k):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
        sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
        return sortedClassCount[0][0]
    
    group,labels = createDataSet()
    print(classify0([500,90],group,labels,3))

     使用错误率来检验算法 

    from numpy import *
    
    import matplotlib
    import matplotlib.pyplot as plt 
    import operator
    
    def file2matrix(filename):
        fr = open(filename)
        arrayOLines = fr.readlines()
        numberOfLines = len(arrayOLines)
        returnMat = zeros((numberOfLines,3))
        classLabelVector = []
        index = 0
        for line in arrayOLines:
            line = line.strip()
            listFromLine = line.split('	')
            returnMat[index,:] = listFromLine[0:3]
            classLabelVector.append(int(listFromLine[-1]))
            index += 1
        return returnMat,classLabelVector
    
    def autoNorm(dataSet):
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        ranges = maxVals - minVals
        normDataSet = zeros(shape(dataSet))
        # print(shape(dataSet))
        # print(normDataSet)
        m = dataSet.shape[0]
        normDataSet = dataSet - tile(minVals,(m,1))
        normDataSet = normDataSet / tile(ranges,(m,1))
        return normDataSet, ranges, minVals
    
    
    def classify0(inX, dataSet, labels, k):
        dataSetSize = dataSet.shape[0]
        diffMat = tile(inX, (dataSetSize,1)) - dataSet
        sqDiffMat = diffMat ** 2
        sqDistances = sqDiffMat.sum(axis=1)
        distances = sqDistances ** 0.5
        sortedDistIndicies = distances.argsort()
        classCount = {}
        for i in range(k):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
        sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
        return sortedClassCount[0][0]
    
    
    def datingClassTest():
        hoRatio = 0.10
        datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')
        normMat,ranges,minVals = autoNorm(datingDataMat)
        m = normMat.shape[0]
        # print(m)
        numTestVecs = int(m*hoRatio)
        errorCount = 0.0
        for i in range(numTestVecs):
            classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)
            print("the classifier came back with: %d,the real answer is: %d" % (classifierResult, datingLabels[i]))
            if (classifierResult != datingLabels[i]):
                errorCount += 1.0
        print("the total error rate is: %f" % (errorCount/float(numTestVecs)))
    datingClassTest()

     数据集下载:https://i.cnblogs.com/Files.aspx  

    datingTestSet2.rar
  • 相关阅读:
    【HDOJ5971】Wrestling Match(二分图,并查集)
    【HDOJ5978】To begin or not to begin(概率)
    【HDOJ5979】Convex(三角函数)
    【HDOJ5980】Find Small A(签到)
    【HDOJ5949】Relative atomic mass(签到)
    【HDOJ5948】Thickest Burger(签到)
    【HDOJ6228】Tree(树)
    【HDOJ6227】Rabbits(贪心)
    147.命题逻辑
    146.离散数学
  • 原文地址:https://www.cnblogs.com/ncuhwxiong/p/9456943.html
Copyright © 2020-2023  润新知