• KNN算法实现手写数字识别


    from numpy import *
    import operator
    from os import listdir
    
    
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its k nearest rows of ``dataSet``.

        Distance is Euclidean: the square root of the squared differences
        summed along axis 1.

        Args:
            inX: Sample to classify; it is repeated once per row of ``dataSet``.
            dataSet: Array whose ``shape[0]`` is the number of reference rows.
            labels: Sequence indexed by row number giving each row's label.
            k: Number of nearest rows that take part in the vote.

        Returns:
            The label with the most votes; between equal counts the label
            encountered first among the nearest rows wins.
        """
        rowCount = dataSet.shape[0]
        # Repeat the query once per reference row so it can be subtracted row-wise.
        deltas = tile(inX, (rowCount, 1)) - dataSet
        squaredSums = (deltas ** 2).sum(axis=1)
        nearestFirst = (squaredSums ** 0.5).argsort()

        votes = {}
        for rank in range(k):
            label = labels[nearestFirst[rank]]
            if label in votes:
                votes[label] += 1
            else:
                votes[label] = 1

        # Stable descending sort keeps first-seen order between equal counts.
        ranking = list(votes.items())
        ranking.sort(key=lambda pair: pair[1], reverse=True)
        return ranking[0][0]
    
    
    def img2Vector(filename):
        """Read a 32x32 text image of digit characters into a 1x1024 row vector.

        The first 32 characters of each of the first 32 lines are converted
        with ``int`` and stored row-major, so the character at line ``i``,
        column ``j`` lands at index ``32*i + j``.

        Args:
            filename: Path of the text file to read.

        Returns:
            An array of shape ``(1, 1024)`` created with ``zeros`` and filled
            with the parsed values.

        Raises:
            IndexError: If a line has fewer than 32 characters (including
                the case where the file has fewer than 32 lines).
            ValueError: If a character cannot be converted by ``int``.
        """
        returnVect = zeros((1,1024))
        # The context manager closes the file even if parsing raises.
        with open(filename) as fr:
            for i in range(32):
                lineStr = fr.readline()
                for j in range(32):
                    returnVect[0,32*i+j] = int(lineStr[j])
        return returnVect
    
    
    def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits', k=3):
        """Evaluate the kNN classifier on digit image files and print the results.

        Every file name is expected to look like ``<label>_<anything>.<ext>``:
        the integer before the first underscore is used as the true label.
        Each training file is loaded with ``img2Vector`` into one row of the
        training matrix; each test file is then classified with ``classify0``
        and compared with the label taken from its name.

        Args:
            trainingDir: Directory holding the training files.
            testDir: Directory holding the test files.
            k: Number of neighbours passed to ``classify0``.

        Prints one line per test file, then the total number of errors and
        the error rate. When the test directory is empty the error rate is
        undefined, so a short notice is printed instead of dividing by zero.
        """
        hwLabels = []
        trainingFileList = listdir(trainingDir)
        m = len(trainingFileList)
        trainingMat = zeros((m,1024))
        for i, fileNameStr in enumerate(trainingFileList):
            # "7_12.txt" -> "7_12" -> 7
            fileStr = fileNameStr.split('.')[0]
            hwLabels.append(int(fileStr.split('_')[0]))
            trainingMat[i,:] = img2Vector('%s/%s'%(trainingDir,fileNameStr))

        testFileList = listdir(testDir)
        mTest = len(testFileList)
        errorCount = 0
        for fileNameStr in testFileList:
            fileStr = fileNameStr.split('.')[0]
            classNumStr = int(fileStr.split('_')[0])
            vectorUnderTest = img2Vector('%s/%s'%(testDir,fileNameStr))
            classifierResult = classify0(vectorUnderTest,trainingMat,hwLabels,k)
            print("the classifier came back with:%d,the real answer is :%d"%(classifierResult,classNumStr))
            if classifierResult != classNumStr:
                errorCount += 1

        print("the total number of errors is :%d"%errorCount)
        if mTest == 0:
            # No test files: avoid ZeroDivisionError, the rate is undefined.
            print("no test files found in %s, the error rate is undefined"%testDir)
            return
        print("the total error rate is: %f"%(errorCount/float(mTest)))
    
    # Run the full train/test evaluation as soon as this script is executed.
    handwritingClassTest()

    测试集+训练集数据地址:https://i.cnblogs.com/Files.aspx

    knn.rar

  • 相关阅读:
    cocos2d-x 2.2 移植wp8遇到的坑
    程序员简单却激荡的一年
    关于manacher
    关于Tarjan
    洛谷 P4013 数字梯形问题
    洛谷 P2633 Count on a tree
    洛谷 P1709 隐藏口令Hidden Password
    洛谷 P3112 后卫马克Guard Mark
    洛谷 P1174 打砖块
    洛谷1903 数颜色
  • 原文地址:https://www.cnblogs.com/ncuhwxiong/p/9460380.html
Copyright © 2020-2023  润新知