• kNN算法


    import numpy as np
    import operator
    import os
    def createDataset():
        """Build the four-point toy data set used to try out the classifier.

        Returns:
            A ``(samples, classNames)`` tuple: a 4x2 float array of points and
            a list with one class label ('A' or 'B') per row.
        """
        samples = [
            [1.0, 1.1],
            [1.0, 1.0],
            [0, 0],
            [0, 0.1],
        ]
        classNames = ['A'] * 2 + ['B'] * 2
        return np.array(samples), classNames
    
    def classify0(inX,dataSet,labels,k):
        """Classify ``inX`` by majority vote among its ``k`` nearest rows.

        Args:
            inX: the query vector.
            dataSet: 2-D array with one training sample per row.
            labels: sequence holding the label of each row of ``dataSet``.
            k: how many nearest neighbours take part in the vote.

        Returns:
            The label that received the most votes.
        """
        rowCount = dataSet.shape[0]

        # Euclidean distance from the query to every training row.
        deltas = np.tile(inX, (rowCount, 1)) - dataSet
        squaredSums = (deltas ** 2).sum(axis=1)
        nearestFirst = (squaredSums ** 0.5).argsort()

        # Tally the labels of the k closest rows.
        votes = {}
        for rank in range(k):
            label = labels[nearestFirst[rank]]
            if label in votes:
                votes[label] += 1
            else:
                votes[label] = 1

        # Stable sort by vote count, highest first.
        ranking = sorted(votes.items(), key=lambda pair: pair[1], reverse=True)
        return ranking[0][0]
    def filematrix(filename):
        """Load a tab-separated data file into a feature matrix and a label list.

        Each non-blank line must hold at least three numeric fields followed by
        an integer class label in its last field.

        Args:
            filename: path of the text file to read.

        Returns:
            A ``(returnMat, classLableVector)`` tuple: an ``(n, 3)`` float array
            built from the first three fields of every line, and a list of the
            ``n`` integer labels taken from the last field of every line.

        Raises:
            ValueError: if a field cannot be converted to a number.
        """
        # `with` closes the file even if parsing fails; the original never
        # closed the handle.
        with open(filename) as fr:
            # Blank lines (e.g. a trailing empty line) carry no sample; skip
            # them instead of crashing on the numeric conversion.
            rows = [line.strip().split('\t') for line in fr if line.strip()]

        returnMat = np.zeros((len(rows), 3))
        classLableVector = []
        for index, fields in enumerate(rows):
            returnMat[index, :] = [float(value) for value in fields[0:3]]
            classLableVector.append(int(fields[-1]))
        return returnMat, classLableVector
    
    def autoNorm(dataSet):
        """Min-max scale every column of ``dataSet`` into the range [0, 1].

        Args:
            dataSet: 2-D numeric array with one sample per row.

        Returns:
            A ``(resultDataset, rangs, minVals)`` tuple:
            ``resultDataset`` is ``(dataSet - minVals) / rangs``;
            ``rangs`` is the per-column divisor that was used, i.e. max - min,
            with 1 substituted where a column is constant;
            ``minVals`` is the per-column minimum.
            Applying ``(x - minVals) / rangs`` to a new sample therefore
            reproduces the same scaling.
        """
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        rangs = maxVals - minVals
        # A constant column has range 0; dividing by it would yield NaN/inf.
        # Use 1 as the divisor so such a column normalises to 0.
        rangs = np.where(rangs == 0, 1, rangs)
        # Broadcasting applies the per-column vectors to every row without
        # materialising tiled copies.
        resultDataset = (dataSet - minVals) / rangs
        return resultDataset, rangs, minVals
    def datingClassTest():
        """Estimate the classifier's error rate on the data in 'dts.txt'.

        The first 10% of the normalised rows are held out as test samples; each
        one is classified with k=3 against the remaining rows. The prediction
        and the true label are printed per sample, followed by the overall
        error rate once all test samples have been processed.
        """
        hoRatio = 0.10
        errorCount = 0.0
        datingMat, datingLabels = filematrix('dts.txt')
        normMat, _, _ = autoNorm(datingMat)
        dataRows = normMat.shape[0]
        testDataRows = int(dataRows * hoRatio)

        # Rows [testDataRows, dataRows) form the training set for every query.
        trainMat = normMat[testDataRows:dataRows, :]
        trainLabels = datingLabels[testDataRows:dataRows]

        for i in range(testDataRows):
            # The original called the undefined name `classfy0` here.
            classfileterResult = classify0(normMat[i, :], trainMat, trainLabels, 3)
            print("这次分类结果是: %d,这个真实的结果为:%d"%(classfileterResult,datingLabels[i]))
            if classfileterResult != datingLabels[i]:
                errorCount += 1.0

        # Report the rate once, after every test row has been counted; skip it
        # when there are no test rows to avoid dividing by zero.
        if testDataRows:
            print("这次分类的总错误率为:%f"%(errorCount/float(testDataRows)))
    
    
    def classifyPerson():
        """Ask for three traits of a person and print the predicted category.

        Reads three numbers from standard input, scales them with the minimum
        and range obtained from normalising 'dts.txt', classifies the result
        with k=3 and prints the matching entry of the category list (label 1
        maps to the first entry).
        """
        categoryNames = ['没有魅力', '魅力一般', '很有魅力']

        # The prompts are asked in this order; the query vector below uses a
        # different order (presumably the column order of 'dts.txt').
        gameShare = float(input("每天所玩电子游戏的占比?"))
        flightMiles = float(input("每年的飞行里程数?"))
        iceCreamLitres = float(input("每周吃多少冰淇淋(升)?"))

        rawSamples, sampleLabels = filematrix('dts.txt')
        scaledSamples, spans, offsets = autoNorm(rawSamples)

        query = np.array([flightMiles, gameShare, iceCreamLitres])
        scaledQuery = (query - offsets) / spans
        predicted = classify0(scaledQuery, scaledSamples, sampleLabels, 3)

        print ('这个人让人感觉: ', categoryNames[predicted - 1])
        
    # 2:手写识别系统
    #将一个32*32的二进制图像矩阵转换成1*1024的向量
    
    def img2vector(filename):
        """Flatten a 32x32 text image of digits into a 1x1024 row vector.

        The first 32 characters of each of the first 32 lines of the file are
        converted with ``int`` and stored row by row.

        Args:
            filename: path of the text image file.

        Returns:
            A ``(1, 1024)`` float array; element ``32*i + j`` holds the value of
            character ``j`` on line ``i``.

        Raises:
            IndexError: if a line has fewer than 32 characters.
            ValueError: if a character is not a digit.
        """
        returnVect = np.zeros((1, 1024))
        # `with` closes the file even if a line is malformed; the original
        # never closed the handle.
        with open(filename) as fr:
            for i in range(32):
                lineStr = fr.readline()
                for j in range(32):
                    returnVect[0, 32*i+j] = int(lineStr[j])
        return returnVect
    
    
    #手写识别系统测试代码
    def handwritingClassTest():
        """Run the kNN digit classifier on 'testDigits' and print its error rate.

        Every file in 'trainingDigits' is converted to a 1024-element vector
        and labelled with the integer before the first '_' in its name. Each
        file in 'testDigits' is then classified with k=3 against that training
        set. The prediction and the expected label are printed per file,
        followed by the total error count and the error rate.
        """
        trainingFileList = os.listdir('trainingDigits')
        m = len(trainingFileList)
        hwLabels = []
        trainingMat = np.zeros((m, 1024))
        for i, fileNameStr in enumerate(trainingFileList):
            hwLabels.append(_digitLabelFromFileName(fileNameStr))
            trainingMat[i,:] = img2vector('trainingDigits/%s' % fileNameStr)

        testFileList = os.listdir('testDigits')
        errorCount = 0.0
        mTest = len(testFileList)
        for fileNameStr in testFileList:
            classStr = _digitLabelFromFileName(fileNameStr)
            vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
            classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
            print ('the classifier came back with: %d, the real answer is: %d' % (classifierResult, classStr))
            if classifierResult != classStr:
                errorCount += 1.0

        # The original string literals were split across physical lines, which
        # is a syntax error; the leading line break is written as "\n".
        print ("\nthe total numbers of errors is : %d" % errorCount)
        # An empty test directory would otherwise divide by zero.
        errorRate = errorCount/float(mTest) if mTest else 0.0
        print ("\nthe total error rate is: %f" % errorRate)


    def _digitLabelFromFileName(fileNameStr):
        """Return the integer before the first '_' in the file's base name."""
        fileStr = fileNameStr.split('.')[0]
        return int(fileStr.split('_')[0])
  • 相关阅读:
    C#操作REDIS例子
    A C# Framework for Interprocess Synchronization and Communication
    UTF8 GBK UTF8 GB2312 之间的区别和关系
    开源项目选型问题
    Mysql命令大全——入门经典
    RAM, SDRAM ,ROM, NAND FLASH, NOR FLASH 详解(引用)
    zabbix邮件报警通过脚本来发送邮件
    centos启动提示unexpected inconsistency RUN fsck MANUALLY
    rm 或者ls 报Argument list too long
    初遇Citymaker (六)
  • 原文地址:https://www.cnblogs.com/daxiongblog/p/5538498.html
Copyright © 2020-2023  润新知