• 机器学习K近邻算法


    from numpy import *
    import operator
    from os import listdir
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

        Distance is the Euclidean distance between ``inX`` and every row of
        ``dataSet``.

        Args:
            inX: Feature vector to classify; one value per column of ``dataSet``.
            dataSet: 2-D array with one training sample per row.
            labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
                of ``dataSet``.
            k: Number of nearest neighbours that get a vote.

        Returns:
            The label with the most votes among the ``k`` nearest rows.  Ties
            are resolved by the iteration order of the vote dictionary.

        Raises:
            IndexError: If ``k`` exceeds the number of rows, or if ``k`` is not
                positive (no votes are cast).
        """
        dataSetSize = dataSet.shape[0]
        # Replicate inX once per training row so the subtraction is row-wise.
        diffMat = tile(inX, (dataSetSize, 1)) - dataSet
        distances = ((diffMat ** 2).sum(axis=1)) ** 0.5
        # argsort yields row indices ordered from nearest to farthest.
        sortedDistIndicies = distances.argsort()
        classCount = {}
        for i in range(k):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
        # items() rather than iteritems(): the latter does not exist on
        # Python 3, while items() works on both major versions.
        sortedClassCount = sorted(classCount.items(),
                                  key=operator.itemgetter(1), reverse=True)
        return sortedClassCount[0][0]
    
    def autoNorm(dataSet):
        """Min-max scale every column of ``dataSet``.

        Each value becomes ``(value - column_min) / (column_max - column_min)``.
        A column whose values are all equal has a zero range; it is scaled to
        0 instead of producing NaN from a 0/0 division.

        Args:
            dataSet: 2-D array with one sample per row and one feature per
                column.

        Returns:
            A tuple ``(normDataSet, ranges, minVals)``: the scaled array, the
            per-column ``max - min`` (left at 0 for constant columns), and the
            per-column minimum.
        """
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        ranges = maxVals - minVals
        # Divide constant columns by 1 so they normalise to 0 rather than NaN.
        divisors = where(ranges == 0, 1, ranges)
        # Broadcasting applies the per-column vectors to every row.
        normDataSet = (dataSet - minVals) / divisors
        return normDataSet, ranges, minVals
    
    def file2matrix(filename):
        """Load a tab-separated data file into a feature matrix and label list.

        Every non-blank line supplies one sample: its first three fields are
        the features and its last field is an integer class label.

        Args:
            filename: Path of the text file to read.

        Returns:
            A tuple ``(returnMat, classlabelvector)``: an array with one row of
            three floats per sample, and the list of integer labels in the same
            order.

        Raises:
            ValueError: If a feature is not a number, the label is not an
                integer, or a line has fewer than three feature fields.
        """
        # The with-block closes the file even when parsing fails later.
        with open(filename) as fr:
            stripped = [line.strip() for line in fr]
        # Blank lines (for example a trailing newline) carry no sample.
        arrayline = [line for line in stripped if line]
        returnMat = zeros((len(arrayline), 3))
        classlabelvector = []
        for index, line in enumerate(arrayline):
            listFromLine = line.split('\t')
            returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
            classlabelvector.append(int(listFromLine[-1]))
        return returnMat, classlabelvector
    
    def datingClassTest():
        """Measure the classifier's error rate on a hold-out slice of the data.

        Loads ``datingTestSet.txt``, normalises it, then uses the first 10% of
        the rows as test vectors and the remaining rows as training data for a
        3-nearest-neighbour vote.  Prints every prediction next to the real
        label, followed by the overall error rate.

        Raises:
            ZeroDivisionError: If the file has fewer than ten rows, because no
                test vectors are selected and the error rate is divided by 0.
        """
        hoRatio = 0.10
        # NOTE(review): classifyPerson loads 'datingTestSet2.txt' instead;
        # confirm which file is meant to hold the integer-labelled data.
        datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
        normMat, ranges, minVals = autoNorm(datingDataMat)
        m = normMat.shape[0]
        numTestVecs = int(m * hoRatio)
        errorCount = 0.0
        for i in range(numTestVecs):
            # Rows [0, numTestVecs) are held out; the rest act as training set.
            classiferResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                        datingLabels[numTestVecs:m], 3)
            # Single-argument print(...) is valid on both Python 2 and 3.
            print("the classifier came back with %d,the real answer is %d" % (classiferResult, datingLabels[i]))
            if classiferResult != datingLabels[i]:
                errorCount += 1.0
        print("the total error rate is %f" % (errorCount / float(numTestVecs)))
    
    def classifyPerson():
        """Interactively classify one person from three typed-in features.

        Prompts for three numbers, normalises them with the minimum and range
        of the data in ``datingTestSet2.txt``, runs a 3-nearest-neighbour vote
        against that data and prints the matching entry of ``resultList``.

        Raises:
            ValueError: If an answer cannot be parsed as a float.
        """
        # raw_input exists only on Python 2; input is its Python 3 equivalent.
        try:
            readLine = raw_input
        except NameError:
            readLine = input
        resultList = ['not at all', 'in small doses', 'in large doses']
        percentTats = float(readLine("percentage of time spent playing video games?"))
        ffMiles = float(readLine("frequent flier miles earned per year?"))
        iceCream = float(readLine("liters of icecream cosumed per year?"))
        datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
        normMat, ranges, minVals = autoNorm(datingDataMat)
        inArr = array([ffMiles, percentTats, iceCream])
        # Scale the query with the same min/range that scaled the training data.
        classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
        # The returned label is used 1-based here; resultList is 0-based.
        print("You will probably like this person %s" % resultList[classifierResult - 1])
  • 相关阅读:
    chrome 浏览器设置useragent为微信浏览器
    js 16进制颜色和RGBA颜色互转
    json parse 大数精度丢失
    taro 小程序react 搜索高亮关键字
    sourcetree 配置 openssh
    一次性卸载npm本地包(node_modules)依赖
    微信小程序订阅消息开发总结
    微信小程序请求设置权限
    taro
    浅谈JS之AJAX
  • 原文地址:https://www.cnblogs.com/cherryMJY/p/8525151.html
Copyright © 2020-2023  润新知