• 吴裕雄 python 机器学习-Logistic(1)


    import numpy as np
    
    def loadDataSet():
        dataMat = []
        labelMat = []
        fr = open('D:\LearningResource\machinelearninginaction\Ch05\testSet.txt')
        for line in fr.readlines():
            lineArr = line.strip().split()
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
        return dataMat,labelMat
    
    dataMat,labelMat = loadDataSet()
    print(dataMat)
    print(labelMat)

    def sigmoid(z):
        sigmoid = 1.0/(1+np.exp(-z)) 
        return sigmoid
    
    def gradAscent(dataMatIn, classLabels):
        dataMatrix = np.mat(dataMatIn)             
        labelMat = np.mat(classLabels).transpose()
        m,n = np.shape(dataMatrix)
        alpha = 0.001
        maxCycles = 500
        weights = np.ones((n,1))
        for k in range(maxCycles):             
            h = sigmoid(dataMatrix*weights)     
            error = (labelMat - h)              
            weights = weights + alpha * dataMatrix.transpose()* error
        return weights
    
    weights = gradAscent(dataMat,labelMat)
    print(weights)

    def stocGradAscent0(dataMatrix, classLabels):
        m,n = np.shape(dataMatrix)
        alpha = 0.01
        weights = np.ones(n)   
        for i in range(m):
            h = sigmoid(sum(np.array(dataMatrix[i])*weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * np.array(dataMatrix[i])
        return weights
    
    weights = stocGradAscent0(dataMat,labelMat)
    print(weights)

    def stocGradAscent1(dataMatrix, classLabels, numIter=150):
        m,n = np.shape(dataMatrix)
        weights = np.ones(n)   
        for j in range(numIter):
            dataIndex = list(range(m))
            for i in range(m):
                alpha = 4/(1.0+j+i)+0.0001    
                randIndex = int(np.random.uniform(0,len(dataIndex)))
                h = sigmoid(sum(np.array(dataMatrix[randIndex])*weights))
                error = classLabels[randIndex] - h
                weights = weights + alpha * error * np.array(dataMatrix[randIndex])
                del(dataIndex[randIndex])
        return weights
    
    weights = stocGradAscent1(dataMat,labelMat)
    print(weights)

    import matplotlib.pyplot as plt
    
    def plotBestFit():
        dataMat,labelMat=loadDataSet()
        weights = gradAscent(dataMat,labelMat)
        dataArr = np.array(dataMat)
        n = np.shape(dataArr)[0] 
        xcord1 = []
        ycord1 = []
        xcord2 = []
        ycord2 = []
        for i in range(n):
            if(int(labelMat[i])== 1):
                xcord1.append(dataArr[i,1])
                ycord1.append(dataArr[i,2])
            else:
                xcord2.append(dataArr[i,1])
                ycord2.append(dataArr[i,2])
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
        ax.scatter(xcord2, ycord2, s=30, c='green')
        x = np.arange(-3.0, 3.0, 0.1)
        y = (-weights[0]-weights[1]*x)/weights[2]
        y = np.array(y).reshape(len(x))
        ax.plot(x, y)
        plt.xlabel('X1')
        plt.ylabel('X2');
        plt.show()
        
    plotBestFit()

    def classifyVector(z, weights):
        prob = sigmoid(sum(z*weights))
        if(prob > 0.5):
            return 1.0
        else: 
            return 0.0
        
    def colicTest():
        frTrain = open('D:\LearningResource\machinelearninginaction\Ch05\horseColicTraining.txt')
        frTest = open('D:\LearningResource\machinelearninginaction\Ch05\horseColicTest.txt')
        trainingSet = []
        trainingLabels = []
        for line in frTrain.readlines():
            currLine = line.strip().split('	')
            lineArr =[]
            for i in range(21):
                lineArr.append(float(currLine[i]))
            trainingSet.append(lineArr)
            trainingLabels.append(float(currLine[21]))
        trainWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, 1000)
        errorCount = 0
        numTestVec = 0.0
        for line in frTest.readlines():
            numTestVec += 1.0
            currLine = line.strip().split('	')
            lineArr =[]
            for i in range(21):
                lineArr.append(float(currLine[i]))
            if(int(classifyVector(np.array(lineArr), trainWeights))!= int(currLine[21])):
                errorCount += 1
        errorRate = (float(errorCount)/numTestVec)
        print("the error rate of this test is: %f" % errorRate)
        return errorRate
    
    errorRate = colicTest()
    print(errorRate)
    
    def multiTest():
        numTests = 10
        errorSum=0.0
        for k in range(numTests):
            errorSum += colicTest()
        print("after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests)))
        
    multiTest()

  • 相关阅读:
    《C++ Primer》笔记string,vector
    《C++ Primer》笔记创建动态数组new,delete
    《C++ Primer》笔记迭代器iterator
    浅析jQuery基础框架
    linux常用命令
    Javascript常用对象详细讲解
    Javascript兼容多种浏览器
    Vim常用命令
    提高 web 应用性能之 JavaScript 性能调优
    jQuery LigerUI使用教程入门篇
  • 原文地址:https://www.cnblogs.com/tszr/p/10177325.html
Copyright © 2020-2023  润新知