• Python实现LR(逻辑回归)


    Python实现LR(逻辑回归)

    运行环境

    • Pyhton3
    • numpy(科学计算包)
    • matplotlib(画图所需,不画图可不必)

    计算过程

    st=>start: 开始
    e=>end
    op1=>operation: 读入数据
    op2=>operation: 格式化数据
    cond=>condition: 达到循环次数
    op3=>operation: 梯度上升
    op4=>operation: 输出结果
    
    st->op1->op2->cond
    cond(no)->op3->cond
    cond(yes)->op4->e
    

    输入样例

    /* Dataset.txt */
    训练集:
    
        vector(第一项是截距项)        label
        ------------------------------------------
        [1, 1, 4]                     1          
        [1, 2, 3]                     1
        [1, -2, 3]                    1
        [1, -2, 2]                    0
        [1, 0, 1]                     0
        [1, 1, 2]                     0
    
    测试集:
    
        vector(第一项是截距项)        label
        ------------------------------------------
        [1, 1, 1]                     ?
        [1, 2, 0]                     ? 
        [1, 2, 4]                     ? 
        [1, 1, 3]                     ?     
    

    代码实现

    # -*- coding: utf-8 -*-
    __author__ = 'Wsine'
    
    from numpy import *
    import matplotlib.pyplot as plt
    import operator
    import time
    
    LINE_OF_DATA = 6
    LINE_OF_TEST = 4
    
    def createTrainDataSet():
    	trainDataMat = [[1, 1, 4], 
    					[1, 2, 3], 
    					[1, -2, 3], 
    					[1, -2, 2], 
    					[1, 0, 1], 
    					[1, 1, 2]]
    	trainShares = [1, 1, 1, 0, 0,  0]
    	return trainDataMat, trainShares
    
    def createTestDataSet():
    	testDataMat = [[1, 1, 1], 
    				   [1, 2, 0], 
    				   [1, 2, 4], 
    				   [1, 1, 3]]
    	return testDataMat
    
    def autoNorm(dataSet):
    	minVals = dataSet.min(0)
    	maxVals = dataSet.max(0)
    	ranges = maxVals - minVals
    	normDataSet = zeros(shape(dataSet))
    	m = dataSet.shape[0]
    	normDataSet = dataSet - tile(minVals, (m, 1))
    	normDataSet = normDataSet / tile(ranges, (m, 1))
    	return normDataSet[:LINE_OF_DATA], normDataSet[LINE_OF_DATA:]
    
    def sigmoid(inX):
    	return 1.0 / (1 + exp(-inX))
    
    def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=1000):
    	dataMatrix = mat(dataMatIn)
    	labelMat = mat(classLabels).transpose()
    	m, n = shape(dataMatrix)
    	weights = ones((n, 1))
    	for k in range(maxCycles):
    		h = sigmoid(dataMatrix * weights)
    		error = (labelMat - h)
    		weights = weights + alpha * dataMatrix.transpose() * error
    	return weights
    
    def plotBestFit(weights):
    	dataMat, labelMat = createTrainDataSet()
    	dataArr = array(dataMat)
    	n = shape(dataArr)[0]
    	xcord1 = []; ycord1 = []
    	xcord2 = []; ycord2 = []
    	for i in range(n):
    		if int(labelMat[i]) == 1:
    			xcord1.append(dataArr[i, 1])
    			ycord1.append(dataArr[i, 2])
    		else:
    			xcord2.append(dataArr[i, 1])
    			ycord2.append(dataArr[i, 2])
    	fig = plt.figure()
    	ax = fig.add_subplot(111)
    	ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    	ax.scatter(xcord2, ycord2, s=30, c='green')
    	x = arange(-3.0, 3.0, 0.1)
    	y = (-weights[0] - weights[1] * x) / weights[2]
    	ax.plot(x, y)
    	plt.xlabel('X1'); plt.ylabel('X2')
    	plt.show()
    
    def classifyVector(inX, weights):
    	prob = sigmoid(sum(inX * weights))
    	if prob > 0.5:
    		return 1
    	else:
    		return 0
    
    def classifyAll(dataSet, weights):
    	predict = []
    	for vector in dataSet:
    		predict.append(classifyVector(vector, weights))
    	return predict
    
    def main():
    	trainDataSet, trainShares = createTrainDataSet()
    	testDataSet = createTestDataSet()
    	#trainDataSet, testDataSet = autoNorm(vstack((mat(trainDataSet), mat(testDataSet))))
    	regMatrix = gradAscent(trainDataSet, trainShares, 0.01, 600)
    	print("regMatrix = 
    ", regMatrix)
    	plotBestFit(regMatrix.getA())
    	predictShares = classifyAll(testDataSet, regMatrix)
    	print("predictResult: 
    ", predictShares)
    
    if __name__ == '__main__':
    	start = time.clock()
    	main()
    	end = time.clock()
    	print('finish all in %s' % str(end - start))
    

    输出样例

    regMatrix =
     [[-2.7205211 ]
     [ 0.19112108]
     [ 1.23590529]]
    predictResult:
     [0, 0, 1, 1]
    finish all in 18.206848995807043
    

    图片挂了

  • 相关阅读:
    20200924-4 代码规范,结对要求
    20200924-2 功能测试
    20200917-1 每周例行报告
    20200917-3 白名单
    20200917-2 词频统计 已更新附加题!
    20200910-1 每周例行报告
    20200924-2功能测试
    20200924-1每周例行报告
    20200924-3单元测试
    20200924-5 四则运算,结对
  • 原文地址:https://www.cnblogs.com/wsine/p/5180343.html
Copyright © 2020-2023  润新知