Note: Ng's videos walk through the full derivation, but theory and practice still differ quite a bit, so the code implementation still has to be done.
1. Logistic Regression Theory
http://www.cnblogs.com/wjy-lulu/p/7759515.html - Ng's derivation is excellent; understanding it is enough, and there is no need to re-derive it yourself, because after a few days of not using it you forget it anyway.
2. Code Implementation
2.1 Batch (Full-Dataset) Gradient Ascent
Each training pass uses the entire dataset and searches for the optimum over all of it at once.
Pros: it easily skips over local extrema and finds the true global optimum.
Cons: with a large dataset it takes a long time, and newly arrived samples force a full retraining.
Derivation: see the link at the start of this post for Ng's complete derivation and proof.
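For reference, the update that the code below implements (with the bias b folded into w through the constant 1.0 feature) is the batch gradient-ascent step on the log-likelihood:

$$ \mathbf{w} \leftarrow \mathbf{w} + \alpha \, X^{\top} \bigl( \mathbf{y} - \sigma(X\mathbf{w}) \bigr) $$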
import numpy as np

def loadDataSet():
    dataMat = []
    labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()  # split on whitespace
        # store data as [[1.0, a, b], [1.0, c, d], ...]
        # and labels as [[1], [0], [0], [1], ...]
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append([int(lineArr[2])])
    return dataMat, labelMat

def sigmoid(intX):
    return 1.0 / (1.0 + np.exp(-intX))

# Batch gradient ascent
def gradAscent(dataMatIn, classLabels):
    dataMatrix = np.mat(dataMatIn)
    labelsMat = np.mat(classLabels)
    m, n = dataMatrix.shape
    alpha = 0.001
    maxCycle = 200
    weight = np.ones((n, 1))  # for brevity the bias b is treated as just another w
    for k in range(maxCycle):
        h = sigmoid(dataMatrix * weight)  # column of predicted probabilities
        error = labelsMat - h             # prediction error
        weight = weight + alpha * dataMatrix.transpose() * error
    return weight
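One practical caveat, not handled in the code above: np.exp(-intX) overflows with a RuntimeWarning once the argument magnitude passes roughly 709 (the float64 limit), which can happen on the unnormalized horse-colic features in section 2.4. A minimal sketch of a safer drop-in, assuming a clipping threshold of 500 is acceptable (sigmoidStable is a hypothetical name, not from the original code):

def sigmoidStable(intX):
    # clip the argument so np.exp never overflows float64 (exp overflows near 709)
    intX = np.clip(intX, -500, 500)
    return 1.0 / (1.0 + np.exp(-intX))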
2.2 Visualizing the Simple Classifier
Draw the decision boundary of this simple classifier with matplotlib.
Note: the plot here converts the matrices to lists before drawing; posts online say a matrix can be passed in directly, but that raised an error when I ran it.
import matplotlib.pyplot as plt

def plotBestFit(weight):
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)  # convert to an array
    n = dataArr.shape[0]
    xcode1 = []; ycode1 = []
    xcode2 = []; ycode2 = []
    for i in range(n):
        if int(labelMat[i][0]) == 1:
            xcode1.append(dataArr[i, 1])
            ycode1.append(dataArr[i, 2])
        else:
            xcode2.append(dataArr[i, 1])
            ycode2.append(dataArr[i, 2])
    fig = plt.figure("data_x_y")
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='r', marker='s')
    ax.scatter(xcode2, ycode2, s=30, c='g')
    x = np.mat(np.arange(-3.0, 3.0, 0.1))
    y = (-weight[0] - weight[1] * x) / weight[2]  # boundary: w0 + w1*x1 + w2*x2 = 0
    ax.plot(x.tolist()[0], y.tolist()[0])  # matrices converted to lists before plotting
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
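A minimal driver tying the pieces together, assuming testSet.txt (the sample file from the book) is in the working directory:

dataMat, labelMat = loadDataSet()
weights = gradAscent(dataMat, labelMat)  # 3x1 matrix: [b, w1, w2]
plotBestFit(weights)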
2.3 Stochastic Gradient Ascent and Its Improvement
Stochastic gradient ascent: the counterpart of the batch version, it updates W from a single sample at a time, and the improved version also visits the samples in an order chosen uniformly at random.
Pros: per-sample updates are cheap, and a new sample can be folded in directly without retraining, which is online learning (see the sketch after this list); the random order smooths out the periodic oscillations caused by regularities in the sample sequence (the book has an illustration of this).
Cons: it may settle on a local extremum.
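The "online" point above amounts to this: a single newly arrived labeled sample can nudge the current weights directly, with no retraining pass over the old data. A minimal sketch under that assumption (onlineUpdate is a hypothetical helper, not from the book; it is exactly one step of the per-sample update used in the code below):

def onlineUpdate(weights, x, y, alpha=0.01):
    # one stochastic gradient-ascent step on a single new sample
    # x: 1-D feature array (with the leading 1.0), y: its 0/1 label
    h = sigmoid(sum(x * weights))
    return weights + alpha * (y - h) * x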
# Stochastic gradient ascent - old version
def stoGradAscent0(dataMatrix, classLabels):
    m, n = dataMatrix.shape
    alpha = 0.01
    weights = np.ones(n)  # better not to initialize at 0: fitting from 0 is slow
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h  # fixed: compare against the i-th label
        weights = weights + alpha * error * dataMatrix[i]
    return weights

# Stochastic gradient ascent - improved version
def stoGradAscent1(dataMatrix, classLabels, numIter=150):
    # alpha keeps shrinking
    # the samples are visited in random order
    m, n = dataMatrix.shape
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # indices of samples not yet used in this pass
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # decays as iterations and updates accumulate
            randIndex = int(np.random.uniform(0, len(dataIndex)))  # random position in dataIndex
            sampleIndex = dataIndex[randIndex]  # fixed: map the position to the actual sample index
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
            del(dataIndex[randIndex])  # delete after use so no sample repeats within a pass
    return weights
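A quick sanity check on the same testSet.txt (two assumptions worth noting: stoGradAscent1 wants a NumPy array rather than a list of lists, and a flat label list rather than the one-element lists loadDataSet produces):

dataMat, labelMat = loadDataSet()
flatLabels = [row[0] for row in labelMat]  # [[1], [0], ...] -> [1, 0, ...]
weights = stoGradAscent1(np.array(dataMat), flatLabels, numIter=150)
plotBestFit(weights)  # 1-D weights index the same way inside plotBestFit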
2.4 Practical Application
Much like naive Bayes earlier: preprocess the data -->> train -->> test.
# Classification function
def classifyVector(inX, weight):
    prob = sigmoid(sum(inX * weight))
    if prob > 0.5:
        return 1.0
    return 0.0

def colicTest():
    frTrain = open('horseColicTraining.txt')
    frTest = open('horseColicTest.txt')
    trainingSet = []
    trainingLabel = []
    for line in frTrain.readlines():
        currLine = line.strip().split()  # split on any whitespace (the file is tab-separated)
        lineArr = []
        # the last field is the label
        for i in range(len(currLine) - 1):
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)
        trainingLabel.append(float(currLine[-1]))
    # improved stochastic gradient ascent --->>> per-sample algorithm = online learning
    trainWeight = stoGradAscent1(np.array(trainingSet), trainingLabel, 500)
    errorCount = 0.0
    numTestVec = 0.0
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split()
        lineArr = []
        for i in range(21):  # 21 features; column 21 holds the label
            lineArr.append(float(currLine[i]))
        if int(classifyVector(np.array(lineArr), trainWeight)) != int(currLine[21]):
            errorCount += 1
    errorRate = errorCount / numTestVec
    print('the error Rate is : ', errorRate)
    return errorRate

def multiTest():
    numTest = 10
    errorSum = 0.0
    for k in range(numTest):
        errorSum += colicTest()
    print('error Rate Average is : ', (errorSum / numTest))
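To run the whole evaluation, assuming horseColicTraining.txt and horseColicTest.txt from the book's repository are in the working directory (results differ from run to run because of the random sampling; the book reports an average error rate of roughly 0.35):

if __name__ == '__main__':
    multiTest()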
2.5 Complete Program
import numpy as np
import matplotlib.pyplot as plt

def loadDataSet():
    dataMat = []
    labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()  # split on whitespace
        # store data as [[1.0, a, b], [1.0, c, d], ...]
        # and labels as [[1], [0], [0], [1], ...]
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append([int(lineArr[2])])
    return dataMat, labelMat

def sigmoid(intX):
    return 1.0 / (1.0 + np.exp(-intX))

# Batch gradient ascent
def gradAscent(dataMatIn, classLabels):
    dataMatrix = np.mat(dataMatIn)
    labelsMat = np.mat(classLabels)
    m, n = dataMatrix.shape
    alpha = 0.001
    maxCycle = 200
    weight = np.ones((n, 1))  # for brevity the bias b is treated as just another w
    for k in range(maxCycle):
        h = sigmoid(dataMatrix * weight)
        error = labelsMat - h
        weight = weight + alpha * dataMatrix.transpose() * error
    return weight

def plotBestFit(weight):
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)  # convert to an array
    n = dataArr.shape[0]
    xcode1 = []; ycode1 = []
    xcode2 = []; ycode2 = []
    for i in range(n):
        if int(labelMat[i][0]) == 1:
            xcode1.append(dataArr[i, 1])
            ycode1.append(dataArr[i, 2])
        else:
            xcode2.append(dataArr[i, 1])
            ycode2.append(dataArr[i, 2])
    fig = plt.figure("data_x_y")
    ax = fig.add_subplot(111)
    ax.scatter(xcode1, ycode1, s=30, c='r', marker='s')
    ax.scatter(xcode2, ycode2, s=30, c='g')
    x = np.mat(np.arange(-3.0, 3.0, 0.1))
    y = (-weight[0] - weight[1] * x) / weight[2]  # boundary: w0 + w1*x1 + w2*x2 = 0
    ax.plot(x.tolist()[0], y.tolist()[0])
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

# Stochastic gradient ascent - old version
def stoGradAscent0(dataMatrix, classLabels):
    m, n = dataMatrix.shape
    alpha = 0.01
    weights = np.ones(n)  # better not to initialize at 0: fitting from 0 is slow
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h  # fixed: compare against the i-th label
        weights = weights + alpha * error * dataMatrix[i]
    return weights

# Stochastic gradient ascent - improved version
def stoGradAscent1(dataMatrix, classLabels, numIter=150):
    # alpha keeps shrinking; the samples are visited in random order
    m, n = dataMatrix.shape
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # indices of samples not yet used in this pass
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # decays as iterations and updates accumulate
            randIndex = int(np.random.uniform(0, len(dataIndex)))  # random position in dataIndex
            sampleIndex = dataIndex[randIndex]  # fixed: map the position to the actual sample index
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
            del(dataIndex[randIndex])  # delete after use so no sample repeats within a pass
    return weights

# Classification function
def classifyVector(inX, weight):
    prob = sigmoid(sum(inX * weight))
    if prob > 0.5:
        return 1.0
    return 0.0

def colicTest():
    frTrain = open('horseColicTraining.txt')
    frTest = open('horseColicTest.txt')
    trainingSet = []
    trainingLabel = []
    for line in frTrain.readlines():
        currLine = line.strip().split()  # split on any whitespace (the file is tab-separated)
        lineArr = []
        # the last field is the label
        for i in range(len(currLine) - 1):
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)
        trainingLabel.append(float(currLine[-1]))
    # improved stochastic gradient ascent --->>> per-sample algorithm = online learning
    trainWeight = stoGradAscent1(np.array(trainingSet), trainingLabel, 500)
    errorCount = 0.0
    numTestVec = 0.0
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split()
        lineArr = []
        for i in range(21):  # 21 features; column 21 holds the label
            lineArr.append(float(currLine[i]))
        if int(classifyVector(np.array(lineArr), trainWeight)) != int(currLine[21]):
            errorCount += 1
    errorRate = errorCount / numTestVec
    print('the error Rate is : ', errorRate)
    return errorRate

def multiTest():
    numTest = 10
    errorSum = 0.0
    for k in range(numTest):
        errorSum += colicTest()
    print('error Rate Average is : ', (errorSum / numTest))