线性回归和批量梯度下降法python

通过学习斯坦福公开课的线性回归和梯度下降，参考他人代码自己做了测试，写了个类，以后有时间再去扩展，代码注释以后再加，作业好多：

import numpy as np
import matplotlib.pyplot as plt
import random

class dataMinning:
    """Toy linear regression fitted with batch gradient descent.

    Training data is either read from two whitespace-separated integer text
    files (``readFrom``) or generated synthetically (``genData``).
    ``gradientDescent`` fits ``theta`` on the synthetic data and records the
    sum-of-squared-errors of every iteration in ``cost``.
    """

    # Class-level defaults are kept so that code reading them off the class
    # keeps working; every instance rebinds its own copies in __init__.
    datasets = []
    labelsets = []

    addressD = ''  # path of the data file opened by readFrom
    addressL = ''  # path of the label file opened by readFrom

    npDatasets = np.zeros(1)
    npLabelsets = np.zeros(1)

    cost = []
    numIterations = 0
    alpha = 0
    theta = np.ones(2)

    def __init__(self, addressD, addressL, theta, numIterations, alpha, datasets=None):
        """Store the file paths and the gradient-descent hyper-parameters.

        Args:
            addressD: path of the data file read by ``readFrom``.
            addressL: path of the label file read by ``readFrom``.
            theta: initial parameter vector used by ``gradientDescent``.
            numIterations: number of gradient-descent steps to run.
            alpha: learning rate (step size).
            datasets: optional list that ``readFrom`` appends data rows to;
                a fresh list is created when omitted.
        """
        self.datasets = [] if datasets is None else datasets
        # Mutable state must live on the instance: the class-level lists
        # would otherwise be shared (and appended to) by every instance.
        self.labelsets = []
        self.cost = []
        self.addressD = addressD
        self.addressL = addressL
        self.theta = theta
        self.numIterations = numIterations
        self.alpha = alpha

    def readFrom(self):
        """Load integer rows from the data and label files.

        Each line is split on whitespace and every token is parsed with
        ``int``. Data rows are appended to ``datasets`` and mirrored into
        ``npDatasets``; label rows are appended to ``labelsets`` and
        flattened into the 1-D array ``npLabelsets``.

        Raises:
            IOError/OSError: if a file cannot be opened.
            ValueError: if a token is not an integer literal.
        """
        # str.split() with no argument already discards the trailing newline,
        # so a last line without '\n' no longer loses its final character.
        with open(self.addressD, 'r') as fd:
            for line in fd:
                self.datasets.append([int(tok) for tok in line.split()])
        self.npDatasets = np.array(self.datasets)

        with open(self.addressL, 'r') as fl:
            for line in fl:
                self.labelsets.append([int(tok) for tok in line.split()])

        # Flatten the per-line label lists in a single linear pass.
        self.npLabelsets = np.array(
            [value for row in self.labelsets for value in row])

    def genData(self, numPoints, bias, variance):
        """Generate a noisy line ``y = x + bias + noise`` as training data.

        Fills ``genx`` with shape ``(numPoints, 2)`` (column 0 is the constant
        intercept term 1, column 1 is ``x = 0 .. numPoints-1``) and ``geny``
        with shape ``(numPoints,)``. The noise is ``uniform(0, 1) * variance``.
        """
        self.genx = np.zeros(shape=(numPoints, 2))
        self.geny = np.zeros(shape=numPoints)

        for i in range(numPoints):
            self.genx[i][0] = 1
            self.genx[i][1] = i
            self.geny[i] = (i + bias) + random.uniform(0, 1) * variance

    def gradientDescent(self):
        """Run ``numIterations`` batch gradient-descent steps on the generated data.

        ``genData`` must have been called first, since the method reads
        ``genx`` and ``geny``. Each step appends the current sum of squared
        errors to ``cost`` and rebinds ``theta`` to a new array. The gradient
        is ``X^T (X theta - y)`` without a ``1/m`` factor, so ``alpha`` has to
        be scaled to the number of samples.
        """
        xTrans = self.genx.transpose()
        for _ in range(self.numIterations):
            hypothesis = np.dot(self.genx, self.theta)
            loss = hypothesis - self.geny
            # record the cost
            self.cost.append(np.sum(loss ** 2))
            # calculate the gradient
            gradient = np.dot(xTrans, loss)
            # update step of gradient descent
            self.theta = self.theta - self.alpha * gradient

    def show(self):
        """Print a fixed marker string (placeholder for future output)."""
        print('yes')
        
if __name__ == "__main__":
    # Demo: fit a line to 100 synthetic points and plot the result.
    # Raw strings keep the Windows backslashes literal; '\c' and '\s' are
    # invalid escape sequences that newer Python versions warn about.
    # The two paths are only stored here; readFrom() is never called.
    c = dataMinning(r'c:\city.txt', r'c:\st.txt', np.ones(2), 100000, 0.000005)
    c.genData(100, 25, 10)
    c.gradientDescent()

    # Figure 1: cost (sum of squared errors) at each iteration.
    cx = range(len(c.cost))
    plt.figure(1)
    plt.plot(cx, c.cost)
    plt.ylim(0, 25000)

    # Figure 2: the generated points and the fitted line
    # y = theta[1] * x + theta[0].
    plt.figure(2)
    plt.plot(c.genx[:, 1], c.geny, 'b.')
    x = np.arange(0, 100, 0.1)
    y = x * c.theta[1] + c.theta[0]
    plt.plot(x, y)
    plt.margins(0.2)
    plt.show()

　　　　　　　　　　图1. 迭代过程中的误差cost

　　　　　　　　　　图2. 数据散点图和解直线

参考资料：

1.python编写类：http://blog.csdn.net/wklken/article/details/6313265

2.python中if __name__ == "__main__"的用法：http://www.cnblogs.com/herbert/archive/2011/09/27/2193482.html

3.matplotlib gallery:http://matplotlib.org/gallery.html

4.python批量梯度下降参考代码：http://www.91r.net/ask/17784587.html

相关阅读:
二、编写输出“Hello World”
实验一：JDK下载与安装、Eclipse下载与使用总结心得
C++引用
数组类型与sizeof与指针的引用
电源已接通，未充电
改变Web Browser控件IE版本
“stdafx.h”: No such file or directory
word2013 blog test
Editplus配置VC++(1) 及相关注意事项
VC++6.0在Win7以上系统上Open或Add to Project files崩溃问题解决新办法

原文地址：https://www.cnblogs.com/Key-Ky/p/3468290.html