A Simple Application of Linear Regression


    Warning: this post is a beginner's set of study notes.

    Dataset link:

    http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=DeepLearning&doc=exercises/ex2/ex2.html

    The dataset is a set of pairs (x^{(i)}, y^{(i)}):

    x = load('ex2x.dat'); % input feature, one value per training example
    y = load('ex2y.dat'); % target values

    plot(x, y, 'o'); % scatter plot of the raw data

      

      Hypothesis function:

    \begin{displaymath}
    h_{\theta}(x) = \theta^{T} x = \sum_{i=0}^{n} \theta_i x_i
    \end{displaymath}

       Next, put x in matrix form by adding a column of ones for the intercept term:

    m = length(y); % store the number of training examples
    x = [ones(m, 1), x]; % Add a column of ones to x

    The cost function to minimize is the squared-error cost:

    \begin{displaymath}
    J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_{\theta}(x^{(i)}) - y^{(i)} \right)^{2}
    \end{displaymath}

       MATLAB implementation:

    function [jVal,gradient] = costFunction(theta)
    x = load('ex2x.dat');
    y = load('ex2y.dat');
    m = length(y); % number of training examples

    % Cost: J(theta) = 1/(2m) * sum_i (h(x_i) - y_i)^2
    % (scaling J by a positive constant would not change the minimizer)
    w = 0;
    for i = 1:m
    w = w + (theta(1) + theta(2) * x(i) - y(i))^2;
    end
    jVal = w / (2 * m);

    % Gradient: dJ/dtheta_j = 1/m * sum_i (h(x_i) - y_i) * x_j
    gradient = zeros(2,1);

    w = 0;
    for i = 1:m
    w = w + (theta(1) + theta(2) * x(i) - y(i));
    end
    gradient(1) = w / m;

    w = 0;
    for i = 1:m
    w = w + (theta(1) + theta(2) * x(i) - y(i)) * x(i);
    end
    gradient(2) = w / m;

    end

    At the command prompt:

    >> options = optimset('GradObj','on','MaxIter',1000);
    >> initialTheta = zeros(2,1);
    >> [optTheta,functionVal,exitFlag] = fminunc(@costFunction, initialTheta, options)

    Returned results:

    optTheta =

    0.7502
    0.0639


    functionVal =

    2.4677


    exitFlag =

    1

    Because this example has only one feature, the result can be inspected on a 2-D plot; with higher-dimensional data such a view is no longer possible.
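    As a quick sanity check, here is a minimal NumPy/matplotlib sketch of that 2-D view; it assumes ex2x.dat and ex2y.dat are in the working directory and reuses the optTheta values reported above:

    import numpy as np
    import matplotlib.pyplot as plt

    x = np.loadtxt('ex2x.dat')   # single input feature
    y = np.loadtxt('ex2y.dat')   # targets
    theta = [0.7502, 0.0639]     # optTheta from the fminunc run above

    plt.plot(x, y, 'o', label='data')
    plt.plot(x, theta[0] + theta[1] * x, label='fitted line')
    plt.legend()
    plt.show()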

    In this experiment, the update of theta and the choice of learning rate are handled automatically by MATLAB (fminunc);

    if you write gradient descent by hand,

    for i = 1:iter
      theta = theta - X'*(X*theta-y)/m*alpha; % batch gradient descent update
    end

     you must choose the learning rate alpha and the number of iterations iter yourself.
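    For concreteness, a minimal NumPy sketch of that hand-written loop; the alpha and iters values below are illustrative choices, not tuned results:

    import numpy as np

    x = np.loadtxt('ex2x.dat')
    y = np.loadtxt('ex2y.dat')
    m = len(y)
    X = np.column_stack([np.ones(m), x])  # add the intercept column

    alpha, iters = 0.07, 1500             # must be chosen by hand; illustrative here
    theta = np.zeros(2)
    for _ in range(iters):
        theta = theta - alpha * X.T @ (X @ theta - y) / m
    print(theta)                          # should approach optTheta above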

    Computing theta directly with matrices (the normal equation) is faster and exact, at least for a problem of this size:

    \begin{displaymath}
    \theta = \left( X^{T} X \right)^{-1} X^{T} \vec{y}
    \end{displaymath}
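    A short NumPy sketch of that closed-form solve (np.linalg.solve is used instead of an explicit inverse for numerical stability; data files as above):

    import numpy as np

    x = np.loadtxt('ex2x.dat')
    y = np.loadtxt('ex2y.dat')
    X = np.column_stack([np.ones(len(y)), x])

    # theta = (X^T X)^{-1} X^T y, computed as a linear solve
    theta = np.linalg.solve(X.T @ X, X.T @ y)
    print(theta)  # should match optTheta above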

    I'm a beginner; corrections are welcome!

    Supplement

    Locally weighted linear regression

    Suppose the dataset looks like the one in the figures below.

    The left figure shows that a straight-line fit underfits. How can we get a better fit?

    Use locally weighted linear regression with a Gaussian kernel.

    The Gaussian kernel assigns each sample x^{(i)} a weight relative to the query point x (in the code below the bandwidth σ is named k):

    \begin{displaymath}
    w^{(i)} = \exp\left( -\frac{\left( x^{(i)} - x \right)^{2}}{2\sigma^{2}} \right)
    \end{displaymath}

    As the distance between x^{(i)} and the query point grows, the kernel value decreases monotonically. Using this property, every point near the prediction point receives a weight, and the closer a point is to the prediction point, the larger its weight.

    The figures below illustrate this.

    In the figures, the black point is the prediction point and the red region corresponds to the smallest σ; as σ grows, the region of influence expands, and the fitted curve changes with it.

    So the quality of the fit can be tuned by changing σ.

    Overall, when σ = 1 essentially all points are included with similar weights, so local weighting has no effect and the fit reduces to ordinary linear regression.

    When σ is too small, only the nearest points matter and the fit chases the noise, i.e. it overfits; the small numeric sketch below shows how σ controls the weights.
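    A tiny NumPy sketch of how the bandwidth controls the weights (the distances and σ values below are made up purely for illustration):

    import numpy as np

    dists = np.array([0.0, 0.1, 0.5, 1.0, 2.0])  # distances to the query point
    for sigma in [0.1, 0.5, 1.0]:                # illustrative bandwidths
        w = np.exp(-dists**2 / (2 * sigma**2))   # Gaussian kernel weights
        print(sigma, np.round(w, 3))
    # large sigma: distant points keep noticeable weight, so the fit approaches
    # ordinary regression; small sigma: only the nearest points matter (overfitting risk)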

    Ordinary (normal-equation) regression curve

    Locally weighted linear regression

    Ridge regression
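    For reference, ridge regression shrinks the coefficients by adding a scaled identity matrix to X^T X before inverting; this is exactly what ridgeRegres in the code below computes:

    \begin{displaymath}
    \theta = \left( X^{T} X + \lambda I \right)^{-1} X^{T} \vec{y}
    \end{displaymath}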

    Code with comments:

    #coding=utf-8
    from numpy import *
    import matplotlib.pyplot as plt
    
    #load the dataset: tab-separated columns, the last column is the target
    def loadDataSet(fileName):
        numFeat = len(open(fileName).readline().split('\t')) - 1
        dataMat = []; labelMat = []
        fr = open(fileName)
        for line in fr.readlines():
            lineArr =[]
            curLine = line.strip().split('\t')
            for i in range(numFeat):
                lineArr.append(float(curLine[i]))
            dataMat.append(lineArr)
            labelMat.append(float(curLine[-1]))
        return dataMat,labelMat
    
    #compute the regression coefficients w via the normal equation
    #(w[0] is the intercept b, w[1] is the slope k)
    def standRegres(xArr,yArr):
        xMat = mat(xArr); yMat = mat(yArr).T
        xTx = xMat.T*xMat
        #bail out if X^T X is singular (not invertible)
        if linalg.det(xTx) == 0.0:
            print ("This matrix is singular, cannot do inverse")
            return
        ws = xTx.I * (xMat.T*yMat)
        return ws
     
    #locally weighted linear regression at a single query point
    #(k is the bandwidth, called sigma in the discussion above)
    def lwlr(testPoint,xArr,yArr,k=1.0):
        xMat = mat(xArr); yMat = mat(yArr).T
        m = shape(xMat)[0]
        weights = mat(eye((m)))
        #assign every sample a weight
        for j in range(m):
            diffMat = testPoint - xMat[j,:]
            #weight via the Gaussian kernel
            weights[j,j] = exp(diffMat*diffMat.T/(-2.0*k**2))
        xTx = xMat.T * (weights * xMat)
        if linalg.det(xTx) == 0.0:
            print ("This matrix is singular, cannot do inverse")
            return
        ws = xTx.I * (xMat.T * (weights * yMat))
        return testPoint * ws
    
    #evaluate lwlr at every point of testArr
    def lwlrTest(testArr,xArr,yArr,k=1.0):  
        m = shape(testArr)[0]
        yHat = zeros(m)
        for i in range(m):
            yHat[i] = lwlr(testArr[i],xArr,yArr,k)
        return yHat
    
    #sum of squared errors between targets and predictions
    def rssError(yArr,yHatArr):
        return ((yArr-yHatArr)**2).sum()
    
    #ridge regression
    def ridgeRegres(xMat,yMat,lam=0.2):
        xTx = xMat.T*xMat
        #add an (n x n) identity matrix scaled by lambda (default 0.2),
        #where n is the number of features
        denom = xTx + eye(shape(xMat)[1])*lam
        if linalg.det(denom) == 0.0:
            print ("This matrix is singular, cannot do inverse")
            return
        ws = denom.I * (xMat.T*yMat)
        return ws
    
    #sweep lambda over an exponentially increasing range and record the coefficients
    def ridgeTest(xArr,yArr):
        xMat = mat(xArr); yMat=mat(yArr).T
        yMean = mean(yMat,0)
        yMat = yMat - yMean     #to eliminate X0 take mean off of Y
        #regularize X's
        xMeans = mean(xMat,0)   #calc mean then subtract it off
        xVar = var(xMat,0)      #calc variance of Xi then divide by it
        xMat = (xMat - xMeans)/xVar
        numTestPts = 30
        wMat = zeros((numTestPts,shape(xMat)[1]))
        for i in range(numTestPts):
            ws = ridgeRegres(xMat,yMat,exp(i-10))
            wMat[i,:]=ws.T
        return wMat
    
    #forward stagewise linear regression
    def regularize(xMat):#regularize by columns
        inMat = xMat.copy()
        inMeans = mean(inMat,0)   #calc mean then subtract it off
        inVar = var(inMat,0)      #calc variance of Xi then divide by it
        inMat = (inMat - inMeans)/inVar
        return inMat
    
    def stageWise(xArr,yArr,eps=0.01,numIt=100):
        xMat = mat(xArr); yMat=mat(yArr).T
        yMean = mean(yMat,0)
        yMat = yMat - yMean     #can also regularize ys but will get smaller coef
        xMat = regularize(xMat)
        m,n=shape(xMat)
        returnMat = zeros((numIt,n)) #record ws at every iteration (returned below)
        ws = zeros((n,1)); wsTest = ws.copy(); wsMax = ws.copy()
        for i in range(numIt):
            print(ws.T)
            lowestError = inf
            for j in range(n):
                for sign in [-1,1]:
                    wsTest = ws.copy()
                    wsTest[j] += eps*sign
                    yTest = xMat*wsTest
                    rssE = rssError(yMat.A,yTest.A)
                    if rssE < lowestError:
                        lowestError = rssE
                        wsMax = wsTest
            ws = wsMax.copy()
            returnMat[i,:]=ws.T
        return returnMat
        
    def test1():
        xArr,yArr = loadDataSet('ex0.txt')
        ws = standRegres(xArr,yArr)
        xMat = mat(xArr)
        yMat = mat(yArr)
        yHat = xMat * ws
        #scatter plot of the dataset
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
        #plot the regression line (sort x first so the line draws cleanly)
        xCopy = xMat.copy()
        xCopy.sort(0)
        yHat = xCopy * ws
        ax.plot(xCopy[:,1],yHat)
        plt.show()
        
    #------------- divider ----- everything above: ordinary linear regression ------------------------
    
    def test2():
        xArr,yArr = loadDataSet('ex0.txt')
        print (yArr[0])
        #ws at the test point xArr[0] under different values of k
        lwlr(xArr[0],xArr,yArr,1.0)
        lwlr(xArr[0],xArr,yArr,0.001)
        #adjust k to obtain differently smoothed fitted curves
        yHat = lwlrTest(xArr,xArr,yArr,0.003)
        xMat = mat(xArr)
        strInd = xMat[:,1].argsort(0)
        xSort = xMat[strInd][:,0,:]    
        #scatter plot of the dataset
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #plot the regression curve
        ax.plot(xSort[:,1],yHat[strInd])
        ax.scatter(xMat[:,1].flatten().A[0],mat(yArr).T.flatten().A[0],s=2,c='red')
        plt.show()    
    
    #------------- divider ----- everything above: locally weighted linear regression ------------------------
    def test3():
        abX,abY = loadDataSet('abalone.txt')
        ridgeWeights = ridgeTest(abX,abY)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #plot how each coefficient changes with lambda:
        #when lambda is small the coefficients match ordinary regression; as lambda
        #grows they shrink toward zero, and a well-predicting lambda lies in between
        ax.plot(ridgeWeights)
        plt.show()
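
    A minimal driver for the three demos above (a sketch; it assumes the data files ex0.txt and abalone.txt used by this code are in the working directory):

    if __name__ == '__main__':
        test1()  # ordinary least-squares fit
        test2()  # locally weighted fit with k = 0.003
        test3()  # ridge coefficient paths over lambda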
        