• Gradient Descent - 3. Implementing Gradient Descent for Linear Regression


    Implementing Gradient Descent for Linear Regression

    Construct the dataset

    import numpy
    import matplotlib.pyplot as plt
    
    # set the random seed for reproducibility
    numpy.random.seed(666)
    x = 2 * numpy.random.random(size=100)
    y = x * 3. + 4. + numpy.random.normal(size=100)  # random.normal adds normally distributed noise
    
    # assume each sample has a single feature: reshape the 100 numbers into 100 rows, 1 column
    X = x.reshape(-1,1)
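    A quick check of the shapes (a small sketch, assuming the code above has been run) shows what reshape(-1,1) does:

    print(x.shape)  # (100,)   -- a flat vector of 100 samples
    print(X.shape)  # (100, 1) -- 100 rows, 1 feature column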
    

    Plot the dataset:

    plt.scatter(x,y)
    plt.show()
    

    Train with gradient descent

    Goal: make

    $$ J = \frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)} - \hat{y}^{(i)}\right)^2 $$

    as small as possible. Its gradient is

    $$ \nabla J = \begin{bmatrix} \frac{\partial J}{\partial \theta_0} \\ \frac{\partial J}{\partial \theta_1} \\ \frac{\partial J}{\partial \theta_2} \\ \cdots \\ \frac{\partial J}{\partial \theta_n} \end{bmatrix} = \frac{2}{m}\begin{bmatrix} \sum\left(X^{(i)}_b\theta - y^{(i)}\right) \\ \sum\left(X^{(i)}_b\theta - y^{(i)}\right)\cdot X_1^{(i)} \\ \sum\left(X^{(i)}_b\theta - y^{(i)}\right)\cdot X_2^{(i)} \\ \cdots \\ \sum\left(X^{(i)}_b\theta - y^{(i)}\right)\cdot X_n^{(i)} \end{bmatrix} $$
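    The same gradient can be collected into one compact matrix expression (an equivalent form, written out here for reference):

    $$ \nabla J(\theta) = \frac{2}{m}\, X_b^{T}\left(X_b\theta - y\right) $$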

    Define the loss function and its derivative

    def J(theta,X_b,y):   # the loss function
        try:
            return numpy.sum((y - X_b.dot(theta))**2)/len(X_b)
        except:
            return float('inf')  # on overflow, return infinity
       
    def dJ(theta,X_b,y):  # the gradient (vector of partial derivatives)
        # vector of partial derivatives to return
        res = numpy.empty(len(theta))
        
        res[0] = numpy.sum(X_b.dot(theta)-y)
        for i in range(1,len(theta)):
            res[i] = (X_b.dot(theta)-y).dot(X_b[:,i])
            
        return res*2/len(X_b)
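    The loop in dJ can also be replaced by a single vectorized expression implementing the compact form above (a sketch; dJ_vectorized is not part of the original code):

    def dJ_vectorized(theta,X_b,y):
        # 2/m * X_b^T * (X_b*theta - y): identical result to dJ, but without the Python loop
        return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)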
    

    Define the gradient descent procedure

    def gradient_descent(X_b,y,init_theta,eta,n_iters=1e4,epsilon=1e-8):
        
        theta = init_theta
        i_iters = 0
        
        # n_iters caps the number of iterations; beyond it the algorithm may not converge, so stop
        while n_iters>i_iters: 
            gradient = dJ(theta,X_b,y)  # the gradient
            last_theta = theta
            theta = theta - eta * gradient  # step downhill towards the minimum
            if abs(J(theta,X_b,y) - J(last_theta,X_b,y)) < epsilon:
                break
            i_iters += 1 
        # return the theta that was found
        return theta
    

    Construct the initial parameters and run gradient descent:

    X_b = numpy.hstack([numpy.ones((len(X),1)),X])
    init_theta = numpy.zeros(X_b.shape[1])
    eta = 0.01
    theta = gradient_descent(X_b,y,init_theta,eta)
    

    The resulting theta values
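    Inspecting theta (the exact decimals depend on the noise generated above, but with seed 666 the two entries should come out close to the true values 4 and 3):

    theta
    # expected output: roughly array([4.0..., 3.0...])
    # theta[0] is the learned intercept, theta[1] is the learned slope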

    When the dataset was created, the intercept was set to 4 and the slope to 3; the values above show that gradient descent has successfully trained this model.

    Encapsulating the algorithm

    def fit_gd(self,X_train,y_train,eta=0.01,n_iters=1e4):
    
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
    
        def J(theta,X_b,y):   # the loss function
            try:
                return numpy.sum((y - X_b.dot(theta))**2)/len(X_b)
            except:
                return float('inf')  # on overflow, return infinity
    
        def dJ(theta,X_b,y):  # the gradient
            # vector of partial derivatives to return
            res = numpy.empty(len(theta))
    
            res[0] = numpy.sum(X_b.dot(theta)-y)
            for i in range(1,len(theta)):
                res[i] = (X_b.dot(theta)-y).dot(X_b[:,i])
            return res*2/len(X_b)
    
        def gradient_descent(X_b,y,init_theta,eta,n_iters,epsilon=1e-8):
            theta = init_theta
            i_iters = 0
            while n_iters>i_iters:
                gradient = dJ(theta,X_b,y)  # the gradient
                last_theta = theta
                theta = theta - eta * gradient  # step downhill towards the minimum
    
                if abs(J(theta,X_b,y) - J(last_theta,X_b,y)) < epsilon:
                    break
                i_iters += 1
            return theta
    
        X_b = numpy.hstack([numpy.ones((len(X_train),1)),X_train])
    
        init_theta = numpy.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b,y_train,init_theta,eta,n_iters)
    
        self.coef_ = self._theta[1:]         # coefficients
        self.interception_ = self._theta[0]  # intercept
        return self
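    For reference, a minimal class skeleton that this method could live in (hypothetical -- the author's actual mylib.LineRegression may differ):

    class LineRegression:
        def __init__(self):
            self.coef_ = None          # coefficients (the slopes)
            self.interception_ = None  # the intercept
            self._theta = None
    
        # fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4) -- the method defined above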
    

    Calling it after encapsulation:

    from mylib.LineRegression import LineRegression
    lin_reg = LineRegression()
    # train with gradient descent
    lin_reg.fit_gd(X,y)
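    After fitting, the learned parameters can be read back from the attributes set in fit_gd (again, the values should land near the true intercept 4 and slope 3):

    lin_reg.coef_           # roughly array([3.0...])
    lin_reg.interception_   # roughly 4.0...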
    

  • Original post: https://www.cnblogs.com/shuai-long/p/11197402.html