• 15.衡量回归算法的标准


    衡量回归算法的标准

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import datasets

    波士顿房产数据

    # Load the Boston housing dataset bundled with scikit-learn.
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2 — confirm the installed version before running this cell.
    boston = datasets.load_boston()
    
    x = boston.data[:, 5] # use only the number-of-rooms feature (column 5)
    
    x.shape
    (506,)
    y = boston.target
    y.shape
    (506,)
    plt.scatter(x, y)

    np.max(y)
    50.0

     去除干扰数据

    # Keep only the samples whose target is strictly below 50.0 (the maximum
    # value printed above). x must be filtered first: the mask is built from
    # the original y, which the second line then overwrites.
    x = x[y < 50.0]
    y = y[y < 50.0]
    
    x.shape, y.shape
    ((490,), (490,))
    plt.scatter(x, y)

    使用简单线性回归

    # Split the data into a training set and a test set
    
    from sklearn.model_selection import train_test_split
    # A fixed random_state is passed so the split can be reproduced;
    # no test_size is given, so train_test_split uses its default.
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=666)
    x_train.shape, y_train.shape
    ((367,), (367,))
    # 使用训练集求回归方程
    
    # Closed-form least-squares fit of the line y = a * x + b on the training set.
    x_mean = np.mean(x_train)
    y_mean = np.mean(y_train)
    
    # Slope numerator: sum of (x_i - x_mean) * (y_i - y_mean), via a dot product.
    num = (x_train - x_mean).dot(y_train - y_mean)
    # Slope denominator: sum of (x_i - x_mean) ** 2.
    d = (x_train - x_mean).dot(x_train - x_mean)
    
    # a is the slope; b is the intercept that makes the line pass through
    # the point (x_mean, y_mean).
    a = num / d
    b = y_mean - a * x_mean
    
    # Fitted values for the training inputs.
    y_hat = a * x_train + b
    x_train.shape, y_train.shape
    ((367,), (367,))
    y_hat.shape
    (367,)
    # Draw the training samples as points and the fitted values y_hat as a line.
    plt.scatter(x_train, y_train)
    plt.plot(x_train, y_hat)

    衡量回归算法的标准

    # Predict the test-set targets with the fitted regression line
    y_predict = a * x_test + b
    # MSE: mean of the squared differences between predictions and true values
    
    mse_test = np.sum((y_predict - y_test)**2) / len(y_test)
    mse_test
    28.215949368640807

    # RMSE: square root of the MSE computed above
    
    from math import sqrt
    rmse_test = sqrt(mse_test)
    rmse_test
    5.311868726600913

    # MAE: mean of the absolute differences between predictions and true values
    
    mae_test = np.sum(np.absolute(y_predict - y_test)) / len(y_test)
    mae_test
    3.9489046062737834

    sklearn 中的 MSE 和 MAE

    from sklearn.metrics import mean_squared_error
    from sklearn.metrics import mean_absolute_error


    mean_squared_error(y_test, y_predict)
    28.215949368640807
    mean_absolute_error(y_test, y_predict)
    3.9489046062737834

    R Square

    # R squared computed by hand as 1 - MSE / Var(y_test); np.var is called
    # with its default arguments.
    rsquare = 1 - mean_squared_error(y_test, y_predict) / np.var(y_test)
    rsquare
    0.5682464825049472

    from sklearn.metrics import r2_score
    r2_score(y_test, y_predict)
    0.5682464825049472
    
    
  • 相关阅读:
    class11_创建新的输出字段P2
    class10_创建新的输出字段
    class09_高级过滤数据
    Markdown高级语法
    class08_过滤数据
    class07_查询数据
    class06_插入数据
    class05_操纵表
    class04_创建表02
    class03_Create a new table by SQL
  • 原文地址:https://www.cnblogs.com/waterr/p/14039901.html
Copyright © 2020-2023  润新知