import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
Load the data
boston = datasets.load_boston()
X = boston.data
y = boston.target
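Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so on newer versions the data has to be loaded from the original source instead. A minimal sketch, following the layout described in scikit-learn's deprecation notice (pandas is assumed to be available):

import pandas as pd

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])  # 13 feature columns
y = raw_df.values[1::2, 2]                                       # median house value (MEDV)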
Data preprocessing (the Boston target is capped at 50.0, so drop those samples)
X = X[y < 50.0]
y = y[y < 50.0]
Train/test split
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
Solving for the θ parameters of the multiple linear regression equation
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
The θ coefficients
lin_reg.coef_
array([-1.15625837e-01, 3.13179564e-02, -4.35662825e-02, -9.73281610e-02, -1.09500653e+01, 3.49898935e+00, -1.41780625e-02, -1.06249020e+00, 2.46031503e-01, -1.23291876e-02, -8.79440522e-01, 8.31653623e-03, -3.98593455e-01])
The θ intercept
lin_reg.intercept_
32.59756158869991
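As a quick sanity check, the fitted model is just ŷ = X·θ + intercept; a small sketch (assuming the variables above are still in scope) comparing the manual computation with predict:

manual_pred = X_test.dot(lin_reg.coef_) + lin_reg.intercept_
print(np.allclose(manual_pred, lin_reg.predict(X_test)))  # expected: True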
Prediction result: R² score on the test set
lin_reg.score(X_test, y_test)
0.8009390227581037
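The score method returns R² = 1 − Σ(y − ŷ)² / Σ(y − ȳ)²; a short sketch reproducing it by hand:

y_pred = lin_reg.predict(X_test)
r2 = 1 - np.sum((y_test - y_pred) ** 2) / np.sum((y_test - np.mean(y_test)) ** 2)
print(r2)  # should match lin_reg.score(X_test, y_test)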
kNN Regressor (k-nearest neighbors regression)
from sklearn.neighbors import KNeighborsRegressor

knn_reg = KNeighborsRegressor()
knn_reg.fit(X_train, y_train)
knn_reg.score(X_test, y_test)
0.602674505080953
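kNN is distance-based, so features on large scales (e.g. TAX) dominate features on small scales (e.g. NOX) when the data are left unscaled, which partly explains the low score here. A sketch of adding standardization with a Pipeline (the name scaled_knn is ours, and the resulting score will differ from the numbers shown above):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

scaled_knn = Pipeline([
    ("scaler", StandardScaler()),    # zero mean, unit variance per feature
    ("knn", KNeighborsRegressor())
])
scaled_knn.fit(X_train, y_train)
scaled_knn.score(X_test, y_test)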
Grid search over the hyperparameters
from sklearn.model_selection import GridSearchCV

param_grid = [
    {
        "weights": ["uniform"],
        "n_neighbors": [i for i in range(1, 11)]
    },
    {
        "weights": ["distance"],
        "n_neighbors": [i for i in range(1, 11)],
        "p": [i for i in range(1, 6)]
    }
]

knn_reg = KNeighborsRegressor()
grid_search = GridSearchCV(knn_reg, param_grid, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)
grid_search.best_params_
{'n_neighbors': 6, 'p': 1, 'weights': 'distance'}
grid_search.best_score_
0.6243135119018297
grid_search.best_estimator_.score(X_test, y_test)
0.7353138117643773
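Note that best_score_ (0.624) is the mean cross-validated R² on the training folds, while the last line is the R² of the best estimator on the held-out test set, so the two numbers are not directly comparable; even the tuned kNN still trails the linear model's 0.80 here. If the scaling sketch above is used, the same grid search can be run on the pipeline by prefixing each parameter name with the step name (a sketch, assuming scaled_knn from above):

pipe_param_grid = [
    {"knn__weights": ["uniform"], "knn__n_neighbors": list(range(1, 11))},
    {"knn__weights": ["distance"], "knn__n_neighbors": list(range(1, 11)), "knn__p": list(range(1, 6))}
]
grid_search_scaled = GridSearchCV(scaled_knn, pipe_param_grid, n_jobs=-1, verbose=1)
grid_search_scaled.fit(X_train, y_train)
grid_search_scaled.best_estimator_.score(X_test, y_test)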