"""Fit and evaluate an ordinary least-squares model on the ccpp.csv data set.

The script reads the CSV, fits ``LinearRegression`` on a train/test split,
reports MSE/RMSE on the held-out split, repeats the evaluation with 10-fold
cross-validated predictions, and finally plots measured against predicted
values of the target column.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# NOTE: sklearn.cross_validation is the module name used by old scikit-learn
# releases; newer releases moved these helpers to sklearn.model_selection.
from sklearn.model_selection import cross_val_predict, train_test_split

# Location of the input file; adjust for the local machine.
DATA_PATH = "C:/Users/Administrator/Desktop/data/ccpp.csv"
FEATURE_COLUMNS = ["AT", "V", "AP", "RH"]
TARGET_COLUMN = "PE"

# Load the data with pandas and show the first rows.  The preview is printed
# explicitly because a bare ``data.head()`` expression is discarded when the
# file runs as a script.
data = pd.read_csv(DATA_PATH)
print(data.head())

# Feature matrix and target.  The target is kept as a one-column DataFrame
# (double brackets), so it stays two-dimensional.
X = data[FEATURE_COLUMNS]
print(X.shape)
y = data[[TARGET_COLUMN]]
print(y.shape)

# Split into training and test sets; random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Fit the model on the training split and inspect the fitted parameters.
linreg = LinearRegression()
linreg.fit(X_train, y_train)
print(linreg.intercept_)  # intercept
print(linreg.coef_)  # coefficients

# Evaluate on the held-out test split.  The MSE is computed once and reused
# for the RMSE instead of calling mean_squared_error twice.
y_pred = linreg.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)
print("RMSE:", np.sqrt(test_mse))

# Cross-validation: every row is predicted by a model fitted on the other
# folds (cv=10), then scored against the full target.
predicted = cross_val_predict(linreg, X, y, cv=10)
cv_mse = metrics.mean_squared_error(y, predicted)
print("MSE:", cv_mse)
print("RMSE:", np.sqrt(cv_mse))

# Plot measured vs. predicted values.  The dashed diagonal marks perfect
# predictions; its end points are taken as scalars from the target column
# rather than as one-element Series from DataFrame.min()/max().
y_low = y[TARGET_COLUMN].min()
y_high = y[TARGET_COLUMN].max()
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y_low, y_high], [y_low, y_high], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()