1.逻辑回归是怎么防止过拟合的?为什么正则化可以防止过拟合?(大家用自己的话介绍下)
(1)逻辑回归是利用正则化来防止过拟合的;
(2)正则化是在损失函数中加入参数范数(L1或L2)的惩罚项,约束参数的取值使其不要太大,从而降低模型复杂度,所以可以在一定程度上减少过拟合情况;
2.用logistic回归来进行实践操作,数据不限。
源代码
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, export_graphviz  # decision tree classifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Predict whether the weather conditions are suitable for playing.
data = pd.read_csv('data/data.csv')

# --- Data preprocessing -------------------------------------------------
# Integer codes for each categorical column. Values that are not listed
# here are left untouched in the frame.
ENCODINGS = {
    'Play': {'Yes': 1, 'No': 0},
    'Windy': {'Strong': 1, 'Weak': 0},
    'Humidity': {'High': 1, 'Normal': 0},
    'Temp': {'Hot': 1, 'Mild': 2, 'Cool': 3},
    'Outlook': {'Sunny': 1, 'Overcast': 2, 'Rain': 3},
}
for column, mapping in ENCODINGS.items():
    for label, code in mapping.items():
        data.loc[data[column] == label, column] = code

# --- Split features and target -----------------------------------------
# Features are every column except the first and the last; the target is
# the last column. The columns still have object dtype after the in-place
# encoding above, so cast them to int once here.
# NOTE(review): iloc[1:] skips the first data row (read_csv has already
# consumed the header line) - confirm this is intentional.
x_data = data.iloc[1:, 1:-1].astype('int')
y_data = data.iloc[1:, -1].astype('int')

# --- Build and train the model -----------------------------------------
x_tr, x_te, y_tr, y_te = train_test_split(x_data, y_data, test_size=0.2)

# Fit the scaler on the training split only, then apply the same
# transformation to the test split.
std = StandardScaler()
x_train = std.fit_transform(x_tr)
x_test = std.transform(x_te)

# Train, predict and score on the standardized features throughout, so the
# model never sees inputs on a different scale from the one it was fitted on.
LG = LogisticRegression()
LG.fit(x_train, y_tr)
pre = LG.predict(x_test)

print(data)
print('模型的准确率:', LG.score(x_test, y_te))
# classification_report lists precision, recall and F1 for each class.
print('模型的召回率:', classification_report(y_te, pre))
运行结果截图