1. Random Forest
# Use GridSearchCV to tune the hyperparameters
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

RF1 = RandomForestClassifier()
parameter_grid = {"n_estimators": [5, 10, 25, 50, 100],
                  "max_features": [1, 2, 3, 4, 5, 6, 8, 10],
                  "warm_start": [True, False],
                  # "max_depth": [1, 2, 3, 5, 7]
                  }

# StratifiedKFold's n_splits defaulted to 3 in older scikit-learn (5 since 0.22)
cross_validation = StratifiedKFold(n_splits=3, shuffle=False)
grid_search = GridSearchCV(RF1, param_grid=parameter_grid,
                           scoring="f1", cv=cross_validation)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

# With the best parameters found, refit a fresh model
RF2 = RandomForestClassifier(max_features=8, n_estimators=25, warm_start=True)
RF2.fit(X_train, y_train)
2. Logistic Regression
from sklearn.linear_model import LogisticRegression

# liblinear supports both l1 and l2 penalties; lbfgs supports l2 only
tuned_parameters = [{'penalty': ['l1', 'l2'],
                     'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100],
                     'solver': ['liblinear'],
                     'multi_class': ['ovr']},
                    {'penalty': ['l2'],
                     'C': [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100],
                     'solver': ['lbfgs'],
                     'multi_class': ['ovr', 'multinomial']}]
grid_logist = GridSearchCV(LogisticRegression(class_weight="balanced"),
                           cv=3, n_jobs=-1, param_grid=tuned_parameters)
grid_logist.fit(X_train, y_train)
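After fitting, the search object exposes the winning configuration, and because GridSearchCV uses refit=True by default, best_estimator_ is already retrained on all of X_train. A minimal sketch of inspecting it, again assuming X_test and y_test exist from an earlier split:

print(grid_logist.best_params_)
print(grid_logist.best_score_)

# best_estimator_ is the LogisticRegression refit on the full training set
best_lr = grid_logist.best_estimator_
print(best_lr.score(X_test, y_test))  # X_test/y_test assumed, not in the original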
3. KNN
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
param_grid = [{'weights': ['uniform'],
               'n_neighbors': [1, 3, 5, 7, 9]},
              {'weights': ['distance'],
               'n_neighbors': [1, 3, 5, 7, 9],
               # p is the Minkowski power: 1 = Manhattan, 2 = Euclidean
               'p': [1, 3, 4, 5, 7]}]
grid_search = GridSearchCV(knn, param_grid, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)
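Beyond best_params_, it can help to see how every (weights, n_neighbors, p) combination scored rather than only the winner; cv_results_ holds the full table. A minimal sketch, assuming pandas is installed:

import pandas as pd

# cv_results_ records per-combination cross-validated scores; sort by rank
results = pd.DataFrame(grid_search.cv_results_)
print(results[['params', 'mean_test_score', 'rank_test_score']]
      .sort_values('rank_test_score')
      .head())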