# 1. GridSearch: randomized search (RandomizedSearchCV)
# Randomized hyper-parameter search for a random forest on the digits dataset.
from time import time

import numpy as np
from scipy.stats import randint as sp_randint
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
# sklearn.grid_search was removed in scikit-learn 0.20; the search classes
# live in sklearn.model_selection.
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Load the data.
digits = load_digits()
X, y = digits.data, digits.target

# Base estimator whose hyper-parameters are tuned.
meta_clf = RandomForestClassifier(n_estimators=20)

# =================================================================
# Parameter distributions / candidate lists to sample from.
param_dist = {
    "max_depth": [3, None],
    "max_features": sp_randint(1, 11),
    # An integer min_samples_split must be >= 2, so sample from [2, 10].
    "min_samples_split": sp_randint(2, 11),
    "min_samples_leaf": sp_randint(1, 11),
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

# Run the randomized search.
n_iter_search = 20
rs_clf = RandomizedSearchCV(
    meta_clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
)

start = time()
rs_clf.fit(X, y)
print("RandomizedSearchCV took %.2f seconds for %d candidates"
      " parameter settings." % ((time() - start), n_iter_search))
# cv_results_ replaces the removed grid_scores_ attribute.
print(rs_clf.cv_results_)
# 2. Grid search (GridSearchCV)
# =================================================================
# Exhaustive grid search over the random forest's hyper-parameters.
# Relies on X, y and meta_clf defined earlier in the script.
from time import time

# sklearn.grid_search was removed in scikit-learn 0.20; import from
# sklearn.model_selection instead.
from sklearn.model_selection import GridSearchCV

# Parameter grid: every combination of these values is evaluated.
param_grid = {
    "max_depth": [3, None],
    "max_features": [1, 3, 10],
    # An integer min_samples_split must be >= 2.
    "min_samples_split": [2, 3, 10],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

# Run the grid search.
gs_clf = GridSearchCV(meta_clf, param_grid=param_grid)

start = time()
gs_clf.fit(X, y)
# cv_results_ replaces the removed grid_scores_ attribute; its "params"
# entry holds one dict per evaluated candidate.
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, len(gs_clf.cv_results_["params"])))
print(gs_clf.cv_results_)
# 3. Pipeline
# Chain univariate feature selection and a linear SVM in a Pipeline.
from sklearn import svm
# sklearn.datasets.samples_generator was removed in scikit-learn 0.24;
# make_classification is importable from sklearn.datasets directly.
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest
# f_classif is the ANOVA F-test for classification targets (f_regression
# is meant for continuous targets).
from sklearn.feature_selection import f_classif
from sklearn.pipeline import Pipeline

# Generate the data.
X, y = make_classification(n_informative=5, n_redundant=0, random_state=42)

# Define the pipeline: ANOVA feature selection first, then the SVM.
anova_filter = SelectKBest(f_classif, k=5)
clf = svm.SVC(kernel='linear')
pipe = Pipeline([('anova', anova_filter), ('svc', clf)])

# Set k=10 on the 'anova' step and C=0.1 on the 'svc' step; step name and
# parameter name are joined with a double underscore "__".
pipe.set_params(anova__k=10, svc__C=.1)
pipe.fit(X, y)

prediction = pipe.predict(X)

# Score on the same data the pipeline was fitted on.
print(pipe.score(X, y))

# Boolean mask of the features kept by the 'anova' step.
s = pipe.named_steps['anova'].get_support()
print(s)