1. Brute-force search with two nested for loops
# naive grid search implementation
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)
print("Size of training set: %d   size of test set: %d" % (X_train.shape[0], X_test.shape[0]))

best_score = 0
for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        # for each combination of parameters, train an SVC
        svm = SVC(gamma=gamma, C=C)
        svm.fit(X_train, y_train)
        # evaluate the SVC on the test set
        score = svm.score(X_test, y_test)
        # if we got a better score, store the score and parameters
        if score > best_score:
            best_score = score
            best_parameters = {'C': C, 'gamma': gamma}

print("best score: ", best_score)
print("best parameters: ", best_parameters)
Output:
Size of training set: 112   size of test set: 38
best score:  0.9736842105263158
best parameters:  {'C': 100, 'gamma': 0.001}
2. Brute-force search with a parameter-grid dictionary
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

pipe_svc = Pipeline([('scl', StandardScaler()),
                     ('clf', SVC(random_state=1))])
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
param_grid = [{'clf__C': param_range,
               'clf__kernel': ['linear']},
              {'clf__C': param_range,
               'clf__gamma': param_range,
               'clf__kernel': ['rbf']}]
gs = GridSearchCV(estimator=pipe_svc,
                  param_grid=param_grid,
                  scoring='accuracy',
                  cv=10,
                  n_jobs=-1)
gs = gs.fit(X_train, y_train)
print(gs.best_score_)
print(gs.best_params_)
Output:
0.978021978022
{'clf__C': 0.1, 'clf__kernel': 'linear'}
The param_grid argument of GridSearchCV is a list of dictionaries. For the linear SVM we only tune the parameter C; for the RBF-kernel SVM we tune both C and gamma. Finally, the best parameter combination is available through best_params_.
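Beyond the single best combination, the fitted GridSearchCV object also records the cross-validated score of every combination it tried in its cv_results_ attribute. A minimal sketch, assuming gs is the fitted searcher from the example above:

# Print the mean cross-validated accuracy of each parameter combination tried
# (assumes `gs` is the fitted GridSearchCV object from the example above).
for mean_score, params in zip(gs.cv_results_['mean_test_score'],
                              gs.cv_results_['params']):
    print('%.3f  %r' % (mean_score, params))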
Next, we build the final model directly from the best parameter combination (best_estimator_):
clf = gs.best_estimator_
clf.fit(X_train, y_train)
print('Test accuracy: %.3f' % clf.score(X_test, y_test))
Grid search works well, but exhaustively enumerating every combination is time-consuming. sklearn also implements randomized search through the RandomizedSearchCV class, which samples a fixed number of parameter combinations at random instead of trying them all; a sketch follows below.
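A minimal sketch of such a randomized search, reusing the pipe_svc pipeline and training data from the GridSearchCV example above; the loguniform distributions and n_iter=20 are illustrative choices rather than values from the original text (scipy >= 1.4 is assumed for loguniform):

# Randomized search: sample 20 parameter settings instead of enumerating the full grid.
# Assumes `pipe_svc`, `X_train`, and `y_train` from the GridSearchCV example above.
from scipy.stats import loguniform   # requires scipy >= 1.4
from sklearn.model_selection import RandomizedSearchCV

param_dist = {'clf__C': loguniform(1e-4, 1e3),      # C sampled on a log scale
              'clf__gamma': loguniform(1e-4, 1e3),  # gamma sampled on a log scale
              'clf__kernel': ['rbf']}
rs = RandomizedSearchCV(estimator=pipe_svc,
                        param_distributions=param_dist,
                        n_iter=20,           # number of sampled combinations
                        scoring='accuracy',
                        cv=10,
                        random_state=0,
                        n_jobs=-1)
rs = rs.fit(X_train, y_train)
print(rs.best_score_)
print(rs.best_params_)

Because only n_iter combinations are evaluated, the cost of randomized search is controlled directly by that parameter rather than by the size of the grid.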