import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn.model_selection import validation_curve


# Model selection: validation curves with ``validation_curve``.
def test_validation_curve() -> None:
    """Plot how LinearSVC's ``C`` parameter affects prediction accuracy.

    Loads the scikit-learn digits dataset, runs ``validation_curve`` with
    10-fold cross-validation over a log-spaced range of ``C`` values, and
    shows the mean training and testing accuracy for each ``C`` with a
    shaded band of one standard deviation on either side.

    Returns nothing; the only observable effect is the figure displayed by
    ``plt.show()``.
    """
    # Load the data.
    digits = load_digits()
    X, y = digits.data, digits.target

    # Compute the validation curve. ``np.logspace(-2, 2)`` uses NumPy's
    # default number of samples between 10**-2 and 10**2.
    param_name = "C"
    param_range = np.logspace(-2, 2)
    train_scores, test_scores = validation_curve(
        LinearSVC(),
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        cv=10,
        scoring="accuracy",
    )

    # For each C, reduce the per-fold scores (axis 1) to their mean and
    # standard deviation.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    # Plot: one line per curve plus a +/- one-std band, on a log-scaled x axis.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    curves = (
        (train_scores_mean, train_scores_std, "Training Accuracy", "r"),
        (test_scores_mean, test_scores_std, "Testing Accuracy", "g"),
    )
    for mean, std, label, color in curves:
        ax.semilogx(param_range, mean, label=label, color=color)
        ax.fill_between(
            param_range, mean - std, mean + std, alpha=0.2, color=color
        )

    ax.set_title("Validation Curve with LinearSVC")
    ax.set_xlabel("C")
    ax.set_ylabel("Score")
    ax.set_ylim(0, 1.1)
    ax.legend(loc='best')
    plt.show()


# Run the demo.
# NOTE(review): this call also executes when the module is imported; consider
# an ``if __name__ == "__main__":`` guard if import-time execution is unwanted.
test_validation_curve()