import numpy as np from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score #模型选择数据集切分train_test_split模型 def test_train_test_split(): X=[[1,2,3,4], [11,12,13,14], [21,22,23,24], [31,32,33,34], [41,42,43,44], [51,52,53,54], [61,62,63,64], [71,72,73,74]] y=[1,1,0,0,1,1,0,0] # 切分,测试集大小为原始数据集大小的 40% X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=0) print("X_train=",X_train) print("X_test=",X_test) print("y_train=",y_train) print("y_test=",y_test) # 分层采样切分,测试集大小为原始数据集大小的 40% X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4,random_state=0,stratify=y) print("Stratify:X_train=",X_train) print("Stratify:X_test=",X_test) print("Stratify:y_train=",y_train) print("Stratify:y_test=",y_test) test_train_test_split()
#模型选择数据集切分KFold模型 def test_KFold(): X=np.array([[1,2,3,4], [11,12,13,14], [21,22,23,24], [31,32,33,34], [41,42,43,44], [51,52,53,54], [61,62,63,64], [71,72,73,74], [81,82,83,84]]) y=np.array([1,1,0,0,1,1,0,0,1]) # 切分之前不混洗数据集 folder=KFold(n_splits=3,random_state=0,shuffle=False) for train_index,test_index in folder.split(X,y): print("Train Index:",train_index) print("Test Index:",test_index) print("X_train:",X[train_index]) print("X_test:",X[test_index]) print("") # 切分之前混洗数据集 shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True) for train_index,test_index in shuffle_folder.split(X,y): print("Shuffled Train Index:",train_index) print("Shuffled Test Index:",test_index) print("Shuffled X_train:",X[train_index]) print("Shuffled X_test:",X[test_index]) print("") test_KFold()
#模型选择数据集切分StratifiedKFold模型 def test_StratifiedKFold(): X=np.array([[1,2,3,4], [11,12,13,14], [21,22,23,24], [31,32,33,34], [41,42,43,44], [51,52,53,54], [61,62,63,64], [71,72,73,74]]) y=np.array([1,1,0,0,1,1,0,0]) folder=KFold(n_splits=4,random_state=0,shuffle=False) stratified_folder=StratifiedKFold(n_splits=4,random_state=0,shuffle=False) for train_index,test_index in folder.split(X,y): print("Train Index:",train_index) print("Test Index:",test_index) print("y_train:",y[train_index]) print("y_test:",y[test_index]) print("") for train_index,test_index in stratified_folder.split(X,y): print("Stratified Train Index:",train_index) print("Stratified Test Index:",test_index) print("Stratified y_train:",y[train_index]) print("Stratified y_test:",y[test_index]) print("") test_StratifiedKFold()
#模型选择数据集切分LeaveOneOut模型 def test_LeaveOneOut(): X=np.array([[1,2,3,4], [11,12,13,14], [21,22,23,24], [31,32,33,34]]) y=np.array([1,1,0,0]) lo=LeaveOneOut() for train_index,test_index in lo.split(X): print("Train Index:",train_index) print("Test Index:",test_index) print("X_train:",X[train_index]) print("X_test:",X[test_index]) print("") test_LeaveOneOut()
#模型选择数据集切分cross_val_score模型 def test_cross_val_score(): from sklearn.datasets import load_digits from sklearn.svm import LinearSVC digits=load_digits() # 加载用于分类问题的数据集 X=digits.data y=digits.target # 使用 LinearSVC 作为分类器 result=cross_val_score(LinearSVC(),X,y,cv=10) print("Cross Val Score is:",result) test_cross_val_score()