• 吴裕雄 python 机器学习——模型选择数据集切分


    import  numpy as np
    from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut,cross_val_score
    
    #模型选择数据集切分train_test_split模型
    def test_train_test_split():
        X=[[1,2,3,4],
           [11,12,13,14],
           [21,22,23,24],
           [31,32,33,34],
           [41,42,43,44],
           [51,52,53,54],
           [61,62,63,64],
           [71,72,73,74]]
        y=[1,1,0,0,1,1,0,0]
        # 切分,测试集大小为原始数据集大小的 40%
        X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=0) 
        print("X_train=",X_train)
        print("X_test=",X_test)
        print("y_train=",y_train)
        print("y_test=",y_test)
        # 分层采样切分,测试集大小为原始数据集大小的 40%
        X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4,random_state=0,stratify=y) 
        print("Stratify:X_train=",X_train)
        print("Stratify:X_test=",X_test)
        print("Stratify:y_train=",y_train)
        print("Stratify:y_test=",y_test)
        
    test_train_test_split()

    #模型选择数据集切分KFold模型
    def test_KFold():
        X=np.array([[1,2,3,4],
           [11,12,13,14],
           [21,22,23,24],
           [31,32,33,34],
           [41,42,43,44],
           [51,52,53,54],
           [61,62,63,64],
           [71,72,73,74],
           [81,82,83,84]])
        y=np.array([1,1,0,0,1,1,0,0,1])
        # 切分之前不混洗数据集
        folder=KFold(n_splits=3,random_state=0,shuffle=False) 
        for train_index,test_index in folder.split(X,y):
            print("Train Index:",train_index)
            print("Test Index:",test_index)
            print("X_train:",X[train_index])
            print("X_test:",X[test_index])
            print("")
        # 切分之前混洗数据集
        shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True) 
        for train_index,test_index in shuffle_folder.split(X,y):
            print("Shuffled Train Index:",train_index)
            print("Shuffled Test Index:",test_index)
            print("Shuffled X_train:",X[train_index])
            print("Shuffled X_test:",X[test_index])
            print("")
            
    test_KFold()

    #模型选择数据集切分StratifiedKFold模型
    def test_StratifiedKFold():
        X=np.array([[1,2,3,4],
           [11,12,13,14],
           [21,22,23,24],
           [31,32,33,34],
           [41,42,43,44],
           [51,52,53,54],
           [61,62,63,64],
           [71,72,73,74]])
    
        y=np.array([1,1,0,0,1,1,0,0])
    
        folder=KFold(n_splits=4,random_state=0,shuffle=False)
        stratified_folder=StratifiedKFold(n_splits=4,random_state=0,shuffle=False)
        for train_index,test_index in folder.split(X,y):
            print("Train Index:",train_index)
            print("Test Index:",test_index)
            print("y_train:",y[train_index])
            print("y_test:",y[test_index])
            print("")
    
        for train_index,test_index in stratified_folder.split(X,y):
            print("Stratified Train Index:",train_index)
            print("Stratified Test Index:",test_index)
            print("Stratified y_train:",y[train_index])
            print("Stratified y_test:",y[test_index])
            print("")
            
    test_StratifiedKFold()

    #模型选择数据集切分LeaveOneOut模型
    def test_LeaveOneOut():
        X=np.array([[1,2,3,4],
           [11,12,13,14],
           [21,22,23,24],
           [31,32,33,34]])
        y=np.array([1,1,0,0])
        lo=LeaveOneOut()
        for train_index,test_index in lo.split(X):
            print("Train Index:",train_index)
            print("Test Index:",test_index)
            print("X_train:",X[train_index])
            print("X_test:",X[test_index])
            print("")
            
    test_LeaveOneOut()

    #模型选择数据集切分cross_val_score模型
    def test_cross_val_score():
        from sklearn.datasets import  load_digits
        from sklearn.svm import  LinearSVC
        digits=load_digits() # 加载用于分类问题的数据集
        X=digits.data
        y=digits.target
        # 使用 LinearSVC 作为分类器
        result=cross_val_score(LinearSVC(),X,y,cv=10) 
        print("Cross Val Score is:",result)
        
    test_cross_val_score()

  • 相关阅读:
    查看文件 ls -lh
    java Dom4j xml 写
    centos tar 常用
    os && shutil 模块
    Visual Studio
    ssh 无法登陆
    find 命令
    Centos7 安装redis
    zerorpc
    uwsgi
  • 原文地址:https://www.cnblogs.com/tszr/p/10802275.html
Copyright © 2020-2023  润新知