生成K-fold交叉验证数据集

import pandas as pd

from sklearn import model_selection

if __name__ == "__main__":
    # NOTE: the original listing was indented with full-width (U+3000)
    # spaces, which Python rejects as a syntax error; the body is
    # re-indented here with regular ASCII spaces.

    # Training data is in a CSV file called train.csv
    df = pd.read_csv("train.csv")

    # Create a new column called kfold and fill it with -1; every row is
    # overwritten with its fold number by the loop below.
    df["kfold"] = -1

    # Randomize the rows of the data. reset_index(drop=True) gives the
    # shuffled frame a fresh 0..n-1 index, so the positional indices
    # yielded by KFold.split can be used as labels with .loc.
    df = df.sample(frac=1).reset_index(drop=True)

    # Initiate the KFold class from the model_selection module
    kf = model_selection.KFold(n_splits=5)

    # Fill the new kfold column: each row receives the number of the fold
    # in which it belongs to the validation split. The training indices
    # are not needed here.
    for fold, (_, val_) in enumerate(kf.split(X=df)):
        df.loc[val_, "kfold"] = fold

    # Save the new csv with the kfold column
    df.to_csv("train_folds.csv", index=False)

============================================

import pandas as pd
from sklearn.datasets import make_regression
from sklearn import model_selection
if __name__ == '__main__':
    # Build a synthetic regression problem: 15000 samples, 8 features and
    # a single target.
    features, target = make_regression(
        n_samples=15000,
        n_features=8,
        n_targets=1,
    )

    # Wrap the feature matrix in a DataFrame with columns f_0, f_1, ...
    feature_names = [f"f_{idx}" for idx in range(features.shape[1])]
    df = pd.DataFrame(features, columns=feature_names)
    df.loc[:, "target"] = target

    # Placeholder fold id; every row is assigned a real fold below.
    df["kfold"] = -1

    # Shuffle the rows, then rebuild a clean 0..n-1 index so that the
    # positional indices produced by KFold line up with .loc labels.
    shuffled = df.sample(frac=1)
    df = shuffled.reset_index(drop=True)

    # Tag each row with the fold in which it is part of the validation split.
    splitter = model_selection.KFold(n_splits=5)
    for fold_id, (_train_rows, valid_rows) in enumerate(splitter.split(X=df)):
        df.loc[valid_rows, "kfold"] = fold_id

    print(df)

相关阅读:
Magento安装教程
让老婆爱你的十大方法。
easy ui layout设计
下交叉综合症
fileloder.js+struts2实现文件异步上传，无页面刷新效果。
将mysql中的Blob的图片在jsp中显示
详解CSS样式的position属性
Struts2与Spring的整合
Play Framework常用标签list,set,如何遍历list、map类型数据
我所理解的团队

原文地址：https://www.cnblogs.com/songyuejie/p/14781125.html