一、用pandas合并多个CSV文件,并分割训练集和测试集
def all_data(
    self,
    *,
    negative_path="data/POI/negtive.csv",
    positive_path="data/POI/positive.csv",
    output_path="data/POI/all.csv",
):
    """Concatenate two CSV files row-wise and write the result to one CSV.

    ``self`` is not used by this method.

    Args:
        negative_path: First CSV to read. The default keeps the existing
            on-disk spelling ``negtive.csv``.
        positive_path: Second CSV to read; its rows are appended after the
            rows of ``negative_path``.
        output_path: Destination CSV for the merged rows.

    Returns:
        None. The merged table is written to ``output_path``.
    """
    frames = [pd.read_csv(negative_path), pd.read_csv(positive_path)]
    # ignore_index=True renumbers the rows 0..n-1 instead of keeping each
    # input file's own row labels.
    merged = pd.concat(frames, ignore_index=True)
    # index=False keeps the row labels out of the output file.
    merged.to_csv(output_path, index=False, sep=',')
def split(
    self,
    *,
    input_path='data/POI/all.csv',
    test_path="data/POI/test.csv",
    train_path="data/POI/train.csv",
    test_frac=0.2,
    random_state=None,
):
    """Shuffle a CSV's rows and split them into test and train CSV files.

    ``self`` is not used by this method.

    Args:
        input_path: CSV file to read.
        test_path: Destination CSV for the test rows.
        train_path: Destination CSV for the remaining (train) rows.
        test_frac: Fraction of rows, in ``[0, 1]``, that go to the test file.
            The row count is ``int(round(test_frac * n_rows))``.
        random_state: Seed forwarded to ``DataFrame.sample``. The default
            ``None`` leaves the shuffle unseeded; pass a value to make the
            split reproducible.

    Returns:
        None. The two splits are written to ``test_path`` and ``train_path``.

    Raises:
        ValueError: If ``test_frac`` is outside ``[0, 1]``.
    """
    if not 0.0 <= test_frac <= 1.0:
        raise ValueError(f"test_frac must be within [0, 1], got {test_frac!r}")

    df = pd.read_csv(input_path)
    # frac=1.0 samples every row without replacement, i.e. a full shuffle.
    shuffled = df.sample(frac=1.0, random_state=random_state)
    cut_idx = int(round(test_frac * shuffled.shape[0]))
    # The first cut_idx shuffled rows form the test set, the rest the train set.
    df_test, df_train = shuffled.iloc[:cut_idx], shuffled.iloc[cut_idx:]
    df_test.to_csv(test_path, index=False, sep=',')
    df_train.to_csv(train_path, index=False, sep=',')