- 预处理 Excel 文件中的数据不平衡问题
# Group the rows of an Excel sheet by label so that every class can be split
# into train / dev / test subsets on its own (a first step toward handling
# class imbalance).
import pandas as pd

data = pd.read_excel("file")  # "file" is a placeholder for the workbook path

train_list, dev_list, test_list = [], [], []

# Raw cell values as a 2-D array, one entry per sheet row.
data_value = data.values

# Distinct labels in first-seen order. They are read from the first column,
# which is assumed to be the "标签" column used for filtering below
# -- TODO confirm against the actual sheet layout.
# dict.fromkeys de-duplicates in a single pass while preserving order.
list_label = list(dict.fromkeys(row[0] for row in data_value))

for label in list_label:
    # Rows that belong to the current label.
    s = data.loc[data["标签"] == label]
    # Keep only columns 'a' and 'c'.
    s = s.loc[:, ['a', 'c']]
    # Convert to a plain list of [a, c] rows (.ix was removed in pandas 1.0).
    s = s.values.tolist()
    # Next step (left to the reader): distribute the rows in `s` across
    # train_list, dev_list and test_list.