在进行深度学习或机器学习时,常常需要对数据进行整理和分类。最常见的做法是把数据路径和对应的标签
写入一个汇总的 txt 或 csv 文件,训练时再从该文件中读取。
#!/usr/bin/env python
# coding=utf-8
"""Build a ``<path><separator><label>`` index file for an image database.

Every immediate sub-directory found while walking the base path is treated
as one subject (class). All entries inside a subject directory are written
to the output file, one per line, together with the subject's integer label.

The code runs unchanged on Python 2.6+ and Python 3.
"""

import sys
import os.path

# This is a tiny script to help you create a CSV file from a face
# database with a similar hierarchy:
#
#  .
#  |-- README
#  |-- s1
#  |   |-- 1.pgm
#  |   |-- ...
#  |   |-- 10.pgm
#  |-- s2
#  |   |-- 1.pgm
#  |   |-- ...
#  |   |-- 10.pgm
#  ...
#  |-- s40
#  |   |-- 1.pgm
#  |   |-- ...
#  |   |-- 10.pgm
#

SEPARATOR = ";"


def create_csv(base_path, output_path, separator=SEPARATOR):
    """Write one ``path<separator>label`` line per entry under *base_path*.

    Args:
        base_path: Root directory whose sub-directories are the subjects.
        output_path: File to create (an existing file is overwritten).
        separator: String placed between the path and the label.

    Returns:
        The number of lines written.

    Each line is also echoed to stdout, as the original script did.
    """
    label = 0
    rows = 0
    # ``with`` guarantees the file is closed even if the walk raises.
    with open(output_path, 'w') as fh:
        for dirname, dirnames, filenames in os.walk(base_path):
            # Sorting in place makes both the traversal order and the label
            # assignment reproducible across runs and filesystems.
            dirnames.sort()
            for subdirname in dirnames:
                subject_path = os.path.join(dirname, subdirname)
                # NOTE: os.walk also descends into each subject directory, so
                # nested directories are listed here as entries and are later
                # labelled as subjects of their own. The expected layout is
                # one level deep.
                for filename in sorted(os.listdir(subject_path)):
                    abs_path = "%s/%s" % (subject_path, filename)
                    line = "%s%s%d" % (abs_path, separator, label)
                    print(line)
                    # One record per line so the file can be read row by row.
                    fh.write(line + "\n")
                    rows += 1
                label = label + 1
    return rows


if __name__ == "__main__":

    if len(sys.argv) != 3:
        # Two arguments are required: the database root and the output file.
        print("usage: create_csv <base_path> <output_csv>")
        sys.exit(1)

    create_csv(sys.argv[1], sys.argv[2])
调用方式如下
python create.py d:/test/jaf d:/a.csv
生成文件结果如下图所示
在 Windows 下,生成的路径中可能同时出现“/”和“\”两种分隔符,可以通过字符串替换将斜杠统一。