鉴于该脚本的重要性,很有必要对该脚本做一个全面的注释,以便可以灵活地使用libsvm。
#!/usr/bin/env python
"""Run the usual libsvm workflow on a data set.

Usage: easy.py training_file [testing_file]

Steps, in order:
  1. svm-scale scales the training file and saves the scaling range.
  2. grid.py runs cross validation and reports the best c and g.
  3. svm-train trains a model with those parameters.
  4. If a testing file is given, svm-scale rescales it with the saved
     range and svm-predict writes the predictions.

Output files are created in the current working directory and are named
after the input files: ".scale", ".range" and ".model" for the training
file, ".scale" and ".predict" for the testing file.
"""

import sys
import os
from subprocess import *

# svm, grid, and gnuplot executable files.
# Edit these paths to match the local installation; main() refuses to run
# unless every one of them exists.
is_win32 = (sys.platform == 'win32')
if not is_win32:
    # Program locations on Linux.
    svmscale_exe = "../svm-scale"
    svmtrain_exe = "../svm-train"
    svmpredict_exe = "../svm-predict"
    grid_py = "./grid.py"
    # Must be the path of the gnuplot executable itself, not of a directory.
    gnuplot_exe = "/usr/bin/gnuplot"
else:
    # Program locations on Windows.  Raw strings keep the backslash path
    # separators literal.
    svmscale_exe = r"..\windows\svm-scale.exe"
    svmtrain_exe = r"..\windows\svm-train.exe"
    svmpredict_exe = r"..\windows\svm-predict.exe"
    gnuplot_exe = r"C:\gnuplot\bin\gnuplot.exe"
    grid_py = r".\grid.py"


def require_exists(path, message):
    """Raise AssertionError(message) unless *path* exists.

    An explicit raise is used instead of an ``assert`` statement so the
    check still runs under ``python -O``; the exception type and message
    are the same as an assert would produce.
    """
    if not os.path.exists(path):
        raise AssertionError(message)


def parse_grid_result(last_line):
    """Return ``(c, g, rate)`` as floats from the last line of grid.py output.

    *last_line* may be ``str`` or ``bytes`` (a pipe yields bytes on
    Python 3) and must hold exactly three whitespace-separated numbers.

    Raises:
        ValueError: if the line does not contain exactly three numbers,
            for example when grid.py printed nothing at all.
    """
    fields = last_line.split()
    if len(fields) != 3:
        raise ValueError(
            'expected "c g rate" on the last line of grid.py output, '
            'got {0!r}'.format(last_line))
    c, g, rate = map(float, fields)
    return c, g, rate


def run_quiet(cmd):
    """Run *cmd* through the shell, capturing and discarding its stdout."""
    Popen(cmd, shell=True, stdout=PIPE).communicate()


def grid_search(scaled_file):
    """Run grid.py on *scaled_file* and return the ``(c, g, rate)`` it reports.

    Only the final line of grid.py's output is parsed; every earlier line
    is read and dropped.
    """
    cmd = '{0} -svmtrain "{1}" -gnuplot "{2}" "{3}"'.format(grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
    proc = Popen(cmd, shell=True, stdout=PIPE)
    last_line = ''
    try:
        for line in proc.stdout:
            last_line = line
    finally:
        # Close the pipe and reap the child so it is not left behind.
        proc.stdout.close()
        proc.wait()
    return parse_grid_result(last_line)


def main(argv=None):
    """Run the scale / grid-search / train / predict pipeline.

    Args:
        argv: argument list laid out like ``sys.argv`` (program name,
            training file, optional testing file).  Defaults to
            ``sys.argv``.

    Raises:
        SystemExit: after printing the usage line when no training file
            is given.
        AssertionError: if a required executable or input file is missing.
        ValueError: if the output of grid.py cannot be parsed.
    """
    if argv is None:
        argv = sys.argv

    # Too few arguments: show how the program is used and stop.
    if len(argv) <= 1:
        print('Usage: {0} training_file [testing_file]'.format(argv[0]))
        raise SystemExit

    require_exists(svmscale_exe, "svm-scale executable not found")
    require_exists(svmtrain_exe, "svm-train executable not found")
    require_exists(svmpredict_exe, "svm-predict executable not found")
    require_exists(gnuplot_exe, "gnuplot executable not found")
    require_exists(grid_py, "grid.py not found")

    # Names of the files derived from the training set:
    # ".scale", ".model" and ".range".
    train_pathname = argv[1]
    require_exists(train_pathname, "training file not found")
    file_name = os.path.split(train_pathname)[1]
    scaled_file = file_name + ".scale"
    model_file = file_name + ".model"
    range_file = file_name + ".range"

    # Names of the files derived from the testing set:
    # ".scale" and ".predict".
    has_test_file = len(argv) > 2
    if has_test_file:
        test_pathname = argv[2]
        file_name = os.path.split(test_pathname)[1]
        require_exists(test_pathname, "testing file not found")
        scaled_test_file = file_name + ".scale"
        predict_test_file = file_name + ".predict"

    # Step 1: scale the training data; -s saves the scaling range.
    cmd = '{0} -s "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, train_pathname, scaled_file)
    print('Scaling training data...')
    run_quiet(cmd)

    # Step 2: parameter selection by cross validation with grid.py.
    print('Cross validation...')
    c, g, rate = grid_search(scaled_file)

    # Report the best parameters c and g.
    print('Best c={0}, g={1} CV rate={2}'.format(c, g, rate))

    # Step 3: train the model with the selected parameters.
    cmd = '{0} -c {1} -g {2} "{3}" "{4}"'.format(svmtrain_exe, c, g, scaled_file, model_file)
    print('Training...')
    run_quiet(cmd)

    print('Output model: {0}'.format(model_file))
    if has_test_file:
        # Step 4: scale the testing data; -r reuses the saved range file.
        cmd = '{0} -r "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, test_pathname, scaled_test_file)
        print('Scaling testing data...')
        run_quiet(cmd)

        # Step 5: predict.  stdout is not captured here, so whatever
        # svm-predict prints goes straight to the console.
        cmd = '{0} "{1}" "{2}" "{3}"'.format(svmpredict_exe, scaled_test_file, model_file, predict_test_file)
        print('Testing...')
        Popen(cmd, shell=True).communicate()

        print('Output prediction: {0}'.format(predict_test_file))


if __name__ == "__main__":
    main()