• libsvm 之 easy.py(流程化脚本)注释


    鉴于该脚本的重要性,很有必要对该脚本做一个全面的注释,以便可以灵活地使用 libsvm。

    #!/usr/bin/env python
    # 这种设置python路径的方法更为科学
    
    import sys
    import os
    from subprocess import *
    
    # With no training file on the command line, show the usage line and stop.
    if len(sys.argv) < 2:
        usage_text = 'Usage: {0} training_file [testing_file]'.format(sys.argv[0])
        print(usage_text)
        raise SystemExit
    
    # Locations of the svm-scale / svm-train / svm-predict binaries, the
    # grid.py parameter-search script and the gnuplot executable.

    is_win32 = (sys.platform == 'win32')
    if not is_win32:
        # Path configuration for non-Windows systems (paths are relative to
        # the directory this script is run from).
        svmscale_exe = "../svm-scale"
        svmtrain_exe = "../svm-train"
        svmpredict_exe = "../svm-predict"
        grid_py = "./grid.py"
        # Adjust this to the local installation: it must point at the gnuplot
        # executable itself, not at the directory that contains it.
        gnuplot_exe = "/usr/bin/gnuplot"
    else:
        # Path configuration for Windows. Raw strings keep the backslash
        # separators literal; without the separators the paths below would
        # name non-existent files and the checks further down would fail.
        svmscale_exe = r"..\windows\svm-scale.exe"
        svmtrain_exe = r"..\windows\svm-train.exe"
        svmpredict_exe = r"..\windows\svm-predict.exe"
        # NOTE(review): assumes gnuplot is installed under C:\gnuplot\bin --
        # confirm and adjust to the local installation.
        gnuplot_exe = r"C:\gnuplot\bin\gnuplot.exe"
        grid_py = r".\grid.py"

    # Fail early, with a specific message, if any required tool is missing.
    assert os.path.exists(svmscale_exe),"svm-scale executable not found"
    assert os.path.exists(svmtrain_exe),"svm-train executable not found"
    assert os.path.exists(svmpredict_exe),"svm-predict executable not found"
    assert os.path.exists(gnuplot_exe),"gnuplot executable not found"
    assert os.path.exists(grid_py),"grid.py not found"
    
    # Names of the files derived from the training set: ".scale", ".model"
    # and ".range". Only the base name is used, so they are created in the
    # current working directory.
    train_pathname = sys.argv[1]
    assert os.path.exists(train_pathname),"training file not found"
    file_name = os.path.basename(train_pathname)
    scaled_file, model_file, range_file = (
        file_name + suffix for suffix in (".scale", ".model", ".range")
    )

    # Names of the files derived from the optional testing set: ".scale"
    # and ".predict".
    if len(sys.argv) >= 3:
        test_pathname = sys.argv[2]
        assert os.path.exists(test_pathname),"testing file not found"
        file_name = os.path.basename(test_pathname)
        scaled_test_file = file_name + ".scale"
        predict_test_file = file_name + ".predict"
    
    # Pipeline step 1: run svm-scale on the training set. The range file is
    # passed with -s and the shell redirect writes the scaled data to
    # scaled_file.
    scale_args = (svmscale_exe, range_file, train_pathname, scaled_file)
    cmd = '{0} -s "{1}" "{2}" > "{3}"'.format(*scale_args)
    print('Scaling training data...')
    scale_proc = Popen(cmd, shell = True, stdout = PIPE)
    scale_proc.communicate()
    
    # Pipeline step 2: parameter selection. grid.py runs cross validation on
    # the scaled training set, using the given svm-train and gnuplot binaries.
    cmd = '{0} -svmtrain "{1}" -gnuplot "{2}" "{3}"'.format(grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
    print('Cross validation...')
    # Text mode so the lines are str on both Python 2 and Python 3.
    grid_proc = Popen(cmd, shell = True, stdout = PIPE, universal_newlines = True)

    # Only the last non-blank line of the output is parsed; it is expected to
    # hold the three numbers "c g rate".
    last_line = ''
    for line in grid_proc.stdout:
        if line.strip():
            last_line = line
    # Release the pipe and reap the child so it does not linger.
    grid_proc.stdout.close()
    grid_proc.wait()

    fields = last_line.split()
    if len(fields) != 3:
        # Happens when grid.py failed or printed nothing usable.
        raise ValueError('grid.py did not report "c g rate"; last output line: {0!r}'.format(last_line))
    c,g,rate = map(float,fields)
    # Report the best parameters c and g together with the CV rate.
    print('Best c={0}, g={1} CV rate={2}'.format(c,g,rate))
    
    # Pipeline step 3: train with svm-train on the scaled training set, using
    # the c and g values selected by cross validation; the model is written
    # to model_file.
    train_args = (svmtrain_exe, c, g, scaled_file, model_file)
    cmd = '{0} -c {1} -g {2} "{3}" "{4}"'.format(*train_args)
    print('Training...')
    train_proc = Popen(cmd, shell = True, stdout = PIPE)
    train_proc.communicate()
    print('Output model: {0}'.format(model_file))
    # The remaining steps run only when a testing file was supplied.
    has_test_file = len(sys.argv) > 2
    if has_test_file:
        # Pipeline step 4: scale the testing set with svm-scale, passing the
        # range file with -r; the shell redirect writes scaled_test_file.
        test_scale_args = (svmscale_exe, range_file, test_pathname, scaled_test_file)
        cmd = '{0} -r "{1}" "{2}" > "{3}"'.format(*test_scale_args)
        print('Scaling testing data...')
        test_scale_proc = Popen(cmd, shell = True, stdout = PIPE)
        test_scale_proc.communicate()

        # Pipeline step 5: run svm-predict on the scaled testing set. stdout
        # is not captured here, so whatever svm-predict prints goes straight
        # to the console.
        predict_args = (svmpredict_exe, scaled_test_file, model_file, predict_test_file)
        cmd = '{0} "{1}" "{2}" "{3}"'.format(*predict_args)
        print('Testing...')
        predict_proc = Popen(cmd, shell = True)
        predict_proc.communicate()

        print('Output prediction: {0}'.format(predict_test_file))
  • 相关阅读:
    2013年第四届蓝桥杯C/C++ A组国赛 —— 第三题:埃及分数
    单链表————链表
    单链表————顺序表
    跳水板
    平衡二叉搜索树(最小高度树)
    删除字符串中的所有相邻重复项
    用两个栈实现队列
    删除最外层的括号
    合并两个有序链表
    C++ cout格式化输出(输出格式)完全攻略
  • 原文地址:https://www.cnblogs.com/leezx/p/5668702.html
Copyright © 2020-2023  润新知