• linux中文乱码


    txt文件在linux环境下打开呈现了乱码状态。

    解决方法1:在linux用iconv命令,如乱码文件名为zhongwen.txt,那么在终端输入如下命令:

    iconv -f gbk -t utf8 zhongwen.txt > zhongwen.txt.utf8

    如果用 Eclipse 打开转换后的文件仍是乱码,则需进入 Preferences(Window → Preferences → General → Workspace),将默认的文本文件编码(Text file encoding)修改为 UTF-8。

    解决方法2:如果需要批量地将 GBK 文件转成 UTF-8 文件,则需要编写如下 Java 程序(注意:下面贴出的代码实际上是一个 Hadoop/Mahout 贝叶斯分类作业的驱动类,其中并不包含 GBK 到 UTF-8 的转码逻辑,疑为原文贴错了代码):

    package classifier;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.mahout.classifier.bayes.BayesParameters;


    /**
     * Command-line driver that configures and submits the "Classifier" MapReduce job.
     *
     * <p>Expected arguments:
     * <ol>
     *   <li>{@code args[0]} - input path</li>
     *   <li>{@code args[1]} - output path (removed first if it already exists)</li>
     *   <li>{@code args[2]} - base path stored in the Bayes parameters</li>
     *   <li>{@code args[3]} - value of the {@code classifierType} Bayes parameter</li>
     * </ol>
     *
     * <p>The process exits with status 0 when the job succeeds, 1 when it fails,
     * and 2 when too few arguments are supplied.
     */
    public class ClassifierDriver {

        /** Number of positional arguments read by {@link #main(String[])}. */
        private static final int REQUIRED_ARGS = 4;

        /**
         * Builds the job from the command-line arguments, clears any existing
         * output directory, runs the job and exits with its status.
         *
         * @param args input path, output path, base path, classifier type
         * @throws Exception if the output directory cannot be cleared or the job
         *         cannot be submitted or monitored
         */
        public static void main(String[] args) throws Exception {
            if (args == null || args.length < REQUIRED_ARGS) {
                System.err.println(
                        "Usage: ClassifierDriver <input> <output> <basePath> <classifierType>");
                System.exit(2);
                return;
            }

            // set bayes parameter
            BayesParameters params = new BayesParameters();
            params.setBasePath(args[2]);
            params.set("classifierType", args[3]);
            params.set("alpha_i", "1.0");
            params.set("defaultCat", "unknown");
            params.setGramSize(1);

            // set configuration; the serialized parameters travel with the job config
            Configuration conf = new Configuration();
            conf.set("bayes.parameters", params.toString());

            // create job
            Job job = new Job(conf, "Classifier");
            job.setJarByClass(ClassifierDriver.class);

            // specify input format
            job.setInputFormatClass(KeyValueTextInputFormat.class);

            // specify mapper & reducer
            job.setMapperClass(classifier.ClassifierMapper.class);
            job.setReducerClass(ClassifierReducer.class);

            // specify output types of mapper and reducer
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Text.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // specify input and output DIRECTORIES
            Path inPath = new Path(args[0]);
            Path outPath = new Path(args[1]);
            FileInputFormat.addInputPath(job, inPath);
            FileOutputFormat.setOutputPath(job, outPath);

            // Remove a stale output directory before submitting the job. A failure
            // here propagates out of main, so the process ends with a non-zero
            // status instead of silently returning 0.
            deleteIfExists(outPath, conf);

            // run the job
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

        /**
         * Recursively deletes {@code path} if it exists.
         *
         * @param path directory (or file) to remove
         * @param conf configuration used to resolve the path's file system
         * @throws IOException if the file system cannot be reached or the path
         *         still exists after the delete attempt
         */
        private static void deleteIfExists(Path path, Configuration conf) throws IOException {
            // Deliberately NOT closed: Path.getFileSystem normally hands out a shared,
            // cached instance, and closing it here can make the subsequent job
            // submission fail with "Filesystem closed".
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.exists(path)) {
                return;
            }
            // Recursive delete: the output directory may contain files from an earlier run.
            boolean deleted = fs.delete(path, true);
            if (!deleted && fs.exists(path)) {
                throw new IOException("Failed to delete existing output path: " + path);
            }
        }

    }

  • 相关阅读:
    连接MySQL错误:Can't connect to MySQL server (10060)
    PHP性状的使用
    PHP interface(接口)的示例代码
    jquery 设置页面元素不可点击、不可编辑、只读(备忘)
    ace_admin_1.3.1 wysiwyg 工具条下拉出不来
    类函数和对象函数 PHP
    PHP 回调、匿名函数和闭包
    simplexml_load_file 抑制警告的直接输出
    jQuery判断当前元素是第几个元素
    hihocoder #1445 : 后缀自动机二·重复旋律5
  • 原文地址:https://www.cnblogs.com/mlj5288/p/4439159.html
Copyright © 2020-2023  润新知