• hadoop-job(mapReducer计算单词出现的个数)


    1.============map===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
    * Mapper
    */
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
    * key : 行首偏移量,字节数,意义不大。
    * value : 一行文本
    */
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    //
    String line = value.toString() ;
    String[] arr = line.split(" ");

    Text keyOut = new Text() ;
    IntWritable valueOut = new IntWritable(1) ;
    for(String word : arr){
    keyOut.set(word);
    context.write(keyOut,valueOut);
    }
    }
    }

    2.============reducer===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
    * reducer
    */
    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
    * key : word
    * values : 该key下聚合的value
    */
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int count = 0 ;
    for(IntWritable iw : values){
    count = count + iw.get() ;
    }
    context.write(key , new IntWritable(count));
    }
    }

    3.============统计===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;
    /**
    * Driver: configures and submits the word-count MapReduce job.
    *
    * args[0] = input path, args[1] = output path (removed first if present).
    */
    public class App {
    public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
    // Prefer a standard exception type over raw Exception.
    throw new IllegalArgumentException("参数不足,需要2个参数");
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path outputPath = new Path(args[1]);
    // MapReduce refuses to run if the output directory already exists,
    // so delete it recursively first (guarded to avoid a spurious call).
    if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
    }

    // Create and name the job (name is arbitrary, shown in the UI).
    Job job = Job.getInstance(conf);
    job.setJobName("word_count_add");
    // Lets Hadoop locate the jar that contains this driver class.
    job.setJarByClass(App.class);

    // Input to read and directory to write results into.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, outputPath);

    // Wire up the mapper and reducer.
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    // Final output key/value types (also used for map output here).
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Single reducer => a single sorted output file.
    job.setNumReduceTasks(1);

    // Propagate job success/failure to the process exit code;
    // the original ignored waitForCompletion's return value, so a
    // failed job still exited 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
    }

  • 相关阅读:
    图片的切换
    DOM查询
    表单
    《激素小史》读后感 读书笔记
    《比利时的哀愁》读后感 读书笔记
    《大宋之变》读后感 读书笔记
    《人体简史》读后感 读书笔记
    《全球房地产》读后感 读书笔记
    《失落的管理艺术》读后感 读书笔记
    《成为福克纳》读后感 读书笔记
  • 原文地址:https://www.cnblogs.com/nyfz/p/9041992.html
Copyright © 2020-2023  润新知