• MRWordCount


    一、map
    package com.pdd.mapreduce;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
     * Mapper of the word-count job: splits every input line into words and emits
     * one {@code (word, 1)} pair per word.
     *
     * <p>Created: 2018/2/14 21:03
     *
     * @author hfx
     */
    public class WCmap extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Output key/value instances, reused for every emitted pair instead of
        // allocating a new object per word. The value is always 1.
        Text k = new Text();
        IntWritable v = new IntWritable(1);

        /**
         * Emits {@code (word, 1)} for every whitespace-separated word of one input line.
         *
         * @param key     input key supplied by the framework; not used here
         * @param value   the text of one input line
         * @param context sink for the emitted {@code (word, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // 1. Read the line.
            String line = value.toString();
            // 2. Split on any run of whitespace, so tabs and repeated spaces do not
            //    produce empty tokens between words.
            String[] words = line.split("\\s+");
            // 3. Emit one pair per word.
            for (String word : words) {
                // A blank line or leading whitespace still yields one empty token;
                // skip it so the empty string is never counted as a word.
                if (word.isEmpty()) {
                    continue;
                }
                k.set(word);
                context.write(k, v);
            }
        }
    }
      二、reduce
      package com.pdd.mapreduce;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
     * Reducer of the word-count job: sums the counts received for a word and
     * emits {@code (word, total)}.
     *
     * <p>Created: 2018/2/14 21:05
     *
     * @author hfx
     */
    public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Output value instance, reused for every emitted pair.
        IntWritable v = new IntWritable();

        /**
         * Adds up all counts for {@code key} and writes the total.
         *
         * @param key     the word being reduced
         * @param values  the counts emitted for this word
         * @param context sink for the emitted {@code (word, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // 1. Accumulate the counts. The parameter must be Iterable<IntWritable>:
            //    with a raw Iterable the typed for-each below does not compile.
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            // 2. Emit the total for this word.
            v.set(sum);
            context.write(key, v);
        }
    }
      三、job测试
      package com.pdd.mapreduce;

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    /**
     * Driver of the word-count job: wires {@link WCmap} and {@link WcReduce}
     * into a {@link Job}, submits it and exits with the job's outcome.
     *
     * <p>Created: 2018/2/14 21:15
     *
     * @author hfx
     */
    public class JobTest {
        /**
         * Configures and runs the job.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws IOException            if job setup or submission fails
         * @throws ClassNotFoundException if a job class cannot be resolved
         * @throws InterruptedException   if waiting for the job is interrupted
         */
        public static void main(String[] args)
                throws IOException, ClassNotFoundException, InterruptedException {
            // For a local test the paths can be hard-coded, e.g.:
            //   args = new String[]{"d:/input", "d:/output"};
            // On a cluster: hadoop jar <jar> <fully-qualified main class> <input> <output>

            // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
            // when a path argument is missing.
            if (args.length < 2) {
                System.err.println("Usage: JobTest <input path> <output path>");
                System.exit(2);
            }

            // 1. Load the configuration and create the job.
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            // 2. Identify the job jar through a class it contains.
            job.setJarByClass(JobTest.class);
            // 3. Register the mapper and reducer classes.
            job.setMapperClass(WCmap.class);
            job.setReducerClass(WcReduce.class);
            // 4. Key/value types of the map output.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // 5. Key/value types of the final output.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // 6. Input and output paths.
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // 7. Submit, wait for completion, and map the result to the exit status.
            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }
    }
  • 相关阅读:
    网站访问量大 怎样优化mysql数据库
    BootStrap 模态框禁用空白处点击关闭
    常用SQL语句
    诅咒JavaScript之----ArcGIS JavaScript 点聚合 ClusterLayer
    模态框与 天地图地图控件冲突
    FXK Javascript
    从列表中或数组中随机抽取固定数量的元素组成新的数组或列表
    wangEditor
    手把手教你用vue-cli构建一个简单的路由应用
    解决eclipse端口被占用的问题
  • 原文地址:https://www.cnblogs.com/sgjk/p/javaWordCount.html
Copyright © 2020-2023  润新知