• 【Hadoop】MapperReduce WordCount 代码示例


    MAC Idea Maven

    工程 maven-archetype-profiles

    pom.xml

    <!--HDFS-->
          <dependency>
              <groupId>org.apache.hadoop</groupId>
              <artifactId>hadoop-common</artifactId>
              <version>2.5.1</version>
          </dependency>
          <dependency>
              <groupId>org.apache.hadoop</groupId>
              <artifactId>hadoop-hdfs</artifactId>
              <version>2.5.1</version>
          </dependency>
    
          <!--MapReduce-->
    
          <dependency>
              <groupId>org.apache.hadoop</groupId>
              <artifactId>hadoop-mapreduce-client-common</artifactId>
              <version>2.5.1</version>
          </dependency>
    
          <dependency>
              <groupId>org.apache.hadoop</groupId>
              <artifactId>hadoop-mapreduce-client-core</artifactId>
              <version>2.5.1</version>
          </dependency>
    
    package com.ndz.mapreducedemo;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class WordCount {
    
    
        public static class TokenizerMapper
                extends Mapper<Object, Text, Text, IntWritable> {
    
            private final static IntWritable one = new IntWritable(1);
            private Text word = new Text();
    
            public void map(Object key, Text value, Context context  ) throws IOException, InterruptedException {
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    context.write(word, one);
                }
    
                /*
                 *eg: map output:
                 *        hello  1
                 *        word   1
                 *        hello  1
                 *        hadoop 1
                 */
            }
        }
    
        public static class IntSumReducer
                extends Reducer<Text, IntWritable, Text, IntWritable> {
            private IntWritable result = new IntWritable();
    
            public void reduce(Text key, Iterable<IntWritable> values,
                               Context context
            ) throws IOException, InterruptedException {
                int sum = 0;
                System.out.println("-----------------------------------------");
                System.out.println("key: "+key);
                for (IntWritable val : values) {
                    sum += val.get();
                    System.out.println("val: "+val);
                }
                result.set(sum);
                System.out.println("result: "+result.toString());
                context.write(key, result);
            }
        }
    
    
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://172.16.152.130:8020");
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setCombinerClass(IntSumReducer.class);
            job.setReducerClass(IntSumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
           // hdfs dfs -chmod -R 777 /usr/ 必须要给权限, 否则将运行失败
            FileInputFormat.addInputPath(job, new Path("/usr/input/data/wc"));
            FileOutputFormat.setOutputPath(job, new Path("/usr/output/data/wc"));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    
    
    }
    
    

    附录:

    hdfs dfs -chmod -R 777 /usr/ 
     hdfs dfs -cat /usr/output/data/wc/part-r-00000
    hdfs dfs -ls -R  /usr/
    hdfs dfs -rm -r  /usr/output
    
    “年轻时,我没受过多少系统教育,但什么书都读。读得最多的是诗,包括烂诗,我坚信烂诗早晚会让我邂逅好诗。” by. 马尔克斯
  • 相关阅读:
    python脚本 快速执行mapreduce程序
    ArrayList中contains()的使用方法
    利用jstl标签实现国际化
    device eth0 does not seem to be present, delaying initialization
    java中compareTo和compare方法之比较
    CentOS 7 NAT模式LVS搭建
    CentOS 7 DR模式LVS搭建
    CentOS 7 开机延迟解决办法
    CentOS 7 nginx+tomcat9 session处理方案之session复制
    Jumpserver(跳板机、堡垒机)启动jms Django连接mysql数据库报错
  • 原文地址:https://www.cnblogs.com/jzsg/p/12627698.html
Copyright © 2020-2023  润新知