• hadoop-mapreduce-(1): Word Count (counting word occurrences)


    Write the map program

    package com.cvicse.ump.hadoop.mapreduce.map;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            
            // Split the input line on single spaces and emit (word, 1) for every token.
            String line = value.toString();
            String[] words = line.split(" ");
            for (String word : words) {
                context.write(new Text(word), new IntWritable(1));
            }
            
        }
    
    }
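
    For example, given the input line "hello world hello", this mapper emits the pairs (hello, 1), (world, 1), and (hello, 1).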

    Write the reduce program

    package com.cvicse.ump.hadoop.mapreduce.reduce;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class WordCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {
    
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            
            // The framework groups the map output by key, so "values" contains every
            // 1 emitted for this word; summing them gives the word's total count.
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            
            context.write(key, new IntWritable(count));
            
        }
    
    }
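
    Continuing the example above, the reduce phase receives (hello, [1, 1]) and (world, [1]) and writes (hello, 2) and (world, 1).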

    Write the main function (the job driver)

    package com.cvicse.ump.hadoop.mapreduce;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import com.cvicse.ump.hadoop.mapreduce.map.WordCountMap;
    import com.cvicse.ump.hadoop.mapreduce.reduce.WordCountReduce;
    
    public class WordCount {
        
        public static void main(String[] args) throws Exception {
            
            Configuration conf = new Configuration();
            
            // Create the job and register the mapper and reducer classes.
            Job job = Job.getInstance(conf, "wordCount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(WordCountMap.class);
            job.setReducerClass(WordCountReduce.class);
            
            // Key/value types produced by the map phase and by the final output.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            
            // Input and output paths come from the command-line arguments.
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            
            // Submit the job and block until it finishes; "true" enables progress logging.
            boolean success = job.waitForCompletion(true);
            if (!success) {
                System.out.println("wordcount task failed!");
            } else {
                System.out.println("wordcount task succeeded!");
            }
            
        }
    
    }
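
    Optionally, because the per-word counts are plain sums, the same reducer class can also be registered as a combiner so map output is pre-aggregated before the shuffle. A minimal sketch (not part of the original driver), added before waitForCompletion:

    // Optional: reuse the reducer as a combiner to pre-aggregate map output locally.
    job.setCombinerClass(WordCountReduce.class);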

    Put wordcount.txt under the /dyh/data/input/ directory on HDFS.
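
    For example, assuming wordcount.txt is a local file containing lines of space-separated words, it can be uploaded with the standard HDFS shell (commands shown here for illustration):

    hdfs dfs -mkdir -p /dyh/data/input
    hdfs dfs -put wordcount.txt /dyh/data/input/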

    Run: hadoop jar hdfs.jar com.cvicse.ump.hadoop.mapreduce.WordCount /dyh/data/input/wordcount.txt /dyh/data/output/1
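
    Note that the output directory (/dyh/data/output/1 here) must not already exist, or FileOutputFormat will fail the job. After the job succeeds, the result can be inspected with something like hdfs dfs -cat /dyh/data/output/1/part-r-00000 (reducer output files follow the usual part-r-NNNNN naming, so the exact file name may differ).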
