• MapReduce: WordCount


    Counts how many times each word appears in the input files.

    Pay attention to the imports in each file and to how the Job is configured; a small end-to-end example of the expected input and output is sketched below.
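
    For example, given a hypothetical input file (not from the original post) containing:

        hadoop mapreduce
        hadoop hdfs

    the job writes one tab-separated word/count pair per line, with the words in sorted key order:

        hadoop    2
        hdfs    1
        mapreduce    1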

    WordCountMapper.java

    package top.wintp.mapreduce.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * @description: WordCount mapper: splits each input line into words and emits a (word, 1) pair for every word.
     * <p>
     * @author: upuptop
     * <p>
     * @qq: 337081267
     * <p>
     * @CSDN: http://blog.csdn.net/pyfysf
     * <p>
     * @cnblogs: http://www.cnblogs.com/upuptop
     * <p>
     * @blog: http://wintp.top
     * <p>
     * @email: pyfysf@163.com
     * <p>
     * @time: 2019/5/21
     * <p>
     */
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text K = new Text();
        private IntWritable V = new IntWritable(1);
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    
            // Convert the line to a String and split it on single spaces
            // (consecutive spaces would produce empty tokens with this split).
            String line = value.toString();
            String[] words = line.split(" ");
    
            for (String word : words) {
                K.set(word);
                context.write(K, V);
            }
    
    
        }
    }
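
    To verify the mapper in isolation, here is a minimal test sketch; it is not part of the original post and assumes the MRUnit test library (org.apache.mrunit:mrunit) and JUnit are on the classpath:

    package top.wintp.mapreduce.wordcount;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;
    import org.junit.Test;

    public class WordCountMapperTest {

        @Test
        public void emitsOnePairPerWord() throws Exception {
            // Feed one line to the mapper and assert the (word, 1) pairs it writes, in order.
            MapDriver.newMapDriver(new WordCountMapper())
                    .withInput(new LongWritable(0), new Text("hadoop mapreduce hadoop"))
                    .withOutput(new Text("hadoop"), new IntWritable(1))
                    .withOutput(new Text("mapreduce"), new IntWritable(1))
                    .withOutput(new Text("hadoop"), new IntWritable(1))
                    .runTest();
        }
    }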
    
    

    WordCountReduce.java

    package top.wintp.mapreduce.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    /**
     * @description: WordCount reducer: sums the counts received for each word and writes (word, total).
     * <p>
     * @author: upuptop
     * <p>
     * @qq: 337081267
     * <p>
     * @CSDN: http://blog.csdn.net/pyfysf
     * <p>
     * @cnblogs: http://www.cnblogs.com/upuptop
     * <p>
     * @blog: http://wintp.top
     * <p>
     * @email: pyfysf@163.com
     * <p>
     * @time: 2019/5/21
     * <p>
     */
    public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable V = new IntWritable();
    
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    
            // Add up all of the counts grouped under this word.
            int sum = 0;
    
            for (IntWritable value : values) {
                sum += value.get();
            }
    
            V.set(sum);
            context.write(key, V);
        }
    }
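
    A similar sketch (again not from the original post, and also assuming MRUnit) checks that the reducer sums the grouped counts:

    package top.wintp.mapreduce.wordcount;

    import java.util.Arrays;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
    import org.junit.Test;

    public class WordCountReduceTest {

        @Test
        public void sumsCountsPerKey() throws Exception {
            // Three 1s grouped under "hadoop" should come out as (hadoop, 3).
            ReduceDriver.newReduceDriver(new WordCountReduce())
                    .withInput(new Text("hadoop"),
                            Arrays.asList(new IntWritable(1), new IntWritable(1), new IntWritable(1)))
                    .withOutput(new Text("hadoop"), new IntWritable(3))
                    .runTest();
        }
    }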
    
    

    WordCountRunner.java

    package top.wintp.mapreduce.wordcount;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    /**
     * @description: WordCount driver: wires the mapper and reducer into a Job and submits it.
     * <p>
     * @author: upuptop
     * <p>
     * @qq: 337081267
     * <p>
     * @CSDN: http://blog.csdn.net/pyfysf
     * <p>
     * @cnblogs: http://www.cnblogs.com/upuptop
     * <p>
     * @blog: http://wintp.top
     * <p>
     * @email: pyfysf@163.com
     * <p>
     * @time: 2019/5/21
     * <p>
     */
    public class WordCountRunner implements Tool {
        private Configuration conf;
    
        public int run(String[] strings) throws Exception {
            // Configure the job
            Job job = Job.getInstance(this.conf);
            job.setJarByClass(WordCountRunner.class);
    
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReduce.class);
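
            // Not in the original post: the reduce logic is associative and commutative,
            // so the same class could also be registered as a combiner to cut shuffle traffic:
            // job.setCombinerClass(WordCountReduce.class);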
    
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            FileInputFormat.setInputPaths(job, new Path("E:/input/wordcount/"));
            FileOutputFormat.setOutputPath(job, new Path("E:/output/wordcount/" + System.currentTimeMillis()));
    
            // Submit the job and wait for it to finish
            int result = job.waitForCompletion(true) ? 0 : 1;
    
            return result;
        }
    
        public void setConf(Configuration configuration) {
            this.conf = configuration;
        }
    
        public Configuration getConf() {
            return this.conf;
        }
    
    
        public static void main(String[] args) throws Exception {
            int status = ToolRunner.run(new WordCountRunner(), args);
            System.exit(status);
        }
    }
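
    The driver above reads from and writes to hard-coded local paths (E:/input/wordcount/ and a timestamp-named folder under E:/output/wordcount/), so it is meant to be run straight from the IDE against the local file system; the timestamp keeps each run's output directory from already existing, which MapReduce rejects. To run the same job on a cluster you would normally take the paths from the command-line arguments instead, for example (a sketch, not part of the original post):

    FileInputFormat.setInputPaths(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));

    and then submit the packaged jar (the jar name here is hypothetical):

    hadoop jar wordcount.jar top.wintp.mapreduce.wordcount.WordCountRunner /input/wordcount /output/wordcount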
    
    

    log4j.properties

    log4j.rootLogger=INFO, stdout  
    log4j.appender.stdout=org.apache.log4j.ConsoleAppender  
    log4j.appender.stdout.layout=org.apache.log4j.PatternLayout  
    log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n  
    log4j.appender.logfile=org.apache.log4j.FileAppender  
    log4j.appender.logfile.File=target/spring.log  
    log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  
    log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n  
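
    Note that only stdout is attached to the root logger above, so the logfile appender is defined but never used. If logging to target/spring.log is wanted as well, the first line would need to list it too:

    log4j.rootLogger=INFO, stdout, logfile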
    
    