• Hadoop基础(二十五):OutputFormat数据输出


    1 OutputFormat接口实现类

    2 自定义OutputFormat

    3 自定义OutputFormat案例实操

    1.需求

    过滤输入的log日志,包含atguigu的网站输出到e:/atguigu.log,不包含atguigu的网站输出到e:/other.log。

    1)输入数据

    2)期望输出数据

     

    2.需求分析

     

    3.案例实操

    (1)编写FilterMapper

    package com.atguigu.mapreduce.outputformat;
    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable>{

        /**
         * Pass-through mapper: emits each input line as the output key with a
         * NullWritable value. All routing/filtering happens later in the
         * custom RecordWriter, so no work is done here.
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)    throws IOException, InterruptedException {
            // Forward the raw line unchanged.
            context.write(value, NullWritable.get());
        }
    }
    View Code

    (2)编写FilterReducer

    package com.atguigu.mapreduce.outputformat;
    import java.io.IOException;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class FilterReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    
    Text k = new Text();
    
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context)        throws IOException, InterruptedException {
    
           // 1 获取一行
            String line = key.toString();
    
           // 2 拼接
            line = line + "
    ";
    
           // 3 设置key
           k.set(line);
    
           // 4 输出
            context.write(k, NullWritable.get());
        }
    }

    (3)自定义一个OutputFormat类

    package com.atguigu.mapreduce.outputformat;
    import java.io.IOException;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable>{

        /**
         * Hands the framework our custom writer, which routes each record to
         * e:/atguigu.log or e:/other.log based on its content.
         */
        @Override
        public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job)            throws IOException, InterruptedException {
            RecordWriter<Text, NullWritable> writer = new FilterRecordWriter(job);
            return writer;
        }
    }

    (4)编写RecordWriter

    package com.atguigu.mapreduce.outputformat;
    import java.io.IOException;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    
    public class FilterRecordWriter extends RecordWriter<Text, NullWritable> {

        FSDataOutputStream atguiguOut = null;
        FSDataOutputStream otherOut = null;

        /**
         * Opens both destination streams up front.
         *
         * Fix: the original caught the IOException, printed the stack trace,
         * and continued with null streams — every later write() would then
         * fail with an uninformative NullPointerException. Failing fast with
         * the original cause preserved is strictly better.
         */
        public FilterRecordWriter(TaskAttemptContext job) {

            try {
                // 1 File system from the job configuration.
                FileSystem fs = FileSystem.get(job.getConfiguration());

                // 2 Output file paths (tutorial uses fixed local paths).
                Path atguiguPath = new Path("e:/atguigu.log");
                Path otherPath = new Path("e:/other.log");

                // 3 Create the output streams.
                atguiguOut = fs.create(atguiguPath);
                otherOut = fs.create(otherPath);
            } catch (IOException e) {
                // Rethrow with cause instead of swallowing the failure.
                throw new RuntimeException("Failed to create output streams", e);
            }
        }

        /**
         * Routes each line: containing "atguigu" -> atguigu.log, else other.log.
         * NOTE(review): getBytes() uses the platform default charset — consider
         * StandardCharsets.UTF_8 for portable output.
         */
        @Override
        public void write(Text key, NullWritable value) throws IOException, InterruptedException {

            if (key.toString().contains("atguigu")) {
                atguiguOut.write(key.toString().getBytes());
            } else {
                otherOut.write(key.toString().getBytes());
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {

            // IOUtils.closeStream is null-safe, so both calls always run.
            IOUtils.closeStream(atguiguOut);
            IOUtils.closeStream(otherOut);
        }
    }
    View Code

    (5)编写FilterDriver

    package com.atguigu.mapreduce.outputformat;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class FilterDriver {

        /**
         * Job driver: wires the filter mapper/reducer and the custom
         * OutputFormat together and submits the job.
         */
        public static void main(String[] args) throws Exception {

            // Local test paths — adjust to the actual input/output locations
            // on your machine before running.
            args = new String[] { "e:/input/inputoutputformat", "e:/output2" };

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);

            // Driver jar plus mapper/reducer classes.
            job.setJarByClass(FilterDriver.class);
            job.setMapperClass(FilterMapper.class);
            job.setReducerClass(FilterReducer.class);

            // Both the map output and the final output are <Text, NullWritable>.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(NullWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            // Plug the custom output format component into the job.
            job.setOutputFormatClass(FilterOutputFormat.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));

            // Even with a custom OutputFormat, the FileOutputFormat parent
            // still emits a _SUCCESS marker, so an output directory is
            // required here as well.
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            boolean completed = job.waitForCompletion(true);
            System.exit(completed ? 0 : 1);
        }
    }
  • 相关阅读:
    CSS3---用户界面
    CSS3---媒体查询与响应式布局
    HDU 5285 wyh2000 and pupil
    POJ 2488 A Knight's Journey
    POJ 1067 取石子游戏
    POJ 2777 Count Color
    POJ 3259 Wormholes
    Project Euler 26 Reciprocal cycles
    POJ 2104 K-th Number
    POJ 1013 Counterfeit Dollar
  • 原文地址:https://www.cnblogs.com/qiu-hua/p/13341090.html
Copyright © 2020-2023  润新知