• OutputFormat---自定义输出方式


    简介

    可以自定义输出的格式和文件,例如将包含某个关键字的记录输出到一个指定文件,不包含该关键字的记录输出到另一个文件。

    案例

    数据

    www.nevesettle.com
    www.baidu.com
    www.qq.com
    www.mi.com
    www.jd.com
    www.std.com
    

    Mapper

    package com.neve.outputformat;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class LogMapper extends Mapper<LongWritable, Text,Text, NullWritable> {

        /**
         * Identity mapper: forwards every input line unchanged as the output key.
         * The value slot is filled with the shared {@link NullWritable} placeholder,
         * since only the line text matters downstream.
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // The file offset (key) is irrelevant; the line itself becomes the key.
            context.write(value, NullWritable.get());
        }
    }
    
    

    Reducer

    package com.neve.outputformat;
    
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class LogReducer extends Reducer<Text, NullWritable,Text,NullWritable> {

        /**
         * Emits the key once per grouped value so that duplicate input lines
         * are preserved in the output rather than collapsed to a single line.
         */
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // One write per occurrence keeps the output cardinality equal to the input's.
            for (NullWritable ignored : values) {
                context.write(key, ignored);
            }
        }
    }
    
    

    Driver

    package com.neve.outputformat;
    
    import com.neve.phone.FlowBean;
    import com.neve.phone.FlowMapper;
    import com.neve.phone.FlowReducer;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    public class LogDriver {
    
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    
            //1.创建配置
            Configuration configuration = new Configuration();
            //2.创建job
            Job job = Job.getInstance(configuration);
            //3.关联驱动类
            job.setJarByClass(LogDriver.class);
            //4.关联mapper和reducer类
            job.setMapperClass(LogMapper.class);
            job.setReducerClass(LogReducer.class);
            //5.设置mapper的输出值和value
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(NullWritable.class);
            //6.设置最终的输出值和value
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);
            //7.设置输入输出路径
            FileInputFormat.setInputPaths(job,new Path("F:\Workplace\IDEA_Workplace\hadoopStudy2\outputformatinput"));
            FileOutputFormat.setOutputPath(job,new Path("F:\Workplace\IDEA_Workplace\hadoopStudy2\outputformatoutput"));
            //设置自定义的format类
            job.setOutputFormatClass(LogOutputFormat.class);
            //8.提交job
            job.waitForCompletion(true);
    
        }
    }
    
    

    LogOutputFormat

    package com.neve.outputformat;
    
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    public class LogOutputFormat extends FileOutputFormat<Text, NullWritable> {
    
        public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
            LogRecordWriter logw = new LogRecordWriter(job);
            return logw;
        }
    }
    	
    

    LogRecordWriter

    package com.neve.outputformat;
    
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    
    import java.io.IOException;
    
    public class LogRecordWriter extends RecordWriter<Text, NullWritable> {
    
        //定义输出路径
        private String nelog = "F:\nelog.log";
        private String otherlog = "F:\otherlog.log";
    
        private FileSystem fs ;
        private FSDataOutputStream neos;
        private FSDataOutputStream otheros;
    
    
        public LogRecordWriter(TaskAttemptContext job) throws IOException {
            //获取文件系统对象
            fs = FileSystem.get(job.getConfiguration());
            neos = fs.create(new Path(nelog));
            otheros = fs.create(new Path(otherlog));
        }
    
        public void write(Text key, NullWritable value) throws IOException, InterruptedException {
            String string = key.toString();
            if (string.contains("neve")){
                neos.writeBytes(string + "
    ");
            }else {
                otheros.writeBytes(string + "
    ");
            }
        }
    
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            IOUtils.closeStream(neos);
            IOUtils.closeStream(otheros);
        }
    }
    
    
  • 相关阅读:
    16进制字节和数字转换
    Vs2013 使用EF6 连接mysql数据库
    设计模式-单例模式(Singleton)
    WPF 10天修炼 第十天- WPF数据绑定
    WPF 10天修炼 第九天
    WPF 10天修炼 第八天
    WPF 10天修炼 第七天- WPF资源、样式、控件模板
    WPF 10天修炼 第六天- 系统属性和常用控件
    WPF 10天修炼 第五天- 内容控件
    WPF排版布局经验总结(干货)简短不疲倦
  • 原文地址:https://www.cnblogs.com/wuren-best/p/13797885.html
Copyright © 2020-2023  润新知