• A simple CombineFileInputFormat implementation

CombineFileInputFormat packs many small input files into a single split, so one map task processes several files instead of launching a task per tiny file. The listing below wraps a LineRecordReader so that plain text files can be read out of a combined split.


    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.ReflectionUtils;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
     
    public class TestCombine extends Configured implements Tool {
        // Mapper: echoes each input line as both output key and value (with a debug print).
        private static class ProvinceMapper extends
                Mapper<Object, Text, Text, Text> {
            @Override
            protected void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                System.out.println("value : " + value + " Context " + context);
                context.write(value, value);
            }
        }
     
        // Reducer: writes the key once for every value it receives.
        private static class ProvinceReducer extends
                Reducer<Text, Text, Text, Text> {
            @Override
            protected void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                for (Text va : values) {
                    System.out.println("reduce " + key);
                    context.write(key, key);
                }
            }
        }
         
        // InputFormat that groups many small files into one CombineFileSplit; each file in
        // the split is read by a CombineLineRecordReader (plain text lines, despite the name).
        public static class CombineSequenceFileInputFormat<K, V> extends CombineFileInputFormat<K, V> {
            @SuppressWarnings({ "unchecked", "rawtypes" }) 
            @Override 
            public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { 
                return new CombineFileRecordReader((CombineFileSplit)split, context, CombineLineRecordReader.class); 
            } 
        } 
         
        // Per-file delegate reader: wraps a LineRecordReader and reads the file at the
        // given index of the CombineFileSplit.
        public static class CombineLineRecordReader<K, V> extends RecordReader<K, V> {
            private CombineFileSplit split; 
            private TaskAttemptContext context; 
            private int index; 
            private RecordReader<K, V> rr; 
           
            @SuppressWarnings("unchecked") 
            public CombineLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException, InterruptedException { 
                this.index = index;
                this.split = (CombineFileSplit) split; 
                this.context = context; 
           
                this.rr = (RecordReader<K, V>) ReflectionUtils.newInstance(LineRecordReader.class, context.getConfiguration()); 
            } 
           
            @SuppressWarnings("unchecked") 
            @Override 
            public void initialize(InputSplit curSplit, TaskAttemptContext curContext) throws IOException, InterruptedException { 
                this.split = (CombineFileSplit) curSplit; 
                this.context = curContext; 
           
                if (null == rr) { 
                    // Fall back to a LineRecordReader, matching the delegate created in the constructor.
                    rr = (RecordReader<K, V>) ReflectionUtils.newInstance(LineRecordReader.class, context.getConfiguration()); 
                } 
           
                FileSplit fileSplit = new FileSplit(this.split.getPath(index), 
                        this.split.getOffset(index), this.split.getLength(index), 
                        this.split.getLocations()); 
                   
                this.rr.initialize(fileSplit, this.context); 
            } 
           
            @Override 
            public float getProgress() throws IOException, InterruptedException { 
                return rr.getProgress(); 
            } 
           
            @Override 
            public void close() throws IOException { 
                if (null != rr) { 
                    rr.close(); 
                    rr = null; 
                } 
            } 
           
            @Override 
            public K getCurrentKey() 
            throws IOException, InterruptedException { 
                return rr.getCurrentKey(); 
            } 
           
            @Override 
            public V getCurrentValue() 
            throws IOException, InterruptedException { 
                return rr.getCurrentValue(); 
            } 
           
            @Override 
            public boolean nextKeyValue() throws IOException, InterruptedException { 
                return rr.nextKeyValue(); 
            } 
        } 
     
         
        public int run(String[] args) throws Exception {
            // Reuse the Configuration injected by ToolRunner instead of creating a new one.
            Configuration conf = getConf();

            Job job = Job.getInstance(conf);
            job.setJobName("TestCombine");
            job.setJarByClass(TestCombine.class);
     
            job.setMapperClass(ProvinceMapper.class);
            job.setReducerClass(ProvinceReducer.class);
             
            job.setInputFormatClass(CombineSequenceFileInputFormat.class);
             
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
             
            String inpath = "/home/hadoop/tmp/combine";
            String outpath = "/home/hadoop/tmp/combineout";
            Path p = new Path(outpath);
             
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(p)){
                fs.delete(p, true); // remove any previous output directory recursively
            }
            FileInputFormat.addInputPaths(job, inpath);
            FileOutputFormat.setOutputPath(job, p);
     
            return job.waitForCompletion(true) ? 0 : 1;
        }
     
        public static void main(String[] args) throws Exception {
            int ret = ToolRunner.run(new TestCombine(), args);
            System.exit(ret);
        }
    }
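
One practical note: if no maximum split size is set, CombineFileInputFormat may merge all the small files under the input path into very few splits, and the job loses parallelism. Below is a minimal sketch of how the combined split size could be capped, either inside the InputFormat subclass or through the driver configuration; the 64 MB figure is an arbitrary example and the config key shown assumes Hadoop 2.x, neither is part of the original post.

    // Option 1: cap the split size inside the InputFormat subclass.
    public static class CombineSequenceFileInputFormat<K, V> extends CombineFileInputFormat<K, V> {
        public CombineSequenceFileInputFormat() {
            // Each combined split will hold at most ~64 MB of input data.
            setMaxSplitSize(64 * 1024 * 1024);
        }

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
            return new CombineFileRecordReader((CombineFileSplit) split, context, CombineLineRecordReader.class);
        }
    }

    // Option 2: set the limit from the driver configuration instead (Hadoop 2.x key name).
    // conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 64 * 1024 * 1024);

Without such a cap, the number of map tasks is decided only by how the framework groups blocks per node and rack, which for a directory of small files typically means very few tasks.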
  • Original post: https://www.cnblogs.com/luolizhi/p/4943814.html