• 简单实现CombineFileInputFormat


    import java.io.DataOutput;
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.Reducer.Context;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
    import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
    import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.ReflectionUtils;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    /**
     * Minimal MapReduce job demonstrating a custom {@link CombineFileInputFormat}.
     *
     * <p>Several small text files are packed into combined splits, each file inside
     * a split is read line by line, and every line is emitted as both key and value.
     *
     * <p>Usage: {@code TestCombine [generic options] [inputPaths [outputPath]]}.
     * When the positional arguments are omitted the built-in default paths are used.
     */
    public class TestCombine extends Configured implements Tool {

        /** Input location used when no positional argument is supplied. */
        private static final String DEFAULT_INPUT_PATH = "/home/hadoop/tmp/combine";

        /** Output location used when fewer than two positional arguments are supplied. */
        private static final String DEFAULT_OUTPUT_PATH = "/home/hadoop/tmp/combineout";

        /** Emits every input line as both the output key and the output value. */
        private static class ProvinceMapper extends
                Mapper<Object, Text, Text, Text> {
            @Override
            protected void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                // Debug trace, printed to the task's stdout log.
                System.out.println("value : " + value + " Context " + context);
                context.write(value, value);
            }
        }

        /** Writes the key once (as key and value) for every value grouped under it. */
        private static class ProvinceReducer extends
                Reducer<Text, Text, Text, Text> {
            @Override
            protected void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                // The values are only iterated to preserve the number of occurrences;
                // the record written is always (key, key).
                for (Text ignored : values) {
                    System.out.println("reduce " + key);
                    context.write(key, key);
                }
            }
        }

        /**
         * Combine input format that hands each file of a {@link CombineFileSplit} to a
         * {@link CombineLineRecordReader}.
         *
         * <p>Despite the historical class name, records are read as text lines, not
         * from sequence files.
         */
        public static class CombineSequenceFileInputFormat<K, V> extends CombineFileInputFormat<K, V> {
            /**
             * Creates the reader that iterates over all files of the combined split.
             *
             * @param split   the combined split; must be a {@link CombineFileSplit}
             * @param context the task attempt context
             * @return a reader delegating to one {@link CombineLineRecordReader} per file
             * @throws IOException if the per-file reader cannot be constructed
             */
            // Raw types are needed here: the Class literal of a generic reader cannot
            // carry the <K, V> parameters expected by CombineFileRecordReader.
            @SuppressWarnings({ "unchecked", "rawtypes" })
            @Override
            public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
                return new CombineFileRecordReader((CombineFileSplit) split, context, CombineLineRecordReader.class);
            }
        }

        /**
         * Reads the single file at position {@code index} of a {@link CombineFileSplit}
         * by delegating to a {@link LineRecordReader}.
         */
        public static class CombineLineRecordReader<K, V> extends RecordReader<K, V> {
            private CombineFileSplit split;
            private TaskAttemptContext context;
            private final int index;
            private RecordReader<K, V> rr;

            /**
             * Constructor with the signature that {@link CombineFileRecordReader}
             * looks up reflectively.
             *
             * @param split   the combined split this reader belongs to
             * @param context the task attempt context
             * @param index   position of the file inside {@code split} to read
             * @throws IOException          declared for the reflective contract
             * @throws InterruptedException declared for the reflective contract
             */
            public CombineLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException, InterruptedException {
                this.index = index;
                this.split = split;
                this.context = context;
                this.rr = newLineReader(context.getConfiguration());
            }

            /** Creates the delegate line reader; shared by the constructor and {@link #initialize}. */
            @SuppressWarnings("unchecked")
            private RecordReader<K, V> newLineReader(Configuration conf) {
                // Unchecked: the delegate really produces the LineRecordReader key/value
                // types; K and V are only fixed by the job that uses this reader.
                return (RecordReader<K, V>) ReflectionUtils.newInstance(LineRecordReader.class, conf);
            }

            /**
             * Points the delegate at the file selected by {@code index}.
             *
             * @param curSplit   the combined split; must be a {@link CombineFileSplit}
             * @param curContext the task attempt context
             */
            @Override
            public void initialize(InputSplit curSplit, TaskAttemptContext curContext) throws IOException, InterruptedException {
                this.split = (CombineFileSplit) curSplit;
                this.context = curContext;

                // Recreate the delegate if it was released by close(). It must be the
                // same reader type the constructor uses, otherwise text files would be
                // parsed as sequence files.
                if (null == rr) {
                    rr = newLineReader(context.getConfiguration());
                }

                // Carve this reader's file out of the combined split.
                FileSplit fileSplit = new FileSplit(this.split.getPath(index),
                        this.split.getOffset(index), this.split.getLength(index),
                        this.split.getLocations());

                this.rr.initialize(fileSplit, this.context);
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
                return rr.getProgress();
            }

            /** Closes and releases the delegate; safe to call more than once. */
            @Override
            public void close() throws IOException {
                if (null != rr) {
                    rr.close();
                    rr = null;
                }
            }

            @Override
            public K getCurrentKey()
            throws IOException, InterruptedException {
                return rr.getCurrentKey();
            }

            @Override
            public V getCurrentValue()
            throws IOException, InterruptedException {
                return rr.getCurrentValue();
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                return rr.nextKeyValue();
            }
        }

        /**
         * Configures and runs the job.
         *
         * @param args optional positional arguments: {@code args[0]} is the
         *             comma-separated input path list and {@code args[1]} the output
         *             directory; defaults are used for whichever is missing
         * @return 0 if the job succeeded, 1 otherwise
         * @throws Exception if job setup or execution fails
         */
        @Override
        public int run(String[] args) throws Exception {
            // Use the configuration injected by ToolRunner so that generic options
            // (-D, -fs, -conf, ...) take effect; fall back to a fresh one when run()
            // is invoked directly without setConf().
            Configuration conf = getConf();
            if (conf == null) {
                conf = new Configuration();
            }

            Job job = Job.getInstance(conf);
            job.setJobName("TestCombine");
            job.setJarByClass(TestCombine.class);

            job.setMapperClass(ProvinceMapper.class);
            job.setReducerClass(ProvinceReducer.class);

            job.setInputFormatClass(CombineSequenceFileInputFormat.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            String inpath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
            String outpath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
            Path p = new Path(outpath);

            // Remove a stale output directory; the job refuses to start if it exists.
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(p)) {
                fs.delete(p, true);
            }
            FileInputFormat.addInputPaths(job, inpath);
            FileOutputFormat.setOutputPath(job, p);

            return job.waitForCompletion(true) ? 0 : 1;
        }

        /** Command-line entry point; exits with the status returned by {@link #run}. */
        public static void main(String[] args) throws Exception {
            int ret = ToolRunner.run(new TestCombine(), args);
            System.exit(ret);
        }
    }
    
  • 相关阅读:
    【Delphi】MD5算法(二):应用
    迅雷不能下载FlashPlayer,下载后自动删除,狂汗!!!
    工作笔记1
    GridControl 获取筛选后的数据{笔记}
    Invoke与BeginInvoke[转]
    这几项能力不知道要几年
    你永远不要去做的事1【译】
    window环境变量——心得【转】
    刚做好的网站客服系统,欢迎大家测试
    .Net 2.0里有一个有用的新功能:迭代器
  • 原文地址:https://www.cnblogs.com/chengxin1982/p/3961259.html
Copyright © 2020-2023  润新知