1 package com.euphe.filter; 2 3 import com.euphe.util.HUtils; 4 import com.euphe.util.Utils; 5 import org.apache.hadoop.conf.Configuration; 6 import org.apache.hadoop.conf.Configured; 7 import org.apache.hadoop.fs.FileSystem; 8 import org.apache.hadoop.fs.Path; 9 import org.apache.hadoop.io.Text; 10 import org.apache.hadoop.mapreduce.Job; 11 import org.apache.hadoop.mapreduce.Mapper; 12 import org.apache.hadoop.mapreduce.Reducer; 13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 15 import org.apache.hadoop.util.GenericOptionsParser; 16 import org.apache.hadoop.util.Tool; 17 18 import java.io.IOException; 19 20 public class ReductionJob extends Configured implements Tool { 21 public static class Map extends Mapper<Object, Text, Text, Text> { 22 private static Text text = new Text(); 23 24 public void map(Object key, Text value, Context context) throws IOException, InterruptedException { 25 text = value; 26 context.write(text, new Text()); 27 } 28 } 29 30 public static class Reduce extends Reducer<Text, Text, Text, Text> { 31 public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { 32 context.write(key, new Text()); 33 } 34 } 35 @Override 36 public int run(String[] args) throws Exception { 37 Configuration conf = HUtils.getConf(); 38 conf.set("mapreduce.job.jar", Utils.getRootPathBasedPath("WEB-INF/jars/redu.jar")); 39 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();//解析命令行参数 40 if (otherArgs.length !=2) {//要求必须有输入和输出路径两个参数 41 System.err.println("Usage: com.euphe.filter.ReductionJob <in> <out>"); 42 System.exit(2); 43 } 44 Job job = Job.getInstance(conf,"Reduction input :"+otherArgs[0]+" to "+otherArgs[1]); 45 job.setJarByClass(ReductionJob.class); 46 job.setMapperClass(Map.class); 47 job.setReducerClass(Reduce.class); 48 job.setNumReduceTasks(1); 49 50 job.setOutputKeyClass(Text.class); 51 job.setOutputValueClass(Text.class); 52 53 FileInputFormat.addInputPath(job, new Path(otherArgs[0])); 54 FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); 55 FileSystem.get(conf).delete(new Path(otherArgs[1]), true);//调用任务前先删除输出目录 56 return job.waitForCompletion(true) ? 0 : 1; 57 } 58 }