import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Find the maximum value in the input files.
public class MaxValue extends Configured implements Tool {
    private static final String INPUT_PATH = "hdfs://h201:9000/user/hadoop/input_maxvalue";
    private static final String OUTPUT_PATH = "hdfs://h201:9000/user/hadoop/output";

    public static class MapClass extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
        // Running maximum across every line this map task sees; Integer.MIN_VALUE
        // rather than 0 so that all-negative input is still handled correctly.
        private int maxNum = Integer.MIN_VALUE;

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] str = value.toString().split(" ");
            for (int i = 0; i < str.length; i++) {
                try {
                    int temp = Integer.parseInt(str[i]);
                    if (temp > maxNum) {
                        maxNum = temp;
                    }
                } catch (NumberFormatException e) {
                    // Skip non-numeric tokens. Catching inside the loop (rather than
                    // around it) keeps one bad token from discarding the rest of the line.
                }
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit this task's maximum exactly once, after all input has been processed.
            context.write(new IntWritable(maxNum), new IntWritable(maxNum));
        }
    }

    public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private int maxNum = Integer.MIN_VALUE;

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable val : values) {
                if (val.get() > maxNum) {
                    maxNum = val.get();
                }
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the global maximum once, after all keys have been reduced.
            context.write(new IntWritable(maxNum), new IntWritable(maxNum));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        conf.set("mapred.jar", "mv.jar");
        // The output path must not already exist, so delete it (recursively) first.
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        Job job = Job.getInstance(conf, "MaxNum");
        job.setJarByClass(MaxValue.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.setMapperClass(MapClass.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // Return the exit code instead of calling System.exit() here, so ToolRunner
        // (and main) decide how to terminate.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        long start = System.nanoTime();
        int res = ToolRunner.run(new Configuration(), new MaxValue(), args);
        System.out.println(System.nanoTime() - start);
        System.exit(res);
    }
}
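A typical invocation, assuming the class is packaged into mv.jar (the name the mapred.jar setting above points at):

    hadoop jar mv.jar MaxValue

Because both the input and output paths are hard-coded constants, no command-line arguments are required.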
*************
setup() is called exactly once by the MapReduce framework, before the map task processes any input, and is where shared variables and resources should be initialized. If that initialization were placed inside map() instead, the mapper would re-initialize the same resources for every input line it parses, which is redundant work that hurts performance.
cleanup() is likewise called exactly once, after the map task has finished all of its input, and is where those variables and resources should be released. If the release were placed inside map(), the mapper would free its resources after every line and have to re-initialize them before the next one, repeating the cycle over and over, again hurting performance. A lifecycle sketch follows below.
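In MaxValue above, cleanup() is exactly what lets each map task and the reducer emit their running maximum once, instead of once per record. The skeleton below is a minimal sketch of the full lifecycle; the TokenMapper class and its regex splitter are hypothetical, not part of the job above, and it uses the same imports as the listing above:

public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private java.util.regex.Pattern splitter;           // initialized once in setup()
    private final IntWritable one = new IntWritable(1);

    @Override
    protected void setup(Context context) {
        // Runs once, before the first map() call: compile the pattern here
        // instead of recompiling it for every input line.
        splitter = java.util.regex.Pattern.compile("\\s+");
    }

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Runs once per input record and only uses the already-built resource.
        for (String token : splitter.split(value.toString())) {
            if (!token.isEmpty()) {
                context.write(new Text(token), one);
            }
        }
    }

    @Override
    protected void cleanup(Context context) {
        // Runs once, after the last map() call: release what setup() acquired.
        splitter = null;
    }
}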
*************