MapReduce_MaxValue


import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Find the maximum value in the input data set.
public class MaxValue extends Configured implements Tool {
    private static final String INPUT_PATH = "hdfs://h201:9000/user/hadoop/input_maxvalue";
    private static final String OUTPUT_PATH = "hdfs://h201:9000/user/hadoop/output";

    public static class MapClass extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
        // Running maximum for this map task; assumes non-negative inputs
        // (start from Integer.MIN_VALUE if negative values are possible).
        private int maxNum = 0;

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] str = value.toString().split(" ");
            for (int i = 0; i < str.length; i++) {
                try {
                    int temp = Integer.parseInt(str[i]);
                    if (temp > maxNum) {
                        maxNum = temp;
                    }
                } catch (NumberFormatException e) {
                    // Ignore non-numeric tokens; catching inside the loop keeps
                    // one bad token from skipping the rest of the line.
                }
            }
        }

        // Called once by the framework after the last map() call: emit the
        // per-task maximum a single time instead of one record per input line.
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new IntWritable(maxNum), new IntWritable(maxNum));
        }
    }

    public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private int maxNum = 0;

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            for (IntWritable val : values) {
                if (val.get() > maxNum) {
                    maxNum = val.get();
                }
            }
        }

        // Called once after all keys have been reduced: write the global maximum.
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new IntWritable(maxNum), new IntWritable(maxNum));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        conf.set("mapred.jar", "mv.jar"); // old-style property naming the job jar
        // The output path must not already exist, so delete it up front.
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        Job job = new Job(conf, "MaxNum");
        job.setJarByClass(MaxValue.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.setMapperClass(MapClass.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // Return the job's exit status; ToolRunner propagates it to main().
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        long start = System.nanoTime();
        int res = ToolRunner.run(new Configuration(), new MaxValue(), args);
        System.out.println(System.nanoTime() - start); // elapsed wall time in ns
        System.exit(res);
    }
}

    *************
    setup() is invoked by the MapReduce framework exactly once per task, before any map() calls, and is the place to centralize one-time initialization of variables or resources. If that initialization were placed inside map(), it would be repeated for every input line the mapper parses, wasting work and hurting performance.

    cleanup() is likewise invoked exactly once per task, after the last map() call, and is the place to release those variables or resources. Putting the release work inside map() would free resources after every processed line and force re-initialization before the next one, again causing needless repetition and poor performance. Reducer exposes the same two hooks, which is why the Reduce class above can emit its global maximum from cleanup().
    *************
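
To make the lifecycle concrete, here is a minimal, hypothetical mapper (LifecycleMapper is not part of the program above) that puts one-time initialization in setup() and one-time teardown in cleanup(); only the split(" ") tokenization is borrowed from MaxValue:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrates the task lifecycle: the framework calls setup() once before the
// first map() call and cleanup() once after the last, so per-task state is
// built and released exactly once rather than on every input line.
public class LifecycleMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private IntWritable one;   // reused across every map() call
    private long linesSeen;    // per-task counter, initialized once

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Runs once per map task: allocate reusable objects here instead of
        // allocating fresh ones inside map() for every input line.
        one = new IntWritable(1);
        linesSeen = 0;
    }

    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        linesSeen++;
        for (String token : value.toString().split(" ")) {
            context.write(new Text(token), one);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Runs once per map task, after the last map() call: release resources
        // or, as in MaxValue above, emit per-task aggregates.
        System.err.println("map task processed " + linesSeen + " lines");
    }
}

This is also why MaxValue can keep maxNum as a plain field: map() only updates it, and the single cleanup() call at the end of the task is the natural point to emit the per-task aggregate.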

Original article: https://www.cnblogs.com/jieran/p/9163382.html