• Hadoop 1.x: finding the maximum value in a massive dataset with MapReduce

The idea: the mapper emits every number as a LongWritable key, and a single reducer keeps a running maximum over all the keys it sees, writing the result once in cleanup().

    /**
     * Input file format:
     * 4
     * 7
     * 5
     * i.e. one number per line
     */
    import java.io.IOException;
    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class TopKApp {

        static final String INPUT = "hdfs://192.168.56.100:9000/input";
        static final String OUT = "hdfs://192.168.56.100:9000/out";

        static final Path INPUT_PATH = new Path(INPUT);
        static final Path OUT_PATH = new Path(OUT);

        public static void main(String[] args) throws Exception {

            Configuration conf = new Configuration();
            // MapReduce will refuse to start if the output directory already
            // exists, so remove any output left over from a previous run.
            FileSystem fileSystem = FileSystem.get(new URI(OUT), conf);
            if (fileSystem.exists(OUT_PATH)) {
                fileSystem.delete(OUT_PATH, true);
            }
            /**
             * The canonical "eight steps" of wiring up a MapReduce job
             */
            Job job = new Job(conf, TopKApp.class.getSimpleName());
            job.setJarByClass(TopKApp.class); // ship this class's jar to the cluster
            FileInputFormat.setInputPaths(job, INPUT_PATH);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            // One reducer (the default) is required: the running-max trick only
            // yields a single global maximum if every key reaches one reducer.
            job.setNumReduceTasks(1);
            job.setOutputKeyClass(LongWritable.class);
            job.setOutputValueClass(NullWritable.class);
            FileOutputFormat.setOutputPath(job, OUT_PATH);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

        static class MyMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
            @Override
            protected void map(
                    LongWritable key,
                    Text value,
                    Mapper<LongWritable, Text, LongWritable, NullWritable>.Context context)
                    throws IOException, InterruptedException {
                // Each input line holds one number; emit it as the key, with a
                // NullWritable placeholder as the value.
                String line = value.toString().trim();
                if (!line.isEmpty()) { // skip blank lines instead of crashing in parseLong
                    context.write(new LongWritable(Long.parseLong(line)), NullWritable.get());
                }
            }
        }

        static class MyReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
            // Running maximum over every key this reducer sees.
            long max = Long.MIN_VALUE;

            @Override
            protected void reduce(
                    LongWritable k2,
                    Iterable<NullWritable> v2s,
                    Reducer<LongWritable, NullWritable, LongWritable, NullWritable>.Context context)
                    throws IOException, InterruptedException {
                // Emit nothing per key; just remember the largest one.
                long num = k2.get();
                if (num > max) {
                    max = num;
                }
            }

            @Override
            protected void cleanup(
                    Reducer<LongWritable, NullWritable, LongWritable, NullWritable>.Context context)
                    throws IOException, InterruptedException {
                // cleanup() runs once, after all keys are processed: write the global max.
                context.write(new LongWritable(max), NullWritable.get());
            }
        }
    }
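
The mapper above pushes every single number through the shuffle, which is wasteful at scale. A common optimization is in-mapper combining: track the local maximum inside the Mapper and emit only one record per map task from cleanup(). The sketch below is my own untested variant, not part of the original post (the name LocalMaxMapper is hypothetical); it cuts shuffle volume from one record per input line to one record per input split.

    // A minimal sketch (assumed, not from the original post): in-mapper combining.
    // Drop this nested class into TopKApp alongside MyMapper.
    static class LocalMaxMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
        private long localMax = Long.MIN_VALUE;
        private boolean seenAny = false; // avoid emitting Long.MIN_VALUE for an empty split

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Fold each number into the per-split maximum instead of emitting it.
            String line = value.toString().trim();
            if (!line.isEmpty()) {
                localMax = Math.max(localMax, Long.parseLong(line));
                seenAny = true;
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit a single candidate maximum per map task.
            if (seenAny) {
                context.write(new LongWritable(localMax), NullWritable.get());
            }
        }
    }

Swapping it in is a one-line change in the driver: job.setMapperClass(LocalMaxMapper.class); the single MyReducer then sees one candidate per split instead of the whole dataset. To go from the maximum (top 1) to a true top K, as the class name TopKApp suggests, the same pattern works with a bounded TreeSet<Long> in place of the single long.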
• Original article: https://www.cnblogs.com/litaiqing/p/4538508.html