• MapReduce_partition


    package MapReduce;

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Partitioner;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;

    public class MyPartitioner {
        private static final String INPUT_PATH = "hdfs://h201:9000/user/hadoop/input_par";
        private static final String OUTPUT_PATH = "hdfs://h201:9000/user/hadoop/output";

        public static class MyPartitionerMap extends Mapper<LongWritable, Text, Text, Text> {

            // Tag each input line by the number of tab-separated fields it contains.
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws java.io.IOException, InterruptedException {

                String[] fields = value.toString().split("\t");
                if (fields.length > 3) {
                    context.write(new Text("long"), value);
                } else if (fields.length < 3) {
                    context.write(new Text("short"), value);
                } else {
                    context.write(new Text("right"), value);
                }
            }
        }

        /**
         * The partitioner's input is the map's output.
         *
         * @author Administrator
         */
        public static class MyPartitionerPar extends Partitioner<Text, Text> {

            @Override
            public int getPartition(Text key, Text value, int numPartitions) {
                int result = 0;
                // key.toString().equals("long") -- the toString() is required here.
                // Comparing the Text key to a String directly always fails, so at first
                // every record went to one partition and ended up in a single reduce output file.
                if (key.toString().equals("long")) {
                    result = 0 % numPartitions;
                } else if (key.toString().equals("short")) {
                    result = 1 % numPartitions;
                } else if (key.toString().equals("right")) {
                    result = 2 % numPartitions;
                }
                return result;
            }
        }

        public static class MyPartitionerReduce extends Reducer<Text, Text, Text, Text> {
            @Override
            protected void reduce(Text key, Iterable<Text> values, Context context)
                    throws java.io.IOException, InterruptedException {
                for (Text val : values) {
                    context.write(key, val);
                }
            }
        }

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            /*
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 2) {
                System.err.println("Usage: MyPartitioner <in> <out>");
                System.exit(2);
            }
            */
            conf.set("mapred.jar", "mp1.jar");

            // Delete any previous output so the job can recreate OUTPUT_PATH.
            final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
            fileSystem.delete(new Path(OUTPUT_PATH), true);

            Job job = new Job(conf, "MyPartitioner");
            job.setNumReduceTasks(3);

            job.setJarByClass(MyPartitioner.class);

            job.setMapperClass(MyPartitionerMap.class);
            job.setCombinerClass(MyPartitionerReduce.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setPartitionerClass(MyPartitionerPar.class);
            job.setReducerClass(MyPartitionerReduce.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileInputFormat.setInputPaths(job, INPUT_PATH);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
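
With three reduce tasks, getPartition() sends "long", "short" and "right" to partitions 0, 1 and 2, which is exactly the split visible in the output files below. A minimal local sketch of that mapping, and of the Text-vs-String pitfall noted in the comment above, could look like this (the class name PartitionerCheck is not from the original post; it only assumes hadoop-common and the MyPartitioner class above on the classpath):

    package MapReduce;

    import org.apache.hadoop.io.Text;

    public class PartitionerCheck {
        public static void main(String[] args) {
            // A Text never equals a plain String, which is why getPartition()
            // must call toString() before comparing.
            System.out.println(new Text("long").equals("long"));            // false
            System.out.println(new Text("long").toString().equals("long")); // true

            // With numPartitions = 3 the three keys map to partitions 0, 1 and 2.
            MyPartitioner.MyPartitionerPar partitioner = new MyPartitioner.MyPartitionerPar();
            for (String k : new String[] {"long", "short", "right"}) {
                System.out.println(k + " -> partition "
                        + partitioner.getPartition(new Text(k), new Text(""), 3));
            }
        }
    }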

    vim partition

    aa      1       2
    bb      2       22
    cc      11
    dd      1
    ee      99      99      999
    ff      12      23      123
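
The mapper splits each of these lines on tabs and counts the fields: cc and dd have fewer than three fields ("short"), ee and ff have more than three ("long"), and aa and bb have exactly three ("right"). A small stand-alone check of that rule (the class name ClassifyCheck is hypothetical, not part of the original post):

    package MapReduce;

    public class ClassifyCheck {
        // Same rule as MyPartitionerMap: label a line by its tab-separated field count.
        static String label(String line) {
            int n = line.split("\t").length;
            return n > 3 ? "long" : (n < 3 ? "short" : "right");
        }

        public static void main(String[] args) {
            String[] samples = {
                "aa\t1\t2", "bb\t2\t22", "cc\t11", "dd\t1",
                "ee\t99\t99\t999", "ff\t12\t23\t123"
            };
            for (String s : samples) {
                System.out.println(label(s) + "\t" + s);   // e.g. "right  aa 1 2"
            }
        }
    }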

    [hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00001
    18/06/10 17:55:02 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    short   dd      1
    short   cc      11
    [hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00000
    18/06/10 17:55:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    long    ff      12      23      123
    long    ee      99      99      999
    [hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00002
    18/06/10 18:01:37 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    right   bb      2       22
    right   aa      1       2

  • Original post: https://www.cnblogs.com/jieran/p/9163876.html