• On the parameter "org.apache.hadoop.mapreduce.Reducer.Context context" in map/reduce methods auto-generated by Eclipse shortcuts


    Today I was testing a MapReduce program, a simple deduplication job. Comparing my program against the one in the textbook, the only difference is "org.apache.hadoop.mapreduce.Reducer.Context context". The program I wrote is as follows:

    package com.pro.bq;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class Dedup {
        public static class Map extends Mapper<Object, Text, Text, Text> {
            private Text line = new Text();

            @Override
            protected void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                // TODO Auto-generated method stub
                line = value;
                context.write(line, new Text(""));
            }
        }
        public static class Reduce extends Reducer<Text, Text, Text, Text>
        {
    
            @SuppressWarnings("unchecked")
            protected void reduce(Text key, Iterable<Text> value,
                    org.apache.hadoop.mapreduce.Reducer.Context context)
                    throws IOException, InterruptedException {
                // TODO Auto-generated method stub
                context.write(key, new Text(""));
    
            }
        }
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf=new Configuration();
    //        conf.set("mapred.job.tracker", "localhost:9001");
            String hdfs=new String("hdfs://localhost:9000/user/haduser/");
            String[] ioStr=new String[]{hdfs+"input",hdfs+"output/outDedup"};
            
            // Define the input/output paths in code; otherwise they would have to be supplied as program arguments
            String[] otherStr=new GenericOptionsParser(conf, ioStr).getRemainingArgs();
    
            
            if(otherStr.length!=2)
            {
                System.err.println("Usage: Data deduplication <in> <out>");
                System.exit(2);
            }
            
            Job job=new Job(conf, "Data deduplication");
            job.setJarByClass(Dedup.class);
            
            job.setMapperClass(Map.class);
            job.setCombinerClass(Reduce.class);
            job.setReducerClass(Reduce.class);
            
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.addInputPath(job, new Path(otherStr[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherStr[1]));
            System.exit(job.waitForCompletion(true) ? 0:1);
            
            
        }
    
    }

    The program given in the textbook is as follows:

    package com.pro.bq;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class Dedup {
        public static class Map extends Mapper<Object, Text, Text, Text> {
            private Text line = new Text();

            protected void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                // TODO Auto-generated method stub
                line = value;
                context.write(line, new Text(""));
            }
        }
        public static class Reduce extends Reducer<Text, Text, Text, Text>
        {
    
            protected void reduce(Text key, Iterable<Text> value,
                    Context context)
                    throws IOException, InterruptedException {
                // TODO Auto-generated method stub
                context.write(key, new Text(""));
    
            }
        }
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf=new Configuration();
    //        conf.set("mapred.job.tracker", "localhost:9001");
            String hdfs=new String("hdfs://localhost:9000/user/haduser/");
            String[] ioStr=new String[]{hdfs+"input",hdfs+"output/outDedup"};
            
            // Define the input/output paths in code; otherwise they would have to be supplied as program arguments
            String[] otherStr=new GenericOptionsParser(conf, ioStr).getRemainingArgs();
    
            
            if(otherStr.length!=2)
            {
                System.err.println("Usage: Data deduplication <in> <out>");
                System.exit(2);
            }
            
            Job job=new Job(conf, "Data deduplication");
            job.setJarByClass(Dedup.class);
            
            job.setMapperClass(Map.class);
            job.setCombinerClass(Reduce.class);
            job.setReducerClass(Reduce.class);
            
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.addInputPath(job, new Path(otherStr[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherStr[1]));
            System.exit(job.waitForCompletion(true) ? 0:1);
            
            
        }
    
    }
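
    Placed side by side, the difference that matters is the declared type of the reduce method's third parameter:

    // my version: the raw inner class, fully qualified
    protected void reduce(Text key, Iterable<Text> value,
            org.apache.hadoop.mapreduce.Reducer.Context context)

    // textbook version: the inherited, parameterized Context
    protected void reduce(Text key, Iterable<Text> value,
            Context context)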

    The test file file1.txt is:

    2012-3-1 a
    2012-3-2 b
    2012-3-3 c
    2012-3-4 d
    2012-3-5 a
    2012-3-6 b
    2012-3-7 c
    2012-3-3 c

    file2.txt:

    2012-3-1 b
    2012-3-2 a
    2012-3-3 b
    2012-3-4 d
    2012-3-5 a
    2012-3-6 c
    2012-3-7 d
    2012-3-3 c

    The output from running my version is the following (note that duplicates such as "2012-3-3 c" survive):

    2012-3-1 a    
    2012-3-1 b    
    2012-3-2 a    
    2012-3-2 b    
    2012-3-3 b    
    2012-3-3 c    
    2012-3-3 c    
    2012-3-3 c    
    2012-3-4 d    
    2012-3-4 d    
    2012-3-5 a    
    2012-3-5 a    
    2012-3-6 b    
    2012-3-6 c    
    2012-3-7 c    
    2012-3-7 d    

    The desired result is:

    2012-3-1 a
    2012-3-1 b
    2012-3-2 a
    2012-3-2 b
    2012-3-3 b
    2012-3-3 c
    2012-3-4 d
    2012-3-5 a
    2012-3-6 b
    2012-3-6 c
    2012-3-7 c
    2012-3-7 d

    I don't know why. I'm noting it down here for now; if anyone understands this, please don't hesitate to enlighten me, I'm still a newbie...
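
    A likely explanation (my best understanding, not from the textbook): org.apache.hadoop.mapreduce.Reducer.Context names the raw inner class, while the Context inherited through "Reduce extends Reducer<Text, Text, Text, Text>" stands for the parameterized Reducer<Text, Text, Text, Text>.Context. Because the parameter types differ, my reduce is an overload of Reducer.reduce(), not an override (note that @Override would not even compile on it; the @SuppressWarnings("unchecked") is needed because the raw Context turns context.write into an unchecked call). The framework therefore still invokes the default reduce() implementation, which writes every incoming (key, value) pair straight through, so the duplicates survive. A minimal sketch of the fix, declaring the parameter as the inherited Context and adding @Override so the compiler checks the signature:

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        // Using the inherited Context (not the raw Reducer.Context) makes
        // this a true override of Reducer.reduce().
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // After the shuffle, each distinct line arrives here once as a
            // key; write it once and ignore the (empty) values.
            context.write(key, new Text(""));
        }
    }

    This is just the textbook version plus @Override; keeping @Override on every intended override turns this kind of silent fallback to the superclass implementation into a compile-time error.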
