• mapreduce学习笔记三:平均值


    求平均数是MapReduce中比较常见的算法,实现也比较简单。一种思路是:Map端读取数据并输出键值对;数据在输入到Reduce之前先经过shuffle,将map函数输出的key值相同的所有value值汇集成一个集合value-list,然后将其输入到Reduce端;Reduce端对value-list求和并统计记录数,最后作商即可得到平均值。

    package mapreduce;  
    import java.io.IOException;  
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;  
    import org.apache.hadoop.io.IntWritable;  
    import org.apache.hadoop.io.Text;  
    import org.apache.hadoop.mapreduce.Job;  
    import org.apache.hadoop.mapreduce.Mapper;  
    import org.apache.hadoop.mapreduce.Reducer;  
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;  
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;  
    public class MyAverage{  
        public static class Map extends Mapper<Object , Text , Text , IntWritable>{  
            private static Text newKey=new Text();  
            public void map(Object key,Text value,Context context) throws IOException, InterruptedException{  
                String line=value.toString();  
                System.out.println(line);  
                String arr[]=line.split("   ");  
                newKey.set(arr[0]);
                System.out.println(arr[0]);
                System.out.println(arr[1]);
                int click=Integer.parseInt(arr[1]);  
                context.write(newKey, new IntWritable(click));  
            }  
        }  
        public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>{  
            public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{  
                int num=0;  
                int count=0;  
                for(IntWritable val:values){  
                    num+=val.get();  
                    count++;  
                }  
                int avg=num/count;  
                context.write(key,new IntWritable(avg));  
            }  
        }  
        @SuppressWarnings("deprecation")
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{  
            Configuration conf=new Configuration(); 
            conf.set("dfs.client.use.datanode.hostname", "true");
            System.out.println("start");  
            Job job =new Job(conf,"MyAverage");  
            job.setJarByClass(MyAverage.class);  
            job.setMapperClass(Map.class);  
            job.setReducerClass(Reduce.class);  
            job.setOutputKeyClass(Text.class);  
            job.setOutputValueClass(IntWritable.class);  
            job.setInputFormatClass(TextInputFormat.class);  
            job.setOutputFormatClass(TextOutputFormat.class);  
            Path in=new Path("hdfs://*:9000/user/hadoop/input/c.txt"); 
            System.out.println("in执行完毕");
            Path out=new Path("hdfs://*:9000/user/hadoop/output");
            System.out.println("out执行完毕");
            Path path = new Path("hdfs://*:9000/user/hadoop/output");// 取第1个表示输出目录参数(第0个参数是输入目录)
            FileSystem fileSystem = path.getFileSystem(conf);// 根据path找到这个文件
            if (fileSystem.exists(path)) {
                fileSystem.delete(path, true);// true的意思是,就算output有东西,也一带删除
            } 
            FileInputFormat.addInputPath(job,in);  
            FileOutputFormat.setOutputPath(job,out);  
            System.exit(job.waitForCompletion(true) ? 0 : 1);  
    
        }  
    }  
    https://necydcy.me/
  • 相关阅读:
    Google 推出开源博客迁移工具
    Google 的盲人科学家 T. V. RAMAN
    Sun收购Qlayer以积极推动云计算业务
    比尔盖茨:云计算再造软件边界
    一些CodeGuru的COM教程 (英文)
    机器人:人工智能与心理学的较量
    100个最古老互联网域名 最久只有23年(附名单)
    spring2 hibernate3 中包冲突问题解决
    spring2.5+struts2+hibernate+mysql
    [转].NET破解体验 ildasm.exe的使用
  • 原文地址:https://www.cnblogs.com/miria-486/p/9982524.html
Copyright © 2020-2023  润新知