MapReduce学习笔记三：平均值

求平均数是MapReduce中比较常见的算法，实现也比较简单。一种思路是：Map端读取数据并输出键值对；数据输入到Reduce之前先经过shuffle，将map函数输出的key值相同的所有value值形成一个集合value-list，然后将其输入到Reduce端；Reduce端对value-list求和并统计记录数，然后作商即可。

package mapreduce;  
import java.io.IOException;  
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.io.IntWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.Mapper;  
import org.apache.hadoop.mapreduce.Reducer;  
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;  
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;  
public class MyAverage{  
    public static class Map extends Mapper<Object , Text , Text , IntWritable>{  
        private static Text newKey=new Text();  
        public void map(Object key,Text value,Context context) throws IOException, InterruptedException{  
            String line=value.toString();  
            System.out.println(line);  
            String arr[]=line.split("   ");  
            newKey.set(arr[0]);
            System.out.println(arr[0]);
            System.out.println(arr[1]);
            int click=Integer.parseInt(arr[1]);  
            context.write(newKey, new IntWritable(click));  
        }  
    }  
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>{  
        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{  
            int num=0;  
            int count=0;  
            for(IntWritable val:values){  
                num+=val.get();  
                count++;  
            }  
            int avg=num/count;  
            context.write(key,new IntWritable(avg));  
        }  
    }  
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{  
        Configuration conf=new Configuration(); 
        conf.set("dfs.client.use.datanode.hostname", "true");
        System.out.println("start");  
        Job job =new Job(conf,"MyAverage");  
        job.setJarByClass(MyAverage.class);  
        job.setMapperClass(Map.class);  
        job.setReducerClass(Reduce.class);  
        job.setOutputKeyClass(Text.class);  
        job.setOutputValueClass(IntWritable.class);  
        job.setInputFormatClass(TextInputFormat.class);  
        job.setOutputFormatClass(TextOutputFormat.class);  
        Path in=new Path("hdfs://*:9000/user/hadoop/input/c.txt"); 
        System.out.println("in执行完毕");
        Path out=new Path("hdfs://*:9000/user/hadoop/output");
        System.out.println("out执行完毕");
        Path path = new Path("hdfs://*:9000/user/hadoop/output");// 取第1个表示输出目录参数（第0个参数是输入目录）
        FileSystem fileSystem = path.getFileSystem(conf);// 根据path找到这个文件
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);// true的意思是，就算output有东西，也一带删除
        } 
        FileInputFormat.addInputPath(job,in);  
        FileOutputFormat.setOutputPath(job,out);  
        System.exit(job.waitForCompletion(true) ? 0 : 1);  

    }  
}

https://necydcy.me/

相关阅读:
UVA 11997 K Smallest Sums
POJ 1007 DNA Sorting
POJ 3669 Meteor Shower
POJ 2376 Cleaning Shifts
POJ 3050 Hopscotch
操作系统第6次实验报告：使用信号量解决进程互斥访问
 操作系统第5次实验报告：内存管理
 操作系统第4次实验报告：文件系统
 操作系统第3次实验报告：管道
 操作系统第2次实验报告：创建进程
原文地址：https://www.cnblogs.com/miria-486/p/9982524.html