• Hadoop 学习笔记 (十一) MapReduce 求平均成绩


    china:
    张三 78
    李四 89
    王五 96
    赵六 67
    english
    张三 80
    李四 82
    王五    84
    赵六 86
    math
    张三 88
    李四 99
    王五 66
    赵六 77




    import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
    public class MyAverage { public static class FormatMapper extends Mapper<Object, Text, Text, IntWritable>{ private IntWritable val = new IntWritable(); public void map(Object key, Text value, Context context) throws IOException, InterruptedException{ String line[] = value.toString().split("\s"); val.set(Integer.parseInt(line[1])); context.write(new Text(line[0]), val); } } public static class AverageReducer extends Reducer<Text, IntWritable, Text, FloatWritable>{ public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException{ int count = 0; int sum = 0; for (IntWritable val : values) { sum += val.get(); count ++; } float ave = (float)sum / count; context.write(key, new FloatWritable(ave)); } } public static void main(String[] args) throws Exception { String dir_in = "hdfs://localhost:9000/in_average"; String dir_out = "hdfs://localhost:9000/out_average"; Path in = new Path(dir_in); Path out = new Path(dir_out); Configuration conf = new Configuration(); Job averageJob = new Job(conf, "my_average"); averageJob.setJarByClass(MyAverage.class); averageJob.setInputFormatClass(TextInputFormat.class); averageJob.setMapperClass(FormatMapper.class); averageJob.setCombinerClass(IntSumReducer.class); //countJob.setPartitionerClass(HashPartitioner.class); averageJob.setMapOutputKeyClass(Text.class); averageJob.setMapOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(averageJob, in); averageJob.setReducerClass(AverageReducer.class); //averageJob.setNumReduceTasks(1); averageJob.setOutputKeyClass(Text.class); averageJob.setOutputValueClass(FloatWritable.class); //countJob.setOutputFormatClass(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(averageJob, out); averageJob.waitForCompletion(true); } }

    张三    82.0
    李四    90.0
    王五    82.0
    赵六    76.666664
    
    
    
     
  • 相关阅读:
    java加载类的方法1.classloader 2.class.forName()
    servlet与线程与jdbc connection的关系
    static再次深入理解
    多线程读某个共享变量有时候也要给读方法加锁
    多线程读一个全局变量要不要加锁?还是说只是当修改全局变量的时候才要加锁?
    接口耗时打印并统计
    Java从设计模式[本场比赛状态转换武器]状态分析(State)模式
    Openstack中间DVR Part1 -- 东西走向的交通处理
    写酷“大神”的公开信
    从反思谈论阵列和指针的几个问题,腾讯的笔名
  • 原文地址:https://www.cnblogs.com/i80386/p/3608273.html
Copyright © 2020-2023  润新知