Approach:
Read one line of text at a time and split the string on whitespace. The tokens come in pairs of a student name and the score for one subject, so map emits key -> student name, value -> one score.
In reduce, iterate over all the scores collected for a student, sum them, compute the average, and emit key -> student name, value -> average score.
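For example, an input line such as "zhangsan 78 lisi 89" produces the map output pairs (zhangsan, 78) and (lisi, 89); after the shuffle, the reduce call for key zhangsan receives the list of all of zhangsan's scores across the input files.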
Source data:
chinese.txt
zhangsan 78 lisi 89 wangwu 96 zhaoliu 67
english.txt
zhangsan 80 lisi 82 wangwu 84 zhaoliu 86
math.txt
zhangsan 88 lisi 99 wangwu 66 zhaoliu 77
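Since each student appears once in each of the three files, every reduce call sees exactly three scores. With the integer division used in the reducer below, the expected output is:

zhangsan 82
lisi 90
wangwu 82
zhaoliu 76

(zhangsan: (78+80+88)/3 = 82; lisi: (89+82+99)/3 = 90; wangwu: (96+84+66)/3 = 82; zhaoliu: (67+86+77)/3 = 76.67, truncated to 76.)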
Source code:
package com.duking.hadoop;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Score {

    public static class Map extends Mapper<Object, Text, Text, IntWritable> {

        // The map function: parse one line of input into (name, score) pairs
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Convert the input value to a plain String
            String line = value.toString();
            // Tokenize the line on whitespace; tokens are expected to
            // alternate between student name and score
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                String strName = tokenizer.nextToken();  // student name
                String strScore = tokenizer.nextToken(); // score for one subject
                Text name = new Text(strName);
                int scoreInt = Integer.parseInt(strScore);
                // Emit (name, score)
                context.write(name, new IntWritable(scoreInt));
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        // The reduce function: average all scores for one student
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            Iterator<IntWritable> iterator = values.iterator();
            // Iterate over this student's scores
            while (iterator.hasNext()) {
                sum += iterator.next().get(); // accumulate the total score
                count++;                      // count the number of subjects
            }
            int average = sum / count; // integer division truncates the average
            context.write(key, new IntWritable(average));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the job at the cluster's JobTracker (old-style property;
        // adjust host and port for your cluster)
        conf.set("mapred.job.tracker", "192.168.60.129:9000");

        // Take the input and output directories from the command line
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        /*
         * Alternatively, hard-code input2/output2 under the project directory:
         * String[] ioArgs = new String[] { "input2", "output2" };
         * String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
         */
        if (otherArgs.length != 2) { // exactly two path arguments are required
            System.err.println("Usage: Score Average <in> <out>");
            System.exit(2);
        }

        // Set the MapReduce job name
        Job job = new Job(conf, "Score Average");
        job.setJarByClass(Score.class);

        // Set the Mapper and Reducer classes. Note that the reducer is NOT
        // reused as a combiner here: averaging is not associative, so
        // averaging partial averages would give wrong results in general.
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        // Set the output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Set the input and output directories
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
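To run the job on a cluster, package the class into a jar and submit it with the hadoop command. The jar name and HDFS paths below are placeholders, assuming the three score files have already been uploaded to the input directory:

hadoop jar score.jar com.duking.hadoop.Score /user/duking/score_in /user/duking/score_out
hadoop fs -cat /user/duking/score_out/part-r-00000

The second command prints the reducer output, which should match the expected results listed above. If a combiner is wanted to cut shuffle traffic, it would have to propagate (sum, count) pairs rather than partial averages, which is why the reducer is not registered as a combiner here.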