今天写了一个用mapreduce求平均分的程序,结果是出来了,可是没有按照“学生名字”进行排序,如果是英文名字的话,结果是排好序的。
代码如下:
package com.pro.bq;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that computes the average score of each student.
 *
 * <p>Input is plain text made of whitespace-separated {@code name score} pairs.
 * The output contains one record per student name with the integer average of
 * all scores seen for that name.
 *
 * <p>Output keys are {@link Text} values, which Hadoop compares byte-wise on
 * their UTF-8 encoding. Names therefore come out in UTF-8 byte order (ASCII
 * names first, then non-ASCII names by code point), not in a locale-aware
 * order such as pinyin.
 */
public class AverageScore {

    /** Input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://localhost:9000/user/haduser/input/averageTest/";

    /** Output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://localhost:9000/user/haduser/output/outAvgScore/";

    /**
     * Emits one {@code (name, score)} record for every {@code name score} pair
     * found in an input line.
     */
    public static class MapAvg extends Mapper<Object, Text, Text, IntWritable> {

        // Reused across records; context.write() serializes the current contents.
        private final Text name = new Text();
        private final IntWritable score = new IntWritable();

        /**
         * Splits the line into whitespace-separated tokens and consumes them two
         * at a time as {@code name score}.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context sink for the emitted {@code (name, score)} records
         * @throws NumberFormatException if a score token is not a valid integer
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // A tokenizer is used instead of String.split(" ") because runs of
            // consecutive spaces would make split() return empty elements.
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                String student = tokens.nextToken();
                if (!tokens.hasMoreTokens()) {
                    // Dangling name without a score: skip it rather than fail the task.
                    break;
                }
                name.set(student);
                score.set(Integer.parseInt(tokens.nextToken()));
                context.write(name, score);
            }
        }
    }

    /**
     * Averages all scores received for one student name.
     */
    public static class ReduceAvg extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused across keys; context.write() serializes the current contents.
        private final IntWritable average = new IntWritable();

        /**
         * Writes {@code (name, average)} where the average is computed with
         * integer division, so any fractional part is truncated.
         *
         * @param key     student name
         * @param values  every score emitted for that name
         * @param context sink for the {@code (name, average)} record
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long total = 0; // long so that many large scores cannot overflow the sum
            int count = 0;
            for (IntWritable val : values) {
                total += val.get();
                count++;
            }
            if (count == 0) {
                // Nothing to average; avoids a division by zero.
                return;
            }
            average.set((int) (total / count));
            context.write(key, average);
        }
    }

    /**
     * Configures and runs the job, exiting with status 0 on success and 1 on failure.
     *
     * @param args optional Hadoop generic options followed by {@code <in> <out>};
     *             when no paths are supplied the built-in default HDFS paths are used
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length == 0) {
            otherArgs = new String[] {DEFAULT_INPUT, DEFAULT_OUTPUT};
        }
        if (otherArgs.length != 2) {
            System.err.println("<in> <out>!!");
            System.exit(2);
        }

        // Build the job from conf so that parsed generic options actually take effect.
        Job job = Job.getInstance(conf);
        job.setJarByClass(AverageScore.class);
        job.setMapperClass(MapAvg.class);
        job.setReducerClass(ReduceAvg.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
file1: zhangsan 33 lisi 44 wangwu 55 zhaoliu 66 file2: 张三 4 李四 1 王五 2 赵六 3 file3: zhangsan 22 lisi 33 wangwu 44 zhaoliu 55 file4: 李四 2 张三 1 王五 3 赵六 4
结果如下:
lisi 38
wangwu 49
zhangsan 27
zhaoliu 60
张三 2
李四 1
王五 2
赵六 3
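难道不支持中文的排序??其实结果是排了序的:Text 类型的 key 按 UTF-8 编码的字节顺序比较,英文(ASCII)名字排在中文之前,而“张”“李”“王”“赵”的编码恰好依次递增,所以中文名字是按字节序而不是按拼音序输出的。以后学会自己写Partitioner后是不是可以自己写排序的程序??需要注意:Partitioner 只决定 key 被分配到哪个 reducer,要改变排序规则应当自定义比较器(例如通过 job.setSortComparatorClass 设置)。以后解决...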