import java.io.IOException; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class Main{ public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException{ Configuration conf = new Configuration(); conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml")); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); Job job = new Job(conf, "Main"); job.setJarByClass(Main.class); job.setJobName("Main"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); //job.setInputFormatClass(TextInputFormat.class); //job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); boolean success = job.waitForCompletion(true); System.out.print(success ? 
0 : 1); } public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{ public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{ int sum = 0; int count = 0; Iterator<IntWritable> iterator = values.iterator(); while(iterator.hasNext()){ sum += iterator.next().get(); count++; } int average = (int)sum/count; context.write(key,new IntWritable(average)); } } public static class Map extends Mapper<LongWritable,Text,Text,IntWritable>{ public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException{ String line = value.toString(); //纯文本文件转化为string System.out.println(line); StringTokenizer tokenizerArticle = new StringTokenizer(line," "); //将输入的数据首先按行进行分割 while(tokenizerArticle.hasMoreTokens()){ //分别对每行进行处理 StringTokenizer tokenizerLine = new StringTokenizer(tokenizerArticle.nextToken()); //每行按空格划分 String strName = tokenizerLine.nextToken(); //学生姓名部分 String strScore = tokenizerLine.nextToken(); //成绩部分 Text name = new Text(strName); int ScoreInt = Integer.parseInt(strScore); context.write(name, new IntWritable(ScoreInt)); //输出 } } } }
// Input folder names: /TestDirs/ /T/
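// Note: the Map and Reduce classes must be declared static, otherwise the job fails with an error. Hadoop creates them reflectively through a no-argument constructor, which a non-static inner class does not have.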