package com.mengyao.hadoop.mapreduce;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Groups tab-separated words and counts the occurrences of each word,
 * a word-count style job driven through ToolRunner.
 */
public class MyGroupApp extends Configured implements Tool {

    static class MyGroupMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private Text k = null;
        private LongWritable v = null;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Reuse a single key/value pair across map() calls to avoid object churn.
            k = new Text();
            v = new LongWritable(1L);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line holds tab-separated words; emit (word, 1) for each.
            final String[] words = value.toString().split("\t");
            for (String word : words) {
                k.set(word);
                context.write(k, v);
            }
        }
    }

    static class MyGroupReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the 1s emitted by the mappers for this word.
            long count = 0L;
            for (LongWritable item : values) {
                count += item.get();
            }
            context.write(key, new LongWritable(count));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        // Reuse JVMs for any number of tasks (-1), disable speculative execution,
        // allow up to four attempts per task, and do not skip bad records.
        conf.set("mapreduce.job.jvm.numtasks", "-1");
        conf.set("mapreduce.map.speculative", "false");
        conf.set("mapreduce.reduce.speculative", "false");
        conf.set("mapreduce.map.maxattempts", "4");
        conf.set("mapreduce.reduce.maxattempts", "4");
        conf.set("mapreduce.map.skip.maxrecords", "0");
        Job job = Job.getInstance(conf, MyGroupApp.class.getSimpleName());
        job.setJarByClass(MyGroupApp.class);
        job.setInputFormatClass(TextInputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(MyGroupMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(MyGroupReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static int createJob(String[] args) {
        Configuration conf = new Configuration();
        int status = 1;
        try {
            status = ToolRunner.run(conf, new MyGroupApp(), args);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
        return status;
    }

    public static void main(String[] args) throws Exception {
        // When compiling, packaging, and uploading directly with ant, pre-assign
        // the arguments here (note: this overrides any command-line arguments).
        // The output directory is timestamped so repeated runs do not collide.
        args = new String[]{"/testdata/words",
                "/job/mapreduce/" + MyGroupApp.class.getSimpleName() + "_"
                        + new SimpleDateFormat("yyyyMMddHHmmss").format(new Date())};
        if (args.length != 2) {
            System.out.println("Usage: " + MyGroupApp.class.getSimpleName() + " <in> <out>");
            System.exit(2);
        } else {
            int status = createJob(args);
            System.exit(status);
        }
    }
}
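
/*
 * Submission sketch: a hedged example of how this job could be launched once
 * packaged. The jar name "mengyao-hadoop.jar" is an assumption for
 * illustration, not something defined in this file; only the "hadoop jar"
 * invocation pattern and the class name come from the source.
 *
 *   hadoop jar mengyao-hadoop.jar com.mengyao.hadoop.mapreduce.MyGroupApp
 *
 * No command-line arguments are needed because main() pre-assigns the input
 * path (/testdata/words) and a timestamped output directory before running.
 */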