1 import java.io.IOException; 2 import java.net.URI; 3 4 5 import org.apache.hadoop.conf.Configuration; 6 import org.apache.hadoop.fs.Path; 7 import org.apache.hadoop.io.LongWritable; 8 import org.apache.hadoop.io.NullWritable; 9 import org.apache.hadoop.io.Text; 10 import org.apache.hadoop.mapreduce.Job; 11 import org.apache.hadoop.mapreduce.JobID; 12 import org.apache.hadoop.mapreduce.Mapper; 13 import org.apache.hadoop.mapreduce.TaskAttemptID; 14 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 15 import org.apache.hadoop.mapreduce.lib.input.FileSplit; 16 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 17 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 18 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 19 import org.apache.hadoop.util.GenericOptionsParser; 20 21 22 public class GetIDMapReduce { 23 public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 24 Configuration conf = new Configuration(); 25 // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 26 if(args.length!=2){ 27 System.err.println("Usage databaseV1 <inputpath> <outputpath>"); 28 } 29 30 Job job = Job.getInstance(conf, GetIDMapReduce.class.getSimpleName() + "1"); 31 job.setJarByClass(GetIDMapReduce.class); 32 job.setMapOutputKeyClass(Text.class); 33 job.setMapOutputValueClass(Text.class); 34 job.setOutputKeyClass(Text.class); 35 job.setOutputValueClass(NullWritable.class); 36 job.setMapperClass(MyMapper1.class); 37 job.setNumReduceTasks(0); 38 job.setInputFormatClass(TextInputFormat.class); 39 job.setOutputFormatClass(TextOutputFormat.class); 40 FileInputFormat.addInputPath(job, new Path(args[0])); 41 FileOutputFormat.setOutputPath(job, new Path(args[1])); 42 job.waitForCompletion(true); 43 } 44 public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable>{ 45 @Override 46 protected void map(LongWritable key, Text value, Mapper<LongWritable, 
Text, Text, NullWritable>.Context context) 47 throws IOException, InterruptedException { 48 49 FileSplit fileSplit=(FileSplit) context.getInputSplit(); 50 String pathname=fileSplit.getPath().getName(); 51 JobID jobid=context.getJobID(); //获取jobid 52 LongWritable keyout=context.getCurrentKey(); //获取key偏移量 53 Text valueout=context.getCurrentValue(); //获取一行的值 54 String jobname=context.getJobName(); //获得job名字 55 TaskAttemptID taskid=context.getTaskAttemptID(); //获得taskid 56 float progress=context.getProgress(); //获取任务执行进度 57 String jar = context.getJar(); //作业运行之前,往集群拷贝的作业资源jar 58 //String status = context.getStatus(); 59 String user = context.getUser(); //获取当前用户 60 //String[] fileTimestamps = context.getFileTimestamps(); 61 int numReduceTasks = context.getNumReduceTasks(); //获得reduce的数量 62 //Path[] fileClassPaths = context.getFileClassPaths(); 63 Configuration configuration = context.getConfiguration(); //获得作业配置文件 64 //RawComparator<?> groupingComparator = context.getGroupingComparator(); 65 boolean jobSetupCleanupNeeded = context.getJobSetupCleanupNeeded(); //Get whether job-setup and job-cleanup is needed for the job 66 int maxMapAttempts = context.getMaxMapAttempts(); //the max number of attempts per map task 67 int maxReduceAttempts = context.getMaxReduceAttempts(); //he max number of attempts per reduce task. 68 //@SuppressWarnings("deprecation") 69 //Path[] localCacheFiles = context.getLocalCacheFiles(); 70 //OutputCommitter outputCommitter = context.getOutputCommitter(); 71 Path workingDirectory = context.getWorkingDirectory(); //工作目录 72 boolean nextKeyValue = context.nextKeyValue(); //下一个键值对 73 //URI[] cacheFiles = context.getCacheFiles(); 74 URI[] cacheArchives = context.getCacheArchives(); //Get cache archives set in the Configuration 75 Path[] archiveClassPaths = context.getArchiveClassPaths();//Get the archive entries in classpath as an array of Path 76 boolean profileEnabled = context.getProfileEnabled();//Get whether the task profiling is enabled. 
77 //String profileParams = context.getProfileParams(); 78 @SuppressWarnings("deprecation") 79 boolean symlink = context.getSymlink();// Originally intended to check if symlinks should be used, but currently symlinks cannot be disabled 80 //RawComparator<?> sortComparator = context.getSortComparator(); 81 //int hashCode = context.hashCode(); 82 context.write(new Text("===================================================================================="), NullWritable.get()); 83 context.write(new Text("pathname--"+pathname), NullWritable.get()); 84 context.write(new Text("jobid--"+jobid.toString()), NullWritable.get()); 85 context.write(new Text("keyout--"+keyout.toString()), NullWritable.get()); 86 context.write(new Text("keyout--"+valueout), NullWritable.get()); 87 context.write(new Text("jobname--"+jobname), NullWritable.get()); 88 context.write(new Text("taskid--"+taskid.toString()), NullWritable.get()); 89 context.write(new Text("progress--"+progress), NullWritable.get()); 90 context.write(new Text("jar--"+jar.toString()), NullWritable.get()); 91 //context.write(new Text("status--"+status), NullWritable.get()); 92 context.write(new Text("user--"+user), NullWritable.get()); 93 //context.write(new Text("fileTimestamps--"+fileTimestamps), NullWritable.get()); 94 context.write(new Text("numReduceTasks--"+numReduceTasks), NullWritable.get()); 95 //context.write(new Text("fileClassPaths--"+fileClassPaths), NullWritable.get()); 96 context.write(new Text("configuration--"+configuration), NullWritable.get()); 97 //context.write(new Text("groupingComparator--"+groupingComparator), NullWritable.get()); 98 context.write(new Text("jobSetupCleanupNeeded--"+jobSetupCleanupNeeded), NullWritable.get()); 99 context.write(new Text("maxMapAttempts--"+maxMapAttempts), NullWritable.get()); 100 context.write(new Text("maxReduceAttempts--"+maxReduceAttempts), NullWritable.get()); 101 //context.write(new Text("localCacheFiles--"+localCacheFiles), NullWritable.get()); 102 
//context.write(new Text("outputCommitter--"+outputCommitter), NullWritable.get()); 103 context.write(new Text("workingDirectory--"+workingDirectory), NullWritable.get()); 104 context.write(new Text("nextKeyValue--"+nextKeyValue), NullWritable.get()); 105 //context.write(new Text("cacheFiles--"+cacheFiles), NullWritable.get()); 106 context.write(new Text("cacheArchives--"+cacheArchives), NullWritable.get()); 107 context.write(new Text("archiveClassPaths--"+archiveClassPaths), NullWritable.get()); 108 context.write(new Text("profileEnabled--"+profileEnabled), NullWritable.get()); 109 //context.write(new Text("profileParams--"+profileParams), NullWritable.get()); 110 context.write(new Text("symlink--"+symlink), NullWritable.get()); 111 //context.write(new Text("sortComparator--"+sortComparator), NullWritable.get()); 112 //context.write(new Text("hashCode--"+hashCode), NullWritable.get()); 113 } 114 } 115 }
// 注:并非原著,备注下来方便后面自己查看 (Note: not my original work; saved here for my own future reference.)