• Linux巩固记录(5) hadoop 2.7.4下自己编译代码并运行MapReduce程序


    程序代码为 ~/hadoop-2.7.4/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.7.4-sources/org/apache/hadoop/examples/WordCount.java  

    第一次  删除了package

    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class WordCount {
    
      public static class TokenizerMapper 
           extends Mapper<Object, Text, Text, IntWritable>{
        
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
          
        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
          StringTokenizer itr = new StringTokenizer(value.toString());
          while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
          }
        }
      }
      
      public static class IntSumReducer 
           extends Reducer<Text,IntWritable,Text,IntWritable> {
        private IntWritable result = new IntWritable();
    
        public void reduce(Text key, Iterable<IntWritable> values, 
                           Context context
                           ) throws IOException, InterruptedException {
          int sum = 0;
          for (IntWritable val : values) {
            sum += val.get();
          }
          result.set(sum);
          context.write(key, result);
        }
      }
    
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
          System.err.println("Usage: wordcount <in> [<in>...] <out>");
          System.exit(2);
        }
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
          FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
          new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
    View Code

    此程序需要下面三个jar包才能编译通过

    [root@master classes]# tree /home/jars/
    /home/jars/
    ├── commons-cli-1.4.jar
    ├── hadoop-common-2.7.4.jar
    └── hadoop-mapreduce-client-core-2.7.4.jar

    执行过程及结果如下

    [root@master classes]# 
    [root@master classes]# pwd
    /home/classes
    [root@master classes]# tree
    .
    
    0 directories, 0 files
    [root@master classes]# tree /home/javaFile/
    /home/javaFile/
    └── WordCount.java
    
    0 directories, 1 file
    [root@master classes]# tree /home/jars/
    /home/jars/
    ├── commons-cli-1.4.jar
    ├── hadoop-common-2.7.4.jar
    └── hadoop-mapreduce-client-core-2.7.4.jar
    
    0 directories, 3 files
    [root@master classes]# javac -classpath .:/home/jars/* -d /home/classes/ /home/javaFile/WordCount.java 
    [root@master classes]# tree 
    .
    ├── WordCount.class
    ├── WordCount$IntSumReducer.class
    └── WordCount$TokenizerMapper.class
    
    0 directories, 3 files
    [root@master classes]# jar -cvf wordc.jar ./*.class
    added manifest
    adding: WordCount.class(in = 1907) (out= 1040)(deflated 45%)
    adding: WordCount$IntSumReducer.class(in = 1739) (out= 742)(deflated 57%)
    adding: WordCount$TokenizerMapper.class(in = 1736) (out= 753)(deflated 56%)
    [root@master classes]# tree
    .
    ├── wordc.jar
    ├── WordCount.class
    ├── WordCount$IntSumReducer.class
    └── WordCount$TokenizerMapper.class
    
    0 directories, 4 files
    [root@master classes]# /home/hadoop-2.7.4/bin/hadoop jar /home/classes/wordc.jar WordCount /hdfs-input.txt /result-self-compile
    17/09/02 02:11:45 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.0.80:8032
    17/09/02 02:11:47 INFO input.FileInputFormat: Total input paths to process : 1
    17/09/02 02:11:47 INFO mapreduce.JobSubmitter: number of splits:1
    17/09/02 02:11:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1504320356950_0010
    17/09/02 02:11:47 INFO impl.YarnClientImpl: Submitted application application_1504320356950_0010
    17/09/02 02:11:47 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1504320356950_0010/
    17/09/02 02:11:47 INFO mapreduce.Job: Running job: job_1504320356950_0010
    17/09/02 02:11:56 INFO mapreduce.Job: Job job_1504320356950_0010 running in uber mode : false
    17/09/02 02:11:56 INFO mapreduce.Job:  map 0% reduce 0%
    17/09/02 02:12:02 INFO mapreduce.Job:  map 100% reduce 0%
    17/09/02 02:12:09 INFO mapreduce.Job:  map 100% reduce 100%
    17/09/02 02:12:09 INFO mapreduce.Job: Job job_1504320356950_0010 completed successfully
    17/09/02 02:12:10 INFO mapreduce.Job: Counters: 49
        File System Counters
            FILE: Number of bytes read=118
            FILE: Number of bytes written=241697
            FILE: Number of read operations=0
            FILE: Number of large read operations=0
            FILE: Number of write operations=0
            HDFS: Number of bytes read=174
            HDFS: Number of bytes written=76
            HDFS: Number of read operations=6
            HDFS: Number of large read operations=0
            HDFS: Number of write operations=2
        Job Counters 
            Launched map tasks=1
            Launched reduce tasks=1
            Data-local map tasks=1
            Total time spent by all maps in occupied slots (ms)=3745
            Total time spent by all reduces in occupied slots (ms)=4081
            Total time spent by all map tasks (ms)=3745
            Total time spent by all reduce tasks (ms)=4081
            Total vcore-milliseconds taken by all map tasks=3745
            Total vcore-milliseconds taken by all reduce tasks=4081
            Total megabyte-milliseconds taken by all map tasks=3834880
            Total megabyte-milliseconds taken by all reduce tasks=4178944
        Map-Reduce Framework
            Map input records=6
            Map output records=12
            Map output bytes=118
            Map output materialized bytes=118
            Input split bytes=98
            Combine input records=12
            Combine output records=9
            Reduce input groups=9
            Reduce shuffle bytes=118
            Reduce input records=9
            Reduce output records=9
            Spilled Records=18
            Shuffled Maps =1
            Failed Shuffles=0
            Merged Map outputs=1
            GC time elapsed (ms)=155
            CPU time spent (ms)=1430
            Physical memory (bytes) snapshot=299466752
            Virtual memory (bytes) snapshot=4159479808
            Total committed heap usage (bytes)=141385728
        Shuffle Errors
            BAD_ID=0
            CONNECTION=0
            IO_ERROR=0
            WRONG_LENGTH=0
            WRONG_MAP=0
            WRONG_REDUCE=0
        File Input Format Counters 
            Bytes Read=76
        File Output Format Counters 
            Bytes Written=76
    [root@master classes]# /home/hadoop-2.7.4/bin/hadoop fs -ls /
    Found 3 items
    -rw-r--r--   2 root supergroup         76 2017-09-02 00:57 /hdfs-input.txt
    drwxr-xr-x   - root supergroup          0 2017-09-02 02:12 /result-self-compile
    drwx------   - root supergroup          0 2017-09-02 02:11 /tmp
    [root@master classes]# 
    [root@master classes]# 

    第二次  没有删除package

    package org.apache.hadoop.examples;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    // Word-count example: counts occurrences of each whitespace-separated
    // token in the inputs. Kept byte-identical to the upstream Hadoop 2.7.4
    // source (only comments added) — the jar log below reflects this bytecode.
    public class WordCount {
    
      // Mapper: emits (token, 1) for every whitespace-delimited token in a line.
      public static class TokenizerMapper 
           extends Mapper<Object, Text, Text, IntWritable>{
        
        // Constant count, reused to avoid allocating a new writable per token.
        private final static IntWritable one = new IntWritable(1);
        // Reusable key buffer, refilled for each token.
        private Text word = new Text();
          
        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
          StringTokenizer itr = new StringTokenizer(value.toString());
          while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
          }
        }
      }
      
      // Reducer: sums the partial counts for a token; also used as the combiner.
      public static class IntSumReducer 
           extends Reducer<Text,IntWritable,Text,IntWritable> {
        // Reusable output value, refilled per key.
        private IntWritable result = new IntWritable();
    
        public void reduce(Text key, Iterable<IntWritable> values, 
                           Context context
                           ) throws IOException, InterruptedException {
          int sum = 0;
          for (IntWritable val : values) {
            sum += val.get();
          }
          result.set(sum);
          context.write(key, result);
        }
      }
    
      // Entry point: wordcount <in> [<in>...] <out>. The last argument is the
      // output directory; all preceding arguments are input paths.
      // Exits 2 on bad usage, 1 if the job fails.
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
          System.err.println("Usage: wordcount <in> [<in>...] <out>");
          System.exit(2);
        }
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Summation is associative/commutative, so the reducer works as a combiner.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
          FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
          new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
    View Code
    [root@master classes]# 
    [root@master classes]# tree
    .
    
    0 directories, 0 files
    [root@master classes]# javac -classpath .:/home/jars/* -d /home/classes/ /home/javaFile/WordCount.java 
    [root@master classes]# tree
    .
    └── org
        └── apache
            └── hadoop
                └── examples
                    ├── WordCount.class
                    ├── WordCount$IntSumReducer.class
                    └── WordCount$TokenizerMapper.class
    
    4 directories, 3 files
    [root@master classes]# jar -cvf wordcount.jar ./*
    added manifest
    adding: org/(in = 0) (out= 0)(stored 0%)
    adding: org/apache/(in = 0) (out= 0)(stored 0%)
    adding: org/apache/hadoop/(in = 0) (out= 0)(stored 0%)
    adding: org/apache/hadoop/examples/(in = 0) (out= 0)(stored 0%)
    adding: org/apache/hadoop/examples/WordCount$TokenizerMapper.class(in = 1790) (out= 764)(deflated 57%)
    adding: org/apache/hadoop/examples/WordCount$IntSumReducer.class(in = 1793) (out= 749)(deflated 58%)
    adding: org/apache/hadoop/examples/WordCount.class(in = 1988) (out= 1050)(deflated 47%)
    [root@master classes]# /home/hadoop-2.7.4/bin/hadoop jar /home/classes/wordcount.jar org.apache.hadoop.examples.WordCount /hdfs-input.txt /result-package
    17/09/02 02:20:41 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.0.80:8032
    17/09/02 02:20:43 INFO input.FileInputFormat: Total input paths to process : 1
    17/09/02 02:20:43 INFO mapreduce.JobSubmitter: number of splits:1
    17/09/02 02:20:43 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1504320356950_0011
    17/09/02 02:20:43 INFO impl.YarnClientImpl: Submitted application application_1504320356950_0011
    17/09/02 02:20:43 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1504320356950_0011/
    17/09/02 02:20:43 INFO mapreduce.Job: Running job: job_1504320356950_0011
    17/09/02 02:20:51 INFO mapreduce.Job: Job job_1504320356950_0011 running in uber mode : false
    17/09/02 02:20:51 INFO mapreduce.Job:  map 0% reduce 0%
    17/09/02 02:20:58 INFO mapreduce.Job:  map 100% reduce 0%
    17/09/02 02:21:05 INFO mapreduce.Job:  map 100% reduce 100%
    17/09/02 02:21:06 INFO mapreduce.Job: Job job_1504320356950_0011 completed successfully
    17/09/02 02:21:06 INFO mapreduce.Job: Counters: 49
        File System Counters
            FILE: Number of bytes read=118
            FILE: Number of bytes written=241857
            FILE: Number of read operations=0
            FILE: Number of large read operations=0
            FILE: Number of write operations=0
            HDFS: Number of bytes read=174
            HDFS: Number of bytes written=76
            HDFS: Number of read operations=6
            HDFS: Number of large read operations=0
            HDFS: Number of write operations=2
        Job Counters 
            Launched map tasks=1
            Launched reduce tasks=1
            Data-local map tasks=1
            Total time spent by all maps in occupied slots (ms)=3828
            Total time spent by all reduces in occupied slots (ms)=4312
            Total time spent by all map tasks (ms)=3828
            Total time spent by all reduce tasks (ms)=4312
            Total vcore-milliseconds taken by all map tasks=3828
            Total vcore-milliseconds taken by all reduce tasks=4312
            Total megabyte-milliseconds taken by all map tasks=3919872
            Total megabyte-milliseconds taken by all reduce tasks=4415488
        Map-Reduce Framework
            Map input records=6
            Map output records=12
            Map output bytes=118
            Map output materialized bytes=118
            Input split bytes=98
            Combine input records=12
            Combine output records=9
            Reduce input groups=9
            Reduce shuffle bytes=118
            Reduce input records=9
            Reduce output records=9
            Spilled Records=18
            Shuffled Maps =1
            Failed Shuffles=0
            Merged Map outputs=1
            GC time elapsed (ms)=186
            CPU time spent (ms)=1200
            Physical memory (bytes) snapshot=297316352
            Virtual memory (bytes) snapshot=4159815680
            Total committed heap usage (bytes)=139595776
        Shuffle Errors
            BAD_ID=0
            CONNECTION=0
            IO_ERROR=0
            WRONG_LENGTH=0
            WRONG_MAP=0
            WRONG_REDUCE=0
        File Input Format Counters 
            Bytes Read=76
        File Output Format Counters 
            Bytes Written=76
    [root@master classes]# /home/hadoop-2.7.4/bin/hadoop fs -ls /
    Found 4 items
    -rw-r--r--   2 root supergroup         76 2017-09-02 00:57 /hdfs-input.txt
    drwxr-xr-x   - root supergroup          0 2017-09-02 02:21 /result-package
    drwxr-xr-x   - root supergroup          0 2017-09-02 02:12 /result-self-compile
    drwx------   - root supergroup          0 2017-09-02 02:11 /tmp
    [root@master classes]# 
    [root@master classes]# 

    为啥要删除 package?因为有包路径的时候,执行时就必须用 xxx.xxx.xxx 这样的全限定类名来调用;而且打包时也不能只打 class 文件,包对应的目录结构(org/apache/...)也要一并打进 jar 里

    同理,自己写的代码也可按照这个方式执行

    顺便提一点,如果只是打jar包 用

    jar -cvf test.jar XXX.class

    但是如果要修改MANIFEST.MF,在里面指定mainClass,按照如下方式

    #解压文件
    jar -xf test.jar 
    
    #在 META-INF/MANIFEST.MF 中增加 Main-Class(注意属性名规范写法为 Main-Class)
    Manifest-Version: 1.0
    Created-By: 1.6.0_20 (Sun Microsystems Inc.)
    Main-Class: WordCount
    
    #再打包(解压后 manifest 位于 META-INF/ 目录下)
    jar -cvfm test.jar META-INF/MANIFEST.MF XXXX.class

    这样就可以直接用 java -jar test.jar 运行了,后面不用跟具体的类

  • 相关阅读:
    VS中修改工程名的解决方案
    C++内存管理(转)http://www.cnblogs.com/qiubole/archive/2008/03/07/1094770.html
    OGR中空间叠加函数Union
    如何迅速掌握并提高linux运维技能(收藏文)
    Win10系统应用图标显示感叹号无法打开(详细版)
    IBM带库故障处理(驱动器down,磁带卡带,重配置)Netbackup
    2.2 Netbackup磁带库管理
    重复,空
    Oracle数据库几种启动方式及查询当前状态
    鸡汤自勉
  • 原文地址:https://www.cnblogs.com/xiaochangwei/p/7467209.html
Copyright © 2020-2023  润新知