• A walkthrough of the script Hadoop uses to run jobs


    #!/usr/bin/env bash
    
    # Licensed to the Apache Software Foundation (ASF) under one or more
    # contributor license agreements.  See the NOTICE file distributed with
    # this work for additional information regarding copyright ownership.
    # The ASF licenses this file to You under the Apache License, Version 2.0
    # (the "License"); you may not use this file except in compliance with
    # the License.  You may obtain a copy of the License at
    #
    #     http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    
    # This script runs the hadoop core commands. 
    
    bin=`which $0`
    bin=`dirname ${bin}`
    bin=`cd "$bin"; pwd`
     
    DEFAULT_LIBEXEC_DIR="$bin"/../libexec
    
    HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
    . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
    
    function print_usage(){
      echo "Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]"
      echo "  CLASSNAME            run the class named CLASSNAME"
      echo " or"
      echo "  where COMMAND is one of:"
      echo "  fs                   run a generic filesystem user client"
      echo "  version              print the version"
      echo "  jar <jar>            run a jar file"
      echo "                       note: please use "yarn jar" to launch"
      echo "                             YARN applications, not this command."
      echo "  checknative [-a|-h]  check native hadoop and compression libraries availability"
      echo "  distcp <srcurl> <desturl> copy file or directories recursively"
      echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
      echo "  classpath            prints the class path needed to get the"
      echo "  credential           interact with credential providers"
      echo "                       Hadoop jar and the required libraries"
      echo "  daemonlog            get/set the log level for each daemon"
      echo "  trace                view and modify Hadoop tracing settings"
      echo ""
      echo "Most commands print help when invoked w/o parameters."
    }
    
    if [ $# = 0 ]; then
      print_usage
      exit
    fi
    
    COMMAND=$1
    case $COMMAND in
      # usage flags
      --help|-help|-h)
        print_usage
        exit
        ;;
    
      #hdfs commands
      namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
        echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
        echo "Instead use the hdfs command for it." 1>&2
        echo "" 1>&2
        #try to locate hdfs and if present, delegate to it.  
        shift
        if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
          exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups}  "$@"
        elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
          exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
        else
          echo "HADOOP_HDFS_HOME not found!"
          exit 1
        fi
        ;;
    
      #mapred commands for backwards compatibility
      pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
        echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
        echo "Instead use the mapred command for it." 1>&2
        echo "" 1>&2
        #try to locate mapred and if present, delegate to it.
        shift
        if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
          exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
        elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
          exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
        else
          echo "HADOOP_MAPRED_HOME not found!"
          exit 1
        fi
        ;;
    
      #core commands  
      *)
        # the core commands
        if [ "$COMMAND" = "fs" ] ; then
          CLASS=org.apache.hadoop.fs.FsShell
        elif [ "$COMMAND" = "version" ] ; then
          CLASS=org.apache.hadoop.util.VersionInfo
        elif [ "$COMMAND" = "jar" ] ; then
          CLASS=org.apache.hadoop.util.RunJar
          if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then
            echo "WARNING: Use "yarn jar" to launch YARN applications." 1>&2
          fi
        elif [ "$COMMAND" = "key" ] ; then
          CLASS=org.apache.hadoop.crypto.key.KeyShell
        elif [ "$COMMAND" = "checknative" ] ; then
          CLASS=org.apache.hadoop.util.NativeLibraryChecker
        elif [ "$COMMAND" = "distcp" ] ; then
          CLASS=org.apache.hadoop.tools.DistCp
          CLASSPATH=${CLASSPATH}:${TOOL_PATH}
        elif [ "$COMMAND" = "daemonlog" ] ; then
          CLASS=org.apache.hadoop.log.LogLevel
        elif [ "$COMMAND" = "archive" ] ; then
          CLASS=org.apache.hadoop.tools.HadoopArchives
          CLASSPATH=${CLASSPATH}:${TOOL_PATH}
        elif [ "$COMMAND" = "credential" ] ; then
          CLASS=org.apache.hadoop.security.alias.CredentialShell
        elif [ "$COMMAND" = "trace" ] ; then
          CLASS=org.apache.hadoop.tracing.TraceAdmin
        elif [ "$COMMAND" = "classpath" ] ; then
          if [ "$#" -gt 1 ]; then
            CLASS=org.apache.hadoop.util.Classpath
          else
            # No need to bother starting up a JVM for this simple case.
            if $cygwin; then
              CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
            fi
            echo $CLASSPATH
            exit
          fi
        elif [[ "$COMMAND" = -*  ]] ; then
            # class and package names cannot begin with a -
            echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
            exit 1
        else
          CLASS=$COMMAND
        fi
    
        # cygwin path translation
        if $cygwin; then
          CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
          HADOOP_LOG_DIR=$(cygpath -w "$HADOOP_LOG_DIR" 2>/dev/null)
          HADOOP_PREFIX=$(cygpath -w "$HADOOP_PREFIX" 2>/dev/null)
          HADOOP_CONF_DIR=$(cygpath -w "$HADOOP_CONF_DIR" 2>/dev/null)
          HADOOP_COMMON_HOME=$(cygpath -w "$HADOOP_COMMON_HOME" 2>/dev/null)
          HADOOP_HDFS_HOME=$(cygpath -w "$HADOOP_HDFS_HOME" 2>/dev/null)
          HADOOP_YARN_HOME=$(cygpath -w "$HADOOP_YARN_HOME" 2>/dev/null)
          HADOOP_MAPRED_HOME=$(cygpath -w "$HADOOP_MAPRED_HOME" 2>/dev/null)
        fi
    
        shift
        
        # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
        HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
    
        #make sure security appender is turned off
        HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
    
        export CLASSPATH=$CLASSPATH
        exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
        ;;
    
    esac

   As the case statement above shows, running a jar maps COMMAND=jar to CLASS=org.apache.hadoop.util.RunJar, so an invocation such as hadoop jar myjob.jar MyMain arg1 (names here are illustrative) ultimately execs "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS org.apache.hadoop.util.RunJar myjob.jar MyMain arg1. RunJar is a Java class, not a script; its (decompiled) source follows.
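
   Before the full listing, here is a minimal standalone sketch of the manifest lookup RunJar performs to decide whether a main class must be supplied on the command line (the class name MainClassProbe is invented for illustration):

    import java.io.IOException;
    import java.util.jar.Attributes;
    import java.util.jar.JarFile;
    import java.util.jar.Manifest;

    public class MainClassProbe {
      // Returns the Main-Class attribute of the jar's manifest, or null when
      // the jar has no manifest or the attribute is absent.
      static String mainClassOf(String jarPath) throws IOException {
        try (JarFile jar = new JarFile(jarPath)) {
          Manifest manifest = jar.getManifest();
          if (manifest == null) {
            return null;
          }
          return manifest.getMainAttributes().getValue(Attributes.Name.MAIN_CLASS);
        }
      }

      public static void main(String[] args) throws IOException {
        System.out.println(mainClassOf(args[0]));
      }
    }

   When this lookup yields null, RunJar falls back to treating the next command-line argument as the main class, which is why hadoop jar app.jar MyMain args... works for jars whose manifest lacks a Main-Class attribute.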

    package org.apache.hadoop.util;
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.io.PrintStream;
    import java.lang.reflect.Array;
    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Method;
    import java.net.MalformedURLException;
    import java.net.URI;
    import java.net.URL;
    import java.net.URLClassLoader;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Enumeration;
    import java.util.List;
    import java.util.jar.Attributes;
    import java.util.jar.JarEntry;
    import java.util.jar.JarFile;
    import java.util.jar.Manifest;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import org.apache.hadoop.classification.InterfaceAudience;
    import org.apache.hadoop.classification.InterfaceStability;
    import org.apache.hadoop.fs.FileUtil;
    import org.apache.hadoop.io.IOUtils;
    
    @InterfaceAudience.Private
    @InterfaceStability.Unstable
    public class RunJar
    {
      public static final Pattern MATCH_ANY = Pattern.compile(".*");
      public static final int SHUTDOWN_HOOK_PRIORITY = 10;
      public static final String HADOOP_USE_CLIENT_CLASSLOADER = "HADOOP_USE_CLIENT_CLASSLOADER";
      public static final String HADOOP_CLASSPATH = "HADOOP_CLASSPATH";
      public static final String HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES = "HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES";
    
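      /** Unpacks jarFile into toDir; delegates to the regex-filtered overload
          with a match-everything pattern. */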
      public static void unJar(File jarFile, File toDir)
        throws IOException
      {
        unJar(jarFile, toDir, MATCH_ANY);
      }
    
      public static void unJar(File jarFile, File toDir, Pattern unpackRegex)
        throws IOException
      {
        JarFile jar = new JarFile(jarFile);
        try {
          Enumeration<JarEntry> entries = jar.entries();
          while (entries.hasMoreElements()) {
            JarEntry entry = entries.nextElement();
            if ((!entry.isDirectory()) && (unpackRegex.matcher(entry.getName()).matches()))
            {
              InputStream in = jar.getInputStream(entry);
              try {
                File file = new File(toDir, entry.getName());
                ensureDirectory(file.getParentFile());
                OutputStream out = new FileOutputStream(file);
                try {
                  IOUtils.copyBytes(in, out, 8192);
                } finally {
                  out.close(); // close() call dropped by the decompiler
                }
              }
              finally {
                in.close(); // likewise dropped by the decompiler
              }
            }
          }
        }
        finally {
          jar.close();
        }
      }
    
      private static void ensureDirectory(File dir)
        throws IOException
      {
        if ((!dir.mkdirs()) && (!dir.isDirectory()))
          throw new IOException("Mkdirs failed to create " + dir.toString());
      }
    
      public static void main(String[] args)
        throws Throwable
      {
        new RunJar().run(args);
      }
    
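      // Validates the jar argument, resolves the main class (manifest
      // Main-Class attribute or second command-line argument), unpacks the
      // jar into a temp dir, builds a classloader over it and reflectively
      // invokes main(String[]).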
      public void run(String[] args) throws Throwable {
        String usage = "RunJar jarFile [mainClass] args...";
    
        if (args.length < 1) {
          System.err.println(usage);
          System.exit(-1);
        }
    
        int firstArg = 0;
        String fileName = args[(firstArg++)];
        File file = new File(fileName);
        if ((!file.exists()) || (!file.isFile())) {
          System.err.println("Not a valid JAR: " + file.getCanonicalPath());
          System.exit(-1);
        }
        String mainClassName = null;
        JarFile jarFile;
        try {
          jarFile = new JarFile(fileName);
        } catch (IOException io) {
          throw new IOException("Error opening job jar: " + fileName).initCause(io);
        }
    
        Manifest manifest = jarFile.getManifest();
        if (manifest != null) {
          mainClassName = manifest.getMainAttributes().getValue("Main-Class");
        }
        jarFile.close();
    
        if (mainClassName == null) {
          if (args.length < 2) {
            System.err.println(usage);
            System.exit(-1);
          }
          mainClassName = args[(firstArg++)];
        }
        mainClassName = mainClassName.replaceAll("/", ".");
    
        File tmpDir = new File(System.getProperty("java.io.tmpdir"));
        ensureDirectory(tmpDir);
        final File workDir;
        try {
          // createTempFile is used only to reserve a unique name; the file is
          // deleted below and recreated as a directory.
          workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
        } catch (IOException ioe) {
          System.err.println("Error creating temp dir in java.io.tmpdir "
            + tmpDir + " due to " + ioe.getMessage());
          System.exit(-1);
          return;
        }
    
        if (!workDir.delete()) {
          System.err.println("Delete failed for " + workDir);
          System.exit(-1);
        }
        ensureDirectory(workDir);
    
        ShutdownHookManager.get().addShutdownHook(new Runnable()
        {
          public void run()
          {
            // delete the unpacked working directory when the JVM exits
            FileUtil.fullyDelete(workDir);
          }
        }, SHUTDOWN_HOOK_PRIORITY);
    
        unJar(file, workDir);
    
        ClassLoader loader = createClassLoader(file, workDir);
    
        Thread.currentThread().setContextClassLoader(loader);
        Class<?> mainClass = Class.forName(mainClassName, true, loader);
        Method main = mainClass.getMethod("main", new Class[] { Array.newInstance(String.class, 0).getClass() });
    
        String[] newArgs = (String[])Arrays.asList(args).subList(firstArg, args.length).toArray(new String[0]);
        try
        {
          main.invoke(null, new Object[] { newArgs });
        } catch (InvocationTargetException e) {
          throw e.getTargetException();
        }
      }
    
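      // Two strategies: an isolating ApplicationClassLoader when the
      // HADOOP_USE_CLIENT_CLASSLOADER env var is set, otherwise a plain
      // URLClassLoader over the unpacked jar, its classes/ dir and lib/ jars.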
      private ClassLoader createClassLoader(File file, File workDir)
        throws MalformedURLException
      {
        ClassLoader loader;
        if (useClientClassLoader()) {
          StringBuilder sb = new StringBuilder();
          sb.append(workDir).append("/")
            .append(File.pathSeparator).append(file)
            .append(File.pathSeparator).append(workDir).append("/classes/")
            .append(File.pathSeparator).append(workDir).append("/lib/*");
    
          String hadoopClasspath = getHadoopClasspath();
          if ((hadoopClasspath != null) && (!hadoopClasspath.isEmpty())) {
            sb.append(File.pathSeparator).append(hadoopClasspath);
          }
          String clientClasspath = sb.toString();
    
          String systemClasses = getSystemClasses();
          List<String> systemClassesList = systemClasses == null ? null
            : Arrays.asList(StringUtils.getTrimmedStrings(systemClasses));
    
          loader = new ApplicationClassLoader(clientClasspath, getClass().getClassLoader(), systemClassesList);
        }
        else {
          List<URL> classPath = new ArrayList<URL>();
          classPath.add(new File(workDir + "/").toURI().toURL());
          classPath.add(file.toURI().toURL());
          classPath.add(new File(workDir, "classes/").toURI().toURL());
          File[] libs = new File(workDir, "lib").listFiles();
          if (libs != null) {
            for (int i = 0; i < libs.length; i++) {
              classPath.add(libs[i].toURI().toURL());
            }
          }
    
          loader = new URLClassLoader(classPath.toArray(new URL[0]));
        }
        return loader;
      }
    
      boolean useClientClassLoader() {
        return Boolean.parseBoolean(System.getenv("HADOOP_USE_CLIENT_CLASSLOADER"));
      }
    
      String getHadoopClasspath() {
        return System.getenv("HADOOP_CLASSPATH");
      }
    
      String getSystemClasses() {
        return System.getenv("HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES");
      }
    }

   To summarize: RunJar unpacks the submitted jar into a temporary working directory, builds a classloader over the unpacked contents (the directory itself, classes/ and the jars under lib/), resolves the main class from the manifest's Main-Class attribute or from the command line, and invokes that class's static main method reflectively. A shutdown hook deletes the working directory when the JVM exits.
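
   To make the flow concrete, here is a minimal standalone sketch of the same technique. It is illustrative only: the class name MiniRunJar is invented, and it skips RunJar's unpacking step, regex filter, client-classloader option and temp-dir cleanup. It mirrors the URLClassLoader branch of createClassLoader and the reflective invocation in run():

    import java.io.File;
    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Method;
    import java.net.URL;
    import java.net.URLClassLoader;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class MiniRunJar {
      public static void main(String[] args) throws Throwable {
        // args: <unpackedDir> <mainClass> [appArgs...]; assumes the jar was
        // already unpacked into unpackedDir, as RunJar does via unJar().
        File workDir = new File(args[0]);
        String mainClassName = args[1].replaceAll("/", ".");

        // Mirror the URLClassLoader branch of createClassLoader:
        // the unpacked root, classes/, and every jar under lib/.
        List<URL> classPath = new ArrayList<URL>();
        classPath.add(workDir.toURI().toURL());
        classPath.add(new File(workDir, "classes/").toURI().toURL());
        File[] libs = new File(workDir, "lib").listFiles();
        if (libs != null) {
          for (File lib : libs) {
            classPath.add(lib.toURI().toURL());
          }
        }
        ClassLoader loader = new URLClassLoader(classPath.toArray(new URL[0]));
        Thread.currentThread().setContextClassLoader(loader);

        // Load the main class through the new loader and invoke its static
        // main(String[]) reflectively, unwrapping InvocationTargetException
        // so the application's own exception surfaces.
        Class<?> mainClass = Class.forName(mainClassName, true, loader);
        Method main = mainClass.getMethod("main", String[].class);
        String[] rest = Arrays.copyOfRange(args, 2, args.length);
        try {
          main.invoke(null, (Object) rest);
        } catch (InvocationTargetException e) {
          throw e.getTargetException();
        }
      }
    }

   Unwrapping InvocationTargetException, as RunJar also does, matters: without it the application's own exception would be buried inside the reflection wrapper.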

  • Original post: https://www.cnblogs.com/garfieldcgf/p/5527420.html