• 小记--------spark的worker原理分析及源码分析


     
    Worker类源码位置: org.apache.spark.deploy.worker
     
     
    /**
     * Source walkthrough: how the Worker launches a driver.
     */
    case LaunchDriver(driverId, driverDesc) =>
      logInfo(s"Asked to launch driver $driverId")

      // Build the DriverRunner for this driver. The runner is not a thread itself;
      // its start() method spawns the thread that actually runs the driver.
      val driver = new DriverRunner(
        conf,
        driverId,
        workDir,
        sparkHome,
        driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
        self,
        workerUri,
        securityMgr)

      // Register the runner in `drivers`, keyed by driver id (a keyed map update, not a set).
      drivers(driverId) = driver

      // Start the driver.
      driver.start() // details: see listing 代码1


      // Add the driver's declared cores and memory to this worker's usage counters.
      coresUsed += driverDesc.cores
      memoryUsed += driverDesc.mem
     
     
    代码1
    /**
     * Starts a thread to run and manage the driver.
     *
     * The thread registers a shutdown hook that kills the driver, runs
     * `prepareAndRunDriver()` (listing 代码2), records the final state and any exception,
     * removes the shutdown hook again, and finally sends a `DriverStateChanged` message
     * to `worker` (handled in listing 代码3).
     */
    private[worker] def start(): Unit = {
      // A plain Java thread dedicated to this driver.
      val runnerThread = new Thread("DriverRunner for " + driverId) {
        override def run(): Unit = {
          var shutdownHook: AnyRef = null
          try {
            shutdownHook = ShutdownHookManager.addShutdownHook { () =>
              logInfo(s"Worker shutting down, killing driver $driverId")
              kill()
            }

            // Prepare driver jars and run the driver. prepareAndRunDriver():
            //   1. creates the DriverRunner working directory,
            //   2. downloads the user-submitted jar (the packaged Spark application),
            //   3. builds the ProcessBuilder and runs the driver.
            val exitCode = prepareAndRunDriver() // details: see listing 代码2

            // Derive the final state from the exit code and from whether the driver
            // was forcibly killed. A zero exit code wins over the `killed` flag.
            finalState = Some(exitCode match {
              case 0 => DriverState.FINISHED
              case _ if killed => DriverState.KILLED
              case _ => DriverState.FAILED
            })
          } catch {
            case e: Exception =>
              kill()
              finalState = Some(DriverState.ERROR)
              finalException = Some(e)
          } finally {
            // Only unregister the hook if registration actually happened.
            if (shutdownHook != null) {
              ShutdownHookManager.removeShutdownHook(shutdownHook)
            }
          }

          // Notify the owning worker of the final driver state and possible exception.
          worker.send(DriverStateChanged(driverId, finalState.get, finalException)) // see 代码3
        }
      }
      runnerThread.start()
    }
     
     
     
    代码2
    /**
     * Prepares the driver's environment and runs it.
     *
     * Creates the working directory, downloads the user jar into it, builds the
     * ProcessBuilder for the driver command, and hands everything to `runDriver`.
     *
     * @return the value returned by `runDriver` (used by the caller as the exit code)
     */
    private[worker] def prepareAndRunDriver(): Int = {
      // Step 1: create the DriverRunner working directory.
      val workingDir = createWorkingDirectory()
      // Step 2: download the user-submitted jar into that directory.
      val userJarPath = downloadUserJar(workingDir)

      // Replaces the two known placeholders in a command argument; anything else
      // passes through unchanged.
      val resolvePlaceholder: String => String = {
        case "{{WORKER_URL}}" => workerUrl
        case "{{USER_JAR}}" => userJarPath
        case other => other
      }

      // TODO: If we add ability to submit multiple jars they should also be added here

      // Step 3: build the ProcessBuilder from the driver's launch command, its memory
      // setting, the Spark home path, and the placeholder resolver.
      val processBuilder = CommandUtils.buildProcessBuilder(
        driverDesc.command,
        securityManager,
        driverDesc.mem,
        sparkHome.getAbsolutePath,
        resolvePlaceholder)

      runDriver(processBuilder, workingDir, driverDesc.supervise)
    }
     
     
    代码3
    // Once the driver has finished, the DriverRunner thread sends its final state to the worker.
    // The worker handles it in handleDriverStateChanged, which in turn forwards the
    // DriverStateChanged message to the Master.
    case driverStateChanged @ DriverStateChanged(driverId, state, exception) =>
      handleDriverStateChanged(driverStateChanged) // details: see listing 代码4
     
     
    代码4
    /**
     * Handles a driver's state change: logs it, forwards it to the Master, moves the
     * driver from `drivers` to `finishedDrivers`, and subtracts the driver's memory and
     * cores from the worker's usage counters.
     *
     * @param driverStateChanged the message carrying driver id, new state and optional exception
     */
    private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = {
      val id = driverStateChanged.driverId
      val failure = driverStateChanged.exception
      val newState = driverStateChanged.state

      // Log at a level that matches the outcome.
      newState match {
        case DriverState.ERROR =>
          logWarning(s"Driver $id failed with unrecoverable exception: ${failure.get}")
        case DriverState.FAILED =>
          logWarning(s"Driver $id exited with failure")
        case DriverState.FINISHED =>
          logInfo(s"Driver $id exited successfully")
        case DriverState.KILLED =>
          logInfo(s"Driver $id was killed by user")
        case _ =>
          logDebug(s"Driver $id changed state to $newState")
      }

      // Forward the message to the Master so it can update its own view of the driver.
      sendToMaster(driverStateChanged)

      // Remove the driver from the active map and record it among the finished drivers.
      val finished = drivers.remove(id).get
      finishedDrivers(id) = finished
      trimFinishedDriversIfNecessary()

      // Release the memory and cores that were accounted to this driver.
      memoryUsed -= finished.driverDesc.mem
      coresUsed -= finished.driverDesc.cores
    }
     
    /**
     * Source walkthrough: how the Worker launches an executor.
     */
    case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
      // Requests whose master URL differs from the currently active master are only logged.
      if (masterUrl != activeMasterUrl) {
        logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
      } else {
        try {
          logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))



          // Create the executor's working directory (<workDir>/<appId>/<execId>).
          val executorDir = new File(workDir, appId + "/" + execId)
          if (!executorDir.mkdirs()) {
            throw new IOException("Failed to create directory " + executorDir)
          }


          // Create local dirs for the executor. These are passed to the executor via the
          // SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
          // application finishes.
          val appLocalDirs = appDirectories.getOrElse(appId,
            Utils.getOrCreateLocalRootDirs(conf).map { dir =>
              val appDir = Utils.createDirectory(dir, namePrefix = "executor")
              Utils.chmod700(appDir)
              appDir.getAbsolutePath()
            }.toSeq)
          appDirectories(appId) = appLocalDirs

          // Create the ExecutorRunner, with RUNNING as its initial state.
          val manager = new ExecutorRunner(
            appId,
            execId,
            appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
            cores_,
            memory_,
            self,
            workerId,
            host,
            webUi.boundPort,
            publicAddress,
            sparkHome,
            executorDir,
            workerUri,
            conf,
            appLocalDirs, ExecutorState.RUNNING)

          // Register the runner in the worker's local `executors` map under "<appId>/<execId>".
          executors(appId + "/" + execId) = manager

          // Start the ExecutorRunner.
          manager.start() // details: see listing 代码5

          // Add the cores and memory requested for this executor to the usage counters.
          coresUsed += cores_
          memoryUsed += memory_

          // Send an ExecutorStateChanged event to the Master so it can update its state.
          sendToMaster(ExecutorStateChanged(appId, execId, manager.state, None, None))
        } catch {
          case e: Exception =>
            logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
            // If the runner was already registered, kill it and drop it from the map.
            if (executors.contains(appId + "/" + execId)) {
              executors(appId + "/" + execId).kill()
              executors -= appId + "/" + execId
            }
            // Report the failure to the Master.
            sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
              Some(e.toString), None))
        }
      }
     
     
    代码5
    /**
     * Starts the thread that runs the executor and then registers a shutdown hook
     * that kills the executor process when the worker shuts down.
     */
    private[worker] def start(): Unit = {
      // A plain Java thread whose only job is to run fetchAndRunExecutor() (listing 代码6).
      workerThread = new Thread("ExecutorRunner for " + fullId) {
        override def run(): Unit = fetchAndRunExecutor()
      }
      workerThread.start()

      // Shutdown hook that kills actors on shutdown.
      shutdownHook = ShutdownHookManager.addShutdownHook { () =>
        // It's possible that we arrive here before calling `fetchAndRunExecutor`, then `state` will
        // be `ExecutorState.RUNNING`. In this case, we should set `state` to `FAILED`.
        if (state == ExecutorState.RUNNING) state = ExecutorState.FAILED
        killProcess(Some("Worker shutting down"))
      }
    }
     
     
     
    代码6
    /**
     * Download and run the executor described in our ApplicationDescription.
     *
     * Builds the launch command, starts the child process in `executorDir`, redirects its
     * stdout and stderr into files in that directory, blocks until the process exits, and
     * then reports `ExecutorState.EXITED` plus the exit code to `worker`.
     *
     * If the thread is interrupted the state becomes `KILLED`; any other exception sets it
     * to `FAILED`. In both cases `killProcess` is invoked.
     */
    private def fetchAndRunExecutor(): Unit = {
      try {
        // Launch the process: wrap the executor command in a ProcessBuilder.
        val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
          memory, sparkHome.getAbsolutePath, substituteVariables)
        val command = builder.command()
        // Render every argument wrapped in double quotes and separated by a space,
        // i.e. "arg1" "arg2" "arg3". The quotes must be escaped: without the backslashes
        // the three arguments collapse into a single triple-quoted separator string.
        val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
        logInfo(s"Launch command: $formattedCommand")


        builder.directory(executorDir)
        builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
        // In case we are running this from within the Spark Shell, avoid creating a "scala"
        // parent process for the executor command
        builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")


        // Add webUI log urls
        val baseUrl =
          if (conf.getBoolean("spark.ui.reverseProxy", false)) {
            s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
          } else {
            s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
          }
        builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
        builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")


        // This is the call that actually starts the executor process.
        process = builder.start()

        // Header written at the top of the stderr file: the launch command followed by a
        // line of 40 '=' characters and a blank line. The line breaks are written as \n
        // escapes so the literal stays on one source line.
        val header = "Spark Executor Command: %s\n%s\n\n".format(
          formattedCommand, "=" * 40)


        // Redirect its stdout and stderr to files in the executor's working directory.
        val stdout = new File(executorDir, "stdout")
        stdoutAppender = FileAppender(process.getInputStream, stdout, conf)


        val stderr = new File(executorDir, "stderr")
        // Seed the stderr file with the header, then attach an appender to the error stream.
        Files.write(header, stderr, StandardCharsets.UTF_8)
        stderrAppender = FileAppender(process.getErrorStream, stderr, conf)


        // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
        // or with nonzero exit code.
        // Note: waitFor() does not start the process; it blocks until the already
        // started process terminates and returns its exit code.
        val exitCode = process.waitFor()

        // The executor has finished: record the state and build the exit message.
        state = ExecutorState.EXITED
        val message = "Command exited with code " + exitCode

        // Send an ExecutorStateChanged message to the worker that owns this runner
        // (handled in listing 代码7).
        worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode)))
      } catch {
        case interrupted: InterruptedException =>
          logInfo("Runner thread for executor " + fullId + " interrupted")
          state = ExecutorState.KILLED
          killProcess(None)
        case e: Exception =>
          logError("Error running executor", e)
          state = ExecutorState.FAILED
          killProcess(Some(e.toString))
      }
    }
     
    代码7
    // The worker receives an ExecutorStateChanged event and delegates to
    // handleExecutorStateChanged, which forwards the event to the Master.
    case executorStateChanged @ ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
      handleExecutorStateChanged(executorStateChanged) // details: see listing 代码8
     
     
    代码8
    /**
     * Handles an executor's state change.
     *
     * The message is always forwarded to the Master. If the new state counts as finished
     * (per `ExecutorState.isFinished`), a known executor is moved from `executors` to
     * `finishedExecutors` and its cores and memory are subtracted from the usage counters;
     * an unknown executor is only logged. `maybeCleanupApplication` then runs for the
     * application in either case.
     *
     * @param executorStateChanged the message describing the executor and its new state
     */
    private[worker] def handleExecutorStateChanged(executorStateChanged: ExecutorStateChanged):
      Unit = {
      // Forward the same message straight to the Master.
      sendToMaster(executorStateChanged)
      val state = executorStateChanged.state

      // Local bookkeeping only happens once the executor has reached a finished state.
      if (ExecutorState.isFinished(state)) {
        val appId = executorStateChanged.appId
        val fullId = appId + "/" + executorStateChanged.execId

        // Shared tail of both log lines; a `def` so it is only built when actually logged.
        def outcome: String =
          " finished with state " + state +
            executorStateChanged.message.map(" message " + _).getOrElse("") +
            executorStateChanged.exitStatus.map(" exitStatus " + _).getOrElse("")

        executors.get(fullId) match {
          case None =>
            logInfo("Unknown Executor " + fullId + outcome)
          case Some(executor) =>
            logInfo("Executor " + fullId + outcome)

            // Move the executor from the active map to the finished map.
            executors -= fullId
            finishedExecutors(fullId) = executor
            trimFinishedExecutorsIfNecessary()

            // Release the cores and memory accounted to this executor.
            coresUsed -= executor.cores
            memoryUsed -= executor.memory
        }
        maybeCleanupApplication(appId)
      }
    }
     
  • 相关阅读:
    gThumb 3.1.2 发布,支持 WebP 图像
    航空例行天气预报解析 metaf2xml
    Baruwa 1.1.2 发布,邮件监控系统
    Bisect 1.3 发布,Caml 代码覆盖测试
    MoonScript 0.2.2 发布,基于 Lua 的脚本语言
    Varnish 入门
    快速增量备份程序 DeltaCopy
    恢复模糊的图像 SmartDeblur
    Cairo 1.12.8 发布,向量图形绘图库
    iText 5.3.4 发布,Java 的 PDF 开发包
  • 原文地址:https://www.cnblogs.com/yzqyxq/p/12054358.html
Copyright © 2020-2023  润新知