• Spark source code (13): SparkSubmit action explained


       org.apache.spark.examples.SparkPi, line 27
        def main(args: Array[String]): Unit = {
          val spark = SparkSession
            .builder
            .appName("Spark Pi")
            .getOrCreate()
          val slices = if (args.length > 0) args(0).toInt else 2
          val n = math.min(100000L * slices, Int.MaxValue).toInt 
          val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
            val x = random * 2 - 1
            val y = random * 2 - 1
        if (x*x + y*y <= 1) 1 else 0
      }.reduce(_ + _) // reduce is an action operator, so it triggers job execution; let's follow it
          println(s"Pi is roughly ${4.0 * count / (n - 1)}")
          spark.stop()
        }
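       A quick aside before following reduce: a minimal sketch (my own example, not from SparkPi) of why the action is the trigger. Transformations only record lineage; nothing runs until an action calls sc.runJob.
        import org.apache.spark.sql.SparkSession

        val spark = SparkSession.builder.appName("lazy-vs-action-demo").getOrCreate()
        val rdd = spark.sparkContext.parallelize(1 to 10, 2)
        val doubled = rdd.map(_ * 2)    // lazy: only builds the lineage, no job yet
        val sum = doubled.reduce(_ + _) // action: calls sc.runJob and schedules a job
        println(sum)                    // 110
        spark.stop()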
       org.apache.spark.rdd.RDD, line 1102
        def reduce(f: (T, T) => T): T = withScope {
          val cleanF = sc.clean(f)
          val reducePartition: Iterator[T] => Option[T] = iter => {
            if (iter.hasNext) {
              Some(iter.reduceLeft(cleanF))
            } else {
              None
            }
          }
      var jobResult: Option[T] = None // mergeResult below folds each partition's partial result into this on the driver
          val mergeResult = (_: Int, taskResult: Option[T]) => {
            if (taskResult.isDefined) {
              jobResult = jobResult match {
                case Some(value) => Some(f(value, taskResult.get))
                case None => taskResult
              }
            }
          }
      sc.runJob(this, reducePartition, mergeResult) /* entry point into SparkContext.runJob */
          jobResult.getOrElse(throw new UnsupportedOperationException("empty collection"))
        }
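       To make the reducePartition/mergeResult pair above less opaque, here is a hedged stand-alone sketch of the same logic using plain Scala collections: each "partition" reduces locally to an Option, and the driver folds the partial results into one value.
        // simulated per-partition iterators (the second one is deliberately empty)
        val parts = Seq(Iterator(1, 2, 3), Iterator[Int](), Iterator(4, 5))
        // reducePartition: reduce each partition locally to an Option[Int]
        val partials = parts.map(it => if (it.hasNext) Some(it.reduceLeft(_ + _)) else None)
        // mergeResult: fold the per-partition results on the driver
        var jobResult: Option[Int] = None
        partials.foreach { taskResult =>
          if (taskResult.isDefined) {
            jobResult = jobResult match {
              case Some(value) => Some(value + taskResult.get)
              case None        => taskResult
            }
          }
        }
        println(jobResult.getOrElse(throw new UnsupportedOperationException("empty collection"))) // 15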
       org.apache.spark.SparkContext, line 2286
        def runJob[T, U: ClassTag](
            rdd: RDD[T],
            processPartition: Iterator[T] => U,
            resultHandler: (Int, U) => Unit): Unit = {
          val processFunc = (context: TaskContext, iter: Iterator[T]) => processPartition(iter)
          runJob[T, U](rdd, processFunc, 0 until rdd.partitions.length, resultHandler)
        }
       org.apache.spark.SparkContext, line 2182
        def runJob[T, U: ClassTag](
            rdd: RDD[T],
            func: (TaskContext, Iterator[T]) => U,
            partitions: Seq[Int],
            resultHandler: (Int, U) => Unit): Unit = {
          if (stopped.get()) {
            throw new IllegalStateException("SparkContext has been shutdown")
          }
          val callSite = getCallSite
          val cleanedFunc = clean(func)
          logInfo("Starting job: " + callSite.shortForm)
      if (conf.getBoolean("spark.logLineage", false)) {
            logInfo("RDD's recursive dependencies:\n" + rdd.toDebugString)
          }
      /* the call finally lands in the DAGScheduler */
      dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler, localProperties.get)
          progressBar.foreach(_.finishAll())
      rdd.doCheckpoint() // checkpointing happens here so RDDs marked with checkpoint() are materialized right after the job finishes
        }
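       On the doCheckpoint question in the comment above: checkpoint() only marks an RDD, and the data is actually written when rdd.doCheckpoint() runs at the end of runJob, i.e. right after the first action on it. A hedged sketch, assuming an existing SparkContext sc (the directory is made up):
        sc.setCheckpointDir("/tmp/spark-checkpoints")   // hypothetical directory
        val data = sc.parallelize(1 to 100, 4).map(_ * 2)
        data.checkpoint()                               // only marks the RDD, nothing is written yet
        data.count()                                    // action -> runJob -> rdd.doCheckpoint() materializes it
        println(data.isCheckpointed)                    // true: the lineage is now truncated at the checkpoint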
       org.apache.spark.scheduler.DAGScheduler, line 884
        def runJob[T, U](rdd: RDD[T],func: (TaskContext, Iterator[T]) => U,
            partitions: Seq[Int],callSite: CallSite,resultHandler: (Int, U) => Unit,
            properties: Properties): Unit = {
        /* rdd: the RDD the action was called on; func: the function run on each partition; partitions: the partitions of the final RDD */
        /* resultHandler: handles each task's result; properties: job configuration properties */
      val start = System.nanoTime
      /* entry point for splitting the job into stages and tasks */
          val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
          ThreadUtils.awaitReady(waiter.completionFuture, Duration.Inf)
          waiter.completionFuture.value.get match {
        ...... // just success/failure logging, omitted
          }
        }
       org.apache.spark.scheduler.DAGScheduler, line 826
        def submitJob[T, U](rdd: RDD[T],func: (TaskContext, Iterator[T]) => U,
            partitions: Seq[Int],callSite: CallSite,resultHandler: (Int, U) => Unit,
            properties: Properties): JobWaiter[U] = {
          val maxPartitions = rdd.partitions.length
      partitions.find(p => p >= maxPartitions || p < 0).foreach { p =>
            throw new IllegalArgumentException(
              "Attempting to access a non-existent partition: " + p + ". " +
                "Total number of partitions: " + maxPartitions)
          }
          eagerlyComputePartitionsForRddAndAncestors(rdd)

          val jobId = nextJobId.getAndIncrement()
          if (partitions.isEmpty) {
            val clonedProperties = Utils.cloneProperties(properties)
            if (sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) == null) {
              clonedProperties.setProperty(SparkContext.SPARK_JOB_DESCRIPTION, callSite.shortForm)
            }
            val time = clock.getTimeMillis()
            listenerBus.post(
              SparkListenerJobStart(jobId, time, Seq.empty, clonedProperties))
            listenerBus.post(
              SparkListenerJobEnd(jobId, time, JobSucceeded))
        return new JobWaiter[U](this, jobId, 0, resultHandler) /* holds the information needed to track the job */
          }

          assert(partitions.nonEmpty)
          val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
          val waiter = new JobWaiter[U](this, jobId, partitions.size, resultHandler)
      /* the event is only posted to a queue here; the real handling happens inside eventProcessLoop */
      /* so let's follow DAGSchedulerEventProcessLoop */
          eventProcessLoop.post(JobSubmitted(
            jobId, rdd, func2, partitions.toArray, callSite, waiter,
            Utils.cloneProperties(properties)))
          waiter
        }
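       A hedged sketch of the hand-off above: submitJob returns the JobWaiter immediately after posting the event, and DAGScheduler.runJob then blocks on the waiter's completionFuture, which is conceptually a Promise completed by the event-loop thread once the last result arrives.
        import scala.concurrent.{Await, Promise}
        import scala.concurrent.duration.Duration

        val completion = Promise[Unit]()             // JobWaiter holds a promise like this
        // ... the dag-scheduler-event-loop thread completes it when the last task finishes ...
        completion.success(())                       // simulated job completion
        Await.ready(completion.future, Duration.Inf) // what ThreadUtils.awaitReady does in runJob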
       org.apache.spark.scheduler.DAGScheduler, line 2421
        private[scheduler] class DAGSchedulerEventProcessLoop(dagScheduler: DAGScheduler)
          extends EventLoop[DAGSchedulerEvent]("dag-scheduler-event-loop") with Logging {
      // note: this is not an RPC call
          override def onReceive(event: DAGSchedulerEvent): Unit = {
            val timerContext = timer.time()
            try {
          // OK, so onReceive just keeps delegating to doOnReceive
          // let's look at the parent class to see who calls onReceive
              doOnReceive(event)
            } finally {
              timerContext.stop()
            }
          }
      private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
        ......
      }
        }
       org.apache.spark.util.EventLoop, line 34
    private[spark] abstract class EventLoop[E](name: String) extends Logging {
          private[spark] val eventThread = new Thread(name) {
            setDaemon(true)

            override def run(): Unit = {
              try {
                while (!stopped.get) {
                  val event = eventQueue.take()
                  try {
                //OK: a daemon thread in the parent class keeps taking events from eventQueue
                onReceive(event) // and dispatches each one to onReceive
                  } catch {
                    case NonFatal(e) =>
                      try {
                        onError(e)
                      } catch {
                        case NonFatal(e) => logError("Unexpected error in " + name, e)
                      }
                  }
                }
              } catch {
                ......
              }
            }

          }
        }
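       The same pattern in miniature (my own sketch, not Spark code): a daemon thread drains a blocking queue and dispatches every event to onReceive, which is exactly what DAGSchedulerEventProcessLoop relies on.
        import java.util.concurrent.LinkedBlockingDeque
        import java.util.concurrent.atomic.AtomicBoolean

        abstract class TinyEventLoop[E](name: String) {
          private val eventQueue = new LinkedBlockingDeque[E]()
          private val stopped = new AtomicBoolean(false)
          private val eventThread = new Thread(name) {
            setDaemon(true)
            override def run(): Unit =
              while (!stopped.get) onReceive(eventQueue.take()) // blocks until something is posted
          }
          def start(): Unit = eventThread.start()
          def stop(): Unit = stopped.set(true)
          def post(event: E): Unit = eventQueue.put(event)      // what eventProcessLoop.post does
          protected def onReceive(event: E): Unit               // the subclass dispatches here, like doOnReceive
        }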
       org.apache.spark.scheduler.DAGScheduler, line 2438
    // now that the dispatch loop is clear, let's follow doOnReceive
    private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
      case JobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties) =>
        dagScheduler.handleJobSubmitted(jobId, rdd, func,
          partitions, callSite, listener, properties)

      ...... // the other cases are not relevant here, skipped
        }
       org.apache.spark.scheduler.DAGScheduler, line 1149
    Following dagScheduler.handleJobSubmitted from the JobSubmitted case above
        private[scheduler] def handleJobSubmitted(jobId: Int,finalRDD: RDD[_],
            func: (TaskContext, Iterator[_]) => _,partitions: Array[Int],
            callSite: CallSite,listener: JobListener,
        properties: Properties): Unit = { /* the key method */
          var finalStage: ResultStage = null
          try {
    /* entry point for stage splitting: returns the final stage, which links back to its parent stages */
            finalStage = createResultStage(finalRDD, func, partitions, jobId, callSite)
          } catch {
    /* ShuffleMapStage vs ResultStage | ShuffleMapTask vs ResultTask | ShuffleDependency vs NarrowDependency */
            case e: BarrierJobSlotsNumberCheckFailed =>
              val numCheckFailures = barrierJobIdToNumTasksCheckFailures.compute(jobId,
                (_: Int, value: Int) => value + 1)

              if (numCheckFailures <= maxFailureNumTasksCheck) {
                messageScheduler.schedule(
                  new Runnable {
                    override def run(): Unit = eventProcessLoop.post(JobSubmitted(jobId, finalRDD, func,
                      partitions, callSite, listener, properties))
                  },
                  timeIntervalNumTasksCheck,
                  TimeUnit.SECONDS
                )
                return
              } else {
                barrierJobIdToNumTasksCheckFailures.remove(jobId)
                listener.jobFailed(e)
                return
              }

            case e: Exception =>
              logWarning("Creating new stage failed due to exception - job: " + jobId, e)
              listener.jobFailed(e)
              return
          }
          barrierJobIdToNumTasksCheckFailures.remove(jobId)

          val job = new ActiveJob(jobId, finalStage, callSite, listener, properties)
          clearCacheLocs()

          val jobSubmissionTime = clock.getTimeMillis()
          jobIdToActiveJob(jobId) = job
          activeJobs += job
          finalStage.setActiveJob(job)
          val stageIds = jobIdToStageIds(jobId).toArray
          val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo))
          listenerBus.post(
            SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos,
              Utils.cloneProperties(properties)))
      submitStage(finalStage) /* submit the final stage; its parent stages are found and submitted first via recursion */
        }
       1.1 createResultStage explained
        private def createResultStage(
            rdd: RDD[_],
            func: (TaskContext, Iterator[_]) => _,
            partitions: Array[Int],
            jobId: Int,
            callSite: CallSite): ResultStage = {
          val (shuffleDeps, resourceProfiles) = getShuffleDependenciesAndResourceProfiles(rdd)
          val resourceProfile = mergeResourceProfilesForStage(resourceProfiles)
          checkBarrierStageWithDynamicAllocation(rdd)
          checkBarrierStageWithNumSlots(rdd, resourceProfile)
          checkBarrierStageWithRDDChainPattern(rdd, partitions.toSet.size)
      val parents = getOrCreateParentStages(shuffleDeps, jobId) /* get the parent stages of the final stage */
          val id = nextStageId.getAndIncrement()
          val stage = new ResultStage(id, rdd, func, partitions, parents, jobId,
        callSite, resourceProfile.id) /* this is the final (result) stage */
          stageIdToStage(id) = stage
          updateJobIdStageIdMaps(jobId, stage)
          stage
        }
       1.1.1 getOrCreateParentStages explained
        private def getOrCreateParentStages(shuffleDeps: HashSet[ShuffleDependency[_, _, _]],
            firstJobId: Int): List[Stage] = {
          shuffleDeps.map { shuffleDep =>
        getOrCreateShuffleMapStage(shuffleDep, firstJobId) /* entry point */
          }.toList
        }
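       In other words, every ShuffleDependency becomes a stage boundary. A hedged sketch (assuming an existing SparkContext sc) that yields exactly one parent ShuffleMapStage plus the final ResultStage:
        val words = sc.parallelize(Seq("a", "b", "a", "c"), 2)
        val counts = words.map(w => (w, 1)).reduceByKey(_ + _) // narrow map, then a shuffle dependency
        println(counts.toDebugString)                           // the lineage shows the ShuffledRDD boundary
        counts.collect()                                        // runs as ShuffleMapStage + ResultStage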
       1.1.1.1 getOrCreateShuffleMapStage explained
    private def getOrCreateShuffleMapStage(shuffleDep: ShuffleDependency[_, _, _],
        firstJobId: Int): ShuffleMapStage = {
      shuffleIdToMapStage.get(shuffleDep.shuffleId) match {
        case Some(stage) =>
              stage

            case None =>
    /* given an RDD, find all of its shuffle dependencies that don't have a stage yet */
              getMissingAncestorShuffleDependencies(shuffleDep.rdd).foreach { dep =>
                if (!shuffleIdToMapStage.contains(dep.shuffleId)) {
    /* given a shuffle dependency, create its ShuffleMapStage */
                  createShuffleMapStage(dep, firstJobId)
                }
              }
              createShuffleMapStage(shuffleDep, firstJobId)
          }
        }
       1.2 submitStage explained
        /* the argument is the ResultStage, but it can only run after all parent stages have finished, so this recurses backwards */
        private def submitStage(stage: Stage): Unit = {
          val jobId = activeJobForStage(stage)
          if (jobId.isDefined) {
            logDebug(s"submitStage($stage (name=${stage.name};" +
              s"jobs=${stage.jobIds.toSeq.sorted.mkString(",")}))")
            if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
          val missing = getMissingParentStages(stage).sortBy(_.id) /* parent stages that still need to run */
              logDebug("missing: " + missing)
              if (missing.isEmpty) {
                logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
            submitMissingTasks(stage, jobId.get) /* submit the current stage */
              } else {
                for (parent <- missing) {
              submitStage(parent) /* submit the parent stage first (recursive call) */
                }
                waitingStages += stage
              }
            }
          } else {
            abortStage(stage, "No active job for stage " + stage.id, None)
          }
        }
       1.2.1 submitMissingTasks explained
        // this method is fairly involved, so let's break it down
        private def submitMissingTasks(stage: Stage, jobId: Int): Unit = {
        /* 1. get the partitions of this stage | 2. serialize and broadcast the task binary | 3. create one task per partition | 4. submit the task set */

          stage match {
            case sms: ShuffleMapStage if stage.isIndeterminate && !sms.isAvailable =>
              mapOutputTracker.unregisterAllMapOutput(sms.shuffleDep.shuffleId)
            case _ =>
          }

          val partitionsToCompute: Seq[Int] = stage.findMissingPartitions()

          val properties = jobIdToActiveJob(jobId).properties
          addPySparkConfigsToProperties(stage, properties)

          runningStages += stage
      stage match { /* is this a shuffle map stage or the final stage? */
            case s: ShuffleMapStage =>
              outputCommitCoordinator.stageStart(stage = s.id, maxPartitionId = s.numPartitions - 1)
              if (pushBasedShuffleEnabled) {
                prepareShuffleServicesForShuffleMapStage(s)
              }
            case s: ResultStage =>
              outputCommitCoordinator.stageStart(
                stage = s.id, maxPartitionId = s.rdd.partitions.length - 1)
          }
          val taskIdToLocations: Map[Int, Seq[TaskLocation]] = try {
            stage match {
              case s: ShuffleMapStage =>
            /* get the preferred locations for each partition */
                partitionsToCompute.map { id => (id, getPreferredLocs(stage.rdd, id))}.toMap
              case s: ResultStage =>
                partitionsToCompute.map { id =>
                  val p = s.partitions(id)
                  (id, getPreferredLocs(stage.rdd, p))
                }.toMap
            }
          } catch {
            case NonFatal(e) =>
              stage.makeNewStageAttempt(partitionsToCompute.size)
              listenerBus.post(SparkListenerStageSubmitted(stage.latestInfo,
                Utils.cloneProperties(properties)))
              abortStage(stage, s"Task creation failed: $e\n${Utils.exceptionString(e)}", Some(e))
              runningStages -= stage
              return
          }

          stage.makeNewStageAttempt(partitionsToCompute.size, taskIdToLocations.values.toSeq)

          if (partitionsToCompute.nonEmpty) {
            stage.latestInfo.submissionTime = Some(clock.getTimeMillis())
          }
          listenerBus.post(SparkListenerStageSubmitted(stage.latestInfo,
            Utils.cloneProperties(properties)))

          var taskBinary: Broadcast[Array[Byte]] = null
          var partitions: Array[Partition] = null
          try {
            var taskBinaryBytes: Array[Byte] = null
        /* a ResultTask returns the final result to the driver; a ShuffleMapTask writes shuffle output */
            RDDCheckpointData.synchronized {
              taskBinaryBytes = stage match {
                case stage: ShuffleMapStage =>
                  JavaUtils.bufferToArray(
                    closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef))
                case stage: ResultStage =>
                  JavaUtils.bufferToArray(closureSerializer.serialize((stage.rdd, stage.func): AnyRef))
              }

              partitions = stage.rdd.partitions
            }

            if (taskBinaryBytes.length > TaskSetManager.TASK_SIZE_TO_WARN_KIB * 1024) {
              logWarning(s"Broadcasting large task binary with size " +
                s"${Utils.bytesToString(taskBinaryBytes.length)}")
            }
            taskBinary = sc.broadcast(taskBinaryBytes)
          } catch {
            case e: NotSerializableException =>
              abortStage(stage, "Task not serializable: " + e.toString, Some(e))
              runningStages -= stage

              return
            case e: Throwable =>
              abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e))
              runningStages -= stage

              return
          }

          val tasks: Seq[Task[_]] = try {
            val serializedTaskMetrics = closureSerializer.serialize(stage.latestInfo.taskMetrics).array()
            stage match {
              case stage: ShuffleMapStage =>
                stage.pendingPartitions.clear()
            /* partition info of the last RDD of the current stage */
                partitionsToCompute.map { id =>
                  val locs = taskIdToLocations(id)
                  val part = partitions(id)
                  stage.pendingPartitions += id
                  new ShuffleMapTask(stage.id, stage.latestInfo.attemptNumber,
                    taskBinary, part, locs, properties, serializedTaskMetrics, Option(jobId),
                    Option(sc.applicationId), sc.applicationAttemptId, stage.rdd.isBarrier())
                }

              case stage: ResultStage =>
                partitionsToCompute.map { id =>
                  val p: Int = stage.partitions(id)
                  val part = partitions(p)
                  val locs = taskIdToLocations(id)
                  new ResultTask(stage.id, stage.latestInfo.attemptNumber,
                    taskBinary, part, locs, id, properties, serializedTaskMetrics,
                    Option(jobId), Option(sc.applicationId), sc.applicationAttemptId,
                    stage.rdd.isBarrier())
                }
            }
          } catch {
            case NonFatal(e) =>
              abortStage(stage, s"Task creation failed: $e\n${Utils.exceptionString(e)}", Some(e))
              runningStages -= stage
              return
          }

          if (tasks.nonEmpty) {
            logInfo(s"Submitting ${tasks.size} missing tasks from $stage (${stage.rdd}) (first 15 " +
              s"tasks are for partitions ${tasks.take(15).map(_.partitionId)})")
        /* submit the tasks via TaskSchedulerImpl */
            taskScheduler.submitTasks(new TaskSet(
              tasks.toArray, stage.id, stage.latestInfo.attemptNumber, jobId, properties,
              stage.resourceProfileId))
          } else {
            markStageAsFinished(stage, None)

            stage match {
              case stage: ShuffleMapStage =>
                logDebug(s"Stage ${stage} is actually done; " +
                    s"(available: ${stage.isAvailable}," +
                    s"available outputs: ${stage.numAvailableOutputs}," +
                    s"partitions: ${stage.numPartitions})")
                markMapStageJobsAsFinished(stage)
              case stage : ResultStage =>
                logDebug(s"Stage ${stage} is actually done; (partitions: ${stage.numPartitions})")
            }
            submitWaitingChildStages(stage)
          }
        }
       1.2.1.1 taskScheduler.submitTasks explained
        org.apache.spark.scheduler.TaskSchedulerImpl, line 234
        override def submitTasks(taskSet: TaskSet): Unit = {
          val tasks = taskSet.tasks
          logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks "
            + "resource profile " + taskSet.resourceProfileId)
          this.synchronized {
            val manager = createTaskSetManager(taskSet, maxTaskFailures)
            val stage = taskSet.stageId
            val stageTaskSets =
              taskSetsByStageIdAndAttempt.getOrElseUpdate(stage, new HashMap[Int, TaskSetManager])
            stageTaskSets.foreach { case (_, ts) =>
              ts.isZombie = true
            }
            stageTaskSets(taskSet.stageAttemptId) = manager
    /* FIFO | FAIR scheduling policy */
            schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

            if (!isLocal && !hasReceivedTask) {
              starvationTimer.scheduleAtFixedRate(new TimerTask() {
                override def run(): Unit = {
                  if (!hasLaunchedTask) {
                    logWarning("Initial job has not accepted any resources; " +
                      "check your cluster UI to ensure that workers are registered " +
                      "and have sufficient resources")
                  } else {
                    this.cancel()
                  }
                }
              }, STARVATION_TIMEOUT_MS, STARVATION_TIMEOUT_MS)
            }
            hasReceivedTask = true
          }
      backend.reviveOffers() /* a method of CoarseGrainedSchedulerBackend */
        }
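       For reference, which SchedulableBuilder the FIFO | FAIR comment above refers to is controlled by spark.scheduler.mode (FIFO by default; FAIR additionally reads an allocation file). A hedged configuration sketch, the file path is made up:
        import org.apache.spark.sql.SparkSession

        val spark = SparkSession.builder
          .appName("scheduler-mode-demo")
          .config("spark.scheduler.mode", "FAIR")                                  // or "FIFO" (the default)
          .config("spark.scheduler.allocation.file", "/path/to/fairscheduler.xml") // hypothetical path
          .getOrCreate()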
       1.2.1.1.1 backend.reviveOffers explained
        org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend, line 582
        override def reviveOffers(): Unit = Utils.tryLogNonFatalError {
      driverEndpoint.send(ReviveOffers) // in the end this sends a ReviveOffers message to the driver endpoint
        }

        org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend, line 165
        case ReviveOffers =>
          makeOffers()
       1.2.1.1.1.1 makeOffers explained
        private def makeOffers(): Unit = {
          // Make sure no executor is killed while some task is launching on it
          val taskDescs = withLock {
            // Filter out executors under killing
            val activeExecutors = executorDataMap.filterKeys(isExecutorActive)
            val workOffers = activeExecutors.map {
              case (id, executorData) =>
                new WorkerOffer(id, executorData.executorHost, executorData.freeCores,
                  Some(executorData.executorAddress.hostPort),
                  executorData.resourcesInfo.map { case (rName, rInfo) =>
                    (rName, rInfo.availableAddrs.toBuffer)
                  }, executorData.resourceProfileId)
            }.toIndexedSeq
            scheduler.resourceOffers(workOffers, true)
          }
          if (taskDescs.nonEmpty) {
        launchTasks(taskDescs) /* launch the tasks */
          }
        }
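       Conceptually, a resource offer is just "executor X on host Y has N free cores (plus any extra resources)"; TaskSchedulerImpl.resourceOffers matches pending tasks against these offers, preferring the locations computed earlier in submitMissingTasks. A simplified sketch (not the real WorkerOffer class):
        case class SimpleOffer(executorId: String, host: String, freeCores: Int)

        val offers = Seq(SimpleOffer("exec-1", "node-a", 4), SimpleOffer("exec-2", "node-b", 2))
        val taskSlots = offers.map(_.freeCores).sum // 6 task slots available in this scheduling round
        println(s"can launch up to $taskSlots tasks this round")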
       1.2.1.1.1.1.1 launchTasks explained
        org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend, line 351
        private def launchTasks(tasks: Seq[Seq[TaskDescription]]): Unit = {
          for (task <- tasks.flatten) {
            val serializedTask = TaskDescription.encode(task)
        /* the serialized task exceeds the maximum RPC message size */
            if (serializedTask.limit() >= maxRpcMessageSize) {
              Option(scheduler.taskIdToTaskSetManager.get(task.taskId)).foreach { taskSetMgr =>
                try {
                  var msg = "Serialized task %s:%d was %d bytes, which exceeds max allowed: " +
                    s"${RPC_MESSAGE_MAX_SIZE.key} (%d bytes). Consider increasing " +
                    s"${RPC_MESSAGE_MAX_SIZE.key} or using broadcast variables for large values."
                  msg = msg.format(task.taskId, task.index, serializedTask.limit(), maxRpcMessageSize)
                  taskSetMgr.abort(msg)
                } catch {
                  case e: Exception => logError("Exception in error callback", e)
                }
              }
            }
            else {
              val executorData = executorDataMap(task.executorId)
              val rpId = executorData.resourceProfileId
              val prof = scheduler.sc.resourceProfileManager.resourceProfileFromId(rpId)
              val taskCpus = ResourceProfile.getTaskCpusOrDefaultForProfile(prof, conf)
              executorData.freeCores -= taskCpus
              task.resources.foreach { case (rName, rInfo) =>
                assert(executorData.resourcesInfo.contains(rName))
                executorData.resourcesInfo(rName).acquire(rInfo.addresses)
              }

              logDebug(s"Launching task ${task.taskId} on executor id: ${task.executorId} hostname: " +
                s"${executorData.executorHost}.")
          /* send the LaunchTask message to the executor endpoint */
              executorData.executorEndpoint.send(LaunchTask(new SerializableBuffer(serializedTask)))
            }
          }
        }
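       The size check above corresponds to spark.rpc.message.maxSize (in MiB, default 128). If a task closure really is that large, the usual fix is to broadcast the large data instead, or to raise the limit; a hedged configuration sketch:
        import org.apache.spark.SparkConf

        val conf = new SparkConf()
          .setAppName("rpc-size-demo")
          .set("spark.rpc.message.maxSize", "256") // MiB; the default is 128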
       LaunchTask explained
        org.apache.spark.executor.CoarseGrainedExecutorBackend, line 166
        case LaunchTask(data) =>
          if (executor == null) {
        exitExecutor(1, "Received LaunchTask command but executor was null")
          } else {
            val taskDesc = TaskDescription.decode(data.value)
            logInfo("Got assigned task " + taskDesc.taskId)
            taskResources(taskDesc.taskId) = taskDesc.resources
            executor.launchTask(this, taskDesc)
          }
       launchTask explained
        org.apache.spark.executor.Executor, line 269
        def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = {
      // TaskRunner implements Runnable, so jump straight to its run method
          val tr = new TaskRunner(context, taskDescription, plugins)
          runningTasks.put(taskDescription.taskId, tr)
          threadPool.execute(tr)
          if (decommissioned) {
            log.error(s"Launching a task while in decommissioned state.")
          }
        }
       run explained
        org.apache.spark.executor.Executor, line 432
        override def run(): Unit = {
          setMDCForTask(taskName, mdcProperties)
          threadId = Thread.currentThread.getId
          Thread.currentThread.setName(threadName)
          val threadMXBean = ManagementFactory.getThreadMXBean
      val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId) /* task memory management */
      val deserializeStartTimeNs = System.nanoTime() /* start time, used later to measure deserialization */
      val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
        threadMXBean.getCurrentThreadCpuTime
      } else 0L /* CPU time used so far */
          Thread.currentThread.setContextClassLoader(replClassLoader)
          val ser = env.closureSerializer.newInstance()
          logInfo(s"Running $taskName")
          execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
          var taskStartTimeNs: Long = 0
          var taskStartCpu: Long = 0
      startGCTime = computeTotalGcTime() /* GC time so far */
          var taskStarted: Boolean = false

          try {
            Executor.taskDeserializationProps.set(taskDescription.properties)
        /* resolve the task's file dependencies: added files, jars, archives */
            updateDependencies(
              taskDescription.addedFiles, taskDescription.addedJars, taskDescription.addedArchives)
            task = ser.deserialize[Task[Any]](
              taskDescription.serializedTask, Thread.currentThread.getContextClassLoader)
            task.localProperties = taskDescription.properties
            task.setTaskMemoryManager(taskMemoryManager)

            val killReason = reasonIfKilled
            if (killReason.isDefined) {
              throw new TaskKilledException(killReason.get)
            }

            if (!isLocal) {
              logDebug(s"$taskName's epoch is ${task.epoch}")
              env.mapOutputTracker.asInstanceOf[MapOutputTrackerWorker].updateEpoch(task.epoch)
            }

            metricsPoller.onTaskStart(taskId, task.stageId, task.stageAttemptId)
            taskStarted = true

            taskStartTimeNs = System.nanoTime()
            taskStartCpu = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
              threadMXBean.getCurrentThreadCpuTime
            } else 0L
            var threwException = true
            val value = Utils.tryWithSafeFinally {
          val res = task.run( /* execute the task */
                taskAttemptId = taskId,
                attemptNumber = taskDescription.attemptNumber,
                metricsSystem = env.metricsSystem,
                resources = taskDescription.resources,
                plugins = plugins)
              threwException = false
              res
            } ......
        }
       runTask explained
        // task.run internally calls runTask,
        // which dispatches to the concrete implementation: ResultTask or ShuffleMapTask
        override def runTask(context: TaskContext): U = {
          // Deserialize the RDD and the func using the broadcast variables.
          val threadMXBean = ManagementFactory.getThreadMXBean
          val deserializeStartTimeNs = System.nanoTime()
          val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
            threadMXBean.getCurrentThreadCpuTime
          } else 0L
          val ser = SparkEnv.get.closureSerializer.newInstance()
      val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
        ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
      _executorDeserializeTimeNs = System.nanoTime() - deserializeStartTimeNs
      _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
        threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime
      } else 0L /* this is a ResultTask: collect returns results to the driver, foreach prints inside the task, or results are written out to HDFS */

          func(context, rdd.iterator(partition, context))
        }
  • Original post: https://www.cnblogs.com/wuxiaolong4/p/16688954.html