• Spark 1.1.0 source code reading: taskScheduler


    1. createTaskScheduler set up in SparkContext

      case "yarn-standalone" | "yarn-cluster" =>
        if (master == "yarn-standalone") {
          logWarning(
            "\"yarn-standalone\" is deprecated as of Spark 1.0. Use \"yarn-cluster\" instead.")
        }
        val scheduler = try {
          val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
          val cons = clazz.getConstructor(classOf[SparkContext])
          cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl]
        } catch {
          // TODO: Enumerate the exact reasons why it can fail
          // But irrespective of it, it means we cannot proceed !
          case e: Exception => {
            throw new SparkException("YARN mode not available ?", e)
          }
        }
        val backend = new CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem)
        scheduler.initialize(backend) // calls initialize on the concrete scheduler implementation
        scheduler

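    The reflective construction is worth a note: YarnClusterScheduler lives in the separately built yarn module, so core loads it by name instead of referencing it at compile time. A minimal, self-contained sketch of the same pattern (java.lang.StringBuilder is only a stand-in class for the demo, not Spark code):

      // Look up a class by name, pick a constructor, and instantiate it.
      // This avoids a compile-time dependency on a class that may not be
      // on the classpath for every build profile.
      val clazz = Class.forName("java.lang.StringBuilder")
      val cons = clazz.getConstructor(classOf[String])
      val instance = cons.newInstance("hello").asInstanceOf[CharSequence]
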
    In TaskSchedulerImpl.scala:

      def initialize(backend: SchedulerBackend) {
        this.backend = backend
        // temporarily set rootPool name to empty
        rootPool = new Pool("", schedulingMode, 0, 0)
        schedulableBuilder = {
          schedulingMode match {
            case SchedulingMode.FIFO =>
              new FIFOSchedulableBuilder(rootPool)
            case SchedulingMode.FAIR =>
              new FairSchedulableBuilder(rootPool, conf)
          }
        }
        schedulableBuilder.buildPools()
      }
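
    Where does schedulingMode come from? In TaskSchedulerImpl it is read from the "spark.scheduler.mode" setting, defaulting to FIFO. A hedged sketch of that field (paraphrased from the 1.1.0 source; the real code also catches an invalid mode name and throws a SparkException):

      // schedulingMode is derived from configuration; "FIFO" is the default
      private val schedulingModeConf = conf.get("spark.scheduler.mode", "FIFO")
      val schedulingMode: SchedulingMode = SchedulingMode.withName(schedulingModeConf.toUpperCase)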

    2. submitTasks

      override def submitTasks(taskSet: TaskSet) {
        val tasks = taskSet.tasks
        logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")
        this.synchronized {
          val manager = new TaskSetManager(this, taskSet, maxTaskFailures)
          activeTaskSets(taskSet.id) = manager
          schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

          if (!isLocal && !hasReceivedTask) {
            starvationTimer.scheduleAtFixedRate(new TimerTask() {
              override def run() {
                if (!hasLaunchedTask) {
                  logWarning("Initial job has not accepted any resources; " +
                    "check your cluster UI to ensure that workers are registered " +
                    "and have sufficient memory")
                } else {
                  this.cancel()
                }
              }
            }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)
          }
          hasReceivedTask = true
        }
        backend.reviveOffers()
      }
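
    Who calls submitTasks? The DAGScheduler, once a stage's parents have finished: it builds one Task per missing partition and submits the whole batch as a single TaskSet. A hedged sketch of that call site (condensed from DAGScheduler.submitMissingTasks in 1.1.0):

      // one TaskSet per stage attempt; the TaskSetManager created above wraps it
      taskScheduler.submitTasks(
        new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties))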

    3. reviveOffers in CoarseGrainedSchedulerBackend

      override def reviveOffers() {
        driverActor ! ReviveOffers  // send the message to CoarseGrainedSchedulerBackend's driverActor
      }

      case ReviveOffers =>
        makeOffers()

      // Make fake resource offers on all executors
      def makeOffers() {
        launchTasks(scheduler.resourceOffers(
          executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))}))
      }

      /**
       * Represents free resources available on an executor.
       */
      private[spark]
      case class WorkerOffer(executorId: String, host: String, cores: Int)
      /**
       * Called by cluster manager to offer resources on slaves. We respond by asking our active task
       * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so
       * that tasks are balanced across the cluster.
       */
      def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized {
        SparkEnv.set(sc.env)

        // Mark each slave as alive and remember its hostname
        for (o <- offers) {
          executorIdToHost(o.executorId) = o.host
          if (!executorsByHost.contains(o.host)) {
            executorsByHost(o.host) = new HashSet[String]()
            executorAdded(o.executorId, o.host)
          }
        }

        // Randomly shuffle offers to avoid always placing tasks on the same set of workers.
        val shuffledOffers = Random.shuffle(offers)
        // Build a list of tasks to assign to each worker.
        val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores))
        val availableCpus = shuffledOffers.map(o => o.cores).toArray
        val sortedTaskSets = rootPool.getSortedTaskSetQueue
        for (taskSet <- sortedTaskSets) {
          logDebug("parentName: %s, name: %s, runningTasks: %s".format(
            taskSet.parent.name, taskSet.name, taskSet.runningTasks))
        }

        // Take each TaskSet in our scheduling order, and then offer it each node in increasing order
        // of locality levels so that it gets a chance to launch local tasks on all of them.
        var launchedTask = false
        for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) {
          do {
            launchedTask = false
            for (i <- 0 until shuffledOffers.size) {
              val execId = shuffledOffers(i).executorId
              val host = shuffledOffers(i).host
              if (availableCpus(i) >= CPUS_PER_TASK) {
                for (task <- taskSet.resourceOffer(execId, host, maxLocality)) {
                  tasks(i) += task
                  val tid = task.taskId
                  taskIdToTaskSetId(tid) = taskSet.taskSet.id
                  taskIdToExecutorId(tid) = execId
                  activeExecutorIds += execId
                  executorsByHost(host) += execId
                  availableCpus(i) -= CPUS_PER_TASK
                  assert (availableCpus(i) >= 0)
                  launchedTask = true
                }
              }
            }
          } while (launchedTask)
        }

        if (tasks.size > 0) {
          hasLaunchedTask = true
        }
        return tasks
      }
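
    The do/while plus the inner loop over offers is what produces the round-robin fill: each pass gives every executor at most one task, and passes repeat until a full pass launches nothing. A self-contained toy illustration of just that loop shape (not Spark code; CPUS_PER_TASK assumed to be 1):

      // offers with 4 and 2 free cores; each pass assigns one task per executor
      val cores = Array(4, 2)
      val assigned = Array.fill(cores.length)(0)
      var launched = true
      while (launched) {
        launched = false
        for (i <- cores.indices if cores(i) >= 1) {
          assigned(i) += 1
          cores(i) -= 1
          launched = true
        }
      }
      // assigned is now Array(4, 2): tasks were spread one pass at a time,
      // not by filling the first executor completely before the second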

    4. launchTasks

      // Launch tasks returned by a set of resource offers
      def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
        for (task <- tasks.flatten) {
          freeCores(task.executorId) -= scheduler.CPUS_PER_TASK
          executorActor(task.executorId) ! LaunchTask(task)
        }
      }
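
    The cores deducted here are given back when the task finishes. A hedged sketch of the matching StatusUpdate handler in DriverActor (paraphrased; the exact 1.1.0 code also guards against updates from unknown executors):

      case StatusUpdate(executorId, taskId, state, data) =>
        scheduler.statusUpdate(taskId, state, data.value)
        if (TaskState.isFinished(state)) {
          // return the task's cores to the pool and immediately re-offer them
          freeCores(executorId) += scheduler.CPUS_PER_TASK
          makeOffers(executorId)
        }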
      class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: ActorSystem)
        extends SchedulerBackend with Logging
      {
        // Use an atomic variable to track total number of cores in the cluster for simplicity and speed
        var totalCoreCount = new AtomicInteger(0)
        val conf = scheduler.sc.conf
        private val timeout = AkkaUtils.askTimeout(conf)

        class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor {
          private val executorActor = new HashMap[String, ActorRef]
          private val executorAddress = new HashMap[String, Address]
          private val executorHost = new HashMap[String, String]
          private val freeCores = new HashMap[String, Int]
          private val totalCores = new HashMap[String, Int]
          private val addressToExecutorId = new HashMap[Address, String]

      // Driver to executors
      case class LaunchTask(task: TaskDescription) extends CoarseGrainedClusterMessage

      private[spark] class TaskDescription(
          val taskId: Long,
          val executorId: String,
          val name: String,
          val index: Int,    // Index within this task's TaskSet
          _serializedTask: ByteBuffer)
        extends Serializable {

        // Because ByteBuffers are not serializable, wrap the task in a SerializableBuffer
        private val buffer = new SerializableBuffer(_serializedTask)

        def serializedTask: ByteBuffer = buffer.value

        override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index)
      }
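
    TaskDescription has to cross the wire, but java.nio.ByteBuffer is not Serializable, hence the SerializableBuffer wrapper. A minimal sketch of the idea (simplified; not the exact Spark class):

      import java.io.{ObjectInputStream, ObjectOutputStream}
      import java.nio.ByteBuffer

      class SerializableBufferSketch(@transient var buffer: ByteBuffer) extends Serializable {
        def value: ByteBuffer = buffer

        // write the buffer's bytes explicitly during Java serialization
        private def writeObject(out: ObjectOutputStream): Unit = {
          val bytes = new Array[Byte](buffer.remaining())
          buffer.duplicate().get(bytes)  // duplicate() so the original position survives
          out.writeInt(bytes.length)
          out.write(bytes)
        }

        // rebuild the ByteBuffer on the receiving side
        private def readObject(in: ObjectInputStream): Unit = {
          val bytes = new Array[Byte](in.readInt())
          in.readFully(bytes)
          buffer = ByteBuffer.wrap(bytes)
        }
      }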

    5. After receiving an executor's registration, CoarseGrainedSchedulerBackend records the executor

      def receive = {
        case RegisterExecutor(executorId, hostPort, cores) =>
          Utils.checkHostPort(hostPort, "Host port expected " + hostPort)
          if (executorActor.contains(executorId)) {
            sender ! RegisterExecutorFailed("Duplicate executor ID: " + executorId)
          } else {
            logInfo("Registered executor: " + sender + " with ID " + executorId)
            sender ! RegisteredExecutor(sparkProperties)
            executorActor(executorId) = sender
            executorHost(executorId) = Utils.parseHostPort(hostPort)._1
            totalCores(executorId) = cores
            freeCores(executorId) = cores
            executorAddress(executorId) = sender.path.address
            addressToExecutorId(sender.path.address) = executorId
            totalCoreCount.addAndGet(cores)
            makeOffers()
          }

    The executor first registers with CoarseGrainedSchedulerBackend; CoarseGrainedSchedulerBackend then sends (serialized) tasks to that executor.

    6. CoarseGrainedExecutorBackend communicates with CoarseGrainedSchedulerBackend.

      private[spark] class CoarseGrainedExecutorBackend(
          driverUrl: String,
          executorId: String,
          hostPort: String,
          cores: Int,
          sparkProperties: Seq[(String, String)])
        extends Actor with ActorLogReceive with ExecutorBackend with Logging {

        Utils.checkHostPort(hostPort, "Expected hostport")

        var executor: Executor = null
        var driver: ActorSelection = null

        override def preStart() {
          logInfo("Connecting to driver: " + driverUrl)
          driver = context.actorSelection(driverUrl)
          driver ! RegisterExecutor(executorId, hostPort, cores) // register with the driver
          context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
        }

        override def receiveWithLogging = {
          case RegisteredExecutor =>
            logInfo("Successfully registered with driver")
            // Make this host instead of hostPort ?
            executor = new Executor(executorId, Utils.parseHostPort(hostPort)._1, sparkProperties,
              false)

          case RegisterExecutorFailed(message) =>
            logError("Slave registration failed: " + message)
            System.exit(1)

          case LaunchTask(data) =>  // a task arrives from the driver
            if (executor == null) {
              logError("Received LaunchTask command but executor was null")
              System.exit(1)
            } else {
              val ser = SparkEnv.get.closureSerializer.newInstance()
              val taskDesc = ser.deserialize[TaskDescription](data.value)
              logInfo("Got assigned task " + taskDesc.taskId)
              executor.launchTask(this, taskDesc.taskId, taskDesc.name, taskDesc.serializedTask)
            }

    7. executor.launchTask

      def launchTask(
          context: ExecutorBackend, taskId: Long, taskName: String, serializedTask: ByteBuffer) {
        val tr = new TaskRunner(context, taskId, taskName, serializedTask)
        runningTasks.put(taskId, tr)
        threadPool.execute(tr)
      }
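
    TaskRunner itself is the subject of the next post. As a hedged preview of its shape: run() reports RUNNING to the backend, deserializes and runs the task, then reports FINISHED with the serialized result. A self-contained toy model of that lifecycle (none of these names are Spark's):

      import java.util.concurrent.Executors

      object TaskRunnerSketch {
        sealed trait TaskState
        case object Running extends TaskState
        case object Finished extends TaskState

        trait Backend { def statusUpdate(taskId: Long, state: TaskState): Unit }

        // report RUNNING, do the work, report FINISHED, mirroring TaskRunner's flow
        class TaskRunner(backend: Backend, taskId: Long, body: () => Unit) extends Runnable {
          override def run(): Unit = {
            backend.statusUpdate(taskId, Running)
            body() // the real runner deserializes and runs the Task here
            backend.statusUpdate(taskId, Finished)
          }
        }

        def main(args: Array[String]): Unit = {
          val backend = new Backend {
            def statusUpdate(taskId: Long, state: TaskState): Unit =
              println(s"task $taskId -> $state")
          }
          Executors.newCachedThreadPool().execute(
            new TaskRunner(backend, 0L, () => println("task body runs")))
        }
      }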

    To be continued in the next post.

  • Original article: https://www.cnblogs.com/Torstan/p/4158650.html