• Spark 1.1.0 source code reading: taskScheduler


    1. createTaskScheduler set up in SparkContext

      case "yarn-standalone" | "yarn-cluster" =>
        if (master == "yarn-standalone") {
          logWarning(
            "\"yarn-standalone\" is deprecated as of Spark 1.0. Use \"yarn-cluster\" instead.")
        }
        val scheduler = try {
          val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
          val cons = clazz.getConstructor(classOf[SparkContext])
          cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl]
        } catch {
          // TODO: Enumerate the exact reasons why it can fail
          // But irrespective of it, it means we cannot proceed !
          case e: Exception => {
            throw new SparkException("YARN mode not available ?", e)
          }
        }
        val backend = new CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem)
        scheduler.initialize(backend) // calls initialize on the concrete scheduler implementation
        scheduler

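    The reflective construction is worth a note: YarnClusterScheduler lives in the separately built yarn module, so core loads it by name instead of referencing it at compile time. A minimal, self-contained sketch of the same pattern (java.lang.StringBuilder is only a stand-in class for the demo, not Spark code):

      // Look up a class by name, pick a constructor, and instantiate it.
      // This avoids a compile-time dependency on a class that may not be
      // on the classpath for every build profile.
      val clazz = Class.forName("java.lang.StringBuilder")
      val cons = clazz.getConstructor(classOf[String])
      val instance = cons.newInstance("hello").asInstanceOf[CharSequence]
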
    In TaskSchedulerImpl.scala:

      def initialize(backend: SchedulerBackend) {
        this.backend = backend
        // temporarily set rootPool name to empty
        rootPool = new Pool("", schedulingMode, 0, 0)
        schedulableBuilder = {
          schedulingMode match {
            case SchedulingMode.FIFO =>
              new FIFOSchedulableBuilder(rootPool)
            case SchedulingMode.FAIR =>
              new FairSchedulableBuilder(rootPool, conf)
          }
        }
        schedulableBuilder.buildPools()
      }
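
    Where does schedulingMode come from? In TaskSchedulerImpl it is read from the "spark.scheduler.mode" setting, defaulting to FIFO. A hedged sketch of that field (paraphrased from the 1.1.0 source; the real code also catches an invalid mode name and throws a SparkException):

      // schedulingMode is derived from configuration; "FIFO" is the default
      private val schedulingModeConf = conf.get("spark.scheduler.mode", "FIFO")
      val schedulingMode: SchedulingMode = SchedulingMode.withName(schedulingModeConf.toUpperCase)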

    2. submitTasks

      override def submitTasks(taskSet: TaskSet) {
        val tasks = taskSet.tasks
        logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")
        this.synchronized {
          val manager = new TaskSetManager(this, taskSet, maxTaskFailures)
          activeTaskSets(taskSet.id) = manager
          schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

          if (!isLocal && !hasReceivedTask) {
            starvationTimer.scheduleAtFixedRate(new TimerTask() {
              override def run() {
                if (!hasLaunchedTask) {
                  logWarning("Initial job has not accepted any resources; " +
                    "check your cluster UI to ensure that workers are registered " +
                    "and have sufficient memory")
                } else {
                  this.cancel()
                }
              }
            }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)
          }
          hasReceivedTask = true
        }
        backend.reviveOffers()
      }
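
    Who calls submitTasks? The DAGScheduler, once a stage's parents have finished: it builds one Task per missing partition and submits the whole batch as a single TaskSet. A hedged sketch of that call site (condensed from DAGScheduler.submitMissingTasks in 1.1.0):

      // one TaskSet per stage attempt; the TaskSetManager created above wraps it
      taskScheduler.submitTasks(
        new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties))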

    3. reviveOffers in CoarseGrainedSchedulerBackend

      override def reviveOffers() {
        driverActor ! ReviveOffers  // send the message to CoarseGrainedSchedulerBackend's driverActor
      }

      case ReviveOffers =>
        makeOffers()

      // Make fake resource offers on all executors
      def makeOffers() {
        launchTasks(scheduler.resourceOffers(
          executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))}))
      }

      /**
       * Represents free resources available on an executor.
       */
      private[spark]
      case class WorkerOffer(executorId: String, host: String, cores: Int)
      /**
       * Called by cluster manager to offer resources on slaves. We respond by asking our active task
       * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so
       * that tasks are balanced across the cluster.
       */
      def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized {
        SparkEnv.set(sc.env)

        // Mark each slave as alive and remember its hostname
        for (o <- offers) {
          executorIdToHost(o.executorId) = o.host
          if (!executorsByHost.contains(o.host)) {
            executorsByHost(o.host) = new HashSet[String]()
            executorAdded(o.executorId, o.host)
          }
        }

        // Randomly shuffle offers to avoid always placing tasks on the same set of workers.
        val shuffledOffers = Random.shuffle(offers)
        // Build a list of tasks to assign to each worker.
        val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores))
        val availableCpus = shuffledOffers.map(o => o.cores).toArray
        val sortedTaskSets = rootPool.getSortedTaskSetQueue
        for (taskSet <- sortedTaskSets) {
          logDebug("parentName: %s, name: %s, runningTasks: %s".format(
            taskSet.parent.name, taskSet.name, taskSet.runningTasks))
        }

        // Take each TaskSet in our scheduling order, and then offer it each node in increasing order
        // of locality levels so that it gets a chance to launch local tasks on all of them.
        var launchedTask = false
        for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) {
          do {
            launchedTask = false
            for (i <- 0 until shuffledOffers.size) {
              val execId = shuffledOffers(i).executorId
              val host = shuffledOffers(i).host
              if (availableCpus(i) >= CPUS_PER_TASK) {
                for (task <- taskSet.resourceOffer(execId, host, maxLocality)) {
                  tasks(i) += task
                  val tid = task.taskId
                  taskIdToTaskSetId(tid) = taskSet.taskSet.id
                  taskIdToExecutorId(tid) = execId
                  activeExecutorIds += execId
                  executorsByHost(host) += execId
                  availableCpus(i) -= CPUS_PER_TASK
                  assert (availableCpus(i) >= 0)
                  launchedTask = true
                }
              }
            }
          } while (launchedTask)
        }

        if (tasks.size > 0) {
          hasLaunchedTask = true
        }
        return tasks
      }
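
    The do/while plus the inner loop over offers is what produces the round-robin fill: each pass gives every executor at most one task, and passes repeat until a full pass launches nothing. A self-contained toy illustration of just that loop shape (not Spark code; CPUS_PER_TASK assumed to be 1):

      // offers with 4 and 2 free cores; each pass assigns one task per executor
      val cores = Array(4, 2)
      val assigned = Array.fill(cores.length)(0)
      var launched = true
      while (launched) {
        launched = false
        for (i <- cores.indices if cores(i) >= 1) {
          assigned(i) += 1
          cores(i) -= 1
          launched = true
        }
      }
      // assigned is now Array(4, 2): tasks were spread one pass at a time,
      // not by filling the first executor completely before the second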

    4. launchTasks

      // Launch tasks returned by a set of resource offers
      def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
        for (task <- tasks.flatten) {
          freeCores(task.executorId) -= scheduler.CPUS_PER_TASK
          executorActor(task.executorId) ! LaunchTask(task)
        }
      }
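
    The cores deducted here are given back when the task finishes. A hedged sketch of the matching StatusUpdate handler in DriverActor (paraphrased; the exact 1.1.0 code also guards against updates from unknown executors):

      case StatusUpdate(executorId, taskId, state, data) =>
        scheduler.statusUpdate(taskId, state, data.value)
        if (TaskState.isFinished(state)) {
          // return the task's cores to the pool and immediately re-offer them
          freeCores(executorId) += scheduler.CPUS_PER_TASK
          makeOffers(executorId)
        }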
      class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: ActorSystem)
        extends SchedulerBackend with Logging
      {
        // Use an atomic variable to track total number of cores in the cluster for simplicity and speed
        var totalCoreCount = new AtomicInteger(0)
        val conf = scheduler.sc.conf
        private val timeout = AkkaUtils.askTimeout(conf)

        class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor {
          private val executorActor = new HashMap[String, ActorRef]
          private val executorAddress = new HashMap[String, Address]
          private val executorHost = new HashMap[String, String]
          private val freeCores = new HashMap[String, Int]
          private val totalCores = new HashMap[String, Int]
          private val addressToExecutorId = new HashMap[Address, String]

      // Driver to executors
      case class LaunchTask(task: TaskDescription) extends CoarseGrainedClusterMessage

      private[spark] class TaskDescription(
          val taskId: Long,
          val executorId: String,
          val name: String,
          val index: Int,    // Index within this task's TaskSet
          _serializedTask: ByteBuffer)
        extends Serializable {

        // Because ByteBuffers are not serializable, wrap the task in a SerializableBuffer
        private val buffer = new SerializableBuffer(_serializedTask)

        def serializedTask: ByteBuffer = buffer.value

        override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index)
      }
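
    TaskDescription has to cross the wire, but java.nio.ByteBuffer is not Serializable, hence the SerializableBuffer wrapper. A minimal sketch of the idea (simplified; not the exact Spark class):

      import java.io.{ObjectInputStream, ObjectOutputStream}
      import java.nio.ByteBuffer

      class SerializableBufferSketch(@transient var buffer: ByteBuffer) extends Serializable {
        def value: ByteBuffer = buffer

        // write the buffer's bytes explicitly during Java serialization
        private def writeObject(out: ObjectOutputStream): Unit = {
          val bytes = new Array[Byte](buffer.remaining())
          buffer.duplicate().get(bytes)  // duplicate() so the original position survives
          out.writeInt(bytes.length)
          out.write(bytes)
        }

        // rebuild the ByteBuffer on the receiving side
        private def readObject(in: ObjectInputStream): Unit = {
          val bytes = new Array[Byte](in.readInt())
          in.readFully(bytes)
          buffer = ByteBuffer.wrap(bytes)
        }
      }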

    5. After receiving an executor's registration, CoarseGrainedSchedulerBackend records the executor

      def receive = {
        case RegisterExecutor(executorId, hostPort, cores) =>
          Utils.checkHostPort(hostPort, "Host port expected " + hostPort)
          if (executorActor.contains(executorId)) {
            sender ! RegisterExecutorFailed("Duplicate executor ID: " + executorId)
          } else {
            logInfo("Registered executor: " + sender + " with ID " + executorId)
            sender ! RegisteredExecutor(sparkProperties)
            executorActor(executorId) = sender
            executorHost(executorId) = Utils.parseHostPort(hostPort)._1
            totalCores(executorId) = cores
            freeCores(executorId) = cores
            executorAddress(executorId) = sender.path.address
            addressToExecutorId(sender.path.address) = executorId
            totalCoreCount.addAndGet(cores)
            makeOffers()
          }

    The executor first registers with CoarseGrainedSchedulerBackend; CoarseGrainedSchedulerBackend then sends (serialized) tasks to that executor.

    6. CoarseGrainedExecutorBackend communicates with CoarseGrainedSchedulerBackend.

      private[spark] class CoarseGrainedExecutorBackend(
          driverUrl: String,
          executorId: String,
          hostPort: String,
          cores: Int,
          sparkProperties: Seq[(String, String)])
        extends Actor with ActorLogReceive with ExecutorBackend with Logging {

        Utils.checkHostPort(hostPort, "Expected hostport")

        var executor: Executor = null
        var driver: ActorSelection = null

        override def preStart() {
          logInfo("Connecting to driver: " + driverUrl)
          driver = context.actorSelection(driverUrl)
          driver ! RegisterExecutor(executorId, hostPort, cores) // register with the driver
          context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
        }

        override def receiveWithLogging = {
          case RegisteredExecutor =>
            logInfo("Successfully registered with driver")
            // Make this host instead of hostPort ?
            executor = new Executor(executorId, Utils.parseHostPort(hostPort)._1, sparkProperties,
              false)

          case RegisterExecutorFailed(message) =>
            logError("Slave registration failed: " + message)
            System.exit(1)

          case LaunchTask(data) =>  // a task arrives from the driver
            if (executor == null) {
              logError("Received LaunchTask command but executor was null")
              System.exit(1)
            } else {
              val ser = SparkEnv.get.closureSerializer.newInstance()
              val taskDesc = ser.deserialize[TaskDescription](data.value)
              logInfo("Got assigned task " + taskDesc.taskId)
              executor.launchTask(this, taskDesc.taskId, taskDesc.name, taskDesc.serializedTask)
            }

    7. executor.launchTask

      def launchTask(
          context: ExecutorBackend, taskId: Long, taskName: String, serializedTask: ByteBuffer) {
        val tr = new TaskRunner(context, taskId, taskName, serializedTask)
        runningTasks.put(taskId, tr)
        threadPool.execute(tr)
      }
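
    TaskRunner itself is the subject of the next post. As a hedged preview of its shape: run() reports RUNNING to the backend, deserializes and runs the task, then reports FINISHED with the serialized result. A self-contained toy model of that lifecycle (none of these names are Spark's):

      import java.util.concurrent.Executors

      object TaskRunnerSketch {
        sealed trait TaskState
        case object Running extends TaskState
        case object Finished extends TaskState

        trait Backend { def statusUpdate(taskId: Long, state: TaskState): Unit }

        // report RUNNING, do the work, report FINISHED, mirroring TaskRunner's flow
        class TaskRunner(backend: Backend, taskId: Long, body: () => Unit) extends Runnable {
          override def run(): Unit = {
            backend.statusUpdate(taskId, Running)
            body() // the real runner deserializes and runs the Task here
            backend.statusUpdate(taskId, Finished)
          }
        }

        def main(args: Array[String]): Unit = {
          val backend = new Backend {
            def statusUpdate(taskId: Long, state: TaskState): Unit =
              println(s"task $taskId -> $state")
          }
          Executors.newCachedThreadPool().execute(
            new TaskRunner(backend, 0L, () => println("task body runs")))
        }
      }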

    To be continued in the next post.

  • Original article: https://www.cnblogs.com/Torstan/p/4158650.html