• JStorm与Storm源码分析(四)--均衡调度器,EvenScheduler


    EvenScheduler与DefaultScheduler一样,也实现了IScheduler接口,
    这一点可以从下面的命名空间声明中看出:

    (ns backtype.storm.scheduler.EvenScheduler
      (:use [backtype.storm util log config])
      (:require [clojure.set :as set])
      (:import [backtype.storm.scheduler IScheduler Topologies
                Cluster TopologyDetails WorkerSlot ExecutorDetails])
      (:gen-class
        :implements [backtype.storm.scheduler.IScheduler]))
    
    EvenScheduler是一个对资源进行均匀分配的调度器:
    ;; IScheduler `prepare` hook. Accepts the scheduler instance and a conf
    ;; argument, uses neither, and evaluates to nil.
    (defn -prepare
      [this conf]
      nil)
    
    ;; IScheduler `schedule` entry point: passes the topologies and the cluster
    ;; straight to schedule-topologies-evenly and returns its result.
    (defn -schedule
      [this ^Topologies topologies ^Cluster cluster]
      (schedule-topologies-evenly topologies cluster))
    

    它是通过调用schedule-topologies-evenly方法来完成任务分配的. 
    schedule-topologies-evenly方法的具体定义如下:

    ;; Assigns executors to worker slots for every topology that the cluster
    ;; reports (via needsSchedulingTopologies) as needing scheduling.
    ;; For each such topology, schedule-topology supplies an
    ;; executor -> [node port] mapping, reverse-map turns it into
    ;; [node port] -> executors, and each slot then gets a single .assign call
    ;; carrying all of its executors.
    (defn schedule-topologies-evenly [^Topologies topologies ^Cluster cluster]
      (doseq [^TopologyDetails topology (.needsSchedulingTopologies cluster topologies)]
        (let [topology-id (.getId topology)
              ;; executor -> [node port]; each executor is a [start-task end-task] pair
              executor->slot (schedule-topology topology cluster)
              ;; same data keyed the other way round: [node port] -> executors
              slot->executors (reverse-map executor->slot)]
          (doseq [[node+port task-ranges] slot->executors]
            (let [;; build the WorkerSlot from the node id and the port
                  ^WorkerSlot slot (WorkerSlot. (first node+port) (last node+port))
                  ;; wrap every [start-task end-task] pair in an ExecutorDetails
                  executor-details (map (fn [[start-task end-task]]
                                          (ExecutorDetails. start-task end-task))
                                        task-ranges)]
              ;; record on the cluster that these executors of the topology run in this slot
              (.assign cluster slot topology-id executor-details))))))
    

    fn1:

    /**
     * Collects every topology that needs scheduling.
     *
     * @param topologies the topologies to inspect
     * @return a new list holding, in iteration order, each topology for which
     *         {@code needsScheduling} returns true
     */
    public List<TopologyDetails> needsSchedulingTopologies(Topologies topologies) {
        List<TopologyDetails> pending = new ArrayList<TopologyDetails>();
        for (TopologyDetails candidate : topologies.getTopologies()) {
            if (!needsScheduling(candidate)) {
                // nothing left to schedule for this topology
                continue;
            }
            pending.add(candidate);
        }
        return pending;
    }
    

    fn2:

    /**
     * Decides whether a topology needs scheduling. It does when either of the
     * following holds:
     * 1. the number of workers configured for the topology is greater than the
     *    number of workers already assigned to it;
     * 2. the topology still has at least one unassigned executor.
     *
     * @param topology the topology to check
     * @return true if the topology needs scheduling, false otherwise
     */
    public boolean needsScheduling(TopologyDetails topology) {
        // The second operand is only evaluated when the worker count is already satisfied.
        return topology.getNumWorkers() > this.getAssignedNumWorkers(topology)
                || this.getUnassignedExecutors(topology).size() > 0;
    }
    

    fn3:

    ;; Works out, from the slots the cluster currently has available, where the
    ;; not-yet-assigned executors of `topology` should go.
    ;; Returns a map of executor -> slot, where an executor is a
    ;; [start-task end-task] vector and a slot is a [node-id port] vector.
    (defn- schedule-topology [^TopologyDetails topology ^Cluster cluster]
      (let [topology-id (.getId topology)
            ;; slots the cluster reports as available, as [node-id port] vectors
            available-slots (map (fn [slot] (vector (.getNodeId slot) (.getPort slot)))
                                 (.getAvailableSlots cluster))
            ;; every executor of the topology, as a set of [start-task end-task] vectors
            all-executors (set (map (fn [executor]
                                      (vector (.getStartTask executor) (.getEndTask executor)))
                                    (.getExecutors topology)))
            ;; [node port] -> executors that the topology already holds
            alive-assigned (get-alive-assigned-node+port->executors cluster topology-id)
            assigned-slot-count (count alive-assigned)
            ;; slots the topology may occupy: the smaller of the worker count it
            ;; asks for and (available slots + slots it already holds)
            total-slots-to-use (min (.getNumWorkers topology)
                                    (+ (count available-slots) assigned-slot-count))
            ;; extra slots to claim, taken from the front of the sorted available slots
            reassign-slots (take (- total-slots-to-use assigned-slot-count)
                                 (sort-slots available-slots))
            ;; executors that already sit in some slot
            already-placed (set (apply concat (vals alive-assigned)))
            ;; executors that still need a slot, in sorted order
            reassign-executors (sort (set/difference all-executors already-placed))
            ;; Pair every executor that needs a slot with a slot.
            ;; - Fewer executors than slots: pairing stops at the last executor,
            ;;   e.g. (e1 e2 e3) with (s1 s2 s3 s4 s5) gives {e1 s1, e2 s2, e3 s3}.
            ;; - More executors than slots: the slot sequence is repeated, so
            ;;   several executors share a slot.
            ;;   NOTE(review): repeat-seq is defined outside this block; presumably
            ;;   the slots are reused in round-robin order -- confirm there.
            reassignment (into {}
                               (map vector
                                    reassign-executors
                                    ;; for some reason it goes into infinite loop without limiting the repeat-seq
                                    (repeat-seq (count reassign-executors) reassign-slots)))]
        ;; log the available slots only when something is actually being assigned
        (when (seq reassignment)
          (log-message "Available slots: " (pr-str available-slots)))
        reassignment))
    

    fn3_1:

    ;; Returns what the topology currently holds in the cluster as a map of
    ;; [node-id port] -> executors, each executor being a
    ;; [start-task end-task] vector.
    (defn get-alive-assigned-node+port->executors [cluster topology-id]
      (let [existing-assignment (.getAssignmentById cluster topology-id)
            ;; with no assignment on record, continue with an empty executor -> slot map
            executor->slot (if-not existing-assignment
                             {}
                             (.getExecutorToSlot existing-assignment))
            ;; replace the ExecutorDetails / WorkerSlot objects with plain vectors:
            ;; [start-task end-task] -> [node-id port]
            executor->node+port (into {}
                                      (map (fn [[^ExecutorDetails executor ^WorkerSlot slot]]
                                             [[(.getStartTask executor) (.getEndTask executor)]
                                              [(.getNodeId slot) (.getPort slot)]])
                                           executor->slot))]
        ;; flip the map so that it is keyed by [node-id port]
        (reverse-map executor->node+port)))
    

    注:本文是学习李明老师等著《Storm源码分析》和陈敏敏老师等著《Storm技术内幕与大数据实践》时整理的笔记。
    欢迎关注下面二维码进行技术交流: 

  • 相关阅读:
    Centos7源码安装mysql及读写分离,互为主从
    Linux简介及Ubuntu安装
    CentOS7 IP自动获取
    memcached总结
    CentOS7安装iptables防火墙
    centos 7.0 mono&Jexus V5.5.3安装
    设置背景模糊效果
    vue 动画过渡
    sticky footer
    设置最低高度为100%
  • 原文地址:https://www.cnblogs.com/RoseVorchid/p/7253462.html
Copyright © 2020-2023  润新知