• 调度器10—walt的cpu_array和cluster组织


    1. sched_cluster 变量的初始化

    static bool walt_clusters_parsed;
    __read_mostly cpumask_t **cpu_array; //指针数组
    
    void walt_update_cluster_topology(void) //walt.c
    {
        struct cpumask cpus = *cpu_possible_mask;
        struct cpumask cluster_cpus;
        struct walt_sched_cluster *cluster;
        struct list_head new_head;
        cpumask_t **tmp;
        int i;
    
        INIT_LIST_HEAD(&new_head);
    
        for_each_cpu(i, &cpus) { //对于每一个possible的cpu
            cpumask_clear(&cluster_cpus); //清空 cluster_cpus 结构备用
            walt_get_possible_siblings(i, &cluster_cpus); //获取i的所有兄弟cpu并将其mask设置进cluster_cpus中
            if (cpumask_empty(&cluster_cpus)) {
                WARN(1, "WALT: Invalid cpu topology!!");
                cleanup_clusters(&new_head);
                return;
            }
            cpumask_andnot(&cpus, &cpus, &cluster_cpus); //从cpus中清理掉已经解析出来的cpu
            add_cluster(&cluster_cpus, &new_head); //new一个cluster结构出来,然后将其挂在new_head链表上
        }
    
        assign_cluster_ids(&new_head); //解析后的cluster  传递给sched_cluster[]
        ...
        /* Ensure cluster ids are visible to all CPUs before making cluster_head visible. */
        move_list(&cluster_head, &new_head, false); //cluster_head指向也指向cluster链表
    
        for_each_sched_cluster(cluster) {
            if (cpumask_weight(&cluster->cpus) == 1)
                /* *arg1 = *arg2 | *arg3,asym_cap_sibling_cpus 的唯一赋值位置 */
                cpumask_or(&asym_cap_sibling_cpus, &asym_cap_sibling_cpus, &cluster->cpus);
        }
        /* 4+3+1 这里会清0,6+2 根本不会赋值,因此此成员恒为0 */
        if (cpumask_weight(&asym_cap_sibling_cpus) == 1)
            cpumask_clear(&asym_cap_sibling_cpus);
    
        /* 这里面对 cpu_array 结构进行的初始化 */
        tmp = build_cpu_array();
        if (!tmp) {
            BUG_ON(1);
            return;
        }
        /*就是 cpu_array = tmp; */
        smp_store_release(&cpu_array, tmp); /*WRITE_ONCE(*p, v);    */
        walt_clusters_parsed = true;
    }

    调用路径:

    init_cpu_capacity_notifier.notifier_call //每个cluster的cpufreq策略ok后都会调用
        init_cpu_capacity_callback //arch_topology.c 只有所有cluster都调用后才只执行一次
            walt_update_cluster_topology();

    build_cpu_array 函数:

    //这个函数对 cpu_array 二维数组进行初始化。
    static cpumask_t **build_cpu_array(void)
    {
        int i;
        cpumask_t **tmp_array = init_cpu_array(); //分配一个数组指针
    
        if (!tmp_array)
            return NULL;
    
        /*Construct cpu_array row by row*/
        for (i = 0; i < num_sched_clusters; i++) {
            int j, k = 1;
    
            /* Fill out first column with appropriate cpu arrays*/
            cpumask_copy(&tmp_array[i][0], &sched_cluster[i]->cpus);
    
            /*
             * k starts from column 1 because 0 is filled
             * Fill clusters for the rest of the row,
             * above i in ascending order
             */
            for (j = i + 1; j < num_sched_clusters; j++) {
                cpumask_copy(&tmp_array[i][k], &sched_cluster[j]->cpus);
                k++;
            }
    
            /*
             * k starts from where we left off above.
             * Fill clusters below i in descending order.
             */
            for (j = i - 1; j >= 0; j--) {
                cpumask_copy(&tmp_array[i][k], &sched_cluster[j]->cpus);
                k++;
            }
        }
        return tmp_array;
    }
    
    static cpumask_t **init_cpu_array(void)
    {
        int i;
        cpumask_t **tmp_array;
    
        tmp_array = kcalloc(num_sched_clusters, sizeof(cpumask_t *), GFP_ATOMIC);
        if (!tmp_array)
            return NULL;
        for (i = 0; i < num_sched_clusters; i++) {
            tmp_array[i] = kcalloc(num_sched_clusters, sizeof(cpumask_t), GFP_ATOMIC);
            if (!tmp_array[i])
                return NULL;
        }
    
        return tmp_array;
    }

    2. 应用代码模拟build_cpu_array的实现

    build_cpu_array() 对 cpu_array 的初始化赋值不是很好理解,写应用代码模拟器操作,然后打印 cpu_array 的值:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define NR_CPUS 32
    
    #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
    #define BITS_PER_BYTE        8
    
    //include/linux/bitops.h
    #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
    #define BITS_TO_LONGS(nr)    DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
    
    //include/linux/types.h
    #define DECLARE_BITMAP(name,bits) unsigned long name[BITS_TO_LONGS(bits)]
    
    //include/linux/cpumask.h
    typedef struct cpumask { DECLARE_BITMAP(bits_name, NR_CPUS); } cpumask_t;
    
    static int num_sched_clusters;
    
    struct cluster {
        struct cpumask cpus;
    };
    
    struct cluster sched_cluster[NR_CPUS];
    
    static void cpumask_copy(struct cpumask *dstp, struct cpumask *srcp)
    {
        memcpy(dstp, srcp, sizeof(*dstp));    
    }
    
    static void set_bit(int nr, unsigned long *addr)
    {
        addr[nr / BITS_PER_TYPE(long)] |= 1UL << (nr % BITS_PER_TYPE(long));
    }
    
    static int test_bit(int nr, unsigned long *addr)
    {
        return 1UL & (addr[nr / BITS_PER_TYPE(long)] >> (nr & (BITS_PER_TYPE(long)-1)));
    }
    
    static void cpumask_set_cpu(int cpu, struct cpumask *dstp)
    {
        set_bit(cpu, dstp->bits_name);
    }
    
    static int cpumask_test_cpu(int cpu, struct cpumask *dstp)
    {
        return test_bit(cpu, dstp->bits_name);
    }
    
    static cpumask_t **init_cpu_array(void)
    {
        int i;
        cpumask_t **tmp_array;
    
        tmp_array = calloc(num_sched_clusters, sizeof(cpumask_t *));
        if (!tmp_array) {
            printf("no memory 1
    ");
            return NULL;
        }
        for (i = 0; i < num_sched_clusters; i++) {
            tmp_array[i] = calloc(num_sched_clusters, sizeof(cpumask_t));
            if (!tmp_array[i]) {
                printf("no memory 2
    ");
                return NULL;
            }
        }
    
        return tmp_array;
    }
    
    static cpumask_t **build_cpu_array(void)
    {
        int i;
        cpumask_t **tmp_array = init_cpu_array();
    
        if (!tmp_array)
            return NULL;
    
        /*Construct cpu_array row by row*/
        for (i = 0; i < num_sched_clusters; i++) {
            int j, k = 1;
    
            /* Fill out first column with appropriate cpu arrays*/
            cpumask_copy(&tmp_array[i][0], &sched_cluster[i].cpus);
    
            /*
             * k starts from column 1 because 0 is filled
             * Fill clusters for the rest of the row,
             * above i in ascending order
             */
            for (j = i + 1; j < num_sched_clusters; j++) {
                cpumask_copy(&tmp_array[i][k], &sched_cluster[j].cpus);
                k++;
            }
    
            /*
             * k starts from where we left off above.
             * Fill clusters below i in descending order.
             */
            for (j = i - 1; j >= 0; j--) {
                cpumask_copy(&tmp_array[i][k], &sched_cluster[j].cpus);
                k++;
            }
        }
        return tmp_array;
    }
    
    #if 0
    // 4+3+1
    static int num_sched_clusters = 3;
    static void test_init(void)
    {
        cpumask_set_cpu(0, &sched_cluster[0].cpus);
        cpumask_set_cpu(1, &sched_cluster[0].cpus);
        cpumask_set_cpu(2, &sched_cluster[0].cpus);
        cpumask_set_cpu(3, &sched_cluster[0].cpus);
    
        cpumask_set_cpu(4, &sched_cluster[1].cpus);
        cpumask_set_cpu(5, &sched_cluster[1].cpus);
        cpumask_set_cpu(6, &sched_cluster[1].cpus);
    
        cpumask_set_cpu(7, &sched_cluster[2].cpus);
    }
    #endif
    
    #if 1
    // 6+2
    static int num_sched_clusters = 2;
    static void test_init(void)
    {
        cpumask_set_cpu(0, &sched_cluster[0].cpus);
        cpumask_set_cpu(1, &sched_cluster[0].cpus);
        cpumask_set_cpu(2, &sched_cluster[0].cpus);
        cpumask_set_cpu(3, &sched_cluster[0].cpus);
        cpumask_set_cpu(4, &sched_cluster[0].cpus);
        cpumask_set_cpu(5, &sched_cluster[0].cpus);
    
        cpumask_set_cpu(6, &sched_cluster[1].cpus);
        cpumask_set_cpu(7, &sched_cluster[1].cpus);
    }
    #endif
    
    static void cpumask_print(cpumask_t *cpus)
    {
        int i;
    
        for (i = 0; i < 8; i++) {
            if (cpumask_test_cpu(i, cpus)) {
                printf("CPU%d ", i);
            }
        }
        printf("
    ");
    }
    
    int main()
    {
        int i, j;
        cpumask_t **cpu_array;
    
        test_init();
        cpu_array = build_cpu_array();
    
        for (i = 0; i < num_sched_clusters; i++) {
            for (j = 0; j < num_sched_clusters; j++) {
                printf("cpu_array[%d][%d]: ", i, j);
                cpumask_print(&cpu_array[i][j]);
            }
        }
    
        return 0;
    }

    执行结果:

    对于 6+2 架构:
    cpu_array[0][0]: CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 
    cpu_array[0][1]: CPU6 CPU7 
    cpu_array[1][0]: CPU6 CPU7 
    cpu_array[1][1]: CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 
    
    
    对于 4+3+1 架构:
    cpu_array[0][0]: CPU0 CPU1 CPU2 CPU3 
    cpu_array[0][1]: CPU4 CPU5 CPU6 
    cpu_array[0][2]: CPU7 
    cpu_array[1][0]: CPU4 CPU5 CPU6 
    cpu_array[1][1]: CPU7 
    cpu_array[1][2]: CPU0 CPU1 CPU2 CPU3 
    cpu_array[2][0]: CPU7 
    cpu_array[2][1]: CPU4 CPU5 CPU6 
    cpu_array[2][2]: CPU0 CPU1 CPU2 CPU3 

    从 4+3+1 架构中可以看出,开始选核的cluster不同的情况下三个cluster是怎么选核的。MTK的选核逻辑一致。

    3. walt选核流程中的使用

    int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync, int sibling_count_hint) //fair.c
    {
        int start_cpu, order_index, end_index;
        ...
        walt_get_indicies(p, &order_index, &end_index, task_boost, boosted);
    
        /* start_cpu 控制从哪个核开始选 */
        start_cpu = cpumask_first(&cpu_array[order_index][0]);
        
        fbt_env.start_cpu = start_cpu;
        fbt_env.order_index = order_index; /*选核cluster id*/
        fbt_env.end_index = end_index;
        fbt_env.strict_max = is_rtg && (task_boost == TASK_BOOST_STRICT_MAX); //walt_get_indicies中指定从大核开始选了
    
        /*walt进行遍历算核,将选出的放在candidates中,最多只往里面set 2个cpu*/
        walt_find_best_target(NULL, candidates, p, &fbt_env);
        ...
    }
    
    /* 设置 order_index 和 end_index 的值 */
    static void walt_get_indicies(struct task_struct *p, int *order_index, int *end_index, int task_boost, bool boosted) //fair.c
    {
        int i = 0;
    
        *order_index = 0;//设置为0
        *end_index = 0; //设置为0
    
        if (num_sched_clusters <= 1)
            return;
    
        if (task_boost > TASK_BOOST_ON_MID) {
            *order_index = num_sched_clusters - 1; //减一后表示大核
            return;
        }
    
        if (is_full_throttle_boost()) {
            *order_index = num_sched_clusters - 1; //选大核
            if ((*order_index > 1) && task_demand_fits(p, cpumask_first(&cpu_array[*order_index][1]))) //大核的[1][1]是中核(4+3+1)或小核(6+2)
                *end_index = 1; //表述允许从中核或小核上选核
            return;
        }
    
        if (boosted || task_boost_policy(p) == SCHED_BOOST_ON_BIG || walt_task_skip_min_cpu(p))
            *order_index = 1; //跳过小核
    
        /* 若是没有boost操作,就根据算力从 order_index 开始选 */
        for (i = *order_index ; i < num_sched_clusters - 1; i++) {
            if (task_demand_fits(p, cpumask_first(&cpu_array[i][0])))
                break;
        }
        *order_index = i;
    }
    
    
    static void walt_find_best_target(struct sched_domain *sd, cpumask_t *cpus, struct task_struct *p, struct find_best_target_env *fbt_env) //fair.c
    {
        int i, start_cpu = fbt_env->start_cpu;
        int order_index = fbt_env->order_index, end_index = fbt_env->end_index;
        ...
        /* order_index>0 表示跳过小核开始选核,并且任务是在等待I/O,只能选大核(6+2),或只能选中大核(4+3+1) */
        if (order_index > 0 && p->wts.iowaited) {
            stop_index = num_sched_clusters - 2; //0或1
            most_spare_wake_cap = LONG_MIN;
        }
    
        /* 
         * strict_max=is_rtg && BOOST_MAX, walt_get_indicies中对BOOST_MAX指定从大核开始选核了,这里stop_index=0表示只能选大核了,
         * 无论是6+2还是4+3+1架构
         */
        if (fbt_env->strict_max) {
            stop_index = 0;
            most_spare_wake_cap = LONG_MIN;
        }
        ...
        
        /* 慢速路径 */
        for (cluster = 0; cluster < num_sched_clusters; cluster++) {
            /*
             * 从任务p的亲和性cpu和此cluster的cpu的交集中选择。从order_index这个cluster开始选核,
             * 它不同,一级数组中的大小核排布就不同。
             */
            cpumask_and(&visit_cpus, &p->cpus_mask, &cpu_array[order_index][cluster]);
            for_each_cpu(i, &visit_cpus) {
                ... //选核逻辑,没有使用到相关成员
            }
    
            /* 
             * end_index 是结束选核的cluster的下标,也就是根据boost情况和算力,到end_index就应该结束选核了,
             * 并且此时已经选到了target_cpu,且 初始选核的cluster是大核簇 或 开始选核的这个cluster中不只有1
             * 个CPU 或 target_cpu不是开始选核的这个cluster的首个CPU, 就不再在下一个cluster中继续选核心了。
             */
            if ((cluster >= end_index) && (target_cpu != -1) && walt_target_ok(target_cpu, order_index))
                break;
    
            /* 找到了最大空余算力的cpu 且 达到了停止选核的stop_index阈值,退出继续遍历其它cluster进行选核的流程 */
            if (most_spare_cap_cpu != -1 && cluster >= stop_index)
                break;
        
        }
        ...
    done:
            /*
             * trace log:
             * cat-32758 [001] d..4  5235.529561: sched_find_best_target: pid=18090 comm=kworker/u16:25
             * start_cpu=0 best_idle=-1 most_spare_cap=-1 target=0 order_index=0 end_index=0 skip=-1 running=0
             */
            trace_sched_find_best_target(p, min_util, start_cpu, best_idle_cpu, most_spare_cap_cpu,
                     target_cpu, order_index, end_index, fbt_env->skip_cpu, p->state == TASK_RUNNING);
    }

    order_index 越大表示越从算力高的cluster开始选核,但是 end_index 需要参考我们写的应用程序配和来看。

  • 相关阅读:
    java内部私有类的构造函数
    java 日志
    java Random.nextInt()方法
    迭代器是快速失败的
    java Calendar
    java null?
    EclEmma
    Java泛型、泛型协变&&类型擦除
    java 声明实例化初始化三连
    写在Ruby之前。
  • 原文地址:https://www.cnblogs.com/hellokitty2/p/15438824.html
Copyright © 2020-2023  润新知