• 调度器19—/proc/sched_debug文件 Hello


    一、打印说明

    1. 打印内容

    # cat /proc/sched_debug
    Sched Debug Version: v0.11, 5.10.xx-android12-x-xx-xxxxxxxxx #1
    ktime                                   : 20791668.206238
    sched_clk                               : 20791683.112454
    cpu_clk                                 : 20791683.112454
    jiffies                                 : 4300090214
    
    sysctl_sched
      .sysctl_sched_latency                    : 10.000000
      .sysctl_sched_min_granularity            : 3.000000
      .sysctl_sched_wakeup_granularity         : 2.000000
      .sysctl_sched_child_runs_first           : 0
      .sysctl_sched_features                   : 16722747 //使能了哪些调度feature,见features.h
      .sysctl_sched_tunable_scaling            : 0 (none)
    
    cpu#0
      .nr_running                    : 0
      .nr_switches                   : 515789
      .nr_uninterruptible            : -348
      .next_balance                  : 4300.090217
      .curr->pid                     : 0
      .clock                         : 20791690.941377
      .clock_task                    : 20393319.574563
      .avg_idle                      : 681678
      .max_idle_balance_cost         : 347294
      .yld_count                     : 45325
      .sched_count                   : 558874
      .sched_goidle                  : 174803
      .ttwu_count                    : 2261133
      .ttwu_local                    : 155736
    
    cfs_rq[0]:/
      .exec_clock                    : 150058.081435
      .MIN_vruntime                  : 0.000001
      .min_vruntime                  : 1032733.837701 //最小虚拟时间
      .max_vruntime                  : 0.000001
      .spread                        : 0.000000
      .spread0                       : 0.000000
      .nr_spread_over                : 7046
      .nr_running                    : 0
      .load                          : 0 //负载信息
      .load_avg                      : 0
      .runnable_avg                  : 0
      .util_avg                      : 0
      .util_est_enqueued             : 0
      .removed.load_avg              : 0
      .removed.util_avg              : 0
      .removed.runnable_avg          : 0
      .tg_load_avg_contrib           : 0
      .tg_load_avg                   : 0
    
    rt_rq[0]:
      .rt_nr_running                 : 0
      .rt_nr_migratory               : 0
      .rt_throttled                  : 0
      .rt_time                       : 3.853386
      .rt_runtime                    : 950.000000
    
    dl_rq[0]:
      .dl_nr_running                 : 0
      .dl_nr_migratory               : 0
      .dl_bw->bw                     : 996147
      .dl_bw->total_bw               : 0
    
    runnable tasks:
     S            task   PID         tree-key  switches  prio     wait-time             sum-exec        sum-sleep
    -------------------------------------------------------------------------------------------------------------
     I      rcu_par_gp     4         8.725293         2   100         0.000000         0.009155         0.000000 /
     D     hang_detect   152         0.000000       675     0         0.000000        87.342714         0.000000 /
    >R   Binder:1061_1 17584      1122.927614       598   120       189.109238      1271.457995       183.664618 /foreground
     S    Binder:799_2   844        20.058758         2   120         0.950462         0.123000         0.000000 /foreground
     S HwBinder:1154_1  1722      1420.876848        11   120         5.680075         1.564693         2.107003 /top-app
     S   Binder:3472_3  3539    555381.752165        65   120        24.436231        92.525768  20237325.907593 /background
     ...
     S irq/520-event_0   156         0.000000         4    49         0.000000         0.307768         0.000000 /
    
    
    //每个CPU都会按上述格式打印一份,这里只保留了cpu0的内容

    2. header部分打印函数

    //kernel/sched/debug.c
    /*
     * Emit the global header of /proc/sched_debug: the version banner, three
     * clock readings plus jiffies, and the sysctl_sched tunables.
     */
    static void sched_debug_header(struct seq_file *m)
    {
        u64 ktime, sched_clk, cpu_clk;
        unsigned long flags;
    
        /* Take the three clock samples inside one local_irq_save/restore pair. */
        local_irq_save(flags);
        ktime = ktime_to_ns(ktime_get());
        sched_clk = sched_clock();
        cpu_clk = local_clock();
        local_irq_restore(flags);
    
        /* "%.*s" prints the utsname version string only up to its first space. */
        SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
            init_utsname()->release,
            (int)strcspn(init_utsname()->version, " "),
            init_utsname()->version);
    
        /*
         * P(x):  print "<name>: <value>", value cast to long long.
         * PN(x): print the value through SPLIT_NS() as "<int>.<6 digits>".
         * The printed name is the stringified macro argument (#x).
         * NOTE(review): SPLIT_NS() is defined outside this excerpt; presumably it
         * splits nanoseconds into milliseconds and the remainder - confirm.
         */
    #define P(x) \
        SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
    #define PN(x) \
        SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
        PN(ktime);
        PN(sched_clk);
        PN(cpu_clk);
        P(jiffies);
    #undef PN
    #undef P
    
        SEQ_printf(m, "\n");
        SEQ_printf(m, "sysctl_sched\n");
    
        /* Same two helpers, redefined with the "  ." prefix used for sub-entries. */
    #define P(x) \
        SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
    #define PN(x) \
        SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
        PN(sysctl_sched_latency);
        PN(sysctl_sched_min_granularity);
        PN(sysctl_sched_wakeup_granularity);
        P(sysctl_sched_child_runs_first);
        P(sysctl_sched_features);
    #undef PN
    #undef P
    
        /* The scaling mode is printed as its number plus the name looked up by index. */
        SEQ_printf(m, "  .%-40s: %d (%s)\n",
            "sysctl_sched_tunable_scaling",
            sysctl_sched_tunable_scaling,
            sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
        SEQ_printf(m, "\n");
    }
    
    /*
     * seq_file show callback: decode the iterator cookie into a CPU number and
     * print either the global header (decoded value -1) or that CPU's section.
     * Always returns 0.
     */
    static int sched_debug_show(struct seq_file *m, void *v)
    {
        /* The cookie is the CPU number offset by 2. */
        int cpu = (unsigned long)(v - 2);
    
        if (cpu == -1) {
            sched_debug_header(m);
            return 0;
        }
    
        print_cpu(m, cpu);
        return 0;
    }

    3. cpu#0 下的打印

    /*
     * Print the "cpu#N" section for one CPU: runqueue counters and clocks,
     * optional SMP and schedstat fields, then the per-class runqueue sections
     * and the task table.
     */
    static void print_cpu(struct seq_file *m, int cpu)
    {
        struct rq *rq = cpu_rq(cpu);
    
        SEQ_printf(m, "cpu#%d\n", cpu);
    
        /*
         * P(x) prints rq->x under its stringified field name; 4-byte fields go
         * out as "%ld", everything else as "%Ld" (long long).
         */
    #define P(x)                                \
    do {                                    \
        if (sizeof(rq->x) == 4)                        \
            SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \
        else                                \
            SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
    } while (0)
    
        /* PN(x) prints rq->x through SPLIT_NS() as "<int>.<6 digits>". */
    #define PN(x) \
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
    
        P(nr_running);
        P(nr_switches);
        P(nr_uninterruptible); // an unsigned long printed through P()'s signed cast
        PN(next_balance);
        SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
        PN(clock);
        PN(clock_task);
    #undef P
    #undef PN
    
        /* These two fields are only printed when CONFIG_SMP is defined. */
    #ifdef CONFIG_SMP
    #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
        P64(avg_idle);
        P64(max_idle_balance_cost);
    #undef P64
    #endif
    
        /* Schedstat counters are only printed when schedstat_enabled() is true. */
    #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
        if (schedstat_enabled()) {
            P(yld_count);
            P(sched_count);
            P(sched_goidle);
            P(ttwu_count);
            P(ttwu_local);
        }
    #undef P
    
        /* The three calls below print the "cfs_rq[0]:", "rt_rq[0]:" and "dl_rq[0]:" sections respectively. */
        print_cfs_stats(m, cpu);
        print_rt_stats(m, cpu);
        print_dl_stats(m, cpu);
    
        /* print_rq() prints the "runnable tasks:" table. */
        print_rq(m, rq, cpu);
        SEQ_printf(m, "\n");
    }

    4. cfs_rq[0]: 下的打印

    void print_cfs_stats(struct seq_file *m, int cpu)
    {
        struct cfs_rq *cfs_rq, *pos;
    
        rcu_read_lock();
        //对于rq->leaf_cfs_rq_list上的每一个叶子cfs_rq都调用,若没有使能组调度,就只打印 rq->cfs_rq
        for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
            print_cfs_rq(m, cpu, cfs_rq);
        rcu_read_unlock();
    }

    如果需要 CFS 支持组调度,就得把各个 cfs_rq 加入到一个链表当中:leaf_cfs_rq_list 成员负责把本 CPU 就绪队列(rq)下的各个 cfs_rq 子队列关联起来。此外,cfs_rq 里面还有成员 on_list,用来表示当前的 cfs_rq 是否已经通过 leaf_cfs_rq_list 成员挂载在 rq->leaf_cfs_rq_list 链表中。

    /*
     * Print one "cfs_rq[N]:" section: exec_clock, the vruntime range and
     * spreads, queue length and load, then (depending on the config options
     * below) the load-tracking averages, bandwidth throttling state and the
     * owning group's scheduling-entity stats.
     */
    void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
    {
        /* MIN_vruntime and max_vruntime keep -1 when no entity is found below. */
        s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, spread, rq0_min_vruntime, spread0;
        struct rq *rq = cpu_rq(cpu);
        struct sched_entity *last;
        unsigned long flags;
    
        /* With group scheduling the header line also carries the task-group path. */
    #ifdef CONFIG_FAIR_GROUP_SCHED
        SEQ_printf(m, "\n");
        SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
    #else
        SEQ_printf(m, "\n");
        SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
    #endif
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); // format: ms.ns
    
        /* Snapshot the vruntime values while holding rq->lock; print after unlocking. */
        raw_spin_lock_irqsave(&rq->lock, flags);
        if (rb_first_cached(&cfs_rq->tasks_timeline))
            MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
        last = __pick_last_entity(cfs_rq);
        if (last)
            max_vruntime = last->vruntime;
        min_vruntime = cfs_rq->min_vruntime;
        rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
        raw_spin_unlock_irqrestore(&rq->lock, flags);
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime));
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime", SPLIT_NS(min_vruntime));
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime", SPLIT_NS(max_vruntime));
        /* spread: distance between the last and the first entity's vruntime. */
        spread = max_vruntime - MIN_vruntime;
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
        /* spread0: this queue's min_vruntime relative to CPU 0's cfs min_vruntime. */
        spread0 = min_vruntime - rq0_min_vruntime;
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0));
        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over);
        SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
        SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
        /* Load-tracking averages are only printed when CONFIG_SMP is defined. */
    #ifdef CONFIG_SMP
        SEQ_printf(m, "  .%-30s: %lu\n", "load_avg", cfs_rq->avg.load_avg);
        SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg", cfs_rq->avg.runnable_avg);
        SEQ_printf(m, "  .%-30s: %lu\n", "util_avg", cfs_rq->avg.util_avg);
        SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued", cfs_rq->avg.util_est.enqueued);
        SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg", cfs_rq->removed.load_avg);
        SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg", cfs_rq->removed.util_avg);
        SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg", cfs_rq->removed.runnable_avg);
    #ifdef CONFIG_FAIR_GROUP_SCHED
        SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib", cfs_rq->tg_load_avg_contrib);
        SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg", atomic_long_read(&cfs_rq->tg->load_avg));
    #endif
    #endif
        /* Throttling state is only printed when CONFIG_CFS_BANDWIDTH is defined. */
    #ifdef CONFIG_CFS_BANDWIDTH
        SEQ_printf(m, "  .%-30s: %d\n", "throttled", cfs_rq->throttled);
        SEQ_printf(m, "  .%-30s: %d\n", "throttle_count", cfs_rq->throttle_count);
    #endif
    
    #ifdef CONFIG_FAIR_GROUP_SCHED
        print_cfs_group_stats(m, cpu, cfs_rq->tg); // stats of the se that represents this task_group on this CPU
    #endif
    }
    
    /*
     * Print the statistics of the scheduling entity that represents task
     * group @tg on @cpu. Prints nothing when tg->se[cpu] is NULL.
     */
    static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
    {
        struct sched_entity *se = tg->se[cpu]; // the se that represents this task_group on this CPU
    
        /*
         * P / PN print a value as a plain integer / through SPLIT_NS(); the
         * *_SCHEDSTAT variants first pass the value through schedstat_val().
         * The printed label is the stringified argument, e.g. "se->vruntime".
         */
    #define P(F)        SEQ_printf(m, "  .%-30s: %lld\n",    #F, (long long)F)
    #define P_SCHEDSTAT(F)    SEQ_printf(m, "  .%-30s: %lld\n",    #F, (long long)schedstat_val(F))
    #define PN(F)        SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
    #define PN_SCHEDSTAT(F)    SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
    
        /* No entity for this group on this CPU: nothing to print. */
        if (!se)
            return;
    
        PN(se->exec_start);
        PN(se->vruntime);
        PN(se->sum_exec_runtime);
    
        /* The se->statistics fields are only printed when schedstats are enabled. */
        if (schedstat_enabled()) {
            PN_SCHEDSTAT(se->statistics.wait_start);
            PN_SCHEDSTAT(se->statistics.sleep_start);
            PN_SCHEDSTAT(se->statistics.block_start);
            PN_SCHEDSTAT(se->statistics.sleep_max);
            PN_SCHEDSTAT(se->statistics.block_max);
            PN_SCHEDSTAT(se->statistics.exec_max);
            PN_SCHEDSTAT(se->statistics.slice_max);
            PN_SCHEDSTAT(se->statistics.wait_max);
            PN_SCHEDSTAT(se->statistics.wait_sum);
            P_SCHEDSTAT(se->statistics.wait_count);
        }
    
        P(se->load.weight);
        /* The se->avg fields are only printed when CONFIG_SMP is defined. */
    #ifdef CONFIG_SMP
        P(se->avg.load_avg);
        P(se->avg.util_avg);
        P(se->avg.runnable_avg);
    #endif
    
    #undef PN_SCHEDSTAT
    #undef PN
    #undef P_SCHEDSTAT
    #undef P
    }

    5. rt_rq[0] 下的打印

    void print_rt_stats(struct seq_file *m, int cpu)
    {
        rt_rq_iter_t iter;
        struct rt_rq *rt_rq;
    
        rcu_read_lock();
        for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
            print_rt_rq(m, cpu, rt_rq);
        rcu_read_unlock();
    }
    
    /*
     * Print one "rt_rq[N]:" section: number of running RT tasks, number of
     * migratory ones (CONFIG_SMP only), the throttled flag, and rt_time /
     * rt_runtime.
     */
    void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
    {
        SEQ_printf(m, "\n");
        SEQ_printf(m, "rt_rq[%d]:\n", cpu);
    
        /*
         * Each helper prints rt_rq->x under its stringified field name:
         * P as long long, PU as unsigned long, PN through SPLIT_NS().
         */
    #define P(x) \
        SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
    #define PU(x) \
        SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
    #define PN(x) \
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
    
        PU(rt_nr_running);
    #ifdef CONFIG_SMP
        PU(rt_nr_migratory);
    #endif
        P(rt_throttled);
        PN(rt_time);
        PN(rt_runtime);
    
    #undef PN
    #undef PU
    #undef P
    }

    原生内核使能了 CONFIG_FAIR_GROUP_SCHED,却没有使能 CONFIG_RT_GROUP_SCHED,因此每个 CPU 下只会打印一个 rt_rq[N] 段(如上面的打印所示),而 cfs_rq 则可能按任务组打印多个。

    6. dl_rq[0] 下的打印

    void print_dl_stats(struct seq_file *m, int cpu)
    {
        print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
    }
    
    /*
     * Print one "dl_rq[N]:" section: number of running and migratory deadline
     * tasks, followed by the bw and total_bw fields of the dl_bw structure
     * reached through the CPU's rq->rd.
     */
    void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
    {
        struct dl_bw *dl_bw;
    
        SEQ_printf(m, "\n");
        SEQ_printf(m, "dl_rq[%d]:\n", cpu);
    
        /* PU(x) prints dl_rq->x as unsigned long under its stringified field name. */
    #define PU(x) \
        SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
    
        PU(dl_nr_running);
    
        /*
         * NOTE(review): unlike rt_nr_migratory in print_rt_rq(), dl_nr_migratory
         * and the rd->dl_bw lookup are not wrapped in #ifdef CONFIG_SMP here -
         * confirm against the kernel tree whether a guard was dropped from this
         * excerpt.
         */
        PU(dl_nr_migratory);
        dl_bw = &cpu_rq(cpu)->rd->dl_bw;
        SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
        SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
    
    #undef PU
    }

    7. runnable tasks: 下的打印

    /*
     * Print the "runnable tasks:" table for one CPU: a two-line column header
     * followed by one row per thread whose task_cpu() equals @rq_cpu.
     */
    static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
    {
        struct task_struct *g, *p;
    
        SEQ_printf(m, "\n");
        SEQ_printf(m, "runnable tasks:\n");
        SEQ_printf(m, " S            task   PID         tree-key  switches  prio"
               "     wait-time             sum-exec        sum-sleep\n");
        SEQ_printf(m, "-------------------------------------------------------"
               "------------------------------------------------------\n");
    
        rcu_read_lock();
        for_each_process_thread(g, p) {
            /* Only threads currently assigned to this CPU get a row. */
            if (task_cpu(p) == rq_cpu)
                print_task(m, rq, p);
        }
        rcu_read_unlock();
    }
    
    
    static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
    {
        if (rq->curr == p)
            SEQ_printf(m, ">R");
        else
            SEQ_printf(m, " %c", task_state_to_char(p)); //此CPU上的所有任务,包括睡眠的
    
        SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
            p->comm, task_pid_nr(p),
            SPLIT_NS(p->se.vruntime), //格式: ms.ns
            (long long)(p->nvcsw + p->nivcsw), //主动放弃cpu+被抢占
            p->prio);
    
        SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
            SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)), //总等待时间
            SPLIT_NS(p->se.sum_exec_runtime), //总执行时间
            SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime))); //纯休眠时间(sleep+D)
    
    #ifdef CONFIG_CGROUP_SCHED
        SEQ_printf_task_group_path(m, task_group(p), " %s") //task的cgroup分组
    #endif
    
        SEQ_printf(m, "\n");
    }
  • 相关阅读:
    linux下svn命令大全
    php常用函数
    在centos上设置计划任务
    sphinx使用心得
    sphinx2.8.8的配置文件
    Mac使用
    sftp
    uwp应用在debug模式下运行正常,编译为release版本的时候抛出异常
    win10 uwp 读取resw资源文件
    dll被设置为用记事本打开的解决方法
  • 原文地址:https://www.cnblogs.com/hellokitty2/p/15664139.html
Copyright © 2020-2023  润新知