sched_class
Linux supports several different scheduling policies, and each policy is implemented by its own scheduler class. struct sched_class defines the interface that every scheduler class must implement.
struct sched_class {
    const struct sched_class *next;

    void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); /* enqueue a task */
    void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); /* dequeue a task */
    void (*yield_task)   (struct rq *rq);
    bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);

    void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);

    /*
     * It is the responsibility of the pick_next_task() method that will
     * return the next task to call put_prev_task() on the @prev task or
     * something equivalent.
     *
     * May return RETRY_TASK when it finds a higher prio class has runnable
     * tasks.
     */
    struct task_struct * (*pick_next_task)(struct rq *rq,
                                           struct task_struct *prev,
                                           struct rq_flags *rf);
    void (*put_prev_task)(struct rq *rq, struct task_struct *p);

#ifdef CONFIG_SMP
    int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
    void (*migrate_task_rq)(struct task_struct *p, int new_cpu);

    void (*task_woken)(struct rq *this_rq, struct task_struct *task);

    void (*set_cpus_allowed)(struct task_struct *p, const struct cpumask *newmask);

    void (*rq_online)(struct rq *rq);
    void (*rq_offline)(struct rq *rq);
#endif

    void (*set_curr_task)(struct rq *rq);
    void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
    void (*task_fork)(struct task_struct *p);
    void (*task_dead)(struct task_struct *p);

    /*
     * The switched_from() call is allowed to drop rq->lock, therefore we
     * cannot assume the switched_from/switched_to pair is serialized by
     * rq->lock. They are however serialized by p->pi_lock.
     */
    void (*switched_from)(struct rq *this_rq, struct task_struct *task);
    void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
    void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio);

    unsigned int (*get_rr_interval)(struct rq *rq, struct task_struct *task);

    void (*update_curr)(struct rq *rq);

#define TASK_SET_GROUP  0
#define TASK_MOVE_GROUP 1

#ifdef CONFIG_FAIR_GROUP_SCHED
    void (*task_change_group)(struct task_struct *p, int type);
#endif
};
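Each scheduling policy provides its own instance of this interface, and the core scheduler only ever calls through the task's sched_class pointer. The following is an abridged sketch of how CFS wires up its hooks and how the core dispatches through them; field names and the exact set of hooks vary between kernel versions.

/*
 * Abridged sketch of the CFS instance of sched_class
 * (adapted from kernel/sched/fair.c; not the complete initializer).
 */
const struct sched_class fair_sched_class = {
    .next               = &idle_sched_class,   /* next lower-priority class */
    .enqueue_task       = enqueue_task_fair,
    .dequeue_task       = dequeue_task_fair,
    .yield_task         = yield_task_fair,
    .check_preempt_curr = check_preempt_wakeup,
    .pick_next_task     = pick_next_task_fair,
    .put_prev_task      = put_prev_task_fair,
    .task_tick          = task_tick_fair,
    .task_fork          = task_fork_fair,
    .update_curr        = update_curr_fair,
    /* ... */
};

/*
 * The core scheduler never calls a policy directly; it dispatches
 * through the task's class pointer, roughly like this:
 */
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
    p->sched_class->enqueue_task(rq, p, flags);
}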
The rq that appears above is the runqueue: every CPU has its own runqueue, which keeps track of the processes that run on that CPU. Looking at the definition of struct rq, we can see that it embeds three different sub-runqueues, cfs_rq, rt_rq and dl_rq, one for each scheduler class to use.
For example, when we fork() a new process, it has to be added to the cfs_rq. enqueue_task then invokes the CFS implementation of the enqueue hook, which inserts the process's scheduling entity into the red-black tree (sketched below, after the struct rq definition).
struct rq {
    ...
    struct cfs_rq cfs;
    struct rt_rq  rt;
    struct dl_rq  dl;
    ...
};
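The red-black tree insertion mentioned above happens at the bottom of the fork enqueue path (roughly wake_up_new_task() -> activate_task() -> enqueue_task() -> enqueue_task_fair() -> enqueue_entity()). Below is a simplified sketch of the helper that places an entity's run_node into the cfs_rq tree ordered by vruntime; it is adapted from kernel/sched/fair.c and details such as the cached-leftmost handling differ across kernel versions.

/*
 * Simplified sketch: insert a scheduling entity into the per-cpu
 * red-black tree, keyed by vruntime (smaller vruntime = further left).
 */
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
    struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node;
    struct rb_node *parent = NULL;
    struct sched_entity *entry;
    bool leftmost = true;

    /* Walk down the tree, comparing vruntime at each node. */
    while (*link) {
        parent = *link;
        entry = rb_entry(parent, struct sched_entity, run_node);
        if ((s64)(se->vruntime - entry->vruntime) < 0) {
            link = &parent->rb_left;
        } else {
            link = &parent->rb_right;
            leftmost = false;
        }
    }

    /* Link the new node and rebalance; remember the leftmost entity. */
    rb_link_node(&se->run_node, parent, link);
    rb_insert_color_cached(&se->run_node, &cfs_rq->tasks_timeline, leftmost);
}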
sched_entity
Because scheduling usually needs some extra per-task bookkeeping, Linux defines a scheduling entity structure to hold it.
struct sched_entity {
    /* For load-balancing: */
    struct load_weight      load;
    unsigned long           runnable_weight;
    struct rb_node          run_node;
    struct list_head        group_node;
    unsigned int            on_rq;

    u64                     exec_start;
    u64                     sum_exec_runtime;
    u64                     vruntime;
    u64                     prev_sum_exec_runtime;

    u64                     nr_migrations;

    struct sched_statistics statistics;

#ifdef CONFIG_FAIR_GROUP_SCHED
    int                     depth;
    struct sched_entity     *parent;
    /* rq on which this entity is (to be) queued: */
    struct cfs_rq           *cfs_rq;
    /* rq "owned" by this entity/group: */
    struct cfs_rq           *my_q;
#endif
};
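Several of these fields (exec_start, sum_exec_runtime, vruntime) are maintained by CFS's update_curr(), which charges the elapsed runtime to the currently running entity. A simplified sketch, adapted from kernel/sched/fair.c with the statistics and group handling omitted:

/*
 * Simplified sketch: account the time the current entity has run since
 * exec_start and advance its weighted virtual runtime.
 */
static void update_curr(struct cfs_rq *cfs_rq)
{
    struct sched_entity *curr = cfs_rq->curr;
    u64 now = rq_clock_task(rq_of(cfs_rq));
    u64 delta_exec;

    if (unlikely(!curr))
        return;

    delta_exec = now - curr->exec_start;
    if (unlikely((s64)delta_exec <= 0))
        return;

    curr->exec_start = now;
    curr->sum_exec_runtime += delta_exec;

    /* Weight the delta by the entity's load before adding it to vruntime. */
    curr->vruntime += calc_delta_fair(delta_exec, curr);
    update_min_vruntime(cfs_rq);
}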
We can also see that a process descriptor, task_struct, embeds several scheduling entities. Its sched_class pointer refers to the scheduler class responsible for the task, and that scheduler in turn operates on the corresponding scheduling entity.
struct task_struct {
    ...
    int                          on_rq;        /* whether the task is on a runqueue */

    int                          prio;
    int                          static_prio;
    int                          normal_prio;
    unsigned int                 rt_priority;

    const struct sched_class     *sched_class; /* scheduler class for this task */
    struct sched_entity          se;           /* CFS scheduling entity */
    struct sched_rt_entity       rt;           /* real-time scheduling entity */
#ifdef CONFIG_CGROUP_SCHED
    struct task_group            *sched_task_group;
#endif
    struct sched_dl_entity       dl;           /* deadline scheduling entity */
    ...
};
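Which of these embedded entities is actually used is decided by the class that sched_class points to, and the kernel picks that class from the task's priority. A simplified sketch of the selection logic (adapted from kernel/sched/core.c; the surrounding policy checks are omitted):

/* Deadline tasks beat real-time tasks, which beat normal (CFS) tasks. */
if (dl_prio(p->prio))
    p->sched_class = &dl_sched_class;
else if (rt_prio(p->prio))
    p->sched_class = &rt_sched_class;
else
    p->sched_class = &fair_sched_class;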