1 优先级的内核表示
内核使用 0 - 139 表示内部优先级,值越低,优先级越高。其中 0 - 99 供实时进程使用;nice 值 [-20,19] 映射到范围 100 - 139,如下图
内核定义了一系列宏来辅助优先级之间的转换
sched.h
/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space. This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */

#define MAX_USER_RT_PRIO	100
#define MAX_RT_PRIO		MAX_USER_RT_PRIO

/* 40 non-RT levels sit directly above the RT range: 100..139. */
#define MAX_PRIO		(MAX_RT_PRIO + 40)
/* Midpoint of the non-RT range: 120. */
#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
sched.c
/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 * and back.
 */
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
/* Nice value of task @p, derived from its static priority. */
#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
2 优先级计算
动态优先级 task_struct->prio
普通优先级 task_struct->normal_prio
静态优先级 task_struct->static_prio (计算起点,已经设置好)
sched.c
1 /* 2 * Calculate the current priority, i.e. the priority 3 * taken into account by the scheduler. This value might 4 * be boosted by RT tasks, or might be boosted by 5 * interactivity modifiers. Will be RT if the task got 6 * RT-boosted. If not then it returns p->normal_prio. 7 */ 8 static int effective_prio(struct task_struct *p) 9 { 10 p->normal_prio = normal_prio(p); 11 /* 12 * If we are RT tasks or we were boosted to RT priority, 13 * keep the priority unchanged. Otherwise, update priority 14 * to the normal priority: 15 */ 16 if (!rt_prio(p->prio)) 17 return p->normal_prio; 18 return p->prio; 19 }
rt_prio 检测传入的优先级是否落在实时范围内(即是否小于 MAX_RT_PRIO)
1 static inline int rt_prio(int prio) 2 { 3 if (unlikely(prio < MAX_RT_PRIO)) 4 return 1; 5 return 0; 6 }
普通优先级的计算分为普通进程和实时进程两种情况:普通进程使用 __normal_prio,直接返回静态优先级;实时进程则根据 rt_priority 计算。rt_priority 的值越大,表示实时进程的优先级越高,而内核内部的优先级表示正好相反(值越小,优先级越高),因此内核用
MAX_RT_PRIO-1 - p->rt_priority 计算
/*
 * __normal_prio - return the priority that is based on the static prio
 */
static inline int __normal_prio(struct task_struct *p)
{
	return p->static_prio;
}

/*
 * Calculate the expected normal priority: i.e. priority
 * without taking RT-inheritance into account. Might be
 * boosted by interactivity modifiers. Changes upon fork,
 * setprio syscalls, and whenever the interactivity
 * estimator recalculates.
 */
static inline int normal_prio(struct task_struct *p)
{
	int prio;

	if (task_has_rt_policy(p))
		/* Invert: a larger rt_priority yields a smaller prio value. */
		prio = MAX_RT_PRIO-1 - p->rt_priority;
	else
		prio = __normal_prio(p);
	return prio;
}
下图描述了不同类型上述计算结果
注意以下两点:
- 新建进程用 wake_up_new_task 唤醒时,或使用 nice 系统调用改变静态优先级时,都使用上述方法(effective_prio)计算并设置进程的 prio
- 进程分支出子进程时,子进程的静态优先级继承自父进程,子进程的动态优先级(prio)设置为父进程的普通优先级.
3 计算负载权重
set_load_weight负责根据进程类型及静态优先级计算负载权重
sched.h
/*
 * Load weight of a schedulable entity or run queue.
 *
 * NOTE(review): inv_weight appears to be a precomputed inverse of
 * weight (about 2^32 / weight, judging by WMULT_IDLEPRIO versus
 * WEIGHT_IDLEPRIO) -- confirm against the prio_to_wmult table.
 */
struct load_weight {
	unsigned long weight, inv_weight;
};
一般来说,进程每降低一个 nice 值,就多获得约 10% 的 CPU 时间;每升高一个 nice 值,则少获得约 10%。为了执行该策略,内核将优先级转换为权重,如下
/*
 * Nice levels are multiplicative, with a gentle 10% change for every
 * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
 * nice 1, it will get ~10% less CPU time than another CPU-bound task
 * that remained on nice 0.
 *
 * The "10% effect" is relative and cumulative: from _any_ nice level,
 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
 * If a task goes up by ~10% and another task goes down by ~10% then
 * the relative distance between them is ~25%.)
 *
 * The table has one entry per nice level, from -20 (index 0) to
 * 19 (index 39); nice 0 maps to a weight of 1024.
 */
static const int prio_to_weight[40] = {
 /* -20 */     88761,     71755,     56483,     46273,     36291,
 /* -15 */     29154,     23254,     18705,     14949,     11916,
 /* -10 */      9548,      7620,      6100,      4904,      3906,
 /*  -5 */      3121,      2501,      1991,      1586,      1277,
 /*   0 */      1024,       820,       655,       526,       423,
 /*   5 */       335,       272,       215,       172,       137,
 /*  10 */       110,        87,        70,        56,        45,
 /*  15 */        36,        29,        23,        18,        15,
};
具体转换代码如下:实时进程的权重是 nice 值为 -20 的普通进程权重(prio_to_weight[0])的 2 倍,SCHED_IDLE 进程的权重则很小
/* Load weight given to SCHED_IDLE tasks. */
#define WEIGHT_IDLEPRIO		3
/* Matching inverse weight: 1431655765 == (2^32 - 1) / WEIGHT_IDLEPRIO. */
#define WMULT_IDLEPRIO		1431655765
1 static void set_load_weight(struct task_struct *p) 2 { 3 if (task_has_rt_policy(p)) { 4 p->se.load.weight = prio_to_weight[0] * 2; 5 p->se.load.inv_weight = prio_to_wmult[0] >> 1; 6 return; 7 } 8 9 /* 10 * SCHED_IDLE tasks get minimal weight: 11 */ 12 if (p->policy == SCHED_IDLE) { 13 p->se.load.weight = WEIGHT_IDLEPRIO; 14 p->se.load.inv_weight = WMULT_IDLEPRIO; 15 return; 16 } 17 18 p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO]; 19 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; 20 }
就绪队列也有一个负载权重。每次进程被加入到就绪队列时,会调用 inc_nr_running,这样既能让就绪队列记录当前有多少个进程可运行,又能把该进程的权重累加到就绪队列的权重中;进程从就绪队列移除时,也会调用对应的函数
1 /* 2 * Update delta_exec, delta_fair fields for rq. 3 * 4 * delta_fair clock advances at a rate inversely proportional to 5 * total load (rq->load.weight) on the runqueue, while 6 * delta_exec advances at the same rate as wall-clock (provided 7 * cpu is not idle). 8 * 9 * delta_exec / delta_fair is a measure of the (smoothened) load on this 10 * runqueue over any given interval. This (smoothened) load is used 11 * during load balance. 12 * 13 * This function is called /before/ updating rq->load 14 * and when switching tasks. 15 */ 16 static inline void inc_load(struct rq *rq, const struct task_struct *p) 17 { 18 update_load_add(&rq->load, p->se.load.weight); 19 } 20 21 static inline void dec_load(struct rq *rq, const struct task_struct *p) 22 { 23 update_load_sub(&rq->load, p->se.load.weight); 24 } 25 26 static void inc_nr_running(struct task_struct *p, struct rq *rq) 27 { 28 rq->nr_running++; 29 inc_load(rq, p); 30 }