文件:kthread.c
1 /** 2 * kthread_create - create a kthread. 3 * @threadfn: the function to run until signal_pending(current). 4 * @data: data ptr for @threadfn. 5 * @namefmt: printf-style name for the thread. 6 * 7 * Description: This helper function creates and names a kernel 8 * thread. The thread will be stopped: use wake_up_process() to start 9 * it. See also kthread_run(), kthread_create_on_cpu(). 10 *这里面给出了如何运行一个内核线程的流程。
参数说明:threadfn是指线程回调函数,data为回调函数的参数,namefmt为printf风格的线程名字格式串 11 * When woken, the thread will run @threadfn() with @data as its argument.
12 @threadfn() can either call do_exit() directly if it is a standalone thread for which noone will call kthread_stop(), or 14 * return when 'kthread_should_stop()' is true (which means kthread_stop() has been called).
The return value should be zero or a negative error number; it will be passed to kthread_stop(). 17 * 18 * Returns a task_struct or ERR_PTR(-ENOMEM). 19 */ 20 struct task_struct *kthread_create(int (*threadfn)(void *data), 21 void *data, 22 const char namefmt[], 23 ...) 24 { 25 struct kthread_create_info create; 26 //初始化线程创建描述符 27 create.threadfn = threadfn; 28 create.data = data;
/*kthread_create采用了完成量(completion)机制,可以查阅完成量机制的原理*/ 29 init_completion(&create.done); 30 31 spin_lock(&kthread_create_lock); 32 list_add_tail(&create.list, &kthread_create_list); //将线程创建描述符加入到全局线程创建队列中
/*注意这个全局链表kthread_create_list,所有通过kthread_create发起的线程创建请求都会先挂在这里*/ 33 spin_unlock(&kthread_create_lock);
34 /*这是最重要的地方,从代码看是唤醒了kthreadd_task这个进程,如果对代码比较熟悉的话,就会想到这是内核中的2号进程kthreadd(1号进程是init)*/ 35 wake_up_process(kthreadd_task);
/*当前进程在完成量上睡眠等待*/ 36 wait_for_completion(&create.done); 37 38 if (!IS_ERR(create.result)) { 39 struct sched_param param = { .sched_priority = 0 }; 40 va_list args; 41 42 va_start(args, namefmt); 43 vsnprintf(create.result->comm, sizeof(create.result->comm), 44 namefmt, args); 45 va_end(args); 46 /*设置线程属性,包括调度策略, 47 * root may have changed our (kthreadd's) priority or CPU mask. 48 * The kernel thread should not inherit these properties. 49 */ 50 sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); 51 set_user_nice(create.result, KTHREAD_NICE_LEVEL);
//设置调度属性,具体原理参考http://www.cnblogs.com/papam/archive/2009/08/27/1555353.html 52 set_cpus_allowed_ptr(create.result, cpu_all_mask);
在多核架构的内核中,对于每个cpu,都有一个struct rq* ptr静态结构体指针变量对应,但是通过传统方法无法得到该静态指针变量的值,通过阅读/kernel/sched.c函数,在导出函数set_cpus_allowed_ptr中使用过该指针变量,具体参考:http://wanderer-zjhit.blogbus.com/logs/186876356.html 53 } 54 return create.result; 55 } 56 EXPORT_SYMBOL(kthread_create);
首先分析其中一个重要的函数wake_up_process():
1 int wake_up_process(struct task_struct *p) 2 { 3 return try_to_wake_up(p, TASK_ALL, 0); 4 }
1 /*** 2 * try_to_wake_up - wake up a thread 3 * @p: the to-be-woken-up thread 4 * @state: the mask of task states that can be woken 5 * @sync: do a synchronous wakeup? 6 *看这里的解释,让唤醒的进程进入运行队列, 7 * Put it on the run-queue if it's not already there. The "current" 8 * thread is always on the run-queue (except when the actual 9 * re-schedule is in progress), and as such you're allowed to do 10 * the simpler "current->state = TASK_RUNNING" to mark yourself 11 * runnable without the overhead of this. 12 * 13 * returns failure only if the task is already active. 14 */ 15 static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) 16 { 17 int cpu, orig_cpu, this_cpu, success = 0; 18 unsigned long flags; 19 long old_state; 20 struct rq *rq; 21 22 if (!sched_feat(SYNC_WAKEUPS)) 23 sync = 0; 24 25 #ifdef CONFIG_SMP 26 if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { 27 struct sched_domain *sd; 28 29 this_cpu = raw_smp_processor_id(); 30 cpu = task_cpu(p); 31 32 for_each_domain(this_cpu, sd) { 33 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { 34 update_shares(sd); 35 break; 36 } 37 } 38 } 39 #endif 40 41 smp_wmb();
1. 关闭本地中断并给本地可执行队列rq加锁 42 rq = task_rq_lock(p, &flags); 43 update_rq_clock(rq);
2. 如果进程p的状态p->state不在要唤醒的进程状态集state中,则不能唤醒该进程 44 old_state = p->state; 45 if (!(old_state & state)) 46 goto out; 47 3. 如果进程p本身已经在可执行队列中,则无需再将其加入队列 48 if (p->se.on_rq) 49 goto out_running; 50 4. task_cpu(p)返回进程p所使用的CPU编号(p所归属的runqueue所在的CPU编号) 51 cpu = task_cpu(p); 52 orig_cpu = cpu; 53 this_cpu = smp_processor_id(); 54 55 #ifdef CONFIG_SMP 56 if (unlikely(task_running(rq, p))) 57 goto out_activate; 58 59 cpu = p->sched_class->select_task_rq(p, sync); 60 if (cpu != orig_cpu) { 61 set_task_cpu(p, cpu); 62 task_rq_unlock(rq, &flags); 63 /* might preempt at this point */ 64 rq = task_rq_lock(p, &flags); 65 old_state = p->state; 66 if (!(old_state & state)) 67 goto out; 68 if (p->se.on_rq) 69 goto out_running; 70 71 this_cpu = smp_processor_id(); 72 cpu = task_cpu(p); 73 } 74 75 #ifdef CONFIG_SCHEDSTATS 76 schedstat_inc(rq, ttwu_count); 77 if (cpu == this_cpu) 78 schedstat_inc(rq, ttwu_local); 79 else { 80 struct sched_domain *sd; 81 for_each_domain(this_cpu, sd) { 82 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { 83 schedstat_inc(sd, ttwu_wake_remote); 84 break; 85 } 86 } 87 } 88 #endif /* CONFIG_SCHEDSTATS */ 89 90 out_activate: 91 #endif /* CONFIG_SMP */ 92 schedstat_inc(p, se.nr_wakeups); 93 if (sync) 94 schedstat_inc(p, se.nr_wakeups_sync); 95 if (orig_cpu != cpu) 96 schedstat_inc(p, se.nr_wakeups_migrate); 97 if (cpu == this_cpu) 98 schedstat_inc(p, se.nr_wakeups_local); 99 else 100 schedstat_inc(p, se.nr_wakeups_remote);
更新唤醒进程p的平均睡眠时间sleep_avg和动态优先级prio;记录该进程唤醒前的睡眠状态;将该进程插入活跃优先级数组 101 activate_task(rq, p, 1); 102 success = 1; 103 104 /* 105 * Only attribute actual wakeups done by this task. 106 */ 107 if (!in_interrupt()) { 108 struct sched_entity *se = &current->se; 109 u64 sample = se->sum_exec_runtime; 110 111 if (se->last_wakeup) 112 sample -= se->last_wakeup; 113 else 114 sample -= se->start_runtime; 115 update_avg(&se->avg_wakeup, sample); 116 117 se->last_wakeup = se->sum_exec_runtime; 118 } 119 如果唤醒进程p的动态优先级prio比当前进程current的动态优先级高,则当前进程的TIF_NEED_RESCHED就需要设置 120 out_running: 121 trace_sched_wakeup(rq, p, success); 122 check_preempt_curr(rq, p, sync); 123 124 p->state = TASK_RUNNING; 125 #ifdef CONFIG_SMP 126 if (p->sched_class->task_wake_up) 127 p->sched_class->task_wake_up(rq, p); 128 #endif 129 out: 130 task_rq_unlock(rq, &flags); 131 132 return success; 133 }
由于电池问题,暂时先分析到这里。目前还有几个问题需要弄清楚:
1)线程如何进行管理的?
2)kthreadd_task,kthread_create_list具体的作用是什么?
kthread_create_list这个队列除了在kthread_create()中被添加节点之外,唯一被取出并处理的地方是int kthreadd(void *unused)函数。
1 int kthreadd(void *unused) 2 { 3 struct task_struct *tsk = current; 4 5 /* Setup a clean context for our children to inherit. */ 6 set_task_comm(tsk, "kthreadd"); 7 ignore_signals(tsk); 8 set_user_nice(tsk, KTHREAD_NICE_LEVEL); 9 set_cpus_allowed_ptr(tsk, cpu_all_mask); 10 set_mems_allowed(node_possible_map); 11 12 current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; 13 14 for (;;) { 15 set_current_state(TASK_INTERRUPTIBLE); 16 if (list_empty(&kthread_create_list)) //如果队列是空,进行调度 17 schedule(); 18 __set_current_state(TASK_RUNNING); //设置当前状态为运行状态 19 20 spin_lock(&kthread_create_lock); 21 while (!list_empty(&kthread_create_list)) { 22 struct kthread_create_info *create; 23 24 create = list_entry(kthread_create_list.next, 25 struct kthread_create_info, list); //从list中取出需要创建的线程描述符 26 list_del_init(&create->list); 27 spin_unlock(&kthread_create_lock); 28 29 create_kthread(create); 30 31 spin_lock(&kthread_create_lock); 32 } 33 spin_unlock(&kthread_create_lock); 34 } 35 36 return 0; 37 }
1 static void create_kthread(struct kthread_create_info *create) 2 { 3 int pid; 4 5 /* We want our own signal handler (we take no signals by default). */ 6 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); //创建一个新的进程。 7 if (pid < 0) { 8 create->result = ERR_PTR(pid); 9 complete(&create->done); 10 } 11 }
而启动kthreadd的地方为rest_init():
1 static noinline void __init_refok rest_init(void) 2 __releases(kernel_lock) 3 { 4 int pid; 5 6 kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); 7 numa_default_policy(); 8 pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); //线程添加函数,在系统初始化的时候开启 9 kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); 10 unlock_kernel();
从以上的分析可以看到,在系统启动的时候,创建一个进程,运行kthreadd函数,而kthreadd函数循环判断线程创建队列是否为空,如果为空则调度出去(进入睡眠),否则取出新添加的线程创建描述符,创建一个新的线程(进程);这也就是kthread_create_list的作用。
再就是kthreadd_task这个变量。它在__init_refok rest_init(void)函数中初始化:从find_task_by_pid_ns()的函数名可以看出,它是通过pid找到对应的任务,因此kthreadd_task指向的就是kthreadd进程的task_struct。内核中使用kthreadd_task的地方总共三处,一处是在初始化的时候,另外两处是:static void reparent_to_kthreadd(void),kthread_create();
线程创建时先把线程创建描述符加入kthread_create_list,再通过wake_up_process(kthreadd_task);唤醒kthreadd_task,由kthreadd取出描述符并创建新线程;而在reparent_to_kthreadd中则是把当前进程的父进程设置为kthreadd:
current->real_parent = current->parent = kthreadd_task;
线程创建的过程就是以上所述,剩下的就是对进程管理进行分析了。