1.搭建环境
cd ~/Work/
wget https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.18.6.tar.xz
xz -d linux-3.18.6.tar.xz
tar -xvf linux-3.18.6.tar
cd linux-3.18.6
make i386_defconfig
make
cd ~/Work/
mkdir rootfs
git clone https://github.com/mengning/menu.git # 话说这里为什么用MenuOS 我个人觉得老师一来是节约编译时间 二来也可以做做广告
cd menu
sudo apt-get install libc6:i386 lib32stdc++6 # 这两行安装非常有必要
sudo apt-get install lib32readline-gplv2-dev # 在64bit的Ubuntu环境下不能编译这个MenuOS的rootfs 需要这些包来支持 即使用了-m32
gcc -o init linktable.c menu.c test.c -m32 -static -lpthread
cd ../rootfs
cp ../menu/init ./
find . | cpio -o -Hnewc |gzip -9 > ../rootfs.img
cd ~/Work/
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img
sudo apt-get install libncurses5-dev # 保证make menuconfig可用
make menuconfig # 需在linux-3.18.6目录下执行;保存配置后要重新make,才能生成带调试信息的vmlinux
kernel hacking-> compile-time checks and compiler options [*] compile the kernel with debug info
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S
# -S freeze CPU at startup (use ’c’ to start execution)
# -s shorthand for -gdb tcp::1234 若不想使用1234端口,则可以使用-gdb tcp::xxxx来取代-s选项
然后打开另一个shell窗口
gdb
(gdb)file linux-3.18.6/vmlinux # 在gdb界面中target remote之前加载符号表
(gdb)target remote:1234 # 建立gdb和gdbserver之间的连接,按c 让qemu上的Linux继续运行
(gdb)break start_kernel # 断点的设置可以在target remote之前,也可以在之后
2.分析start_kernel
可以在 http://codelab.shiyanlou.com/xref/linux-3.18.6/init/main.c 找到start_kernel函数的源代码。
关注需要关注的,删除不必要的代码:
asmlinkage __visible void __init start_kernel(void) { //init_task即手工创建的PCB,0号进程即最终的idle进程 set_task_stack_end_magic(&init_task); //初始化中断向量 trap_init(); //内存管理模块 mm_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() * time - but meanwhile we still have a functioning scheduler. */ sched_init(); //内核启动时一直存在,0号进程创建了1进程和其他服务的内核的进程 rest_init(); }
执行start_kernel时,会对CPU、内存等各种硬件设备进行初始化,并依次初始化大量不同的内核子系统(如中断向量、内存管理、调度器等)。
rest_init是Linux内核中负责创建初始进程的函数(它创建1号进程kernel_init和内核线程kthreadd)。如果在rest_init执行之前就创建我们自己的进程,并且用自己的调度算法来调度这些进程,那么rest_init将永远不会被执行:因为自己创建的进程会一直轮转调度下去而不会结束,控制流也就无法到达rest_init(例如mykernel实验环境就是这样搭建的)。
再看rest_init
static noinline void __init_refok rest_init(void) { int pid; rcu_scheduler_starting(); /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS. */ kernel_thread(kernel_init, NULL, CLONE_FS); //创建新进程 numa_default_policy(); //创建内核进程,管理系统资源 pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); rcu_read_lock(); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); rcu_read_unlock(); complete(&kthreadd_done); /* * The boot idle thread must execute schedule() * at least once to get things moving: */ init_idle_bootup_task(current); schedule_preempt_disabled(); /* Call into cpu_idle with preempt disabled */ cpu_startup_entry(CPUHP_ONLINE); }
分析上述kernel_thread(kernel_init, NULL, CLONE_FS)中kernel_init部分代码
if (ramdisk_execute_command) { ret = run_init_process(ramdisk_execute_command); if (!ret) return 0; pr_err("Failed to execute %s (error %d) ",ramdisk_execute_command, ret); }
run_init_process是用来加载并执行指定可执行程序的函数,被它成功执行的程序就成为Linux系统的1号进程,即第一个用户态进程。ramdisk_execute_command默认是根目录下的/init;如果该文件不存在或执行失败,kernel_init会继续依次尝试/sbin/init、/etc/init、/bin/init、/bin/sh等来作为1号进程。
分析上述pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES)中kernel_thread代码
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, (unsigned long)arg, NULL, NULL); }
kernel_thread实际上就是通过do_fork去fork出一个内核线程。
分析上述cpu_startup_entry(CPUHP_ONLINE)中cpu_idle_loop()
static void cpu_idle_loop(void) { while (1) { /* * If the arch has a polling bit, we maintain an invariant: * * Our polling bit is clear if we're not scheduled (i.e. if * rq->curr != rq->idle). This means that, if rq->idle has * the polling bit set, then setting need_resched is * guaranteed to cause the cpu to reschedule. */ __current_set_polling(); tick_nohz_idle_enter(); while (!need_resched()) { check_pgt_cache(); rmb(); if (cpu_is_offline(smp_processor_id())) arch_cpu_idle_dead(); local_irq_disable(); arch_cpu_idle_enter(); /* * In poll mode we reenable interrupts and spin. * * Also if we detected in the wakeup from idle * path that the tick broadcast device expired * for us, we don't want to go deep idle as we * know that the IPI is going to arrive right * away */ if (cpu_idle_force_poll || tick_check_broadcast_expired()) cpu_idle_poll(); else cpuidle_idle_call(); arch_cpu_idle_exit(); } /* * Since we fell out of the loop above, we know * TIF_NEED_RESCHED must be set, propagate it into * PREEMPT_NEED_RESCHED. * * This is required because for polling idle loops we will * not have had an IPI to fold the state for us. */ preempt_set_need_resched(); tick_nohz_idle_exit(); __current_clr_polling(); /* * We promise to call sched_ttwu_pending and reschedule * if need_resched is set while polling is set. That * means that clearing polling needs to be visible * before doing these things. */ smp_mb__after_atomic(); sched_ttwu_pending(); schedule_preempt_disabled(); } }
当系统没有进程需要执行时就调度到idle进程
3.回顾总结
start_kernel启动之后,init_task(0号进程)会一直存在。在rest_init中,0号进程先创建1号进程(kernel_init)以及负责管理内核线程的kthreadd进程。最后,0号进程自身通过cpu_startup_entry进入cpu_idle_loop,成为idle进程:只有在系统没有其他进程需要执行时才会调度到它,它利用循环不断消耗空闲的CPU时间片,并且从不返回。