转自:http://www.formosaos.url.tw/linux/kinit.html
從核心程式碼預覽,可以知道核心的檔案和目錄都非常的多,不容易以個別目錄的程式碼的方式去了解。 筆者選擇以深入核心初始化程序的方式,了解核心的構成。 透過核心初始化的程序,了解各個模組間的關係和啟用順序,進而建立對核心模組群的認識。 |
核心起始程序包括核心起頭程式、I386 核心起始程式、核心起始程式。
其中,核心起始程式會處理比較重要的核心模組群初始化。
最後執行兩個核心行程,核心初始化行程、核心行程建立行程。
核心初始化行程會等核心行程建立行程啟動後,再繼續執行核心初始化工作。
等核心初始化行程完成後,核心便宣告啟動完成。
linux/init/main.c
/* start_kernel(): architecture-independent kernel entry point. Initializes
 * core subsystems in a strict, order-dependent sequence and finally calls
 * rest_init() to spawn the init and kthreadd processes.
 * NOTE(review): the "\n" escapes inside the printk format strings below were
 * lost in the original transcription (rendered as literal line breaks) and
 * have been restored. */
01 asmlinkage void __init start_kernel(void){
02 char * command_line;
03 extern const struct kernel_param __start___param[], __stop___param[];
04
05 smp_setup_processor_id();
06 lockdep_init();
07 debug_objects_early_init();
08 boot_init_stack_canary();
09 cgroup_init_early();
10 local_irq_disable();
11 early_boot_irqs_disabled = true;
12 tick_init();
13 boot_cpu_init();
14 page_address_init();
15 printk(KERN_NOTICE "%s", linux_banner);
16 setup_arch(&command_line);
17 mm_init_owner(&init_mm, &init_task);
18 mm_init_cpumask(&init_mm);
19 setup_command_line(command_line);
20 setup_nr_cpu_ids();
21 setup_per_cpu_areas();
22 smp_prepare_boot_cpu();
23 build_all_zonelists(NULL);
24 page_alloc_init();
25 printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
26 parse_early_param();
27 parse_args("Booting kernel", static_command_line, __start___param,
28 __stop___param - __start___param,
29 &unknown_bootoption);
30 setup_log_buf(0);
31 pidhash_init();
32 vfs_caches_init_early();
33 sort_main_extable();
34 trap_init();
35 mm_init();
36 sched_init();
37 preempt_disable();
38 if (!irqs_disabled()) {
39 printk(KERN_WARNING "start_kernel(): bug: interrupts were "
40 "enabled *very* early, fixing it\n");
41 local_irq_disable();
42 }
43 idr_init_cache();
44 perf_event_init();
45 rcu_init();
46 radix_tree_init();
47 early_irq_init();
48 init_IRQ();
49 prio_tree_init();
50 init_timers();
51 hrtimers_init();
52 softirq_init();
53 timekeeping_init();
54 time_init();
55 profile_init();
56 call_function_init();
57 if (!irqs_disabled())
58 printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n");
59 early_boot_irqs_disabled = false;
60 local_irq_enable();
61 gfp_allowed_mask = __GFP_BITS_MASK;
62 kmem_cache_init_late();
63 console_init();
64 if (panic_later) panic(panic_later, panic_param);
65 lockdep_info();
66 locking_selftest();
67 #ifdef CONFIG_BLK_DEV_INITRD
68 if (initrd_start && !initrd_below_start_ok &&
69 page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
70 printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
71 "disabling it.\n",
72 page_to_pfn(virt_to_page((void *)initrd_start)),
73 min_low_pfn);
74 initrd_start = 0;
75 }
76 #endif
77 page_cgroup_init();
78 enable_debug_pagealloc();
79 debug_objects_mem_init();
80 kmemleak_init();
81 setup_per_cpu_pageset();
82 numa_policy_init();
83 if (late_time_init) late_time_init();
84 sched_clock_init();
85 calibrate_delay();
86 pidmap_init();
87 anon_vma_init();
88 #ifdef CONFIG_X86
89 if (efi_enabled) efi_enter_virtual_mode();
90 #endif
91 thread_info_cache_init();
92 cred_init();
93 fork_init(totalram_pages);
94 proc_caches_init();
95 buffer_init();
96 key_init();
97 security_init();
98 dbg_late_init();
99 vfs_caches_init(totalram_pages);
100 signals_init();
101 page_writeback_init();
102 #ifdef CONFIG_PROC_FS
103 proc_root_init();
104 #endif
105 cgroup_init();
106 cpuset_init();
107 taskstats_init_early();
108 delayacct_init();
109 check_bugs();
110 acpi_early_init();
111 sfi_init_late();
112 ftrace_init();
113 rest_init();
114 }
|
依賴鎖的檔案位置是 linux/kernel/lockdep.c。 依賴鎖初始化函式會初始化兩個串列,依賴鎖類別雜湊表串列和依賴鎖鏈雜湊表串列,完成後設定初始化旗號為真,表示依賴鎖已初始化完成。 linux/kernel/lockdep.c
/* lockdep_init(): idempotent early setup of lockdep. Initializes every bucket
 * of the class hash table and the chain hash table as empty circular lists,
 * then marks lockdep as initialized. */
01 void lockdep_init(void){
02 int i;
03
04 if (lockdep_initialized) return;
05 for (i = 0; i < CLASSHASH_SIZE; i++) INIT_LIST_HEAD(classhash_table + i);
06 for (i = 0; i < CHAINHASH_SIZE; i++) INIT_LIST_HEAD(chainhash_table + i);
07 lockdep_initialized = 1;
08 }
INIT_LIST_HEAD 的定義: linux/include/linux/list.h
/* INIT_LIST_HEAD(): make a list head an empty circular doubly-linked list
 * by pointing both links back at itself. */
01 static inline void INIT_LIST_HEAD(struct list_head *list){
02 list->next = list;
03 list->prev = list;
04 }
依賴鎖依賴雜湊串列表: linux/kernel/lockdep.c
/* Lockdep class hash table: CLASSHASH_SIZE (1 << 12) buckets, indexed by
 * hashing the lock-class key with hash_long(). */
01 #define MAX_LOCKDEP_KEYS_BITS 13
02 #define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
03 #define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
04 #define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS)
05 #define classhashentry(key) (classhash_table + __classhashfn((key)))
06 static struct list_head classhash_table[CLASSHASH_SIZE];
依賴鎖鏈雜湊串列表: linux/kernel/lockdep.c
/* Lockdep chain hash table: CHAINHASH_SIZE (1 << 14) buckets, indexed by
 * hashing the chain key with hash_long(). */
01 #define MAX_LOCKDEP_CHAINS_BITS 15
02 #define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
03 #define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
04 #define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS)
05 #define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
06 static struct list_head chainhash_table[CHAINHASH_SIZE];
|
控制群(control group),簡稱 cgroup,是一個行程群聚(process grouping)系統,用於量測系統效能。 也就是透過將行程群聚的方式,統計行程群集體消耗的 CPU 運算資源。 linux/kernel/cgroup.c
/* cgroup_init_early(): set up the initial css_set, the root cgroup hierarchy
 * and the css_set hash table, then early-initialize any built-in subsystem
 * that requested early_init. BUG()s on inconsistent subsystem descriptors.
 * NOTE(review): the "\n" in the printk format string was lost in the original
 * transcription and has been restored. */
01 int __init cgroup_init_early(void){
02 int i;
03 atomic_set(&init_css_set.refcount, 1);
04 INIT_LIST_HEAD(&init_css_set.cg_links);
05 INIT_LIST_HEAD(&init_css_set.tasks);
06 INIT_HLIST_NODE(&init_css_set.hlist);
07 css_set_count = 1;
08 init_cgroup_root(&rootnode);
09 root_count = 1;
10 init_task.cgroups = &init_css_set;
11 init_css_set_link.cg = &init_css_set;
12 init_css_set_link.cgrp = dummytop;
13 list_add(&init_css_set_link.cgrp_link_list,&rootnode.top_cgroup.css_sets);
14 list_add(&init_css_set_link.cg_link_list,&init_css_set.cg_links);
15 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) INIT_HLIST_HEAD(&css_set_table[i]);
16 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
17 struct cgroup_subsys *ss = subsys[i];
18
19 BUG_ON(!ss->name);
20 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
21 BUG_ON(!ss->create);
22 BUG_ON(!ss->destroy);
23 if (ss->subsys_id != i) {
24 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",ss->name, ss->subsys_id);
25 BUG();
26 }
27 if (ss->early_init) cgroup_init_subsys(ss);
28 }
29 return 0;
30 }
|
將目前工作的 CPU 的中斷除能,即使用 cli 指令,使 CPU 外部中斷信號除能。 在設定啟動初期中斷除能旗號為真,表示中斷已經除能。 linux/kernel/lockdep.h
/* local_irq_disable(): disable interrupts on the current CPU, then invoke the
 * hardirq-off tracepoint. NOTE(review): the macro line-continuation backslash
 * was lost in the original transcription and has been restored. */
01 #define local_irq_disable() \
02 do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
linux/arch/x86/include/asm/irqflags.h
/* x86 interrupt-disable call chain: raw_local_irq_disable() ->
 * arch_local_irq_disable() -> native_irq_disable(), which executes the CLI
 * instruction with a "memory" clobber to prevent reordering. */
01 # define trace_hardirqs_off() do { } while (0)
02
03 #define raw_local_irq_disable() arch_local_irq_disable()
04
05 static inline void arch_local_irq_disable(void){
06 native_irq_disable();
07 }
08
09 static inline void native_irq_disable(void){
10 asm volatile("cli": : :"memory");
11 }
|
將時鐘滴答事件通知器(notifier)註冊到核心的通知器串列,之後核心就可以有時間計數的能力。 這裏只是註冊時鐘滴答通知器,實際硬體的時鐘運作要另外啟動。 linux/kernel/time/tick-common.c
/* tick_init(): register the tick notifier on the clockevents notifier chain.
 * Hardware clock devices are started separately, later in boot. */
01 void __init tick_init(void){
02 clockevents_register_notifier(&tick_notifier);
03 }
linux/kernel/time/clockevents.c
/* clockevents_register_notifier(): add a notifier block to the clockevents
 * chain under clockevents_lock with interrupts disabled. Returns the
 * raw_notifier_chain_register() result. */
01 int clockevents_register_notifier(struct notifier_block *nb){
02 unsigned long flags;
03 int ret;
04
05 raw_spin_lock_irqsave(&clockevents_lock, flags);
06 ret = raw_notifier_chain_register(&clockevents_chain, nb);
07 raw_spin_unlock_irqrestore(&clockevents_lock, flags);
08 return ret;
09 }
實際核心通知器鏈的通知器註冊函式,將通知器註冊到通知器鏈。 linux/kernel/time/clockevents.c
/* raw_notifier_chain_register(): thin wrapper that registers a notifier on a
 * raw (caller-locked) notifier chain head. */
01 int raw_notifier_chain_register(struct raw_notifier_head *nh,struct notifier_block *n){
02 return notifier_chain_register(&nh->head, n);
03 }
核心通知器鏈的通知器註冊函式,功能是將一個通知器以優先權次序,安置到某一通知器鏈。 linux/kernel/notifier.c
/* notifier_chain_register(): insert notifier n into the singly-linked chain
 * ordered by descending priority; the insertion is published with
 * rcu_assign_pointer() for RCU readers. Always returns 0. */
01 static int notifier_chain_register(struct notifier_block **nl,struct notifier_block *n){
02 while ((*nl) != NULL) {
03 if (n->priority > (*nl)->priority) break;
04 nl = &((*nl)->next);
05 }
06 n->next = *nl;
07 rcu_assign_pointer(*nl, n);
08 return 0;
09 }
|
CPU 管理器有總管 CPU 的遮罩位元結構,這個動作是把啟動過程所使用的 CPU 設定到各遮罩位元結構。 linux/init/main.c
/* boot_cpu_init(): mark the CPU that is executing the boot sequence as
 * online, active, present and possible in the global CPU masks. */
01 static void __init boot_cpu_init(void){
02 int cpu = smp_processor_id();
03 set_cpu_online(cpu, true);
04 set_cpu_active(cpu, true);
05 set_cpu_present(cpu, true);
06 set_cpu_possible(cpu, true);
07 }
|
smp_processor_id 會再轉譯成 raw_smp_processor_id。 linux/include/linux/smp.h
/* smp_processor_id() expands to raw_smp_processor_id(); on 32-bit x86 SMP
 * this reads the per-CPU variable cpu_number via percpu_read(). The 64-bit
 * variant is elided in this excerpt. */
01 #define smp_processor_id() raw_smp_processor_id()
01 #ifdef CONFIG_X86_32_SMP
01 #define raw_smp_processor_id() (percpu_read(cpu_number))
01 #elif defined(CONFIG_X86_64_SMP)
01 ....
01 #endif
percpu_read 會再轉譯成 percpu_from_op,用以讀取 CPU ID。 linux/arch/x86/include/asm/percpu.h
/* percpu_read(var): read a per-CPU variable with a single MOV from the
 * per-CPU segment; percpu_from_op() dispatches on sizeof(var) to pick the
 * byte/word/long/quad form of the instruction. NOTE(review): the macro
 * line-continuation backslashes were lost in the original transcription and
 * have been restored. */
01 #define percpu_read(var) percpu_from_op("mov", var, "m" (var))
02
03 #define percpu_from_op(op, var, constraint) \
04 ({ \
05 typeof(var) pfo_ret__; \
06 switch (sizeof(var)) { \
07 case 1: \
08 asm(op "b "__percpu_arg(1)",%0" \
09 : "=q" (pfo_ret__) \
10 : constraint); \
11 break; \
12 case 2: \
13 asm(op "w "__percpu_arg(1)",%0" \
14 : "=r" (pfo_ret__) \
15 : constraint); \
16 break; \
17 case 4: \
18 asm(op "l "__percpu_arg(1)",%0" \
19 : "=r" (pfo_ret__) \
20 : constraint); \
21 break; \
22 case 8: \
23 asm(op "q "__percpu_arg(1)",%0" \
24 : "=r" (pfo_ret__) \
25 : constraint); \
26 break; \
27 default: __bad_percpu_size(); \
28 } \
29 pfo_ret__; \
30 })
|
處理器設定函式群,包括 set_cpu_online、set_cpu_active、set_cpu_present、set_cpu_possible。動作原理上都相類似。 這裡以 CPU 線上位元設定函式為例,說明 CPU 處理器設定函式的操作方法。 linux/kernel/cpu.c
/* Backing bitmap for the online-CPU mask; cpu_online_mask is a const view of it. */
01 static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
02 const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
CPU 位元映射結構的宣告定義: linux/include/linux/types.h
/* DECLARE_BITMAP(): declare an unsigned long array large enough to hold the
 * requested number of bits. NOTE(review): the macro line-continuation
 * backslash was lost in the original transcription and has been restored. */
01 #define DECLARE_BITMAP(name,bits) \
02 unsigned long name[BITS_TO_LONGS(bits)]
CPU 位元映射結構的設定函式: linux/kernel/cpu.c
/* set_cpu_online(): set or clear the given CPU's bit in cpu_online_bits. */
01 void set_cpu_online(unsigned int cpu, bool online){
02 if (online) cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
03 else cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
04 }
CPU 遮罩設定函式: linux/include/linux/cpumask.h
/* cpumask_set_cpu(): set the bit for cpu in the destination cpumask after a
 * range check via cpumask_check(). */
01 static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp){
02 set_bit(cpumask_check(cpu), cpumask_bits(dstp));
03 }
記憶體位元設定函式: linux/include/asm-generic/bitops/atomic.h
/* set_bit(): atomically set bit nr in the bitmap at addr. This excerpt shows
 * the generic spinlock-protected implementation; it ORs the bit mask into the
 * containing word while holding a hashed atomic spinlock. */
01 static inline void set_bit(int nr, volatile unsigned long *addr){
02 unsigned long mask = BIT_MASK(nr);
03 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
04 unsigned long flags;
05
06 _atomic_spin_lock_irqsave(p, flags);
07 *p |= mask;
08 _atomic_spin_unlock_irqrestore(p, flags);
09 }
|
取得一塊記憶體給記憶體管理器的頁管理器,這塊記憶體必須是連續記憶體。 linux/mm/highmem.c
/* page_address_init(): build the highmem page-address pool by chaining all
 * static page_address_maps entries onto page_address_pool, and initialize
 * each hash-table bucket's list and spinlock plus the pool lock. */
01 void __init page_address_init(void){
02 int i;
03
04 INIT_LIST_HEAD(&page_address_pool);
05 for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
06 list_add(&page_address_maps[i].list, &page_address_pool);
07 for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
08 INIT_LIST_HEAD(&page_address_htable[i].lh);
09 spin_lock_init(&page_address_htable[i].lock);
10 }
11 spin_lock_init(&pool_lock);
12 }
|
LINUX 支援不同的機器架構,因此架構設置函式內容隨架構不同而改變。 當 VMLINUX 是由 EFI 載入器載入,也會伴隨著 EFI memmap、systab 的資訊。初始化過程中需要處理這些資料結構。 linux/arch/x86/kernel/setup.c
/* setup_arch(): x86 architecture-specific setup — copies boot_params data,
 * builds the e820 memory map, sets up paging, reserves regions (initrd,
 * crashkernel, ...), probes ACPI/SFI/APIC and fills in *cmdline_p.
 * NOTE(review): transcription damage repaired below — lost "\n" escapes in
 * printk strings, "<<" shift operators truncated to "<" (HTML mangling) in
 * the max_pfn_mapped/max_low_pfn lines, and the missing closing brace. */
01 void __init setup_arch(char **cmdline_p){
01 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
01 visws_early_detect();
01 clone_pgd_range(swapper_pg_dir+ KERNEL_PGD_BOUNDARY,initial_page_table + KERNEL_PGD_BOUNDARY,
01 KERNEL_PGD_PTRS);
01 load_cr3(swapper_pg_dir);
01 __flush_tlb_all();
01 olpc_ofw_detect();
01 early_trap_init();
01 early_cpu_init();
01 early_ioremap_init();
01 setup_olpc_ofw_pgd();
01 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
01 screen_info = boot_params.screen_info;
01 edid_info = boot_params.edid_info;
01 apm_info.bios = boot_params.apm_bios_info;
01 ist_info = boot_params.ist_info;
01 if (boot_params.sys_desc_table.length != 0) {
01 set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
01 machine_id = boot_params.sys_desc_table.table[0];
01 machine_submodel_id = boot_params.sys_desc_table.table[1];
01 BIOS_revision = boot_params.sys_desc_table.table[2];
01 }
01 saved_video_mode = boot_params.hdr.vid_mode;
01 bootloader_type = boot_params.hdr.type_of_loader;
01 if ((bootloader_type >> 4) == 0xe) {
01 bootloader_type &= 0xf;
01 bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
01 }
01 bootloader_version = bootloader_type & 0xf;
01 bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
01 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
01 rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
01 rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
01 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,"EL32",4)) {
01 efi_enabled = 1;
01 efi_memblock_x86_reserve_range();
01 }
01 x86_init.oem.arch_setup();
01 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
01 setup_memory_map();
01 parse_setup_data();
01 e820_reserve_setup_data();
01 copy_edd();
01 if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY;
01 init_mm.start_code = (unsigned long) _text;
01 init_mm.end_code = (unsigned long) _etext;
01 init_mm.end_data = (unsigned long) _edata;
01 init_mm.brk = _brk_end;
01
01 code_resource.start = virt_to_phys(_text);
01 code_resource.end = virt_to_phys(_etext)-1;
01 data_resource.start = virt_to_phys(_etext);
01 data_resource.end = virt_to_phys(_edata)-1;
01 bss_resource.start = virt_to_phys(&__bss_start);
01 bss_resource.end = virt_to_phys(&__bss_stop)-1;
01 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
01 *cmdline_p = command_line;
01 x86_configure_nx();
01 parse_early_param();
01 x86_report_nx();
01 memblock_x86_reserve_range_setup_data();
01 if (acpi_mps_check()) {
01 disable_apic = 1;
01 setup_clear_cpu_cap(X86_FEATURE_APIC);
01 }
01 if (pci_early_dump_regs) early_dump_pci_devices();
01 finish_e820_parsing();
01 if (efi_enabled) efi_init();
01 dmi_scan_machine();
01 init_hypervisor_platform();
01 x86_init.resources.probe_roms();
01 insert_resource(&iomem_resource, &code_resource);
01 insert_resource(&iomem_resource, &data_resource);
01 insert_resource(&iomem_resource, &bss_resource);
01 trim_bios_range();
01 if (ppro_with_ram_bug()) {
01 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,E820_RESERVED);
01 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
01 printk(KERN_INFO "fixed physical RAM map:\n");
01 e820_print_map("bad_ppro");
01 }
01 max_pfn = e820_end_of_ram_pfn();
01 mtrr_bp_init();
01 if (mtrr_trim_uncached_memory(max_pfn)) max_pfn = e820_end_of_ram_pfn();
01 find_low_pfn_range();
01 find_smp_config();
01 reserve_ibft_region();
01 reserve_brk();
01 cleanup_highmap();
01 memblock.current_limit = get_max_mapped();
01 memblock_x86_fill();
01 if (efi_enabled) efi_reserve_boot_services();
01 early_reserve_e820_mpc_new();
01 setup_bios_corruption_check();
01 printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", max_pfn_mapped<<PAGE_SHIFT);
01 setup_trampolines();
01 init_gbpages();
01 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
01 max_pfn_mapped = max_low_pfn_mapped;
01 memblock.current_limit = get_max_mapped();
01 if (init_ohci1394_dma_early) init_ohci1394_dma_on_all_controllers();
01 setup_log_buf(1);
01 reserve_initrd();
01 reserve_crashkernel();
01 vsmp_init();
01 io_delay_init();
01 acpi_boot_table_init();
01 early_acpi_boot_init();
01 initmem_init();
01 memblock_find_dma_reserve();
01 x86_init.paging.pagetable_setup_start(swapper_pg_dir);
01 paging_init();
01 x86_init.paging.pagetable_setup_done(swapper_pg_dir);
01 if (boot_cpu_data.cpuid_level >= 0) {
01 mmu_cr4_features = read_cr4();
01 }
01 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
01 swapper_pg_dir+ KERNEL_PGD_BOUNDARY,KERNEL_PGD_PTRS);
01 tboot_probe();
01 generic_apic_probe();
01 early_quirks();
01 acpi_boot_init();
01 sfi_init();
01 x86_dtb_init();
01 if (smp_found_config) get_smp_config();
01 prefill_possible_map();
01 init_cpu_to_node();
01 init_apic_mappings();
01 ioapic_and_gsi_init();
01 kvm_guest_init();
01 e820_reserve_resources();
01 e820_mark_nosave_regions(max_low_pfn);
01 x86_init.resources.reserve_resources();
01 e820_setup_gap();
01 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) conswitchp = &vga_con;
01 x86_init.oem.banner();
01 x86_init.timers.wallclock_init();
01 mcheck_init();
01 arch_init_ideal_nops();
01 }
|
儲存啟動命令列和命令列參數內容。 linux/init/main.c
/* setup_command_line(): allocate boot-time buffers and preserve copies of the
 * raw boot command line and the (possibly arch-modified) command line. */
01 static void __init setup_command_line(char *command_line){
01 saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
01 static_command_line = alloc_bootmem(strlen (command_line)+1);
01 strcpy (saved_command_line, boot_command_line);
01 strcpy (static_command_line, command_line);
01 }
|
nr_cpu_ids 是 number of cpu id。NR_CPUS 是 cpu 最大值 8。 cpu_possible_mask 是可能的 cpu 的對映位元圖。 find_last_bit 找到 cpu_possible_mask 最後一個位元為一的值,此值為 base 0,必須加一才會變成 nr_cpu_ids 。 linux/kernel/smp.c
/* setup_nr_cpu_ids(): derive nr_cpu_ids from the highest set bit in the
 * possible-CPU mask (find_last_bit is 0-based, hence the +1). */
01 void __init setup_nr_cpu_ids(void){
01 nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
01 }
函式 find_last_bit: linux/lib/find_last_bit.c
/* find_last_bit(): return the index of the highest set bit within the first
 * `size` bits of the bitmap, or `size` if no bit is set. Scans the partial
 * last word first (masked to `size`), then whole words from high to low. */
01 unsigned long find_last_bit(const unsigned long *addr, unsigned long size){
02 unsigned long words;
03 unsigned long tmp;
04
05 words = size / BITS_PER_LONG;
06 if (size & (BITS_PER_LONG-1)) {
07 tmp = (addr[words] & (~0UL >> (BITS_PER_LONG- (size & (BITS_PER_LONG-1)))));
08 if (tmp) goto found;
09 }
10 while (words) {
11 tmp = addr[--words];
12 if (tmp) {
13 found:
14 return words * BITS_PER_LONG + __fls(tmp);
15 }
16 }
17 return size;
18 }
|
per_cpu_areas 設定函式在 linux/mm/percpu.c 和 linux/arch/x86/kernel/percpu.c。 在構建系統的編譯檔中,只有看到 linux/mm/percpu.c 被包含進來,所以這裡取用的是這個檔案的程式碼。 CPU AREA 是指每個 CPU 專屬的記憶體,用來存放該 CPU 的相關資料。 linux/mm/percpu.c
/* setup_per_cpu_areas(): generic per-CPU area setup. Allocates the first
 * per-CPU chunk via pcpu_embed_first_chunk() (panics on failure), then
 * records each possible CPU's offset from the static per-CPU template. */
01 void __init setup_per_cpu_areas(void){
02 unsigned long delta;
03 unsigned int cpu;
04 int rc;
05
06 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,PERCPU_DYNAMIC_RESERVE,
07 PAGE_SIZE, NULL,pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
08 if (rc < 0) panic("Failed to initialize percpu areas.");
09 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
10 for_each_possible_cpu(cpu)
11 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
12 }
linux/include/asm-generic/vmlinux.lds.h
/* PERCPU_INPUT(): linker-script fragment laying out the .data..percpu input
 * sections between __per_cpu_start and __per_cpu_end. NOTE(review): the macro
 * line-continuation backslashes were lost in the original transcription and
 * have been restored. */
01 #define PERCPU_INPUT(cacheline) \
02 VMLINUX_SYMBOL(__per_cpu_start) = .; \
03 *(.data..percpu..first) \
04 . = ALIGN(PAGE_SIZE); \
05 *(.data..percpu..page_aligned) \
06 . = ALIGN(cacheline); \
07 *(.data..percpu..readmostly) \
08 . = ALIGN(cacheline); \
09 *(.data..percpu) \
10 *(.data..percpu..shared_aligned) \
11 VMLINUX_SYMBOL(__per_cpu_end) = .;
|
設定在 SMP 模式下啟動的 CPU。 linux/arch/x86/include/asm/smp.h
/* smp_prepare_boot_cpu(): dispatch to the platform's boot-CPU preparation
 * hook via the smp_ops function table. */
01 static inline void smp_prepare_boot_cpu(void){
01 smp_ops.smp_prepare_boot_cpu();
01 }
在 SMP 模式的 CPU 操作函式結構設定多個 CPU 相關函式。 linux/arch/x86/kernel/smp.c
/* Default x86 SMP operations table: all hooks point to the native_*
 * implementations (paravirt guests override these). */
01 struct smp_ops smp_ops = {
02 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
03 .smp_prepare_cpus = native_smp_prepare_cpus,
04 .smp_cpus_done = native_smp_cpus_done,
05 .stop_other_cpus = native_stop_other_cpus,
06 .smp_send_reschedule = native_smp_send_reschedule,
07 .cpu_up = native_cpu_up,
08 .cpu_die = native_cpu_die,
09 .cpu_disable = native_cpu_disable,
10 .play_dead = native_play_dead,
11 .send_call_func_ipi = native_send_call_func_ipi,
12 .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
CPU 準備啟動的操作函式: linux/arch/x86/kernel/smpboot.c
/* native_smp_prepare_boot_cpu(): switch the boot CPU to its per-CPU GDT, mark
 * it called-out, and set its per-CPU state to CPU_ONLINE. */
01 void __init native_smp_prepare_boot_cpu(void){
02 int me = smp_processor_id();
03 switch_to_new_gdt(me);
04 cpumask_set_cpu(me, cpu_callout_mask);
05 per_cpu(cpu_state, me) = CPU_ONLINE;
06 }
|
記憶體有許多 node,每個 node 都有自己的 zone list,用來紀錄記憶體的使用。 linux/mm/page_alloc.c
/* build_all_zonelists(): (re)build every node's zonelists. During boot this
 * runs directly; later it runs under stop_machine(). Afterwards it computes
 * vm_total_pages and decides whether mobility grouping is worthwhile.
 * NOTE(review): the "\n" in the printk format string was lost in the original
 * transcription and has been restored. */
01 void __ref build_all_zonelists(void *data){
02 set_zonelist_order();
03 if (system_state == SYSTEM_BOOTING) {
04 __build_all_zonelists(NULL);
05 mminit_verify_zonelist();
06 cpuset_init_current_mems_allowed();
07 }
08 else{
09 stop_machine(__build_all_zonelists, NULL, NULL);
10 }
11 vm_total_pages = nr_free_pagecache_pages();
12 if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))
13 page_group_by_mobility_disabled = 1;
14 else
15 page_group_by_mobility_disabled = 0;
16 printk("Built %i zonelists in %s order, mobility grouping %s. "
17 "Total pages: %ld\n",
18 nr_online_nodes,
19 zonelist_order_name[current_zonelist_order],
20 page_group_by_mobility_disabled ? "off" : "on",
21 vm_total_pages);
22 }
linux/mm/page_alloc.c
01 static void set_zonelist_order(void){
01 current_zonelist_order = ZONELIST_ORDER_ZONE;
01 }
linux/mm/page_alloc.c
/* __build_all_zonelists(): build the zonelists and zonelist cache for every
 * online node, then initialize the boot pageset of each possible CPU. */
01 static __init_refok int __build_all_zonelists(void *data){
02 int nid;
03 int cpu;
04
05 for_each_online_node(nid) {
06 pg_data_t *pgdat = NODE_DATA(nid);
07 build_zonelists(pgdat);
08 build_zonelist_cache(pgdat);
09 }
10 for_each_possible_cpu(cpu)
11 setup_pageset(&per_cpu(boot_pageset, cpu), 0);
12 return 0;
13 }
linux/mm/page_alloc.c
/* build_zonelists(): construct a node's fallback zonelists. Empties the
 * existing zonelists, then visits candidate nodes in best-first order
 * (find_next_best_node), recording node load and enabling zone reclaim when a
 * node is farther than RECLAIM_DISTANCE; finally materializes the list in
 * node or zone order and builds the node-local zonelist. */
01 static void build_zonelists(pg_data_t *pgdat){
02 int j, node, load;
03 enum zone_type i;
04 nodemask_t used_mask;
05 int local_node, prev_node;
06 struct zonelist *zonelist;
07 int order = current_zonelist_order;
08
09 for (i = 0; i < MAX_ZONELISTS; i++) {
10 zonelist = pgdat->node_zonelists + i;
11 zonelist->_zonerefs[0].zone = NULL;
12 zonelist->_zonerefs[0].zone_idx = 0;
13 }
14 local_node = pgdat->node_id;
15 load = nr_online_nodes;
16 prev_node = local_node;
17 nodes_clear(used_mask);
18 memset(node_order, 0, sizeof(node_order));
19 j = 0;
20 while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
21 int distance = node_distance(local_node, node);
22 if (distance > RECLAIM_DISTANCE) zone_reclaim_mode = 1;
23 if (distance != node_distance(local_node, prev_node)) node_load[node] = load;
24 prev_node = node;
25 load--;
26 if (order == ZONELIST_ORDER_NODE) build_zonelists_in_node_order(pgdat, node);
27 else node_order[j++] = node;
28 }
29 if (order == ZONELIST_ORDER_ZONE) build_zonelists_in_zone_order(pgdat, j);
30 build_thisnode_zonelists(pgdat);
31 }
linux/mm/page_alloc.c
/* build_zonelist_cache(): initialize the node's zonelist cache — clear the
 * fullzones bitmap and record each zoneref's node index for fast lookup. */
01 static void build_zonelist_cache(pg_data_t *pgdat){
02 struct zonelist *zonelist;
03 struct zonelist_cache *zlc;
04 struct zoneref *z;
05
06 zonelist = &pgdat->node_zonelists[0];
07 zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
08 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
09 for (z = zonelist->_zonerefs; z->zone; z++)
10 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
11 }
|
將頁配置器通知函式註冊到 CPU 的通知器。 linux/mm/page_alloc.c
/* page_alloc_init(): register the page allocator's CPU-hotplug callback. */
01 void __init page_alloc_init(void){
01 hotcpu_notifier(page_alloc_cpu_notify, 0);
01 }
linux/include/linux/cpu.h
/* hotcpu_notifier()/cpu_notifier(): declare a static notifier_block for fn
 * with the given priority and register it on the CPU notifier chain.
 * NOTE(review): the macro line-continuation backslashes were lost in the
 * original transcription and have been restored. */
01 #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
01
01 #define cpu_notifier(fn, pri) { \
01 static struct notifier_block fn##_nb __cpuinitdata = \
01 { .notifier_call = fn, .priority = pri }; \
01 register_cpu_notifier(&fn##_nb); \
01 }
linux/kernel/cpu.c
/* register_cpu_notifier(): add nb to the cpu_chain notifier list, serialized
 * by the cpu_maps update lock. */
01 int __ref register_cpu_notifier(struct notifier_block *nb){
02 int ret;
03 cpu_maps_update_begin();
04 ret = raw_notifier_chain_register(&cpu_chain, nb);
05 cpu_maps_update_done();
06 return ret;
07 }
|
解析啟動命令列的參數內容。 linux/init/main.c
/* parse_early_param(): parse early= options from the boot command line.
 * Runs at most once (guarded by `done`); works on a copy so the original
 * boot_command_line is left untouched. */
01 void __init parse_early_param(void){
02 static __initdata int done = 0;
03 static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];
04
05 if (done) return;
06 strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
07 parse_early_options(tmp_cmdline);
08 done = 1;
09 }
|
解析核心啟動靜態命令列的參數內容。 linux/init/main.c
01 parse_args("Booting kernel", static_command_line, __start___param,
02 __stop___param - __start___param,
03 &unknown_bootoption);
|
產生一個緩衝區用來存放核心啟動過程的事件紀錄。 linux/kernel/printk.c
/* setup_log_buf(): if log_buf_len= requested a bigger buffer, allocate it
 * (memblock when early, bootmem otherwise), copy the existing ring-buffer
 * contents into it under logbuf_lock, and rebase the log indices.
 * NOTE(review): the "\n" escapes in the pr_err/pr_info format strings were
 * lost in the original transcription and have been restored. */
01 void __init setup_log_buf(int early){
02 unsigned long flags;
03 unsigned start, dest_idx, offset;
04 char *new_log_buf;
05 int free;
06
07 if (!new_log_buf_len) return;
08 if (early) {
09 unsigned long mem;
10
11 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
12 if (mem == MEMBLOCK_ERROR) return;
13 new_log_buf = __va(mem);
14 }
15 else {
16 new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
17 }
18 if (unlikely(!new_log_buf)) {
19 pr_err("log_buf_len: %ld bytes not available\n",
20 new_log_buf_len);
21 return;
22 }
23 spin_lock_irqsave(&logbuf_lock, flags);
24 log_buf_len = new_log_buf_len;
25 log_buf = new_log_buf;
26 new_log_buf_len = 0;
27 free = __LOG_BUF_LEN - log_end;
28 offset = start = min(con_start, log_start);
29 dest_idx = 0;
30 while (start != log_end) {
31 unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
32
33 log_buf[dest_idx] = __log_buf[log_idx_mask];
34 start++;
35 dest_idx++;
36 }
37 log_start -= offset;
38 con_start -= offset;
39 log_end -= offset;
40 spin_unlock_irqrestore(&logbuf_lock, flags);
41 pr_info("log_buf_len: %d\n", log_buf_len);
42 pr_info("early log buf free: %d(%d%%)\n",free, (free * 100) / __LOG_BUF_LEN);
43 }
|
建立 PID 管理器的 PID 雜湊表,包括取得雜湊表記憶體、初始化雜湊表串列。 linux/kernel/pid.c
/* pidhash_init(): allocate the PID hash table with alloc_large_system_hash()
 * (early, small) and initialize every bucket as an empty hlist. */
01 void __init pidhash_init(void){
02 int i, pidhash_size;
03
04 pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
05 HASH_EARLY | HASH_SMALL,&pidhash_shift, NULL, 4096);
06 pidhash_size = 1 << pidhash_shift;
07 for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&pid_hash[i]);
08 }
|
初始化早期的虛擬檔案系統(VFS)快取。 linux/fs/dcache.c
/* vfs_caches_init_early(): early initialization of the VFS dentry and inode
 * hash tables. */
01 void __init vfs_caches_init_early(void){
02 dcache_init_early();
03 inode_init_early();
04 }
目錄項快取(dentry cache)早期初始化。 linux/fs/dcache.c
/* dcache_init_early(): allocate and empty-initialize the dentry hash table,
 * unless hashdist defers allocation to vmalloc-time (hashdist set). */
01 static void __init dcache_init_early(void){
02 int loop;
03
04 if (hashdist) return;
05 dentry_hashtable = alloc_large_system_hash("Dentry cache",sizeof(struct hlist_bl_head),
06 dhash_entries,13,HASH_EARLY,&d_hash_shift,&d_hash_mask,0);
07 for (loop = 0; loop < (1 << d_hash_shift); loop++)
08 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
09 }
inode 快取早期初始化。 linux/fs/inode.c
/* inode_init_early(): allocate and empty-initialize the inode hash table,
 * skipped when hashdist is set. */
01 void __init inode_init_early(void){
02 int loop;
03
04 if (hashdist) return;
05 inode_hashtable =alloc_large_system_hash("Inode-cache",sizeof(struct hlist_head),
06 ihash_entries,14,HASH_EARLY,&i_hash_shift,&i_hash_mask,0);
07 for (loop = 0; loop < (1 << i_hash_shift); loop++)
08 INIT_HLIST_HEAD(&inode_hashtable[loop]);
09 }
|
將核心內建的例外表(exception table)項目依照位址排序。 linux/kernel/extable.c
/* sort_main_extable(): sort the kernel's built-in exception table, which
 * spans __start___ex_table .. __stop___ex_table. */
01 void __init sort_main_extable(void){
01 sort_extable(__start___ex_table, __stop___ex_table);
01 }
例外中斷描述器表再編譯過程中會先連結成 __ex_table,再連結到 vmlinux。 連結位址的開始點被標記成符號 __start___ex_table,結束點被標記成 __stop___ex_table,二個符號之間就是所有內建中斷描述器。 linux/include/asm-generic/vmlinux.lds.h
/* EXCEPTION_TABLE(): linker-script fragment collecting all __ex_table input
 * sections between the __start___ex_table and __stop___ex_table symbols.
 * NOTE(review): the macro line-continuation backslashes were lost in the
 * original transcription and have been restored. */
01 #define EXCEPTION_TABLE(align) \
02 . = ALIGN(align); \
03 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \
04 VMLINUX_SYMBOL(__start___ex_table) = .; \
05 *(__ex_table) \
06 VMLINUX_SYMBOL(__stop___ex_table) = .; \
07 }
linux/lib/extable.c
/* sort_extable(): sort the exception-table entries in [start, finish) with
 * the generic heapsort, using cmp_ex as the comparator. */
01 void sort_extable(struct exception_table_entry *start,struct exception_table_entry *finish){
02 sort(start, finish - start, sizeof(struct exception_table_entry),cmp_ex, NULL);
03 }
linux/lib/sort.c
/* sort(): in-place heapsort over num elements of `size` bytes. First loop
 * builds the max-heap (sift-down from the last parent); second loop swaps the
 * root to the end and re-heapifies the shrinking prefix. When no swap_func is
 * given, a u32 swap is used for 4-byte elements, else a byte-wise swap. */
01 void sort(void *base, size_t num, size_t size,
02 int (*cmp_func)(const void *, const void *),
03 void (*swap_func)(void *, void *, int size)){
04 int i = (num/2 - 1) * size, n = num * size, c, r;
05
06 if (!swap_func) swap_func = (size == 4 ? u32_swap : generic_swap);
07 for ( ; i >= 0; i -= size) {
08 for (r = i; r * 2 + size < n; r = c) {
09 c = r * 2 + size;
10 if (c < n - size &&cmp_func(base + c, base + c + size) < 0) c += size;
11 if (cmp_func(base + r, base + c) >= 0) break;
12 swap_func(base + r, base + c, size);
13 }
14 }
15 for (i = n - size; i > 0; i -= size) {
16 swap_func(base, base + i, size);
17 for (r = 0; r * 2 + size < i; r = c) {
18 c = r * 2 + size;
19 if (c < i - size && cmp_func(base + c, base + c + size) < 0) c += size;
20 if (cmp_func(base + r, base + c) >= 0) break;
21 swap_func(base + r, base + c, size);
22 }
23 }
24 }
|
設定 CPU 內定中斷的中斷描述子,和系統中斷。最後再將 CPU 初始化。 linux/arch/x86/kernel/traps.c
/* trap_init(): install the IDT entries for CPU exceptions 0-19, reserve all
 * pre-external vectors in used_vectors, install the system-call trap gate,
 * then initialize the CPU and run the platform trap-init hook.
 * NOTE(review): line 04 read "÷_error" in the original transcription —
 * the HTML entity "&divide;" mangled "&divide_error"; restored here. */
01 void __init trap_init(void){
02 int i;
03
04 set_intr_gate(0, &divide_error);
05 set_intr_gate_ist(2, &nmi, NMI_STACK);
06 set_system_intr_gate(4, &overflow);
07 set_intr_gate(5, &bounds);
08 set_intr_gate(6, &invalid_op);
09 set_intr_gate(7, &device_not_available);
10 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
11 set_intr_gate(9, &coprocessor_segment_overrun);
12 set_intr_gate(10, &invalid_TSS);
13 set_intr_gate(11, &segment_not_present);
14 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
15 set_intr_gate(13, &general_protection);
16 set_intr_gate(15, &spurious_interrupt_bug);
17 set_intr_gate(16, &coprocessor_error);
18 set_intr_gate(17, &alignment_check);
19 set_intr_gate_ist(18, &machine_check, MCE_STACK);
20 set_intr_gate(19, &simd_coprocessor_error);
21 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors);
22 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
23 set_bit(SYSCALL_VECTOR, used_vectors);
24 cpu_init();
25 x86_init.irqs.trap_init();
26 }
中斷描述子相關函式。 linux/arch/x86/include/asm/desc.h
/* IDT gate helpers: set_intr_gate (DPL 0 interrupt gate), set_system_intr_gate
 * (DPL 3 interrupt gate) and set_system_trap_gate (DPL 3 trap gate) all
 * funnel into _set_gate(), which packs the descriptor and writes it into
 * idt_table via write_idt_entry/native_write_idt_entry. */
01 static inline void set_intr_gate(unsigned int n, void *addr){
02 BUG_ON((unsigned)n > 0xFF);
03 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
04 }
05
06 static inline void set_system_intr_gate(unsigned int n, void *addr){
07 BUG_ON((unsigned)n > 0xFF);
08 _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
09 }
10
11 static inline void set_system_trap_gate(unsigned int n, void *addr){
12 BUG_ON((unsigned)n > 0xFF);
13 _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
14 }
15
16 static inline void _set_gate(int gate, unsigned type, void *addr,
17 unsigned dpl, unsigned ist, unsigned seg){
18 gate_desc s;
19
20 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
21 write_idt_entry(idt_table, gate, &s);
22 }
23
24 static inline void pack_gate(gate_desc *gate, unsigned char type,unsigned long base,
25 unsigned dpl, unsigned flags,unsigned short seg){
26 gate->a = (seg << 16) | (base & 0xffff);
27 gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
28 }
29
30 #define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
31 static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate){
32 memcpy(&idt[entry], gate, sizeof(*gate));
33 }
位元設定函式,將變數的某位元值設定為一。 linux/arch/x86/include/asm/bitops.h
/* set_bit(): set bit nr in the bitmap at addr under a hashed atomic spinlock
 * (generic spinlock-based implementation, as also excerpted earlier). */
01 static inline void set_bit(int nr, volatile unsigned long *addr){
02 unsigned long mask = BIT_MASK(nr);
03 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
04 unsigned long flags;
05
06 _atomic_spin_lock_irqsave(p, flags);
07 *p |= mask;
08 _atomic_spin_unlock_irqrestore(p, flags);
09 }
linux/arch/x86/kernel/cpu/common.c
/* cpu_init(): per-CPU initialization — guards against double init, loads the
 * IDT and per-CPU GDT, attaches init_mm as the active mm, sets up the TSS,
 * TR, LDT, I/O bitmap, doublefault TSS, debug registers and the FPU/xsave
 * state. NOTE(review): the "\n" escapes in the printk format strings were
 * lost in the original transcription and have been restored. */
01 void __cpuinit cpu_init(void){
02 int cpu = smp_processor_id();
03 struct task_struct *curr = current;
04 struct tss_struct *t = &per_cpu(init_tss, cpu);
05 struct thread_struct *thread = &curr->thread;
06
07 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
08 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
09 for (;;) local_irq_enable();
10 }
11 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
12 if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
13 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
14 load_idt(&idt_descr);
15 switch_to_new_gdt(cpu);
16 atomic_inc(&init_mm.mm_count);
17 curr->active_mm = &init_mm;
18 BUG_ON(curr->mm);
19 enter_lazy_tlb(&init_mm, curr);
20 load_sp0(t, thread);
21 set_tss_desc(cpu, t);
22 load_TR_desc();
23 load_LDT(&init_mm.context);
24 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
25 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
26 clear_all_debug_regs();
27 dbg_restore_debug_regs();
28 fpu_init();
29 xsave_init();
30 }
執行 x86_init.irqs.trap_init 就是執行 visws_trap_init。 linux/arch/x86/platform/visws/visws_quirks.c
/* visws_trap_init(): SGI Visual Workstation trap-init hook — initializes the
 * Lithium host bridges and the Cobalt APIC. */
01 static void __init visws_trap_init(void){
01 lithium_init();
01 cobalt_init();
01 }
函式 lithium_init。 linux/arch/x86/platform/visws/visws_quirks.c
/* lithium_init(): map the two Lithium PCI host bridges into fixmap slots,
 * verify their SGI vendor/device IDs, and enable their interrupt lines.
 * NOTE(review): the "\n" escapes in the printk format strings were lost in
 * the original transcription and have been restored. */
01 static __init void lithium_init(void){
01 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
01 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
01
01 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
01 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
01 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
01 }
01 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
01 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
01 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
01 }
01 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
01 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
01 }
函式 cobalt_init 啟動 cobalt clock,用於 APIC,給支援 SMP 的系統使用。 linux/arch/x86/platform/visws/visws_quirks.c
/* cobalt_init(): map and start the local APIC, then map the Cobalt CPU and
 * APIC registers and enable the Cobalt APIC.
 * NOTE(review): the "\n" escapes in the printk format strings were lost in
 * the original transcription and have been restored. */
01 static __init void cobalt_init(void){
01 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
01 setup_local_APIC();
01 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
01 (unsigned int)apic_read(APIC_LVR),
01 (unsigned int)apic_read(APIC_ID));
01 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
01 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
01 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
01 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
01 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
01 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",co_apic_read(CO_APIC_ID));
01 }
|
記憶體管理器初始化,包括分頁控制群、主記憶體、核心快取記憶體、CPU 專屬記憶體、分頁表快取、虛擬記憶體配置器。 linux/init/main.c
01 static void __init mm_init(void){
02 page_cgroup_init_flatmem();
03 mem_init();
04 kmem_cache_init();
05 percpu_init_late();
06 pgtable_cache_init();
07 vmalloc_init();
08 }
linux/mm/page_cgroup.c
01 void __init page_cgroup_init_flatmem(void){
01 int nid, fail;
01
01 if (mem_cgroup_disabled()) return;
01 for_each_online_node(nid) {
01 fail = alloc_node_page_cgroup(nid);
01 if (fail) goto fail;
01 }
01 printk(KERN_INFO "allocated %ld bytes of page_cgroup
", total_usage);
01 printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
01 " don't want memory cgroups
");
01 return;
01 fail:
01 printk(KERN_CRIT "allocation of page_cgroup failed.
");
01 printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option
");
01 panic("Out of memory");
01 }
linux/arch/x86/mm/init_32.c
01 void __init mem_init(void){
01 int codesize, reservedpages, datasize, initsize;
01 int tmp;
01
01 pci_iommu_alloc();
01 BUG_ON(!mem_map);
01 totalram_pages += free_all_bootmem();
01 reservedpages = 0;
01 for (tmp = 0; tmp < max_low_pfn; tmp++)
01 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
01 reservedpages++;
01 set_highmem_pages_init();
01 codesize = (unsigned long) &_etext - (unsigned long) &_text;
01 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
01 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
01 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
01 "%dk reserved, %dk data, %dk init, %ldk highmem)
",
01 nr_free_pages() << (PAGE_SHIFT-10),
01 num_physpages << (PAGE_SHIFT-10),
01 codesize >> 10,
01 reservedpages << (PAGE_SHIFT-10),
01 datasize >> 10,
01 initsize >> 10,
01 totalhigh_pages << (PAGE_SHIFT-10));
01
01 printk(KERN_INFO "virtual kernel memory layout:
"
01 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)
"
01 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)
"
01 " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)
"
01 " lowmem : 0x%08lx - 0x%08lx (%4ld MB)
"
01 " .init : 0x%08lx - 0x%08lx (%4ld kB)
"
01 " .data : 0x%08lx - 0x%08lx (%4ld kB)
"
01 " .text : 0x%08lx - 0x%08lx (%4ld kB)
",
01 FIXADDR_START, FIXADDR_TOP,
01 (FIXADDR_TOP - FIXADDR_START) >> 10,
01 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
01 (LAST_PKMAP*PAGE_SIZE) >> 10,
01 VMALLOC_START, VMALLOC_END,
01 (VMALLOC_END - VMALLOC_START) >> 20,
01 (unsigned long)__va(0), (unsigned long)high_memory,
01 ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
01 (unsigned long)&__init_begin, (unsigned long)&__init_end,
01 ((unsigned long)&__init_end -
01 (unsigned long)&__init_begin) >> 10,
01 (unsigned long)&_etext, (unsigned long)&_edata,
01 ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
01 (unsigned long)&_text, (unsigned long)&_etext,
01 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
01 #define __FIXADDR_TOP (-PAGE_SIZE)
01 BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
01 BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
01 #define high_memory (-128UL << 20)
01 BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
01 #undef high_memory
01 #undef __FIXADDR_TOP
01
01 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
01 BUG_ON(VMALLOC_END > PKMAP_BASE);
01 BUG_ON(VMALLOC_START >= VMALLOC_END);
01 BUG_ON((unsigned long)high_memory > VMALLOC_START);
01 if (boot_cpu_data.wp_works_ok < 0) test_wp_bit();
01 }
linux/mm/slub.c
01 void __init kmem_cache_init(void){
01 int i;
01 int caches = 0;
01 struct kmem_cache *temp_kmem_cache;
01 int order;
01 struct kmem_cache *temp_kmem_cache_node;
01 unsigned long kmalloc_size;
01
01 kmem_size = offsetof(struct kmem_cache, node) +
01 nr_node_ids * sizeof(struct kmem_cache_node *);
01 kmalloc_size = ALIGN(kmem_size, cache_line_size());
01 order = get_order(2 * kmalloc_size);
01 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
01 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
01 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
01 sizeof(struct kmem_cache_node),
01 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
01 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
01 slab_state = PARTIAL;
01 temp_kmem_cache = kmem_cache;
01 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
01 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
01 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
01 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
01 temp_kmem_cache_node = kmem_cache_node;
01 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
01 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
01 kmem_cache_bootstrap_fixup(kmem_cache_node);
01 caches++;
01 kmem_cache_bootstrap_fixup(kmem_cache);
01 caches++;
01 free_pages((unsigned long)temp_kmem_cache, order);
01 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
01 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
01 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
01 int elem = size_index_elem(i);
01 if (elem >= ARRAY_SIZE(size_index))
01 break;
01 size_index[elem] = KMALLOC_SHIFT_LOW;
01 }
01 if (KMALLOC_MIN_SIZE == 64) {
01 for (i = 64 + 8; i <= 96; i += 8)
01 size_index[size_index_elem(i)] = 7;
01 } else if (KMALLOC_MIN_SIZE == 128) {
01 for (i = 128 + 8; i <= 192; i += 8)
01 size_index[size_index_elem(i)] = 8;
01 }
01 if (KMALLOC_MIN_SIZE <= 32) {
01 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
01 caches++;
01 }
01 if (KMALLOC_MIN_SIZE <= 64) {
01 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
01 caches++;
01 }
01 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
01 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
01 caches++;
01 }
01 slab_state = UP;
01 if (KMALLOC_MIN_SIZE <= 32) {
01 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
01 BUG_ON(!kmalloc_caches[1]->name);
01 }
01 if (KMALLOC_MIN_SIZE <= 64) {
01 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
01 BUG_ON(!kmalloc_caches[2]->name);
01 }
01 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
01 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
01
01 BUG_ON(!s);
01 kmalloc_caches[i]->name = s;
01 }
01 register_cpu_notifier(&slab_notifier);
01 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
01 struct kmem_cache *s = kmalloc_caches[i];
01
01 if (s && s->size) {
01 char *name = kasprintf(GFP_NOWAIT,
01 "dma-kmalloc-%d", s->objsize);
01
01 BUG_ON(!name);
01 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
01 s->objsize, SLAB_CACHE_DMA);
01 }
01 }
01 printk(KERN_INFO
01 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
01 " CPUs=%d, Nodes=%d
",
01 caches, cache_line_size(),
01 slub_min_order, slub_max_order, slub_min_objects,
01 nr_cpu_ids, nr_node_ids);
01 }
linux/mm/percpu.c
01 void __init percpu_init_late(void){
01 struct pcpu_chunk *target_chunks[] =
01 { pcpu_first_chunk, pcpu_reserved_chunk, NULL };
01 struct pcpu_chunk *chunk;
01 unsigned long flags;
01 int i;
01
01 for (i = 0; (chunk = target_chunks[i]); i++) {
01 int *map;
01 const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
01
01 BUILD_BUG_ON(size > PAGE_SIZE);
01 map = pcpu_mem_alloc(size);
01 BUG_ON(!map);
01 spin_lock_irqsave(&pcpu_lock, flags);
01 memcpy(map, chunk->map, size);
01 chunk->map = map;
01 spin_unlock_irqrestore(&pcpu_lock, flags);
01 }
01 }
linux/mm/vmalloc.c
01 void __init vmalloc_init(void){
01 struct vmap_area *va;
01 struct vm_struct *tmp;
01 int i;
01
01 for_each_possible_cpu(i) {
01 struct vmap_block_queue *vbq;
01
01 vbq = &per_cpu(vmap_block_queue, i);
01 spin_lock_init(&vbq->lock);
01 INIT_LIST_HEAD(&vbq->free);
01 }
01 for (tmp = vmlist; tmp; tmp = tmp->next) {
01 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
01 va->flags = tmp->flags | VM_VM_AREA;
01 va->va_start = (unsigned long)tmp->addr;
01 va->va_end = va->va_start + tmp->size;
01 __insert_vmap_area(va);
01 }
01 vmap_area_pcpu_hole = VMALLOC_END;
01 vmap_initialized = true;
01 }
|
初始化排程器。 linux/kernel/sched.c
01 void __init sched_init(void){
02 int i, j;
03 unsigned long alloc_size = 0, ptr;
04
05 alloc_size += 2 * nr_cpu_ids * sizeof(void **);
06 if (alloc_size) {
07 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
08 root_task_group.se = (struct sched_entity **)ptr;
09 ptr += nr_cpu_ids * sizeof(void **);
10 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
11 ptr += nr_cpu_ids * sizeof(void **);
12 }
13 init_defrootdomain();
14 init_rt_bandwidth(&def_rt_bandwidth,global_rt_period(), global_rt_runtime());
15 list_add(&root_task_group.list, &task_groups);
16 INIT_LIST_HEAD(&root_task_group.children);
17 autogroup_init(&init_task);
18 for_each_possible_cpu(i) {
19 struct rq *rq;
20
21 rq = cpu_rq(i);
22 raw_spin_lock_init(&rq->lock);
23 rq->nr_running = 0;
24 rq->calc_load_active = 0;
25 rq->calc_load_update = jiffies + LOAD_FREQ;
26 init_cfs_rq(&rq->cfs, rq);
27 init_rt_rq(&rq->rt, rq);
28 root_task_group.shares = root_task_group_load;
29 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
30 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
31 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
32 for (j = 0; j < CPU_LOAD_IDX_MAX; j++) rq->cpu_load[j] = 0;
33 rq->last_load_update_tick = jiffies;
34 rq->sd = NULL;
35 rq->rd = NULL;
36 rq->cpu_power = SCHED_POWER_SCALE;
37 rq->post_schedule = 0;
38 rq->active_balance = 0;
39 rq->next_balance = jiffies;
40 rq->push_cpu = 0;
41 rq->cpu = i;
42 rq->online = 0;
43 rq->idle_stamp = 0;
44 rq->avg_idle = 2*sysctl_sched_migration_cost;
45 rq_attach_root(rq, &def_root_domain);
46 rq->nohz_balance_kick = 0;
47 init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i));
48 init_rq_hrtick(rq);
49 atomic_set(&rq->nr_iowait, 0);
50 }
51 set_load_weight(&init_task);
52 open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
53 plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
54 atomic_inc(&init_mm.mm_count);
55 enter_lazy_tlb(&init_mm, current);
56 init_idle(current, smp_processor_id());
57 calc_load_update = jiffies + LOAD_FREQ;
58 current->sched_class = &fair_sched_class;
59 zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
60 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
61 zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
62 alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
63 atomic_set(&nohz.load_balancer, nr_cpu_ids);
64 atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
65 atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
66 if (cpu_isolated_map == NULL) zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
67 scheduler_running = 1;
68 }
|
初始化 IDR(核心的整數 ID 配置器)所需的快取記憶體。 linux/lib/idr.c
01 void __init idr_init_cache(void){
02 idr_layer_cache = kmem_cache_create("idr_layer_cache",sizeof(struct idr_layer), 0, SLAB_PANIC, NULL);
03 }
|
初始化效能事件(perf events)管理器。 linux/kernel/events/core.c
01 void __init perf_event_init(void){
02 int ret;
03
04 idr_init(&pmu_idr);
05 perf_event_init_all_cpus();
06 init_srcu_struct(&pmus_srcu);
07 perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
08 perf_pmu_register(&perf_cpu_clock, NULL, -1);
09 perf_pmu_register(&perf_task_clock, NULL, -1);
10 perf_tp_register();
11 perf_cpu_notifier(perf_cpu_notify);
12 register_reboot_notifier(&perf_reboot_notifier);
13 ret = init_hw_breakpoint();
14 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
15 }
|
RCU 是 READ-COPY UPDATE(讀取-複製-更新)的簡寫。初始化 RCU 樹。 linux/kernel/rcutree.c
01 void __init rcu_init(void){
02 int cpu;
03
04 rcu_bootup_announce();
05 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
06 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
07 __rcu_init_preempt();
08 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
09 cpu_notifier(rcu_cpu_notify, 0);
10 for_each_online_cpu(cpu)
11 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
12 check_cpu_stall_init();
13 }
|
初始化 RADIX 樹,取得樹節點結構的快取記憶體,設定樹的索引陣列,最後註冊 CPU 通知器。 linux/lib/radix-tree.c
01 void __init radix_tree_init(void){
02 radix_tree_node_cachep = kmem_cache_create("radix_tree_node",sizeof(struct radix_tree_node), 0,
03 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,radix_tree_node_ctor);
04 radix_tree_init_maxindex();
05 hotcpu_notifier(radix_tree_callback, 0);
06 }
|
初始化 IRQ 描述器資料結構,。 linux/kernel/irqdesc.c
01 int __init early_irq_init(void){
02 int i, initcnt, node = first_online_node;
03 struct irq_desc *desc;
04
05 init_irq_default_affinity();
06 initcnt = arch_probe_nr_irqs();
07 printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d
", NR_IRQS, nr_irqs, initcnt);
08 if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS)) nr_irqs = IRQ_BITMAP_BITS;
09 if (WARN_ON(initcnt > IRQ_BITMAP_BITS)) initcnt = IRQ_BITMAP_BITS;
10 if (initcnt > nr_irqs) nr_irqs = initcnt;
11 for (i = 0; i < initcnt; i++) {
12 desc = alloc_desc(i, node);
13 set_bit(i, allocated_irqs);
14 irq_insert_desc(i, desc);
15 }
16 return arch_early_irq_init();
17 }
linux/kernel/irq/irqdesc.c
01 static struct irq_desc *alloc_desc(int irq, int node){
01 struct irq_desc *desc;
01 gfp_t gfp = GFP_KERNEL;
01
01 desc = kzalloc_node(sizeof(*desc), gfp, node);
01 if (!desc) return NULL;
01 desc->kstat_irqs = alloc_percpu(unsigned int);
01 if (!desc->kstat_irqs) goto err_desc;
01 if (alloc_masks(desc, gfp, node)) goto err_kstat;
01 raw_spin_lock_init(&desc->lock);
01 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
01 desc_set_defaults(irq, desc, node);
01 return desc;
01 err_kstat:
01 free_percpu(desc->kstat_irqs);
01 err_desc:
01 kfree(desc);
01 return NULL;
01 }
linux/arch/x86/include/asm/bitops.h
01 static inline void set_bit(int nr, volatile unsigned long *addr){
01 unsigned long mask = BIT_MASK(nr);
01 unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
01 unsigned long flags;
01
01 _atomic_spin_lock_irqsave(p, flags);
01 *p |= mask;
01 _atomic_spin_unlock_irqrestore(p, flags);
01 }
linux/kernel/irq/irqdesc.h
01 static void irq_insert_desc(unsigned int irq, struct irq_desc *desc){
01 radix_tree_insert(&irq_desc_tree, irq, desc);
01 }
linux/arch/x86/kernel/apic/io_apic.c
01 int __init arch_early_irq_init(void){
01 struct irq_cfg *cfg;
01 int count, node, i;
01
01 if (!legacy_pic->nr_legacy_irqs) {
01 nr_irqs_gsi = 0;
01 io_apic_irqs = ~0UL;
01 }
01 for (i = 0; i < nr_ioapics; i++) {
01 ioapics[i].saved_registers =kzalloc(sizeof(struct IO_APIC_route_entry) *
01 ioapics[i].nr_registers, GFP_KERNEL);
01 if (!ioapics[i].saved_registers)
01 pr_err("IOAPIC %d: suspend/resume impossible!
", i);
01 }
01 cfg = irq_cfgx;
01 count = ARRAY_SIZE(irq_cfgx);
01 node = cpu_to_node(0);
01 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
01 for (i = 0; i < count; i++) {
01 irq_set_chip_data(i, &cfg[i]);
01 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
01 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
01 if (i < legacy_pic->nr_legacy_irqs) {
01 cfg[i].vector = IRQ0_VECTOR + i;
01 cpumask_set_cpu(0, cfg[i].domain);
01 }
01 }
01 return 0;
01 }
|
初始化中斷向量對應:將 legacy IRQ 編號填入每 CPU 的 vector_irq 表,再呼叫平台相關的中斷初始化函式 x86_init.irqs.intr_init。 linux/arch/x86/kernel/irqinit.c
01 void __init init_IRQ(void){
02 int i;
03
04 x86_add_irq_domains();
05 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
06 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
07 x86_init.irqs.intr_init();
08 }
|
設定優先權數的索引陣列值。 linux/lib/prio_tree.c
01 void __init prio_tree_init(void){
02 unsigned int i;
03
04 for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
05 index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
06 index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
07 }
|
安裝計時器通知器和計時器處理函式。 linux/kernel/timer.c
01 void __init init_timers(void){
02 int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,(void *)(long)smp_processor_id());
03
04 init_timer_stats();
05 BUG_ON(err != NOTIFY_OK);
06 register_cpu_notifier(&timers_nb);
07 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
08 }
|
安裝高解析度計時器通知器和中斷處理函式。 linux/kernel/hrtimer.c
01 void __init hrtimers_init(void){
02 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
03 (void *)(long)smp_processor_id());
04 register_cpu_notifier(&hrtimers_nb);
05 open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
06 }
|
初始化每一個可能的 CPU 的每個軟體中斷的工作串列。 linux/kernel/softirq.c
01 void __init softirq_init(void){
02 int cpu;
03
04 for_each_possible_cpu(cpu) {
05 int i;
06
07 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
08 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
09 for (i = 0; i < NR_SOFTIRQS; i++)
10 INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
11 }
12 register_hotcpu_notifier(&remote_softirq_cpu_notifier);
13 open_softirq(TASKLET_SOFTIRQ, tasklet_action);
14 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
15 }
|
時間保持器計算並紀錄目前的時間值。 linux/kernel/time/timekeeping.c
01 void __init timekeeping_init(void){
02 struct clocksource *clock;
03 unsigned long flags;
04 struct timespec now, boot;
05
06 read_persistent_clock(&now);
07 read_boot_clock(&boot);
08 write_seqlock_irqsave(&xtime_lock, flags);
09 ntp_init();
10 clock = clocksource_default_clock();
11 if (clock->enable) clock->enable(clock);
12 timekeeper_setup_internals(clock);
13 xtime.tv_sec = now.tv_sec;
14 xtime.tv_nsec = now.tv_nsec;
15 raw_time.tv_sec = 0;
16 raw_time.tv_nsec = 0;
17 if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
18 boot.tv_sec = xtime.tv_sec;
19 boot.tv_nsec = xtime.tv_nsec;
20 }
21 set_normalized_timespec(&wall_to_monotonic,-boot.tv_sec, -boot.tv_nsec);
22 total_sleep_time.tv_sec = 0;
23 total_sleep_time.tv_nsec = 0;
24 write_sequnlock_irqrestore(&xtime_lock, flags);
25 }
|
linux/arch/x86/kernel/time.c
01 void __init time_init(void){
02 late_time_init = x86_late_time_init;
03 }
04
05 static __init void x86_late_time_init(void){
06 x86_init.timers.timer_init();
07 tsc_init();
08 }
linux/arch/x86/platform/visws/visws_quirks.c
01 static void __init visws_time_init(void){
02 printk(KERN_INFO "Starting Cobalt Timer system clock
");
03 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
04 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
05 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
06 setup_default_timer_irq();
07 }
linux/arch/x86/kernel/time.c
01 void __init setup_default_timer_irq(void){
01 setup_irq(0, &irq0);
01 }
linux/kernel/irq/manage.c
01 int setup_irq(unsigned int irq, struct irqaction *act){
02 int retval;
03 struct irq_desc *desc = irq_to_desc(irq);
04
05 chip_bus_lock(desc);
06 retval = __setup_irq(irq, desc, act);
07 chip_bus_sync_unlock(desc);
08 return retval;
09 }
linux/arch/x86/kernel/tsc.c
01 void __init tsc_init(void){
01 u64 lpj;
01 int cpu;
01
01 x86_init.timers.tsc_pre_init();
01 if (!cpu_has_tsc) return;
01 tsc_khz = x86_platform.calibrate_tsc();
01 cpu_khz = tsc_khz;
01 if (!tsc_khz) {
01 mark_tsc_unstable("could not calculate TSC khz");
01 return;
01 }
01 printk("Detected %lu.%03lu MHz processor.
",
01 (unsigned long)cpu_khz / 1000,
01 (unsigned long)cpu_khz % 1000);
01 for_each_possible_cpu(cpu)
01 set_cyc2ns_scale(cpu_khz, cpu);
01 if (tsc_disabled > 0) return;
01 tsc_disabled = 0;
01 if (!no_sched_irq_time) enable_sched_clock_irqtime();
01 lpj = ((u64)tsc_khz * 1000);
01 do_div(lpj, HZ);
01 lpj_fine = lpj;
01 use_tsc_delay();
01 dmi_check_system(bad_tsc_dmi_table);
01 if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized");
01 check_system_tsc_reliable();
01 }
|
PROFILE 管理器用來管理核心的 PROFILE,。 linux/kernel/profile.c
01 int __ref profile_init(void){
02 int buffer_bytes;
03
04 if (!prof_on) return 0;
05 prof_len = (_etext - _stext) >> prof_shift;
06 buffer_bytes = prof_len*sizeof(atomic_t);
07 if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM;
08 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
09 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
10 if (prof_buffer) return 0;
11 prof_buffer = alloc_pages_exact(buffer_bytes,GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
12 if (prof_buffer) return 0;
13 prof_buffer = vzalloc(buffer_bytes);
14 if (prof_buffer) return 0;
15 free_cpumask_var(prof_cpu_mask);
16 return -ENOMEM;
17 }
這是 _etext 和 _stext 之間的內容嗎?? linux/arch/x86/kernel/vmlinux.lds
01 _stext = .;
01 . = ALIGN(8); *(.text.hot) *(.text) *(.ref.text) *(.devinit.text) *(.devexit.text) *(.cpuinit.text) *(.cpuexit.text) *(.text.unlikely)
01 . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .;
01 . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .;
01 . = ALIGN(8); __kprobes_text_start = .; *(.kprobes.text) __kprobes_text_end = .;
01 . = ALIGN(8); __entry_text_start = .; *(.entry.text) __entry_text_end = .;
01 *(.fixup)
01 *(.gnu.warning)
01 _etext = .;
|
初始化每個 cpu 的 cfd 結構,cfd 是 call function data 的簡稱。 linux/kernel/smp.c
01 void __init call_function_init(void){
02 void *cpu = (void *)(long)smp_processor_id();
03 int i;
04
05 for_each_possible_cpu(i) {
06 struct call_single_queue *q = &per_cpu(call_single_queue, i);
07
08 raw_spin_lock_init(&q->lock);
09 INIT_LIST_HEAD(&q->list);
10 }
11 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
12 register_cpu_notifier(&hotplug_cfd_notifier);
13 }
|
致能 CPU 外部中斷,以組合語言來說是執行指令 sti 。 linux/include/linux/irqflags.h
01 #define local_irq_enable()
02 do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
|
這是為了顯示初始過程的訊息,尤其是顯示偵錯訊息。 linux/drivers/tty/tty_io.c
01 void __init console_init(void){
02 initcall_t *call;
03
04 tty_ldisc_begin();
05 call = __con_initcall_start;
06 while (call < __con_initcall_end) {
07 (*call)();
08 call++;
09 }
10 }
linux/drivers/tty/n_tty.c
01 struct tty_ldisc_ops tty_ldisc_N_TTY = {
01 .magic = TTY_LDISC_MAGIC,
01 .name = "n_tty",
01 .open = n_tty_open,
01 .close = n_tty_close,
01 .flush_buffer = n_tty_flush_buffer,
01 .chars_in_buffer = n_tty_chars_in_buffer,
01 .read = n_tty_read,
01 .write = n_tty_write,
01 .ioctl = n_tty_ioctl,
01 .set_termios = n_tty_set_termios,
01 .poll = n_tty_poll,
01 .receive_buf = n_tty_receive_buf,
01 .write_wakeup = n_tty_write_wakeup
01 };
|
顯示鎖依賴檢查器(lockdep)的組態與記憶體用量資訊。 linux/kernel/lockdep.c
01 void __init lockdep_info(void){
02 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
");
03 printk("... MAX_LOCKDEP_SUBCLASSES: %lu
", MAX_LOCKDEP_SUBCLASSES);
04 printk("... MAX_LOCK_DEPTH: %lu
", MAX_LOCK_DEPTH);
05 printk("... MAX_LOCKDEP_KEYS: %lu
", MAX_LOCKDEP_KEYS);
06 printk("... CLASSHASH_SIZE: %lu
", CLASSHASH_SIZE);
07 printk("... MAX_LOCKDEP_ENTRIES: %lu
", MAX_LOCKDEP_ENTRIES);
08 printk("... MAX_LOCKDEP_CHAINS: %lu
", MAX_LOCKDEP_CHAINS);
09 printk("... CHAINHASH_SIZE: %lu
", CHAINHASH_SIZE);
10 printk(" memory used by lock dependency info: %lu kB
",
11 (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
12 sizeof(struct list_head) * CLASSHASH_SIZE +
13 sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
14 sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
15 sizeof(struct list_head) * CHAINHASH_SIZE) / 1024
16 );
17 printk(" per task-struct memory footprint: %lu bytes
",
18 sizeof(struct held_lock) * MAX_LOCK_DEPTH);
19 }
|
INITRD 是 initial ramdisk 的簡稱,其中包含一個簡易檔案系統,放有一些啟動時期會用到的檔案資料。以下片段檢查 initrd 是否落在安全的記憶體範圍內。 linux/init/main.c
01 if (initrd_start && !initrd_below_start_ok &&
02 page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
03 printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
04 "disabling it.
",
05 page_to_pfn(virt_to_page((void *)initrd_start)),min_low_pfn);
06 initrd_start = 0;
07 }
|
分頁控制群是一個延伸的記憶體分佈圖,一個分頁控制群的分頁會關聯到每一個分頁描述器。 分頁控制群幫助確認控制群的資訊。所有分頁控制群都是在啟動時期或記憶體熱插拔事件發生時建立。 pfn 是 page frame number(分頁框編號)的簡寫,nid 是 node identifier(NUMA 節點編號)的簡稱。 linux/mm/page_cgroup.c
01 void __init page_cgroup_init(void){
02 unsigned long pfn;
03 int nid;
04
05 if (mem_cgroup_disabled()) return;
06 for_each_node_state(nid, N_HIGH_MEMORY) {
07 unsigned long start_pfn, end_pfn;
08
09 start_pfn = node_start_pfn(nid);
10 end_pfn = node_end_pfn(nid);
11 for (pfn = start_pfn;pfn < end_pfn;pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
12 if (!pfn_valid(pfn)) continue;
13 if (pfn_to_nid(pfn) != nid) continue;
14 if (init_section_page_cgroup(pfn, nid)) goto oom;
15 }
16 }
17 hotplug_memory_notifier(page_cgroup_callback, 0);
18 printk(KERN_INFO "allocated %ld bytes of page_cgroup
", total_usage);
19 printk(KERN_INFO "please try 'cgroup_disable=memory' option if you " "don't want memory cgroups
");
20 return;
21 oom:
22 printk(KERN_CRIT "try 'cgroup_disable=memory' boot option
");
23 panic("Out of memory");
24 }
|
kmemleak 是核心記憶體洩漏偵測器:掃描記憶體,找出已配置但已無任何參考、卻未被釋放的物件,並回報為可能的洩漏。此函式初始化 kmemleak 並重播早期記錄的配置事件。 linux/mm/kmemleak.c
01 void __init kmemleak_init(void){
02 int i;
03 unsigned long flags;
04
05 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
06 jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
07 object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
08 scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
09 INIT_PRIO_TREE_ROOT(&object_tree_root);
10 local_irq_save(flags);
11 if (!atomic_read(&kmemleak_error)) {
12 atomic_set(&kmemleak_enabled, 1);
13 atomic_set(&kmemleak_early_log, 0);
14 }
15 local_irq_restore(flags);
16 for (i = 0; i < crt_early_log; i++) {
17 struct early_log *log = &early_log[i];
18
19 switch (log->op_type) {
20 case KMEMLEAK_ALLOC:
21 early_alloc(log);
22 break;
23 case KMEMLEAK_FREE:
24 kmemleak_free(log->ptr);
25 break;
26 case KMEMLEAK_FREE_PART:
27 kmemleak_free_part(log->ptr, log->size);
28 break;
29 case KMEMLEAK_NOT_LEAK:
30 kmemleak_not_leak(log->ptr);
31 break;
32 case KMEMLEAK_IGNORE:
33 kmemleak_ignore(log->ptr);
34 break;
35 case KMEMLEAK_SCAN_AREA:
36 kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
37 break;
38 case KMEMLEAK_NO_SCAN:
39 kmemleak_no_scan(log->ptr);
40 break;
41 default:
42 WARN_ON(1);
43 }
44 }
45 }
|
配置每個 CPU 的頁集合並初始化之,在此之前只有啟動時期的頁集合可以使用。 linux/mm/page_alloc.c
01 void __init setup_per_cpu_pageset(void){
02 struct zone *zone;
03
04 for_each_populated_zone(zone)
05 setup_zone_pageset(zone);
06 }
|
NUMA 為 NON UNIFORM MEMORY ACCESS,非統一記憶體存取。 初始化 NUMA 記憶體存取管理器。 linux/mm/mempolicy.c
01 void __init numa_policy_init(void){
02 nodemask_t interleave_nodes;
03 unsigned long largest = 0;
04 int nid, prefer = 0;
05
06 policy_cache = kmem_cache_create("numa_policy",
07 sizeof(struct mempolicy),0, SLAB_PANIC, NULL);
08 sn_cache = kmem_cache_create("shared_policy_node",
09 sizeof(struct sp_node),0, SLAB_PANIC, NULL);
10 nodes_clear(interleave_nodes);
11 for_each_node_state(nid, N_HIGH_MEMORY) {
12 unsigned long total_pages = node_present_pages(nid);
13 if (largest < total_pages) {
14 largest = total_pages;
15 prefer = nid;
16 }
17 if ((total_pages << PAGE_SHIFT) >= (16 << 20))
18 node_set(nid, interleave_nodes);
19 }
20 if (unlikely(nodes_empty(interleave_nodes)))
21 node_set(prefer, interleave_nodes);
22 if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
23 printk("numa_policy_init: interleaving failed
");
24 }
|
對每個可能的 cpu 設定排程器時脈參數,。 linux/kernel/sched_clock.c
01 void sched_clock_init(void){
02 u64 ktime_now = ktime_to_ns(ktime_get());
03 int cpu;
04
05 for_each_possible_cpu(cpu) {
06 struct sched_clock_data *scd = cpu_sdc(cpu);
07
08 scd->tick_raw = 0;
09 scd->tick_gtod = ktime_now;
10 scd->clock = ktime_now;
11 }
12 sched_clock_running = 1;
13 }
|
lpj 是 loops per jiffy 的簡稱。 linux/init/calibrate.c
01 void __cpuinit calibrate_delay(void){
02 unsigned long lpj;
03 static bool printed;
04
05 if (preset_lpj) {
06 lpj = preset_lpj;
07 if (!printed) pr_info("Calibrating delay loop (skipped) ""preset value.. ");
08 }
09 else if ((!printed) && lpj_fine) {
10 lpj = lpj_fine;
11 pr_info("Calibrating delay loop (skipped), ""value calculated using timer frequency.. ");
12 }
13 else if ((lpj = calibrate_delay_direct()) != 0) {
14 if (!printed) pr_info("Calibrating delay using timer ""specific routine.. ");
15 }
16 else {
17 if (!printed) pr_info("Calibrating delay loop... ");
18 lpj = calibrate_delay_converge();
19 }
20 if (!printed) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)
",lpj/(500000/HZ),(lpj/(5000/HZ)) % 100, lpj);
21 loops_per_jiffy = lpj;
22 printed = true;
23 }
|
核心有一個 PID 管理器,用來管理所有有 PID 的物件。這個函式建立 PID 映射表,是 PID 管理器運作時的資料結構。 linux/kernel/pid.c
01 void __init pidmap_init(void){
01 pid_max = min(pid_max_max, max_t(int, pid_max,PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
01 pid_max_min = max_t(int, pid_max_min,PIDS_PER_CPU_MIN * num_possible_cpus());
01 pr_info("pid_max: default: %u minimum: %u
", pid_max, pid_max_min);
01 init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
01 set_bit(0, init_pid_ns.pidmap[0].page);
01 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
01 init_pid_ns.pid_cachep = KMEM_CACHE(pid,SLAB_HWCACHE_ALIGN | SLAB_PANIC);
01 }
|
初始化匿名 VMA(anon_vma)相關的快取記憶體。 linux/mm/rmap.c
01 void __init anon_vma_init(void){
01 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
01 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
01 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
01 }
|
EFI 是 EXTENSIBLE FIRMWARE INTERFACE 的簡稱,可延伸韌體介面。
linux/arch/x86/platform/efi/efi.c
01 void __init efi_enter_virtual_mode(void){
02 efi_memory_desc_t *md, *prev_md = NULL;
03 efi_status_t status;
04 unsigned long size;
05 u64 end, systab, addr, npages, end_pfn;
06 void *p, *va, *new_memmap = NULL;
07 int count = 0;
08
09 efi.systab = NULL;
10 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
11 u64 prev_size;
12
13 md = p;
14 if (!prev_md) {
15 prev_md = md;
16 continue;
17 }
18 if (prev_md->type != md->type ||prev_md->attribute != md->attribute) {
19 prev_md = md;
20 continue;
21 }
22 prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
23 if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
24 prev_md->num_pages += md->num_pages;
25 md->type = EFI_RESERVED_TYPE;
26 md->attribute = 0;
27 continue;
28 }
29 prev_md = md;
30 }
31 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
32 md = p;
33 if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
34 md->type != EFI_BOOT_SERVICES_CODE &&
35 md->type != EFI_BOOT_SERVICES_DATA) continue;
36 size = md->num_pages << EFI_PAGE_SHIFT;
37 end = md->phys_addr + size;
38 end_pfn = PFN_UP(end);
39 if (end_pfn <= max_low_pfn_mapped
40 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
41 && end_pfn <= max_pfn_mapped))
42 va = __va(md->phys_addr);
43 else
44 va = efi_ioremap(md->phys_addr, size, md->type);
45 md->virt_addr = (u64) (unsigned long) va;
46 if (!va) {
47 printk(KERN_ERR PFX "ioremap of 0x%llX failed!
",
48 (unsigned long long)md->phys_addr);
49 continue;
50 }
51 if (!(md->attribute & EFI_MEMORY_WB)) {
52 addr = md->virt_addr;
53 npages = md->num_pages;
54 memrange_efi_to_native(&addr, &npages);
55 set_memory_uc(addr, npages);
56 }
57 systab = (u64) (unsigned long) efi_phys.systab;
58 if (md->phys_addr <= systab && systab < end) {
59 systab += md->virt_addr - md->phys_addr;
60 efi.systab = (efi_system_table_t *) (unsigned long) systab;
61 }
62 new_memmap = krealloc(new_memmap,(count + 1) * memmap.desc_size,GFP_KERNEL);
63 memcpy(new_memmap + (count * memmap.desc_size), md,memmap.desc_size);
64 count++;
65 }
66 BUG_ON(!efi.systab);
67 status = phys_efi_set_virtual_address_map(
68 memmap.desc_size * count,
69 memmap.desc_size,
70 memmap.desc_version,
71 (efi_memory_desc_t *)__pa(new_memmap));
72 if (status != EFI_SUCCESS) {
73 printk(KERN_ALERT "Unable to switch EFI into virtual mode "
74 "(status=%lx)!
", status);
75 panic("EFI call to SetVirtualAddressMap() failed!");
76 }
77 efi_free_boot_services();
78 efi.get_time = virt_efi_get_time;
79 efi.set_time = virt_efi_set_time;
80 efi.get_wakeup_time = virt_efi_get_wakeup_time;
81 efi.set_wakeup_time = virt_efi_set_wakeup_time;
82 efi.get_variable = virt_efi_get_variable;
83 efi.get_next_variable = virt_efi_get_next_variable;
84 efi.set_variable = virt_efi_set_variable;
85 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
86 efi.reset_system = virt_efi_reset_system;
87 efi.set_virtual_address_map = NULL;
88 if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec();
89 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
90 memmap.map = NULL;
91 kfree(new_memmap);
92 }
|
初始化權限(credentials)管理器相關資料結構。 linux/kernel/cred.c
01 void __init cred_init(void){
02 cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),0,
03 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
04 }
linux/mm/slub.c
01 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
01 size_t align, unsigned long flags, void (*ctor)(void *)){
01 struct kmem_cache *s;
01 char *n;
01
01 if (WARN_ON(!name)) return NULL;
01 down_write(&slub_lock);
01 s = find_mergeable(size, align, flags, name, ctor);
01 if (s) {
01 s->refcount++;
01 s->objsize = max(s->objsize, (int)size);
01 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
01 if (sysfs_slab_alias(s, name)) {
01 s->refcount--;
01 goto err;
01 }
01 up_write(&slub_lock);
01 return s;
01 }
01 n = kstrdup(name, GFP_KERNEL);
01 if (!n) goto err;
01 s = kmalloc(kmem_size, GFP_KERNEL);
01 if (s) {
01 if (kmem_cache_open(s, n,size, align, flags, ctor)) {
01 list_add(&s->list, &slab_caches);
01 if (sysfs_slab_add(s)) {
01 list_del(&s->list);
01 kfree(n);
01 kfree(s);
01 goto err;
01 }
01 up_write(&slub_lock);
01 return s;
01 }
01 kfree(n);
01 kfree(s);
01 }
01 err:
01 up_write(&slub_lock);
01 if (flags & SLAB_PANIC) panic("Cannot create slabcache %s\n", name);
01 else s = NULL;
01 return s;
01 }
|
行程管理器的初始化會先取得任務管理結構的快取記憶體,再取得架構有關的 xstate 快取。 接著計算記憶體能容納的行程最大數量,至少是二十個行程。 linux/kernel/fork.c
01 void __init fork_init(unsigned long mempages){
02 task_struct_cachep = kmem_cache_create("task_struct", sizeof(struct task_struct),
03 ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
04 arch_task_cache_init();
05 max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
06 if(max_threads < 20) max_threads = 20;
07 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
08 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
09 init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC];
10 }
|
取得行程管理器所需的各種快取記憶體,包括 sighandle、signal、files、fs、mm 等快取控制結構的記憶體。 linux/kernel/fork.c
01 void __init proc_caches_init(void){
02 sighand_cachep = kmem_cache_create("sighand_cache",sizeof(struct sighand_struct), 0,
03 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|SLAB_NOTRACK, sighand_ctor);
04 signal_cachep = kmem_cache_create("signal_cache",sizeof(struct signal_struct), 0,
05 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
06 files_cachep = kmem_cache_create("files_cache",sizeof(struct files_struct), 0,
07 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
08 fs_cachep = kmem_cache_create("fs_cache",sizeof(struct fs_struct), 0,
09 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
10 mm_cachep = kmem_cache_create("mm_struct",sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
11 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
12 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
13 mmap_init();
14 }
|
取得緩衝區記憶體管理器的結構記憶體,buffer_head。並以此記憶體做為所有緩衝區記憶體的串列頭,管理所有緩衝區記憶體。 linux/fs/buffer.c
01 void __init buffer_init(void){
02 int nrpages;
03
04 bh_cachep = kmem_cache_create("buffer_head",sizeof(struct buffer_head), 0,
05 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD),NULL);
06 nrpages = (nr_free_buffer_pages() * 10) / 100;
07 max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
08 hotcpu_notifier(buffer_cpu_notify, 0);
09 }
|
linux/kernel/debug/debug_core.c
01 void __init dbg_late_init(void){
02 dbg_is_early = false;
03 if (kgdb_io_module_registered) kgdb_arch_late();
04 kdb_init(KDB_INIT_FULL);
05 }
|
設定虛擬檔案系統的各子系統的快取記憶體。 linux/fs/dcache.c
01 void __init vfs_caches_init(unsigned long mempages){
02 unsigned long reserve;
03
04 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
05 mempages -= reserve;
06 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
07 dcache_init();
08 inode_init();
09 files_init(mempages);
10 mnt_init();
11 bdev_cache_init();
12 chrdev_init();
13 }
|
設定信號管理器的信號串列頭的快取記憶體。 linux/kernel/signal.c
01 void __init signals_init(void){
02 sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
03 }
|
頁寫回是當頁的內容要被替換時,必須把頁的內容寫入磁碟。這個寫入的動作就叫寫回。 linux/mm/page-writeback.c
01 void __init page_writeback_init(void){
02 int shift;
03
04 writeback_set_ratelimit();
05 register_cpu_notifier(&ratelimit_nb);
06 shift = calc_period_shift();
07 prop_descriptor_init(&vm_completions, shift);
08 prop_descriptor_init(&vm_dirties, shift);
09 }
|
連結檔案系統並建立根目錄的子目錄群。 linux/fs/proc/root.c
01 void __init proc_root_init(void){
02 struct vfsmount *mnt;
03 int err;
04
05 proc_init_inodecache();
06 err = register_filesystem(&proc_fs_type);
07 if (err) return;
08 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
09 if (IS_ERR(mnt)) {
10 unregister_filesystem(&proc_fs_type);
11 return;
12 }
13 init_pid_ns.proc_mnt = mnt;
14 proc_symlink("mounts", NULL, "self/mounts");
15 proc_net_init();
16 proc_mkdir("sysvipc", NULL);
17 proc_mkdir("fs", NULL);
18 proc_mkdir("driver", NULL);
19 proc_mkdir("fs/nfsd", NULL);
20 proc_mkdir("openprom", NULL);
21 proc_tty_init();
22 proc_device_tree_init();
23 proc_mkdir("bus", NULL);
24 proc_sys_init();
25 }
linux/fs/proc/proc_sysctl.c
01 int __init proc_sys_init(void){
02 struct proc_dir_entry *proc_sys_root;
03
04 proc_sys_root = proc_mkdir("sys", NULL);
05 proc_sys_root->proc_iops = &proc_sys_dir_operations;
06 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
07 proc_sys_root->nlink = 0;
08 return 0;
09 }
|
初始化控制群控制器。 linux/kernel/cgroup.c
01 int __init cgroup_init(void){
01 int err;
01 int i;
01 struct hlist_head *hhead;
01
01 err = bdi_init(&cgroup_backing_dev_info);
01 if (err) return err;
01 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
01 struct cgroup_subsys *ss = subsys[i];
01 if (!ss->early_init)
01 cgroup_init_subsys(ss);
01 if (ss->use_id)
01 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
01 }
01 hhead = css_set_hash(init_css_set.subsys);
01 hlist_add_head(&init_css_set.hlist, hhead);
01 BUG_ON(!init_root_id(&rootnode));
01 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
01 if (!cgroup_kobj) {
01 err = -ENOMEM;
01 goto out;
01 }
01 err = register_filesystem(&cgroup_fs_type);
01 if (err < 0) {
01 kobject_put(cgroup_kobj);
01 goto out;
01 }
01 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
01 out:
01 if (err) bdi_destroy(&cgroup_backing_dev_info);
01 return err;
01 }
|
初始化 CPU 集合(cpuset)控制器。 linux/kernel/cpuset.c
01 int __init cpuset_init(void){
01 int err = 0;
01
01 if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) BUG();
01 cpumask_setall(top_cpuset.cpus_allowed);
01 nodes_setall(top_cpuset.mems_allowed);
01 fmeter_init(&top_cpuset.fmeter);
01 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
01 top_cpuset.relax_domain_level = -1;
01 err = register_filesystem(&cpuset_fs_type);
01 if (err < 0) return err;
01 if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) BUG();
01 number_of_cpusets = 1;
01 return 0;
01 }
|
初始化行程狀態管理器。 linux/kernel/taskstats.c
01 void __init taskstats_init_early(void){
01 unsigned int i;
01
01 taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
01 for_each_possible_cpu(i) {
01 INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
01 init_rwsem(&(per_cpu(listener_array, i).sem));
01 }
01 }
|
初始化 init_task 的任務延遲記錄器。 linux/kernel/delayacct.c
01 void delayacct_init(void){
01 delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC);
01 delayacct_tsk_init(&init_task);
01 }
linux/kernel/delayacct.c
01 static inline void delayacct_tsk_init(struct task_struct *tsk){
02 tsk->delays = NULL;
03 if (delayacct_on)
04 __delayacct_tsk_init(tsk);
05 }
linux/kernel/delayacct.c
01 void __delayacct_tsk_init(struct task_struct *tsk){
02 tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
03 if (tsk->delays) spin_lock_init(&tsk->delays->lock);
04 }
|
檢查已知的硬體蟲,檢查的部分包括 CPU、CONFIG、FPU、HLT、POPAD。 這是為了確定 CPU 可以正確運作。 linux/arch/x86/kernel/cpu/bug.c
01 void __init check_bugs(void){
02 identify_boot_cpu();
03 printk(KERN_INFO "CPU: ");
04 print_cpu_info(&boot_cpu_data);
05 check_config();
06 check_fpu();
07 check_hlt();
08 check_popad();
09 init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
10 alternative_instructions();
11 }
|
ACPI 是 ADVANCED CONFIGURATION AND POWER INTERFACE 的簡稱,由 INTEL、MICROSOFT 等共同定義的電力管理系統。 linux/drivers/acpi/bus.c
01 void __init acpi_early_init(void){
01 acpi_status status = AE_OK;
01
01 if (acpi_disabled) return;
01 printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION);
01 if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE;
01 acpi_gbl_permanent_mmap = 1;
01 dmi_check_system(dsdt_dmi_table);
01 status = acpi_reallocate_root_table();
01 if (ACPI_FAILURE(status)) {
01 printk(KERN_ERR PREFIX "Unable to reallocate ACPI tables\n");
01 goto error0;
01 }
01 status = acpi_initialize_subsystem();
01 if (ACPI_FAILURE(status)) {
01 printk(KERN_ERR PREFIX
01 "Unable to initialize the ACPI Interpreter\n");
01 goto error0;
01 }
01 status = acpi_load_tables();
01 if (ACPI_FAILURE(status)) {
01 printk(KERN_ERR PREFIX
01 "Unable to load the System Description Tables\n");
01 goto error0;
01 }
01 if (!acpi_ioapic) {
01 if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
01 acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
01 acpi_sci_flags |= ACPI_MADT_TRIGGER_LEVEL;
01 }
01 acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
01 (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
01 }
01 else {
01 acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi;
01 }
01 status = acpi_enable_subsystem(~(ACPI_NO_HARDWARE_INIT | ACPI_NO_ACPI_ENABLE));
01 if (ACPI_FAILURE(status)) {
01 printk(KERN_ERR PREFIX "Unable to enable ACPI\n");
01 goto error0;
01 }
01 return;
01 error0:
01 disable_acpi();
01 return;
01 }
|
SFI 是 SIMPLE FIRMWARE INTERFACE 的簡稱,意思是簡單韌體介面。 linux/drivers/sfi/sfi-core.c
01 void __init sfi_init_late(void){
02 int length;
03
04 if (sfi_disabled) return;
05 length = syst_va->header.len;
06 sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple));
07 sfi_use_ioremap = 1;
08 syst_va = sfi_map_memory(syst_pa, length);
09 sfi_acpi_init();
10 }
|
FTRACE,即 FUNCTION TRACE,功能追蹤模組。初始化功能追蹤模組。 linux/kernel/trace/ftrace.c
01 void __init ftrace_init(void){
02 unsigned long count, addr, flags;
03 int ret;
04
05 addr = (unsigned long)ftrace_stub;
06 local_irq_save(flags);
07 ftrace_dyn_arch_init(&addr);
08 local_irq_restore(flags);
09 if (addr) goto failed;
10 count = __stop_mcount_loc - __start_mcount_loc;
11 ret = ftrace_dyn_table_alloc(count);
12 if (ret) goto failed;
13 last_ftrace_enabled = ftrace_enabled = 1;
14 ret = ftrace_process_locs(NULL,__start_mcount_loc,__stop_mcount_loc);
15 ret = register_module_notifier(&ftrace_module_nb);
16 if (ret) pr_warning("Failed to register trace ftrace module notifier\n");
17 set_ftrace_early_filters();
18 return;
19 failed:
20 ftrace_disabled = 1;
21 }
|
可能是因為這一部分必須啟動核心行程,所以作者將它和其他程式分開。 主要有兩個核心行程,核心初始化行程和核心行程建立行程。
linux/init/main.c
01 static noinline void __init_refok rest_init(void){
02 int pid;
03
04 rcu_scheduler_starting();
05 kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
06 numa_default_policy();
07 pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
08 rcu_read_lock();
09 kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
10 rcu_read_unlock();
11 complete(&kthreadd_done);
12 init_idle_bootup_task(current);
13 preempt_enable_no_resched();
14 schedule();
15 preempt_disable();
16 cpu_idle();
17 }
CPU 閒置,同時先佔式多工除能。所以完成工作後,這個初始化任務會變成閒置任務。
行號 | 說明 |
04 | 起動 RCU 排程器。RCU 是 READ-COPY UPDATE。 |
05 | 建立核心行程,核心初始化行程。 |
06 | 設定 NUMA 記憶體使用策略,這是一個空函式。 |
07 | 建立核心行程,核心行程建立行程,此行程負責將行程啟動串列上的行程加入行程執行行列。 |
08 | 使用 RCU 讀取鎖。 |
09 | 使用 PID 和命名空間取得任務結構位址,這必須在 RCU 讀取鎖致能的情況下執行。 |
10 | 釋放 RCU 讀取鎖。 |
11 | 通知 kthreadd 的第一個等待任務此一完成訊息。 |
12 | 設定閒置啟動行程排程狀態為閒置排程類別,表示要切換此行程到別的行程,好讓別的行程可以動作一下。別的行程包括 kernel_init 和 kthreadd。 |
13 | 致能先佔式多工,但不立即觸發重新排程(preempt_enable_no_resched)。 |
14 | 執行排程器,切換行程。 |
15 | 行程輪回來後,除能先佔式多工。 |
16 |