解析linux根文件系统的挂载过程

转：https://blog.csdn.net/yhf19881015/article/details/7552840

一：前言

前段时间在编译kernel的时候发现rootfs挂载不上，而相同的root选项设置在旧版的image上却可以。为了彻底解决这个问题，研究了一下rootfs的挂载过程，特总结如下，希望能给对这部分知识点比较迷茫的朋友一点帮助。

二：rootfs的种类

总的来说，rootfs分为两种：虚拟rootfs和真实rootfs。现在kernel的发展趋势是将更多的功能放到用户空间完成，以保持内核的精简。虚拟rootfs也是各linux发行厂商普遍采用的一种方式：可以将一部分的初始化工作放在虚拟的rootfs里完成，然后切换到真实的文件系统。

在虚拟rootfs的发展过程中。又有以下几个版本：

initramfs:

Initramfs是在 kernel 2.5中引入的技术，实际上它的含义就是：在内核镜像中附加一个cpio包，这个cpio包中包含了一个小型的文件系统，当内核启动时，内核将这个cpio包解开，并且将其中包含的文件系统释放到rootfs中，内核中的一部分初始化代码会放到这个文件系统中，作为用户层进程来执行。这样带来的明显的好处是精简了内核的初始化代码，而且使得内核的初始化过程更容易定制。这种方式的rootfs是包含在kernel image之中的。

cpio-initrd: cpio格式的rootfs

image-initrd:传统格式的rootfs

关于这两种虚拟文件系统的制作请自行参阅其它资料

三：rootfs文件系统的挂载过程

这里说的rootfs不同于上面分析的rootfs。这里指的是系统初始化时的根结点，即/结点。它是基于内存的rootfs文件系统。这部分之前在<<linux启动过程分析>>和文件系统中已经分析过，为了知识的连贯性这里再重复一次。

start_kernel()-->mnt_init():

void __init mnt_init(void)

{

……

init_rootfs();

init_mount_tree();

}

init_rootfs()的代码如下：

int __init init_rootfs(void)

{

int err;

err = bdi_init(&ramfs_backing_dev_info);

if (err)

return err;

err = register_filesystem(&rootfs_fs_type);

if (err)

bdi_destroy(&ramfs_backing_dev_info);

return err;

}

这个函数很简单。就是注册了rootfs的文件系统.

init_mount_tree()代码如下：

static void __init init_mount_tree(void)

{

struct vfsmount *mnt;

struct mnt_namespace *ns;

struct path root;

mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);

if (IS_ERR(mnt))

panic("Can't create rootfs");

ns = kmalloc(sizeof(*ns), GFP_KERNEL);

if (!ns)

panic("Can't allocate initial namespace");

atomic_set(&ns->count, 1);

INIT_LIST_HEAD(&ns->list);

init_waitqueue_head(&ns->poll);

ns->event = 0;

list_add(&mnt->mnt_list, &ns->list);

ns->root = mnt;

mnt->mnt_ns = ns;

init_task.nsproxy->mnt_ns = ns;

get_mnt_ns(ns);

root.mnt = ns->root;

root.dentry = ns->root->mnt_root;

set_fs_pwd(current->fs, &root);

set_fs_root(current->fs, &root);

}

在这里，将rootfs文件系统挂载。它的挂载点默认为“/”。最后将进程的根目录和当前目录切换为“/”，这也就是根目录的由来。不过这里只是初始化，等挂载完具体的文件系统之后，一般都会将根目录切换到具体的文件系统。所以在系统启动之后，用mount命令是看不到rootfs的挂载信息的。

四：虚拟文件系统的挂载

根目录已经挂上去了，可以挂载具体的文件系统了.

在start_kernel()-->rest_init()-->kernel_init():

static int __init kernel_init(void * unused)

{

……

do_basic_setup();

if (!ramdisk_execute_command)

ramdisk_execute_command = "/init";

if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {

ramdisk_execute_command = NULL;

prepare_namespace();

}

/*

* Ok, we have completed the initial bootup, and

* we're essentially up and running. Get rid of the

* initmem segments and start the user-mode stuff..

*/

init_post();

return 0;

}

do_basic_setup()是一个很关键的函数，所有直接编译在kernel中的模块都是由它启动的。代码片段如下：

static void __init do_basic_setup(void)

{

/* drivers will send hotplug events */

init_workqueues();

usermodehelper_init();

driver_init();

init_irq_proc();

do_initcalls();

}

do_initcalls()用来调用所有位于__initcall_start和__initcall_end之间的函数，而静态编译进内核的modules也会将其入口放置在这段区间里。

跟根文件系统相关的初始化函数都会由rootfs_initcall（）所引用。注意到有以下初始化函数：

rootfs_initcall(populate_rootfs);

也就是说，在系统初始化的时候会调用populate_rootfs进行初始化。代码如下：

static int __init populate_rootfs(void)

{

char *err = unpack_to_rootfs(__initramfs_start,

__initramfs_end - __initramfs_start, 0);

if (err)

panic(err);

if (initrd_start) {

#ifdef CONFIG_BLK_DEV_RAM

int fd;

printk(KERN_INFO "checking if image is initramfs...");

err = unpack_to_rootfs((char *)initrd_start,initrd_end - initrd_start, 1);

if (!err) {

printk(" it is\n");

unpack_to_rootfs((char *)initrd_start,initrd_end - initrd_start, 0);

free_initrd();

return 0;

}

printk("it isn't (%s); looks like an initrd\n", err);

fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);

if (fd >= 0) {

sys_write(fd, (char *)initrd_start,initrd_end - initrd_start);

sys_close(fd);

free_initrd();

}

#else

printk(KERN_INFO "Unpacking initramfs...");

err = unpack_to_rootfs((char *)initrd_start,initrd_end - initrd_start, 0);

if (err)

panic(err);

printk(" done\n");

free_initrd();

#endif

}

return 0;

}

unpack_to_rootfs：顾名思义就是解压包，并将其释放至rootfs。它实际上有两个功能，一个是释放包，一个是查看包，看其是否属于cpio结构的包。功能选择是根据最后的一个参数来区分的.

在这个函数里，对应我们之前分析的三种虚拟根文件系统的情况。一种是跟kernel融为一体的initramfs.在编译kernel的时候，通过链接脚本将其存放在__initramfs_start至__initramfs_end的区域。这种情况下，直接调用unpack_to_rootfs将其释放到根目录.如果不是属于这种形式的。也就是__initramfs_start和__initramfs_end的值相等，长度为零。不会做任何处理。退出.

对应后两种情况。从代码中看到，必须要配置CONFIG_BLK_DEV_RAM才会支持image-initrd，否则全当成cpio-initrd的形式处理。

对于是cpio-initrd的情况。直接将其释放到根目录。对于是image-initrd的情况。将其释放到/initrd.image.最后将initrd内存区域归入伙伴系统。这段内存就可以由操作系统来做其它的用途了。

接下来，内核对这几种情况又是怎么处理的呢？不要着急。往下看：

回到kernel_init()这个函数：

static int __init kernel_init(void * unused)

{

…….

do_basic_setup();

/*

* check if there is an early userspace init. If yes, let it do all

* the work

*/

if (!ramdisk_execute_command)

ramdisk_execute_command = "/init";

if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0)

{

ramdisk_execute_command = NULL; prepare_namespace();

}

/*
 * Ok, we have completed the initial bootup, and
 * we're essentially up and running. Get rid of the
 * initmem segments and start the user-mode stuff..
 */
init_post();
return 0;
}

ramdisk_execute_command：在kernel解析引导参数的时候使用。如果用户指定了init文件路径，即使用了“rdinit=”，就会将这个参数值存放到这里（“init=”的值则存放在execute_command中）。如果没有指定init文件路径，默认为/init。

对应于前面一段的分析，我们知道，对于initramfs和cpio-initrd的情况，都会将虚拟根文件系统释放到根目录。如果这些虚拟文件系统里有/init这个文件，就会转入到init_post()。

init_post()代码如下：

static int noinline init_post(void)
{
    free_initmem();
    unlock_kernel();
    mark_rodata_ro();
    system_state = SYSTEM_RUNNING;
    numa_default_policy();

    if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
        printk(KERN_WARNING "Warning: unable to open an initial console.\n");

    (void) sys_dup(0);
    (void) sys_dup(0);

    if (ramdisk_execute_command) {
        run_init_process(ramdisk_execute_command);
        printk(KERN_WARNING "Failed to execute %s\n",
               ramdisk_execute_command);
    }

    /*
     * We try each of these until one succeeds.
     *
     * The Bourne shell can be used instead of init if we are
     * trying to recover a really broken machine.
     */
    if (execute_command) {
        run_init_process(execute_command);
        printk(KERN_WARNING "Failed to execute %s. Attempting "
               "defaults...\n", execute_command);
    }
    run_init_process("/sbin/init");
    run_init_process("/etc/init");
    run_init_process("/bin/init");
    run_init_process("/bin/sh");

    panic("No init found.  Try passing init= option to kernel.");
}

从代码中可以看出，会依次执行指定的init文件，如果失败，就会执行/sbin/init、/etc/init、/bin/init、/bin/sh。

注意的是，run_init_process在调用相应程序运行的时候，用的是kernel_execve。也就是说被调用的程序会替换当前进程。只要上述任意一个文件调用成功，就不会返回到这个函数。如果上面几个文件都无法执行，就打印出没有找到init文件的错误。

对于image-initrd或者是虚拟文件系统中没有包含/init的情况，会由prepare_namespace()处理。代码如下：

void __init prepare_namespace(void)
{
    int is_floppy;

    if (root_delay) {
        printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
               root_delay);
        ssleep(root_delay);
    }

    /* wait for the known devices to complete their probing */
    while (driver_probe_done() != 0)
        msleep(100);

    //mtd的处理
    md_run_setup();

    if (saved_root_name[0]) {
        root_device_name = saved_root_name;
        if (!strncmp(root_device_name, "mtd", 3)) {
            mount_block_root(root_device_name, root_mountflags);
            goto out;
        }
        ROOT_DEV = name_to_dev_t(root_device_name);
        if (strncmp(root_device_name, "/dev/", 5) == 0)
            root_device_name += 5;
    }

    if (initrd_load())
        goto out;

    /* wait for any asynchronous scanning to complete */
    if ((ROOT_DEV == 0) && root_wait) {
        printk(KERN_INFO "Waiting for root device %s...\n",
               saved_root_name);
        while (driver_probe_done() != 0 ||
               (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
            msleep(100);
    }

    is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

    if (is_floppy && rd_doload && rd_load_disk(0))
        ROOT_DEV = Root_RAM0;

    mount_root();
out:
    sys_mount(".", "/", NULL, MS_MOVE, NULL);
    sys_chroot(".");
}

这里有几个比较有意思的处理。首先用户可以用root=来指定根文件系统，它的值保存在saved_root_name中。如果用户指定了以mtd开始的字串做为它的根文件系统，就会直接去挂载，这个文件是mtdblock的设备文件。否则将设备结点文件转换为ROOT_DEV即设备节点号，然后转向initrd_load()执行initrd预处理后，再将具体的根文件系统挂载。

注意到，在这个函数末尾，会调用sys_mount()来移动当前文件系统挂载点到“/”目录下，然后将根目录切换到当前目录。这样，根文件系统的挂载点就成为了我们在用户空间所看到的“/”了。

对于其它根文件系统的情况，会先经过initrd的处理。即：

int __init initrd_load(void)
{
    if (mount_initrd) {
        create_dev("/dev/ram", Root_RAM0);
        /*
         * Load the initrd data into /dev/ram0. Execute it as initrd
         * unless /dev/ram0 is supposed to be our actual root device,
         * in that case the ram disk is just set up here, and gets
         * mounted in the normal path.
         */
        if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
            sys_unlink("/initrd.image");
            handle_initrd();
            return 1;
        }
    }
    sys_unlink("/initrd.image");
    return 0;
}

建立一个Root_RAM0的设备节点，并将/initrd.image释放到这个节点中。/initrd.image的内容，就是我们之前分析的image-initrd。如果根文件设备号不是Root_RAM0（即用户指定的根文件系统不是/dev/ram0），就会转入到handle_initrd()。如果当前根文件系统是/dev/ram0，将其直接挂载就好了。

handle_initrd()代码如下：

static void __init handle_initrd(void)
{
    int error;
    int pid;

    real_root_dev = new_encode_dev(ROOT_DEV);
    create_dev("/dev/root.old", Root_RAM0);
    /* mount initrd on rootfs' /root */
    mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
    sys_mkdir("/old", 0700);
    root_fd = sys_open("/", 0, 0);
    old_fd = sys_open("/old", 0, 0);
    /* move initrd over / and chdir/chroot in initrd root */
    sys_chdir("/root");
    sys_mount(".", "/", NULL, MS_MOVE, NULL);
    sys_chroot(".");

    /*
     * In case that a resume from disk is carried out by linuxrc or one of
     * its children, we need to tell the freezer not to wait for us.
     */
    current->flags |= PF_FREEZER_SKIP;

    pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
    if (pid > 0)
        while (pid != sys_wait4(-1, NULL, 0, NULL))
            yield();

    current->flags &= ~PF_FREEZER_SKIP;

    /* move initrd to rootfs' /old */
    sys_fchdir(old_fd);
    sys_mount("/", ".", NULL, MS_MOVE, NULL);
    /* switch root and cwd back to / of rootfs */
    sys_fchdir(root_fd);
    sys_chroot(".");
    sys_close(old_fd);
    sys_close(root_fd);

    if (new_decode_dev(real_root_dev) == Root_RAM0) {
        sys_chdir("/old");
        return;
    }

    ROOT_DEV = new_decode_dev(real_root_dev);
    mount_root();

    printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
    error = sys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
    if (!error)
        printk("okay\n");
    else {
        int fd = sys_open("/dev/root.old", O_RDWR, 0);
        if (error == -ENOENT)
            printk("/initrd does not exist. Ignored.\n");
        else
            printk("failed\n");
        printk(KERN_NOTICE "Unmounting old root\n");
        sys_umount("/old", MNT_DETACH);
        printk(KERN_NOTICE "Trying to free ramdisk memory ... ");
        if (fd < 0) {
            error = fd;
        } else {
            error = sys_ioctl(fd, BLKFLSBUF, 0);
            sys_close(fd);
        }
        printk(!error ? "okay\n" : "failed\n");
    }
}

先将/dev/ram0挂载，而后执行/linuxrc。等其执行完后，切换根目录，再挂载具体的根文件系统。

到这里，文件系统挂载的全部内容就分析完了。

五：小结

在本小节里，分析了根文件系统的挂载流程，并对几个虚拟根文件系统的情况做了详细的分析。理解这部分，对我们构建linux嵌入式开发系统是很有帮助的。

PS:参考资料：ibm技术论坛的<<Linux2.6 内核的 Initrd 机制解析>>

附根文件系统挂载流程图

相关阅读:
细数阿里云在使用 Docker 过程中踩过的那些坑
细数阿里云在使用 Docker 过程中踩过的那些坑
javascript – 从页面停用浏览器打印选项(页眉,页脚,页边距)？
jquery children()方法
jquery 获取输入框的值
java BigDecimal加减乘除
window.print()打印时，如何自定义页眉/页脚、页边距
深入分析：12C ASM Normal冗余中PDB文件块号与AU关系与恢复
我不是药神，救不了你的穷根
Install fail! Error: EPERM: operation not permitted

原文地址：https://www.cnblogs.com/newjiang/p/12535629.html