• systemd挂载cgroup文件系统流程简要分析


    参考:Control Groups vs. Control Groups

    systemd的源码链接:https://github.com/systemd/systemd

    Ubuntu系统开机后,可以看到已经挂载了很多文件系统,其中一部分是systemd在代码里自动挂载的,比如cgroup。下面是开机后的挂载列表:

    /dev/root / ext4 rw,relatime 0 0
    devtmpfs /dev devtmpfs rw,relatime,size=1742432k,nr_inodes=435608,mode=755 0 0
    sysfs /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0
    proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0
    selinuxfs /sys/fs/selinux selinuxfs rw,relatime 0 0
    tmpfs /dev/shm tmpfs rw,nosuid,nodev 0 0
    devpts /dev/pts devpts rw,nosuid,noexec,relatime,gid=5,mode=620,ptmxmode=000 0 0
    tmpfs /run tmpfs rw,nosuid,nodev,size=348908k,mode=755 0 0
    tmpfs /run/lock tmpfs rw,nosuid,nodev,noexec,relatime,size=5120k 0 0
    tmpfs /sys/fs/cgroup tmpfs rw,nosuid,nodev,noexec,mode=755 0 0
    cgroup2 /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate 0 0
    cgroup /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,xattr,name=systemd 0 0
    none /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0
    cgroup /sys/fs/cgroup/debug cgroup rw,nosuid,nodev,noexec,relatime,debug 0 0
    cgroup /sys/fs/cgroup/hugetlb cgroup rw,nosuid,nodev,noexec,relatime,hugetlb 0 0
    cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
    cgroup /sys/fs/cgroup/rdma cgroup rw,nosuid,nodev,noexec,relatime,rdma 0 0
    cgroup /sys/fs/cgroup/blkio cgroup rw,nosuid,nodev,noexec,relatime,blkio 0 0
    cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct 0 0
    cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset 0 0
    cgroup /sys/fs/cgroup/misc cgroup rw,nosuid,nodev,noexec,relatime,misc 0 0
    cgroup /sys/fs/cgroup/pids cgroup rw,nosuid,nodev,noexec,relatime,pids 0 0
    cgroup /sys/fs/cgroup/perf_event cgroup rw,nosuid,nodev,noexec,relatime,perf_event 0 0
    cgroup /sys/fs/cgroup/devices cgroup rw,nosuid,nodev,noexec,relatime,devices 0 0
    cgroup /sys/fs/cgroup/freezer cgroup rw,nosuid,nodev,noexec,relatime,freezer 0 0
    systemd-1 /proc/sys/fs/binfmt_misc autofs rw,relatime,fd=29,pgrp=1,timeout=0,minproto=5,maxproto=5,direct 0 0
    debugfs /sys/kernel/debug debugfs rw,nosuid,nodev,noexec,relatime 0 0
    mqueue /dev/mqueue mqueue rw,nosuid,nodev,noexec,relatime 0 0
    hugetlbfs /dev/hugepages hugetlbfs rw,relatime,pagesize=2M 0 0
    tracefs /sys/kernel/tracing tracefs rw,nosuid,nodev,noexec,relatime 0 0
    hostshare /mnt 9p rw,sync,dirsync,relatime,access=client,trans=virtio 0 0
    tmpfs /run/user/1000 tmpfs rw,nosuid,nodev,relatime,size=348904k,mode=700,uid=1000,gid=1000 0 0
    

    下面简要分析这部分的代码:

    main
    	-> mount_setup
    		-> mount_points_setup
    			//遍历mount_table数组,对每一项调用mount_one挂载文件系统;挂载前会先执行该数组项指定的condition_fn(如果有),判断当前环境是否可以挂载。
    			//比如将tmpfs文件系统挂载到/sys/fs/cgroup
    	-> initialize_runtime
    		-> mount_cgroup_controllers
    			-> cg_kernel_controllers
    				//通过读取/proc/cgroups获取当前系统可用的cgroup子系统
    			-> 根据前一步获取到的cgroup子系统,逐个挂载;挂载时会调用join_with检查该子系统是否需要与另一个子系统合并挂载,
    				目前支持合并挂载的组合是:cpu和cpuacct、net_cls和net_prio
    			-> 当所有的cgroup子系统都挂载完毕后,重新将/sys/fs/cgroup挂载为只读
    	-> invoke_main_loop
    		//进入事件循环
    	
    

    mount_table定义如下:

    /*
     * Table of file systems mounted early during startup.
     *
     * Each entry is initialized positionally. Judging from the values used
     * below, the fields appear to be, in order: source name, mount path,
     * file system type, mount option string (or NULL), MS_* mount flags,
     * an optional condition function (or NULL), and MNT_* handling flags.
     * NOTE(review): the MountPoint field names are not visible here -- confirm
     * against the struct definition.
     *
     * Several paths appear more than once with different options; the order of
     * the entries is therefore significant and must not be changed.
     */
    static const MountPoint mount_table[] = {
            /* Basic API file systems; no condition function is set for these. */
            { "proc",        "/proc",                     "proc",       NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              NULL,          MNT_FATAL|MNT_IN_CONTAINER|MNT_FOLLOW_SYMLINK },
            { "sysfs",       "/sys",                      "sysfs",      NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              NULL,          MNT_FATAL|MNT_IN_CONTAINER },
            { "devtmpfs",    "/dev",                      "devtmpfs",   "mode=755" TMPFS_LIMITS_DEV,               MS_NOSUID|MS_STRICTATIME,
              NULL,          MNT_FATAL|MNT_IN_CONTAINER },
            { "securityfs",  "/sys/kernel/security",      "securityfs", NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              NULL,          MNT_NONE                   },
    #if ENABLE_SMACK
            /* Only compiled in with ENABLE_SMACK; both entries are gated on mac_smack_use. */
            { "smackfs",     "/sys/fs/smackfs",           "smackfs",    "smackfsdef=*",                            MS_NOSUID|MS_NOEXEC|MS_NODEV,
              mac_smack_use, MNT_FATAL                  },
            { "tmpfs",       "/dev/shm",                  "tmpfs",      "mode=1777,smackfsroot=*",                 MS_NOSUID|MS_NODEV|MS_STRICTATIME,
              mac_smack_use, MNT_FATAL                  },
    #endif
            /* Unconditional /dev/shm entry, listed after the SMACK-specific variant of the same path. */
            { "tmpfs",       "/dev/shm",                  "tmpfs",      "mode=1777",                               MS_NOSUID|MS_NODEV|MS_STRICTATIME,
              NULL,          MNT_FATAL|MNT_IN_CONTAINER },
            { "devpts",      "/dev/pts",                  "devpts",     "mode=620,gid=" STRINGIFY(TTY_GID),        MS_NOSUID|MS_NOEXEC,
              NULL,          MNT_IN_CONTAINER           },
    #if ENABLE_SMACK
            /* SMACK-specific /run variant, gated on mac_smack_use. */
            { "tmpfs",       "/run",                      "tmpfs",      "mode=755,smackfsroot=*" TMPFS_LIMITS_RUN, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
              mac_smack_use, MNT_FATAL                  },
    #endif
            /* Unconditional /run entry, listed after the SMACK-specific variant of the same path. */
            { "tmpfs",       "/run",                      "tmpfs",      "mode=755" TMPFS_LIMITS_RUN,               MS_NOSUID|MS_NODEV|MS_STRICTATIME,
              NULL,          MNT_FATAL|MNT_IN_CONTAINER },
            /*
             * cgroup2 directly on /sys/fs/cgroup, gated on cg_is_unified_wanted.
             * The same mount is listed three times with progressively fewer
             * options; none of the three is MNT_FATAL.
             * NOTE(review): presumably later entries are fallbacks for kernels
             * that reject the options of an earlier one -- confirm in mount_one.
             */
            { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    "nsdelegate,memory_recursiveprot",         MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
            { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    "nsdelegate",                              MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
            { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
            /* tmpfs on /sys/fs/cgroup, gated on cg_is_legacy_wanted. */
            { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=755" TMPFS_LIMITS_SYS_FS_CGROUP,     MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
              cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
            /* cgroup2 on /sys/fs/cgroup/unified, gated on cg_is_hybrid_wanted; listed with and without "nsdelegate". */
            { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    "nsdelegate",                              MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
            { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
            /*
             * Named "systemd" cgroup hierarchy, gated on cg_is_legacy_wanted.
             * Listed first with "xattr" (not MNT_FATAL), then without it
             * (MNT_FATAL).
             */
            { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr",                 MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_legacy_wanted, MNT_IN_CONTAINER     },
            { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",                       MS_NOSUID|MS_NOEXEC|MS_NODEV,
              cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
            { "pstore",      "/sys/fs/pstore",            "pstore",     NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              NULL,          MNT_NONE                   },
    #if ENABLE_EFI
            /* Only compiled in with ENABLE_EFI; gated on is_efi_boot. */
            { "efivarfs",    "/sys/firmware/efi/efivars", "efivarfs",   NULL,                                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
              is_efi_boot,   MNT_NONE                   },
    #endif
            { "bpf",         "/sys/fs/bpf",               "bpf",        "mode=700",                                MS_NOSUID|MS_NOEXEC|MS_NODEV,
              NULL,          MNT_NONE,                  },
    };
    
  • 相关阅读:
    开源情报 Advise
    介绍几本搜索引擎的基础书
    Internet上的图像检索技术
    交易系统 转 武胜
    MySql数据库导出csv 武胜
    C# Process.Start()方法详解 武胜
    转 嵌入处部程序 武胜
    网际风通视接口 武胜
    C# Process运行cmd命令的异步回显 武胜
    RBreaker 武胜
  • 原文地址:https://www.cnblogs.com/pengdonglin137/p/16150827.html
Copyright © 2020-2023  润新知