https://github.com/kata-containers/documentation/blob/master/design/host-cgroups.md
https://zhuanlan.zhihu.com/p/105230155
https://blog.csdn.net/yuchunyu97/article/details/109241723
https://github.com/kata-containers/runtime/issues/2090
root@ubuntu:/opt/gopath/src/github.com/kata-containers# kata-runtime kata-env | grep SandboxCgroup
SandboxCgroupOnly = false
root@ubuntu:/opt/gopath/src/github.com/kata-containers#
// constrainHypervisor will place the VMM and vCPU threads into cgroups. func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error { // VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set. // This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take // Kata/VMM into account, Kata may fail to boot due to being overconstrained. // If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained // cgroup if s.config.SandboxCgroupOnly { // Kata components were moved into the sandbox-cgroup already, so VMM // will already land there as well. No need to take action return nil } pids := s.hypervisor.getPids() if len(pids) == 0 || pids[0] == 0 { return fmt.Errorf("Invalid hypervisor PID: %+v", pids) } // VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set. // This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take // Kata/VMM into account, Kata may fail to boot due to being overconstrained. // If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained // cgroup // Move the VMM into cgroups without constraints, those cgroups are not yet supported. resources := &specs.LinuxResources{} path := cgroupNoConstraintsPath(s.state.CgroupPath) vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources) if err != nil { return fmt.Errorf("Could not create cgroup %v: %v", path, err) } for _, pid := range pids { if pid <= 0 { s.Logger().Warnf("Invalid hypervisor pid: %d", pid) continue } if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil { return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err) } } // when new container joins, new CPU could be hotplugged, so we // have to query fresh vcpu info from hypervisor every time. 
tids, err := s.hypervisor.getThreadIDs() if err != nil { return fmt.Errorf("failed to get thread ids from hypervisor: %v", err) } if len(tids.vcpus) == 0 { // If there's no tid returned from the hypervisor, this is not // a bug. It simply means there is nothing to constrain, hence // let's return without any error from here. return nil } // Move vcpus (threads) into cgroups with constraints. // Move whole hypervisor process would be easier but the IO/network performance // would be over-constrained. for _, i := range tids.vcpus { // In contrast, AddTask will write thread id to `tasks` // After this, vcpu threads are in "vcpu" sub-cgroup, other threads in // qemu will be left in parent cgroup untouched. if err := cgroup.AddTask(cgroups.Process{ Pid: i, }); err != nil { return err } } return nil }