diff options
author | Tejun Heo <htejun@fb.com> | 2016-11-21 14:45:53 -0500 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2017-02-20 12:28:35 -0500 |
commit | 2977724b09eb997fc84a80517447b5d4a70770c7 (patch) | |
tree | 45fc98bb8093c2349f0467e1868d2e0f1b3948e2 /src/nspawn/nspawn-mount.c | |
parent | 2dcb526d7a43cc4ac9493877ceb05810ff56dbae (diff) |
core: make hybrid cgroup unified mode keep compat /sys/fs/cgroup/systemd hierarchy
Currently the hybrid mode mounts cgroup v2 on /sys/fs/cgroup/systemd instead of the v1
name=systemd hierarchy. While this works fine for systemd itself, it breaks
tools which expect cgroup v1 hierarchy on /sys/fs/cgroup/systemd.
This patch updates the hybrid mode so that it mounts v2 hierarchy on
/sys/fs/cgroup/unified and keeps v1 "name=systemd" hierarchy on
/sys/fs/cgroup/systemd for compatibility. systemd itself doesn't depend on the
"name=systemd" hierarchy at all. All operations take place on the v2 hierarchy
as before, but the v1 hierarchy is kept in sync so that any tools which expect
it to be there keep working. This allows systemd to take advantage of
cgroup v2 process management without requiring other tools to be aware of the
hybrid mode.
The hybrid mode is implemented by mapping the special systemd controller to
/sys/fs/cgroup/unified and making the basic cgroup utility operations -
cg_attach(), cg_create(), cg_rmdir() and cg_trim() - also operate on the
/sys/fs/cgroup/systemd hierarchy whenever the cgroup2 hierarchy is updated.
While a bit messy, this allows dropping the complications of using cgroup v1
for process management a lot sooner than would otherwise be possible, which
should make it a net gain in terms of maintainability.
v2: Fixed !cgns breakage reported by @evverx and renamed the unified mount
point to /sys/fs/cgroup/unified as suggested by @brauner.
v3: chown the compat hierarchy too on delegation. Suggested by @evverx.
v4: [zj]
- drop the change to the default; full "legacy" is still the default.
Diffstat (limited to 'src/nspawn/nspawn-mount.c')
-rw-r--r-- | src/nspawn/nspawn-mount.c | 79 |
1 files changed, 49 insertions, 30 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 1493ef6aad..ed4f1f9db8 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -890,7 +890,7 @@ static int get_controllers(Set *subsystems) { *e = 0; - if (STR_IN_SET(l, "", "name=systemd")) + if (STR_IN_SET(l, "", "name=systemd", "name=unified")) continue; p = strdup(l); @@ -909,7 +909,6 @@ static int mount_legacy_cgroup_hierarchy( const char *dest, const char *controller, const char *hierarchy, - CGroupUnified unified_requested, bool read_only) { const char *to, *fstype, *opts; @@ -927,14 +926,12 @@ static int mount_legacy_cgroup_hierarchy( /* The superblock mount options of the mount point need to be * identical to the hosts', and hence writable... */ - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { - fstype = "cgroup2"; - opts = NULL; - } else { - fstype = "cgroup"; - opts = "none,name=systemd,xattr"; - } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) { + fstype = "cgroup2"; + opts = NULL; + } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) { + fstype = "cgroup"; + opts = "none,name=systemd,xattr"; } else { fstype = "cgroup"; opts = controller; @@ -1012,7 +1009,7 @@ static int mount_legacy_cgns_supported( if (!controller) break; - r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns); + r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); if (r < 0) return r; @@ -1046,7 +1043,13 @@ static int mount_legacy_cgns_supported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ 
-1117,7 +1120,7 @@ static int mount_legacy_cgns_unsupported( if (r == -EINVAL) { /* Not a symbolic link, but directly a single cgroup hierarchy */ - r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); if (r < 0) return r; @@ -1137,7 +1140,7 @@ static int mount_legacy_cgns_unsupported( continue; } - r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); if (r < 0) return r; @@ -1150,7 +1153,13 @@ static int mount_legacy_cgns_unsupported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1202,12 +1211,25 @@ int mount_cgroups( return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); } +static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root) +{ + int r; + + /* Make our own cgroup a (writable) bind mount */ + r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); + if (r < 0) + return r; + + /* And then remount the systemd cgroup root read-only */ + return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, + MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); +} + int mount_systemd_cgroup_writable( const char *dest, CGroupUnified unified_requested) { _cleanup_free_ char *own_cgroup_path = NULL; - const char *systemd_root, *systemd_own; int r; assert(dest); @@ -1220,22 +1242,19 @@ int mount_systemd_cgroup_writable( if (path_equal(own_cgroup_path, "/")) return 0; - if 
(unified_requested >= CGROUP_UNIFIED_ALL) { - systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup"); - } else { - systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); - } + if (unified_requested >= CGROUP_UNIFIED_ALL) + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup")); - /* Make our own cgroup a (writable) bind mount */ - r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); - if (r < 0) - return r; + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/unified")); + if (r < 0) + return r; + } - /* And then remount the systemd cgroup root read-only */ - return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, - MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/systemd")); } int setup_volatile_state( |