summaryrefslogtreecommitdiff
path: root/src/nspawn
diff options
context:
space:
mode:
authorTejun Heo <htejun@fb.com>2016-11-21 14:45:53 -0500
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2017-02-20 12:28:35 -0500
commit2977724b09eb997fc84a80517447b5d4a70770c7 (patch)
tree45fc98bb8093c2349f0467e1868d2e0f1b3948e2 /src/nspawn
parent2dcb526d7a43cc4ac9493877ceb05810ff56dbae (diff)
core: make hybrid cgroup unified mode keep compat /sys/fs/cgroup/systemd hierarchy
Currently the hybrid mode mounts cgroup v2 on /sys/fs/cgroup instead of the v1 name=systemd hierarchy. While this works fine for systemd itself, it breaks tools which expect cgroup v1 hierarchy on /sys/fs/cgroup/systemd. This patch updates the hybrid mode so that it mounts v2 hierarchy on /sys/fs/cgroup/unified and keeps v1 "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility. systemd itself doesn't depend on the "name=systemd" hierarchy at all. All operations take place on the v2 hierarchy as before but the v1 hierarchy is kept in sync so that any tools which expect it to be there can keep doing so. This allows systemd to take advantage of cgroup v2 process management without requiring other tools to be aware of the hybrid mode. The hybrid mode is implemented by mapping the special systemd controller to /sys/fs/cgroup/unified and making the basic cgroup utility operations - cg_attach(), cg_create(), cg_rmdir() and cg_trim() - also operate on the /sys/fs/cgroup/systemd hierarchy whenever the cgroup2 hierarchy is updated. While a bit messy, this will allow dropping complications from using cgroup v1 for process management a lot sooner than otherwise possible which should make it a net gain in terms of maintainability. v2: Fixed !cgns breakage reported by @evverx and renamed the unified mount point to /sys/fs/cgroup/unified as suggested by @brauner. v3: chown the compat hierarchy too on delegation. Suggested by @evverx. v4: [zj] - drop the change to default, full "legacy" is still the default.
Diffstat (limited to 'src/nspawn')
-rw-r--r--src/nspawn/nspawn-mount.c79
-rw-r--r--src/nspawn/nspawn.c4
2 files changed, 51 insertions, 32 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 1493ef6aad..ed4f1f9db8 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -890,7 +890,7 @@ static int get_controllers(Set *subsystems) {
*e = 0;
- if (STR_IN_SET(l, "", "name=systemd"))
+ if (STR_IN_SET(l, "", "name=systemd", "name=unified"))
continue;
p = strdup(l);
@@ -909,7 +909,6 @@ static int mount_legacy_cgroup_hierarchy(
const char *dest,
const char *controller,
const char *hierarchy,
- CGroupUnified unified_requested,
bool read_only) {
const char *to, *fstype, *opts;
@@ -927,14 +926,12 @@ static int mount_legacy_cgroup_hierarchy(
/* The superblock mount options of the mount point need to be
* identical to the hosts', and hence writable... */
- if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
- if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
- fstype = "cgroup2";
- opts = NULL;
- } else {
- fstype = "cgroup";
- opts = "none,name=systemd,xattr";
- }
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) {
+ fstype = "cgroup2";
+ opts = NULL;
+ } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) {
+ fstype = "cgroup";
+ opts = "none,name=systemd,xattr";
} else {
fstype = "cgroup";
opts = controller;
@@ -1012,7 +1009,7 @@ static int mount_legacy_cgns_supported(
if (!controller)
break;
- r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns);
+ r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
if (r < 0)
return r;
@@ -1046,7 +1043,13 @@ static int mount_legacy_cgns_supported(
}
skip_controllers:
- r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
if (r < 0)
return r;
@@ -1117,7 +1120,7 @@ static int mount_legacy_cgns_unsupported(
if (r == -EINVAL) {
/* Not a symbolic link, but directly a single cgroup hierarchy */
- r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true);
+ r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
if (r < 0)
return r;
@@ -1137,7 +1140,7 @@ static int mount_legacy_cgns_unsupported(
continue;
}
- r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true);
+ r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
if (r < 0)
return r;
@@ -1150,7 +1153,13 @@ static int mount_legacy_cgns_unsupported(
}
skip_controllers:
- r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
if (r < 0)
return r;
@@ -1202,12 +1211,25 @@ int mount_cgroups(
return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
}
+static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root)
+{
+ int r;
+
+ /* Make our own cgroup a (writable) bind mount */
+ r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ /* And then remount the systemd cgroup root read-only */
+ return mount_verbose(LOG_ERR, NULL, systemd_root, NULL,
+ MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+}
+
int mount_systemd_cgroup_writable(
const char *dest,
CGroupUnified unified_requested) {
_cleanup_free_ char *own_cgroup_path = NULL;
- const char *systemd_root, *systemd_own;
int r;
assert(dest);
@@ -1220,22 +1242,19 @@ int mount_systemd_cgroup_writable(
if (path_equal(own_cgroup_path, "/"))
return 0;
- if (unified_requested >= CGROUP_UNIFIED_ALL) {
- systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
- systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
- } else {
- systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
- systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
- }
+ if (unified_requested >= CGROUP_UNIFIED_ALL)
+ return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path),
+ prefix_roota(dest, "/sys/fs/cgroup"));
- /* Make our own cgroup a (writable) bind mount */
- r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL);
- if (r < 0)
- return r;
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path),
+ prefix_roota(dest, "/sys/fs/cgroup/unified"));
+ if (r < 0)
+ return r;
+ }
- /* And then remount the systemd cgroup root read-only */
- return mount_verbose(LOG_ERR, NULL, systemd_root, NULL,
- MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+ return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path),
+ prefix_roota(dest, "/sys/fs/cgroup/systemd"));
}
int setup_volatile_state(
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 75b6728688..42355115ff 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -344,8 +344,8 @@ static int detect_unified_cgroup_hierarchy(const char *directory) {
else
arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
} else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) {
- /* Mixed cgroup hierarchy support was added in 232 */
- r = systemd_installation_has_version(directory, 232);
+ /* Mixed cgroup hierarchy support was added in 233 */
+ r = systemd_installation_has_version(directory, 233);
if (r < 0)
return log_error_errno(r, "Failed to determine systemd version in container: %m");
if (r > 0)