summaryrefslogtreecommitdiff
path: root/src/systemd-nspawn/nspawn-cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/systemd-nspawn/nspawn-cgroup.c')
-rw-r--r--src/systemd-nspawn/nspawn-cgroup.c185
1 files changed, 185 insertions, 0 deletions
diff --git a/src/systemd-nspawn/nspawn-cgroup.c b/src/systemd-nspawn/nspawn-cgroup.c
new file mode 100644
index 0000000000..9e793d85f1
--- /dev/null
+++ b/src/systemd-nspawn/nspawn-cgroup.c
@@ -0,0 +1,185 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mount.h>
+
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fileio.h"
+#include "systemd-basic/mkdir.h"
+#include "systemd-basic/mount-util.h"
+#include "systemd-basic/rm-rf.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-basic/strv.h"
+#include "systemd-basic/util.h"
+
+#include "nspawn-cgroup.h"
+
+static int chown_cgroup_path(const char *path, uid_t uid_shift) {
+ _cleanup_close_ int fd = -1;
+ const char *fn;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ FOREACH_STRING(fn,
+ ".",
+ "tasks",
+ "notify_on_release",
+ "cgroup.procs",
+ "cgroup.events",
+ "cgroup.clone_children",
+ "cgroup.controllers",
+ "cgroup.subtree_control")
+ if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to chown() cgroup file %s, ignoring: %m", fn);
+
+ return 0;
+}
+
+int chown_cgroup(pid_t pid, uid_t uid_shift) {
+ _cleanup_free_ char *path = NULL, *fs = NULL;
+ int r;
+
+ r = cg_pid_get_path(NULL, pid, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get container cgroup path: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+
+ r = chown_cgroup_path(fs, uid_shift);
+ if (r < 0)
+ return log_error_errno(r, "Failed to chown() cgroup %s: %m", fs);
+
+ return 0;
+}
+
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift) {
+ _cleanup_free_ char *cgroup = NULL;
+ char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
+ bool undo_mount = false;
+ const char *fn;
+ int unified, r;
+
+ unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+ if (unified < 0)
+ return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
+
+ if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
+ return 0;
+
+ /* When the host uses the legacy cgroup setup, but the
+ * container shall use the unified hierarchy, let's make sure
+ * we copy the path from the name=systemd hierarchy into the
+ * unified hierarchy. Similar for the reverse situation. */
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid);
+
+ /* In order to access the unified hierarchy we need to mount it */
+ if (!mkdtemp(tree))
+ return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
+
+ if (unified)
+ r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
+ else
+ r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup2",
+ MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r < 0)
+ goto finish;
+
+ undo_mount = true;
+
+ /* If nspawn dies abruptly the cgroup hierarchy created below
+ * its unit isn't cleaned up. So, let's remove it
+ * https://github.com/systemd/systemd/pull/4223#issuecomment-252519810 */
+ fn = strjoina(tree, cgroup);
+ (void) rm_rf(fn, REMOVE_ROOT|REMOVE_ONLY_DIRECTORIES);
+
+ fn = strjoina(tree, cgroup, "/cgroup.procs");
+ (void) mkdir_parents(fn, 0755);
+
+ sprintf(pid_string, PID_FMT, pid);
+ r = write_string_file(fn, pid_string, 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to move process: %m");
+ goto finish;
+ }
+
+ fn = strjoina(tree, cgroup);
+ r = chown_cgroup_path(fn, arg_uid_shift);
+ if (r < 0)
+ log_error_errno(r, "Failed to chown() cgroup %s: %m", fn);
+finish:
+ if (undo_mount)
+ (void) umount_verbose(tree);
+
+ (void) rmdir(tree);
+ return r;
+}
+
+int create_subcgroup(pid_t pid, CGroupUnified unified_requested) {
+ _cleanup_free_ char *cgroup = NULL;
+ const char *child;
+ int unified, r;
+ CGroupMask supported;
+
+ /* In the unified hierarchy inner nodes may only contain
+ * subgroups, but not processes. Hence, if we running in the
+ * unified hierarchy and the container does the same, and we
+ * did not create a scope unit for the container move us and
+ * the container into two separate subcgroups. */
+
+ if (unified_requested == CGROUP_UNIFIED_NONE)
+ return 0;
+
+ unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+ if (unified < 0)
+ return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
+ if (unified == 0)
+ return 0;
+
+ r = cg_mask_supported(&supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get our control group: %m");
+
+ child = strjoina(cgroup, "/payload");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+
+ child = strjoina(cgroup, "/supervisor");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+
+ /* Try to enable as many controllers as possible for the new payload. */
+ (void) cg_enable_everywhere(supported, supported, cgroup);
+ return 0;
+}