summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/libsystemd-basic/include/systemd-basic/cgroup2-util.h109
-rw-r--r--src/libsystemd-basic/src/Makefile2
-rw-r--r--src/libsystemd-basic/src/cgroup-util.c6
-rw-r--r--src/libsystemd-basic/src/cgroup2-util.c712
-rw-r--r--src/systemd-nspawn/nspawn-cgroup.c45
-rw-r--r--src/systemd-nspawn/nspawn-cgroup.h6
-rw-r--r--src/systemd-nspawn/nspawn-mount.h2
-rw-r--r--src/systemd-nspawn/nspawn.c72
8 files changed, 898 insertions, 56 deletions
diff --git a/src/libsystemd-basic/include/systemd-basic/cgroup2-util.h b/src/libsystemd-basic/include/systemd-basic/cgroup2-util.h
new file mode 100644
index 0000000000..cdbd9b0d9b
--- /dev/null
+++ b/src/libsystemd-basic/include/systemd-basic/cgroup2-util.h
@@ -0,0 +1,109 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2010 Lennart Poettering
+ Copyright 2017 Luke Shumaker
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "macro.h"
+
+/* generic types ****************************************************/
+
+typedef struct CGroupHierarchy CGroupHierarchy;
+
+typedef struct CGroup {
+ CGroupHierarchy *hierarchy;
+ char *path;
+} CGroup;
+
+static inline void cg2_freep(CGroup *cgroup) {
+ free(cgroup->path);
+}
+
+static inline void cg2_free_freep(CGroup **cgroupp) {
+ if (*cgroupp) {
+ cg2_freep(*cgroupp);
+ free(*cgroupp);
+ }
+}
+
+#define _cleanup_cgroupfree_ _cleanup_(cg2_freep)
+#define _cleanup_cgroupfree_free_ _cleanup_(cg2_free_freep)
+
+/* generic functions ************************************************/
+
+int cg2_flush(void);
+bool cg2_ns_supported(void);
+
+int cg2_get_v1_hier(const char *controller, CGroupHierarchy **ret_hier);
+int cg2_get_v2_hier(CGroupHierarchy **ret_hier);
+int cg2_hier_get_version(CGroupHierarchy *hier);
+char *cg2_hier_get_str(CGroupHierarchy *hier);
+
+int cg2_pid_get_cgroups_real(pid_t pid, /* CGroupHierarchy *hier, CGroup *ret_cgroup */...) _sentinel_;
+#define cg2_pid_get_cgroups(pid, ...) cg2_pid_get_cgroups_real((pid), __VA_ARGS__, NULL)
+
+char *cg2_cgroup_get_filepath(CGroup cgroup);
+char *cg2_cgroup_get_str(CGroup cgroup);
+
+/* systemd types ****************************************************/
+
+typedef struct SdCGroup {
+ CGroup prefix;
+ char *path;
+} SdCGroup;
+
+static inline void cg2_sd_freep(SdCGroup *cgroup) {
+ cg2_freep(&cgroup->prefix);
+ free(cgroup->path);
+}
+
+static inline void cg2_sd_free_freep(SdCGroup **cgroupp) {
+ if (*cgroupp) {
+ cg2_sd_freep(*cgroupp);
+ free(*cgroupp);
+ }
+}
+
+#define _cleanup_sdcgroupfree_ _cleanup_(cg2_sd_freep)
+#define _cleanup_sdcgroupfree_free_ _cleanup_(cg2_sd_free_freep)
+
+typedef enum SdCGroupVersion {
+ CGROUP_VER_UNKNOWN = 0,
+ CGROUP_VER_1 = 1,
+ CGROUP_VER_2 = 2, /* added in systemd 230 */
+ CGROUP_VER_MIXED_SD232 = 3, /* added in systemd 232 */
+ CGROUP_VER_MIXED_SD233 = 4, /* added in systemd 233 */
+} SdCGroupVersion;
+
+/* systemd functions ************************************************/
+
+int cg2_sd_flush(void);
+int cg2_sd_get_version(SdCGroupVersion *ret_ver);
+int cg2_sd_get_root(CGroup *ret_root);
+
+int cg2_sd_ver_get_hier_ver(SdCGroupVersion ver);
+
+int cg2_sd_pid_get_cgroup(pid_t pid, SdCGroup *ret_cgroup);
+
+int cg2_sd_cgroup_parse(SdCGroup cgroup, char **ret_slice, char **ret_unit, SdCGroup *ret_extra);
+int cg2_sd_cgroup_get_owner_uid(SdCGroup cgroup, uid_t *ret_uid);
+
+char *cg2_sd_cgroup_get_filepath(SdCGroup sdcgroup);
+char *cg2_sd_cgroup_get_cgpath(SdCGroup sdcgroup);
+char *cg2_sd_cgroup_get_str(SdCGroup sdcgroup);
diff --git a/src/libsystemd-basic/src/Makefile b/src/libsystemd-basic/src/Makefile
index fd72f23308..7a2bc56057 100644
--- a/src/libsystemd-basic/src/Makefile
+++ b/src/libsystemd-basic/src/Makefile
@@ -166,6 +166,8 @@ libsystemd_basic_la_SOURCES = \
src/basic/mkdir.h \
src/basic/cgroup-util.c \
src/basic/cgroup-util.h \
+ src/basic/cgroup2-util.c \
+ src/basic/cgroup2-util.h \
src/basic/errno-list.c \
src/basic/errno-list.h \
src/basic/af-list.c \
diff --git a/src/libsystemd-basic/src/cgroup-util.c b/src/libsystemd-basic/src/cgroup-util.c
index 929101e558..e6a5882f7c 100644
--- a/src/libsystemd-basic/src/cgroup-util.c
+++ b/src/libsystemd-basic/src/cgroup-util.c
@@ -2281,8 +2281,10 @@ static int cg_update_unified(void) {
if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
unified_cache = CGROUP_UNIFIED_ALL;
else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
- if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
- return -errno;
+ if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
+ unified_cache = CGROUP_UNIFIED_NONE;
+ return 0;
+ }
unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ?
CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE;
diff --git a/src/libsystemd-basic/src/cgroup2-util.c b/src/libsystemd-basic/src/cgroup2-util.c
new file mode 100644
index 0000000000..973ec86b65
--- /dev/null
+++ b/src/libsystemd-basic/src/cgroup2-util.c
@@ -0,0 +1,712 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2010 Lennart Poettering
+ Copyright 2017 Luke Shumaker
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <errno.h>
+#include <glob.h>
+
+#include "systemd-basic/alloc-util.h" /* realloc_multiply */
+#include "systemd-basic/cgroup2-util.h"
+#include "systemd-basic/fd-util.h" /* _cleanp_fclose_ */
+#include "systemd-basic/fileio.h" /* FOREACH_LINE */
+#include "systemd-basic/glob-util.h" /* _cleanup_globfree_ */
+#include "systemd-basic/parse-util.h" /* safe_atoi */
+#include "systemd-basic/process-util.h" /* procfs_file_alloca */
+#include "systemd-basic/special.h"
+#include "systemd-basic/stat-util.h" /* F_TYPE_EQUAL */
+#include "systemd-basic/string-util.h" /* startswith, endswith, FOREACH_WORD_SEPARATOR */
+#include "systemd-basic/strv.h" /* STRV_FOREACH */
+#include "systemd-basic/unit-name.h" /* unit_name_is_valid */
+#include "systemd-basic/user-util.h" /* parse_uid */
+
+static int hier_init_mountpoint(CGroupHierarchy *hier);
+static void cg2_unescape(const char **p, size_t *n);
+static bool valid_slice_name(const char *p, size_t n);
+
+/* generic ***********************************************************/
+
+struct CGroupHierarchy {
+ int id;
+ char *controllers;
+ char *mountpoint;
+};
+
+static thread_local struct {
+ bool initialized;
+ size_t cap;
+ CGroupHierarchy *list;
+} cg2_cache = { 0 };
+
+int cg2_flush(void) {
+ cg2_cache.initialized = false;
+ for (size_t i = 0; i < cg2_cache.cap; i++) {
+ free(cg2_cache.list[i].controllers);
+ free(cg2_cache.list[i].mountpoint);
+ }
+ free(cg2_cache.list);
+ cg2_cache.list = NULL;
+ cg2_cache.cap = 0;
+ return cg2_sd_flush();
+}
+
+static int cg2_init(void) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char line[LINE_MAX];
+
+ if (cg2_cache.initialized)
+ return 0;
+
+ cg2_flush();
+
+ f = fopen("/proc/self/cgroup", "re");
+ if (!f) {
+ /* turn "no such file" in to "no such process" */
+ return errno == ENOENT ? -ESRCH : -errno;
+ }
+
+ FOREACH_LINE(line, f, return -errno) {
+ int id, r;
+ char *id_str, *controllers;
+ char *rest = line;
+ id_str = strsep(&rest, ":");
+ controllers = strsep(&rest, ":");
+ /*path =*/ strsep(&rest, "\n"); /* discard the path */
+ if (!rest || rest[0] != '\0')
+ return -ENODATA;
+ if (safe_atoi(id_str, &id) < 0)
+ return -ENODATA;
+ if (id < 0)
+ return -ENODATA;
+ if ( (id == 0) != (controllers[0] == '\0') )
+ return -ENODATA;
+
+ if ((size_t)id >= cg2_cache.cap) {
+ size_t cap = id+1;
+ CGroupHierarchy *list = realloc_multiply(cg2_cache.list, sizeof(cg2_cache.list[0]), cap);
+ if (!list)
+ return -ENOMEM;
+ cg2_cache.list = list;
+ while (cg2_cache.cap < cap) {
+ list[cg2_cache.cap].id = -1;
+ list[cg2_cache.cap].controllers = NULL;
+ list[cg2_cache.cap].mountpoint = NULL;
+ cg2_cache.cap++;
+ }
+ }
+
+ cg2_cache.list[id].id = id;
+ cg2_cache.list[id].controllers = strdup(controllers);
+ if (!cg2_cache.list[id].controllers)
+ return -ENOMEM;
+ r = hier_init_mountpoint(&cg2_cache.list[id]);
+ if (r < 0)
+ return r;
+ }
+ return 0;
+}
+
+static int hier_init_mountpoint(CGroupHierarchy *hier) {
+ assert(hier);
+
+ if (hier->id == 0) {
+ /* cgroup v2 hierarchy */
+ _cleanup_globfree_ glob_t g = {};
+ struct statfs fs;
+ int r;
+ char **tmp;
+
+ /* first check "/sys/fs/cgroup/" */
+ if (statfs("/sys/fs/cgroup/", &fs) < 0)
+ return -errno;
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ hier->mountpoint = strdup("/sys/fs/group");
+ if (!hier->mountpoint)
+ return -ENOMEM;
+ return 0;
+ }
+
+ /* then check "/sys/fs/cgroup/X/" */
+ r = glob("/sys/fs/cgroup/*/", GLOB_ERR, NULL, &g);
+ if (r == GLOB_NOMATCH)
+ return -ENOENT;
+ if (r == GLOB_NOSPACE)
+ return -ENOMEM;
+ if (r != 0)
+ return errno > 0 ? -errno : -EIO;
+ STRV_FOREACH(tmp, g.gl_pathv) {
+ if (statfs(*tmp, &fs) < 0)
+ continue;
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ hier->mountpoint = canonicalize_file_name(*tmp);
+ if (!hier->mountpoint)
+ return -ENOMEM;
+ return 0;
+ }
+ }
+ return -ENOENT;
+ } else {
+ /* cgroup v1 hierarchy */
+ char *controller, *tmp;
+
+ controller = strdupa(hier->controllers);
+ strchrnul(controller, ',')[0] = '\0';
+ tmp = startswith(controller, "name=");
+ if (tmp)
+ controller = tmp;
+
+ hier->mountpoint = canonicalize_file_name(strjoina("/sys/fs/cgroup/", controller, NULL));
+ if (!hier->mountpoint)
+ return -errno;
+ return 0;
+ }
+}
+
+bool cg2_ns_supported(void) {
+ static thread_local int enabled = -1;
+
+ if (enabled >= 0)
+ return enabled;
+
+ if (access("/proc/self/ns/cgroup", F_OK) == 0)
+ enabled = 1;
+ else
+ enabled = 0;
+
+ return enabled;
+}
+
+int cg2_get_v1_hier(const char *selector, CGroupHierarchy **ret_hier) {
+ size_t selector_len;
+ int r;
+
+ assert(selector);
+
+ r = cg2_init();
+ if (r < 0)
+ return r;
+
+ selector_len = strlen(selector);
+ for (int id = 0; (size_t)id < cg2_cache.cap; id++) {
+ const char *controller, *state;
+ size_t controller_len;
+ if (cg2_cache.list[id].id != id)
+ continue;
+
+ FOREACH_WORD_SEPARATOR(controller, controller_len, cg2_cache.list[id].controllers, ",", state) {
+ if (controller_len == selector_len && memcmp(controller, selector, selector_len) == 0) {
+ if (ret_hier)
+ *ret_hier = &cg2_cache.list[id];
+ return 0;
+ }
+ }
+ }
+ return -ENOENT;
+}
+
+int cg2_get_v2_hier(CGroupHierarchy **ret_hier) {
+ int r;
+
+ r = cg2_init();
+ if (r < 0)
+ return r;
+
+ if (cg2_cache.cap < 1 || cg2_cache.list[0].id != 0)
+ return -ENOENT;
+
+ if (ret_hier)
+ *ret_hier = &cg2_cache.list[0];
+ return 0;
+}
+
+
+int cg2_hier_get_version(CGroupHierarchy *hier) {
+ assert(hier);
+ if (hier->id < 0) {
+ return -EINVAL;
+ } else if (hier->id == 0) {
+ return 2;
+ } else {
+ return 1;
+ }
+}
+
+char *cg2_hier_get_str(CGroupHierarchy *hier) {
+ char *ret;
+
+ assert(hier);
+ assert(hier->controllers);
+
+ if (asprintf(&ret, "%d:%s", hier->id, hier->controllers) < 0)
+ return NULL;
+ return ret;
+}
+
+int cg2_pid_get_cgroups_real(pid_t pid, ...) {
+ const char *filename;
+ _cleanup_fclose_ FILE *file = NULL;
+ va_list ap;
+ char line[LINE_MAX];
+ int n, r;
+
+ r = cg2_init();
+ if (r < 0)
+ return r;
+
+ if (pid == 0)
+ filename = "/proc/self/cgroup";
+ else
+ filename = procfs_file_alloca(pid, "cgroup");
+
+ file = fopen(filename, "re");
+ if (!file) {
+ /* turn "no such file" in to "no such process" */
+ return errno == ENOENT ? -ESRCH : -errno;
+ }
+
+ n = 0;
+ FOREACH_LINE(line, file, return -errno) {
+ CGroupHierarchy *hier;
+ int id;
+ char *id_str, *controllers, *path;
+ char *rest = line;
+ id_str = strsep(&rest, ":");
+ controllers = strsep(&rest, ":");
+ path = strsep(&rest, "\n");
+ if (!rest || rest[0] != '\0')
+ continue;
+ if (safe_atoi(id_str, &id) < 0)
+ continue;
+ if ( (id == 0) != (controllers[0] == '\0') )
+ continue;
+
+ va_start(ap, pid);
+ while ((hier = va_arg(ap, CGroupHierarchy *))) {
+ CGroup *ret_cgroup = va_arg(ap, CGroup *);
+ if (id == hier->id) {
+ if (ret_cgroup) {
+ ret_cgroup->hierarchy = hier;
+ ret_cgroup->path = path;
+ }
+ n++;
+ }
+ }
+ va_end(ap);
+ }
+ return n;
+}
+
+char *cg2_cgroup_get_filepath(CGroup cgroup) {
+ assert(cgroup.hierarchy);
+ assert(cgroup.hierarchy->mountpoint);
+ assert(cgroup.path);
+
+ return strjoin(cgroup.hierarchy->mountpoint, cgroup.path, NULL);
+}
+
+char *cg2_cgroup_get_str(CGroup cgroup) {
+ _cleanup_free_ char *hierstr;
+ char *ret;
+
+ hierstr = cg2_hier_get_str(cgroup.hierarchy);
+ if (!hierstr)
+ return NULL;
+
+ if (asprintf(&ret, "%s:%s", hierstr, cgroup.path) < 0)
+ return NULL;
+ return ret;
+}
+
+/* systemd **********************************************************/
+
+static thread_local struct {
+ bool have_ver;
+ SdCGroupVersion ver;
+ bool have_hier;
+ CGroupHierarchy *hier;
+ bool have_root;
+ CGroup *root;
+} cg2_sd_cache = { 0 };
+
+int cg2_sd_flush(void) {
+ cg2_sd_cache.ver = CGROUP_VER_UNKNOWN;
+ cg2_sd_cache.have_ver = false;
+
+ cg2_sd_cache.hier = NULL;
+ cg2_sd_cache.have_hier = false;
+
+ cg2_free_freep(&cg2_sd_cache.root);
+ cg2_sd_cache.root = NULL;
+ cg2_sd_cache.have_root = false;
+
+ return 0;
+}
+
+static int cg2_sd_init_version(void) {
+ struct statfs fs;
+
+ if (cg2_sd_cache.have_ver)
+ return 0;
+
+ if (statfs("/sys/fs/cgroup/", &fs) < 0)
+ return -errno;
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ cg2_sd_cache.ver = CGROUP_VER_2;
+ cg2_sd_cache.have_ver = true;
+ return 0;
+ }
+
+ if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
+ F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ cg2_sd_cache.ver = CGROUP_VER_MIXED_SD233;
+ cg2_sd_cache.have_ver = true;
+ return 0;
+ }
+
+ if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
+ return -errno;
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ cg2_sd_cache.ver = CGROUP_VER_MIXED_SD232;
+ cg2_sd_cache.have_ver = true;
+ return 0;
+ }
+ if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
+ cg2_sd_cache.ver = CGROUP_VER_1;
+ cg2_sd_cache.have_ver = true;
+ return 0;
+ }
+
+ return -ENOMEDIUM;
+}
+
+static int cg2_sd_init_hier(void) {
+ int r;
+
+ if (cg2_sd_cache.have_hier)
+ return 0;
+
+ r = cg2_sd_init_version();
+ if (r < 0)
+ return r;
+
+ switch (cg2_sd_cache.ver) {
+ case CGROUP_VER_UNKNOWN:
+ assert_not_reached("Unknown systemd cgroup version");
+ break;
+ case CGROUP_VER_1:
+ r = cg2_get_v1_hier("name=systemd", &cg2_sd_cache.hier);
+ if (r < 0)
+ return r;
+ break;
+ case CGROUP_VER_2:
+ case CGROUP_VER_MIXED_SD232:
+ case CGROUP_VER_MIXED_SD233:
+ r = cg2_get_v2_hier(&cg2_sd_cache.hier);
+ if (r < 0)
+ return r;
+ break;
+ }
+ cg2_sd_cache.have_hier = true;
+ return 0;
+}
+
+int cg2_sd_get_root(CGroup *ret_root) {
+ CGroup cg;
+ int r;
+ char *e;
+
+ r = cg2_sd_init_hier();
+ if (r < 0)
+ return r;
+
+ r = cg2_pid_get_cgroups(1, cg2_sd_cache.hier, &cg);
+ if (r < 0)
+ return r;
+
+ e = endswith(cg.path, "/" SPECIAL_INIT_SCOPE); /* "/init.scope" */
+ if (!e)
+ e = endswith(cg.path, "/" SPECIAL_SYSTEM_SLICE); /* "/system.slice" (legacy) */
+ if (!e)
+ e = endswith(cg.path, "/system"); /* (even more legacy) */
+ if (e)
+ *e = 0;
+
+ if (ret_root)
+ *ret_root = cg;
+ return 0;
+}
+
+int cg2_sd_ver_get_hier_ver(SdCGroupVersion ver) {
+ switch (ver) {
+ default:
+ case CGROUP_VER_UNKNOWN:
+ return -EINVAL;
+ case CGROUP_VER_1:
+ return 1;
+ case CGROUP_VER_2:
+ case CGROUP_VER_MIXED_SD232:
+ case CGROUP_VER_MIXED_SD233:
+ return 2;
+ }
+}
+
+int cg2_sd_pid_get_cgroup(pid_t pid, SdCGroup *ret_cgroup) {
+ _cleanup_cgroupfree_ CGroup root, mine;
+ int r;
+ const char *p;
+
+ r = cg2_sd_get_root(&root);
+ if (r < 0)
+ return r;
+
+ r = cg2_pid_get_cgroups(pid, root.hierarchy, &mine);
+ if (r < 0)
+ return r;
+
+ p = startswith(mine.path, root.path);
+ if (!p)
+ return -ENXIO;
+
+ if (ret_cgroup) {
+ char *prefix, *path;
+ prefix = strdup(root.path);
+ if (!prefix)
+ goto enomem;
+ path = strdup(p);
+ if (!path)
+ goto enomem;
+ ret_cgroup->prefix.hierarchy = root.hierarchy;
+ ret_cgroup->prefix.path = prefix;
+ ret_cgroup->path = path;
+ return 0;
+ enomem:
+ free(prefix);
+ free(path);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int cg2_sd_cgroup_parse(SdCGroup cgroup, char **ret_slice, char **ret_unit, SdCGroup *ret_extra) {
+ const char *rest, *slice, *unit, *prefix, *extra;
+ size_t slice_len, unit_len, prefix_len, extra_len;
+ char *hslice = NULL, *hunit = NULL;
+ SdCGroup sextra;
+
+ assert(cgroup.path);
+ assert(cgroup.prefix.path);
+ assert(cgroup.prefix.hierarchy);
+
+ /* Given
+ * cgroup.path = "/foo.slice/bar.slice/baz.slice/unit.service/extra..."
+ * we return
+ * *ret_slice = "baz.slice"
+ * *ret_unit = "unit.service"
+ * ret_extra->prefix.hierarchy = cgroup.prefix.hierarchy
+ * ret_extra->prefix.path = strjoin(cgroup.prefix.path, "/foo.slice/bar.slize/baz.slice/unit.service", NULL)
+ * ret_extra->path = "/extra..."
+ *
+ * The input path my contain 0 or more leading ".slice"
+ * segments; we return the rightmost. If there are no
+ * ".slice" segments, we return SPECIAL_ROOT_SLICE
+ * ("-.slice").
+ */
+
+ rest = cgroup.path;
+
+ /* slice */
+ slice = SPECIAL_ROOT_SLICE;
+ slice_len = strlen(slice);
+ for (;;) {
+ const char *part, *tmprest;
+ size_t part_len;
+
+ /* trim leading "/"s */
+ tmprest = rest + strspn(rest, "/");
+
+ /* split off the first part */
+ part = tmprest;
+ part_len = strcspn(part, "/");
+ tmprest += part_len;
+
+ if (valid_slice_name(part, part_len)) {
+ /* accept this iteration */
+ slice = part;
+ slice_len = part_len;
+ rest = tmprest;
+ } else {
+ /* reject this iteration; we have found the first
+ * non-slice segment. */
+ break;
+ }
+ }
+ cg2_unescape(&slice, &slice_len);
+
+ /* unit */
+ rest += strspn(rest, "/");
+ unit = rest;
+ unit_len = strcspn(unit, "/");
+ rest += unit_len;
+ cg2_unescape(&unit, &unit_len);
+ if (!unit_name_is_valid(strndupa(unit, unit_len), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
+ return -ENXIO;
+
+ /* extra */
+ extra = rest;
+ extra_len = strlen(rest);
+ prefix = cgroup.path;
+ prefix_len = extra - prefix;
+
+ /* allocate return values */
+ if (ret_slice) {
+ hslice = strndup(slice, slice_len);
+ if (!hslice)
+ goto enomem;
+ }
+ if (ret_unit) {
+ hunit = strndup(unit, unit_len);
+ if (!hslice)
+ goto enomem;
+ }
+ if (ret_extra) {
+ sextra.prefix.hierarchy = cgroup.prefix.hierarchy;
+ sextra.prefix.path = strndup(prefix, prefix_len);
+ if (!sextra.prefix.path)
+ goto enomem;
+ sextra.prefix.path = strjoin(cgroup.prefix.path, sextra.prefix.path, NULL);
+ if (!sextra.prefix.path)
+ goto enomem;
+ sextra.path = strndup(extra, extra_len);
+ if (!sextra.path)
+ goto enomem;
+ }
+
+ /* return */
+ if (ret_slice)
+ *ret_slice = hslice;
+ if (ret_unit)
+ *ret_unit = hunit;
+ if (ret_extra)
+ *ret_extra = sextra;
+ return 0;
+
+ enomem:
+ free(hslice);
+ free(hunit);
+ cg2_sd_freep(&sextra);
+ return -ENOMEM;
+}
+
+
+int cg2_sd_cgroup_get_owner_uid(SdCGroup cgroup, uid_t *ret_uid) {
+ _cleanup_free_ char *slice = NULL;
+ char *start, *end;
+ int r;
+
+ r = cg2_sd_cgroup_parse(cgroup, &slice, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ start = startswith(slice, "user-");
+ if (!start)
+ return -ENXIO;
+ end = endswith(start, ".slice");
+ if (!end)
+ return -ENXIO;
+
+ *end = '\0';
+ if (parse_uid(start, ret_uid) < 0)
+ return -ENXIO;
+ return 0;
+}
+
+static int cg2_sd_cgroup_get_cgroup(SdCGroup sdcgroup, CGroup *ret_cgroup) {
+ assert(sdcgroup.prefix.path);
+ assert(sdcgroup.path);
+
+ if (ret_cgroup) {
+ ret_cgroup->path = strjoin(sdcgroup.prefix.path, sdcgroup.path, NULL);
+ if (!ret_cgroup->path)
+ return -ENOMEM;
+ ret_cgroup->hierarchy = sdcgroup.prefix.hierarchy;
+ }
+ return 0;
+}
+
+char *cg2_sd_cgroup_get_filepath(SdCGroup sdcgroup) {
+ _cleanup_cgroupfree_ CGroup cgroup;
+
+ if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0)
+ return NULL;
+
+ return cg2_cgroup_get_filepath(cgroup);
+}
+
+char *cg2_sd_cgroup_get_cgpath(SdCGroup sdcgroup) {
+ CGroup cgroup;
+
+ if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0)
+ return NULL;
+
+ return cgroup.path;
+}
+
+char *cg2_sd_cgroup_get_str(SdCGroup sdcgroup) {
+ _cleanup_cgroupfree_ CGroup cgroup;
+
+ if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0)
+ return NULL;
+
+ return cg2_cgroup_get_str(cgroup);
+}
+
+/* basically copied from old cgroup-util ****************************/
+
+static void cg2_unescape(const char **p, size_t *n) {
+ size_t sn;
+
+ assert(p);
+
+ if (!n)
+ n = &sn;
+
+ /* The return value of this function (unlike cg_escape())
+ * doesn't need free()! */
+
+ if (*n >= 1 && (*p)[0] == '_') {
+ (*p)++;
+ (*n)--;
+ }
+}
+
+static bool valid_slice_name(const char *p, size_t n) {
+
+ if (!p)
+ return false;
+
+ if (n < strlen("x.slice"))
+ return false;
+
+ if (memcmp(p + n - 6, ".slice", 6) == 0) {
+ const char *c = strndupa(p, n);
+ cg2_unescape(&c, &n);
+ return unit_name_is_valid(c, UNIT_NAME_PLAIN);
+ }
+
+ return false;
+}
diff --git a/src/systemd-nspawn/nspawn-cgroup.c b/src/systemd-nspawn/nspawn-cgroup.c
index 782966d31b..662b6c84e9 100644
--- a/src/systemd-nspawn/nspawn-cgroup.c
+++ b/src/systemd-nspawn/nspawn-cgroup.c
@@ -57,7 +57,9 @@ static int chown_cgroup_path(const char *path, uid_t uid_shift) {
}
int chown_cgroup(pid_t pid, uid_t uid_shift) {
- _cleanup_free_ char *path = NULL, *fs = NULL;
+ _cleanup_sdcgroupfree_ SdCGroup cgroup;
+ _cleanup_free_ char *fs = NULL;
+
int r;
/* If uid_shift == UID_INVALID, then chown_cgroup_path() is a no-op, and there isn't really a point to actually
@@ -66,12 +68,12 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) {
if (uid_shift == UID_INVALID)
return 0;
- r = cg_pid_get_path(NULL, pid, &path);
+ r = cg2_sd_pid_get_cgroup(pid, &cgroup);
if (r < 0)
return log_error_errno(r, "Failed to get host cgroup of the container: %m");
- r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
- if (r < 0)
+ fs = cg2_sd_cgroup_get_filepath(cgroup);
+ if (!fs)
return log_error_errno(-ENOMEM, "Failed to get host file system path for container cgroup: %m");
r = chown_cgroup_path(fs, uid_shift);
@@ -81,20 +83,22 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) {
return 0;
}
-int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) {
- _cleanup_free_ char *cgroup = NULL;
+int sync_cgroup(pid_t pid, SdCGroupVersion inner_cgver, uid_t uid_shift) {
+ _cleanup_sdcgroupfree_ SdCGroup outer_cgroup;
+ _cleanup_free_ char *cgpath = NULL;
char mountpoint[] = "/tmp/containerXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
bool undo_mount = false;
const char *fn, *inner_hier;
- int unified, r;
+ int r;
+ SdCGroupVersion outer_cgver;
#define LOG_PFIX "PID " PID_FMT ": sync host cgroup -> container cgroup"
- unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
- if (unified < 0)
+ r = cg2_sd_get_version(&outer_cgver);
+ if (r < 0)
return log_error_errno(r, LOG_PFIX ": failed to determine host cgroup version: %m", pid);
- if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
+ if (cg2_sd_ver_get_hier_ver(outer_cgver) == cg2_sd_ver_get_hier_ver(inner_cgver))
return 0;
/* When the host uses the legacy cgroup setup, but the
@@ -102,15 +106,18 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) {
* we copy the path from the name=systemd hierarchy into the
* unified hierarchy. Similar for the reverse situation. */
- r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ r = cg2_sd_pid_get_cgroup(pid, &outer_cgroup);
if (r < 0)
return log_error_errno(r, LOG_PFIX ": failed to determine host cgroup: %m", pid);
+ cgpath = cg2_sd_cgroup_get_cgpath(outer_cgroup);
+ if (!cgpath)
+ return log_error_errno(-ENOMEM, LOG_PFIX ": %m", pid);
/* In order to access the container's hierarchy we need to mount it */
if (!mkdtemp(mountpoint))
return log_error_errno(errno, LOG_PFIX ": failed to create temporary mount point for container cgroup hierarchy: %m", pid);
- if (unified) {
+ if (cg2_sd_ver_get_hier_ver(outer_cgver) == 2) {
/* host: v2 ; container: v1 */
inner_hier = "?:name=systemd";
r = mount_verbose(LOG_ERR, "cgroup", mountpoint, "cgroup",
@@ -157,11 +164,12 @@ finish:
return r;
}
-int create_subcgroup(pid_t pid, CGroupUnified unified_requested) {
+int create_subcgroup(pid_t pid, SdCGroupVersion inner_cgver) {
_cleanup_free_ char *cgroup = NULL;
const char *child;
- int unified, r;
+ int r;
CGroupMask supported;
+ SdCGroupVersion outer_cgver;
/* In the unified hierarchy inner nodes may only contain
* subgroups, but not processes. Hence, if we running in the
@@ -169,13 +177,14 @@ int create_subcgroup(pid_t pid, CGroupUnified unified_requested) {
* did not create a scope unit for the container move us and
* the container into two separate subcgroups. */
- if (unified_requested == CGROUP_UNIFIED_NONE)
+ if (inner_cgver == CGROUP_VER_1)
return 0;
- unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
- if (unified < 0)
+ r = cg2_sd_get_version(&outer_cgver);
+ if (r < 0)
return log_error_errno(r, "Failed to create host subcgroup: Failed to determine cgroup version: %m");
- if (unified == 0)
+
+ if (outer_cgver == CGROUP_VER_1)
return 0;
r = cg_mask_supported(&supported);
diff --git a/src/systemd-nspawn/nspawn-cgroup.h b/src/systemd-nspawn/nspawn-cgroup.h
index 6c0ddfc7de..4d5d1179ea 100644
--- a/src/systemd-nspawn/nspawn-cgroup.h
+++ b/src/systemd-nspawn/nspawn-cgroup.h
@@ -22,8 +22,8 @@
#include <stdbool.h>
#include <sys/types.h>
-#include "systemd-basic/cgroup-util.h"
+#include "systemd-basic/cgroup2-util.h"
int chown_cgroup(pid_t pid, uid_t uid_shift);
-int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift);
-int create_subcgroup(pid_t pid, CGroupUnified unified_requested);
+int sync_cgroup(pid_t pid, SdCGroupVersion inner_cgver, uid_t uid_shift);
+int create_subcgroup(pid_t pid, SdCGroupVersion inner_cgver);
diff --git a/src/systemd-nspawn/nspawn-mount.h b/src/systemd-nspawn/nspawn-mount.h
index 8601dfdad3..d7ac8181a3 100644
--- a/src/systemd-nspawn/nspawn-mount.h
+++ b/src/systemd-nspawn/nspawn-mount.h
@@ -22,7 +22,7 @@
#include <stdbool.h>
#include <sys/types.h>
-#include "systemd-basic/cgroup-util.h"
+#include "systemd-basic/cgroup2-util.h"
typedef enum VolatileMode {
VOLATILE_NO,
diff --git a/src/systemd-nspawn/nspawn.c b/src/systemd-nspawn/nspawn.c
index 7c9b32fbc2..ca61139d92 100644
--- a/src/systemd-nspawn/nspawn.c
+++ b/src/systemd-nspawn/nspawn.c
@@ -51,7 +51,7 @@
#include "systemd-basic/btrfs-util.h"
#include "systemd-basic/cap-list.h"
#include "systemd-basic/capability-util.h"
-#include "systemd-basic/cgroup-util.h"
+#include "systemd-basic/cgroup2-util.h"
#include "systemd-basic/copy.h"
#include "systemd-basic/env-util.h"
#include "systemd-basic/fd-util.h"
@@ -188,7 +188,7 @@ static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO;
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
static bool arg_userns_chown = false;
static int arg_kill_signal = 0;
-static CGroupUnified arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_UNKNOWN;
+static SdCGroupVersion arg_cgroup_version = CGROUP_VER_UNKNOWN;
static SettingsMask arg_settings_mask = 0;
static int arg_settings_trusted = -1;
static char **arg_parameters = NULL;
@@ -324,7 +324,7 @@ static int custom_mounts_prepare(void) {
static int detect_unified_cgroup_hierarchy(const char *directory) {
const char *e;
int r;
- CGroupUnified outer;
+ SdCGroupVersion outer;
/* Allow the user to control whether the unified hierarchy is used */
e = getenv("UNIFIED_CGROUP_HIERARCHY");
@@ -333,48 +333,53 @@ static int detect_unified_cgroup_hierarchy(const char *directory) {
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to parse $UNIFIED_CGROUP_HIERARCHY.");
if (r > 0)
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+ arg_cgroup_version = CGROUP_VER_2;
else
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ arg_cgroup_version = CGROUP_VER_1;
return 0;
}
- r = cg_version(&outer);
+ r = cg2_sd_get_version(&outer);
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine what the host system uses: %m");
/* Otherwise inherit the default from the host system, unless
* the container doesn't have a new enough systemd (detected
- * by checking libsystemd-shared). */
+ * by checking libsystemd-shared).
+ *
+ * But archroot containers don't even have any part of systemd
+ * installed, so why do we care about that? */
+ arg_cgroup_version = outer;
switch (outer) {
- case CGROUP_UNIFIED_UNKNOWN:
+ case CGROUP_VER_UNKNOWN:
assert_not_reached("Unknown host cgroup version");
break;
- case CGROUP_UNIFIED_NONE: /* cgroup v1 */
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ case CGROUP_VER_1:
break;
- case CGROUP_UNIFIED_ALL: /* cgroup v2 */
+ case CGROUP_VER_2:
/* Unified cgroup hierarchy support was added in 230. Unfortunately libsystemd-shared,
* which we use to sniff the systemd version, was only added in 231, so we'll have a
* false negative here for 230. */
r = systemd_installation_has_version(directory, 230);
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
- if (r > 0)
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
- else
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ if (r == 0)
+ arg_cgroup_version = CGROUP_VER_1;
break;
- case CGROUP_UNIFIED_SYSTEMD: /* cgroup v1 & v2 mixed; but v2 for systemd */
- /* Mixed cgroup hierarchy support was added in 232 */
+ case CGROUP_VER_MIXED_SD232:
r = systemd_installation_has_version(directory, 232);
if (r < 0)
return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
- if (r > 0)
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD;
- else
- arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
+ if (r == 0)
+ arg_cgroup_version = CGROUP_VER_1;
+ break;
+ case CGROUP_VER_MIXED_SD233:
+ r = systemd_installation_has_version(directory, 233);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m");
+ if (r == 0)
+ arg_cgroup_version = CGROUP_VER_1;
break;
}
@@ -482,6 +487,7 @@ static int parse_argv(int argc, char *argv[]) {
const char *p, *e;
uint64_t plus = 0, minus = 0;
bool mask_all_settings = false, mask_no_settings = false;
+ _cleanup_sdcgroupfree_ SdCGroup cgroup;
assert(argc >= 0);
assert(argv);
@@ -1096,7 +1102,9 @@ static int parse_argv(int argc, char *argv[]) {
if (arg_userns_mode == USER_NAMESPACE_PICK)
arg_userns_chown = true;
- if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
+ if (arg_keep_unit &&
+ cg2_sd_pid_get_cgroup(0, &cgroup) >= 0 &&
+ cg2_sd_cgroup_get_owner_uid(cgroup, NULL) >= 0) {
log_error("--keep-unit may not be used when invoked from a user session.");
return -EINVAL;
}
@@ -1170,7 +1178,7 @@ static int parse_argv(int argc, char *argv[]) {
r = getenv_bool("SYSTEMD_NSPAWN_USE_CGNS");
if (r < 0)
- arg_use_cgns = cg_ns_supported();
+ arg_use_cgns = cg2_ns_supported();
else
arg_use_cgns = r;
@@ -2684,7 +2692,7 @@ static int inner_child(
assert(directory);
assert(kmsg_socket >= 0);
- cg_unified_flush();
+ cg2_flush();
if (arg_userns_mode != USER_NAMESPACE_NO) {
/* Tell the parent, that it now can write the UID map. */
@@ -2723,13 +2731,13 @@ static int inner_child(
return -ESRCH;
}
- if (arg_use_cgns && cg_ns_supported()) {
+ if (arg_use_cgns && cg2_ns_supported()) {
r = unshare(CLONE_NEWCGROUP);
if (r < 0)
return log_error_errno(errno, "Failed to unshare cgroup namespace");
r = mount_cgroups(
"",
- arg_unified_cgroup_hierarchy,
+ arg_cgroup_version,
arg_userns_mode != USER_NAMESPACE_NO,
arg_uid_shift,
arg_uid_range,
@@ -2738,7 +2746,7 @@ static int inner_child(
if (r < 0)
return r;
} else {
- r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
+ r = mount_systemd_cgroup_writable("", arg_cgroup_version);
if (r < 0)
return r;
}
@@ -2950,7 +2958,7 @@ static int outer_child(
assert(notify_socket >= 0);
assert(kmsg_socket >= 0);
- cg_unified_flush();
+ cg2_flush();
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
@@ -3137,10 +3145,10 @@ static int outer_child(
if (r < 0)
return r;
- if (!arg_use_cgns || !cg_ns_supported()) {
+ if (!arg_use_cgns || !cg2_ns_supported()) {
r = mount_cgroups(
directory,
- arg_unified_cgroup_hierarchy,
+ arg_cgroup_version,
arg_userns_mode != USER_NAMESPACE_NO,
arg_uid_shift,
arg_uid_range,
@@ -3892,12 +3900,12 @@ static int run(int master,
return r;
}
- r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift);
+ r = sync_cgroup(*pid, arg_cgroup_version, arg_uid_shift);
if (r < 0)
return r;
if (arg_keep_unit) {
- r = create_subcgroup(*pid, arg_unified_cgroup_hierarchy);
+ r = create_subcgroup(*pid, arg_cgroup_version);
if (r < 0)
return r;
}