diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2017-05-20 20:14:27 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2017-05-20 21:59:11 -0400 |
commit | ca29469b09faeb453e33619a99780964256777bf (patch) | |
tree | 8ed3c952cd74f84e72a2d7949319da00dfae3f12 | |
parent | 13d170a533b49490fc294bd727685cd087f0b070 (diff) |
-rw-r--r-- | src/libsystemd-basic/include/systemd-basic/cgroup2-util.h | 109 | ||||
-rw-r--r-- | src/libsystemd-basic/src/Makefile | 2 | ||||
-rw-r--r-- | src/libsystemd-basic/src/cgroup-util.c | 6 | ||||
-rw-r--r-- | src/libsystemd-basic/src/cgroup2-util.c | 712 | ||||
-rw-r--r-- | src/systemd-nspawn/nspawn-cgroup.c | 45 | ||||
-rw-r--r-- | src/systemd-nspawn/nspawn-cgroup.h | 6 | ||||
-rw-r--r-- | src/systemd-nspawn/nspawn-mount.h | 2 | ||||
-rw-r--r-- | src/systemd-nspawn/nspawn.c | 72 |
8 files changed, 898 insertions, 56 deletions
diff --git a/src/libsystemd-basic/include/systemd-basic/cgroup2-util.h b/src/libsystemd-basic/include/systemd-basic/cgroup2-util.h new file mode 100644 index 0000000000..cdbd9b0d9b --- /dev/null +++ b/src/libsystemd-basic/include/systemd-basic/cgroup2-util.h @@ -0,0 +1,109 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + Copyright 2017 Luke Shumaker + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include "macro.h" + +/* generic types ****************************************************/ + +typedef struct CGroupHierarchy CGroupHierarchy; + +typedef struct CGroup { + CGroupHierarchy *hierarchy; + char *path; +} CGroup; + +static inline void cg2_freep(CGroup *cgroup) { + free(cgroup->path); +} + +static inline void cg2_free_freep(CGroup **cgroupp) { + if (*cgroupp) { + cg2_freep(*cgroupp); + free(*cgroupp); + } +} + +#define _cleanup_cgroupfree_ _cleanup_(cg2_freep) +#define _cleanup_cgroupfree_free_ _cleanup_(cg2_free_freep) + +/* generic functions ************************************************/ + +int cg2_flush(void); +bool cg2_ns_supported(void); + +int cg2_get_v1_hier(const char *controller, CGroupHierarchy **ret_hier); +int cg2_get_v2_hier(CGroupHierarchy **ret_hier); +int cg2_hier_get_version(CGroupHierarchy *hier); +char *cg2_hier_get_str(CGroupHierarchy *hier); + +int cg2_pid_get_cgroups_real(pid_t pid, /* CGroupHierarchy *hier, CGroup *ret_cgroup */...) _sentinel_; +#define cg2_pid_get_cgroups(pid, ...) 
cg2_pid_get_cgroups_real((pid), __VA_ARGS__, NULL) + +char *cg2_cgroup_get_filepath(CGroup cgroup); +char *cg2_cgroup_get_str(CGroup cgroup); + +/* systemd types ****************************************************/ + +typedef struct SdCGroup { + CGroup prefix; + char *path; +} SdCGroup; + +static inline void cg2_sd_freep(SdCGroup *cgroup) { + cg2_freep(&cgroup->prefix); + free(cgroup->path); +} + +static inline void cg2_sd_free_freep(SdCGroup **cgroupp) { + if (*cgroupp) { + cg2_sd_freep(*cgroupp); + free(*cgroupp); + } +} + +#define _cleanup_sdcgroupfree_ _cleanup_(cg2_sd_freep) +#define _cleanup_sdcgroupfree_free_ _cleanup_(cg2_sd_free_freep) + +typedef enum SdCGroupVersion { + CGROUP_VER_UNKNOWN = 0, + CGROUP_VER_1 = 1, + CGROUP_VER_2 = 2, /* added in systemd 230 */ + CGROUP_VER_MIXED_SD232 = 3, /* added in systemd 232 */ + CGROUP_VER_MIXED_SD233 = 4, /* added in systemd 233 */ +} SdCGroupVersion; + +/* systemd functions ************************************************/ + +int cg2_sd_flush(void); +int cg2_sd_get_version(SdCGroupVersion *ret_ver); +int cg2_sd_get_root(CGroup *ret_root); + +int cg2_sd_ver_get_hier_ver(SdCGroupVersion ver); + +int cg2_sd_pid_get_cgroup(pid_t pid, SdCGroup *ret_cgroup); + +int cg2_sd_cgroup_parse(SdCGroup cgroup, char **ret_slice, char **ret_unit, SdCGroup *ret_extra); +int cg2_sd_cgroup_get_owner_uid(SdCGroup cgroup, uid_t *ret_uid); + +char *cg2_sd_cgroup_get_filepath(SdCGroup sdcgroup); +char *cg2_sd_cgroup_get_cgpath(SdCGroup sdcgroup); +char *cg2_sd_cgroup_get_str(SdCGroup sdcgroup); diff --git a/src/libsystemd-basic/src/Makefile b/src/libsystemd-basic/src/Makefile index fd72f23308..7a2bc56057 100644 --- a/src/libsystemd-basic/src/Makefile +++ b/src/libsystemd-basic/src/Makefile @@ -166,6 +166,8 @@ libsystemd_basic_la_SOURCES = \ src/basic/mkdir.h \ src/basic/cgroup-util.c \ src/basic/cgroup-util.h \ + src/basic/cgroup2-util.c \ + src/basic/cgroup2-util.h \ src/basic/errno-list.c \ src/basic/errno-list.h \ src/basic/af-list.c 
\ diff --git a/src/libsystemd-basic/src/cgroup-util.c b/src/libsystemd-basic/src/cgroup-util.c index 929101e558..e6a5882f7c 100644 --- a/src/libsystemd-basic/src/cgroup-util.c +++ b/src/libsystemd-basic/src/cgroup-util.c @@ -2281,8 +2281,10 @@ static int cg_update_unified(void) { if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) unified_cache = CGROUP_UNIFIED_ALL; else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { - if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) - return -errno; + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) { + unified_cache = CGROUP_UNIFIED_NONE; + return 0; + } unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ? CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE; diff --git a/src/libsystemd-basic/src/cgroup2-util.c b/src/libsystemd-basic/src/cgroup2-util.c new file mode 100644 index 0000000000..973ec86b65 --- /dev/null +++ b/src/libsystemd-basic/src/cgroup2-util.c @@ -0,0 +1,712 @@ +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + Copyright 2017 Luke Shumaker + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include <errno.h> +#include <glob.h> + +#include "systemd-basic/alloc-util.h" /* realloc_multiply */ +#include "systemd-basic/cgroup2-util.h" +#include "systemd-basic/fd-util.h" /* _cleanp_fclose_ */ +#include "systemd-basic/fileio.h" /* FOREACH_LINE */ +#include "systemd-basic/glob-util.h" /* _cleanup_globfree_ */ +#include "systemd-basic/parse-util.h" /* safe_atoi */ +#include "systemd-basic/process-util.h" /* procfs_file_alloca */ +#include "systemd-basic/special.h" +#include "systemd-basic/stat-util.h" /* F_TYPE_EQUAL */ +#include "systemd-basic/string-util.h" /* startswith, endswith, FOREACH_WORD_SEPARATOR */ +#include "systemd-basic/strv.h" /* STRV_FOREACH */ +#include "systemd-basic/unit-name.h" /* unit_name_is_valid */ +#include "systemd-basic/user-util.h" /* parse_uid */ + +static int hier_init_mountpoint(CGroupHierarchy *hier); +static void cg2_unescape(const char **p, size_t *n); +static bool valid_slice_name(const char *p, size_t n); + +/* generic ***********************************************************/ + +struct CGroupHierarchy { + int id; + char *controllers; + char *mountpoint; +}; + +static thread_local struct { + bool initialized; + size_t cap; + CGroupHierarchy *list; +} cg2_cache = { 0 }; + +int cg2_flush(void) { + cg2_cache.initialized = false; + for (size_t i = 0; i < cg2_cache.cap; i++) { + free(cg2_cache.list[i].controllers); + free(cg2_cache.list[i].mountpoint); + } + free(cg2_cache.list); + cg2_cache.list = NULL; + cg2_cache.cap = 0; + return cg2_sd_flush(); +} + +static int cg2_init(void) { + _cleanup_fclose_ FILE *f = NULL; + char line[LINE_MAX]; + + if (cg2_cache.initialized) + return 0; + + cg2_flush(); + + f = fopen("/proc/self/cgroup", "re"); + if (!f) { + /* turn "no such file" in to "no such process" */ + return errno == ENOENT ? 
-ESRCH : -errno; + } + + FOREACH_LINE(line, f, return -errno) { + int id, r; + char *id_str, *controllers; + char *rest = line; + id_str = strsep(&rest, ":"); + controllers = strsep(&rest, ":"); + /*path =*/ strsep(&rest, "\n"); /* discard the path */ + if (!rest || rest[0] != '\0') + return -ENODATA; + if (safe_atoi(id_str, &id) < 0) + return -ENODATA; + if (id < 0) + return -ENODATA; + if ( (id == 0) != (controllers[0] == '\0') ) + return -ENODATA; + + if ((size_t)id >= cg2_cache.cap) { + size_t cap = id+1; + CGroupHierarchy *list = realloc_multiply(cg2_cache.list, sizeof(cg2_cache.list[0]), cap); + if (!list) + return -ENOMEM; + cg2_cache.list = list; + while (cg2_cache.cap < cap) { + list[cg2_cache.cap].id = -1; + list[cg2_cache.cap].controllers = NULL; + list[cg2_cache.cap].mountpoint = NULL; + cg2_cache.cap++; + } + } + + cg2_cache.list[id].id = id; + cg2_cache.list[id].controllers = strdup(controllers); + if (!cg2_cache.list[id].controllers) + return -ENOMEM; + r = hier_init_mountpoint(&cg2_cache.list[id]); + if (r < 0) + return r; + } + return 0; +} + +static int hier_init_mountpoint(CGroupHierarchy *hier) { + assert(hier); + + if (hier->id == 0) { + /* cgroup v2 hierarchy */ + _cleanup_globfree_ glob_t g = {}; + struct statfs fs; + int r; + char **tmp; + + /* first check "/sys/fs/cgroup/" */ + if (statfs("/sys/fs/cgroup/", &fs) < 0) + return -errno; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + hier->mountpoint = strdup("/sys/fs/group"); + if (!hier->mountpoint) + return -ENOMEM; + return 0; + } + + /* then check "/sys/fs/cgroup/X/" */ + r = glob("/sys/fs/cgroup/*/", GLOB_ERR, NULL, &g); + if (r == GLOB_NOMATCH) + return -ENOENT; + if (r == GLOB_NOSPACE) + return -ENOMEM; + if (r != 0) + return errno > 0 ? 
-errno : -EIO; + STRV_FOREACH(tmp, g.gl_pathv) { + if (statfs(*tmp, &fs) < 0) + continue; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + hier->mountpoint = canonicalize_file_name(*tmp); + if (!hier->mountpoint) + return -ENOMEM; + return 0; + } + } + return -ENOENT; + } else { + /* cgroup v1 hierarchy */ + char *controller, *tmp; + + controller = strdupa(hier->controllers); + strchrnul(controller, ',')[0] = '\0'; + tmp = startswith(controller, "name="); + if (tmp) + controller = tmp; + + hier->mountpoint = canonicalize_file_name(strjoina("/sys/fs/cgroup/", controller, NULL)); + if (!hier->mountpoint) + return -errno; + return 0; + } +} + +bool cg2_ns_supported(void) { + static thread_local int enabled = -1; + + if (enabled >= 0) + return enabled; + + if (access("/proc/self/ns/cgroup", F_OK) == 0) + enabled = 1; + else + enabled = 0; + + return enabled; +} + +int cg2_get_v1_hier(const char *selector, CGroupHierarchy **ret_hier) { + size_t selector_len; + int r; + + assert(selector); + + r = cg2_init(); + if (r < 0) + return r; + + selector_len = strlen(selector); + for (int id = 0; (size_t)id < cg2_cache.cap; id++) { + const char *controller, *state; + size_t controller_len; + if (cg2_cache.list[id].id != id) + continue; + + FOREACH_WORD_SEPARATOR(controller, controller_len, cg2_cache.list[id].controllers, ",", state) { + if (controller_len == selector_len && memcmp(controller, selector, selector_len) == 0) { + if (ret_hier) + *ret_hier = &cg2_cache.list[id]; + return 0; + } + } + } + return -ENOENT; +} + +int cg2_get_v2_hier(CGroupHierarchy **ret_hier) { + int r; + + r = cg2_init(); + if (r < 0) + return r; + + if (cg2_cache.cap < 1 || cg2_cache.list[0].id != 0) + return -ENOENT; + + if (ret_hier) + *ret_hier = &cg2_cache.list[0]; + return 0; +} + + +int cg2_hier_get_version(CGroupHierarchy *hier) { + assert(hier); + if (hier->id < 0) { + return -EINVAL; + } else if (hier->id == 0) { + return 2; + } else { + return 1; + } +} + +char 
*cg2_hier_get_str(CGroupHierarchy *hier) { + char *ret; + + assert(hier); + assert(hier->controllers); + + if (asprintf(&ret, "%d:%s", hier->id, hier->controllers) < 0) + return NULL; + return ret; +} + +int cg2_pid_get_cgroups_real(pid_t pid, ...) { + const char *filename; + _cleanup_fclose_ FILE *file = NULL; + va_list ap; + char line[LINE_MAX]; + int n, r; + + r = cg2_init(); + if (r < 0) + return r; + + if (pid == 0) + filename = "/proc/self/cgroup"; + else + filename = procfs_file_alloca(pid, "cgroup"); + + file = fopen(filename, "re"); + if (!file) { + /* turn "no such file" in to "no such process" */ + return errno == ENOENT ? -ESRCH : -errno; + } + + n = 0; + FOREACH_LINE(line, file, return -errno) { + CGroupHierarchy *hier; + int id; + char *id_str, *controllers, *path; + char *rest = line; + id_str = strsep(&rest, ":"); + controllers = strsep(&rest, ":"); + path = strsep(&rest, "\n"); + if (!rest || rest[0] != '\0') + continue; + if (safe_atoi(id_str, &id) < 0) + continue; + if ( (id == 0) != (controllers[0] == '\0') ) + continue; + + va_start(ap, pid); + while ((hier = va_arg(ap, CGroupHierarchy *))) { + CGroup *ret_cgroup = va_arg(ap, CGroup *); + if (id == hier->id) { + if (ret_cgroup) { + ret_cgroup->hierarchy = hier; + ret_cgroup->path = path; + } + n++; + } + } + va_end(ap); + } + return n; +} + +char *cg2_cgroup_get_filepath(CGroup cgroup) { + assert(cgroup.hierarchy); + assert(cgroup.hierarchy->mountpoint); + assert(cgroup.path); + + return strjoin(cgroup.hierarchy->mountpoint, cgroup.path, NULL); +} + +char *cg2_cgroup_get_str(CGroup cgroup) { + _cleanup_free_ char *hierstr; + char *ret; + + hierstr = cg2_hier_get_str(cgroup.hierarchy); + if (!hierstr) + return NULL; + + if (asprintf(&ret, "%s:%s", hierstr, cgroup.path) < 0) + return NULL; + return ret; +} + +/* systemd **********************************************************/ + +static thread_local struct { + bool have_ver; + SdCGroupVersion ver; + bool have_hier; + CGroupHierarchy *hier; + 
bool have_root; + CGroup *root; +} cg2_sd_cache = { 0 }; + +int cg2_sd_flush(void) { + cg2_sd_cache.ver = CGROUP_VER_UNKNOWN; + cg2_sd_cache.have_ver = false; + + cg2_sd_cache.hier = NULL; + cg2_sd_cache.have_hier = false; + + cg2_free_freep(&cg2_sd_cache.root); + cg2_sd_cache.root = NULL; + cg2_sd_cache.have_root = false; + + return 0; +} + +static int cg2_sd_init_version(void) { + struct statfs fs; + + if (cg2_sd_cache.have_ver) + return 0; + + if (statfs("/sys/fs/cgroup/", &fs) < 0) + return -errno; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_2; + cg2_sd_cache.have_ver = true; + return 0; + } + + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_MIXED_SD233; + cg2_sd_cache.have_ver = true; + return 0; + } + + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return -errno; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_MIXED_SD232; + cg2_sd_cache.have_ver = true; + return 0; + } + if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_1; + cg2_sd_cache.have_ver = true; + return 0; + } + + return -ENOMEDIUM; +} + +static int cg2_sd_init_hier(void) { + int r; + + if (cg2_sd_cache.have_hier) + return 0; + + r = cg2_sd_init_version(); + if (r < 0) + return r; + + switch (cg2_sd_cache.ver) { + case CGROUP_VER_UNKNOWN: + assert_not_reached("Unknown systemd cgroup version"); + break; + case CGROUP_VER_1: + r = cg2_get_v1_hier("name=systemd", &cg2_sd_cache.hier); + if (r < 0) + return r; + break; + case CGROUP_VER_2: + case CGROUP_VER_MIXED_SD232: + case CGROUP_VER_MIXED_SD233: + r = cg2_get_v2_hier(&cg2_sd_cache.hier); + if (r < 0) + return r; + break; + } + cg2_sd_cache.have_hier = true; + return 0; +} + +int cg2_sd_get_root(CGroup *ret_root) { + CGroup cg; + int r; + char *e; + + r = cg2_sd_init_hier(); + if (r < 0) + return r; + + r = cg2_pid_get_cgroups(1, cg2_sd_cache.hier, 
&cg); + if (r < 0) + return r; + + e = endswith(cg.path, "/" SPECIAL_INIT_SCOPE); /* "/init.scope" */ + if (!e) + e = endswith(cg.path, "/" SPECIAL_SYSTEM_SLICE); /* "/system.slice" (legacy) */ + if (!e) + e = endswith(cg.path, "/system"); /* (even more legacy) */ + if (e) + *e = 0; + + if (ret_root) + *ret_root = cg; + return 0; +} + +int cg2_sd_ver_get_hier_ver(SdCGroupVersion ver) { + switch (ver) { + default: + case CGROUP_VER_UNKNOWN: + return -EINVAL; + case CGROUP_VER_1: + return 1; + case CGROUP_VER_2: + case CGROUP_VER_MIXED_SD232: + case CGROUP_VER_MIXED_SD233: + return 2; + } +} + +int cg2_sd_pid_get_cgroup(pid_t pid, SdCGroup *ret_cgroup) { + _cleanup_cgroupfree_ CGroup root, mine; + int r; + const char *p; + + r = cg2_sd_get_root(&root); + if (r < 0) + return r; + + r = cg2_pid_get_cgroups(pid, root.hierarchy, &mine); + if (r < 0) + return r; + + p = startswith(mine.path, root.path); + if (!p) + return -ENXIO; + + if (ret_cgroup) { + char *prefix, *path; + prefix = strdup(root.path); + if (!prefix) + goto enomem; + path = strdup(p); + if (!path) + goto enomem; + ret_cgroup->prefix.hierarchy = root.hierarchy; + ret_cgroup->prefix.path = prefix; + ret_cgroup->path = path; + return 0; + enomem: + free(prefix); + free(path); + return -ENOMEM; + } + return 0; +} + +int cg2_sd_cgroup_parse(SdCGroup cgroup, char **ret_slice, char **ret_unit, SdCGroup *ret_extra) { + const char *rest, *slice, *unit, *prefix, *extra; + size_t slice_len, unit_len, prefix_len, extra_len; + char *hslice = NULL, *hunit = NULL; + SdCGroup sextra; + + assert(cgroup.path); + assert(cgroup.prefix.path); + assert(cgroup.prefix.hierarchy); + + /* Given + * cgroup.path = "/foo.slice/bar.slice/baz.slice/unit.service/extra..." 
+ * we return + * *ret_slice = "baz.slice" + * *ret_unit = "unit.service" + * ret_extra->prefix.hierarchy = cgroup.prefix.hierarchy + * ret_extra->prefix.path = strjoin(cgroup.prefix.path, "/foo.slice/bar.slice/baz.slice/unit.service", NULL) + * ret_extra->path = "/extra..." + * + * The input path may contain 0 or more leading ".slice" + * segments; we return the rightmost. If there are no + * ".slice" segments, we return SPECIAL_ROOT_SLICE + * ("-.slice"). + */ + + rest = cgroup.path; + + /* slice */ + slice = SPECIAL_ROOT_SLICE; + slice_len = strlen(slice); + for (;;) { + const char *part, *tmprest; + size_t part_len; + + /* trim leading "/"s */ + tmprest = rest + strspn(rest, "/"); + + /* split off the first part */ + part = tmprest; + part_len = strcspn(part, "/"); + tmprest += part_len; + + if (valid_slice_name(part, part_len)) { + /* accept this iteration */ + slice = part; + slice_len = part_len; + rest = tmprest; + } else { + /* reject this iteration; we have found the first + * non-slice segment. 
*/ + break; + } + } + cg2_unescape(&slice, &slice_len); + + /* unit */ + rest += strspn(rest, "/"); + unit = rest; + unit_len = strcspn(unit, "/"); + rest += unit_len; + cg2_unescape(&unit, &unit_len); + if (!unit_name_is_valid(strndupa(unit, unit_len), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) + return -ENXIO; + + /* extra */ + extra = rest; + extra_len = strlen(rest); + prefix = cgroup.path; + prefix_len = extra - prefix; + + /* allocate return values */ + if (ret_slice) { + hslice = strndup(slice, slice_len); + if (!hslice) + goto enomem; + } + if (ret_unit) { + hunit = strndup(unit, unit_len); + if (!hslice) + goto enomem; + } + if (ret_extra) { + sextra.prefix.hierarchy = cgroup.prefix.hierarchy; + sextra.prefix.path = strndup(prefix, prefix_len); + if (!sextra.prefix.path) + goto enomem; + sextra.prefix.path = strjoin(cgroup.prefix.path, sextra.prefix.path, NULL); + if (!sextra.prefix.path) + goto enomem; + sextra.path = strndup(extra, extra_len); + if (!sextra.path) + goto enomem; + } + + /* return */ + if (ret_slice) + *ret_slice = hslice; + if (ret_unit) + *ret_unit = hunit; + if (ret_extra) + *ret_extra = sextra; + return 0; + + enomem: + free(hslice); + free(hunit); + cg2_sd_freep(&sextra); + return -ENOMEM; +} + + +int cg2_sd_cgroup_get_owner_uid(SdCGroup cgroup, uid_t *ret_uid) { + _cleanup_free_ char *slice = NULL; + char *start, *end; + int r; + + r = cg2_sd_cgroup_parse(cgroup, &slice, NULL, NULL); + if (r < 0) + return r; + + start = startswith(slice, "user-"); + if (!start) + return -ENXIO; + end = endswith(start, ".slice"); + if (!end) + return -ENXIO; + + *end = '\0'; + if (parse_uid(start, ret_uid) < 0) + return -ENXIO; + return 0; +} + +static int cg2_sd_cgroup_get_cgroup(SdCGroup sdcgroup, CGroup *ret_cgroup) { + assert(sdcgroup.prefix.path); + assert(sdcgroup.path); + + if (ret_cgroup) { + ret_cgroup->path = strjoin(sdcgroup.prefix.path, sdcgroup.path, NULL); + if (!ret_cgroup->path) + return -ENOMEM; + ret_cgroup->hierarchy = 
sdcgroup.prefix.hierarchy; + } + return 0; +} + +char *cg2_sd_cgroup_get_filepath(SdCGroup sdcgroup) { + _cleanup_cgroupfree_ CGroup cgroup; + + if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0) + return NULL; + + return cg2_cgroup_get_filepath(cgroup); +} + +char *cg2_sd_cgroup_get_cgpath(SdCGroup sdcgroup) { + CGroup cgroup; + + if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0) + return NULL; + + return cgroup.path; +} + +char *cg2_sd_cgroup_get_str(SdCGroup sdcgroup) { + _cleanup_cgroupfree_ CGroup cgroup; + + if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0) + return NULL; + + return cg2_cgroup_get_str(cgroup); +} + +/* basically copied from old cgroup-util ****************************/ + +static void cg2_unescape(const char **p, size_t *n) { + size_t sn; + + assert(p); + + if (!n) + n = &sn; + + /* The return value of this function (unlike cg_escape()) + * doesn't need free()! */ + + if (*n >= 1 && (*p)[0] == '_') { + (*p)++; + (*n)--; + } +} + +static bool valid_slice_name(const char *p, size_t n) { + + if (!p) + return false; + + if (n < strlen("x.slice")) + return false; + + if (memcmp(p + n - 6, ".slice", 6) == 0) { + const char *c = strndupa(p, n); + cg2_unescape(&c, &n); + return unit_name_is_valid(c, UNIT_NAME_PLAIN); + } + + return false; +} diff --git a/src/systemd-nspawn/nspawn-cgroup.c b/src/systemd-nspawn/nspawn-cgroup.c index 782966d31b..662b6c84e9 100644 --- a/src/systemd-nspawn/nspawn-cgroup.c +++ b/src/systemd-nspawn/nspawn-cgroup.c @@ -57,7 +57,9 @@ static int chown_cgroup_path(const char *path, uid_t uid_shift) { } int chown_cgroup(pid_t pid, uid_t uid_shift) { - _cleanup_free_ char *path = NULL, *fs = NULL; + _cleanup_sdcgroupfree_ SdCGroup cgroup; + _cleanup_free_ char *fs = NULL; + int r; /* If uid_shift == UID_INVALID, then chown_cgroup_path() is a no-op, and there isn't really a point to actually @@ -66,12 +68,12 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) { if (uid_shift == UID_INVALID) return 0; - r = cg_pid_get_path(NULL, 
pid, &path); + r = cg2_sd_pid_get_cgroup(pid, &cgroup); if (r < 0) return log_error_errno(r, "Failed to get host cgroup of the container: %m"); - r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs); - if (r < 0) + fs = cg2_sd_cgroup_get_filepath(cgroup); + if (!fs) return log_error_errno(-ENOMEM, "Failed to get host file system path for container cgroup: %m"); r = chown_cgroup_path(fs, uid_shift); @@ -81,20 +83,22 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) { return 0; } -int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) { - _cleanup_free_ char *cgroup = NULL; +int sync_cgroup(pid_t pid, SdCGroupVersion inner_cgver, uid_t uid_shift) { + _cleanup_sdcgroupfree_ SdCGroup outer_cgroup; + _cleanup_free_ char *cgpath = NULL; char mountpoint[] = "/tmp/containerXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; bool undo_mount = false; const char *fn, *inner_hier; - int unified, r; + int r; + SdCGroupVersion outer_cgver; #define LOG_PFIX "PID " PID_FMT ": sync host cgroup -> container cgroup" - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified < 0) + r = cg2_sd_get_version(&outer_cgver); + if (r < 0) return log_error_errno(r, LOG_PFIX ": failed to determine host cgroup version: %m", pid); - if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) + if (cg2_sd_ver_get_hier_ver(outer_cgver) == cg2_sd_ver_get_hier_ver(inner_cgver)) return 0; /* When the host uses the legacy cgroup setup, but the @@ -102,15 +106,18 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift) { * we copy the path from the name=systemd hierarchy into the * unified hierarchy. Similar for the reverse situation. 
*/ - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); + r = cg2_sd_pid_get_cgroup(pid, &outer_cgroup); if (r < 0) return log_error_errno(r, LOG_PFIX ": failed to determine host cgroup: %m", pid); + cgpath = cg2_sd_cgroup_get_cgpath(outer_cgroup); + if (!cgpath) + return log_error_errno(-ENOMEM, LOG_PFIX ": %m", pid); /* In order to access the container's hierarchy we need to mount it */ if (!mkdtemp(mountpoint)) return log_error_errno(errno, LOG_PFIX ": failed to create temporary mount point for container cgroup hierarchy: %m", pid); - if (unified) { + if (cg2_sd_ver_get_hier_ver(outer_cgver) == 2) { /* host: v2 ; container: v1 */ inner_hier = "?:name=systemd"; r = mount_verbose(LOG_ERR, "cgroup", mountpoint, "cgroup", @@ -157,11 +164,12 @@ finish: return r; } -int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { +int create_subcgroup(pid_t pid, SdCGroupVersion inner_cgver) { _cleanup_free_ char *cgroup = NULL; const char *child; - int unified, r; + int r; CGroupMask supported; + SdCGroupVersion outer_cgver; /* In the unified hierarchy inner nodes may only contain * subgroups, but not processes. Hence, if we running in the @@ -169,13 +177,14 @@ int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { * did not create a scope unit for the container move us and * the container into two separate subcgroups. 
*/ - if (unified_requested == CGROUP_UNIFIED_NONE) + if (inner_cgver == CGROUP_VER_1) return 0; - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified < 0) + r = cg2_sd_get_version(&outer_cgver); + if (r < 0) return log_error_errno(r, "Failed to create host subcgroup: Failed to determine cgroup version: %m"); - if (unified == 0) + + if (outer_cgver == CGROUP_VER_1) return 0; r = cg_mask_supported(&supported); diff --git a/src/systemd-nspawn/nspawn-cgroup.h b/src/systemd-nspawn/nspawn-cgroup.h index 6c0ddfc7de..4d5d1179ea 100644 --- a/src/systemd-nspawn/nspawn-cgroup.h +++ b/src/systemd-nspawn/nspawn-cgroup.h @@ -22,8 +22,8 @@ #include <stdbool.h> #include <sys/types.h> -#include "systemd-basic/cgroup-util.h" +#include "systemd-basic/cgroup2-util.h" int chown_cgroup(pid_t pid, uid_t uid_shift); -int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift); -int create_subcgroup(pid_t pid, CGroupUnified unified_requested); +int sync_cgroup(pid_t pid, SdCGroupVersion inner_cgver, uid_t uid_shift); +int create_subcgroup(pid_t pid, SdCGroupVersion inner_cgver); diff --git a/src/systemd-nspawn/nspawn-mount.h b/src/systemd-nspawn/nspawn-mount.h index 8601dfdad3..d7ac8181a3 100644 --- a/src/systemd-nspawn/nspawn-mount.h +++ b/src/systemd-nspawn/nspawn-mount.h @@ -22,7 +22,7 @@ #include <stdbool.h> #include <sys/types.h> -#include "systemd-basic/cgroup-util.h" +#include "systemd-basic/cgroup2-util.h" typedef enum VolatileMode { VOLATILE_NO, diff --git a/src/systemd-nspawn/nspawn.c b/src/systemd-nspawn/nspawn.c index 7c9b32fbc2..ca61139d92 100644 --- a/src/systemd-nspawn/nspawn.c +++ b/src/systemd-nspawn/nspawn.c @@ -51,7 +51,7 @@ #include "systemd-basic/btrfs-util.h" #include "systemd-basic/cap-list.h" #include "systemd-basic/capability-util.h" -#include "systemd-basic/cgroup-util.h" +#include "systemd-basic/cgroup2-util.h" #include "systemd-basic/copy.h" #include "systemd-basic/env-util.h" #include "systemd-basic/fd-util.h" @@ -188,7 +188,7 @@ 
static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO; static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; static bool arg_userns_chown = false; static int arg_kill_signal = 0; -static CGroupUnified arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_UNKNOWN; +static SdCGroupVersion arg_cgroup_version = CGROUP_VER_UNKNOWN; static SettingsMask arg_settings_mask = 0; static int arg_settings_trusted = -1; static char **arg_parameters = NULL; @@ -324,7 +324,7 @@ static int custom_mounts_prepare(void) { static int detect_unified_cgroup_hierarchy(const char *directory) { const char *e; int r; - CGroupUnified outer; + SdCGroupVersion outer; /* Allow the user to control whether the unified hierarchy is used */ e = getenv("UNIFIED_CGROUP_HIERARCHY"); @@ -333,48 +333,53 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to parse $UNIFIED_CGROUP_HIERARCHY."); if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL; + arg_cgroup_version = CGROUP_VER_2; else - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; + arg_cgroup_version = CGROUP_VER_1; return 0; } - r = cg_version(&outer); + r = cg2_sd_get_version(&outer); if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine what the host system uses: %m"); /* Otherwise inherit the default from the host system, unless * the container doesn't have a new enough systemd (detected - * by checking libsystemd-shared). */ + * by checking libsystemd-shared). + * + * But archroot containers don't even have any part of systemd + * installed, so why do we care about that? 
*/ + arg_cgroup_version = outer; switch (outer) { - case CGROUP_UNIFIED_UNKNOWN: + case CGROUP_VER_UNKNOWN: assert_not_reached("Unknown host cgroup version"); break; - case CGROUP_UNIFIED_NONE: /* cgroup v1 */ - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; + case CGROUP_VER_1: break; - case CGROUP_UNIFIED_ALL: /* cgroup v2 */ + case CGROUP_VER_2: /* Unified cgroup hierarchy support was added in 230. Unfortunately libsystemd-shared, * which we use to sniff the systemd version, was only added in 231, so we'll have a * false negative here for 230. */ r = systemd_installation_has_version(directory, 230); if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); - if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL; - else - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; + if (r == 0) + arg_cgroup_version = CGROUP_VER_1; break; - case CGROUP_UNIFIED_SYSTEMD: /* cgroup v1 & v2 mixed; but v2 for systemd */ - /* Mixed cgroup hierarchy support was added in 232 */ + case CGROUP_VER_MIXED_SD232: r = systemd_installation_has_version(directory, 232); if (r < 0) return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); - if (r > 0) - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD; - else - arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; + if (r == 0) + arg_cgroup_version = CGROUP_VER_1; + break; + case CGROUP_VER_MIXED_SD233: + r = systemd_installation_has_version(directory, 233); + if (r < 0) + return log_error_errno(r, "Failed to decide cgroup version to use: Failed to determine systemd version in container: %m"); + if (r == 0) + arg_cgroup_version = CGROUP_VER_1; break; } @@ -482,6 +487,7 @@ static int parse_argv(int argc, char *argv[]) { const char *p, *e; uint64_t plus = 0, minus = 0; bool mask_all_settings = false, mask_no_settings = false; + _cleanup_sdcgroupfree_ SdCGroup cgroup; assert(argc >= 0); 
assert(argv); @@ -1096,7 +1102,9 @@ static int parse_argv(int argc, char *argv[]) { if (arg_userns_mode == USER_NAMESPACE_PICK) arg_userns_chown = true; - if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) { + if (arg_keep_unit && + cg2_sd_pid_get_cgroup(0, &cgroup) >= 0 && + cg2_sd_cgroup_get_owner_uid(cgroup, NULL) >= 0) { log_error("--keep-unit may not be used when invoked from a user session."); return -EINVAL; } @@ -1170,7 +1178,7 @@ static int parse_argv(int argc, char *argv[]) { r = getenv_bool("SYSTEMD_NSPAWN_USE_CGNS"); if (r < 0) - arg_use_cgns = cg_ns_supported(); + arg_use_cgns = cg2_ns_supported(); else arg_use_cgns = r; @@ -2684,7 +2692,7 @@ static int inner_child( assert(directory); assert(kmsg_socket >= 0); - cg_unified_flush(); + cg2_flush(); if (arg_userns_mode != USER_NAMESPACE_NO) { /* Tell the parent, that it now can write the UID map. */ @@ -2723,13 +2731,13 @@ static int inner_child( return -ESRCH; } - if (arg_use_cgns && cg_ns_supported()) { + if (arg_use_cgns && cg2_ns_supported()) { r = unshare(CLONE_NEWCGROUP); if (r < 0) return log_error_errno(errno, "Failed to unshare cgroup namespace"); r = mount_cgroups( "", - arg_unified_cgroup_hierarchy, + arg_cgroup_version, arg_userns_mode != USER_NAMESPACE_NO, arg_uid_shift, arg_uid_range, @@ -2738,7 +2746,7 @@ static int inner_child( if (r < 0) return r; } else { - r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy); + r = mount_systemd_cgroup_writable("", arg_cgroup_version); if (r < 0) return r; } @@ -2950,7 +2958,7 @@ static int outer_child( assert(notify_socket >= 0); assert(kmsg_socket >= 0); - cg_unified_flush(); + cg2_flush(); if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m"); @@ -3137,10 +3145,10 @@ static int outer_child( if (r < 0) return r; - if (!arg_use_cgns || !cg_ns_supported()) { + if (!arg_use_cgns || !cg2_ns_supported()) { r = mount_cgroups( directory, - arg_unified_cgroup_hierarchy, + 
arg_cgroup_version, arg_userns_mode != USER_NAMESPACE_NO, arg_uid_shift, arg_uid_range, @@ -3892,12 +3900,12 @@ static int run(int master, return r; } - r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift); + r = sync_cgroup(*pid, arg_cgroup_version, arg_uid_shift); if (r < 0) return r; if (arg_keep_unit) { - r = create_subcgroup(*pid, arg_unified_cgroup_hierarchy); + r = create_subcgroup(*pid, arg_cgroup_version); if (r < 0) return r; } |