diff options
Diffstat (limited to 'src/libsystemd-basic/src/cgroup2-util.c')
-rw-r--r-- | src/libsystemd-basic/src/cgroup2-util.c | 712 |
1 files changed, 712 insertions, 0 deletions
diff --git a/src/libsystemd-basic/src/cgroup2-util.c b/src/libsystemd-basic/src/cgroup2-util.c new file mode 100644 index 0000000000..973ec86b65 --- /dev/null +++ b/src/libsystemd-basic/src/cgroup2-util.c @@ -0,0 +1,712 @@ +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + Copyright 2017 Luke Shumaker + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <errno.h> +#include <glob.h> + +#include "systemd-basic/alloc-util.h" /* realloc_multiply */ +#include "systemd-basic/cgroup2-util.h" +#include "systemd-basic/fd-util.h" /* _cleanp_fclose_ */ +#include "systemd-basic/fileio.h" /* FOREACH_LINE */ +#include "systemd-basic/glob-util.h" /* _cleanup_globfree_ */ +#include "systemd-basic/parse-util.h" /* safe_atoi */ +#include "systemd-basic/process-util.h" /* procfs_file_alloca */ +#include "systemd-basic/special.h" +#include "systemd-basic/stat-util.h" /* F_TYPE_EQUAL */ +#include "systemd-basic/string-util.h" /* startswith, endswith, FOREACH_WORD_SEPARATOR */ +#include "systemd-basic/strv.h" /* STRV_FOREACH */ +#include "systemd-basic/unit-name.h" /* unit_name_is_valid */ +#include "systemd-basic/user-util.h" /* parse_uid */ + +static int hier_init_mountpoint(CGroupHierarchy *hier); +static void cg2_unescape(const char **p, size_t *n); +static bool valid_slice_name(const char *p, size_t n); + +/* generic ***********************************************************/ + +struct CGroupHierarchy { + int id; + char *controllers; + char *mountpoint; +}; + +static thread_local struct { + bool initialized; + size_t cap; + CGroupHierarchy *list; +} cg2_cache = { 0 }; + +int cg2_flush(void) { + cg2_cache.initialized = false; + for (size_t i = 0; i < cg2_cache.cap; i++) { + free(cg2_cache.list[i].controllers); + free(cg2_cache.list[i].mountpoint); + } + free(cg2_cache.list); + cg2_cache.list = NULL; + cg2_cache.cap = 0; + return cg2_sd_flush(); +} + +static int cg2_init(void) { + _cleanup_fclose_ FILE *f = NULL; + char line[LINE_MAX]; + + if (cg2_cache.initialized) + return 0; + + cg2_flush(); + + f = fopen("/proc/self/cgroup", "re"); + if (!f) { + /* turn "no such file" in to "no such process" */ + return errno == ENOENT ? -ESRCH : -errno; + } + + FOREACH_LINE(line, f, return -errno) { + int id, r; + char *id_str, *controllers; + char *rest = line; + id_str = strsep(&rest, ":"); + controllers = strsep(&rest, ":"); + /*path =*/ strsep(&rest, "\n"); /* discard the path */ + if (!rest || rest[0] != '\0') + return -ENODATA; + if (safe_atoi(id_str, &id) < 0) + return -ENODATA; + if (id < 0) + return -ENODATA; + if ( (id == 0) != (controllers[0] == '\0') ) + return -ENODATA; + + if ((size_t)id >= cg2_cache.cap) { + size_t cap = id+1; + CGroupHierarchy *list = realloc_multiply(cg2_cache.list, sizeof(cg2_cache.list[0]), cap); + if (!list) + return -ENOMEM; + cg2_cache.list = list; + while (cg2_cache.cap < cap) { + list[cg2_cache.cap].id = -1; + list[cg2_cache.cap].controllers = NULL; + list[cg2_cache.cap].mountpoint = NULL; + cg2_cache.cap++; + } + } + + cg2_cache.list[id].id = id; + cg2_cache.list[id].controllers = strdup(controllers); + if (!cg2_cache.list[id].controllers) + return -ENOMEM; + r = hier_init_mountpoint(&cg2_cache.list[id]); + if (r < 0) + return r; + } + return 0; +} + +static int hier_init_mountpoint(CGroupHierarchy *hier) { + assert(hier); + + if (hier->id == 0) { + /* cgroup v2 hierarchy */ + _cleanup_globfree_ glob_t g = {}; + struct statfs fs; + int r; + char **tmp; + + /* first check "/sys/fs/cgroup/" */ + if (statfs("/sys/fs/cgroup/", &fs) < 0) + return -errno; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + hier->mountpoint = strdup("/sys/fs/group"); + if (!hier->mountpoint) + return -ENOMEM; + return 0; + } + + /* then check "/sys/fs/cgroup/X/" */ + r = glob("/sys/fs/cgroup/*/", GLOB_ERR, NULL, &g); + if (r == GLOB_NOMATCH) + return -ENOENT; + if (r == GLOB_NOSPACE) + return -ENOMEM; + if (r != 0) + return errno > 0 ? -errno : -EIO; + STRV_FOREACH(tmp, g.gl_pathv) { + if (statfs(*tmp, &fs) < 0) + continue; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + hier->mountpoint = canonicalize_file_name(*tmp); + if (!hier->mountpoint) + return -ENOMEM; + return 0; + } + } + return -ENOENT; + } else { + /* cgroup v1 hierarchy */ + char *controller, *tmp; + + controller = strdupa(hier->controllers); + strchrnul(controller, ',')[0] = '\0'; + tmp = startswith(controller, "name="); + if (tmp) + controller = tmp; + + hier->mountpoint = canonicalize_file_name(strjoina("/sys/fs/cgroup/", controller, NULL)); + if (!hier->mountpoint) + return -errno; + return 0; + } +} + +bool cg2_ns_supported(void) { + static thread_local int enabled = -1; + + if (enabled >= 0) + return enabled; + + if (access("/proc/self/ns/cgroup", F_OK) == 0) + enabled = 1; + else + enabled = 0; + + return enabled; +} + +int cg2_get_v1_hier(const char *selector, CGroupHierarchy **ret_hier) { + size_t selector_len; + int r; + + assert(selector); + + r = cg2_init(); + if (r < 0) + return r; + + selector_len = strlen(selector); + for (int id = 0; (size_t)id < cg2_cache.cap; id++) { + const char *controller, *state; + size_t controller_len; + if (cg2_cache.list[id].id != id) + continue; + + FOREACH_WORD_SEPARATOR(controller, controller_len, cg2_cache.list[id].controllers, ",", state) { + if (controller_len == selector_len && memcmp(controller, selector, selector_len) == 0) { + if (ret_hier) + *ret_hier = &cg2_cache.list[id]; + return 0; + } + } + } + return -ENOENT; +} + +int cg2_get_v2_hier(CGroupHierarchy **ret_hier) { + int r; + + r = cg2_init(); + if (r < 0) + return r; + + if (cg2_cache.cap < 1 || cg2_cache.list[0].id != 0) + return -ENOENT; + + if (ret_hier) + *ret_hier = &cg2_cache.list[0]; + return 0; +} + + +int cg2_hier_get_version(CGroupHierarchy *hier) { + assert(hier); + if (hier->id < 0) { + return -EINVAL; + } else if (hier->id == 0) { + return 2; + } else { + return 1; + } +} + +char *cg2_hier_get_str(CGroupHierarchy *hier) { + char *ret; + + assert(hier); + assert(hier->controllers); + + if (asprintf(&ret, "%d:%s", hier->id, hier->controllers) < 0) + return NULL; + return ret; +} + +int cg2_pid_get_cgroups_real(pid_t pid, ...) { + const char *filename; + _cleanup_fclose_ FILE *file = NULL; + va_list ap; + char line[LINE_MAX]; + int n, r; + + r = cg2_init(); + if (r < 0) + return r; + + if (pid == 0) + filename = "/proc/self/cgroup"; + else + filename = procfs_file_alloca(pid, "cgroup"); + + file = fopen(filename, "re"); + if (!file) { + /* turn "no such file" in to "no such process" */ + return errno == ENOENT ? -ESRCH : -errno; + } + + n = 0; + FOREACH_LINE(line, file, return -errno) { + CGroupHierarchy *hier; + int id; + char *id_str, *controllers, *path; + char *rest = line; + id_str = strsep(&rest, ":"); + controllers = strsep(&rest, ":"); + path = strsep(&rest, "\n"); + if (!rest || rest[0] != '\0') + continue; + if (safe_atoi(id_str, &id) < 0) + continue; + if ( (id == 0) != (controllers[0] == '\0') ) + continue; + + va_start(ap, pid); + while ((hier = va_arg(ap, CGroupHierarchy *))) { + CGroup *ret_cgroup = va_arg(ap, CGroup *); + if (id == hier->id) { + if (ret_cgroup) { + ret_cgroup->hierarchy = hier; + ret_cgroup->path = path; + } + n++; + } + } + va_end(ap); + } + return n; +} + +char *cg2_cgroup_get_filepath(CGroup cgroup) { + assert(cgroup.hierarchy); + assert(cgroup.hierarchy->mountpoint); + assert(cgroup.path); + + return strjoin(cgroup.hierarchy->mountpoint, cgroup.path, NULL); +} + +char *cg2_cgroup_get_str(CGroup cgroup) { + _cleanup_free_ char *hierstr; + char *ret; + + hierstr = cg2_hier_get_str(cgroup.hierarchy); + if (!hierstr) + return NULL; + + if (asprintf(&ret, "%s:%s", hierstr, cgroup.path) < 0) + return NULL; + return ret; +} + +/* systemd **********************************************************/ + +static thread_local struct { + bool have_ver; + SdCGroupVersion ver; + bool have_hier; + CGroupHierarchy *hier; + bool have_root; + CGroup *root; +} cg2_sd_cache = { 0 }; + +int cg2_sd_flush(void) { + cg2_sd_cache.ver = CGROUP_VER_UNKNOWN; + cg2_sd_cache.have_ver = false; + + cg2_sd_cache.hier = NULL; + cg2_sd_cache.have_hier = false; + + cg2_free_freep(&cg2_sd_cache.root); + cg2_sd_cache.root = NULL; + cg2_sd_cache.have_root = false; + + return 0; +} + +static int cg2_sd_init_version(void) { + struct statfs fs; + + if (cg2_sd_cache.have_ver) + return 0; + + if (statfs("/sys/fs/cgroup/", &fs) < 0) + return -errno; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_2; + cg2_sd_cache.have_ver = true; + return 0; + } + + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_MIXED_SD233; + cg2_sd_cache.have_ver = true; + return 0; + } + + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return -errno; + if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_MIXED_SD232; + cg2_sd_cache.have_ver = true; + return 0; + } + if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) { + cg2_sd_cache.ver = CGROUP_VER_1; + cg2_sd_cache.have_ver = true; + return 0; + } + + return -ENOMEDIUM; +} + +static int cg2_sd_init_hier(void) { + int r; + + if (cg2_sd_cache.have_hier) + return 0; + + r = cg2_sd_init_version(); + if (r < 0) + return r; + + switch (cg2_sd_cache.ver) { + case CGROUP_VER_UNKNOWN: + assert_not_reached("Unknown systemd cgroup version"); + break; + case CGROUP_VER_1: + r = cg2_get_v1_hier("name=systemd", &cg2_sd_cache.hier); + if (r < 0) + return r; + break; + case CGROUP_VER_2: + case CGROUP_VER_MIXED_SD232: + case CGROUP_VER_MIXED_SD233: + r = cg2_get_v2_hier(&cg2_sd_cache.hier); + if (r < 0) + return r; + break; + } + cg2_sd_cache.have_hier = true; + return 0; +} + +int cg2_sd_get_root(CGroup *ret_root) { + CGroup cg; + int r; + char *e; + + r = cg2_sd_init_hier(); + if (r < 0) + return r; + + r = cg2_pid_get_cgroups(1, cg2_sd_cache.hier, &cg); + if (r < 0) + return r; + + e = endswith(cg.path, "/" SPECIAL_INIT_SCOPE); /* "/init.scope" */ + if (!e) + e = endswith(cg.path, "/" SPECIAL_SYSTEM_SLICE); /* "/system.slice" (legacy) */ + if (!e) + e = endswith(cg.path, "/system"); /* (even more legacy) */ + if (e) + *e = 0; + + if (ret_root) + *ret_root = cg; + return 0; +} + +int cg2_sd_ver_get_hier_ver(SdCGroupVersion ver) { + switch (ver) { + default: + case CGROUP_VER_UNKNOWN: + return -EINVAL; + case CGROUP_VER_1: + return 1; + case CGROUP_VER_2: + case CGROUP_VER_MIXED_SD232: + case CGROUP_VER_MIXED_SD233: + return 2; + } +} + +int cg2_sd_pid_get_cgroup(pid_t pid, SdCGroup *ret_cgroup) { + _cleanup_cgroupfree_ CGroup root, mine; + int r; + const char *p; + + r = cg2_sd_get_root(&root); + if (r < 0) + return r; + + r = cg2_pid_get_cgroups(pid, root.hierarchy, &mine); + if (r < 0) + return r; + + p = startswith(mine.path, root.path); + if (!p) + return -ENXIO; + + if (ret_cgroup) { + char *prefix, *path; + prefix = strdup(root.path); + if (!prefix) + goto enomem; + path = strdup(p); + if (!path) + goto enomem; + ret_cgroup->prefix.hierarchy = root.hierarchy; + ret_cgroup->prefix.path = prefix; + ret_cgroup->path = path; + return 0; + enomem: + free(prefix); + free(path); + return -ENOMEM; + } + return 0; +} + +int cg2_sd_cgroup_parse(SdCGroup cgroup, char **ret_slice, char **ret_unit, SdCGroup *ret_extra) { + const char *rest, *slice, *unit, *prefix, *extra; + size_t slice_len, unit_len, prefix_len, extra_len; + char *hslice = NULL, *hunit = NULL; + SdCGroup sextra; + + assert(cgroup.path); + assert(cgroup.prefix.path); + assert(cgroup.prefix.hierarchy); + + /* Given + * cgroup.path = "/foo.slice/bar.slice/baz.slice/unit.service/extra..." + * we return + * *ret_slice = "baz.slice" + * *ret_unit = "unit.service" + * ret_extra->prefix.hierarchy = cgroup.prefix.hierarchy + * ret_extra->prefix.path = strjoin(cgroup.prefix.path, "/foo.slice/bar.slize/baz.slice/unit.service", NULL) + * ret_extra->path = "/extra..." + * + * The input path my contain 0 or more leading ".slice" + * segments; we return the rightmost. If there are no + * ".slice" segments, we return SPECIAL_ROOT_SLICE + * ("-.slice"). + */ + + rest = cgroup.path; + + /* slice */ + slice = SPECIAL_ROOT_SLICE; + slice_len = strlen(slice); + for (;;) { + const char *part, *tmprest; + size_t part_len; + + /* trim leading "/"s */ + tmprest = rest + strspn(rest, "/"); + + /* split off the first part */ + part = tmprest; + part_len = strcspn(part, "/"); + tmprest += part_len; + + if (valid_slice_name(part, part_len)) { + /* accept this iteration */ + slice = part; + slice_len = part_len; + rest = tmprest; + } else { + /* reject this iteration; we have found the first + * non-slice segment. */ + break; + } + } + cg2_unescape(&slice, &slice_len); + + /* unit */ + rest += strspn(rest, "/"); + unit = rest; + unit_len = strcspn(unit, "/"); + rest += unit_len; + cg2_unescape(&unit, &unit_len); + if (!unit_name_is_valid(strndupa(unit, unit_len), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) + return -ENXIO; + + /* extra */ + extra = rest; + extra_len = strlen(rest); + prefix = cgroup.path; + prefix_len = extra - prefix; + + /* allocate return values */ + if (ret_slice) { + hslice = strndup(slice, slice_len); + if (!hslice) + goto enomem; + } + if (ret_unit) { + hunit = strndup(unit, unit_len); + if (!hslice) + goto enomem; + } + if (ret_extra) { + sextra.prefix.hierarchy = cgroup.prefix.hierarchy; + sextra.prefix.path = strndup(prefix, prefix_len); + if (!sextra.prefix.path) + goto enomem; + sextra.prefix.path = strjoin(cgroup.prefix.path, sextra.prefix.path, NULL); + if (!sextra.prefix.path) + goto enomem; + sextra.path = strndup(extra, extra_len); + if (!sextra.path) + goto enomem; + } + + /* return */ + if (ret_slice) + *ret_slice = hslice; + if (ret_unit) + *ret_unit = hunit; + if (ret_extra) + *ret_extra = sextra; + return 0; + + enomem: + free(hslice); + free(hunit); + cg2_sd_freep(&sextra); + return -ENOMEM; +} + + +int cg2_sd_cgroup_get_owner_uid(SdCGroup cgroup, uid_t *ret_uid) { + _cleanup_free_ char *slice = NULL; + char *start, *end; + int r; + + r = cg2_sd_cgroup_parse(cgroup, &slice, NULL, NULL); + if (r < 0) + return r; + + start = startswith(slice, "user-"); + if (!start) + return -ENXIO; + end = endswith(start, ".slice"); + if (!end) + return -ENXIO; + + *end = '\0'; + if (parse_uid(start, ret_uid) < 0) + return -ENXIO; + return 0; +} + +static int cg2_sd_cgroup_get_cgroup(SdCGroup sdcgroup, CGroup *ret_cgroup) { + assert(sdcgroup.prefix.path); + assert(sdcgroup.path); + + if (ret_cgroup) { + ret_cgroup->path = strjoin(sdcgroup.prefix.path, sdcgroup.path, NULL); + if (!ret_cgroup->path) + return -ENOMEM; + ret_cgroup->hierarchy = sdcgroup.prefix.hierarchy; + } + return 0; +} + +char *cg2_sd_cgroup_get_filepath(SdCGroup sdcgroup) { + _cleanup_cgroupfree_ CGroup cgroup; + + if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0) + return NULL; + + return cg2_cgroup_get_filepath(cgroup); +} + +char *cg2_sd_cgroup_get_cgpath(SdCGroup sdcgroup) { + CGroup cgroup; + + if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0) + return NULL; + + return cgroup.path; +} + +char *cg2_sd_cgroup_get_str(SdCGroup sdcgroup) { + _cleanup_cgroupfree_ CGroup cgroup; + + if (cg2_sd_cgroup_get_cgroup(sdcgroup, &cgroup) < 0) + return NULL; + + return cg2_cgroup_get_str(cgroup); +} + +/* basically copied from old cgroup-util ****************************/ + +static void cg2_unescape(const char **p, size_t *n) { + size_t sn; + + assert(p); + + if (!n) + n = &sn; + + /* The return value of this function (unlike cg_escape()) + * doesn't need free()! */ + + if (*n >= 1 && (*p)[0] == '_') { + (*p)++; + (*n)--; + } +} + +static bool valid_slice_name(const char *p, size_t n) { + + if (!p) + return false; + + if (n < strlen("x.slice")) + return false; + + if (memcmp(p + n - 6, ".slice", 6) == 0) { + const char *c = strndupa(p, n); + cg2_unescape(&c, &n); + return unit_name_is_valid(c, UNIT_NAME_PLAIN); + } + + return false; +} |