From bd15ab41a1347fed8266845f875842d1502e02a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2016 14:45:53 -0500 Subject: nspawn: fix cgroup mode detection cgroup mode detection is broken in two different ways. * detect_unified_cgroup_hierarchy() is called too nested in outer_child(). sync_cgroup() which is used by run() also needs to know the requested cgroup mode but it's currently always getting CGROUP_UNIFIED_UNKNOWN. This makes it skip syncing the inner cgroup hierarchy on some config combinations. $ cat /proc/self/cgroup | grep systemd 1:name=systemd:/user.slice/user-0.slice/session-c1.scope $ UNIFIED_CGROUP_HIERARCHY=0 SYSTEMD_NSPAWN_USE_CGNS=0 systemd-nspawn -M container ... [root@container ~]# cat /proc/self/cgroup | grep systemd 1:name=systemd:/machine.slice/machine-container.x86_64.scope $ exit $ UNIFIED_CGROUP_HIERARCHY=1 SYSTEMD_NSPAWN_USE_CGNS=0 systemd-nspawn -M container [root@container ~]# cat /proc/self/cgroup | grep 0:: 0::/ $ exit Note how the unified hierarchy case's path is not synchronized with the host. This for example can cause issues when there are multiple such containers. Fixed by moving detect_unified_cgroup_hierarchy() invocation to main(). * inner_child() was invoking cg_unified_flush(). inner_child() executes fully scoped and can't determine which cgroup mode the host was in. It doesn't make sense to keep flushing the detected mode when the host mode can't change. Fixed by replacing cg_unified_flush() invocations in outer_child() and inner_child() with one in main(). --- src/nspawn/nspawn.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 751f26272b..f5956bcb15 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2168,8 +2168,6 @@ static int inner_child( assert(directory); assert(kmsg_socket >= 0); - cg_unified_flush(); - if (arg_userns_mode != USER_NAMESPACE_NO) { /* Tell the parent, that it now can write the UID map. */ (void) barrier_place(barrier); /* #1 */ @@ -2440,8 +2438,6 @@ static int outer_child( assert(notify_socket >= 0); assert(kmsg_socket >= 0); - cg_unified_flush(); - if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m"); @@ -2486,10 +2482,6 @@ static int outer_child( if (r < 0) return r; - r = detect_unified_cgroup_hierarchy(directory); - if (r < 0) - return r; - if (arg_userns_mode != USER_NAMESPACE_NO) { /* Let the parent know which UID shift we read from the image */ l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL); @@ -3541,6 +3533,7 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); + cg_unified_flush(); /* Make sure rename_process() in the stub init process can work */ saved_argv = argv; @@ -3810,6 +3803,10 @@ int main(int argc, char *argv[]) { if (r < 0) goto finish; + r = detect_unified_cgroup_hierarchy(arg_directory); + if (r < 0) + goto finish; + interactive = isatty(STDIN_FILENO) > 0 && isatty(STDOUT_FILENO) > 0; -- cgit v1.2.3-54-g00ecf From 415fc41ceaeada2e32639f24f134b1c248b9e43f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2016 14:45:53 -0500 Subject: core: simplify cg_[all_]unified() cg_[all_]unified() test whether a specific controller or all controllers are on the unified hierarchy. While what's being asked is a simple binary question, the callers must assume that the functions may fail any time, which unnecessarily complicates their usages. This complication is unnecessary. Internally, the test result is cached anyway and there are only a few places where the test actually needs to be performed. This patch simplifies cg_[all_]unified(). * cg_[all_]unified() are updated to return bool. If the result can't be decided, assertion failure is triggered. Error handlings from their callers are dropped. * cg_unified_flush() is updated to calculate the new result synchrnously and return whether it succeeded or not. Places which need to flush the test result are updated to test for failure. This ensures that all the following cg_[all_]unified() tests succeed. * Places which expected possible cg_[all_]unified() failures are updated to call and test cg_unified_flush() before calling cg_[all_]unified(). This includes functions used while setting up mounts during boot and manager_setup_cgroup(). --- src/basic/cgroup-util.c | 119 ++++++++++----------------------- src/basic/cgroup-util.h | 6 +- src/cgls/cgls.c | 2 +- src/cgtop/cgtop.c | 13 ++-- src/core/cgroup.c | 36 +++++----- src/core/manager.c | 2 +- src/core/scope.c | 2 +- src/core/service.c | 2 +- src/core/unit.c | 4 +- src/libsystemd/sd-bus/test-bus-creds.c | 2 +- src/nspawn/nspawn-cgroup.c | 17 ++--- src/nspawn/nspawn-mount.c | 4 +- src/nspawn/nspawn.c | 18 ++--- 13 files changed, 82 insertions(+), 145 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 6948ed3931..48623fb235 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -594,7 +594,7 @@ static int join_path_unified(const char *path, const char *suffix, char **fs) { } int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) { - int unified, r; + int r; assert(fs); @@ -623,11 +623,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (!cg_controller_is_valid(controller)) return -EINVAL; - unified = cg_all_unified(); - if (unified < 0) - return unified; - - if (unified > 0) + if (cg_all_unified()) r = join_path_unified(path, suffix, fs); else r = join_path_legacy(controller, path, suffix, fs); @@ -639,7 +635,6 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch } static int controller_is_accessible(const char *controller) { - int unified; assert(controller); @@ -651,10 +646,7 @@ static int controller_is_accessible(const char *controller) { if (!cg_controller_is_valid(controller)) return -EINVAL; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) { + if (cg_all_unified()) { /* We don't support named hierarchies if we are using * the unified hierarchy. */ @@ -851,7 +843,7 @@ int cg_set_task_access( gid_t gid) { _cleanup_free_ char *fs = NULL, *procs = NULL; - int r, unified; + int r; assert(path); @@ -869,10 +861,7 @@ int cg_set_task_access( if (r < 0) return r; - unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified) + if (cg_unified(controller)) return 0; /* Compatibility, Always keep values for "tasks" in sync with @@ -925,7 +914,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { char line[LINE_MAX]; const char *fs; size_t cs = 0; - int unified; + bool unified; assert(path); assert(pid >= 0); @@ -937,9 +926,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { controller = SYSTEMD_CGROUP_CONTROLLER; unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified == 0) + if (!unified) cs = strlen(controller); fs = procfs_file_alloca(pid, "cgroup"); @@ -1001,14 +988,11 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { int cg_install_release_agent(const char *controller, const char *agent) { _cleanup_free_ char *fs = NULL, *contents = NULL; const char *sc; - int r, unified; + int r; assert(agent); - unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified) /* doesn't apply to unified hierarchy */ + if (cg_unified(controller)) /* doesn't apply to unified hierarchy */ return -EOPNOTSUPP; r = cg_get_path(controller, NULL, "release_agent", &fs); @@ -1054,12 +1038,9 @@ int cg_install_release_agent(const char *controller, const char *agent) { int cg_uninstall_release_agent(const char *controller) { _cleanup_free_ char *fs = NULL; - int r, unified; + int r; - unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified) /* Doesn't apply to unified hierarchy */ + if (cg_unified(controller)) /* Doesn't apply to unified hierarchy */ return -EOPNOTSUPP; r = cg_get_path(controller, NULL, "notify_on_release", &fs); @@ -1104,7 +1085,7 @@ int cg_is_empty(const char *controller, const char *path) { } int cg_is_empty_recursive(const char *controller, const char *path) { - int unified, r; + int r; assert(path); @@ -1112,11 +1093,7 @@ int cg_is_empty_recursive(const char *controller, const char *path) { if (controller && (isempty(path) || path_equal(path, "/"))) return false; - unified = cg_unified(controller); - if (unified < 0) - return unified; - - if (unified > 0) { + if (cg_unified(controller)) { _cleanup_free_ char *t = NULL; /* On the unified hierarchy we can check empty state @@ -1986,7 +1963,7 @@ int cg_get_keyed_attribute(const char *controller, const char *path, const char int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { CGroupController c; - int r, unified; + int r; /* This one will create a cgroup in our private tree, but also * duplicate it in the trees specified in mask, and remove it @@ -1998,10 +1975,7 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path return r; /* If we are in the unified hierarchy, we are done now */ - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return 0; /* Otherwise, do the same in the other hierarchies */ @@ -2022,16 +1996,13 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { CGroupController c; - int r, unified; + int r; r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); if (r < 0) return r; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return 0; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -2072,7 +2043,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; - int r = 0, unified; + int r = 0; if (!path_equal(from, to)) { r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); @@ -2080,10 +2051,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to return r; } - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return r; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -2107,16 +2075,13 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { CGroupController c; - int r, unified; + int r; r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); if (r < 0) return r; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return r; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -2133,16 +2098,13 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) int cg_mask_supported(CGroupMask *ret) { CGroupMask mask = 0; - int r, unified; + int r; /* Determines the mask of supported cgroup controllers. Only * includes controllers we can make sense of and that are * actually accessible. */ - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) { + if (cg_all_unified()) { _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL; const char *c; @@ -2291,13 +2253,9 @@ static int cg_update_unified(void) { return 0; } -int cg_unified(const char *controller) { - - int r; +bool cg_unified(const char *controller) { - r = cg_update_unified(); - if (r < 0) - return r; + assert(cg_update_unified() >= 0); if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER)) return unified_cache >= CGROUP_UNIFIED_SYSTEMD; @@ -2305,29 +2263,28 @@ int cg_unified(const char *controller) { return unified_cache >= CGROUP_UNIFIED_ALL; } -int cg_all_unified(void) { +bool cg_all_unified(void) { return cg_unified(NULL); } -void cg_unified_flush(void) { +int cg_unified_flush(void) { unified_cache = CGROUP_UNIFIED_UNKNOWN; + + return cg_update_unified(); } int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { _cleanup_free_ char *fs = NULL; CGroupController c; - int r, unified; + int r; assert(p); if (supported == 0) return 0; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */ + if (!cg_all_unified()) /* on the legacy hiearchy there's no joining of controllers defined */ return 0; r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); @@ -2359,14 +2316,13 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { bool cg_is_unified_wanted(void) { static thread_local int wanted = -1; - int r, unified; + int r; bool b; /* If the hierarchy is already mounted, then follow whatever * was chosen for it. */ - unified = cg_all_unified(); - if (unified >= 0) - return unified; + if (cg_unified_flush() >= 0) + return cg_all_unified(); /* Otherwise, let's see what the kernel command line has to * say. Since checking that is expensive, let's cache the @@ -2387,7 +2343,7 @@ bool cg_is_legacy_wanted(void) { bool cg_is_unified_systemd_controller_wanted(void) { static thread_local int wanted = -1; - int r, unified; + int r; bool b; /* If the unified hierarchy is requested in full, no need to @@ -2397,9 +2353,8 @@ bool cg_is_unified_systemd_controller_wanted(void) { /* If the hierarchy is already mounted, then follow whatever * was chosen for it. */ - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified >= 0) - return unified; + if (cg_unified_flush() >= 0) + return cg_unified(SYSTEMD_CGROUP_CONTROLLER); /* Otherwise, let's see what the kernel command line has to * say. Since checking that is expensive, let's cache the diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 0aa27c4cd7..09dcc2b38e 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -240,9 +240,9 @@ int cg_kernel_controllers(Set *controllers); bool cg_ns_supported(void); -int cg_all_unified(void); -int cg_unified(const char *controller); -void cg_unified_flush(void); +bool cg_all_unified(void); +bool cg_unified(const char *controller); +int cg_unified_flush(void); bool cg_is_unified_wanted(void); bool cg_is_legacy_wanted(void); diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c index 5574c14555..40db82f9ae 100644 --- a/src/cgls/cgls.c +++ b/src/cgls/cgls.c @@ -158,7 +158,7 @@ static int parse_argv(int argc, char *argv[]) { static void show_cg_info(const char *controller, const char *path) { - if (cg_all_unified() <= 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + if (!cg_all_unified() && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER)) printf("Controller %s; ", controller); printf("Control group %s:\n", isempty(path) ? "/" : path); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 50ac6a58b0..45c050c9c3 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -214,7 +214,7 @@ static int process( uint64_t new_usage; nsec_t timestamp; - if (cg_all_unified() > 0) { + if (cg_all_unified()) { const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; @@ -274,7 +274,7 @@ static int process( } else if (streq(controller, "memory")) { _cleanup_free_ char *p = NULL, *v = NULL; - if (cg_all_unified() <= 0) + if (!cg_all_unified()) r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); else r = cg_get_path(controller, path, "memory.current", &p); @@ -294,15 +294,14 @@ static int process( if (g->memory > 0) g->memory_valid = true; - } else if ((streq(controller, "io") && cg_all_unified() > 0) || - (streq(controller, "blkio") && cg_all_unified() <= 0)) { + } else if ((streq(controller, "io") && cg_all_unified()) || + (streq(controller, "blkio") && !cg_all_unified())) { _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *p = NULL; - bool unified = cg_all_unified() > 0; uint64_t wr = 0, rd = 0; nsec_t timestamp; - r = cg_get_path(controller, path, unified ? "io.stat" : "blkio.io_service_bytes", &p); + r = cg_get_path(controller, path, cg_all_unified() ? "io.stat" : "blkio.io_service_bytes", &p); if (r < 0) return r; @@ -325,7 +324,7 @@ static int process( l += strcspn(l, WHITESPACE); l += strspn(l, WHITESPACE); - if (unified) { + if (cg_all_unified()) { while (!isempty(l)) { if (sscanf(l, "rbytes=%" SCNu64, &k)) rd += k; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 5789e2aa82..17e0857729 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -678,7 +678,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { bool has_weight = cgroup_context_has_cpu_weight(c); bool has_shares = cgroup_context_has_cpu_shares(c); - if (cg_all_unified() > 0) { + if (cg_all_unified()) { uint64_t weight; if (has_weight) @@ -858,7 +858,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (cg_all_unified() > 0) { + if (cg_all_unified()) { uint64_t max; uint64_t swap_max = CGROUP_LIMIT_MAX; @@ -1033,7 +1033,7 @@ CGroupMask unit_get_own_mask(Unit *u) { e = unit_get_exec_context(u); if (!e || exec_context_maintains_privileges(e) || - cg_all_unified() > 0) + cg_all_unified()) return _CGROUP_MASK_ALL; } @@ -1260,10 +1260,7 @@ int unit_watch_cgroup(Unit *u) { return 0; /* Only applies to the unified hierarchy */ - r = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); - if (r == 0) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER)) return 0; /* Don't watch the root slice, it's pointless. */ @@ -1683,7 +1680,7 @@ int unit_watch_all_pids(Unit *u) { if (!u->cgroup_path) return -ENOENT; - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) /* On unified we can use proper notifications */ return 0; return unit_watch_pids_in_path(u, u->cgroup_path); @@ -1756,7 +1753,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, int manager_setup_cgroup(Manager *m) { _cleanup_free_ char *path = NULL; CGroupController c; - int r, all_unified, systemd_unified; + int r; char *e; assert(m); @@ -1793,16 +1790,13 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Cannot find cgroup mount point: %m"); - all_unified = cg_all_unified(); - systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - - if (all_unified < 0 || systemd_unified < 0) - return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, - "Couldn't determine if we are running in the unified hierarchy: %m"); + r = cg_unified_flush(); + if (r < 0) + return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m"); - if (all_unified > 0) + if (cg_all_unified()) log_debug("Unified cgroup hierarchy is located at %s.", path); - else if (systemd_unified > 0) + else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path); else log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); @@ -1811,7 +1805,7 @@ int manager_setup_cgroup(Manager *m) { const char *scope_path; /* 3. Install agent */ - if (systemd_unified) { + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) { /* In the unified hierarchy we can get * cgroup empty notifications via inotify. */ @@ -1871,7 +1865,7 @@ int manager_setup_cgroup(Manager *m) { return log_error_errno(errno, "Failed to open pin file: %m"); /* 6. Always enable hierarchical support if it exists... */ - if (!all_unified) + if (!cg_all_unified()) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); } @@ -1997,7 +1991,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; - if (cg_all_unified() <= 0) + if (!cg_all_unified()) r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); else r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v); @@ -2042,7 +2036,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; - if (cg_all_unified() > 0) { + if (cg_all_unified()) { const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; uint64_t us; diff --git a/src/core/manager.c b/src/core/manager.c index b509adfc64..b2cd352a0c 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -775,7 +775,7 @@ static int manager_setup_cgroups_agent(Manager *m) { if (!MANAGER_IS_SYSTEM(m)) return 0; - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) /* We don't need this anymore on the unified hierarchy */ return 0; if (m->cgroups_agent_fd < 0) { diff --git a/src/core/scope.c b/src/core/scope.c index 9540fb67d9..5e068a76d1 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -475,7 +475,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER) && set_isempty(u->pids)) scope_notify_cgroup_empty_event(u); } diff --git a/src/core/service.c b/src/core/service.c index 54074ff7bc..0c2eb18f38 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -2938,7 +2938,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER) && set_isempty(u->pids)) service_notify_cgroup_empty_event(u); } diff --git a/src/core/unit.c b/src/core/unit.c index bb05d2abfb..685df6f00d 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -3897,8 +3897,8 @@ int unit_kill_context( * there we get proper events. Hence rely on * them.*/ - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 || - (detect_container() == 0 && !unit_cgroup_delegate(u))) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) || + (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; if (send_sighup) { diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c index 6fdcfa4128..64bd76a576 100644 --- a/src/libsystemd/sd-bus/test-bus-creds.c +++ b/src/libsystemd/sd-bus/test-bus-creds.c @@ -31,7 +31,7 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); - if (cg_all_unified() == -ENOMEDIUM) { + if (cg_unified_flush() == -ENOMEDIUM) { log_info("Skipping test: /sys/fs/cgroup/ not available"); return EXIT_TEST_SKIP; } diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 5274767b96..4678a7e349 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -78,13 +78,9 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift) char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; bool undo_mount = false; const char *fn; - int unified, r; - - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified < 0) - return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m"); + int r; - if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) return 0; /* When the host uses the legacy cgroup setup, but the @@ -100,7 +96,7 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift) if (!mkdtemp(tree)) return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m"); - if (unified) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr"); else @@ -142,7 +138,7 @@ finish: int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { _cleanup_free_ char *cgroup = NULL; const char *child; - int unified, r; + int r; CGroupMask supported; /* In the unified hierarchy inner nodes may only contain @@ -154,10 +150,7 @@ int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { if (unified_requested == CGROUP_UNIFIED_NONE) return 0; - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified < 0) - return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m"); - if (unified == 0) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER)) return 0; r = cg_mask_supported(&supported); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 4b2838b752..1493ef6aad 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -994,7 +994,7 @@ static int mount_legacy_cgns_supported( return r; } - if (cg_all_unified() > 0) + if (cg_all_unified()) goto skip_controllers; controllers = set_new(&string_hash_ops); @@ -1091,7 +1091,7 @@ static int mount_legacy_cgns_unsupported( return r; } - if (cg_all_unified() > 0) + if (cg_all_unified()) goto skip_controllers; controllers = set_new(&string_hash_ops); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index f5956bcb15..75b6728688 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -316,7 +316,7 @@ static int custom_mount_check_all(void) { static int detect_unified_cgroup_hierarchy(const char *directory) { const char *e; - int r, all_unified, systemd_unified; + int r; /* Allow the user to control whether the unified hierarchy is used */ e = getenv("UNIFIED_CGROUP_HIERARCHY"); @@ -332,15 +332,8 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { return 0; } - all_unified = cg_all_unified(); - systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - - if (all_unified < 0 || systemd_unified < 0) - return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, - "Failed to determine whether the unified cgroups hierarchy is used: %m"); - /* Otherwise inherit the default from the host system */ - if (all_unified > 0) { + if (cg_all_unified()) { /* Unified cgroup hierarchy support was added in 230. Unfortunately the detection * routine only detects 231, so we'll have a false negative here for 230. */ r = systemd_installation_has_version(directory, 230); @@ -350,7 +343,7 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; - } else if (systemd_unified > 0) { + } else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) { /* Mixed cgroup hierarchy support was added in 232 */ r = systemd_installation_has_version(directory, 232); if (r < 0) @@ -3533,7 +3526,10 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); - cg_unified_flush(); + + r = cg_unified_flush(); + if (r < 0) + return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m"); /* Make sure rename_process() in the stub init process can work */ saved_argv = argv; -- cgit v1.2.3-54-g00ecf From b6629c4b9f82f5801af186e975b0220bf066ddbe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2016 14:45:53 -0500 Subject: core: make SYSTEMD_CGROUP_CONTROLLER a special string SYSTEMD_CGROUP_CONTROLLER is currently defined as "name=systemd" which cgroup utility functions interpret as a named cgroup hierarchy with the specified named. With the planned cgroup hybrid mode changes, SYSTEMD_CGROUP_CONTROLLER would map to different hierarchy names. This patch makes SYSTEMD_CGROUP_CONTROLLER a special string "_systemd" which is substituted to "name=systemd" by the cgroup utility functions. This allows the callers to address the systemd hierarchy without actually specifying the hierarchy name allowing the cgroup utility functions to map it to whatever is appropriate. Note that SYSTEMD_CGROUP_CONTROLLER was already special on full unified cgroup hierarchy even before this patch. --- src/basic/cgroup-util.c | 20 ++++++++++++++++---- src/basic/def.h | 3 ++- src/core/cgroup.c | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 48623fb235..67c2d3a681 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -542,6 +542,9 @@ static const char *controller_to_dirname(const char *controller) { * just cuts off the name= prefixed used for named * hierarchies, if it is specified. */ + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + e = startswith(controller, "name="); if (e) return e; @@ -912,7 +915,7 @@ int cg_get_xattr(const char *controller, const char *path, const char *name, voi int cg_pid_get_path(const char *controller, pid_t pid, char **path) { _cleanup_fclose_ FILE *f = NULL; char line[LINE_MAX]; - const char *fs; + const char *fs, *controller_str; size_t cs = 0; bool unified; @@ -926,8 +929,14 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { controller = SYSTEMD_CGROUP_CONTROLLER; unified = cg_unified(controller); - if (!unified) - cs = strlen(controller); + if (!unified) { + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + else + controller_str = controller; + + cs = strlen(controller_str); + } fs = procfs_file_alloca(pid, "cgroup"); f = fopen(fs, "re"); @@ -964,7 +973,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { *e = 0; FOREACH_WORD_SEPARATOR(word, k, l, ",", state) { - if (k == cs && memcmp(word, controller, cs) == 0) { + if (k == cs && memcmp(word, controller_str, cs) == 0) { found = true; break; } @@ -1810,6 +1819,9 @@ bool cg_controller_is_valid(const char *p) { if (!p) return false; + if (streq(p, SYSTEMD_CGROUP_CONTROLLER)) + return true; + s = startswith(p, "name="); if (s) p = s; diff --git a/src/basic/def.h b/src/basic/def.h index 2266eff650..3ba275f64b 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -36,7 +36,8 @@ /* The default value for the net.unix.max_dgram_qlen sysctl */ #define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL -#define SYSTEMD_CGROUP_CONTROLLER "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER "_systemd" #define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT #define SIGNALS_IGNORE SIGPIPE diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 17e0857729..fbb711782e 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1799,7 +1799,7 @@ int manager_setup_cgroup(Manager *m) { else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path); else - log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); + log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path); if (!m->test_run) { const char *scope_path; -- cgit v1.2.3-54-g00ecf From 2dcb526d7a43cc4ac9493877ceb05810ff56dbae Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Mon, 20 Feb 2017 12:26:53 -0500 Subject: cgroup-util: fix the reversed return value of cg_is_unified_systemd_contoller_wanted 1d84ad944520fc3e062ef518c4db4e1 reversed the meaning of the option. The kernel command line option has the opposite meaning to the function, i.e. specifying "legacy=yes" means "unifed systemd controller=no". --- src/basic/cgroup-util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 67c2d3a681..b5ca10a2de 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2378,7 +2378,9 @@ bool cg_is_unified_systemd_controller_wanted(void) { if (r < 0) return false; - return (wanted = r > 0 ? b : false); + /* The meaning of the kernel option is reversed wrt. to the return value + * of this function, hence the negation. */ + return (wanted = r > 0 ? !b : false); } bool cg_is_legacy_systemd_controller_wanted(void) { -- cgit v1.2.3-54-g00ecf From 2977724b09eb997fc84a80517447b5d4a70770c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2016 14:45:53 -0500 Subject: core: make hybrid cgroup unified mode keep compat /sys/fs/cgroup/systemd hierarchy Currently the hybrid mode mounts cgroup v2 on /sys/fs/cgroup instead of the v1 name=systemd hierarchy. While this works fine for systemd itself, it breaks tools which expect cgroup v1 hierarchy on /sys/fs/cgroup/systemd. This patch updates the hybrid mode so that it mounts v2 hierarchy on /sys/fs/cgroup/unified and keeps v1 "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility. systemd itself doesn't depend on the "name=systemd" hierarchy at all. All operations take place on the v2 hierarchy as before but the v1 hierarchy is kept in sync so that any tools which expect it to be there can keep doing so. This allows systemd to take advantage of cgroup v2 process management without requiring other tools to be aware of the hybrid mode. The hybrid mode is implemented by mapping the special systemd controller to /sys/fs/cgroup/unified and making the basic cgroup utility operations - cg_attach(), cg_create(), cg_rmdir() and cg_trim() - also operate on the /sys/fs/cgroup/systemd hierarchy whenever the cgroup2 hierarchy is updated. While a bit messy, this will allow dropping complications from using cgroup v1 for process management a lot sooner than otherwise possible which should make it a net gain in terms of maintainability. v2: Fixed !cgns breakage reported by @evverx and renamed the unified mount point to /sys/fs/cgroup/unified as suggested by @brauner. v3: chown the compat hierarchy too on delegation. Suggested by @evverx. v4: [zj] - drop the change to default, full "legacy" is still the default. --- src/basic/cgroup-util.c | 95 +++++++++++++++++++++++++++++++++++++---------- src/basic/cgroup-util.h | 2 +- src/basic/def.h | 1 + src/core/mount-setup.c | 6 +-- src/nspawn/nspawn-mount.c | 79 ++++++++++++++++++++++++--------------- src/nspawn/nspawn.c | 4 +- 6 files changed, 131 insertions(+), 56 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index b5ca10a2de..6601f16e01 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -208,6 +208,12 @@ int cg_rmdir(const char *controller, const char *path) { if (r < 0 && errno != ENOENT) return -errno; + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path); + } + return 0; } @@ -542,8 +548,12 @@ static const char *controller_to_dirname(const char *controller) { * just cuts off the name= prefixed used for named * hierarchies, if it is specified. */ - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) - controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + if (cg_hybrid_unified()) + controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID; + else + controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + } e = startswith(controller, "name="); if (e) @@ -703,7 +713,7 @@ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct int cg_trim(const char *controller, const char *path, bool delete_root) { _cleanup_free_ char *fs = NULL; - int r = 0; + int r = 0, q; assert(path); @@ -726,6 +736,12 @@ int cg_trim(const char *controller, const char *path, bool delete_root) { return -errno; } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + return r; } @@ -749,6 +765,12 @@ int cg_create(const char *controller, const char *path) { return -errno; } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + return 1; } @@ -786,7 +808,17 @@ int cg_attach(const char *controller, const char *path, pid_t pid) { xsprintf(c, PID_FMT "\n", pid); - return write_string_file(fs, c, 0); + r = write_string_file(fs, c, 0); + if (r < 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path); + } + + return 0; } int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { @@ -835,7 +867,17 @@ int cg_set_group_access( if (r < 0) return r; - return chmod_and_chown(fs, mode, uid, gid); + r = chmod_and_chown(fs, mode, uid, gid); + if (r < 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid); + if (r < 0) + log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path); + } + + return 0; } int cg_set_task_access( @@ -864,13 +906,18 @@ int cg_set_task_access( if (r < 0) return r; - if (cg_unified(controller)) - return 0; + if (!cg_unified(controller)) { + /* Compatibility, Always keep values for "tasks" in sync with + * "cgroup.procs" */ + if (cg_get_path(controller, path, "tasks", &procs) >= 0) + (void) chmod_and_chown(procs, mode, uid, gid); + } - /* Compatibility, Always keep values for "tasks" in sync with - * "cgroup.procs" */ - if (cg_get_path(controller, path, "tasks", &procs) >= 0) - (void) chmod_and_chown(procs, mode, uid, gid); + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid); + if (r < 0) + log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path); + } return 0; } @@ -2254,11 +2301,16 @@ static int cg_update_unified(void) { if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) unified_cache = CGROUP_UNIFIED_ALL; else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { - if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) - return -errno; - - unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ? - CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE; + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) + unified_cache = CGROUP_UNIFIED_SYSTEMD; + else { + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return -errno; + if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) + return -ENOMEDIUM; + unified_cache = CGROUP_UNIFIED_NONE; + } } else return -ENOMEDIUM; @@ -2280,6 +2332,13 @@ bool cg_all_unified(void) { return cg_unified(NULL); } +bool cg_hybrid_unified(void) { + + assert(cg_update_unified() >= 0); + + return unified_cache == CGROUP_UNIFIED_SYSTEMD; +} + int cg_unified_flush(void) { unified_cache = CGROUP_UNIFIED_UNKNOWN; @@ -2383,10 +2442,6 @@ bool cg_is_unified_systemd_controller_wanted(void) { return (wanted = r > 0 ? !b : false); } -bool cg_is_legacy_systemd_controller_wanted(void) { - return cg_is_legacy_wanted() && !cg_is_unified_systemd_controller_wanted(); -} - int cg_weight_parse(const char *s, uint64_t *ret) { uint64_t u; int r; diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 09dcc2b38e..5a82de71fe 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -241,13 +241,13 @@ int cg_kernel_controllers(Set *controllers); bool cg_ns_supported(void); bool cg_all_unified(void); +bool cg_hybrid_unified(void); bool cg_unified(const char *controller); int cg_unified_flush(void); bool cg_is_unified_wanted(void); bool cg_is_legacy_wanted(void); bool cg_is_unified_systemd_controller_wanted(void); -bool cg_is_legacy_systemd_controller_wanted(void); const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; diff --git a/src/basic/def.h b/src/basic/def.h index 3ba275f64b..fbab066564 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -37,6 +37,7 @@ #define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL #define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified" #define SYSTEMD_CGROUP_CONTROLLER "_systemd" #define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 9c2bf3a0ef..4e2df01346 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -99,12 +99,12 @@ static const MountPoint mount_table[] = { cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER }, + cg_is_legacy_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #ifdef ENABLE_EFI diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 1493ef6aad..ed4f1f9db8 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -890,7 +890,7 @@ static int get_controllers(Set *subsystems) { *e = 0; - if (STR_IN_SET(l, "", "name=systemd")) + if (STR_IN_SET(l, "", "name=systemd", "name=unified")) continue; p = strdup(l); @@ -909,7 +909,6 @@ static int mount_legacy_cgroup_hierarchy( const char *dest, const char *controller, const char *hierarchy, - CGroupUnified unified_requested, bool read_only) { const char *to, *fstype, *opts; @@ -927,14 +926,12 @@ static int mount_legacy_cgroup_hierarchy( /* The superblock mount options of the mount point need to be * identical to the hosts', and hence writable... */ - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { - fstype = "cgroup2"; - opts = NULL; - } else { - fstype = "cgroup"; - opts = "none,name=systemd,xattr"; - } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) { + fstype = "cgroup2"; + opts = NULL; + } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) { + fstype = "cgroup"; + opts = "none,name=systemd,xattr"; } else { fstype = "cgroup"; opts = controller; @@ -1012,7 +1009,7 @@ static int mount_legacy_cgns_supported( if (!controller) break; - r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns); + r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); if (r < 0) return r; @@ -1046,7 +1043,13 @@ static int mount_legacy_cgns_supported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1117,7 +1120,7 @@ static int mount_legacy_cgns_unsupported( if (r == -EINVAL) { /* Not a symbolic link, but directly a single cgroup hierarchy */ - r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); if (r < 0) return r; @@ -1137,7 +1140,7 @@ static int mount_legacy_cgns_unsupported( continue; } - r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); if (r < 0) return r; @@ -1150,7 +1153,13 @@ static int mount_legacy_cgns_unsupported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1202,12 +1211,25 @@ int mount_cgroups( return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); } +static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root) +{ + int r; + + /* Make our own cgroup a (writable) bind mount */ + r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); + if (r < 0) + return r; + + /* And then remount the systemd cgroup root read-only */ + return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, + MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); +} + int mount_systemd_cgroup_writable( const char *dest, CGroupUnified unified_requested) { _cleanup_free_ char *own_cgroup_path = NULL; - const char *systemd_root, *systemd_own; int r; assert(dest); @@ -1220,22 +1242,19 @@ int mount_systemd_cgroup_writable( if (path_equal(own_cgroup_path, "/")) return 0; - if (unified_requested >= CGROUP_UNIFIED_ALL) { - systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup"); - } else { - systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); - } + if (unified_requested >= CGROUP_UNIFIED_ALL) + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup")); - /* Make our own cgroup a (writable) bind mount */ - r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); - if (r < 0) - return r; + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/unified")); + if (r < 0) + return r; + } - /* And then remount the systemd cgroup root read-only */ - return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, - MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/systemd")); } int setup_volatile_state( diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 75b6728688..42355115ff 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -344,8 +344,8 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; } else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) { - /* Mixed cgroup hierarchy support was added in 232 */ - r = systemd_installation_has_version(directory, 232); + /* Mixed cgroup hierarchy support was added in 233 */ + r = systemd_installation_has_version(directory, 233); if (r < 0) return log_error_errno(r, "Failed to determine systemd version in container: %m"); if (r > 0) -- cgit v1.2.3-54-g00ecf From f08e9287200772c9a201d9f988dd78ffe2fc7ea4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Nov 2016 12:27:32 -0500 Subject: core: keep supporting cgroup hybrid layout from v232 for live upgrades v232's cgroup hybrid mode mounted v2 on /sys/fs/cgroup/systemd, which unfortunately broke other tools which expect v1 there. From v233 on, hybrid mode instead mounts and uses v2 on /sys/fs/cgroup/unified and keeps /sys/fs/cgroup/systemd on v1 for compatibility with external tools. However, to keep systemd live upgrades working, v233+ should be able to recognize v232 layout and keep using it. This patch adds v232 hybrid mode support. If v232 layout is detected, cg_unified(SYSTEMD_CGRouP_CONTROLLER) keeps returning %true but cg_hybrid_unified() returns %false. This keeps process management on cgroup v2 but turns off the parallel layout. --- src/basic/cgroup-util.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 6601f16e01..9208873d9c 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2283,6 +2283,20 @@ int cg_kernel_controllers(Set *controllers) { static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN; +/* The hybrid mode was initially implemented in v232 and simply mounted + * cgroup v2 on /sys/fs/cgroup/systemd. This unfortunately broke other + * tools (such as docker) which expected the v1 "name=systemd" hierarchy + * on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs + * v2 on /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy + * on /sys/fs/cgroup/systemd for compatibility with other tools. + * + * To keep live upgrade working, we detect and support v232 layout. When + * v232 layout is detected, to keep cgroup v2 process management but + * disable the compat dual layout, we return %true on + * cg_unified(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified(). + */ +static thread_local bool unified_systemd_v232; + static int cg_update_unified(void) { struct statfs fs; @@ -2302,9 +2316,14 @@ static int cg_update_unified(void) { unified_cache = CGROUP_UNIFIED_ALL; else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && - F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { unified_cache = CGROUP_UNIFIED_SYSTEMD; - else { + unified_systemd_v232 = false; + } else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = true; + } else { if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) return -errno; if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) @@ -2336,7 +2355,7 @@ bool cg_hybrid_unified(void) { assert(cg_update_unified() >= 0); - return unified_cache == CGROUP_UNIFIED_SYSTEMD; + return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232; } int cg_unified_flush(void) { -- cgit v1.2.3-54-g00ecf From 77fab2a91c801985bd8efb293319935a3cdc1dcf Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sat, 18 Feb 2017 23:13:15 -0500 Subject: pid1: add ./configure switch to select default cgroup hierarchy The default default is set to "legacy", with "hybrid" and "unified" being the other two alternatives. There invert the behaviour for systemd.legacy_systemd_cgroup_controller: if it is not specified on the kernel command line, "hybrid" is used if selected as the default. If this option is specified, "hybrid" is used if false, and full "legacy" if true. Also make all fields in the configure summary lowercase (unless they are capitalized names) for consistency. v2: - update for the fixed interpreation of systemd.legacy_systemd_cgroup_controller --- configure.ac | 50 ++++++++++++++++++++++++++++++++----------------- src/basic/cgroup-util.c | 22 ++++++++++++---------- 2 files changed, 45 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/configure.ac b/configure.ac index ef8a8087af..66ecde1a22 100644 --- a/configure.ac +++ b/configure.ac @@ -612,6 +612,21 @@ AC_ARG_WITH([fallback-hostname], AC_SUBST(FALLBACK_HOSTNAME) AC_DEFINE_UNQUOTED(FALLBACK_HOSTNAME, ["$FALLBACK_HOSTNAME"], [The hostname used if none configured]) +# ------------------------------------------------------------------------------ + +AC_ARG_WITH(default-hierarchy, + AS_HELP_STRING([--with-default-hierarchy=MODE], + [default cgroup hierarchy, defaults to "legacy"]), + [DEFAULT_HIERARCHY="$withval"], + [DEFAULT_HIERARCHY="legacy"]) + +AS_CASE("$DEFAULT_HIERARCHY", + [legacy], [mode=CGROUP_UNIFIED_NONE], + [hybrid], [mode=CGROUP_UNIFIED_SYSTEMD], + [unified], [mode=CGROUP_UNIFIED_ALL], + AC_MSG_ERROR(Bad default hierarchy mode ${DEFAULT_HIERARCHY})) +AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY, [$mode], [Default cgroup hierarchy]) + # ------------------------------------------------------------------------------ have_xz=no AC_ARG_ENABLE(xz, AS_HELP_STRING([--disable-xz], [disable optional XZ support])) @@ -1658,19 +1673,20 @@ AC_MSG_RESULT([ backlight: ${have_backlight} rfkill: ${have_rfkill} logind: ${have_logind} - Default KillUserProcesses setting: ${KILL_USER_PROCESSES} + default cgroup hierarchy: ${DEFAULT_HIERARCHY} + default KillUserProcesses setting: ${KILL_USER_PROCESSES} machined: ${have_machined} importd: ${have_importd} hostnamed: ${have_hostnamed} timedated: ${have_timedated} timesyncd: ${have_timesyncd} - Default NTP servers: ${NTP_SERVERS} + default NTP servers: ${NTP_SERVERS} time epoch: ${TIME_EPOCH} localed: ${have_localed} networkd: ${have_networkd} resolved: ${have_resolved} - Default DNS servers: ${DNS_SERVERS} - Default DNSSEC mode: ${DEFAULT_DNSSEC_MODE} + default DNS servers: ${DNS_SERVERS} + default DNSSEC mode: ${DEFAULT_DNSSEC_MODE} coredump: ${have_coredump} polkit: ${have_polkit} efi: ${have_efi} @@ -1709,27 +1725,27 @@ AC_MSG_RESULT([ rootlib dir: ${with_rootlibdir} SysV init scripts: ${SYSTEM_SYSVINIT_PATH} SysV rc?.d directories: ${SYSTEM_SYSVRCND_PATH} - Build Python: ${PYTHON} + build Python: ${PYTHON} PAM modules dir: ${with_pamlibdir} PAM configuration dir: ${with_pamconfdir} D-Bus policy dir: ${with_dbuspolicydir} D-Bus session dir: ${with_dbussessionservicedir} D-Bus system dir: ${with_dbussystemservicedir} - Bash completions dir: ${with_bashcompletiondir} - Zsh completions dir: ${with_zshcompletiondir} - Extra start script: ${RC_LOCAL_SCRIPT_PATH_START} - Extra stop script: ${RC_LOCAL_SCRIPT_PATH_STOP} - Adm group: ${have_adm_group} - Wheel group: ${have_wheel_group} - Debug shell: ${SUSHELL} @ ${DEBUGTTY} + bash completions dir: ${with_bashcompletiondir} + zsh completions dir: ${with_zshcompletiondir} + extra start script: ${RC_LOCAL_SCRIPT_PATH_START} + extra stop script: ${RC_LOCAL_SCRIPT_PATH_STOP} + adm group: ${have_adm_group} + wheel group: ${have_wheel_group} + debug shell: ${SUSHELL} @ ${DEBUGTTY} TTY GID: ${TTY_GID} - Maximum system UID: ${SYSTEM_UID_MAX} - Maximum system GID: ${SYSTEM_GID_MAX} - Certificate root: ${CERTIFICATEROOT} - Support URL: ${SUPPORT_URL} + maximum system UID: ${SYSTEM_UID_MAX} + maximum system GID: ${SYSTEM_GID_MAX} + certificate root: ${CERTIFICATEROOT} + support URL: ${SUPPORT_URL} nobody user name: ${NOBODY_USER_NAME} nobody group name: ${NOBODY_GROUP_NAME} - Fallback hostname: ${FALLBACK_HOSTNAME} + fallback hostname: ${FALLBACK_HOSTNAME} CFLAGS: ${OUR_CFLAGS} ${CFLAGS} CPPFLAGS: ${OUR_CPPFLAGS} ${CPPFLAGS} diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 9208873d9c..f80b6cacfb 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2408,23 +2408,24 @@ bool cg_is_unified_wanted(void) { static thread_local int wanted = -1; int r; bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; /* If the hierarchy is already mounted, then follow whatever * was chosen for it. */ if (cg_unified_flush() >= 0) return cg_all_unified(); - /* Otherwise, let's see what the kernel command line has to - * say. Since checking that is expensive, let's cache the - * result. */ + /* If we have a cached value, return that. */ if (wanted >= 0) return wanted; + /* Otherwise, let's see what the kernel command line has to say. + * Since checking is expensive, cache a non-error result. */ r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); if (r < 0) - return false; + return is_default; - return (wanted = r > 0 ? b : false); + return (wanted = r > 0 ? b : is_default); } bool cg_is_legacy_wanted(void) { @@ -2435,6 +2436,7 @@ bool cg_is_unified_systemd_controller_wanted(void) { static thread_local int wanted = -1; int r; bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_SYSTEMD; /* If the unified hierarchy is requested in full, no need to * bother with this. */ @@ -2446,19 +2448,19 @@ bool cg_is_unified_systemd_controller_wanted(void) { if (cg_unified_flush() >= 0) return cg_unified(SYSTEMD_CGROUP_CONTROLLER); - /* Otherwise, let's see what the kernel command line has to - * say. Since checking that is expensive, let's cache the - * result. */ + /* If we have a cached value, return that. */ if (wanted >= 0) return wanted; + /* Otherwise, let's see what the kernel command line has to say. + * Since checking is expensive, cache a non-error result. */ r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); if (r < 0) - return false; + return is_default; /* The meaning of the kernel option is reversed wrt. to the return value * of this function, hence the negation. */ - return (wanted = r > 0 ? !b : false); + return (wanted = r > 0 ? !b : is_default); } int cg_weight_parse(const char *s, uint64_t *ret) { -- cgit v1.2.3-54-g00ecf From 5a94b18752befd7cfb9961cad2b5dc4415036027 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sat, 18 Feb 2017 23:28:01 -0500 Subject: build.h: include default cgroup hierarchy setting in --version output This is pretty important, and we print this string during startup, so putting the default hierarchy information might help with diagnosis if things go awry. $ ./systemctl --version systemd 232 +PAM +AUDIT +SELINUX +IMA -APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD +IDN default-hierarchy=legacy v2: make the message nicer by including the ./configure option argument directly in output --- configure.ac | 2 ++ src/basic/build.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/configure.ac b/configure.ac index 66ecde1a22..f671d7e1b5 100644 --- a/configure.ac +++ b/configure.ac @@ -626,6 +626,8 @@ AS_CASE("$DEFAULT_HIERARCHY", [unified], [mode=CGROUP_UNIFIED_ALL], AC_MSG_ERROR(Bad default hierarchy mode ${DEFAULT_HIERARCHY})) AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY, [$mode], [Default cgroup hierarchy]) +AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY_NAME, ["$DEFAULT_HIERARCHY"], + [Default cgroup hierarchy as string]) # ------------------------------------------------------------------------------ have_xz=no diff --git a/src/basic/build.h b/src/basic/build.h index 633c2aaccb..91312bd2a3 100644 --- a/src/basic/build.h +++ b/src/basic/build.h @@ -133,6 +133,8 @@ #define _IDN_FEATURE_ "-IDN" #endif +#define _CGROUP_HIEARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME + #define SYSTEMD_FEATURES \ _PAM_FEATURE_ " " \ _AUDIT_FEATURE_ " " \ @@ -152,4 +154,5 @@ _BLKID_FEATURE_ " " \ _ELFUTILS_FEATURE_ " " \ _KMOD_FEATURE_ " " \ - _IDN_FEATURE_ + _IDN_FEATURE_ " " \ + _CGROUP_HIEARCHY_ -- cgit v1.2.3-54-g00ecf From a4464b952238cdcdab64f3dadb288700dcd25065 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 19 Feb 2017 15:36:56 -0500 Subject: Rename cg_is_unified_systemd_controller_wanted to cg_is_hybrid_wanted Less typing and doesn't make the table so incredibly wide. --- src/basic/cgroup-util.c | 2 +- src/basic/cgroup-util.h | 2 +- src/core/mount-setup.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index f80b6cacfb..0b8bda89ea 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2432,7 +2432,7 @@ bool cg_is_legacy_wanted(void) { return !cg_is_unified_wanted(); } -bool cg_is_unified_systemd_controller_wanted(void) { +bool cg_is_hybrid_wanted(void) { static thread_local int wanted = -1; int r; bool b; diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 5a82de71fe..3d14ec3d9d 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -247,7 +247,7 @@ int cg_unified_flush(void); bool cg_is_unified_wanted(void); bool cg_is_legacy_wanted(void); -bool cg_is_unified_systemd_controller_wanted(void); +bool cg_is_hybrid_wanted(void); const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 4e2df01346..c107aa0a54 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -100,7 +100,7 @@ static const MountPoint mount_table[] = { { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, + cg_is_hybrid_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_legacy_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, -- cgit v1.2.3-54-g00ecf From 1b59cf04aee20525179f81928f1e1794ce970551 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 19 Feb 2017 15:59:21 -0500 Subject: core/mount-setup: if unified hierarchy is not supported, fall back to legacy We need this to gracefully support older or strangely configured kernels. v2: - do not install a callback handler, just embed the right conditions into cg_is_*_wanted() v3: - fix bug in cg_is_legacy_wanted() --- src/basic/cgroup-util.c | 14 ++++++++------ src/core/mount-setup.c | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 0b8bda89ea..8d60ded5fe 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2429,7 +2429,14 @@ bool cg_is_unified_wanted(void) { } bool cg_is_legacy_wanted(void) { - return !cg_is_unified_wanted(); + /* Check if we have cgroups2 already mounted. */ + if (cg_unified_flush() >= 0 && + unified_cache == CGROUP_UNIFIED_ALL) + return false; + + /* Otherwise, assume that at least partial legacy is wanted, + * since cgroups2 should already be mounted at this point. */ + return true; } bool cg_is_hybrid_wanted(void) { @@ -2438,11 +2445,6 @@ bool cg_is_hybrid_wanted(void) { bool b; const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_SYSTEMD; - /* If the unified hierarchy is requested in full, no need to - * bother with this. */ - if (cg_is_unified_wanted()) - return 0; - /* If the hierarchy is already mounted, then follow whatever * was chosen for it. */ if (cg_unified_flush() >= 0) diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index c107aa0a54..7295efbf31 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -96,7 +96,7 @@ static const MountPoint mount_table[] = { { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_unified_wanted, MNT_IN_CONTAINER }, { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, -- cgit v1.2.3-54-g00ecf From 239a3d09547a32c21e9b9b22499991781c15438e Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Wed, 22 Feb 2017 12:57:43 -0500 Subject: cgroup-util: cache all cg_is_*_wanted answers, disable /sys/fs/cgroups/unified on unified If we encounter an error in proc cmdline parsing, just treat that as permanent, i.e. the same as if the option was not specified. Realistically, it is better to use the same condition for all related mounts, then to have e.g. /sys/fs/cgroup mounted and /sys/fs/cgroup/unified not. If we find something is mounted and base our answer on that, cache that result too. Fix the conditions so that if "unified" is used, make sure any "hybrid" mounts are not mounted. --- src/basic/cgroup-util.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 8d60ded5fe..3222428649 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2410,33 +2410,37 @@ bool cg_is_unified_wanted(void) { bool b; const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - if (cg_unified_flush() >= 0) - return cg_all_unified(); - /* If we have a cached value, return that. */ if (wanted >= 0) return wanted; + /* If the hierarchy is already mounted, then follow whatever + * was chosen for it. */ + if (cg_unified_flush() >= 0) + return (wanted = cg_all_unified()); + /* Otherwise, let's see what the kernel command line has to say. * Since checking is expensive, cache a non-error result. */ r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); - if (r < 0) - return is_default; return (wanted = r > 0 ? b : is_default); } bool cg_is_legacy_wanted(void) { + static thread_local int wanted = -1; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + /* Check if we have cgroups2 already mounted. */ if (cg_unified_flush() >= 0 && unified_cache == CGROUP_UNIFIED_ALL) - return false; + return (wanted = false); /* Otherwise, assume that at least partial legacy is wanted, * since cgroups2 should already be mounted at this point. */ - return true; + return (wanted = true); } bool cg_is_hybrid_wanted(void) { @@ -2445,20 +2449,19 @@ bool cg_is_hybrid_wanted(void) { bool b; const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_SYSTEMD; - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - if (cg_unified_flush() >= 0) - return cg_unified(SYSTEMD_CGROUP_CONTROLLER); - /* If we have a cached value, return that. */ if (wanted >= 0) return wanted; + /* If the hierarchy is already mounted, then follow whatever + * was chosen for it. */ + if (cg_unified_flush() >= 0 && + unified_cache == CGROUP_UNIFIED_ALL) + return (wanted = false); + /* Otherwise, let's see what the kernel command line has to say. * Since checking is expensive, cache a non-error result. */ r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); - if (r < 0) - return is_default; /* The meaning of the kernel option is reversed wrt. to the return value * of this function, hence the negation. */ -- cgit v1.2.3-54-g00ecf From c19739db9ec705b4afdad32301e8750b5e02c7d3 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Wed, 22 Feb 2017 19:55:31 -0500 Subject: cgroup-util: fix the case of default=unified, unified-cgroup-hierarchy=0 We should mount the hybrid hierarchy if the user disabled the unified hierarchy on the kernel command line. --- src/basic/cgroup-util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 3222428649..f76b7f47e5 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2447,7 +2447,10 @@ bool cg_is_hybrid_wanted(void) { static thread_local int wanted = -1; int r; bool b; - const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_SYSTEMD; + const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; + /* We default to true if the default is "hybrid", obviously, + * but also when the default is "unified", because if we get + * called, it means that unified hierarchy was not mounted. */ /* If we have a cached value, return that. */ if (wanted >= 0) -- cgit v1.2.3-54-g00ecf From 0a05dcc09dd723dcf017bf42432d33e58ec56a1d Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Wed, 22 Feb 2017 19:57:11 -0500 Subject: test-cgroup-util: add a "test" to print out cg_is_*_wanted() values This isn't terribly useful because /sys/fs/cgroup will usually be mounted. But it at least allows checking if the values make sense in this case. --- src/test/test-cgroup-util.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'src') diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index c60fb631fa..30cd463722 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -18,11 +18,13 @@ ***/ #include "alloc-util.h" +#include "build.h" #include "cgroup-util.h" #include "dirent-util.h" #include "fd-util.h" #include "format-util.h" #include "parse-util.h" +#include "proc-cmdline.h" #include "process-util.h" #include "stat-util.h" #include "string-util.h" @@ -332,7 +334,49 @@ static void test_fd_is_cgroup_fs(void) { fd = safe_close(fd); } +static void test_is_wanted_print(bool header) { + _cleanup_free_ char *cmdline = NULL; + + log_info("-- %s --", __func__); + assert_se(proc_cmdline(&cmdline) >= 0); + log_info("cmdline: %s", cmdline); + if (header) { + + log_info(_CGROUP_HIEARCHY_); + (void) system("findmnt -n /sys/fs/cgroup"); + } + + log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted())); + log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted())); + log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted())); + log_info(" "); +} + +static void test_is_wanted(void) { + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0); + test_is_wanted_print(false); +} + int main(void) { + log_set_max_level(LOG_DEBUG); + log_parse_environment(); + log_open(); + test_path_decode_unit(); test_path_get_unit(); test_path_get_user_unit(); @@ -349,6 +393,9 @@ int main(void) { TEST_REQ_RUNNING_SYSTEMD(test_mask_supported()); TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs()); TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs()); + test_is_wanted_print(true); + test_is_wanted_print(false); /* run twice to test caching */ + test_is_wanted(); return 0; } -- cgit v1.2.3-54-g00ecf