diff options
-rw-r--r-- | configure.ac | 52 | ||||
-rw-r--r-- | man/kernel-command-line.xml | 16 | ||||
-rw-r--r-- | man/systemd.xml | 148 | ||||
-rw-r--r-- | src/basic/build.h | 5 | ||||
-rw-r--r-- | src/basic/cgroup-util.c | 305 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 10 | ||||
-rw-r--r-- | src/basic/def.h | 4 | ||||
-rw-r--r-- | src/cgls/cgls.c | 2 | ||||
-rw-r--r-- | src/cgtop/cgtop.c | 13 | ||||
-rw-r--r-- | src/core/cgroup.c | 38 | ||||
-rw-r--r-- | src/core/manager.c | 2 | ||||
-rw-r--r-- | src/core/mount-setup.c | 10 | ||||
-rw-r--r-- | src/core/scope.c | 2 | ||||
-rw-r--r-- | src/core/service.c | 2 | ||||
-rw-r--r-- | src/core/unit.c | 4 | ||||
-rw-r--r-- | src/libsystemd/sd-bus/test-bus-creds.c | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 17 | ||||
-rw-r--r-- | src/nspawn/nspawn-mount.c | 83 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 33 | ||||
-rw-r--r-- | src/test/test-cgroup-util.c | 47 |
20 files changed, 480 insertions, 315 deletions
diff --git a/configure.ac b/configure.ac index 156511851c..d98cc4d9ba 100644 --- a/configure.ac +++ b/configure.ac @@ -613,6 +613,23 @@ AC_SUBST(FALLBACK_HOSTNAME) AC_DEFINE_UNQUOTED(FALLBACK_HOSTNAME, ["$FALLBACK_HOSTNAME"], [The hostname used if none configured]) # ------------------------------------------------------------------------------ + +AC_ARG_WITH(default-hierarchy, + AS_HELP_STRING([--with-default-hierarchy=MODE], + [default cgroup hierarchy, defaults to "hybrid"]), + [DEFAULT_HIERARCHY="$withval"], + [DEFAULT_HIERARCHY="hybrid"]) + +AS_CASE("$DEFAULT_HIERARCHY", + [legacy], [mode=CGROUP_UNIFIED_NONE], + [hybrid], [mode=CGROUP_UNIFIED_SYSTEMD], + [unified], [mode=CGROUP_UNIFIED_ALL], + AC_MSG_ERROR(Bad default hierarchy mode ${DEFAULT_HIERARCHY})) +AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY, [$mode], [Default cgroup hierarchy]) +AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY_NAME, ["$DEFAULT_HIERARCHY"], + [Default cgroup hierarchy as string]) + +# ------------------------------------------------------------------------------ have_xz=no AC_ARG_ENABLE(xz, AS_HELP_STRING([--disable-xz], [disable optional XZ support])) AS_IF([test "x$enable_xz" != "xno"], [ @@ -1667,19 +1684,20 @@ AC_MSG_RESULT([ backlight: ${have_backlight} rfkill: ${have_rfkill} logind: ${have_logind} - Default KillUserProcesses setting: ${KILL_USER_PROCESSES} + default cgroup hierarchy: ${DEFAULT_HIERARCHY} + default KillUserProcesses setting: ${KILL_USER_PROCESSES} machined: ${have_machined} importd: ${have_importd} hostnamed: ${have_hostnamed} timedated: ${have_timedated} timesyncd: ${have_timesyncd} - Default NTP servers: ${NTP_SERVERS} + default NTP servers: ${NTP_SERVERS} time epoch: ${TIME_EPOCH} localed: ${have_localed} networkd: ${have_networkd} resolved: ${have_resolved} - Default DNS servers: ${DNS_SERVERS} - Default DNSSEC mode: ${DEFAULT_DNSSEC_MODE} + default DNS servers: ${DNS_SERVERS} + default DNSSEC mode: ${DEFAULT_DNSSEC_MODE} coredump: ${have_coredump} polkit: ${have_polkit} efi: 
${have_efi} @@ -1718,27 +1736,27 @@ AC_MSG_RESULT([ rootlib dir: ${with_rootlibdir} SysV init scripts: ${SYSTEM_SYSVINIT_PATH} SysV rc?.d directories: ${SYSTEM_SYSVRCND_PATH} - Build Python: ${PYTHON} + build Python: ${PYTHON} PAM modules dir: ${with_pamlibdir} PAM configuration dir: ${with_pamconfdir} D-Bus policy dir: ${with_dbuspolicydir} D-Bus session dir: ${with_dbussessionservicedir} D-Bus system dir: ${with_dbussystemservicedir} - Bash completions dir: ${with_bashcompletiondir} - Zsh completions dir: ${with_zshcompletiondir} - Extra start script: ${RC_LOCAL_SCRIPT_PATH_START} - Extra stop script: ${RC_LOCAL_SCRIPT_PATH_STOP} - Adm group: ${have_adm_group} - Wheel group: ${have_wheel_group} - Debug shell: ${SUSHELL} @ ${DEBUGTTY} + bash completions dir: ${with_bashcompletiondir} + zsh completions dir: ${with_zshcompletiondir} + extra start script: ${RC_LOCAL_SCRIPT_PATH_START} + extra stop script: ${RC_LOCAL_SCRIPT_PATH_STOP} + adm group: ${have_adm_group} + wheel group: ${have_wheel_group} + debug shell: ${SUSHELL} @ ${DEBUGTTY} TTY GID: ${TTY_GID} - Maximum system UID: ${SYSTEM_UID_MAX} - Maximum system GID: ${SYSTEM_GID_MAX} - Certificate root: ${CERTIFICATEROOT} - Support URL: ${SUPPORT_URL} + maximum system UID: ${SYSTEM_UID_MAX} + maximum system GID: ${SYSTEM_GID_MAX} + certificate root: ${CERTIFICATEROOT} + support URL: ${SUPPORT_URL} nobody user name: ${NOBODY_USER_NAME} nobody group name: ${NOBODY_GROUP_NAME} - Fallback hostname: ${FALLBACK_HOSTNAME} + fallback hostname: ${FALLBACK_HOSTNAME} CFLAGS: ${OUR_CFLAGS} ${CFLAGS} CPPFLAGS: ${OUR_CPPFLAGS} ${CPPFLAGS} diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml index 415b8d3cf9..f02ca3e7bc 100644 --- a/man/kernel-command-line.xml +++ b/man/kernel-command-line.xml @@ -78,20 +78,22 @@ <varlistentry> <term><varname>systemd.unit=</varname></term> <term><varname>rd.systemd.unit=</varname></term> - <term><varname>systemd.dump_core=</varname></term> - 
<term><varname>systemd.crash_chvt=</varname></term> - <term><varname>systemd.crash_shell=</varname></term> - <term><varname>systemd.crash_reboot=</varname></term> - <term><varname>systemd.confirm_spawn=</varname></term> - <term><varname>systemd.show_status=</varname></term> + <term><varname>systemd.dump_core</varname></term> + <term><varname>systemd.crash_chvt</varname></term> + <term><varname>systemd.crash_shell</varname></term> + <term><varname>systemd.crash_reboot</varname></term> + <term><varname>systemd.confirm_spawn</varname></term> + <term><varname>systemd.show_status</varname></term> <term><varname>systemd.log_target=</varname></term> <term><varname>systemd.log_level=</varname></term> - <term><varname>systemd.log_color=</varname></term> <term><varname>systemd.log_location=</varname></term> + <term><varname>systemd.log_color</varname></term> <term><varname>systemd.default_standard_output=</varname></term> <term><varname>systemd.default_standard_error=</varname></term> <term><varname>systemd.setenv=</varname></term> <term><varname>systemd.machine_id=</varname></term> + <term><varname>systemd.unified_cgroup_hierarchy</varname></term> + <term><varname>systemd.legacy_systemd_cgroup_controller</varname></term> <listitem> <para>Parameters understood by the system and service manager to control system behavior. For details, see diff --git a/man/systemd.xml b/man/systemd.xml index bfcc0c13b0..4856dea824 100644 --- a/man/systemd.xml +++ b/man/systemd.xml @@ -51,10 +51,13 @@ <refsynopsisdiv> <cmdsynopsis> - <command>systemd <arg choice="opt" rep="repeat">OPTIONS</arg></command> + <command>systemd</command> + <arg choice="opt" rep="repeat">OPTIONS</arg> </cmdsynopsis> <cmdsynopsis> - <command>init <arg choice="opt" rep="repeat">OPTIONS</arg> <arg choice="req">COMMAND</arg></command> + <command>init</command> + <arg choice="opt" rep="repeat">OPTIONS</arg> + <arg choice="req">COMMAND</arg> </cmdsynopsis> </refsynopsisdiv> @@ -150,6 +153,7 @@ user instance. 
This setting may also be enabled during boot, on the kernel command line via the <varname>systemd.crash_vt=</varname> option, see + <!-- FIXME: there is no crash_vt command line option? --> below.</para></listitem> </varlistentry> @@ -898,88 +902,91 @@ </varlistentry> <varlistentry> - <term><varname>systemd.dump_core=</varname></term> + <term><varname>systemd.dump_core</varname></term> - <listitem><para>Takes a boolean argument. If - <option>yes</option>, the systemd manager (PID 1) dumps core - when it crashes. Otherwise, no core dump is created. Defaults - to <option>yes</option>.</para></listitem> + <listitem><para>Takes a boolean argument or enables the option if specified + without an argument. If enabled, the systemd manager (PID 1) dumps core when + it crashes. Otherwise, no core dump is created. Defaults to enabled.</para> + </listitem> </varlistentry> <varlistentry> - <term><varname>systemd.crash_chvt=</varname></term> + <term><varname>systemd.crash_chvt</varname></term> - <listitem><para>Takes a positive integer, or a boolean - argument. If a positive integer (in the range 1–63) is - specified, the system manager (PID 1) will activate the specified - virtual terminal (VT) when it crashes. Defaults to - <constant>no</constant>, meaning that no such switch is - attempted. If set to <constant>yes</constant>, the VT the - kernel messages are written to is selected.</para></listitem> + <listitem><para>Takes a positive integer, or a boolean argument. Can be also + specified without an argument, with the same effect as a positive boolean. If + a positive integer (in the range 1–63) is specified, the system manager (PID + 1) will activate the specified virtual terminal (VT) when it + crashes. Defaults to disabled, meaning that no such switch is attempted. If + set to enabled, the VT the kernel messages are written to is selected. 
+ </para></listitem> </varlistentry> <varlistentry> - <term><varname>systemd.crash_shell=</varname></term> + <term><varname>systemd.crash_shell</varname></term> - <listitem><para>Takes a boolean argument. If - <option>yes</option>, the system manager (PID 1) spawns a - shell when it crashes, after a 10s delay. Otherwise, no shell - is spawned. Defaults to <option>no</option>, for security - reasons, as the shell is not protected by password + <listitem><para>Takes a boolean argument or enables the option if specified + without an argument. If enabled, the system manager (PID 1) spawns a shell + when it crashes, after a 10s delay. Otherwise, no shell is spawned. Defaults + to disabled, for security reasons, as the shell is not protected by password authentication.</para></listitem> </varlistentry> <varlistentry> - <term><varname>systemd.crash_reboot=</varname></term> + <term><varname>systemd.crash_reboot</varname></term> - <listitem><para>Takes a boolean argument. If - <option>yes</option>, the system manager (PID 1) will reboot - the machine automatically when it crashes, after a 10s delay. - Otherwise, the system will hang indefinitely. Defaults to - <option>no</option>, in order to avoid a reboot loop. If - combined with <varname>systemd.crash_shell=</varname>, the + <listitem><para>Takes a boolean argument or enables the option if specified + without an argument. If enabled, the system manager (PID 1) will reboot the + machine automatically when it crashes, after a 10s delay. Otherwise, the + system will hang indefinitely. Defaults to disabled, in order to avoid a + reboot loop. 
If combined with <varname>systemd.crash_shell</varname>, the system is rebooted after the shell exits.</para></listitem> </varlistentry> <varlistentry> - <term><varname>systemd.confirm_spawn=</varname></term> + <term><varname>systemd.confirm_spawn</varname></term> - <listitem><para>Takes a boolean argument or a path to the - virtual console where the confirmation messages should be - emitted. If <option>yes</option>, the system manager (PID 1) - asks for confirmation when spawning processes using - <option>/dev/console</option>. If a path or a console name - (such as <literal>ttyS0</literal>) is provided, the virtual - console pointed to by this path or described by the give name - will be used instead. Defaults to <option>no</option>.</para></listitem> + <listitem><para>Takes a boolean argument or a path to the virtual console + where the confirmation messages should be emitted. Can be also specified + without an argument, with the same effect as a positive boolean. If enabled, + the system manager (PID 1) asks for confirmation when spawning processes + using <option>/dev/console</option>. If a path or a console name (such as + <literal>ttyS0</literal>) is provided, the virtual console pointed to by this + path or described by the given name will be used instead. Defaults to disabled. + </para></listitem> </varlistentry> <varlistentry> - <term><varname>systemd.show_status=</varname></term> + <term><varname>systemd.show_status</varname></term> - <listitem><para>Takes a boolean argument or the constant <constant>auto</constant>. If <option>yes</option>, - the systemd manager (PID 1) shows terse service status updates on the console during bootup. - <constant>auto</constant> behaves like <option>false</option> until a unit fails or there is a significant - delay in boot. Defaults to <option>yes</option>, unless <option>quiet</option> is passed as kernel command - line option, in which case it defaults to <constant>auto</constant>. 
If specified overrides the system manager - configuration file option <option>ShowStatus=</option>, see - <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>. However, - the process command line option <option>--show-status=</option> takes precedence over both this kernel command - line option and the configuration file option.</para></listitem> + <listitem><para>Takes a boolean argument or the constant + <constant>auto</constant>. Can be also specified without an argument, with + the same effect as a positive boolean. If enabled, the systemd manager (PID + 1) shows terse service status updates on the console during bootup. + <constant>auto</constant> behaves like <option>false</option> until a unit + fails or there is a significant delay in boot. Defaults to enabled, unless + <option>quiet</option> is passed as kernel command line option, in which case + it defaults to <constant>auto</constant>. If specified overrides the system + manager configuration file option <option>ShowStatus=</option>, see + <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>. 
+ However, the process command line option <option>--show-status=</option> + takes precedence over both this kernel command line option and the + configuration file option.</para></listitem> </varlistentry> <varlistentry> <term><varname>systemd.log_target=</varname></term> <term><varname>systemd.log_level=</varname></term> - <term><varname>systemd.log_color=</varname></term> <term><varname>systemd.log_location=</varname></term> + <term><varname>systemd.log_color</varname></term> - <listitem><para>Controls log output, with the same effect as - the <varname>$SYSTEMD_LOG_TARGET</varname>, + <listitem><para>Controls log output, with the same effect as the + <varname>$SYSTEMD_LOG_TARGET</varname>, <varname>$SYSTEMD_LOG_LEVEL</varname>, - <varname>$SYSTEMD_LOG_COLOR</varname>, - <varname>$SYSTEMD_LOG_LOCATION</varname> environment variables - described above.</para></listitem> + <varname>$SYSTEMD_LOG_LOCATION</varname>, + <varname>$SYSTEMD_LOG_COLOR</varname> environment variables described above. + <varname>systemd.log_color</varname> can be specified without an argument, + with the same effect as a positive boolean.</para></listitem> </varlistentry> <varlistentry> @@ -1011,6 +1018,41 @@ </varlistentry> <varlistentry> + <term><varname>systemd.unified_cgroup_hierarchy</varname></term> + + <listitem><para>When specified without an argument or with a true argument, + enables the usage of + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">unified cgroup hierarchy</ulink> + (a.k.a. cgroups-v2). When specified with a false argument, fall back to + hybrid or full legacy cgroup hierarchy.</para> + + <para>If this option is not specified, the default behaviour is determined + during compilation (the <option>--with-default-hierarchy=</option> + option). 
If the kernel does not support unified cgroup hierarchy, the legacy + hierarchy will be used even if this option is specified.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>systemd.legacy_systemd_cgroup_controller</varname></term> + + <listitem><para>Takes effect if the full unified cgroup hierarchy is not used + (see previous option). When specified without an argument or with a true + argument, disables the use of "hybrid" cgroup hierarchy (i.e. a cgroups-v2 + tree used for systemd, and + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/">legacy + cgroup hierarchy</ulink>, a.k.a. cgroups-v1, for other controllers), and + forces a full "legacy" mode. When specified with a false argument, enables + the use of "hybrid" hierarchy.</para> + + <para>If this option is not specified, the default behaviour is determined + during compilation (the <option>--with-default-hierarchy=</option> + option). If the kernel does not support unified cgroup hierarchy, the legacy + hierarchy will be used even if this option is specified.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>quiet</varname></term> <listitem><para>Turn off status output at boot, much like diff --git a/src/basic/build.h b/src/basic/build.h index 633c2aaccb..91312bd2a3 100644 --- a/src/basic/build.h +++ b/src/basic/build.h @@ -133,6 +133,8 @@ #define _IDN_FEATURE_ "-IDN" #endif +#define _CGROUP_HIEARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME + #define SYSTEMD_FEATURES \ _PAM_FEATURE_ " " \ _AUDIT_FEATURE_ " " \ @@ -152,4 +154,5 @@ _BLKID_FEATURE_ " " \ _ELFUTILS_FEATURE_ " " \ _KMOD_FEATURE_ " " \ - _IDN_FEATURE_ + _IDN_FEATURE_ " " \ + _CGROUP_HIEARCHY_ diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 6948ed3931..f76b7f47e5 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -208,6 +208,12 @@ int cg_rmdir(const char *controller, const char *path) { if (r < 0 && errno != ENOENT) return -errno; + if 
(streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path); + } + return 0; } @@ -542,6 +548,13 @@ static const char *controller_to_dirname(const char *controller) { * just cuts off the name= prefixed used for named * hierarchies, if it is specified. */ + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + if (cg_hybrid_unified()) + controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID; + else + controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + } + e = startswith(controller, "name="); if (e) return e; @@ -594,7 +607,7 @@ static int join_path_unified(const char *path, const char *suffix, char **fs) { } int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) { - int unified, r; + int r; assert(fs); @@ -623,11 +636,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (!cg_controller_is_valid(controller)) return -EINVAL; - unified = cg_all_unified(); - if (unified < 0) - return unified; - - if (unified > 0) + if (cg_all_unified()) r = join_path_unified(path, suffix, fs); else r = join_path_legacy(controller, path, suffix, fs); @@ -639,7 +648,6 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch } static int controller_is_accessible(const char *controller) { - int unified; assert(controller); @@ -651,10 +659,7 @@ static int controller_is_accessible(const char *controller) { if (!cg_controller_is_valid(controller)) return -EINVAL; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) { + if (cg_all_unified()) { /* We don't support named hierarchies if we are using * the unified hierarchy. 
*/ @@ -708,7 +713,7 @@ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct int cg_trim(const char *controller, const char *path, bool delete_root) { _cleanup_free_ char *fs = NULL; - int r = 0; + int r = 0, q; assert(path); @@ -731,6 +736,12 @@ int cg_trim(const char *controller, const char *path, bool delete_root) { return -errno; } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + return r; } @@ -754,6 +765,12 @@ int cg_create(const char *controller, const char *path) { return -errno; } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + return 1; } @@ -791,7 +808,17 @@ int cg_attach(const char *controller, const char *path, pid_t pid) { xsprintf(c, PID_FMT "\n", pid); - return write_string_file(fs, c, 0); + r = write_string_file(fs, c, 0); + if (r < 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path); + } + + return 0; } int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { @@ -840,7 +867,17 @@ int cg_set_group_access( if (r < 0) return r; - return chmod_and_chown(fs, mode, uid, gid); + r = chmod_and_chown(fs, mode, uid, gid); + if (r < 0) + return r; + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid); + if (r < 0) + log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path); + } + + return 0; } int 
cg_set_task_access( @@ -851,7 +888,7 @@ int cg_set_task_access( gid_t gid) { _cleanup_free_ char *fs = NULL, *procs = NULL; - int r, unified; + int r; assert(path); @@ -869,16 +906,18 @@ int cg_set_task_access( if (r < 0) return r; - unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified) - return 0; + if (!cg_unified(controller)) { + /* Compatibility, Always keep values for "tasks" in sync with + * "cgroup.procs" */ + if (cg_get_path(controller, path, "tasks", &procs) >= 0) + (void) chmod_and_chown(procs, mode, uid, gid); + } - /* Compatibility, Always keep values for "tasks" in sync with - * "cgroup.procs" */ - if (cg_get_path(controller, path, "tasks", &procs) >= 0) - (void) chmod_and_chown(procs, mode, uid, gid); + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) { + r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid); + if (r < 0) + log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path); + } return 0; } @@ -923,9 +962,9 @@ int cg_get_xattr(const char *controller, const char *path, const char *name, voi int cg_pid_get_path(const char *controller, pid_t pid, char **path) { _cleanup_fclose_ FILE *f = NULL; char line[LINE_MAX]; - const char *fs; + const char *fs, *controller_str; size_t cs = 0; - int unified; + bool unified; assert(path); assert(pid >= 0); @@ -937,10 +976,14 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { controller = SYSTEMD_CGROUP_CONTROLLER; unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified == 0) - cs = strlen(controller); + if (!unified) { + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY; + else + controller_str = controller; + + cs = strlen(controller_str); + } fs = procfs_file_alloca(pid, "cgroup"); f = fopen(fs, "re"); @@ -977,7 +1020,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { *e = 0; 
FOREACH_WORD_SEPARATOR(word, k, l, ",", state) { - if (k == cs && memcmp(word, controller, cs) == 0) { + if (k == cs && memcmp(word, controller_str, cs) == 0) { found = true; break; } @@ -1001,14 +1044,11 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { int cg_install_release_agent(const char *controller, const char *agent) { _cleanup_free_ char *fs = NULL, *contents = NULL; const char *sc; - int r, unified; + int r; assert(agent); - unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified) /* doesn't apply to unified hierarchy */ + if (cg_unified(controller)) /* doesn't apply to unified hierarchy */ return -EOPNOTSUPP; r = cg_get_path(controller, NULL, "release_agent", &fs); @@ -1054,12 +1094,9 @@ int cg_install_release_agent(const char *controller, const char *agent) { int cg_uninstall_release_agent(const char *controller) { _cleanup_free_ char *fs = NULL; - int r, unified; + int r; - unified = cg_unified(controller); - if (unified < 0) - return unified; - if (unified) /* Doesn't apply to unified hierarchy */ + if (cg_unified(controller)) /* Doesn't apply to unified hierarchy */ return -EOPNOTSUPP; r = cg_get_path(controller, NULL, "notify_on_release", &fs); @@ -1104,7 +1141,7 @@ int cg_is_empty(const char *controller, const char *path) { } int cg_is_empty_recursive(const char *controller, const char *path) { - int unified, r; + int r; assert(path); @@ -1112,11 +1149,7 @@ int cg_is_empty_recursive(const char *controller, const char *path) { if (controller && (isempty(path) || path_equal(path, "/"))) return false; - unified = cg_unified(controller); - if (unified < 0) - return unified; - - if (unified > 0) { + if (cg_unified(controller)) { _cleanup_free_ char *t = NULL; /* On the unified hierarchy we can check empty state @@ -1833,6 +1866,9 @@ bool cg_controller_is_valid(const char *p) { if (!p) return false; + if (streq(p, SYSTEMD_CGROUP_CONTROLLER)) + return true; + s = startswith(p, "name="); if (s) p = s; @@ 
-1986,7 +2022,7 @@ int cg_get_keyed_attribute(const char *controller, const char *path, const char int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { CGroupController c; - int r, unified; + int r; /* This one will create a cgroup in our private tree, but also * duplicate it in the trees specified in mask, and remove it @@ -1998,10 +2034,7 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path return r; /* If we are in the unified hierarchy, we are done now */ - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return 0; /* Otherwise, do the same in the other hierarchies */ @@ -2022,16 +2055,13 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { CGroupController c; - int r, unified; + int r; r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); if (r < 0) return r; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return 0; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -2072,7 +2102,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; - int r = 0, unified; + int r = 0; if (!path_equal(from, to)) { r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); @@ -2080,10 +2110,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to return r; } - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return r; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -2107,16 +2134,13 @@ int cg_migrate_everywhere(CGroupMask 
supported, const char *from, const char *to int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { CGroupController c; - int r, unified; + int r; r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); if (r < 0) return r; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) + if (cg_all_unified()) return r; for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { @@ -2133,16 +2157,13 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) int cg_mask_supported(CGroupMask *ret) { CGroupMask mask = 0; - int r, unified; + int r; /* Determines the mask of supported cgroup controllers. Only * includes controllers we can make sense of and that are * actually accessible. */ - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (unified > 0) { + if (cg_all_unified()) { _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL; const char *c; @@ -2262,6 +2283,20 @@ int cg_kernel_controllers(Set *controllers) { static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN; +/* The hybrid mode was initially implemented in v232 and simply mounted + * cgroup v2 on /sys/fs/cgroup/systemd. This unfortunately broke other + * tools (such as docker) which expected the v1 "name=systemd" hierarchy + * on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts + * v2 on /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy + * on /sys/fs/cgroup/systemd for compatibility with other tools. + * + * To keep live upgrade working, we detect and support v232 layout. When + * v232 layout is detected, to keep cgroup v2 process management but + * disable the compat dual layout, we return %true on + * cg_unified(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified(). 
+ */ +static thread_local bool unified_systemd_v232; + static int cg_update_unified(void) { struct statfs fs; @@ -2280,24 +2315,30 @@ static int cg_update_unified(void) { if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) unified_cache = CGROUP_UNIFIED_ALL; else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { - if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) - return -errno; - - unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ? - CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE; + if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = false; + } else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 && + F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { + unified_cache = CGROUP_UNIFIED_SYSTEMD; + unified_systemd_v232 = true; + } else { + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return -errno; + if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) + return -ENOMEDIUM; + unified_cache = CGROUP_UNIFIED_NONE; + } } else return -ENOMEDIUM; return 0; } -int cg_unified(const char *controller) { +bool cg_unified(const char *controller) { - int r; - - r = cg_update_unified(); - if (r < 0) - return r; + assert(cg_update_unified() >= 0); if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER)) return unified_cache >= CGROUP_UNIFIED_SYSTEMD; @@ -2305,29 +2346,35 @@ int cg_unified(const char *controller) { return unified_cache >= CGROUP_UNIFIED_ALL; } -int cg_all_unified(void) { +bool cg_all_unified(void) { return cg_unified(NULL); } -void cg_unified_flush(void) { +bool cg_hybrid_unified(void) { + + assert(cg_update_unified() >= 0); + + return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232; +} + +int cg_unified_flush(void) { unified_cache = CGROUP_UNIFIED_UNKNOWN; + + return cg_update_unified(); } int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { _cleanup_free_ char *fs = NULL; CGroupController c; - int r, unified; 
+ int r; assert(p); if (supported == 0) return 0; - unified = cg_all_unified(); - if (unified < 0) - return unified; - if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */ return 0; + if (!cg_all_unified()) /* on the legacy hierarchy there's no joining of controllers defined */ return 0; r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); @@ -2359,63 +2406,69 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { bool cg_is_unified_wanted(void) { static thread_local int wanted = -1; - int r, unified; + int r; bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - unified = cg_all_unified(); - if (unified >= 0) - return unified; - - /* Otherwise, let's see what the kernel command line has to - * say. Since checking that is expensive, let's cache the - * result. */ + /* If we have a cached value, return that. */ if (wanted >= 0) return wanted; + /* If the hierarchy is already mounted, then follow whatever + * was chosen for it. */ + if (cg_unified_flush() >= 0) + return (wanted = cg_all_unified()); + + /* Otherwise, let's see what the kernel command line has to say. + * Since checking is expensive, cache a non-error result. */ r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); - if (r < 0) - return false; - return (wanted = r > 0 ? b : false); + return (wanted = r > 0 ? b : is_default); } bool cg_is_legacy_wanted(void) { - return !cg_is_unified_wanted(); + static thread_local int wanted = -1; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* Check if we have cgroups2 already mounted. */ + if (cg_unified_flush() >= 0 && + unified_cache == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, assume that at least partial legacy is wanted, + * since cgroups2 should already be mounted at this point. 
*/ + return (wanted = true); } -bool cg_is_unified_systemd_controller_wanted(void) { +bool cg_is_hybrid_wanted(void) { static thread_local int wanted = -1; - int r, unified; + int r; bool b; + const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; + /* We default to true if the default is "hybrid", obviously, + * but also when the default is "unified", because if we get + * called, it means that unified hierarchy was not mounted. */ - /* If the unified hierarchy is requested in full, no need to - * bother with this. */ - if (cg_is_unified_wanted()) - return 0; + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; /* If the hierarchy is already mounted, then follow whatever * was chosen for it. */ - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified >= 0) - return unified; - - /* Otherwise, let's see what the kernel command line has to - * say. Since checking that is expensive, let's cache the - * result. */ - if (wanted >= 0) - return wanted; + if (cg_unified_flush() >= 0 && + unified_cache == CGROUP_UNIFIED_ALL) + return (wanted = false); + /* Otherwise, let's see what the kernel command line has to say. + * Since checking is expensive, cache a non-error result. */ r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); - if (r < 0) - return false; - - return (wanted = r > 0 ? b : false); -} -bool cg_is_legacy_systemd_controller_wanted(void) { - return cg_is_legacy_wanted() && !cg_is_unified_systemd_controller_wanted(); + /* The meaning of the kernel option is reversed wrt. to the return value + * of this function, hence the negation. */ + return (wanted = r > 0 ? 
!b : is_default); } int cg_weight_parse(const char *s, uint64_t *ret) { diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 0aa27c4cd7..3d14ec3d9d 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -240,14 +240,14 @@ int cg_kernel_controllers(Set *controllers); bool cg_ns_supported(void); -int cg_all_unified(void); -int cg_unified(const char *controller); -void cg_unified_flush(void); +bool cg_all_unified(void); +bool cg_hybrid_unified(void); +bool cg_unified(const char *controller); +int cg_unified_flush(void); bool cg_is_unified_wanted(void); bool cg_is_legacy_wanted(void); -bool cg_is_unified_systemd_controller_wanted(void); -bool cg_is_legacy_systemd_controller_wanted(void); +bool cg_is_hybrid_wanted(void); const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; diff --git a/src/basic/def.h b/src/basic/def.h index 10d776ec8e..200ea973c1 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -36,7 +36,9 @@ /* The default value for the net.unix.max_dgram_qlen sysctl */ #define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL -#define SYSTEMD_CGROUP_CONTROLLER "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd" +#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified" +#define SYSTEMD_CGROUP_CONTROLLER "_systemd" #define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT #define SIGNALS_IGNORE SIGPIPE diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c index 5574c14555..40db82f9ae 100644 --- a/src/cgls/cgls.c +++ b/src/cgls/cgls.c @@ -158,7 +158,7 @@ static int parse_argv(int argc, char *argv[]) { static void show_cg_info(const char *controller, const char *path) { - if (cg_all_unified() <= 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + if (!cg_all_unified() && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER)) printf("Controller %s; ", controller); printf("Control group %s:\n", isempty(path) ? 
"/" : path); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 50ac6a58b0..45c050c9c3 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -214,7 +214,7 @@ static int process( uint64_t new_usage; nsec_t timestamp; - if (cg_all_unified() > 0) { + if (cg_all_unified()) { const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; @@ -274,7 +274,7 @@ static int process( } else if (streq(controller, "memory")) { _cleanup_free_ char *p = NULL, *v = NULL; - if (cg_all_unified() <= 0) + if (!cg_all_unified()) r = cg_get_path(controller, path, "memory.usage_in_bytes", &p); else r = cg_get_path(controller, path, "memory.current", &p); @@ -294,15 +294,14 @@ static int process( if (g->memory > 0) g->memory_valid = true; - } else if ((streq(controller, "io") && cg_all_unified() > 0) || - (streq(controller, "blkio") && cg_all_unified() <= 0)) { + } else if ((streq(controller, "io") && cg_all_unified()) || + (streq(controller, "blkio") && !cg_all_unified())) { _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *p = NULL; - bool unified = cg_all_unified() > 0; uint64_t wr = 0, rd = 0; nsec_t timestamp; - r = cg_get_path(controller, path, unified ? "io.stat" : "blkio.io_service_bytes", &p); + r = cg_get_path(controller, path, cg_all_unified() ? 
"io.stat" : "blkio.io_service_bytes", &p); if (r < 0) return r; @@ -325,7 +324,7 @@ static int process( l += strcspn(l, WHITESPACE); l += strspn(l, WHITESPACE); - if (unified) { + if (cg_all_unified()) { while (!isempty(l)) { if (sscanf(l, "rbytes=%" SCNu64, &k)) rd += k; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 5789e2aa82..fbb711782e 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -678,7 +678,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { bool has_weight = cgroup_context_has_cpu_weight(c); bool has_shares = cgroup_context_has_cpu_shares(c); - if (cg_all_unified() > 0) { + if (cg_all_unified()) { uint64_t weight; if (has_weight) @@ -858,7 +858,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (cg_all_unified() > 0) { + if (cg_all_unified()) { uint64_t max; uint64_t swap_max = CGROUP_LIMIT_MAX; @@ -1033,7 +1033,7 @@ CGroupMask unit_get_own_mask(Unit *u) { e = unit_get_exec_context(u); if (!e || exec_context_maintains_privileges(e) || - cg_all_unified() > 0) + cg_all_unified()) return _CGROUP_MASK_ALL; } @@ -1260,10 +1260,7 @@ int unit_watch_cgroup(Unit *u) { return 0; /* Only applies to the unified hierarchy */ - r = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); - if (r == 0) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER)) return 0; /* Don't watch the root slice, it's pointless. 
*/ @@ -1683,7 +1680,7 @@ int unit_watch_all_pids(Unit *u) { if (!u->cgroup_path) return -ENOENT; - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) /* On unified we can use proper notifications */ return 0; return unit_watch_pids_in_path(u, u->cgroup_path); @@ -1756,7 +1753,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, int manager_setup_cgroup(Manager *m) { _cleanup_free_ char *path = NULL; CGroupController c; - int r, all_unified, systemd_unified; + int r; char *e; assert(m); @@ -1793,25 +1790,22 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Cannot find cgroup mount point: %m"); - all_unified = cg_all_unified(); - systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - - if (all_unified < 0 || systemd_unified < 0) - return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, - "Couldn't determine if we are running in the unified hierarchy: %m"); + r = cg_unified_flush(); + if (r < 0) + return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m"); - if (all_unified > 0) + if (cg_all_unified()) log_debug("Unified cgroup hierarchy is located at %s.", path); - else if (systemd_unified > 0) + else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path); else - log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); + log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path); if (!m->test_run) { const char *scope_path; /* 3. Install agent */ - if (systemd_unified) { + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) { /* In the unified hierarchy we can get * cgroup empty notifications via inotify. 
*/ @@ -1871,7 +1865,7 @@ int manager_setup_cgroup(Manager *m) { return log_error_errno(errno, "Failed to open pin file: %m"); /* 6. Always enable hierarchical support if it exists... */ - if (!all_unified) + if (!cg_all_unified()) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); } @@ -1997,7 +1991,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; - if (cg_all_unified() <= 0) + if (!cg_all_unified()) r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); else r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v); @@ -2042,7 +2036,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; - if (cg_all_unified() > 0) { + if (cg_all_unified()) { const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; uint64_t us; diff --git a/src/core/manager.c b/src/core/manager.c index d3f6efc91c..ea80585329 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -776,7 +776,7 @@ static int manager_setup_cgroups_agent(Manager *m) { if (!MANAGER_IS_SYSTEM(m)) return 0; - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) /* We don't need this anymore on the unified hierarchy */ return 0; if (m->cgroups_agent_fd < 0) { diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 9c2bf3a0ef..7295efbf31 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -96,15 +96,15 @@ static const MountPoint mount_table[] = { { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, NULL, MNT_FATAL|MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_unified_wanted, MNT_IN_CONTAINER }, { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", 
MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, + { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_hybrid_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER }, + cg_is_legacy_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #ifdef ENABLE_EFI diff --git a/src/core/scope.c b/src/core/scope.c index 9540fb67d9..5e068a76d1 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -475,7 +475,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER) && set_isempty(u->pids)) scope_notify_cgroup_empty_event(u); } diff --git a/src/core/service.c b/src/core/service.c index 54074ff7bc..0c2eb18f38 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -2938,7 +2938,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER) && set_isempty(u->pids)) service_notify_cgroup_empty_event(u); } diff --git a/src/core/unit.c 
b/src/core/unit.c index bb05d2abfb..685df6f00d 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -3897,8 +3897,8 @@ int unit_kill_context( * there we get proper events. Hence rely on * them.*/ - if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 || - (detect_container() == 0 && !unit_cgroup_delegate(u))) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) || + (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; if (send_sighup) { diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c index 6fdcfa4128..64bd76a576 100644 --- a/src/libsystemd/sd-bus/test-bus-creds.c +++ b/src/libsystemd/sd-bus/test-bus-creds.c @@ -31,7 +31,7 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); - if (cg_all_unified() == -ENOMEDIUM) { + if (cg_unified_flush() == -ENOMEDIUM) { log_info("Skipping test: /sys/fs/cgroup/ not available"); return EXIT_TEST_SKIP; } diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 5274767b96..4678a7e349 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -78,13 +78,9 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift) char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; bool undo_mount = false; const char *fn; - int unified, r; - - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified < 0) - return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m"); + int r; - if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD)) return 0; /* When the host uses the legacy cgroup setup, but the @@ -100,7 +96,7 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift) if (!mkdtemp(tree)) return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m"); - if (unified) + if 
(cg_unified(SYSTEMD_CGROUP_CONTROLLER)) r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr"); else @@ -142,7 +138,7 @@ finish: int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { _cleanup_free_ char *cgroup = NULL; const char *child; - int unified, r; + int r; CGroupMask supported; /* In the unified hierarchy inner nodes may only contain @@ -154,10 +150,7 @@ int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { if (unified_requested == CGROUP_UNIFIED_NONE) return 0; - unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - if (unified < 0) - return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m"); - if (unified == 0) + if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER)) return 0; r = cg_mask_supported(&supported); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 4b2838b752..ed4f1f9db8 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -890,7 +890,7 @@ static int get_controllers(Set *subsystems) { *e = 0; - if (STR_IN_SET(l, "", "name=systemd")) + if (STR_IN_SET(l, "", "name=systemd", "name=unified")) continue; p = strdup(l); @@ -909,7 +909,6 @@ static int mount_legacy_cgroup_hierarchy( const char *dest, const char *controller, const char *hierarchy, - CGroupUnified unified_requested, bool read_only) { const char *to, *fstype, *opts; @@ -927,14 +926,12 @@ static int mount_legacy_cgroup_hierarchy( /* The superblock mount options of the mount point need to be * identical to the hosts', and hence writable... 
*/ - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { - fstype = "cgroup2"; - opts = NULL; - } else { - fstype = "cgroup"; - opts = "none,name=systemd,xattr"; - } + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) { + fstype = "cgroup2"; + opts = NULL; + } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) { + fstype = "cgroup"; + opts = "none,name=systemd,xattr"; } else { fstype = "cgroup"; opts = controller; @@ -994,7 +991,7 @@ static int mount_legacy_cgns_supported( return r; } - if (cg_all_unified() > 0) + if (cg_all_unified()) goto skip_controllers; controllers = set_new(&string_hash_ops); @@ -1012,7 +1009,7 @@ static int mount_legacy_cgns_supported( if (!controller) break; - r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns); + r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); if (r < 0) return r; @@ -1046,7 +1043,13 @@ static int mount_legacy_cgns_supported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1091,7 +1094,7 @@ static int mount_legacy_cgns_unsupported( return r; } - if (cg_all_unified() > 0) + if (cg_all_unified()) goto skip_controllers; controllers = set_new(&string_hash_ops); @@ -1117,7 +1120,7 @@ static int mount_legacy_cgns_unsupported( if (r == -EINVAL) { /* Not a symbolic link, but directly a single cgroup hierarchy */ - r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); if (r < 0) return r; @@ -1137,7 +1140,7 @@ static int 
mount_legacy_cgns_unsupported( continue; } - r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true); + r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); if (r < 0) return r; @@ -1150,7 +1153,13 @@ static int mount_legacy_cgns_unsupported( } skip_controllers: - r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false); + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); + if (r < 0) + return r; + } + + r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; @@ -1202,12 +1211,25 @@ int mount_cgroups( return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context); } +static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root) +{ + int r; + + /* Make our own cgroup a (writable) bind mount */ + r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); + if (r < 0) + return r; + + /* And then remount the systemd cgroup root read-only */ + return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, + MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); +} + int mount_systemd_cgroup_writable( const char *dest, CGroupUnified unified_requested) { _cleanup_free_ char *own_cgroup_path = NULL; - const char *systemd_root, *systemd_own; int r; assert(dest); @@ -1220,22 +1242,19 @@ int mount_systemd_cgroup_writable( if (path_equal(own_cgroup_path, "/")) return 0; - if (unified_requested >= CGROUP_UNIFIED_ALL) { - systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup"); - } else { - systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); - systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd"); - } + if (unified_requested >= CGROUP_UNIFIED_ALL) 
+ return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup")); - /* Make our own cgroup a (writable) bind mount */ - r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL); - if (r < 0) - return r; + if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { + r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/unified")); + if (r < 0) + return r; + } - /* And then remount the systemd cgroup root read-only */ - return mount_verbose(LOG_ERR, NULL, systemd_root, NULL, - MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL); + return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path), + prefix_roota(dest, "/sys/fs/cgroup/systemd")); } int setup_volatile_state( diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 751f26272b..42355115ff 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -316,7 +316,7 @@ static int custom_mount_check_all(void) { static int detect_unified_cgroup_hierarchy(const char *directory) { const char *e; - int r, all_unified, systemd_unified; + int r; /* Allow the user to control whether the unified hierarchy is used */ e = getenv("UNIFIED_CGROUP_HIERARCHY"); @@ -332,15 +332,8 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { return 0; } - all_unified = cg_all_unified(); - systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); - - if (all_unified < 0 || systemd_unified < 0) - return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, - "Failed to determine whether the unified cgroups hierarchy is used: %m"); - /* Otherwise inherit the default from the host system */ - if (all_unified > 0) { + if (cg_all_unified()) { /* Unified cgroup hierarchy support was added in 230. Unfortunately the detection * routine only detects 231, so we'll have a false negative here for 230. 
*/ r = systemd_installation_has_version(directory, 230); @@ -350,9 +343,9 @@ static int detect_unified_cgroup_hierarchy(const char *directory) { arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL; else arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE; - } else if (systemd_unified > 0) { - /* Mixed cgroup hierarchy support was added in 232 */ - r = systemd_installation_has_version(directory, 232); + } else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) { + /* Mixed cgroup hierarchy support was added in 233 */ + r = systemd_installation_has_version(directory, 233); if (r < 0) return log_error_errno(r, "Failed to determine systemd version in container: %m"); if (r > 0) @@ -2168,8 +2161,6 @@ static int inner_child( assert(directory); assert(kmsg_socket >= 0); - cg_unified_flush(); - if (arg_userns_mode != USER_NAMESPACE_NO) { /* Tell the parent, that it now can write the UID map. */ (void) barrier_place(barrier); /* #1 */ @@ -2440,8 +2431,6 @@ static int outer_child( assert(notify_socket >= 0); assert(kmsg_socket >= 0); - cg_unified_flush(); - if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m"); @@ -2486,10 +2475,6 @@ static int outer_child( if (r < 0) return r; - r = detect_unified_cgroup_hierarchy(directory); - if (r < 0) - return r; - if (arg_userns_mode != USER_NAMESPACE_NO) { /* Let the parent know which UID shift we read from the image */ l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL); @@ -3542,6 +3527,10 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); + r = cg_unified_flush(); + if (r < 0) + return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m"); + /* Make sure rename_process() in the stub init process can work */ saved_argv = argv; saved_argc = argc; @@ -3810,6 +3799,10 @@ int main(int argc, char *argv[]) { if (r < 0) goto finish; + r = detect_unified_cgroup_hierarchy(arg_directory); + if (r < 0) + goto finish; 
+ interactive = isatty(STDIN_FILENO) > 0 && isatty(STDOUT_FILENO) > 0; diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index c60fb631fa..30cd463722 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -18,11 +18,13 @@ ***/ #include "alloc-util.h" +#include "build.h" #include "cgroup-util.h" #include "dirent-util.h" #include "fd-util.h" #include "format-util.h" #include "parse-util.h" +#include "proc-cmdline.h" #include "process-util.h" #include "stat-util.h" #include "string-util.h" @@ -332,7 +334,49 @@ static void test_fd_is_cgroup_fs(void) { fd = safe_close(fd); } +static void test_is_wanted_print(bool header) { + _cleanup_free_ char *cmdline = NULL; + + log_info("-- %s --", __func__); + assert_se(proc_cmdline(&cmdline) >= 0); + log_info("cmdline: %s", cmdline); + if (header) { + + log_info(_CGROUP_HIEARCHY_); + (void) system("findmnt -n /sys/fs/cgroup"); + } + + log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted())); + log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted())); + log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted())); + log_info(" "); +} + +static void test_is_wanted(void) { + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0); + test_is_wanted_print(false); +} + int main(void) { + log_set_max_level(LOG_DEBUG); + log_parse_environment(); + log_open(); + test_path_decode_unit(); test_path_get_unit(); test_path_get_user_unit(); @@ -349,6 +393,9 @@ int main(void) { 
TEST_REQ_RUNNING_SYSTEMD(test_mask_supported()); TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs()); TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs()); + test_is_wanted_print(true); + test_is_wanted_print(false); /* run twice to test caching */ + test_is_wanted(); return 0; } |