summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configure.ac52
-rw-r--r--man/kernel-command-line.xml16
-rw-r--r--man/systemd.xml148
-rw-r--r--src/basic/build.h5
-rw-r--r--src/basic/cgroup-util.c305
-rw-r--r--src/basic/cgroup-util.h10
-rw-r--r--src/basic/def.h4
-rw-r--r--src/cgls/cgls.c2
-rw-r--r--src/cgtop/cgtop.c13
-rw-r--r--src/core/cgroup.c38
-rw-r--r--src/core/manager.c2
-rw-r--r--src/core/mount-setup.c10
-rw-r--r--src/core/scope.c2
-rw-r--r--src/core/service.c2
-rw-r--r--src/core/unit.c4
-rw-r--r--src/libsystemd/sd-bus/test-bus-creds.c2
-rw-r--r--src/nspawn/nspawn-cgroup.c17
-rw-r--r--src/nspawn/nspawn-mount.c83
-rw-r--r--src/nspawn/nspawn.c33
-rw-r--r--src/test/test-cgroup-util.c47
20 files changed, 480 insertions, 315 deletions
diff --git a/configure.ac b/configure.ac
index 156511851c..d98cc4d9ba 100644
--- a/configure.ac
+++ b/configure.ac
@@ -613,6 +613,23 @@ AC_SUBST(FALLBACK_HOSTNAME)
AC_DEFINE_UNQUOTED(FALLBACK_HOSTNAME, ["$FALLBACK_HOSTNAME"], [The hostname used if none configured])
# ------------------------------------------------------------------------------
+
+AC_ARG_WITH(default-hierarchy,
+ AS_HELP_STRING([--with-default-hierarchy=MODE],
+ [default cgroup hierarchy, defaults to "hybrid"]),
+ [DEFAULT_HIERARCHY="$withval"],
+ [DEFAULT_HIERARCHY="hybrid"])
+
+AS_CASE("$DEFAULT_HIERARCHY",
+ [legacy], [mode=CGROUP_UNIFIED_NONE],
+ [hybrid], [mode=CGROUP_UNIFIED_SYSTEMD],
+ [unified], [mode=CGROUP_UNIFIED_ALL],
+ AC_MSG_ERROR(Bad default hierarchy mode ${DEFAULT_HIERARCHY}))
+AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY, [$mode], [Default cgroup hierarchy])
+AC_DEFINE_UNQUOTED(DEFAULT_HIERARCHY_NAME, ["$DEFAULT_HIERARCHY"],
+ [Default cgroup hierarchy as string])
+
+# ------------------------------------------------------------------------------
have_xz=no
AC_ARG_ENABLE(xz, AS_HELP_STRING([--disable-xz], [disable optional XZ support]))
AS_IF([test "x$enable_xz" != "xno"], [
@@ -1667,19 +1684,20 @@ AC_MSG_RESULT([
backlight: ${have_backlight}
rfkill: ${have_rfkill}
logind: ${have_logind}
- Default KillUserProcesses setting: ${KILL_USER_PROCESSES}
+ default cgroup hierarchy: ${DEFAULT_HIERARCHY}
+ default KillUserProcesses setting: ${KILL_USER_PROCESSES}
machined: ${have_machined}
importd: ${have_importd}
hostnamed: ${have_hostnamed}
timedated: ${have_timedated}
timesyncd: ${have_timesyncd}
- Default NTP servers: ${NTP_SERVERS}
+ default NTP servers: ${NTP_SERVERS}
time epoch: ${TIME_EPOCH}
localed: ${have_localed}
networkd: ${have_networkd}
resolved: ${have_resolved}
- Default DNS servers: ${DNS_SERVERS}
- Default DNSSEC mode: ${DEFAULT_DNSSEC_MODE}
+ default DNS servers: ${DNS_SERVERS}
+ default DNSSEC mode: ${DEFAULT_DNSSEC_MODE}
coredump: ${have_coredump}
polkit: ${have_polkit}
efi: ${have_efi}
@@ -1718,27 +1736,27 @@ AC_MSG_RESULT([
rootlib dir: ${with_rootlibdir}
SysV init scripts: ${SYSTEM_SYSVINIT_PATH}
SysV rc?.d directories: ${SYSTEM_SYSVRCND_PATH}
- Build Python: ${PYTHON}
+ build Python: ${PYTHON}
PAM modules dir: ${with_pamlibdir}
PAM configuration dir: ${with_pamconfdir}
D-Bus policy dir: ${with_dbuspolicydir}
D-Bus session dir: ${with_dbussessionservicedir}
D-Bus system dir: ${with_dbussystemservicedir}
- Bash completions dir: ${with_bashcompletiondir}
- Zsh completions dir: ${with_zshcompletiondir}
- Extra start script: ${RC_LOCAL_SCRIPT_PATH_START}
- Extra stop script: ${RC_LOCAL_SCRIPT_PATH_STOP}
- Adm group: ${have_adm_group}
- Wheel group: ${have_wheel_group}
- Debug shell: ${SUSHELL} @ ${DEBUGTTY}
+ bash completions dir: ${with_bashcompletiondir}
+ zsh completions dir: ${with_zshcompletiondir}
+ extra start script: ${RC_LOCAL_SCRIPT_PATH_START}
+ extra stop script: ${RC_LOCAL_SCRIPT_PATH_STOP}
+ adm group: ${have_adm_group}
+ wheel group: ${have_wheel_group}
+ debug shell: ${SUSHELL} @ ${DEBUGTTY}
TTY GID: ${TTY_GID}
- Maximum system UID: ${SYSTEM_UID_MAX}
- Maximum system GID: ${SYSTEM_GID_MAX}
- Certificate root: ${CERTIFICATEROOT}
- Support URL: ${SUPPORT_URL}
+ maximum system UID: ${SYSTEM_UID_MAX}
+ maximum system GID: ${SYSTEM_GID_MAX}
+ certificate root: ${CERTIFICATEROOT}
+ support URL: ${SUPPORT_URL}
nobody user name: ${NOBODY_USER_NAME}
nobody group name: ${NOBODY_GROUP_NAME}
- Fallback hostname: ${FALLBACK_HOSTNAME}
+ fallback hostname: ${FALLBACK_HOSTNAME}
CFLAGS: ${OUR_CFLAGS} ${CFLAGS}
CPPFLAGS: ${OUR_CPPFLAGS} ${CPPFLAGS}
diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml
index 415b8d3cf9..f02ca3e7bc 100644
--- a/man/kernel-command-line.xml
+++ b/man/kernel-command-line.xml
@@ -78,20 +78,22 @@
<varlistentry>
<term><varname>systemd.unit=</varname></term>
<term><varname>rd.systemd.unit=</varname></term>
- <term><varname>systemd.dump_core=</varname></term>
- <term><varname>systemd.crash_chvt=</varname></term>
- <term><varname>systemd.crash_shell=</varname></term>
- <term><varname>systemd.crash_reboot=</varname></term>
- <term><varname>systemd.confirm_spawn=</varname></term>
- <term><varname>systemd.show_status=</varname></term>
+ <term><varname>systemd.dump_core</varname></term>
+ <term><varname>systemd.crash_chvt</varname></term>
+ <term><varname>systemd.crash_shell</varname></term>
+ <term><varname>systemd.crash_reboot</varname></term>
+ <term><varname>systemd.confirm_spawn</varname></term>
+ <term><varname>systemd.show_status</varname></term>
<term><varname>systemd.log_target=</varname></term>
<term><varname>systemd.log_level=</varname></term>
- <term><varname>systemd.log_color=</varname></term>
<term><varname>systemd.log_location=</varname></term>
+ <term><varname>systemd.log_color</varname></term>
<term><varname>systemd.default_standard_output=</varname></term>
<term><varname>systemd.default_standard_error=</varname></term>
<term><varname>systemd.setenv=</varname></term>
<term><varname>systemd.machine_id=</varname></term>
+ <term><varname>systemd.unified_cgroup_hierarchy</varname></term>
+ <term><varname>systemd.legacy_systemd_cgroup_controller</varname></term>
<listitem>
<para>Parameters understood by the system and service
manager to control system behavior. For details, see
diff --git a/man/systemd.xml b/man/systemd.xml
index bfcc0c13b0..4856dea824 100644
--- a/man/systemd.xml
+++ b/man/systemd.xml
@@ -51,10 +51,13 @@
<refsynopsisdiv>
<cmdsynopsis>
- <command>systemd <arg choice="opt" rep="repeat">OPTIONS</arg></command>
+ <command>systemd</command>
+ <arg choice="opt" rep="repeat">OPTIONS</arg>
</cmdsynopsis>
<cmdsynopsis>
- <command>init <arg choice="opt" rep="repeat">OPTIONS</arg> <arg choice="req">COMMAND</arg></command>
+ <command>init</command>
+ <arg choice="opt" rep="repeat">OPTIONS</arg>
+ <arg choice="req">COMMAND</arg>
</cmdsynopsis>
</refsynopsisdiv>
@@ -150,6 +153,7 @@
user instance. This setting may also be enabled during boot,
on the kernel command line via the
<varname>systemd.crash_vt=</varname> option, see
+ <!-- FIXME: there is no crash_vt command line option? -->
below.</para></listitem>
</varlistentry>
@@ -898,88 +902,91 @@
</varlistentry>
<varlistentry>
- <term><varname>systemd.dump_core=</varname></term>
+ <term><varname>systemd.dump_core</varname></term>
- <listitem><para>Takes a boolean argument. If
- <option>yes</option>, the systemd manager (PID 1) dumps core
- when it crashes. Otherwise, no core dump is created. Defaults
- to <option>yes</option>.</para></listitem>
+ <listitem><para>Takes a boolean argument or enables the option if specified
+ without an argument. If enabled, the systemd manager (PID 1) dumps core when
+ it crashes. Otherwise, no core dump is created. Defaults to enabled.</para>
+ </listitem>
</varlistentry>
<varlistentry>
- <term><varname>systemd.crash_chvt=</varname></term>
+ <term><varname>systemd.crash_chvt</varname></term>
- <listitem><para>Takes a positive integer, or a boolean
- argument. If a positive integer (in the range 1–63) is
- specified, the system manager (PID 1) will activate the specified
- virtual terminal (VT) when it crashes. Defaults to
- <constant>no</constant>, meaning that no such switch is
- attempted. If set to <constant>yes</constant>, the VT the
- kernel messages are written to is selected.</para></listitem>
+ <listitem><para>Takes a positive integer, or a boolean argument. Can also be
+ specified without an argument, with the same effect as a positive boolean. If
+ a positive integer (in the range 1–63) is specified, the system manager (PID
+ 1) will activate the specified virtual terminal (VT) when it
+ crashes. Defaults to disabled, meaning that no such switch is attempted. If
+ set to enabled, the VT the kernel messages are written to is selected.
+ </para></listitem>
</varlistentry>
<varlistentry>
- <term><varname>systemd.crash_shell=</varname></term>
+ <term><varname>systemd.crash_shell</varname></term>
- <listitem><para>Takes a boolean argument. If
- <option>yes</option>, the system manager (PID 1) spawns a
- shell when it crashes, after a 10s delay. Otherwise, no shell
- is spawned. Defaults to <option>no</option>, for security
- reasons, as the shell is not protected by password
+ <listitem><para>Takes a boolean argument or enables the option if specified
+ without an argument. If enabled, the system manager (PID 1) spawns a shell
+ when it crashes, after a 10s delay. Otherwise, no shell is spawned. Defaults
+ to disabled, for security reasons, as the shell is not protected by password
authentication.</para></listitem>
</varlistentry>
<varlistentry>
- <term><varname>systemd.crash_reboot=</varname></term>
+ <term><varname>systemd.crash_reboot</varname></term>
- <listitem><para>Takes a boolean argument. If
- <option>yes</option>, the system manager (PID 1) will reboot
- the machine automatically when it crashes, after a 10s delay.
- Otherwise, the system will hang indefinitely. Defaults to
- <option>no</option>, in order to avoid a reboot loop. If
- combined with <varname>systemd.crash_shell=</varname>, the
+ <listitem><para>Takes a boolean argument or enables the option if specified
+ without an argument. If enabled, the system manager (PID 1) will reboot the
+ machine automatically when it crashes, after a 10s delay. Otherwise, the
+ system will hang indefinitely. Defaults to disabled, in order to avoid a
+ reboot loop. If combined with <varname>systemd.crash_shell</varname>, the
system is rebooted after the shell exits.</para></listitem>
</varlistentry>
<varlistentry>
- <term><varname>systemd.confirm_spawn=</varname></term>
+ <term><varname>systemd.confirm_spawn</varname></term>
- <listitem><para>Takes a boolean argument or a path to the
- virtual console where the confirmation messages should be
- emitted. If <option>yes</option>, the system manager (PID 1)
- asks for confirmation when spawning processes using
- <option>/dev/console</option>. If a path or a console name
- (such as <literal>ttyS0</literal>) is provided, the virtual
- console pointed to by this path or described by the give name
- will be used instead. Defaults to <option>no</option>.</para></listitem>
+ <listitem><para>Takes a boolean argument or a path to the virtual console
+ where the confirmation messages should be emitted. Can also be specified
+ without an argument, with the same effect as a positive boolean. If enabled,
+ the system manager (PID 1) asks for confirmation when spawning processes
+ using <option>/dev/console</option>. If a path or a console name (such as
+ <literal>ttyS0</literal>) is provided, the virtual console pointed to by this
+ path or described by the given name will be used instead. Defaults to disabled.
+ </para></listitem>
</varlistentry>
<varlistentry>
- <term><varname>systemd.show_status=</varname></term>
+ <term><varname>systemd.show_status</varname></term>
- <listitem><para>Takes a boolean argument or the constant <constant>auto</constant>. If <option>yes</option>,
- the systemd manager (PID 1) shows terse service status updates on the console during bootup.
- <constant>auto</constant> behaves like <option>false</option> until a unit fails or there is a significant
- delay in boot. Defaults to <option>yes</option>, unless <option>quiet</option> is passed as kernel command
- line option, in which case it defaults to <constant>auto</constant>. If specified overrides the system manager
- configuration file option <option>ShowStatus=</option>, see
- <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>. However,
- the process command line option <option>--show-status=</option> takes precedence over both this kernel command
- line option and the configuration file option.</para></listitem>
+ <listitem><para>Takes a boolean argument or the constant
+ <constant>auto</constant>. Can also be specified without an argument, with
+ the same effect as a positive boolean. If enabled, the systemd manager (PID
+ 1) shows terse service status updates on the console during bootup.
+ <constant>auto</constant> behaves like <option>false</option> until a unit
+ fails or there is a significant delay in boot. Defaults to enabled, unless
+ <option>quiet</option> is passed as kernel command line option, in which case
+ it defaults to <constant>auto</constant>. If specified, overrides the system
+ manager configuration file option <option>ShowStatus=</option>, see
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
+ However, the process command line option <option>--show-status=</option>
+ takes precedence over both this kernel command line option and the
+ configuration file option.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>systemd.log_target=</varname></term>
<term><varname>systemd.log_level=</varname></term>
- <term><varname>systemd.log_color=</varname></term>
<term><varname>systemd.log_location=</varname></term>
+ <term><varname>systemd.log_color</varname></term>
- <listitem><para>Controls log output, with the same effect as
- the <varname>$SYSTEMD_LOG_TARGET</varname>,
+ <listitem><para>Controls log output, with the same effect as the
+ <varname>$SYSTEMD_LOG_TARGET</varname>,
<varname>$SYSTEMD_LOG_LEVEL</varname>,
- <varname>$SYSTEMD_LOG_COLOR</varname>,
- <varname>$SYSTEMD_LOG_LOCATION</varname> environment variables
- described above.</para></listitem>
+ <varname>$SYSTEMD_LOG_LOCATION</varname>,
+ <varname>$SYSTEMD_LOG_COLOR</varname> environment variables described above.
+ <varname>systemd.log_color</varname> can be specified without an argument,
+ with the same effect as a positive boolean.</para></listitem>
</varlistentry>
<varlistentry>
@@ -1011,6 +1018,41 @@
</varlistentry>
<varlistentry>
+ <term><varname>systemd.unified_cgroup_hierarchy</varname></term>
+
+ <listitem><para>When specified without an argument or with a true argument,
+ enables the usage of
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">unified cgroup hierarchy</ulink>
+ (a.k.a. cgroups-v2). When specified with a false argument, falls back to
+ hybrid or full legacy cgroup hierarchy.</para>
+
+ <para>If this option is not specified, the default behaviour is determined
+ during compilation (the <option>--with-default-hierarchy=</option>
+ option). If the kernel does not support unified cgroup hierarchy, the legacy
+ hierarchy will be used even if this option is specified.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>systemd.legacy_systemd_cgroup_controller</varname></term>
+
+ <listitem><para>Takes effect if the full unified cgroup hierarchy is not used
+ (see previous option). When specified without an argument or with a true
+ argument, disables the use of "hybrid" cgroup hierarchy (i.e. a cgroups-v2
+ tree used for systemd, and
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/">legacy
+ cgroup hierarchy</ulink>, a.k.a. cgroups-v1, for other controllers), and
+ forces a full "legacy" mode. When specified with a false argument, enables
+ the use of "hybrid" hierarchy.</para>
+
+ <para>If this option is not specified, the default behaviour is determined
+ during compilation (the <option>--with-default-hierarchy=</option>
+ option). If the kernel does not support unified cgroup hierarchy, the legacy
+ hierarchy will be used even if this option is specified.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>quiet</varname></term>
<listitem><para>Turn off status output at boot, much like
diff --git a/src/basic/build.h b/src/basic/build.h
index 633c2aaccb..91312bd2a3 100644
--- a/src/basic/build.h
+++ b/src/basic/build.h
@@ -133,6 +133,8 @@
#define _IDN_FEATURE_ "-IDN"
#endif
+#define _CGROUP_HIEARCHY_ "default-hierarchy=" DEFAULT_HIERARCHY_NAME
+
#define SYSTEMD_FEATURES \
_PAM_FEATURE_ " " \
_AUDIT_FEATURE_ " " \
@@ -152,4 +154,5 @@
_BLKID_FEATURE_ " " \
_ELFUTILS_FEATURE_ " " \
_KMOD_FEATURE_ " " \
- _IDN_FEATURE_
+ _IDN_FEATURE_ " " \
+ _CGROUP_HIEARCHY_
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 6948ed3931..f76b7f47e5 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -208,6 +208,12 @@ int cg_rmdir(const char *controller, const char *path) {
if (r < 0 && errno != ENOENT)
return -errno;
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
+ r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
+ }
+
return 0;
}
@@ -542,6 +548,13 @@ static const char *controller_to_dirname(const char *controller) {
* just cuts off the name= prefixed used for named
* hierarchies, if it is specified. */
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ if (cg_hybrid_unified())
+ controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
+ else
+ controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ }
+
e = startswith(controller, "name=");
if (e)
return e;
@@ -594,7 +607,7 @@ static int join_path_unified(const char *path, const char *suffix, char **fs) {
}
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
- int unified, r;
+ int r;
assert(fs);
@@ -623,11 +636,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
if (!cg_controller_is_valid(controller))
return -EINVAL;
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
-
- if (unified > 0)
+ if (cg_all_unified())
r = join_path_unified(path, suffix, fs);
else
r = join_path_legacy(controller, path, suffix, fs);
@@ -639,7 +648,6 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
}
static int controller_is_accessible(const char *controller) {
- int unified;
assert(controller);
@@ -651,10 +659,7 @@ static int controller_is_accessible(const char *controller) {
if (!cg_controller_is_valid(controller))
return -EINVAL;
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (unified > 0) {
+ if (cg_all_unified()) {
/* We don't support named hierarchies if we are using
* the unified hierarchy. */
@@ -708,7 +713,7 @@ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct
int cg_trim(const char *controller, const char *path, bool delete_root) {
_cleanup_free_ char *fs = NULL;
- int r = 0;
+ int r = 0, q;
assert(path);
@@ -731,6 +736,12 @@ int cg_trim(const char *controller, const char *path, bool delete_root) {
return -errno;
}
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
+ q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
+ if (q < 0)
+ log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
+ }
+
return r;
}
@@ -754,6 +765,12 @@ int cg_create(const char *controller, const char *path) {
return -errno;
}
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
+ }
+
return 1;
}
@@ -791,7 +808,17 @@ int cg_attach(const char *controller, const char *path, pid_t pid) {
xsprintf(c, PID_FMT "\n", pid);
- return write_string_file(fs, c, 0);
+ r = write_string_file(fs, c, 0);
+ if (r < 0)
+ return r;
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to attach %d to compat systemd cgroup %s: %m", pid, path);
+ }
+
+ return 0;
}
int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
@@ -840,7 +867,17 @@ int cg_set_group_access(
if (r < 0)
return r;
- return chmod_and_chown(fs, mode, uid, gid);
+ r = chmod_and_chown(fs, mode, uid, gid);
+ if (r < 0)
+ return r;
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
+ r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path);
+ }
+
+ return 0;
}
int cg_set_task_access(
@@ -851,7 +888,7 @@ int cg_set_task_access(
gid_t gid) {
_cleanup_free_ char *fs = NULL, *procs = NULL;
- int r, unified;
+ int r;
assert(path);
@@ -869,16 +906,18 @@ int cg_set_task_access(
if (r < 0)
return r;
- unified = cg_unified(controller);
- if (unified < 0)
- return unified;
- if (unified)
- return 0;
+ if (!cg_unified(controller)) {
+ /* Compatibility, Always keep values for "tasks" in sync with
+ * "cgroup.procs" */
+ if (cg_get_path(controller, path, "tasks", &procs) >= 0)
+ (void) chmod_and_chown(procs, mode, uid, gid);
+ }
- /* Compatibility, Always keep values for "tasks" in sync with
- * "cgroup.procs" */
- if (cg_get_path(controller, path, "tasks", &procs) >= 0)
- (void) chmod_and_chown(procs, mode, uid, gid);
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER) && cg_hybrid_unified()) {
+ r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path);
+ }
return 0;
}
@@ -923,9 +962,9 @@ int cg_get_xattr(const char *controller, const char *path, const char *name, voi
int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
_cleanup_fclose_ FILE *f = NULL;
char line[LINE_MAX];
- const char *fs;
+ const char *fs, *controller_str;
size_t cs = 0;
- int unified;
+ bool unified;
assert(path);
assert(pid >= 0);
@@ -937,10 +976,14 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
controller = SYSTEMD_CGROUP_CONTROLLER;
unified = cg_unified(controller);
- if (unified < 0)
- return unified;
- if (unified == 0)
- cs = strlen(controller);
+ if (!unified) {
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
+ else
+ controller_str = controller;
+
+ cs = strlen(controller_str);
+ }
fs = procfs_file_alloca(pid, "cgroup");
f = fopen(fs, "re");
@@ -977,7 +1020,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
*e = 0;
FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
- if (k == cs && memcmp(word, controller, cs) == 0) {
+ if (k == cs && memcmp(word, controller_str, cs) == 0) {
found = true;
break;
}
@@ -1001,14 +1044,11 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
int cg_install_release_agent(const char *controller, const char *agent) {
_cleanup_free_ char *fs = NULL, *contents = NULL;
const char *sc;
- int r, unified;
+ int r;
assert(agent);
- unified = cg_unified(controller);
- if (unified < 0)
- return unified;
- if (unified) /* doesn't apply to unified hierarchy */
+ if (cg_unified(controller)) /* doesn't apply to unified hierarchy */
return -EOPNOTSUPP;
r = cg_get_path(controller, NULL, "release_agent", &fs);
@@ -1054,12 +1094,9 @@ int cg_install_release_agent(const char *controller, const char *agent) {
int cg_uninstall_release_agent(const char *controller) {
_cleanup_free_ char *fs = NULL;
- int r, unified;
+ int r;
- unified = cg_unified(controller);
- if (unified < 0)
- return unified;
- if (unified) /* Doesn't apply to unified hierarchy */
+ if (cg_unified(controller)) /* Doesn't apply to unified hierarchy */
return -EOPNOTSUPP;
r = cg_get_path(controller, NULL, "notify_on_release", &fs);
@@ -1104,7 +1141,7 @@ int cg_is_empty(const char *controller, const char *path) {
}
int cg_is_empty_recursive(const char *controller, const char *path) {
- int unified, r;
+ int r;
assert(path);
@@ -1112,11 +1149,7 @@ int cg_is_empty_recursive(const char *controller, const char *path) {
if (controller && (isempty(path) || path_equal(path, "/")))
return false;
- unified = cg_unified(controller);
- if (unified < 0)
- return unified;
-
- if (unified > 0) {
+ if (cg_unified(controller)) {
_cleanup_free_ char *t = NULL;
/* On the unified hierarchy we can check empty state
@@ -1833,6 +1866,9 @@ bool cg_controller_is_valid(const char *p) {
if (!p)
return false;
+ if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
+ return true;
+
s = startswith(p, "name=");
if (s)
p = s;
@@ -1986,7 +2022,7 @@ int cg_get_keyed_attribute(const char *controller, const char *path, const char
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
CGroupController c;
- int r, unified;
+ int r;
/* This one will create a cgroup in our private tree, but also
* duplicate it in the trees specified in mask, and remove it
@@ -1998,10 +2034,7 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path
return r;
/* If we are in the unified hierarchy, we are done now */
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (unified > 0)
+ if (cg_all_unified())
return 0;
/* Otherwise, do the same in the other hierarchies */
@@ -2022,16 +2055,13 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path
int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
CGroupController c;
- int r, unified;
+ int r;
r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
if (r < 0)
return r;
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (unified > 0)
+ if (cg_all_unified())
return 0;
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
@@ -2072,7 +2102,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids,
int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
CGroupController c;
- int r = 0, unified;
+ int r = 0;
if (!path_equal(from, to)) {
r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
@@ -2080,10 +2110,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to
return r;
}
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (unified > 0)
+ if (cg_all_unified())
return r;
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
@@ -2107,16 +2134,13 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to
int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
CGroupController c;
- int r, unified;
+ int r;
r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
if (r < 0)
return r;
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (unified > 0)
+ if (cg_all_unified())
return r;
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
@@ -2133,16 +2157,13 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root)
int cg_mask_supported(CGroupMask *ret) {
CGroupMask mask = 0;
- int r, unified;
+ int r;
/* Determines the mask of supported cgroup controllers. Only
* includes controllers we can make sense of and that are
* actually accessible. */
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (unified > 0) {
+ if (cg_all_unified()) {
_cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
const char *c;
@@ -2262,6 +2283,20 @@ int cg_kernel_controllers(Set *controllers) {
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
+/* The hybrid mode was initially implemented in v232 and simply mounted
+ * cgroup v2 on /sys/fs/cgroup/systemd. This unfortunately broke other
+ * tools (such as docker) which expected the v1 "name=systemd" hierarchy
+ * on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts
+ * v2 on /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy
+ * on /sys/fs/cgroup/systemd for compatibility with other tools.
+ *
+ * To keep live upgrade working, we detect and support v232 layout. When
+ * v232 layout is detected, to keep cgroup v2 process management but
+ * disable the compat dual layout, we return %true on
+ * cg_unified(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
+ */
+static thread_local bool unified_systemd_v232;
+
static int cg_update_unified(void) {
struct statfs fs;
@@ -2280,24 +2315,30 @@ static int cg_update_unified(void) {
if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
unified_cache = CGROUP_UNIFIED_ALL;
else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
- if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
- return -errno;
-
- unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ?
- CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE;
+ if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
+ F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ unified_cache = CGROUP_UNIFIED_SYSTEMD;
+ unified_systemd_v232 = false;
+ } else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 &&
+ F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
+ unified_cache = CGROUP_UNIFIED_SYSTEMD;
+ unified_systemd_v232 = true;
+ } else {
+ if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
+ return -errno;
+ if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
+ return -ENOMEDIUM;
+ unified_cache = CGROUP_UNIFIED_NONE;
+ }
} else
return -ENOMEDIUM;
return 0;
}
-int cg_unified(const char *controller) {
+bool cg_unified(const char *controller) {
- int r;
-
- r = cg_update_unified();
- if (r < 0)
- return r;
+ assert(cg_update_unified() >= 0);
if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER))
return unified_cache >= CGROUP_UNIFIED_SYSTEMD;
@@ -2305,29 +2346,35 @@ int cg_unified(const char *controller) {
return unified_cache >= CGROUP_UNIFIED_ALL;
}
-int cg_all_unified(void) {
+bool cg_all_unified(void) {
return cg_unified(NULL);
}
-void cg_unified_flush(void) {
+bool cg_hybrid_unified(void) {
+ /* True only for the non-v232 hybrid layout; assert_se() keeps the probe call alive even when assert() is compiled out (NDEBUG). */
+ assert_se(cg_update_unified() >= 0);
+
+ return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
+}
+
+int cg_unified_flush(void) {
unified_cache = CGROUP_UNIFIED_UNKNOWN;
+
+ return cg_update_unified();
}
int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
_cleanup_free_ char *fs = NULL;
CGroupController c;
- int r, unified;
+ int r;
assert(p);
if (supported == 0)
return 0;
- unified = cg_all_unified();
- if (unified < 0)
- return unified;
- if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
+ if (!cg_all_unified()) /* on the legacy hiearchy there's no joining of controllers defined */
return 0;
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
@@ -2359,63 +2406,69 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
bool cg_is_unified_wanted(void) {
static thread_local int wanted = -1;
- int r, unified;
+ int r;
bool b;
+ const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
- /* If the hierarchy is already mounted, then follow whatever
- * was chosen for it. */
- unified = cg_all_unified();
- if (unified >= 0)
- return unified;
-
- /* Otherwise, let's see what the kernel command line has to
- * say. Since checking that is expensive, let's cache the
- * result. */
+ /* If we have a cached value, return that. */
if (wanted >= 0)
return wanted;
+ /* If the hierarchy is already mounted, then follow whatever
+ * was chosen for it. */
+ if (cg_unified_flush() >= 0)
+ return (wanted = cg_all_unified());
+
+ /* Otherwise, let's see what the kernel command line has to say.
+ * Since checking is expensive, cache a non-error result. */
r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
- if (r < 0)
- return false;
- return (wanted = r > 0 ? b : false);
+ return (wanted = r > 0 ? b : is_default);
}
bool cg_is_legacy_wanted(void) {
- return !cg_is_unified_wanted();
+ static thread_local int wanted = -1;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* Check if we have cgroups2 already mounted. */
+ if (cg_unified_flush() >= 0 &&
+ unified_cache == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, assume that at least partial legacy is wanted,
+ * since cgroups2 should already be mounted at this point. */
+ return (wanted = true);
}
-bool cg_is_unified_systemd_controller_wanted(void) {
+bool cg_is_hybrid_wanted(void) {
static thread_local int wanted = -1;
- int r, unified;
+ int r;
bool b;
+ const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
+ /* We default to true if the default is "hybrid", obviously,
+ * but also when the default is "unified", because if we get
+ * called, it means that unified hierarchy was not mounted. */
- /* If the unified hierarchy is requested in full, no need to
- * bother with this. */
- if (cg_is_unified_wanted())
- return 0;
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
/* If the hierarchy is already mounted, then follow whatever
* was chosen for it. */
- unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
- if (unified >= 0)
- return unified;
-
- /* Otherwise, let's see what the kernel command line has to
- * say. Since checking that is expensive, let's cache the
- * result. */
- if (wanted >= 0)
- return wanted;
+ if (cg_unified_flush() >= 0 &&
+ unified_cache == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+ /* Otherwise, let's see what the kernel command line has to say.
+ * Since checking is expensive, cache the outcome; if the lookup yields no value (r <= 0), the compiled-in default is cached. */
r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
- if (r < 0)
- return false;
-
- return (wanted = r > 0 ? b : false);
-}
-bool cg_is_legacy_systemd_controller_wanted(void) {
- return cg_is_legacy_wanted() && !cg_is_unified_systemd_controller_wanted();
+ /* The meaning of the kernel option is reversed with respect to the return value
+ * of this function, hence the negation. */
+ return (wanted = r > 0 ? !b : is_default);
}
int cg_weight_parse(const char *s, uint64_t *ret) {
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 0aa27c4cd7..3d14ec3d9d 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -240,14 +240,14 @@ int cg_kernel_controllers(Set *controllers);
bool cg_ns_supported(void);
-int cg_all_unified(void);
-int cg_unified(const char *controller);
-void cg_unified_flush(void);
+bool cg_all_unified(void);
+bool cg_hybrid_unified(void);
+bool cg_unified(const char *controller);
+int cg_unified_flush(void);
bool cg_is_unified_wanted(void);
bool cg_is_legacy_wanted(void);
-bool cg_is_unified_systemd_controller_wanted(void);
-bool cg_is_legacy_systemd_controller_wanted(void);
+bool cg_is_hybrid_wanted(void);
const char* cgroup_controller_to_string(CGroupController c) _const_;
CGroupController cgroup_controller_from_string(const char *s) _pure_;
diff --git a/src/basic/def.h b/src/basic/def.h
index 10d776ec8e..200ea973c1 100644
--- a/src/basic/def.h
+++ b/src/basic/def.h
@@ -36,7 +36,9 @@
/* The default value for the net.unix.max_dgram_qlen sysctl */
#define DEFAULT_UNIX_MAX_DGRAM_QLEN 512UL
-#define SYSTEMD_CGROUP_CONTROLLER "name=systemd"
+#define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
+#define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
+#define SYSTEMD_CGROUP_CONTROLLER "_systemd"
#define SIGNALS_CRASH_HANDLER SIGSEGV,SIGILL,SIGFPE,SIGBUS,SIGQUIT,SIGABRT
#define SIGNALS_IGNORE SIGPIPE
diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c
index 5574c14555..40db82f9ae 100644
--- a/src/cgls/cgls.c
+++ b/src/cgls/cgls.c
@@ -158,7 +158,7 @@ static int parse_argv(int argc, char *argv[]) {
static void show_cg_info(const char *controller, const char *path) {
- if (cg_all_unified() <= 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+ if (!cg_all_unified() && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
printf("Controller %s; ", controller);
printf("Control group %s:\n", isempty(path) ? "/" : path);
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index 50ac6a58b0..45c050c9c3 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -214,7 +214,7 @@ static int process(
uint64_t new_usage;
nsec_t timestamp;
- if (cg_all_unified() > 0) {
+ if (cg_all_unified()) {
const char *keys[] = { "usage_usec", NULL };
_cleanup_free_ char *val = NULL;
@@ -274,7 +274,7 @@ static int process(
} else if (streq(controller, "memory")) {
_cleanup_free_ char *p = NULL, *v = NULL;
- if (cg_all_unified() <= 0)
+ if (!cg_all_unified())
r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
else
r = cg_get_path(controller, path, "memory.current", &p);
@@ -294,15 +294,14 @@ static int process(
if (g->memory > 0)
g->memory_valid = true;
- } else if ((streq(controller, "io") && cg_all_unified() > 0) ||
- (streq(controller, "blkio") && cg_all_unified() <= 0)) {
+ } else if ((streq(controller, "io") && cg_all_unified()) ||
+ (streq(controller, "blkio") && !cg_all_unified())) {
_cleanup_fclose_ FILE *f = NULL;
_cleanup_free_ char *p = NULL;
- bool unified = cg_all_unified() > 0;
uint64_t wr = 0, rd = 0;
nsec_t timestamp;
- r = cg_get_path(controller, path, unified ? "io.stat" : "blkio.io_service_bytes", &p);
+ r = cg_get_path(controller, path, cg_all_unified() ? "io.stat" : "blkio.io_service_bytes", &p);
if (r < 0)
return r;
@@ -325,7 +324,7 @@ static int process(
l += strcspn(l, WHITESPACE);
l += strspn(l, WHITESPACE);
- if (unified) {
+ if (cg_all_unified()) {
while (!isempty(l)) {
if (sscanf(l, "rbytes=%" SCNu64, &k))
rd += k;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 5789e2aa82..fbb711782e 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -678,7 +678,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
bool has_weight = cgroup_context_has_cpu_weight(c);
bool has_shares = cgroup_context_has_cpu_shares(c);
- if (cg_all_unified() > 0) {
+ if (cg_all_unified()) {
uint64_t weight;
if (has_weight)
@@ -858,7 +858,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
- if (cg_all_unified() > 0) {
+ if (cg_all_unified()) {
uint64_t max;
uint64_t swap_max = CGROUP_LIMIT_MAX;
@@ -1033,7 +1033,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
e = unit_get_exec_context(u);
if (!e ||
exec_context_maintains_privileges(e) ||
- cg_all_unified() > 0)
+ cg_all_unified())
return _CGROUP_MASK_ALL;
}
@@ -1260,10 +1260,7 @@ int unit_watch_cgroup(Unit *u) {
return 0;
/* Only applies to the unified hierarchy */
- r = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
- if (r < 0)
- return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
- if (r == 0)
+ if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER))
return 0;
/* Don't watch the root slice, it's pointless. */
@@ -1683,7 +1680,7 @@ int unit_watch_all_pids(Unit *u) {
if (!u->cgroup_path)
return -ENOENT;
- if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) /* On unified we can use proper notifications */
return 0;
return unit_watch_pids_in_path(u, u->cgroup_path);
@@ -1756,7 +1753,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
CGroupController c;
- int r, all_unified, systemd_unified;
+ int r;
char *e;
assert(m);
@@ -1793,25 +1790,22 @@ int manager_setup_cgroup(Manager *m) {
if (r < 0)
return log_error_errno(r, "Cannot find cgroup mount point: %m");
- all_unified = cg_all_unified();
- systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
-
- if (all_unified < 0 || systemd_unified < 0)
- return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
- "Couldn't determine if we are running in the unified hierarchy: %m");
+ r = cg_unified_flush();
+ if (r < 0)
+ return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
- if (all_unified > 0)
+ if (cg_all_unified())
log_debug("Unified cgroup hierarchy is located at %s.", path);
- else if (systemd_unified > 0)
+ else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER))
log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
else
- log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
+ log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
if (!m->test_run) {
const char *scope_path;
/* 3. Install agent */
- if (systemd_unified) {
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) {
/* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
@@ -1871,7 +1865,7 @@ int manager_setup_cgroup(Manager *m) {
return log_error_errno(errno, "Failed to open pin file: %m");
/* 6. Always enable hierarchical support if it exists... */
- if (!all_unified)
+ if (!cg_all_unified())
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
}
@@ -1997,7 +1991,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
return -ENODATA;
- if (cg_all_unified() <= 0)
+ if (!cg_all_unified())
r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
else
r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
@@ -2042,7 +2036,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
- if (cg_all_unified() > 0) {
+ if (cg_all_unified()) {
const char *keys[] = { "usage_usec", NULL };
_cleanup_free_ char *val = NULL;
uint64_t us;
diff --git a/src/core/manager.c b/src/core/manager.c
index d3f6efc91c..ea80585329 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -776,7 +776,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
if (!MANAGER_IS_SYSTEM(m))
return 0;
- if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) /* We don't need this anymore on the unified hierarchy */
return 0;
if (m->cgroups_agent_fd < 0) {
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 9c2bf3a0ef..7295efbf31 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -96,15 +96,15 @@ static const MountPoint mount_table[] = {
{ "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
NULL, MNT_FATAL|MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ cg_is_unified_wanted, MNT_IN_CONTAINER },
{ "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
- { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_hybrid_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER },
+ cg_is_legacy_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
{ "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_NONE },
#ifdef ENABLE_EFI
diff --git a/src/core/scope.c b/src/core/scope.c
index 9540fb67d9..5e068a76d1 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -475,7 +475,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* If the PID set is empty now, then let's finish this off
(On unified we use proper notifications) */
- if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
+ if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER) && set_isempty(u->pids))
scope_notify_cgroup_empty_event(u);
}
diff --git a/src/core/service.c b/src/core/service.c
index 54074ff7bc..0c2eb18f38 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2938,7 +2938,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* If the PID set is empty now, then let's finish this off
(On unified we use proper notifications) */
- if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
+ if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER) && set_isempty(u->pids))
service_notify_cgroup_empty_event(u);
}
diff --git a/src/core/unit.c b/src/core/unit.c
index bb05d2abfb..685df6f00d 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3897,8 +3897,8 @@ int unit_kill_context(
* there we get proper events. Hence rely on
* them.*/
- if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
- (detect_container() == 0 && !unit_cgroup_delegate(u)))
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) ||
+ (detect_container() == 0 && !unit_cgroup_delegate(u)))
wait_for_exit = true;
if (send_sighup) {
diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c
index 6fdcfa4128..64bd76a576 100644
--- a/src/libsystemd/sd-bus/test-bus-creds.c
+++ b/src/libsystemd/sd-bus/test-bus-creds.c
@@ -31,7 +31,7 @@ int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
- if (cg_all_unified() == -ENOMEDIUM) {
+ if (cg_unified_flush() == -ENOMEDIUM) {
log_info("Skipping test: /sys/fs/cgroup/ not available");
return EXIT_TEST_SKIP;
}
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
index 5274767b96..4678a7e349 100644
--- a/src/nspawn/nspawn-cgroup.c
+++ b/src/nspawn/nspawn-cgroup.c
@@ -78,13 +78,9 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift)
char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
bool undo_mount = false;
const char *fn;
- int unified, r;
-
- unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
- if (unified < 0)
- return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
+ int r;
- if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
return 0;
/* When the host uses the legacy cgroup setup, but the
@@ -100,7 +96,7 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift)
if (!mkdtemp(tree))
return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
- if (unified)
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER))
r = mount_verbose(LOG_ERR, "cgroup", tree, "cgroup",
MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
else
@@ -142,7 +138,7 @@ finish:
int create_subcgroup(pid_t pid, CGroupUnified unified_requested) {
_cleanup_free_ char *cgroup = NULL;
const char *child;
- int unified, r;
+ int r;
CGroupMask supported;
/* In the unified hierarchy inner nodes may only contain
@@ -154,10 +150,7 @@ int create_subcgroup(pid_t pid, CGroupUnified unified_requested) {
if (unified_requested == CGROUP_UNIFIED_NONE)
return 0;
- unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
- if (unified < 0)
- return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
- if (unified == 0)
+ if (!cg_unified(SYSTEMD_CGROUP_CONTROLLER))
return 0;
r = cg_mask_supported(&supported);
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 4b2838b752..ed4f1f9db8 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -890,7 +890,7 @@ static int get_controllers(Set *subsystems) {
*e = 0;
- if (STR_IN_SET(l, "", "name=systemd"))
+ if (STR_IN_SET(l, "", "name=systemd", "name=unified"))
continue;
p = strdup(l);
@@ -909,7 +909,6 @@ static int mount_legacy_cgroup_hierarchy(
const char *dest,
const char *controller,
const char *hierarchy,
- CGroupUnified unified_requested,
bool read_only) {
const char *to, *fstype, *opts;
@@ -927,14 +926,12 @@ static int mount_legacy_cgroup_hierarchy(
/* The superblock mount options of the mount point need to be
* identical to the hosts', and hence writable... */
- if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
- if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
- fstype = "cgroup2";
- opts = NULL;
- } else {
- fstype = "cgroup";
- opts = "none,name=systemd,xattr";
- }
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_HYBRID)) {
+ fstype = "cgroup2";
+ opts = NULL;
+ } else if (streq(controller, SYSTEMD_CGROUP_CONTROLLER_LEGACY)) {
+ fstype = "cgroup";
+ opts = "none,name=systemd,xattr";
} else {
fstype = "cgroup";
opts = controller;
@@ -994,7 +991,7 @@ static int mount_legacy_cgns_supported(
return r;
}
- if (cg_all_unified() > 0)
+ if (cg_all_unified())
goto skip_controllers;
controllers = set_new(&string_hash_ops);
@@ -1012,7 +1009,7 @@ static int mount_legacy_cgns_supported(
if (!controller)
break;
- r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns);
+ r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
if (r < 0)
return r;
@@ -1046,7 +1043,13 @@ static int mount_legacy_cgns_supported(
}
skip_controllers:
- r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
if (r < 0)
return r;
@@ -1091,7 +1094,7 @@ static int mount_legacy_cgns_unsupported(
return r;
}
- if (cg_all_unified() > 0)
+ if (cg_all_unified())
goto skip_controllers;
controllers = set_new(&string_hash_ops);
@@ -1117,7 +1120,7 @@ static int mount_legacy_cgns_unsupported(
if (r == -EINVAL) {
/* Not a symbolic link, but directly a single cgroup hierarchy */
- r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true);
+ r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
if (r < 0)
return r;
@@ -1137,7 +1140,7 @@ static int mount_legacy_cgns_unsupported(
continue;
}
- r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true);
+ r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
if (r < 0)
return r;
@@ -1150,7 +1153,13 @@ static int mount_legacy_cgns_unsupported(
}
skip_controllers:
- r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false);
if (r < 0)
return r;
@@ -1202,12 +1211,25 @@ int mount_cgroups(
return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
}
+static int mount_systemd_cgroup_writable_one(const char *systemd_own, const char *systemd_root)
+{
+ int r;
+
+ /* Make our own cgroup a (writable) bind mount */
+ r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL);
+ if (r < 0)
+ return r;
+
+ /* And then remount the systemd cgroup root read-only */
+ return mount_verbose(LOG_ERR, NULL, systemd_root, NULL,
+ MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+}
+
int mount_systemd_cgroup_writable(
const char *dest,
CGroupUnified unified_requested) {
_cleanup_free_ char *own_cgroup_path = NULL;
- const char *systemd_root, *systemd_own;
int r;
assert(dest);
@@ -1220,22 +1242,19 @@ int mount_systemd_cgroup_writable(
if (path_equal(own_cgroup_path, "/"))
return 0;
- if (unified_requested >= CGROUP_UNIFIED_ALL) {
- systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
- systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
- } else {
- systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
- systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
- }
+ if (unified_requested >= CGROUP_UNIFIED_ALL)
+ return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup", own_cgroup_path),
+ prefix_roota(dest, "/sys/fs/cgroup"));
- /* Make our own cgroup a (writable) bind mount */
- r = mount_verbose(LOG_ERR, systemd_own, systemd_own, NULL, MS_BIND, NULL);
- if (r < 0)
- return r;
+ if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) {
+ r = mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/unified", own_cgroup_path),
+ prefix_roota(dest, "/sys/fs/cgroup/unified"));
+ if (r < 0)
+ return r;
+ }
- /* And then remount the systemd cgroup root read-only */
- return mount_verbose(LOG_ERR, NULL, systemd_root, NULL,
- MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL);
+ return mount_systemd_cgroup_writable_one(strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path),
+ prefix_roota(dest, "/sys/fs/cgroup/systemd"));
}
int setup_volatile_state(
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 751f26272b..42355115ff 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -316,7 +316,7 @@ static int custom_mount_check_all(void) {
static int detect_unified_cgroup_hierarchy(const char *directory) {
const char *e;
- int r, all_unified, systemd_unified;
+ int r;
/* Allow the user to control whether the unified hierarchy is used */
e = getenv("UNIFIED_CGROUP_HIERARCHY");
@@ -332,15 +332,8 @@ static int detect_unified_cgroup_hierarchy(const char *directory) {
return 0;
}
- all_unified = cg_all_unified();
- systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
-
- if (all_unified < 0 || systemd_unified < 0)
- return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
- "Failed to determine whether the unified cgroups hierarchy is used: %m");
-
/* Otherwise inherit the default from the host system */
- if (all_unified > 0) {
+ if (cg_all_unified()) {
/* Unified cgroup hierarchy support was added in 230. Unfortunately the detection
* routine only detects 231, so we'll have a false negative here for 230. */
r = systemd_installation_has_version(directory, 230);
@@ -350,9 +343,9 @@ static int detect_unified_cgroup_hierarchy(const char *directory) {
arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
else
arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
- } else if (systemd_unified > 0) {
- /* Mixed cgroup hierarchy support was added in 232 */
- r = systemd_installation_has_version(directory, 232);
+ } else if (cg_unified(SYSTEMD_CGROUP_CONTROLLER)) {
+ /* Mixed cgroup hierarchy support was added in 233 */
+ r = systemd_installation_has_version(directory, 233);
if (r < 0)
return log_error_errno(r, "Failed to determine systemd version in container: %m");
if (r > 0)
@@ -2168,8 +2161,6 @@ static int inner_child(
assert(directory);
assert(kmsg_socket >= 0);
- cg_unified_flush();
-
if (arg_userns_mode != USER_NAMESPACE_NO) {
/* Tell the parent, that it now can write the UID map. */
(void) barrier_place(barrier); /* #1 */
@@ -2440,8 +2431,6 @@ static int outer_child(
assert(notify_socket >= 0);
assert(kmsg_socket >= 0);
- cg_unified_flush();
-
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
@@ -2486,10 +2475,6 @@ static int outer_child(
if (r < 0)
return r;
- r = detect_unified_cgroup_hierarchy(directory);
- if (r < 0)
- return r;
-
if (arg_userns_mode != USER_NAMESPACE_NO) {
/* Let the parent know which UID shift we read from the image */
l = send(uid_shift_socket, &arg_uid_shift, sizeof(arg_uid_shift), MSG_NOSIGNAL);
@@ -3542,6 +3527,10 @@ int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
+ r = cg_unified_flush();
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+
/* Make sure rename_process() in the stub init process can work */
saved_argv = argv;
saved_argc = argc;
@@ -3810,6 +3799,10 @@ int main(int argc, char *argv[]) {
if (r < 0)
goto finish;
+ r = detect_unified_cgroup_hierarchy(arg_directory);
+ if (r < 0)
+ goto finish;
+
interactive =
isatty(STDIN_FILENO) > 0 &&
isatty(STDOUT_FILENO) > 0;
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
index c60fb631fa..30cd463722 100644
--- a/src/test/test-cgroup-util.c
+++ b/src/test/test-cgroup-util.c
@@ -18,11 +18,13 @@
***/
#include "alloc-util.h"
+#include "build.h"
#include "cgroup-util.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "format-util.h"
#include "parse-util.h"
+#include "proc-cmdline.h"
#include "process-util.h"
#include "stat-util.h"
#include "string-util.h"
@@ -332,7 +334,49 @@ static void test_fd_is_cgroup_fs(void) {
fd = safe_close(fd);
}
+static void test_is_wanted_print(bool header) {
+ _cleanup_free_ char *cmdline = NULL;
+
+ log_info("-- %s --", __func__);
+ assert_se(proc_cmdline(&cmdline) >= 0);
+ log_info("cmdline: %s", cmdline);
+ if (header) {
+
+ log_info(_CGROUP_HIEARCHY_);
+ (void) system("findmnt -n /sys/fs/cgroup");
+ }
+
+ log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted()));
+ log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted()));
+ log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted()));
+ log_info(" ");
+}
+
+static void test_is_wanted(void) {
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0 "
+ "systemd.legacy_systemd_cgroup_controller", 1) >= 0);
+ test_is_wanted_print(false);
+
+ assert_se(setenv("SYSTEMD_PROC_CMDLINE",
+ "systemd.unified_cgroup_hierarchy=0 "
+ "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0);
+ test_is_wanted_print(false);
+}
+
int main(void) {
+ log_set_max_level(LOG_DEBUG);
+ log_parse_environment();
+ log_open();
+
test_path_decode_unit();
test_path_get_unit();
test_path_get_user_unit();
@@ -349,6 +393,9 @@ int main(void) {
TEST_REQ_RUNNING_SYSTEMD(test_mask_supported());
TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs());
TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs());
+ test_is_wanted_print(true);
+ test_is_wanted_print(false); /* run twice to test caching */
+ test_is_wanted();
return 0;
}