summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTejun Heo <htejun@fb.com>2016-08-07 09:45:39 -0400
committerTejun Heo <tj@kernel.org>2016-08-07 09:45:39 -0400
commit66ebf6c0a1005f90099e25ece5630551ad97a3c3 (patch)
treed66af9b366845249937bf6b16025552e97ff25be
parentceab9e2deee29dfda213a30c533c7dcd59c6bcad (diff)
core: add cgroup CPU controller support on the unified hierarchy
Unfortunately, due to the disagreements in the kernel development community, CPU controller cgroup v2 support has not been merged and enabling it requires applying two small out-of-tree kernel patches. The situation is explained in the following documentation. https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu While it isn't clear what will happen with CPU controller cgroup v2 support, there are critical features which are possible only on cgroup v2 such as buffered write control making cgroup v2 essential for a lot of workloads. This commit implements systemd CPU controller support on the unified hierarchy so that users who choose to deploy CPU controller cgroup v2 support can easily take advantage of it. On the unified hierarchy, "cpu.weight" knob replaces "cpu.shares" and "cpu.max" replaces "cpu.cfs_period_us" and "cpu.cfs_quota_us". [Startup]CPUWeight config options are added with the usual compat translation. CPU quota settings remain unchanged and apply to both legacy and unified hierarchies. v2: - Error in man page corrected. - CPU config application in cgroup_context_apply() refactored. - CPU accounting now works on unified hierarchy.
-rw-r--r--man/systemd.resource-control.xml98
-rw-r--r--src/basic/cgroup-util.c47
-rw-r--r--src/basic/cgroup-util.h1
-rw-r--r--src/cgtop/cgtop.c50
-rw-r--r--src/core/cgroup.c203
-rw-r--r--src/core/cgroup.h5
-rw-r--r--src/core/dbus-cgroup.c46
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c29
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/unit.c6
-rw-r--r--src/core/unit.h4
-rw-r--r--src/shared/bus-unit-util.c11
13 files changed, 413 insertions, 90 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 0e98ca78b8..84dbfa2ff3 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -106,13 +106,21 @@
<para>
<variablelist>
+
<varlistentry>
- <term><option>IO</option></term>
+ <term><option>CPU</option></term>
<listitem>
- <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname>
- prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ <para>Due to the lack of consensus in the kernel community, the CPU controller support on the unified
+ cgroup hierarchy requires out-of-tree kernel patches. See <ulink
+ url="https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu">cgroup-v2-cpu.txt</ulink>.</para>
+
+ <para><varname>CPUWeight=</varname> and <varname>StartupCPUWeight=</varname> replace
+ <varname>CPUShares=</varname> and <varname>StartupCPUShares=</varname>, respectively.</para>
+
+ <para>The <literal>cpuacct</literal> controller does not exist separately on the unified hierarchy.</para>
</listitem>
</varlistentry>
+
<varlistentry>
<term><option>Memory</option></term>
<listitem>
@@ -120,6 +128,15 @@
and <varname>MemoryHigh=</varname> are effective only on unified hierarchy.</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><option>IO</option></term>
+ <listitem>
+ <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname>
+ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</para>
@@ -160,30 +177,49 @@
</varlistentry>
<varlistentry>
+ <term><varname>CPUWeight=<replaceable>weight</replaceable></varname></term>
+ <term><varname>StartupCPUWeight=<replaceable>weight</replaceable></varname></term>
+
+ <listitem>
+ <para>Assign the specified CPU time weight to the processes executed, if the unified control group hierarchy
+ is used on the system. These options take an integer value and control the <literal>cpu.weight</literal>
+ control group attribute. The allowed range is 1 to 10000. Defaults to 100. For details about this control
+ group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink
+ url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
+ The available CPU time is split up among all units within one slice relative to their CPU time weight.</para>
+
+ <para>While <varname>StartupCPUWeight=</varname> only applies to the startup phase of the system,
+ <varname>CPUWeight=</varname> applies to normal runtime of the system, and if the former is not set also to
+ the startup phase. Using <varname>StartupCPUWeight=</varname> allows prioritizing specific services at
+ boot-up differently than during normal runtime.</para>
+
+ <para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>These settings are supported only if the unified control group hierarchy is used.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
<term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term>
<listitem>
- <para>Assign the specified CPU time share weight to the
- processes executed. These options take an integer value and
- control the <literal>cpu.shares</literal> control group
- attribute. The allowed range is 2 to 262144. Defaults to
- 1024. For details about this control group attribute, see
- <ulink
+ <para>Assign the specified CPU time share weight to the processes executed. These options take an integer
+ value and control the <literal>cpu.shares</literal> control group attribute. The allowed range is 2 to
+ 262144. Defaults to 1024. For details about this control group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
- The available CPU time is split up among all units within
- one slice relative to their CPU time share weight.</para>
+ The available CPU time is split up among all units within one slice relative to their CPU time share
+ weight.</para>
- <para>While <varname>StartupCPUShares=</varname> only
- applies to the startup phase of the system,
- <varname>CPUShares=</varname> applies to normal runtime of
- the system, and if the former is not set also to the startup
- phase. Using <varname>StartupCPUShares=</varname> allows
- prioritizing specific services at boot-up differently than
- during normal runtime.</para>
+ <para>While <varname>StartupCPUShares=</varname> only applies to the startup phase of the system,
+ <varname>CPUShares=</varname> applies to normal runtime of the system, and if the former is not set also to
+ the startup phase. Using <varname>StartupCPUShares=</varname> allows prioritizing specific services at
+ boot-up differently than during normal runtime.</para>
- <para>These options imply
- <literal>CPUAccounting=true</literal>.</para>
+ <para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>These settings are supported only if the legacy control group hierarchy is used.</para>
</listitem>
</varlistentry>
@@ -191,22 +227,20 @@
<term><varname>CPUQuota=</varname></term>
<listitem>
- <para>Assign the specified CPU time quota to the processes
- executed. Takes a percentage value, suffixed with "%". The
- percentage specifies how much CPU time the unit shall get at
- maximum, relative to the total CPU time available on one
- CPU. Use values &gt; 100% for allotting CPU time on more than
- one CPU. This controls the
- <literal>cpu.cfs_quota_us</literal> control group
- attribute. For details about this control group attribute,
- see <ulink
+ <para>Assign the specified CPU time quota to the processes executed. Takes a percentage value, suffixed with
+ "%". The percentage specifies how much CPU time the unit shall get at maximum, relative to the total CPU time
+ available on one CPU. Use values &gt; 100% for allotting CPU time on more than one CPU. This controls the
+ <literal>cpu.max</literal> attribute on the unified control group hierarchy and
+ <literal>cpu.cfs_quota_us</literal> on legacy. For details about these control group attributes, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
- <para>Example: <varname>CPUQuota=20%</varname> ensures that
- the executed processes will never get more than 20% CPU time
- on one CPU.</para>
+ <para>Example: <varname>CPUQuota=20%</varname> ensures that the executed processes will never get more than
+ 20% CPU time on one CPU.</para>
<para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>This setting is supported on both unified and legacy control group hierarchies.</para>
</listitem>
</varlistentry>
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 302b958d0d..25ef8a5c76 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -1905,6 +1905,49 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri
return read_one_line_file(p, ret);
}
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
+ _cleanup_free_ char *filename = NULL, *content = NULL;
+ char *line, *p;
+ int i, r;
+
+ for (i = 0; keys[i]; i++)
+ values[i] = NULL;
+
+ r = cg_get_path(controller, path, attribute, &filename);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(filename, &content, NULL);
+ if (r < 0)
+ return r;
+
+ p = content;
+ while ((line = strsep(&p, "\n"))) {
+ char *key;
+
+ key = strsep(&line, " ");
+
+ for (i = 0; keys[i]; i++) {
+ if (streq(key, keys[i])) {
+ values[i] = strdup(line);
+ break;
+ }
+ }
+ }
+
+ for (i = 0; keys[i]; i++) {
+ if (!values[i]) {
+ for (i = 0; keys[i]; i++) {
+ free(values[i]);
+ values[i] = NULL;
+ }
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
+
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
CGroupController c;
int r, unified;
@@ -2101,10 +2144,10 @@ int cg_mask_supported(CGroupMask *ret) {
mask |= CGROUP_CONTROLLER_TO_MASK(v);
}
- /* Currently, we only support the memory, io and pids
+ /* Currently, we support the cpu, memory, io and pids
* controller in the unified hierarchy, mask
* everything else off. */
- mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
+ mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
} else {
CGroupController c;
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index ec5c715987..a509a1775c 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -169,6 +169,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values);
int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index c67b328b38..6045ae0437 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -208,24 +208,47 @@ static int process(
if (g->n_tasks > 0)
g->n_tasks_valid = true;
- } else if (streq(controller, "cpuacct") && cg_unified() <= 0) {
+ } else if (streq(controller, "cpu") || streq(controller, "cpuacct")) {
_cleanup_free_ char *p = NULL, *v = NULL;
uint64_t new_usage;
nsec_t timestamp;
- r = cg_get_path(controller, path, "cpuacct.usage", &p);
- if (r < 0)
- return r;
+ if (cg_unified() > 0) {
+ const char *keys[] = { "usage_usec", NULL };
+ _cleanup_free_ char *val = NULL;
- r = read_one_line_file(p, &v);
- if (r == -ENOENT)
- return 0;
- if (r < 0)
- return r;
+ if (!streq(controller, "cpu"))
+ return 0;
- r = safe_atou64(v, &new_usage);
- if (r < 0)
- return r;
+ r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &new_usage);
+ if (r < 0)
+ return r;
+
+ new_usage *= NSEC_PER_USEC;
+ } else {
+ if (!streq(controller, "cpuacct"))
+ return 0;
+
+ r = cg_get_path(controller, path, "cpuacct.usage", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &new_usage);
+ if (r < 0)
+ return r;
+ }
timestamp = now_nsec(CLOCK_MONOTONIC);
@@ -449,6 +472,9 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration)
r = refresh_one(SYSTEMD_CGROUP_CONTROLLER, root, a, b, iteration, 0, NULL);
if (r < 0)
return r;
+ r = refresh_one("cpu", root, a, b, iteration, 0, NULL);
+ if (r < 0)
+ return r;
r = refresh_one("cpuacct", root, a, b, iteration, 0, NULL);
if (r < 0)
return r;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c19e43f571..910a64b4da 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -57,9 +57,12 @@ void cgroup_context_init(CGroupContext *c) {
/* Initialize everything to the kernel defaults, assuming the
* structure is preinitialized to 0 */
+ c->cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
- c->cpu_quota_per_sec_usec = USEC_INFINITY;
c->memory_high = CGROUP_LIMIT_MAX;
c->memory_max = CGROUP_LIMIT_MAX;
@@ -158,6 +161,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
+ "%sCPUWeight=%" PRIu64 "\n"
+ "%sStartupCPUWeight=%" PRIu64 "\n"
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
@@ -177,6 +182,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
+ prefix, c->cpu_weight,
+ prefix, c->startup_cpu_weight,
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -382,6 +389,95 @@ fail:
return -errno;
}
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+ return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+ return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_cpu_weight;
+ else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->cpu_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->startup_cpu_shares;
+ else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->cpu_shares;
+ else
+ return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.weight: %m");
+
+ if (quota != USEC_INFINITY)
+ xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+ quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+ else
+ xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
+
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.max: %m");
+}
+
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", shares);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.shares: %m");
+
+ xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_period_us: %m");
+
+ if (quota != USEC_INFINITY) {
+ xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+ } else
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_quota_us: %m");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+ return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+ return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
static bool cgroup_context_has_io_config(CGroupContext *c) {
return c->io_accounting ||
c->io_weight != CGROUP_WEIGHT_INVALID ||
@@ -566,30 +662,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* and missing cgroups, i.e. EROFS and ENOENT. */
if ((mask & CGROUP_MASK_CPU) && !is_root) {
- char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ bool has_weight = cgroup_context_has_cpu_weight(c);
+ bool has_shares = cgroup_context_has_cpu_shares(c);
- sprintf(buf, "%" PRIu64 "\n",
- IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
- r = cg_set_attribute("cpu", path, "cpu.shares", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.shares: %m");
+ if (cg_unified() > 0) {
+ uint64_t weight;
- sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_period_us: %m");
+ if (has_weight)
+ weight = cgroup_context_cpu_weight(c, state);
+ else if (has_shares) {
+ uint64_t shares = cgroup_context_cpu_shares(c, state);
- if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
- sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
- } else
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_quota_us: %m");
+ weight = cgroup_cpu_shares_to_weight(shares);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+ shares, weight, path);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+ } else {
+ uint64_t shares;
+
+ if (has_shares)
+ shares = cgroup_context_cpu_shares(c, state);
+ else if (has_weight) {
+ uint64_t weight = cgroup_context_cpu_weight(c, state);
+
+ shares = cgroup_cpu_weight_to_shares(weight);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+ weight, shares, path);
+ } else
+ shares = CGROUP_CPU_SHARES_DEFAULT;
+
+ cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+ }
}
if (mask & CGROUP_MASK_IO) {
@@ -864,8 +972,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need */
if (c->cpu_accounting ||
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
- c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ cgroup_context_has_cpu_weight(c) ||
+ cgroup_context_has_cpu_shares(c) ||
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
@@ -1890,18 +1998,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
- if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
- return -ENODATA;
+ if (cg_unified() > 0) {
+ const char *keys[] = { "usage_usec", NULL };
+ _cleanup_free_ char *val = NULL;
+ uint64_t us;
- r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
- if (r == -ENOENT)
- return -ENODATA;
- if (r < 0)
- return r;
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
+ return -ENODATA;
- r = safe_atou64(v, &ns);
- if (r < 0)
- return r;
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &us);
+ if (r < 0)
+ return r;
+
+ ns = us * NSEC_PER_USEC;
+ } else {
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
+ return -ENODATA;
+
+ r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &ns);
+ if (r < 0)
+ return r;
+ }
*ret = ns;
return 0;
@@ -1915,8 +2042,8 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
if (r < 0)
return r;
- if (ns > u->cpuacct_usage_base)
- ns -= u->cpuacct_usage_base;
+ if (ns > u->cpu_usage_base)
+ ns -= u->cpu_usage_base;
else
ns = 0;
@@ -1932,11 +2059,11 @@ int unit_reset_cpu_usage(Unit *u) {
r = unit_get_cpu_usage_raw(u, &ns);
if (r < 0) {
- u->cpuacct_usage_base = 0;
+ u->cpu_usage_base = 0;
return r;
}
- u->cpuacct_usage_base = ns;
+ u->cpu_usage_base = ns;
return 0;
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index a57403e79f..2fe9cc4039 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -89,6 +89,10 @@ struct CGroupContext {
bool tasks_accounting;
/* For unified hierarchy */
+ uint64_t cpu_weight;
+ uint64_t startup_cpu_weight;
+ usec_t cpu_quota_per_sec_usec;
+
uint64_t io_weight;
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@@ -101,7 +105,6 @@ struct CGroupContext {
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
- usec_t cpu_quota_per_sec_usec;
uint64_t blockio_weight;
uint64_t startup_blockio_weight;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 85b0c86a2f..2ca80d2996 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+ SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+ SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
@@ -303,6 +305,50 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "CPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_WEIGHT_INVALID)
+ unit_write_drop_in_private(u, mode, name, "CPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
+ } else if (streq(name, "StartupCPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->startup_cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_CPU_SHARES_INVALID)
+ unit_write_drop_in_private(u, mode, name, "StartupCPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
} else if (streq(name, "CPUShares")) {
uint64_t shares;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 396f847213..edcfa6fa1b 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -121,6 +121,8 @@ $1.KillSignal, config_parse_signal, 0,
m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight)
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 420f368689..4ad6c4d79b 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2851,6 +2851,34 @@ int config_parse_unit_slice(
DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
+int config_parse_cpu_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *weight = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = cg_weight_parse(rvalue, weight);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
int config_parse_cpu_shares(
const char *unit,
const char *filename,
@@ -4191,6 +4219,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_address_families, "FAMILIES" },
#endif
{ config_parse_cpu_shares, "SHARES" },
+ { config_parse_cpu_weight, "WEIGHT" },
{ config_parse_memory_limit, "LIMIT" },
{ config_parse_device_allow, "DEVICE" },
{ config_parse_device_policy, "POLICY" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 213bce55a7..8b688740cf 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -81,6 +81,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned
int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/unit.c b/src/core/unit.c
index ff7c562fba..952604e0db 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -2608,7 +2608,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
unit_serialize_item(u, f, "transient", yes_no(u->transient));
- unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base);
+ unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
if (u->cgroup_path)
unit_serialize_item(u, f, "cgroup", u->cgroup_path);
@@ -2824,9 +2824,9 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
- } else if (streq(l, "cpuacct-usage-base")) {
+ } else if (streq(l, "cpu-usage-base") || streq(l, "cpuacct-usage-base")) {
- r = safe_atou64(v, &u->cpuacct_usage_base);
+ r = safe_atou64(v, &u->cpu_usage_base);
if (r < 0)
log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
diff --git a/src/core/unit.h b/src/core/unit.h
index 47eb8d50a6..a6c69938dd 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -184,8 +184,8 @@ struct Unit {
UnitFileState unit_file_state;
int unit_file_preset;
- /* Where the cpuacct.usage cgroup counter was at the time the unit was started */
- nsec_t cpuacct_usage_base;
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
/* Counterparts in the cgroup filesystem */
char *cgroup_path;
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index c3a5233532..bb98951a06 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -212,6 +212,17 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "b", r);
+ } else if (STR_IN_SET(field, "CPUWeight", "StartupCPUWeight")) {
+ uint64_t u;
+
+ r = cg_weight_parse(eq, &u);
+ if (r < 0) {
+ log_error("Failed to parse %s value %s.", field, eq);
+ return -EINVAL;
+ }
+
+ r = sd_bus_message_append(m, "v", "t", u);
+
} else if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) {
uint64_t u;