summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2016-08-14 18:03:35 -0400
committerGitHub <noreply@github.com>2016-08-14 18:03:35 -0400
commit5f9a610ad2b3200a31bbd9181c810726eeb30385 (patch)
tree4f8b0e5a15d297dcf2d69802cdc9b7e08524384d
parent04d0f7e9f9e2d68502f322b8f494c652cbeb5146 (diff)
parent66ebf6c0a1005f90099e25ece5630551ad97a3c3 (diff)
Merge pull request #3905 from htejun/cgroup-v2-cpu
core: add cgroup CPU controller support on the unified hierarchy (zj: merging not squashing to make it clear against which upstream this patch was developed.)
-rw-r--r--man/systemd.resource-control.xml98
-rw-r--r--src/basic/cgroup-util.c47
-rw-r--r--src/basic/cgroup-util.h1
-rw-r--r--src/cgtop/cgtop.c50
-rw-r--r--src/core/cgroup.c203
-rw-r--r--src/core/cgroup.h5
-rw-r--r--src/core/dbus-cgroup.c46
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c29
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/unit.c6
-rw-r--r--src/core/unit.h4
-rw-r--r--src/shared/bus-unit-util.c11
13 files changed, 413 insertions, 90 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 0e98ca78b8..84dbfa2ff3 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -106,13 +106,21 @@
<para>
<variablelist>
+
<varlistentry>
- <term><option>IO</option></term>
+ <term><option>CPU</option></term>
<listitem>
- <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname>
- prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ <para>Due to the lack of consensus in the kernel community, the CPU controller support on the unified
+ cgroup hierarchy requires out-of-tree kernel patches. See <ulink
+ url="https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu">cgroup-v2-cpu.txt</ulink>.</para>
+
+ <para><varname>CPUWeight=</varname> and <varname>StartupCPUWeight=</varname> replace
+ <varname>CPUShares=</varname> and <varname>StartupCPUShares=</varname>, respectively.</para>
+
+ <para>The <literal>cpuacct</literal> controller does not exist separately on the unified hierarchy.</para>
</listitem>
</varlistentry>
+
<varlistentry>
<term><option>Memory</option></term>
<listitem>
@@ -120,6 +128,15 @@
and <varname>MemoryHigh=</varname> are effective only on unified hierarchy.</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><option>IO</option></term>
+ <listitem>
+ <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname>
+ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</para>
@@ -160,30 +177,49 @@
</varlistentry>
<varlistentry>
+ <term><varname>CPUWeight=<replaceable>weight</replaceable></varname></term>
+ <term><varname>StartupCPUWeight=<replaceable>weight</replaceable></varname></term>
+
+ <listitem>
+ <para>Assign the specified CPU time weight to the processes executed, if the unified control group hierarchy
+ is used on the system. These options take an integer value and control the <literal>cpu.weight</literal>
+ control group attribute. The allowed range is 1 to 10000. Defaults to 100. For details about this control
+ group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink
+ url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
+ The available CPU time is split up among all units within one slice relative to their CPU time weight.</para>
+
+ <para>While <varname>StartupCPUWeight=</varname> only applies to the startup phase of the system,
+ <varname>CPUWeight=</varname> applies to normal runtime of the system, and if the former is not set also to
+ the startup phase. Using <varname>StartupCPUWeight=</varname> allows prioritizing specific services at
+ boot-up differently than during normal runtime.</para>
+
+ <para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>These settings are supported only if the unified control group hierarchy is used.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
<term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term>
<listitem>
- <para>Assign the specified CPU time share weight to the
- processes executed. These options take an integer value and
- control the <literal>cpu.shares</literal> control group
- attribute. The allowed range is 2 to 262144. Defaults to
- 1024. For details about this control group attribute, see
- <ulink
+ <para>Assign the specified CPU time share weight to the processes executed. These options take an integer
+ value and control the <literal>cpu.shares</literal> control group attribute. The allowed range is 2 to
+ 262144. Defaults to 1024. For details about this control group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
- The available CPU time is split up among all units within
- one slice relative to their CPU time share weight.</para>
+ The available CPU time is split up among all units within one slice relative to their CPU time share
+ weight.</para>
- <para>While <varname>StartupCPUShares=</varname> only
- applies to the startup phase of the system,
- <varname>CPUShares=</varname> applies to normal runtime of
- the system, and if the former is not set also to the startup
- phase. Using <varname>StartupCPUShares=</varname> allows
- prioritizing specific services at boot-up differently than
- during normal runtime.</para>
+ <para>While <varname>StartupCPUShares=</varname> only applies to the startup phase of the system,
+ <varname>CPUShares=</varname> applies to normal runtime of the system, and if the former is not set also to
+ the startup phase. Using <varname>StartupCPUShares=</varname> allows prioritizing specific services at
+ boot-up differently than during normal runtime.</para>
- <para>These options imply
- <literal>CPUAccounting=true</literal>.</para>
+ <para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>These settings are supported only if the legacy control group hierarchy is used.</para>
</listitem>
</varlistentry>
@@ -191,22 +227,20 @@
<term><varname>CPUQuota=</varname></term>
<listitem>
- <para>Assign the specified CPU time quota to the processes
- executed. Takes a percentage value, suffixed with "%". The
- percentage specifies how much CPU time the unit shall get at
- maximum, relative to the total CPU time available on one
- CPU. Use values &gt; 100% for allotting CPU time on more than
- one CPU. This controls the
- <literal>cpu.cfs_quota_us</literal> control group
- attribute. For details about this control group attribute,
- see <ulink
+ <para>Assign the specified CPU time quota to the processes executed. Takes a percentage value, suffixed with
+ "%". The percentage specifies how much CPU time the unit shall get at maximum, relative to the total CPU time
+ available on one CPU. Use values &gt; 100% for allotting CPU time on more than one CPU. This controls the
+ <literal>cpu.max</literal> attribute on the unified control group hierarchy and
+ <literal>cpu.cfs_quota_us</literal> on legacy. For details about these control group attributes, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
- <para>Example: <varname>CPUQuota=20%</varname> ensures that
- the executed processes will never get more than 20% CPU time
- on one CPU.</para>
+ <para>Example: <varname>CPUQuota=20%</varname> ensures that the executed processes will never get more than
+ 20% CPU time on one CPU.</para>
<para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>This setting is supported on both unified and legacy control group hierarchies.</para>
</listitem>
</varlistentry>
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 302b958d0d..25ef8a5c76 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -1905,6 +1905,49 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri
return read_one_line_file(p, ret);
}
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
+ _cleanup_free_ char *filename = NULL, *content = NULL;
+ char *line, *p;
+ int i, r;
+
+ for (i = 0; keys[i]; i++)
+ values[i] = NULL;
+
+ r = cg_get_path(controller, path, attribute, &filename);
+ if (r < 0)
+ return r;
+
+ r = read_full_file(filename, &content, NULL);
+ if (r < 0)
+ return r;
+
+ p = content;
+ while ((line = strsep(&p, "\n"))) {
+ char *key;
+
+ key = strsep(&line, " ");
+
+ for (i = 0; keys[i]; i++) {
+ if (streq(key, keys[i])) {
+ values[i] = strdup(line);
+ break;
+ }
+ }
+ }
+
+ for (i = 0; keys[i]; i++) {
+ if (!values[i]) {
+ for (i = 0; keys[i]; i++) {
+ free(values[i]);
+ values[i] = NULL;
+ }
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
+
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
CGroupController c;
int r, unified;
@@ -2101,10 +2144,10 @@ int cg_mask_supported(CGroupMask *ret) {
mask |= CGROUP_CONTROLLER_TO_MASK(v);
}
- /* Currently, we only support the memory, io and pids
+ /* Currently, we support the cpu, memory, io and pids
* controller in the unified hierarchy, mask
* everything else off. */
- mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
+ mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
} else {
CGroupController c;
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index ec5c715987..a509a1775c 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -169,6 +169,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values);
int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index c67b328b38..6045ae0437 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -208,24 +208,47 @@ static int process(
if (g->n_tasks > 0)
g->n_tasks_valid = true;
- } else if (streq(controller, "cpuacct") && cg_unified() <= 0) {
+ } else if (streq(controller, "cpu") || streq(controller, "cpuacct")) {
_cleanup_free_ char *p = NULL, *v = NULL;
uint64_t new_usage;
nsec_t timestamp;
- r = cg_get_path(controller, path, "cpuacct.usage", &p);
- if (r < 0)
- return r;
+ if (cg_unified() > 0) {
+ const char *keys[] = { "usage_usec", NULL };
+ _cleanup_free_ char *val = NULL;
- r = read_one_line_file(p, &v);
- if (r == -ENOENT)
- return 0;
- if (r < 0)
- return r;
+ if (!streq(controller, "cpu"))
+ return 0;
- r = safe_atou64(v, &new_usage);
- if (r < 0)
- return r;
+ r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &new_usage);
+ if (r < 0)
+ return r;
+
+ new_usage *= NSEC_PER_USEC;
+ } else {
+ if (!streq(controller, "cpuacct"))
+ return 0;
+
+ r = cg_get_path(controller, path, "cpuacct.usage", &p);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(p, &v);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &new_usage);
+ if (r < 0)
+ return r;
+ }
timestamp = now_nsec(CLOCK_MONOTONIC);
@@ -449,6 +472,9 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration)
r = refresh_one(SYSTEMD_CGROUP_CONTROLLER, root, a, b, iteration, 0, NULL);
if (r < 0)
return r;
+ r = refresh_one("cpu", root, a, b, iteration, 0, NULL);
+ if (r < 0)
+ return r;
r = refresh_one("cpuacct", root, a, b, iteration, 0, NULL);
if (r < 0)
return r;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c19e43f571..910a64b4da 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -57,9 +57,12 @@ void cgroup_context_init(CGroupContext *c) {
/* Initialize everything to the kernel defaults, assuming the
* structure is preinitialized to 0 */
+ c->cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
- c->cpu_quota_per_sec_usec = USEC_INFINITY;
c->memory_high = CGROUP_LIMIT_MAX;
c->memory_max = CGROUP_LIMIT_MAX;
@@ -158,6 +161,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
+ "%sCPUWeight=%" PRIu64 "\n"
+ "%sStartupCPUWeight=%" PRIu64 "\n"
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
@@ -177,6 +182,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
+ prefix, c->cpu_weight,
+ prefix, c->startup_cpu_weight,
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -382,6 +389,95 @@ fail:
return -errno;
}
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+ return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+ return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_cpu_weight;
+ else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->cpu_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->startup_cpu_shares;
+ else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->cpu_shares;
+ else
+ return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.weight: %m");
+
+ if (quota != USEC_INFINITY)
+ xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+ quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+ else
+ xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
+
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.max: %m");
+}
+
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", shares);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.shares: %m");
+
+ xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_period_us: %m");
+
+ if (quota != USEC_INFINITY) {
+ xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+ } else
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_quota_us: %m");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+ return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+ return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
static bool cgroup_context_has_io_config(CGroupContext *c) {
return c->io_accounting ||
c->io_weight != CGROUP_WEIGHT_INVALID ||
@@ -566,30 +662,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* and missing cgroups, i.e. EROFS and ENOENT. */
if ((mask & CGROUP_MASK_CPU) && !is_root) {
- char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ bool has_weight = cgroup_context_has_cpu_weight(c);
+ bool has_shares = cgroup_context_has_cpu_shares(c);
- sprintf(buf, "%" PRIu64 "\n",
- IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
- r = cg_set_attribute("cpu", path, "cpu.shares", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.shares: %m");
+ if (cg_unified() > 0) {
+ uint64_t weight;
- sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_period_us: %m");
+ if (has_weight)
+ weight = cgroup_context_cpu_weight(c, state);
+ else if (has_shares) {
+ uint64_t shares = cgroup_context_cpu_shares(c, state);
- if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
- sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
- } else
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_quota_us: %m");
+ weight = cgroup_cpu_shares_to_weight(shares);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+ shares, weight, path);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+ } else {
+ uint64_t shares;
+
+ if (has_shares)
+ shares = cgroup_context_cpu_shares(c, state);
+ else if (has_weight) {
+ uint64_t weight = cgroup_context_cpu_weight(c, state);
+
+ shares = cgroup_cpu_weight_to_shares(weight);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+ weight, shares, path);
+ } else
+ shares = CGROUP_CPU_SHARES_DEFAULT;
+
+ cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+ }
}
if (mask & CGROUP_MASK_IO) {
@@ -864,8 +972,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need */
if (c->cpu_accounting ||
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
- c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ cgroup_context_has_cpu_weight(c) ||
+ cgroup_context_has_cpu_shares(c) ||
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
@@ -1890,18 +1998,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
- if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
- return -ENODATA;
+ if (cg_unified() > 0) {
+ const char *keys[] = { "usage_usec", NULL };
+ _cleanup_free_ char *val = NULL;
+ uint64_t us;
- r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
- if (r == -ENOENT)
- return -ENODATA;
- if (r < 0)
- return r;
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
+ return -ENODATA;
- r = safe_atou64(v, &ns);
- if (r < 0)
- return r;
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &us);
+ if (r < 0)
+ return r;
+
+ ns = us * NSEC_PER_USEC;
+ } else {
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
+ return -ENODATA;
+
+ r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &ns);
+ if (r < 0)
+ return r;
+ }
*ret = ns;
return 0;
@@ -1915,8 +2042,8 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
if (r < 0)
return r;
- if (ns > u->cpuacct_usage_base)
- ns -= u->cpuacct_usage_base;
+ if (ns > u->cpu_usage_base)
+ ns -= u->cpu_usage_base;
else
ns = 0;
@@ -1932,11 +2059,11 @@ int unit_reset_cpu_usage(Unit *u) {
r = unit_get_cpu_usage_raw(u, &ns);
if (r < 0) {
- u->cpuacct_usage_base = 0;
+ u->cpu_usage_base = 0;
return r;
}
- u->cpuacct_usage_base = ns;
+ u->cpu_usage_base = ns;
return 0;
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index a57403e79f..2fe9cc4039 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -89,6 +89,10 @@ struct CGroupContext {
bool tasks_accounting;
/* For unified hierarchy */
+ uint64_t cpu_weight;
+ uint64_t startup_cpu_weight;
+ usec_t cpu_quota_per_sec_usec;
+
uint64_t io_weight;
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@@ -101,7 +105,6 @@ struct CGroupContext {
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
- usec_t cpu_quota_per_sec_usec;
uint64_t blockio_weight;
uint64_t startup_blockio_weight;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 85b0c86a2f..2ca80d2996 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+ SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+ SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
@@ -303,6 +305,50 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "CPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_WEIGHT_INVALID)
+ unit_write_drop_in_private(u, mode, name, "CPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
+ } else if (streq(name, "StartupCPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->startup_cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_CPU_SHARES_INVALID)
+ unit_write_drop_in_private(u, mode, name, "StartupCPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
} else if (streq(name, "CPUShares")) {
uint64_t shares;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 251155b428..05fe0df7e3 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -122,6 +122,8 @@ $1.KillSignal, config_parse_signal, 0,
m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight)
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 420f368689..4ad6c4d79b 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2851,6 +2851,34 @@ int config_parse_unit_slice(
DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
+int config_parse_cpu_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *weight = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = cg_weight_parse(rvalue, weight);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
int config_parse_cpu_shares(
const char *unit,
const char *filename,
@@ -4191,6 +4219,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_address_families, "FAMILIES" },
#endif
{ config_parse_cpu_shares, "SHARES" },
+ { config_parse_cpu_weight, "WEIGHT" },
{ config_parse_memory_limit, "LIMIT" },
{ config_parse_device_allow, "DEVICE" },
{ config_parse_device_policy, "POLICY" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 213bce55a7..8b688740cf 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -81,6 +81,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned
int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/unit.c b/src/core/unit.c
index ff7c562fba..952604e0db 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -2608,7 +2608,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
unit_serialize_item(u, f, "transient", yes_no(u->transient));
- unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base);
+ unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
if (u->cgroup_path)
unit_serialize_item(u, f, "cgroup", u->cgroup_path);
@@ -2824,9 +2824,9 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
- } else if (streq(l, "cpuacct-usage-base")) {
+ } else if (streq(l, "cpu-usage-base") || streq(l, "cpuacct-usage-base")) {
- r = safe_atou64(v, &u->cpuacct_usage_base);
+ r = safe_atou64(v, &u->cpu_usage_base);
if (r < 0)
log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
diff --git a/src/core/unit.h b/src/core/unit.h
index 47eb8d50a6..a6c69938dd 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -184,8 +184,8 @@ struct Unit {
UnitFileState unit_file_state;
int unit_file_preset;
- /* Where the cpuacct.usage cgroup counter was at the time the unit was started */
- nsec_t cpuacct_usage_base;
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
/* Counterparts in the cgroup filesystem */
char *cgroup_path;
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 7774d607c7..f9e12e0578 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -212,6 +212,17 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "b", r);
+ } else if (STR_IN_SET(field, "CPUWeight", "StartupCPUWeight")) {
+ uint64_t u;
+
+ r = cg_weight_parse(eq, &u);
+ if (r < 0) {
+ log_error("Failed to parse %s value %s.", field, eq);
+ return -EINVAL;
+ }
+
+ r = sd_bus_message_append(m, "v", "t", u);
+
} else if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) {
uint64_t u;