diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2016-08-14 18:03:35 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-08-14 18:03:35 -0400 |
commit | 5f9a610ad2b3200a31bbd9181c810726eeb30385 (patch) | |
tree | 4f8b0e5a15d297dcf2d69802cdc9b7e08524384d | |
parent | 04d0f7e9f9e2d68502f322b8f494c652cbeb5146 (diff) | |
parent | 66ebf6c0a1005f90099e25ece5630551ad97a3c3 (diff) |
Merge pull request #3905 from htejun/cgroup-v2-cpu
core: add cgroup CPU controller support on the unified hierarchy
(zj: merging not squashing to make it clear against which upstream this patch was developed.)
-rw-r--r-- | man/systemd.resource-control.xml | 98 | ||||
-rw-r--r-- | src/basic/cgroup-util.c | 47 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 1 | ||||
-rw-r--r-- | src/cgtop/cgtop.c | 50 | ||||
-rw-r--r-- | src/core/cgroup.c | 203 | ||||
-rw-r--r-- | src/core/cgroup.h | 5 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 46 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 29 | ||||
-rw-r--r-- | src/core/load-fragment.h | 1 | ||||
-rw-r--r-- | src/core/unit.c | 6 | ||||
-rw-r--r-- | src/core/unit.h | 4 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 11 |
13 files changed, 413 insertions, 90 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 0e98ca78b8..84dbfa2ff3 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -106,13 +106,21 @@ <para> <variablelist> + <varlistentry> - <term><option>IO</option></term> + <term><option>CPU</option></term> <listitem> - <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname> - prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para> + <para>Due to the lack of consensus in the kernel community, the CPU controller support on the unified + cgroup hierarchy requires out-of-tree kernel patches. See <ulink + url="https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu">cgroup-v2-cpu.txt</ulink>.</para> + + <para><varname>CPUWeight=</varname> and <varname>StartupCPUWeight=</varname> replace + <varname>CPUShares=</varname> and <varname>StartupCPUShares=</varname>, respectively.</para> + + <para>The <literal>cpuacct</literal> controller does not exist separately on the unified hierarchy.</para> </listitem> </varlistentry> + <varlistentry> <term><option>Memory</option></term> <listitem> @@ -120,6 +128,15 @@ and <varname>MemoryHigh=</varname> are effective only on unified hierarchy.</para> </listitem> </varlistentry> + + <varlistentry> + <term><option>IO</option></term> + <listitem> + <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname> + prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para> + </listitem> + </varlistentry> + </variablelist> </para> @@ -160,30 +177,49 @@ </varlistentry> <varlistentry> + <term><varname>CPUWeight=<replaceable>weight</replaceable></varname></term> + <term><varname>StartupCPUWeight=<replaceable>weight</replaceable></varname></term> + + <listitem> + <para>Assign the specified CPU time weight to the processes executed, if the unified control group hierarchy + is used on the system. These options take an integer value and control the <literal>cpu.weight</literal> + control group attribute. The allowed range is 1 to 10000. Defaults to 100. For details about this control + group attribute, see <ulink + url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink + url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>. + The available CPU time is split up among all units within one slice relative to their CPU time weight.</para> + + <para>While <varname>StartupCPUWeight=</varname> only applies to the startup phase of the system, + <varname>CPUWeight=</varname> applies to normal runtime of the system, and if the former is not set also to + the startup phase. Using <varname>StartupCPUWeight=</varname> allows prioritizing specific services at + boot-up differently than during normal runtime.</para> + + <para>Implies <literal>CPUAccounting=true</literal>.</para> + + <para>These settings are supported only if the unified control group hierarchy is used.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>CPUShares=<replaceable>weight</replaceable></varname></term> <term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term> <listitem> - <para>Assign the specified CPU time share weight to the - processes executed. These options take an integer value and - control the <literal>cpu.shares</literal> control group - attribute. The allowed range is 2 to 262144. Defaults to - 1024. For details about this control group attribute, see - <ulink + <para>Assign the specified CPU time share weight to the processes executed. These options take an integer + value and control the <literal>cpu.shares</literal> control group attribute. The allowed range is 2 to + 262144. Defaults to 1024. For details about this control group attribute, see <ulink url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>. - The available CPU time is split up among all units within - one slice relative to their CPU time share weight.</para> + The available CPU time is split up among all units within one slice relative to their CPU time share + weight.</para> - <para>While <varname>StartupCPUShares=</varname> only - applies to the startup phase of the system, - <varname>CPUShares=</varname> applies to normal runtime of - the system, and if the former is not set also to the startup - phase. Using <varname>StartupCPUShares=</varname> allows - prioritizing specific services at boot-up differently than - during normal runtime.</para> + <para>While <varname>StartupCPUShares=</varname> only applies to the startup phase of the system, + <varname>CPUShares=</varname> applies to normal runtime of the system, and if the former is not set also to + the startup phase. Using <varname>StartupCPUShares=</varname> allows prioritizing specific services at + boot-up differently than during normal runtime.</para> - <para>These options imply - <literal>CPUAccounting=true</literal>.</para> + <para>Implies <literal>CPUAccounting=true</literal>.</para> + + <para>These settings are supported only if the legacy control group hierarchy is used.</para> </listitem> </varlistentry> @@ -191,22 +227,20 @@ <term><varname>CPUQuota=</varname></term> <listitem> - <para>Assign the specified CPU time quota to the processes - executed. Takes a percentage value, suffixed with "%". The - percentage specifies how much CPU time the unit shall get at - maximum, relative to the total CPU time available on one - CPU. Use values > 100% for allotting CPU time on more than - one CPU. This controls the - <literal>cpu.cfs_quota_us</literal> control group - attribute. For details about this control group attribute, - see <ulink + <para>Assign the specified CPU time quota to the processes executed. Takes a percentage value, suffixed with + "%". The percentage specifies how much CPU time the unit shall get at maximum, relative to the total CPU time + available on one CPU. Use values > 100% for allotting CPU time on more than one CPU. This controls the + <literal>cpu.max</literal> attribute on the unified control group hierarchy and + <literal>cpu.cfs_quota_us</literal> on legacy. For details about these control group attributes, see <ulink + url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para> - <para>Example: <varname>CPUQuota=20%</varname> ensures that - the executed processes will never get more than 20% CPU time - on one CPU.</para> + <para>Example: <varname>CPUQuota=20%</varname> ensures that the executed processes will never get more than + 20% CPU time on one CPU.</para> <para>Implies <literal>CPUAccounting=true</literal>.</para> + + <para>This setting is supported on both unified and legacy control group hierarchies.</para> </listitem> </varlistentry> diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 302b958d0d..25ef8a5c76 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -1905,6 +1905,49 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri return read_one_line_file(p, ret); } +int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) { + _cleanup_free_ char *filename = NULL, *content = NULL; + char *line, *p; + int i, r; + + for (i = 0; keys[i]; i++) + values[i] = NULL; + + r = cg_get_path(controller, path, attribute, &filename); + if (r < 0) + return r; + + r = read_full_file(filename, &content, NULL); + if (r < 0) + return r; + + p = content; + while ((line = strsep(&p, "\n"))) { + char *key; + + key = strsep(&line, " "); + + for (i = 0; keys[i]; i++) { + if (streq(key, keys[i])) { + values[i] = strdup(line); + break; + } + } + } + + for (i = 0; keys[i]; i++) { + if (!values[i]) { + for (i = 0; keys[i]; i++) { + free(values[i]); + values[i] = NULL; + } + return -ENOENT; + } + } + + return 0; +} + int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { CGroupController c; int r, unified; @@ -2101,10 +2144,10 @@ int cg_mask_supported(CGroupMask *ret) { mask |= CGROUP_CONTROLLER_TO_MASK(v); } - /* Currently, we only support the memory, io and pids + /* Currently, we support the cpu, memory, io and pids * controller in the unified hierarchy, mask * everything else off. */ - mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS; + mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS; } else { CGroupController c; diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index ec5c715987..a509a1775c 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -169,6 +169,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid); int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); +int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values); int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid); int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index c67b328b38..6045ae0437 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -208,24 +208,47 @@ static int process( if (g->n_tasks > 0) g->n_tasks_valid = true; - } else if (streq(controller, "cpuacct") && cg_unified() <= 0) { + } else if (streq(controller, "cpu") || streq(controller, "cpuacct")) { _cleanup_free_ char *p = NULL, *v = NULL; uint64_t new_usage; nsec_t timestamp; - r = cg_get_path(controller, path, "cpuacct.usage", &p); - if (r < 0) - return r; + if (cg_unified() > 0) { + const char *keys[] = { "usage_usec", NULL }; + _cleanup_free_ char *val = NULL; - r = read_one_line_file(p, &v); - if (r == -ENOENT) - return 0; - if (r < 0) - return r; + if (!streq(controller, "cpu")) + return 0; - r = safe_atou64(v, &new_usage); - if (r < 0) - return r; + r = cg_get_keyed_attribute("cpu", path, "cpu.stat", keys, &val); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + r = safe_atou64(val, &new_usage); + if (r < 0) + return r; + + new_usage *= NSEC_PER_USEC; + } else { + if (!streq(controller, "cpuacct")) + return 0; + + r = cg_get_path(controller, path, "cpuacct.usage", &p); + if (r < 0) + return r; + + r = read_one_line_file(p, &v); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + r = safe_atou64(v, &new_usage); + if (r < 0) + return r; + } timestamp = now_nsec(CLOCK_MONOTONIC); @@ -449,6 +472,9 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) r = refresh_one(SYSTEMD_CGROUP_CONTROLLER, root, a, b, iteration, 0, NULL); if (r < 0) return r; + r = refresh_one("cpu", root, a, b, iteration, 0, NULL); + if (r < 0) + return r; r = refresh_one("cpuacct", root, a, b, iteration, 0, NULL); if (r < 0) return r; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index c19e43f571..910a64b4da 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -57,9 +57,12 @@ void cgroup_context_init(CGroupContext *c) { /* Initialize everything to the kernel defaults, assuming the * structure is preinitialized to 0 */ + c->cpu_weight = CGROUP_WEIGHT_INVALID; + c->startup_cpu_weight = CGROUP_WEIGHT_INVALID; + c->cpu_quota_per_sec_usec = USEC_INFINITY; + c->cpu_shares = CGROUP_CPU_SHARES_INVALID; c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; - c->cpu_quota_per_sec_usec = USEC_INFINITY; c->memory_high = CGROUP_LIMIT_MAX; c->memory_max = CGROUP_LIMIT_MAX; @@ -158,6 +161,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sBlockIOAccounting=%s\n" "%sMemoryAccounting=%s\n" "%sTasksAccounting=%s\n" + "%sCPUWeight=%" PRIu64 "\n" + "%sStartupCPUWeight=%" PRIu64 "\n" "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" @@ -177,6 +182,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->blockio_accounting), prefix, yes_no(c->memory_accounting), prefix, yes_no(c->tasks_accounting), + prefix, c->cpu_weight, + prefix, c->startup_cpu_weight, prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), @@ -382,6 +389,95 @@ fail: return -errno; } +static bool cgroup_context_has_cpu_weight(CGroupContext *c) { + return c->cpu_weight != CGROUP_WEIGHT_INVALID || + c->startup_cpu_weight != CGROUP_WEIGHT_INVALID; +} + +static bool cgroup_context_has_cpu_shares(CGroupContext *c) { + return c->cpu_shares != CGROUP_CPU_SHARES_INVALID || + c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID; +} + +static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_cpu_weight != CGROUP_WEIGHT_INVALID) + return c->startup_cpu_weight; + else if (c->cpu_weight != CGROUP_WEIGHT_INVALID) + return c->cpu_weight; + else + return CGROUP_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID) + return c->startup_cpu_shares; + else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID) + return c->cpu_shares; + else + return CGROUP_CPU_SHARES_DEFAULT; +} + +static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) { + char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)]; + int r; + + xsprintf(buf, "%" PRIu64 "\n", weight); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.weight: %m"); + + if (quota != USEC_INFINITY) + xsprintf(buf, USEC_FMT " " USEC_FMT "\n", + quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC); + else + xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf); + + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.max: %m"); +} + +static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) { + char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; + int r; + + xsprintf(buf, "%" PRIu64 "\n", shares); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.shares: %m"); + + xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_period_us: %m"); + + if (quota != USEC_INFINITY) { + xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf); + } else + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1"); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_quota_us: %m"); +} + +static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) { + return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) { + return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX); +} + static bool cgroup_context_has_io_config(CGroupContext *c) { return c->io_accounting || c->io_weight != CGROUP_WEIGHT_INVALID || @@ -566,30 +662,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { * and missing cgroups, i.e. EROFS and ENOENT. */ if ((mask & CGROUP_MASK_CPU) && !is_root) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; + bool has_weight = cgroup_context_has_cpu_weight(c); + bool has_shares = cgroup_context_has_cpu_shares(c); - sprintf(buf, "%" PRIu64 "\n", - IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares : - c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT); - r = cg_set_attribute("cpu", path, "cpu.shares", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares: %m"); + if (cg_unified() > 0) { + uint64_t weight; - sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); - r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us: %m"); + if (has_weight) + weight = cgroup_context_cpu_weight(c, state); + else if (has_shares) { + uint64_t shares = cgroup_context_cpu_shares(c, state); - if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { - sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); - r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf); - } else - r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us: %m"); + weight = cgroup_cpu_shares_to_weight(shares); + + log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s", + shares, weight, path); + } else + weight = CGROUP_WEIGHT_DEFAULT; + + cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec); + } else { + uint64_t shares; + + if (has_shares) + shares = cgroup_context_cpu_shares(c, state); + else if (has_weight) { + uint64_t weight = cgroup_context_cpu_weight(c, state); + + shares = cgroup_cpu_weight_to_shares(weight); + + log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s", + weight, shares, path); + } else + shares = CGROUP_CPU_SHARES_DEFAULT; + + cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec); + } } if (mask & CGROUP_MASK_IO) { @@ -864,8 +972,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { /* Figure out which controllers we need */ if (c->cpu_accounting || - c->cpu_shares != CGROUP_CPU_SHARES_INVALID || - c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID || + cgroup_context_has_cpu_weight(c) || + cgroup_context_has_cpu_shares(c) || c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; @@ -1890,18 +1998,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; - if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) - return -ENODATA; + if (cg_unified() > 0) { + const char *keys[] = { "usage_usec", NULL }; + _cleanup_free_ char *val = NULL; + uint64_t us; - r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); - if (r == -ENOENT) - return -ENODATA; - if (r < 0) - return r; + if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) + return -ENODATA; - r = safe_atou64(v, &ns); - if (r < 0) - return r; + r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val); + if (r < 0) + return r; + + r = safe_atou64(val, &us); + if (r < 0) + return r; + + ns = us * NSEC_PER_USEC; + } else { + if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) + return -ENODATA; + + r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); + if (r == -ENOENT) + return -ENODATA; + if (r < 0) + return r; + + r = safe_atou64(v, &ns); + if (r < 0) + return r; + } *ret = ns; return 0; @@ -1915,8 +2042,8 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) { if (r < 0) return r; - if (ns > u->cpuacct_usage_base) - ns -= u->cpuacct_usage_base; + if (ns > u->cpu_usage_base) + ns -= u->cpu_usage_base; else ns = 0; @@ -1932,11 +2059,11 @@ int unit_reset_cpu_usage(Unit *u) { r = unit_get_cpu_usage_raw(u, &ns); if (r < 0) { - u->cpuacct_usage_base = 0; + u->cpu_usage_base = 0; return r; } - u->cpuacct_usage_base = ns; + u->cpu_usage_base = ns; return 0; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index a57403e79f..2fe9cc4039 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -89,6 +89,10 @@ struct CGroupContext { bool tasks_accounting; /* For unified hierarchy */ + uint64_t cpu_weight; + uint64_t startup_cpu_weight; + usec_t cpu_quota_per_sec_usec; + uint64_t io_weight; uint64_t startup_io_weight; LIST_HEAD(CGroupIODeviceWeight, io_device_weights); @@ -101,7 +105,6 @@ struct CGroupContext { /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; - usec_t cpu_quota_per_sec_usec; uint64_t blockio_weight; uint64_t startup_blockio_weight; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 85b0c86a2f..2ca80d2996 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0), SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0), + SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0), + SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0), SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), @@ -303,6 +305,50 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "CPUWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->cpu_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "CPUWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight); + } + + return 1; + + } else if (streq(name, "StartupCPUWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->startup_cpu_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + + if (weight == CGROUP_CPU_SHARES_INVALID) + unit_write_drop_in_private(u, mode, name, "StartupCPUWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight); + } + + return 1; + } else if (streq(name, "CPUShares")) { uint64_t shares; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 251155b428..05fe0df7e3 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -122,6 +122,8 @@ $1.KillSignal, config_parse_signal, 0, m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS', `$1.Slice, config_parse_unit_slice, 0, 0 $1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting) +$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight) +$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight) $1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares) $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 420f368689..4ad6c4d79b 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2851,6 +2851,34 @@ int config_parse_unit_slice( DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy"); +int config_parse_cpu_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *weight = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = cg_weight_parse(rvalue, weight); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue); + return 0; + } + + return 0; +} + int config_parse_cpu_shares( const char *unit, const char *filename, @@ -4191,6 +4219,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_address_families, "FAMILIES" }, #endif { config_parse_cpu_shares, "SHARES" }, + { config_parse_cpu_weight, "WEIGHT" }, { config_parse_memory_limit, "LIMIT" }, { config_parse_device_allow, "DEVICE" }, { config_parse_device_policy, "POLICY" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 213bce55a7..8b688740cf 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -81,6 +81,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/unit.c b/src/core/unit.c index ff7c562fba..952604e0db 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2608,7 +2608,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result)); unit_serialize_item(u, f, "transient", yes_no(u->transient)); - unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base); + unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base); if (u->cgroup_path) unit_serialize_item(u, f, "cgroup", u->cgroup_path); @@ -2824,9 +2824,9 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; - } else if (streq(l, "cpuacct-usage-base")) { + } else if (streq(l, "cpu-usage-base") || streq(l, "cpuacct-usage-base")) { - r = safe_atou64(v, &u->cpuacct_usage_base); + r = safe_atou64(v, &u->cpu_usage_base); if (r < 0) log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v); diff --git a/src/core/unit.h b/src/core/unit.h index 47eb8d50a6..a6c69938dd 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -184,8 +184,8 @@ struct Unit { UnitFileState unit_file_state; int unit_file_preset; - /* Where the cpuacct.usage cgroup counter was at the time the unit was started */ - nsec_t cpuacct_usage_base; + /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */ + nsec_t cpu_usage_base; /* Counterparts in the cgroup filesystem */ char *cgroup_path; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 7774d607c7..f9e12e0578 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -212,6 +212,17 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "b", r); + } else if (STR_IN_SET(field, "CPUWeight", "StartupCPUWeight")) { + uint64_t u; + + r = cg_weight_parse(eq, &u); + if (r < 0) { + log_error("Failed to parse %s value %s.", field, eq); + return -EINVAL; + } + + r = sd_bus_message_append(m, "v", "t", u); + } else if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) { uint64_t u; |