From c51fa94772d85f2a2442d6bcecb5efc4c102ff34 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Tue, 5 Apr 2016 10:48:06 +0200 Subject: man: update links to kernel.org cgroup documentation This recently moved from /cgroups/ to /cgroup-v1/. Fixes #2958 --- man/systemd.resource-control.xml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'man/systemd.resource-control.xml') diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 08cdf06e23..804d7375b0 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -202,7 +202,7 @@ controls the memory.limit_in_bytes control group attribute. For details about this control group attribute, see memory.txt. + url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt. Implies MemoryAccounting=true. @@ -239,7 +239,7 @@ controls the pids.max control group attribute. For details about this control group attribute, see pids.txt. + url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt. Implies TasksAccounting=true. The system default for this setting may be controlled with @@ -273,7 +273,7 @@ the blkio.weight control group attribute, which defaults to 500. For details about this control group attribute, see blkio-controller.txt. + url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt. The available I/O bandwidth is split up among all units within one slice relative to their block I/O weight. @@ -305,7 +305,7 @@ attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For details about this control group attribute, see blkio-controller.txt. + url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt. Implies BlockIOAccounting=true. @@ -333,7 +333,7 @@ attributes. Use this option multiple times to set bandwidth limits for multiple devices. For details about these control group attributes, see blkio-controller.txt. + url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt. Implies @@ -357,7 +357,7 @@ devices.deny control group attributes. For details about these control group attributes, see devices.txt. + url="https://www.kernel.org/doc/Documentation/cgroup-v1/devices.txt">devices.txt. The device node specifier is either a path to a device node in the file system, starting with @@ -482,10 +482,10 @@ systemd.directives7, systemd.special7, The documentation for control groups and specific controllers in the Linux kernel: - cgroups.txt, - cpuacct.txt, - memory.txt, - blkio-controller.txt. + cgroups.txt, + cpuacct.txt, + memory.txt, + blkio-controller.txt. -- cgit v1.2.3-54-g00ecf From 5e939dd6a47571f731a92aa1288e955fde7a37f8 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Tue, 5 Apr 2016 15:28:47 +0200 Subject: man: fix cgroup attributes for device throttling --- man/systemd.resource-control.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'man/systemd.resource-control.xml') diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 804d7375b0..fd6f7a1b69 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -328,8 +328,8 @@ Gigabytes, or Terabytes, respectively, to the base of 1000. (Example: "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This - controls the blkio.read_bps_device and - blkio.write_bps_device control group + controls the blkio.throttle.read_bps_device and + blkio.throttle.write_bps_device control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For details about these control group attributes, see Date: Thu, 5 May 2016 16:42:55 -0400 Subject: core: add io controller support on the unified hierarchy On the unified hierarchy, blkio controller is renamed to io and the interface is changed significantly. * blkio.weight and blkio.weight_device are consolidated into io.weight which uses the standardized weight range [1, 10000] with 100 as the default value. * blkio.throttle.{read|write}_{bps|iops}_device are consolidated into io.max. Expansion of throttling features is being worked on to support work-conserving absolute limits (io.low and io.high). * All stats are consolidated into io.stats. This patchset adds support for the new interface. As the interface has been revamped and new features are expected to be added, it seems best to treat it as a separate controller rather than trying to expand the blkio settings although we might add automatic translation if only blkio settings are specified. * io.weight handling is mostly identical to blkio.weight[_device] handling except that the weight range is different. * Both read and write bandwidth settings are consolidated into CGroupIODeviceLimit which describes all limits applicable to the device. This makes it less painful to add new limits. * "max" can be used to specify the maximum limit which is equivalent to no config for max limits and treated as such. If a given CGroupIODeviceLimit doesn't contain any non-default configs, the config struct is discarded once the no limit config is applied to cgroup. * lookup_blkio_device() is renamed to lookup_block_device(). Signed-off-by: Tejun Heo --- man/systemd.resource-control.xml | 108 +++++++++++- src/basic/cgroup-util.c | 25 ++- src/basic/cgroup-util.h | 18 ++ src/cgtop/cgtop.c | 50 ++++-- src/core/cgroup.c | 147 +++++++++++++++- src/core/cgroup.h | 26 +++ src/core/dbus-cgroup.c | 308 ++++++++++++++++++++++++++++++++++ src/core/load-fragment-gperf.gperf.m4 | 6 + src/core/load-fragment.c | 193 +++++++++++++++++++++ src/core/load-fragment.h | 3 + src/core/main.c | 3 + src/core/manager.h | 1 + src/core/system.conf | 1 + src/core/unit.c | 2 + src/shared/bus-unit-util.c | 30 +++- src/systemctl/systemctl.c | 5 +- 16 files changed, 892 insertions(+), 34 deletions(-) (limited to 'man/systemd.resource-control.xml') diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index fd6f7a1b69..4edb1a25a8 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -248,10 +248,104 @@ + + IOAccounting= + + + Turn on Block I/O accounting for this unit on unified + hierarchy. Takes a boolean argument. Note that turning on + block I/O accounting for one unit will also implicitly turn + it on for all units contained in the same slice and all for + its parent slices and the units contained therein. The + system default for this setting may be controlled with + DefaultIOAccounting= in + systemd-system.conf5. + + + + + IOWeight=weight + StartupIOWeight=weight + + + Set the default overall block I/O weight for the + executed processes on unified hierarchy. Takes a single + weight value (between 1 and 10000) to set the default block + I/O weight. This controls the io.weight + control group attribute, which defaults to 100. For details + about this control group attribute, see cgroup-v2.txt. + The available I/O bandwidth is split up among all units + within one slice relative to their block I/O weight. + + While StartupIOWeight= only applies + to the startup phase of the system, + IOWeight= applies to the later runtime of + the system, and if the former is not set also to the startup + phase. This allows prioritizing specific services at boot-up + differently than during runtime. + + Implies IOAccounting=true. + + + + + IODeviceWeight=device weight + + + Set the per-device overall block I/O weight for the + executed processes on unified hierarchy. Takes a + space-separated pair of a file path and a weight value to + specify the device specific weight value, between 1 and + 10000. (Example: "/dev/sda 1000"). The file path may be + specified as path to a block device node or as any other + file, in which case the backing block device of the file + system of the file is determined. This controls the + io.weight control group attribute, which + defaults to 100. Use this option multiple times to set + weights for multiple devices. For details about this control + group attribute, see cgroup-v2.txt. + + Implies IOAccounting=true. + + + + + IOReadBandwidthMax=device bytes + IOWriteBandwidthMax=device bytes + + + Set the per-device overall block I/O bandwidth maximum + limit for the executed processes on unified hierarchy. This + limit is not work-conserving and the executed processes are + not allowed to use more even if the device has idle + capacity. Takes a space-separated pair of a file path and a + bandwidth value (in bytes per second) to specify the device + specific bandwidth. The file path may be a path to a block + device node, or as any other file in which case the backing + block device of the file system of the file is used. If the + bandwidth is suffixed with K, M, G, or T, the specified + bandwidth is parsed as Kilobytes, Megabytes, Gigabytes, or + Terabytes, respectively, to the base of 1000. (Example: + "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This + controls the io.max control group + attributes. Use this option multiple times to set bandwidth + limits for multiple devices. For details about this control + group attribute, see cgroup-v2.txt. + + + Implies IOAccounting=true. + + + BlockIOAccounting= + Use IOAccounting on unified hierarchy. + Turn on Block I/O accounting for this unit. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly turn it on for all units @@ -267,9 +361,12 @@ BlockIOWeight=weight StartupBlockIOWeight=weight - Set the default overall block I/O weight for - the executed processes. Takes a single weight value (between - 10 and 1000) to set the default block I/O weight. This controls + Use IOWeight and StartupIOWeight on unified + hierarchy. + + Set the default overall block I/O weight for the + executed processes. Takes a single weight value (between 10 + and 1000) to set the default block I/O weight. This controls the blkio.weight control group attribute, which defaults to 500. For details about this control group attribute, see BlockIODeviceWeight=device weight + Use IODeviceWeight on unified hierarchy. + Set the per-device overall block I/O weight for the executed processes. Takes a space-separated pair of a file path and a weight value to specify the device specific @@ -317,6 +416,9 @@ BlockIOWriteBandwidth=device bytes + Use IOReadBandwidthMax and IOWriteBandwidthMax on + unified hierarchy. + Set the per-device overall block I/O bandwidth limit for the executed processes. Takes a space-separated pair of a file path and a bandwidth value (in bytes per second) to diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 5043180747..ff57cf30b7 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2060,10 +2060,10 @@ int cg_mask_supported(CGroupMask *ret) { mask |= CGROUP_CONTROLLER_TO_MASK(v); } - /* Currently, we only support the memory and pids + /* Currently, we only support the memory, io and pids * controller in the unified hierarchy, mask * everything else off. */ - mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS; + mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS; } else { CGroupController c; @@ -2249,6 +2249,26 @@ bool cg_is_legacy_wanted(void) { return !cg_is_unified_wanted(); } +int cg_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + int cg_cpu_shares_parse(const char *s, uint64_t *ret) { uint64_t u; int r; @@ -2292,6 +2312,7 @@ int cg_blkio_weight_parse(const char *s, uint64_t *ret) { static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { [CGROUP_CONTROLLER_CPU] = "cpu", [CGROUP_CONTROLLER_CPUACCT] = "cpuacct", + [CGROUP_CONTROLLER_IO] = "io", [CGROUP_CONTROLLER_BLKIO] = "blkio", [CGROUP_CONTROLLER_MEMORY] = "memory", [CGROUP_CONTROLLER_DEVICES] = "devices", diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 4254e51e5d..a696c1fa60 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -34,6 +34,7 @@ typedef enum CGroupController { CGROUP_CONTROLLER_CPU, CGROUP_CONTROLLER_CPUACCT, + CGROUP_CONTROLLER_IO, CGROUP_CONTROLLER_BLKIO, CGROUP_CONTROLLER_MEMORY, CGROUP_CONTROLLER_DEVICES, @@ -48,6 +49,7 @@ typedef enum CGroupController { typedef enum CGroupMask { CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU), CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT), + CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO), CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO), CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY), CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES), @@ -55,6 +57,21 @@ typedef enum CGroupMask { _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 } CGroupMask; +/* Special values for all weight knobs on unified hierarchy */ +#define CGROUP_WEIGHT_INVALID ((uint64_t) -1) +#define CGROUP_WEIGHT_MIN UINT64_C(1) +#define CGROUP_WEIGHT_MAX UINT64_C(10000) +#define CGROUP_WEIGHT_DEFAULT UINT64_C(100) + +#define CGROUP_LIMIT_MIN UINT64_C(0) +#define CGROUP_LIMIT_MAX ((uint64_t) -1) + +static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) { + return + x == CGROUP_WEIGHT_INVALID || + (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX); +} + /* Special values for the cpu.shares attribute */ #define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1) #define CGROUP_CPU_SHARES_MIN UINT64_C(2) @@ -190,5 +207,6 @@ bool cg_is_legacy_wanted(void); const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; +int cg_weight_parse(const char *s, uint64_t *ret); int cg_cpu_shares_parse(const char *s, uint64_t *ret); int cg_blkio_weight_parse(const char *s, uint64_t *ret); diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 14eb46c8db..e088e4b197 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -269,13 +269,15 @@ static int process( if (g->memory > 0) g->memory_valid = true; - } else if (streq(controller, "blkio") && cg_unified() <= 0) { + } else if ((streq(controller, "io") && cg_unified() > 0) || + (streq(controller, "blkio") && cg_unified() <= 0)) { _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *p = NULL; + bool unified = cg_unified() > 0; uint64_t wr = 0, rd = 0; nsec_t timestamp; - r = cg_get_path(controller, path, "blkio.io_service_bytes", &p); + r = cg_get_path(controller, path, unified ? "io.stat" : "blkio.io_service_bytes", &p); if (r < 0) return r; @@ -293,25 +295,38 @@ static int process( if (!fgets(line, sizeof(line), f)) break; + /* Trim and skip the device */ l = strstrip(line); l += strcspn(l, WHITESPACE); l += strspn(l, WHITESPACE); - if (first_word(l, "Read")) { - l += 4; - q = &rd; - } else if (first_word(l, "Write")) { - l += 5; - q = ≀ - } else - continue; - - l += strspn(l, WHITESPACE); - r = safe_atou64(l, &k); - if (r < 0) - continue; + if (unified) { + while (!isempty(l)) { + if (sscanf(l, "rbytes=%" SCNu64, &k)) + rd += k; + else if (sscanf(l, "wbytes=%" SCNu64, &k)) + wr += k; - *q += k; + l += strcspn(l, WHITESPACE); + l += strspn(l, WHITESPACE); + } + } else { + if (first_word(l, "Read")) { + l += 4; + q = &rd; + } else if (first_word(l, "Write")) { + l += 5; + q = ≀ + } else + continue; + + l += strspn(l, WHITESPACE); + r = safe_atou64(l, &k); + if (r < 0) + continue; + + *q += k; + } } timestamp = now_nsec(CLOCK_MONOTONIC); @@ -437,6 +452,9 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration) if (r < 0) return r; r = refresh_one("memory", root, a, b, iteration, 0, NULL); + if (r < 0) + return r; + r = refresh_one("io", root, a, b, iteration, 0, NULL); if (r < 0) return r; r = refresh_one("blkio", root, a, b, iteration, 0, NULL); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 25cc6962f9..44106e52ea 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -32,6 +32,7 @@ #include "special.h" #include "string-table.h" #include "string-util.h" +#include "stdio-util.h" #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) @@ -47,6 +48,9 @@ void cgroup_context_init(CGroupContext *c) { c->memory_limit = (uint64_t) -1; + c->io_weight = CGROUP_WEIGHT_INVALID; + c->startup_io_weight = CGROUP_WEIGHT_INVALID; + c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; @@ -62,6 +66,24 @@ void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { free(a); } +void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) { + assert(c); + assert(w); + + LIST_REMOVE(device_weights, c->io_device_weights, w); + free(w->path); + free(w); +} + +void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) { + assert(c); + assert(l); + + LIST_REMOVE(device_limits, c->io_device_limits, l); + free(l->path); + free(l); +} + void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) { assert(c); assert(w); @@ -83,6 +105,12 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI void cgroup_context_done(CGroupContext *c) { assert(c); + while (c->io_device_weights) + cgroup_context_free_io_device_weight(c, c->io_device_weights); + + while (c->io_device_limits) + cgroup_context_free_io_device_limit(c, c->io_device_limits); + while (c->blockio_device_weights) cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights); @@ -94,6 +122,8 @@ void cgroup_context_done(CGroupContext *c) { } void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { + CGroupIODeviceLimit *il; + CGroupIODeviceWeight *iw; CGroupBlockIODeviceBandwidth *b; CGroupBlockIODeviceWeight *w; CGroupDeviceAllow *a; @@ -106,12 +136,15 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { fprintf(f, "%sCPUAccounting=%s\n" + "%sIOAccounting=%s\n" "%sBlockIOAccounting=%s\n" "%sMemoryAccounting=%s\n" "%sTasksAccounting=%s\n" "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" + "%sIOWeight=%" PRIu64 "\n" + "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" @@ -119,12 +152,15 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sDevicePolicy=%s\n" "%sDelegate=%s\n", prefix, yes_no(c->cpu_accounting), + prefix, yes_no(c->io_accounting), prefix, yes_no(c->blockio_accounting), prefix, yes_no(c->memory_accounting), prefix, yes_no(c->tasks_accounting), prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), + prefix, c->io_weight, + prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, prefix, c->memory_limit, @@ -139,6 +175,31 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { a->path, a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : ""); + LIST_FOREACH(device_weights, iw, c->io_device_weights) + fprintf(f, + "%sIODeviceWeight=%s %" PRIu64, + prefix, + iw->path, + iw->weight); + + LIST_FOREACH(device_limits, il, c->io_device_limits) { + char buf[FORMAT_BYTES_MAX]; + + if (il->rbps_max != CGROUP_LIMIT_MAX) + fprintf(f, + "%sIOReadBandwidthMax=%s %s\n", + prefix, + il->path, + format_bytes(buf, sizeof(buf), il->rbps_max)); + + if (il->wbps_max != CGROUP_LIMIT_MAX) + fprintf(f, + "%sIOWriteBandwidthMax=%s %s\n", + prefix, + il->path, + format_bytes(buf, sizeof(buf), il->wbps_max)); + } + LIST_FOREACH(device_weights, w, c->blockio_device_weights) fprintf(f, "%sBlockIODeviceWeight=%s %" PRIu64, @@ -158,7 +219,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { } } -static int lookup_blkio_device(const char *p, dev_t *dev) { +static int lookup_block_device(const char *p, dev_t *dev) { struct stat st; int r; @@ -343,6 +404,77 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M "Failed to set cpu.cfs_quota_us on %s: %m", path); } + if (mask & CGROUP_MASK_IO) { + CGroupIODeviceWeight *w; + CGroupIODeviceLimit *l, *next; + + if (!is_root) { + char buf[MAX(8+DECIMAL_STR_MAX(uint64_t)+1, + DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; + uint64_t weight = CGROUP_WEIGHT_DEFAULT; + + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_io_weight != CGROUP_WEIGHT_INVALID) + weight = c->startup_io_weight; + else if (c->io_weight != CGROUP_WEIGHT_INVALID) + weight = c->io_weight; + + xsprintf(buf, "default %" PRIu64 "\n", weight); + r = cg_set_attribute("io", path, "io.weight", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight on %s: %m", path); + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) { + dev_t dev; + + r = lookup_block_device(w->path, &dev); + if (r < 0) + continue; + + xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight); + r = cg_set_attribute("io", path, "io.weight", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight on %s: %m", path); + } + } + + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + char rbps_buf[DECIMAL_STR_MAX(uint64_t)] = "max"; + char wbps_buf[DECIMAL_STR_MAX(uint64_t)] = "max"; + char buf[DECIMAL_STR_MAX(dev_t)*2+2+(5+DECIMAL_STR_MAX(uint64_t)+1)*2]; + dev_t dev; + unsigned n = 0; + + r = lookup_block_device(l->path, &dev); + if (r < 0) + continue; + + if (l->rbps_max != CGROUP_LIMIT_MAX) { + xsprintf(rbps_buf, "%" PRIu64, l->rbps_max); + n++; + } + + if (l->wbps_max != CGROUP_LIMIT_MAX) { + xsprintf(wbps_buf, "%" PRIu64, l->wbps_max); + n++; + } + + xsprintf(buf, "%u:%u rbps=%s wbps=%s\n", major(dev), minor(dev), rbps_buf, wbps_buf); + r = cg_set_attribute("io", path, "io.max", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.max on %s: %m", path); + + /* If @l contained no config, we just cleared the kernel + counterpart too. No reason to keep @l around. */ + if (!n) + cgroup_context_free_io_device_limit(c, l); + } + } + if (mask & CGROUP_MASK_BLKIO) { char buf[MAX(DECIMAL_STR_MAX(uint64_t)+1, DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; @@ -362,7 +494,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M LIST_FOREACH(device_weights, w, c->blockio_device_weights) { dev_t dev; - r = lookup_blkio_device(w->path, &dev); + r = lookup_block_device(w->path, &dev); if (r < 0) continue; @@ -379,7 +511,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M const char *a; dev_t dev; - r = lookup_blkio_device(b->path, &dev); + r = lookup_block_device(b->path, &dev); if (r < 0) continue; @@ -506,6 +638,13 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; + if (c->io_accounting || + c->io_weight != CGROUP_WEIGHT_INVALID || + c->startup_io_weight != CGROUP_WEIGHT_INVALID || + c->io_device_weights || + c->io_device_limits) + mask |= CGROUP_MASK_IO; + if (c->blockio_accounting || c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || @@ -1608,7 +1747,7 @@ void manager_invalidate_startup_units(Manager *m) { assert(m); SET_FOREACH(u, m->startup_units, i) - unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_BLKIO); + unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO); } static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 360bbca30f..a533923072 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -26,6 +26,8 @@ typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; +typedef struct CGroupIODeviceWeight CGroupIODeviceWeight; +typedef struct CGroupIODeviceLimit CGroupIODeviceLimit; typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; @@ -53,6 +55,19 @@ struct CGroupDeviceAllow { bool m:1; }; +struct CGroupIODeviceWeight { + LIST_FIELDS(CGroupIODeviceWeight, device_weights); + char *path; + uint64_t weight; +}; + +struct CGroupIODeviceLimit { + LIST_FIELDS(CGroupIODeviceLimit, device_limits); + char *path; + uint64_t rbps_max; + uint64_t wbps_max; +}; + struct CGroupBlockIODeviceWeight { LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); char *path; @@ -68,10 +83,18 @@ struct CGroupBlockIODeviceBandwidth { struct CGroupContext { bool cpu_accounting; + bool io_accounting; bool blockio_accounting; bool memory_accounting; bool tasks_accounting; + /* For unified hierarchy */ + uint64_t io_weight; + uint64_t startup_io_weight; + LIST_HEAD(CGroupIODeviceWeight, io_device_weights); + LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; usec_t cpu_quota_per_sec_usec; @@ -86,6 +109,7 @@ struct CGroupContext { CGroupDevicePolicy device_policy; LIST_HEAD(CGroupDeviceAllow, device_allow); + /* Common */ uint64_t tasks_max; bool delegate; @@ -102,6 +126,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupMask cgroup_context_get_mask(CGroupContext *c); void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); +void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w); +void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l); void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 859d155ec1..a2a4a6249c 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -28,6 +28,76 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy); +static int property_get_io_device_weight( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + CGroupIODeviceWeight *w; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'a', "(st)"); + if (r < 0) + return r; + + LIST_FOREACH(device_weights, w, c->io_device_weights) { + r = sd_bus_message_append(reply, "(st)", w->path, w->weight); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + +static int property_get_io_device_limits( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + CGroupIODeviceLimit *l; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'a', "(st)"); + if (r < 0) + return r; + + LIST_FOREACH(device_limits, l, c->io_device_limits) { + uint64_t v; + + if (streq(property, "IOReadBandwidthMax")) + v = l->rbps_max; + else + v = l->wbps_max; + + if (v == CGROUP_LIMIT_MAX) + continue; + + r = sd_bus_message_append(reply, "(st)", l->path, v); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + static int property_get_blockio_device_weight( sd_bus *bus, const char *path, @@ -141,6 +211,12 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), + SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0), + SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0), + SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0), + SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0), + SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0), @@ -281,6 +357,238 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "IOAccounting")) { + int b; + + r = sd_bus_message_read(message, "b", &b); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + c->io_accounting = b; + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + unit_write_drop_in_private(u, mode, name, b ? "IOAccounting=yes" : "IOAccounting=no"); + } + + return 1; + + } else if (streq(name, "IOWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "IOWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->io_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "IOWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "IOWeight=%" PRIu64, weight); + } + + return 1; + + } else if (streq(name, "StartupIOWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "StartupIOWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->startup_io_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "StartupIOWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "StartupIOWeight=%" PRIu64, weight); + } + + return 1; + + } else if (streq(name, "IOReadBandwidthMax") || streq(name, "IOWriteBandwidthMax")) { + const char *path; + bool read = true; + unsigned n = 0; + uint64_t u64; + + if (streq(name, "IOWriteBandwidthMax")) + read = false; + + r = sd_bus_message_enter_container(message, 'a', "(st)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) { + + if (mode != UNIT_CHECK) { + CGroupIODeviceLimit *a = NULL, *b; + + LIST_FOREACH(device_limits, b, c->io_device_limits) { + if (path_equal(path, b->path)) { + a = b; + break; + } + } + + if (!a) { + a = new0(CGroupIODeviceLimit, 1); + if (!a) + return -ENOMEM; + + a->path = strdup(path); + if (!a->path) { + free(a); + return -ENOMEM; + } + + a->rbps_max = CGROUP_LIMIT_MAX; + a->wbps_max = CGROUP_LIMIT_MAX; + + LIST_PREPEND(device_limits, c->io_device_limits, a); + } + + if (read) + a->rbps_max = u64; + else + a->wbps_max = u64; + } + + n++; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + CGroupIODeviceLimit *a; + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + size_t size = 0; + + if (n == 0) { + LIST_FOREACH(device_limits, a, c->io_device_limits) + if (read) + a->rbps_max = CGROUP_LIMIT_MAX; + else + a->wbps_max = CGROUP_LIMIT_MAX; + } + + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + f = open_memstream(&buf, &size); + if (!f) + return -ENOMEM; + + if (read) { + fputs("IOReadBandwidthMax=\n", f); + LIST_FOREACH(device_limits, a, c->io_device_limits) + if (a->rbps_max != CGROUP_LIMIT_MAX) + fprintf(f, "IOReadBandwidthMax=%s %" PRIu64 "\n", a->path, a->rbps_max); + } else { + fputs("IOWriteBandwidthMax=\n", f); + LIST_FOREACH(device_limits, a, c->io_device_limits) + if (a->wbps_max != CGROUP_LIMIT_MAX) + fprintf(f, "IOWriteBandwidthMax=%s %" PRIu64 "\n", a->path, a->wbps_max); + } + + r = fflush_and_check(f); + if (r < 0) + return r; + unit_write_drop_in_private(u, mode, name, buf); + } + + return 1; + + } else if (streq(name, "IODeviceWeight")) { + const char *path; + uint64_t weight; + unsigned n = 0; + + r = sd_bus_message_enter_container(message, 'a', "(st)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) { + + if (!CGROUP_WEIGHT_IS_OK(weight) || weight == CGROUP_WEIGHT_INVALID) + return sd_bus_error_set_errnof(error, EINVAL, "IODeviceWeight out of range"); + + if (mode != UNIT_CHECK) { + CGroupIODeviceWeight *a = NULL, *b; + + LIST_FOREACH(device_weights, b, c->io_device_weights) { + if (path_equal(b->path, path)) { + a = b; + break; + } + } + + if (!a) { + a = new0(CGroupIODeviceWeight, 1); + if (!a) + return -ENOMEM; + + a->path = strdup(path); + if (!a->path) { + free(a); + return -ENOMEM; + } + LIST_PREPEND(device_weights,c->io_device_weights, a); + } + + a->weight = weight; + } + + n++; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + CGroupIODeviceWeight *a; + size_t size = 0; + + if (n == 0) { + while (c->io_device_weights) + cgroup_context_free_io_device_weight(c, c->io_device_weights); + } + + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + f = open_memstream(&buf, &size); + if (!f) + return -ENOMEM; + + fputs("IODeviceWeight=\n", f); + LIST_FOREACH(device_weights, a, c->io_device_weights) + fprintf(f, "IODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight); + + r = fflush_and_check(f); + if (r < 0) + return r; + unit_write_drop_in_private(u, mode, name, buf); + } + + return 1; + } else if (streq(name, "BlockIOAccounting")) { int b; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 928b913c7b..ad45611d9d 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -120,6 +120,12 @@ $1.MemoryAccounting, config_parse_bool, 0, $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) +$1.IOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.io_accounting) +$1.IOWeight, config_parse_io_weight, 0, offsetof($1, cgroup_context.io_weight) +$1.StartupIOWeight, config_parse_io_weight, 0, offsetof($1, cgroup_context.startup_io_weight) +$1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context) +$1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) $1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight) $1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 1a8c03904c..9d7329e924 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2904,6 +2904,196 @@ int config_parse_device_allow( return 0; } +int config_parse_io_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *weight = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = cg_weight_parse(rvalue, weight); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid. Ignoring.", rvalue); + return 0; + } + + return 0; +} + +int config_parse_io_device_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupIODeviceWeight *w; + CGroupContext *c = data; + const char *weight; + uint64_t u; + size_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + while (c->io_device_weights) + cgroup_context_free_io_device_weight(c, c->io_device_weights); + + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + weight = rvalue + n; + weight += strspn(weight, WHITESPACE); + + if (isempty(weight)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Expected block device and device weight. Ignoring."); + return 0; + } + + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + r = cg_weight_parse(weight, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid. Ignoring.", weight); + return 0; + } + + assert(u != CGROUP_WEIGHT_INVALID); + + w = new0(CGroupIODeviceWeight, 1); + if (!w) + return log_oom(); + + w->path = path; + path = NULL; + + w->weight = u; + + LIST_PREPEND(device_weights, c->io_device_weights, w); + return 0; +} + +int config_parse_io_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupIODeviceLimit *l = NULL, *t; + CGroupContext *c = data; + const char *limit; + uint64_t num; + bool read; + size_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + read = streq("IOReadBandwidthMax", lvalue); + + if (isempty(rvalue)) { + LIST_FOREACH(device_limits, l, c->io_device_limits) + if (read) + l->rbps_max = CGROUP_LIMIT_MAX; + else + l->wbps_max = CGROUP_LIMIT_MAX; + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + limit = rvalue + n; + limit += strspn(limit, WHITESPACE); + + if (!*limit) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Expected space separated pair of device node and bandwidth. Ignoring."); + return 0; + } + + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + if (streq("max", limit)) { + num = CGROUP_LIMIT_MAX; + } else { + r = parse_size(limit, 1000, &num); + if (r < 0 || num <= 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "IO Limit '%s' invalid. Ignoring.", rvalue); + return 0; + } + } + + LIST_FOREACH(device_limits, t, c->io_device_limits) { + if (path_equal(path, t->path)) { + l = t; + break; + } + } + + if (!l) { + l = new0(CGroupIODeviceLimit, 1); + if (!l) + return log_oom(); + + l->path = path; + path = NULL; + l->rbps_max = CGROUP_LIMIT_MAX; + l->wbps_max = CGROUP_LIMIT_MAX; + + LIST_PREPEND(device_limits, c->io_device_limits, l); + } + + if (read) + l->rbps_max = num; + else + l->wbps_max = num; + + return 0; +} + int config_parse_blockio_weight( const char *unit, const char *filename, @@ -3824,6 +4014,9 @@ void unit_dump_config_items(FILE *f) { { config_parse_memory_limit, "LIMIT" }, { config_parse_device_allow, "DEVICE" }, { config_parse_device_policy, "POLICY" }, + { config_parse_io_limit, "LIMIT" }, + { config_parse_io_weight, "WEIGHT" }, + { config_parse_io_device_weight, "DEVICEWEIGHT" }, { config_parse_blockio_bandwidth, "BANDWIDTH" }, { config_parse_blockio_weight, "WEIGHT" }, { config_parse_blockio_device_weight, "DEVICEWEIGHT" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 34f15afa62..b36a2e3a02 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -86,6 +86,9 @@ int config_parse_memory_limit(const char *unit, const char *filename, unsigned l int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_device_policy(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_device_allow(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_io_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_io_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_io_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_bandwidth(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/main.c b/src/core/main.c index ed4d42c8cc..6397aadc73 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -122,6 +122,7 @@ static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE; static Set* arg_syscall_archs = NULL; static FILE* arg_serialization = NULL; static bool arg_default_cpu_accounting = false; +static bool arg_default_io_accounting = false; static bool arg_default_blockio_accounting = false; static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; @@ -691,6 +692,7 @@ static int parse_config_file(void) { { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit }, { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit }, { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting }, + { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting }, { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting }, { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting }, { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting }, @@ -733,6 +735,7 @@ static void manager_set_defaults(Manager *m) { m->default_start_limit_interval = arg_default_start_limit_interval; m->default_start_limit_burst = arg_default_start_limit_burst; m->default_cpu_accounting = arg_default_cpu_accounting; + m->default_io_accounting = arg_default_io_accounting; m->default_blockio_accounting = arg_default_blockio_accounting; m->default_memory_accounting = arg_default_memory_accounting; m->default_tasks_accounting = arg_default_tasks_accounting; diff --git a/src/core/manager.h b/src/core/manager.h index 17f84e6963..f23e4056c4 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -252,6 +252,7 @@ struct Manager { bool default_cpu_accounting; bool default_memory_accounting; + bool default_io_accounting; bool default_blockio_accounting; bool default_tasks_accounting; diff --git a/src/core/system.conf b/src/core/system.conf index eacd7ee282..db8b7acd78 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -38,6 +38,7 @@ #DefaultStartLimitBurst=5 #DefaultEnvironment= #DefaultCPUAccounting=no +#DefaultIOAccounting=no #DefaultBlockIOAccounting=no #DefaultMemoryAccounting=no #DefaultTasksAccounting=yes diff --git a/src/core/unit.c b/src/core/unit.c index 8153515e89..04addc1f70 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -132,6 +132,7 @@ static void unit_init(Unit *u) { * been initialized */ cc->cpu_accounting = u->manager->default_cpu_accounting; + cc->io_accounting = u->manager->default_io_accounting; cc->blockio_accounting = u->manager->default_blockio_accounting; cc->memory_accounting = u->manager->default_memory_accounting; cc->tasks_accounting = u->manager->default_tasks_accounting; @@ -1213,6 +1214,7 @@ static int unit_add_startup_units(Unit *u) { return 0; if (c->startup_cpu_shares == CGROUP_CPU_SHARES_INVALID && + c->startup_io_weight == CGROUP_WEIGHT_INVALID && c->startup_blockio_weight == CGROUP_BLKIO_WEIGHT_INVALID) return 0; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 2b755cea28..94d1c1d63c 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -154,7 +154,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur); } else if (STR_IN_SET(field, - "CPUAccounting", "MemoryAccounting", "BlockIOAccounting", "TasksAccounting", + "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting", "SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies", "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "NoNewPrivileges", @@ -207,6 +207,17 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "t", u); + } else if (STR_IN_SET(field, "IOWeight", "StartupIOWeight")) { + uint64_t u; + + r = cg_weight_parse(eq, &u); + if (r < 0) { + log_error("Failed to parse %s value %s.", field, eq); + return -EINVAL; + } + + r = sd_bus_message_append(m, "v", "t", u); + } else if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight")) { uint64_t u; @@ -273,7 +284,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "a(ss)", 1, path, rwm); } - } else if (STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { + } else if (STR_IN_SET(field, "IOReadBandwidthMax", "IOWriteBandwidthMax", + "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { if (isempty(eq)) r = sd_bus_message_append(m, "v", "a(st)", 0); @@ -295,16 +307,20 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen return -EINVAL; } - r = parse_size(bandwidth, 1000, &bytes); - if (r < 0) { - log_error("Failed to parse byte value %s.", bandwidth); - return -EINVAL; + if (streq(bandwidth, "max")) { + bytes = CGROUP_LIMIT_MAX; + } else { + r = parse_size(bandwidth, 1000, &bytes); + if (r < 0) { + log_error("Failed to parse byte value %s.", bandwidth); + return -EINVAL; + } } r = sd_bus_message_append(m, "v", "a(st)", 1, path, bytes); } - } else if (streq(field, "BlockIODeviceWeight")) { + } else if (STR_IN_SET(field, "IODeviceWeight", "BlockIODeviceWeight")) { if (isempty(eq)) r = sd_bus_message_append(m, "v", "a(st)", 0); diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index bec4f31b39..387de025c5 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -4428,7 +4428,7 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte return 0; - } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "BlockIODeviceWeight")) { + } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IODeviceWeight") || streq(name, "BlockIODeviceWeight"))) { const char *path; uint64_t weight; @@ -4447,7 +4447,8 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte return 0; - } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) { + } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IOReadBandwidthMax") || streq(name, "IOWriteBandwidthMax") || + streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) { const char *path; uint64_t bandwidth; -- cgit v1.2.3-54-g00ecf From 0069a0dd1431ea5de075f3c6405e2b33abab8351 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 16 May 2016 22:48:45 +0200 Subject: man: clarify that IOXyz= only applies to the unified hierarchy, and BlockIOXyz= to the legacy hierarchy With this change for each setting we say which hierarachy it applies to briefly in the first sentence of the description, plus in longer form in an extra pargraph at the end, with a recommendation for the counterpart of the option in the other hierarchy. Also adds markup and the "=" suffix to all mentioned settings. --- man/systemd.resource-control.xml | 182 ++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 98 deletions(-) (limited to 'man/systemd.resource-control.xml') diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 4edb1a25a8..313a49a959 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -252,14 +252,15 @@ IOAccounting= - Turn on Block I/O accounting for this unit on unified - hierarchy. Takes a boolean argument. Note that turning on - block I/O accounting for one unit will also implicitly turn - it on for all units contained in the same slice and all for - its parent slices and the units contained therein. The - system default for this setting may be controlled with - DefaultIOAccounting= in + Turn on Block I/O accounting for this unit, if the unified control group hierarchy is used on the + system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly + turn it on for all units contained in the same slice and all for its parent slices and the units contained + therein. The system default for this setting may be controlled with DefaultIOAccounting= + in systemd-system.conf5. + + This setting is supported only if the unified control group hierarchy is used. Use + BlockIOAccounting= on systems using the legacy control group hierarchy. @@ -268,15 +269,12 @@ StartupIOWeight=weight - Set the default overall block I/O weight for the - executed processes on unified hierarchy. Takes a single - weight value (between 1 and 10000) to set the default block - I/O weight. This controls the io.weight - control group attribute, which defaults to 100. For details - about this control group attribute, see cgroup-v2.txt. - The available I/O bandwidth is split up among all units - within one slice relative to their block I/O weight. + Set the default overall block I/O weight for the executed processes, if the unified control group + hierarchy is used on the system. Takes a single weight value (between 1 and 10000) to set the default block + I/O weight. This controls the io.weight control group attribute, which defaults to + 100. For details about this control group attribute, see cgroup-v2.txt. The available I/O + bandwidth is split up among all units within one slice relative to their block I/O weight. While StartupIOWeight= only applies to the startup phase of the system, @@ -286,6 +284,10 @@ differently than during runtime. Implies IOAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. Use + BlockIOWeight= and StartupBlockIOWeight= on systems using the legacy + control group hierarchy. @@ -293,21 +295,19 @@ IODeviceWeight=device weight - Set the per-device overall block I/O weight for the - executed processes on unified hierarchy. Takes a - space-separated pair of a file path and a weight value to - specify the device specific weight value, between 1 and - 10000. (Example: "/dev/sda 1000"). The file path may be - specified as path to a block device node or as any other - file, in which case the backing block device of the file - system of the file is determined. This controls the - io.weight control group attribute, which - defaults to 100. Use this option multiple times to set - weights for multiple devices. For details about this control - group attribute, see Set the per-device overall block I/O weight for the executed processes, if the unified control group + hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify + the device specific weight value, between 1 and 10000. (Example: "/dev/sda 1000"). The file path may be + specified as path to a block device node or as any other file, in which case the backing block device of the + file system of the file is determined. This controls the io.weight control group + attribute, which defaults to 100. Use this option multiple times to set weights for multiple devices. For + details about this control group attribute, see cgroup-v2.txt. Implies IOAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. Use + BlockIODeviceWeight= on systems using the legacy control group hierarchy. @@ -316,27 +316,23 @@ IOWriteBandwidthMax=device bytes - Set the per-device overall block I/O bandwidth maximum - limit for the executed processes on unified hierarchy. This - limit is not work-conserving and the executed processes are - not allowed to use more even if the device has idle - capacity. Takes a space-separated pair of a file path and a - bandwidth value (in bytes per second) to specify the device - specific bandwidth. The file path may be a path to a block - device node, or as any other file in which case the backing - block device of the file system of the file is used. If the - bandwidth is suffixed with K, M, G, or T, the specified - bandwidth is parsed as Kilobytes, Megabytes, Gigabytes, or - Terabytes, respectively, to the base of 1000. (Example: - "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This - controls the io.max control group - attributes. Use this option multiple times to set bandwidth - limits for multiple devices. For details about this control - group attribute, see Set the per-device overall block I/O bandwidth maximum limit for the executed processes, if the unified + control group hierarchy is used on the system. This limit is not work-conserving and the executed processes + are not allowed to use more even if the device has idle capacity. Takes a space-separated pair of a file + path and a bandwidth value (in bytes per second) to specify the device specific bandwidth. The file path may + be a path to a block device node, or as any other file in which case the backing block device of the file + system of the file is used. If the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes, respectively, to the base of 1000. (Example: + "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the io.max control + group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For details + about this control group attribute, see cgroup-v2.txt. Implies IOAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. Use + BlockIOAccounting= on systems using the legacy control group hierarchy. @@ -344,16 +340,15 @@ BlockIOAccounting= - Use IOAccounting on unified hierarchy. - - Turn on Block I/O accounting for this unit. Takes a - boolean argument. Note that turning on block I/O accounting - for one unit will also implicitly turn it on for all units - contained in the same slice and all for its parent slices - and the units contained therein. The system default for this - setting may be controlled with + Turn on Block I/O accounting for this unit, if the legacy control group hierarchy is used on the + system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly + turn it on for all units contained in the same slice and all for its parent slices and the units contained + therein. The system default for this setting may be controlled with DefaultBlockIOAccounting= in systemd-system.conf5. + + This setting is supported only if the legacy control group hierarchy is used. Use + IOAccounting= on systems using the unified control group hierarchy. @@ -361,18 +356,13 @@ BlockIOWeight=weight StartupBlockIOWeight=weight - Use IOWeight and StartupIOWeight on unified - hierarchy. - - Set the default overall block I/O weight for the - executed processes. Takes a single weight value (between 10 - and 1000) to set the default block I/O weight. This controls - the blkio.weight control group attribute, - which defaults to 500. For details about this control group - attribute, see Set the default overall block I/O weight for the executed processes, if the legacy control + group hierarchy is used on the system. Takes a single weight value (between 10 and 1000) to set the default + block I/O weight. This controls the blkio.weight control group attribute, which defaults to + 500. For details about this control group attribute, see blkio-controller.txt. - The available I/O bandwidth is split up among all units within - one slice relative to their block I/O weight. + The available I/O bandwidth is split up among all units within one slice relative to their block I/O + weight. While StartupBlockIOWeight= only applies to the startup phase of the system, @@ -383,31 +373,32 @@ Implies BlockIOAccounting=true. - + + This setting is supported only if the legacy control group hierarchy is used. Use + IOWeight= and StartupIOWeight= on systems using the unified control group + hierarchy. + + BlockIODeviceWeight=device weight - Use IODeviceWeight on unified hierarchy. - - Set the per-device overall block I/O weight for the - executed processes. Takes a space-separated pair of a file - path and a weight value to specify the device specific - weight value, between 10 and 1000. (Example: "/dev/sda - 500"). The file path may be specified as path to a block - device node or as any other file, in which case the backing - block device of the file system of the file is - determined. This controls the - blkio.weight_device control group - attribute, which defaults to 1000. Use this option multiple - times to set weights for multiple devices. For details about - this control group attribute, see Set the per-device overall block I/O weight for the executed processes, if the legacy control group + hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify + the device specific weight value, between 10 and 1000. (Example: "/dev/sda 500"). The file path may be + specified as path to a block device node or as any other file, in which case the backing block device of the + file system of the file is determined. This controls the blkio.weight_device control group + attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For + details about this control group attribute, see blkio-controller.txt. Implies BlockIOAccounting=true. + + This setting is supported only if the legacy control group hierarchy is used. Use + IODeviceWeight= on systems using the unified control group hierarchy. @@ -416,30 +407,25 @@ BlockIOWriteBandwidth=device bytes - Use IOReadBandwidthMax and IOWriteBandwidthMax on - unified hierarchy. - - Set the per-device overall block I/O bandwidth limit - for the executed processes. Takes a space-separated pair of - a file path and a bandwidth value (in bytes per second) to - specify the device specific bandwidth. The file path may be - a path to a block device node, or as any other file in which - case the backing block device of the file system of the file - is used. If the bandwidth is suffixed with K, M, G, or T, - the specified bandwidth is parsed as Kilobytes, Megabytes, - Gigabytes, or Terabytes, respectively, to the base of - 1000. (Example: - "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This - controls the blkio.throttle.read_bps_device and - blkio.throttle.write_bps_device control group - attributes. Use this option multiple times to set bandwidth - limits for multiple devices. For details about these control - group attributes, see Set the per-device overall block I/O bandwidth limit for the executed processes, if the legacy control + group hierarchy is used on the system. Takes a space-separated pair of a file path and a bandwidth value (in + bytes per second) to specify the device specific bandwidth. The file path may be a path to a block device + node, or as any other file in which case the backing block device of the file system of the file is used. If + the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is parsed as Kilobytes, Megabytes, + Gigabytes, or Terabytes, respectively, to the base of 1000. (Example: + "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the + blkio.throttle.read_bps_device and blkio.throttle.write_bps_device + control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For + details about these control group attributes, see blkio-controller.txt. Implies BlockIOAccounting=true. + + This setting is supported only if the legacy control group hierarchy is used. Use + IOReadBandwidthMax= and IOWriteBandwidthMax= on systems using the + unified control group hierarchy. -- cgit v1.2.3-54-g00ecf From ac06a0cf8a5c5bd58bfa022408361e982f100bcb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 May 2016 13:50:56 -0700 Subject: core: add support for IOReadIOPSMax and IOWriteIOPSMax cgroup IO controller supports maximum limits for both bandwidth and IOPS but systemd resource control currently only supports bandwidth limits. This patch adds support for IOReadIOPSMax and IOWriteIOPSMax when unified cgroup hierarchy is in use. It isn't difficult to also add BlockIOReadIOPS and BlockIOWriteIOPS for legacy hierarchies but IO control on legacy hierarchies is half-broken anyway, so let's leave it alone for now. --- man/systemd.resource-control.xml | 24 ++++++++++++++++++++++++ src/basic/cgroup-util.c | 4 ++++ src/basic/cgroup-util.h | 2 ++ src/core/cgroup.c | 7 ++++--- src/core/dbus-cgroup.c | 2 ++ src/core/load-fragment-gperf.gperf.m4 | 2 ++ 6 files changed, 38 insertions(+), 3 deletions(-) (limited to 'man/systemd.resource-control.xml') diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 313a49a959..8a95e1196b 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -336,6 +336,30 @@ + + IOReadIOPSMax=device IOPS + IOWriteIOPSMax=device IOPS + + + Set the per-device overall block I/O IOs-Per-Second maximum limit for the executed processes, if the + unified control group hierarchy is used on the system. This limit is not work-conserving and the executed + processes are not allowed to use more even if the device has idle capacity. Takes a space-separated pair of + a file path and an IOPS value to specify the device specific IOPS. The file path may be a path to a block + device node, or as any other file in which case the backing block device of the file system of the file is + used. If the IOPS is suffixed with K, M, G, or T, the specified IOPS is parsed as KiloIOPS, MegaIOPS, + GigaIOPS, or TeraIOPS, respectively, to the base of 1000. (Example: + "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 1K"). This controls the io.max control + group attributes. Use this option multiple times to set IOPS limits for multiple devices. For details about + this control group attribute, see cgroup-v2.txt. + + + Implies IOAccounting=true. + + This setting is supported only if the unified control group hierarchy is used. + + + BlockIOAccounting= diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 8eab100748..7cdc97ee3c 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2272,11 +2272,15 @@ int cg_weight_parse(const char *s, uint64_t *ret) { const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = { [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX, [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX, }; static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = { [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax", [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax", + [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax", + [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax", }; DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType); diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 0d96fb6b76..4bb5291296 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -76,6 +76,8 @@ static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) { typedef enum CGroupIOLimitType { CGROUP_IO_RBPS_MAX, CGROUP_IO_WBPS_MAX, + CGROUP_IO_RIOPS_MAX, + CGROUP_IO_WIOPS_MAX, _CGROUP_IO_LIMIT_TYPE_MAX, _CGROUP_IO_LIMIT_TYPE_INVALID = -1 diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 0b902fa6f7..a54634469f 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -439,7 +439,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; - char buf[DECIMAL_STR_MAX(dev_t)*2+2+(5+DECIMAL_STR_MAX(uint64_t)+1)*2]; + char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; CGroupIOLimitType type; dev_t dev; unsigned n = 0; @@ -458,8 +458,9 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } } - xsprintf(buf, "%u:%u rbps=%s wbps=%s\n", major(dev), minor(dev), - limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX]); + xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), + limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], + limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); r = cg_set_attribute("io", path, "io.max", buf); if (r < 0) log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 4372828e30..29c2edaf6b 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -213,6 +213,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0), SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0), diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index ad45611d9d..8193418980 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -126,6 +126,8 @@ $1.StartupIOWeight, config_parse_io_weight, 0, $1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context) $1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) $1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight) $1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight) -- cgit v1.2.3-54-g00ecf From 538b48524cf48afc299ab78690bc03c18af67ede Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 May 2016 17:35:12 -0700 Subject: core: translate between IO and BlockIO settings to ease transition Due to the substantial interface changes in cgroup unified hierarchy, new IO settings are introduced. Currently, IO settings apply only to unified hierarchy and BlockIO to legacy. While the transition is necessary, it's painful for users to have to provide configs for both. This patch implements translation from one config set to another for configs which make sense. * The translation takes place during application of the configs. Users won't see IO or BlockIO settings appearing without being explicitly created. * The translation takes place only if there is no config for the matching cgroup hierarchy type at all. While this doesn't provide comprehensive compatibility, it should considerably ease transition to the new IO settings which are a superset of BlockIO settings. v2: - Update test-cgroup-mask.c so that it accounts for the fact that CGROUP_MASK_IO and CGROUP_MASK_BLKIO move together. Also, test/parent.slice now sets IOWeight instead of BlockIOWeight. --- man/systemd.resource-control.xml | 25 +++++++ src/core/cgroup.c | 146 ++++++++++++++++++++++++++++++--------- src/test/test-cgroup-mask.c | 13 ++-- test/parent.slice | 2 +- 4 files changed, 148 insertions(+), 38 deletions(-) (limited to 'man/systemd.resource-control.xml') diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 8a95e1196b..066f2cc19b 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -98,6 +98,31 @@ unit. + + Unified and Legacy Control Group Hierarchies + + Unified control group hierarchy is the new version of kernel control group interface. Depending on the + resource type, there are differences in resource control capabilities. Also, because of interface changes, some + resource types have a separate set of options on the unified hierarchy. + + + + + + + IO prefixed settings are superset of and replace BlockIO + prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes. + + + + + + To ease the transition, there is best-effort translation between the two versions of settings. If all + settings of a unit for a given resource type are for the other hierarchy type, the settings are translated and + applied. If there are any valid settings for the hierarchy in use, all translations are disabled for the resource + type. Mixing the two types of settings on a unit can lead to confusing results. + + Options diff --git a/src/core/cgroup.c b/src/core/cgroup.c index ddc4f69ebe..8b8b2ac5ff 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -358,6 +358,24 @@ fail: return -errno; } +static bool cgroup_context_has_io_config(CGroupContext *c) +{ + return c->io_accounting || + c->io_weight != CGROUP_WEIGHT_INVALID || + c->startup_io_weight != CGROUP_WEIGHT_INVALID || + c->io_device_weights || + c->io_device_limits; +} + +static bool cgroup_context_has_blockio_config(CGroupContext *c) +{ + return c->blockio_accounting || + c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->blockio_device_weights || + c->blockio_device_bandwidths; +} + static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) { if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && @@ -380,6 +398,18 @@ static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state return CGROUP_BLKIO_WEIGHT_DEFAULT; } +static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) +{ + return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) +{ + return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); +} + static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; @@ -525,14 +555,19 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } if (mask & CGROUP_MASK_IO) { - CGroupIODeviceLimit *l, *next; + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { char buf[8+DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; - CGroupIODeviceWeight *w; - weight = cgroup_context_io_weight(c, state); + if (has_io) + weight = cgroup_context_io_weight(c, state); + else if (has_blockio) + weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); + else + weight = CGROUP_WEIGHT_DEFAULT; xsprintf(buf, "default %" PRIu64 "\n", weight); r = cg_set_attribute("io", path, "io.weight", buf); @@ -540,27 +575,62 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set io.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_io_device_weight(path, w->path, w->weight); + if (has_io) { + CGroupIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_io_device_weight(path, w->path, w->weight); + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); + } } /* Apply limits and free ones without config. */ - LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) - cgroup_context_free_io_device_limit(c, l); + if (has_io) { + CGroupIODeviceLimit *l, *next; + + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + cgroup_context_free_io_device_limit(c, l); + } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; + CGroupIOLimitType type; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + limits[type] = cgroup_io_limit_defaults[type]; + + limits[CGROUP_IO_RBPS_MAX] = b->rbps; + limits[CGROUP_IO_WBPS_MAX] = b->wbps; + + if (!cgroup_apply_io_device_limit(path, b->path, limits)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } } } if (mask & CGROUP_MASK_BLKIO) { - CGroupBlockIODeviceBandwidth *b, *next; + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { char buf[DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; - CGroupBlockIODeviceWeight *w; - weight = cgroup_context_blkio_weight(c, state); + if (has_blockio) + weight = cgroup_context_blkio_weight(c, state); + else if (has_io) + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); + else + weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); r = cg_set_attribute("blkio", path, "blkio.weight", buf); @@ -568,15 +638,36 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set blkio.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, w->weight); + if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, w->weight); + } else if (has_io) { + CGroupIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); + } } /* Apply limits and free ones without config. */ - LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) - cgroup_context_free_blockio_device_bandwidth(c, b); + if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } + } else if (has_io) { + CGroupIODeviceLimit *l, *next; + + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + cgroup_context_free_io_device_limit(c, l); + } } } @@ -693,19 +784,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; - if (c->io_accounting || - c->io_weight != CGROUP_WEIGHT_INVALID || - c->startup_io_weight != CGROUP_WEIGHT_INVALID || - c->io_device_weights || - c->io_device_limits) - mask |= CGROUP_MASK_IO; - - if (c->blockio_accounting || - c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->blockio_device_weights || - c->blockio_device_bandwidths) - mask |= CGROUP_MASK_BLKIO; + if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c)) + mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || c->memory_limit != (uint64_t) -1) @@ -1795,6 +1875,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { if (m == 0) return; + /* always invalidate compat pairs together */ + if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)) + m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; + if ((u->cgroup_realized_mask & m) == 0) return; diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index 4eb8fcd773..a025704722 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -48,6 +48,7 @@ static int test_cgroup_mask(void) { m->default_cpu_accounting = m->default_memory_accounting = m->default_blockio_accounting = + m->default_io_accounting = m->default_tasks_accounting = false; m->default_tasks_max = (uint64_t) -1; @@ -76,7 +77,7 @@ static int test_cgroup_mask(void) { assert_se(unit_get_own_mask(daughter) == 0); assert_se(unit_get_own_mask(grandchild) == 0); assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY); - assert_se(unit_get_own_mask(parent) == CGROUP_MASK_BLKIO); + assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); assert_se(unit_get_own_mask(root) == 0); /* Verify aggregation of member masks */ @@ -85,23 +86,23 @@ static int test_cgroup_mask(void) { assert_se(unit_get_members_mask(grandchild) == 0); assert_se(unit_get_members_mask(parent_deep) == 0); assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of sibling masks. */ assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); assert_se(unit_get_siblings_mask(grandchild) == 0); assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of target masks. */ assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); assert_se(unit_get_target_mask(grandchild) == 0); assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); manager_free(m); diff --git a/test/parent.slice b/test/parent.slice index 0222f8eb47..a95f90392d 100644 --- a/test/parent.slice +++ b/test/parent.slice @@ -2,4 +2,4 @@ Description=Parent Slice [Slice] -BlockIOWeight=200 +IOWeight=200 -- cgit v1.2.3-54-g00ecf