diff options
author | Lennart Poettering <lennart@poettering.net> | 2016-05-20 18:53:11 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2016-05-20 18:53:11 +0200 |
commit | f9e26ecc48dc87f51e75ce8aa58ef9bdae8ce0f0 (patch) | |
tree | e88eccf7c3ad07de0c0f848b6e5d7637baf867ad | |
parent | 51e0b250779d28eaffff886b5a3cb208c6813a40 (diff) | |
parent | 538b48524cf48afc299ab78690bc03c18af67ede (diff) |
Merge pull request #3290 from htejun/cgroup2-io-compat
Implement compat translation between IO* and BlockIO* settings
-rw-r--r-- | man/systemd.resource-control.xml | 49 | ||||
-rw-r--r-- | src/basic/cgroup-util.c | 16 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 16 | ||||
-rw-r--r-- | src/core/cgroup.c | 363 | ||||
-rw-r--r-- | src/core/cgroup.h | 8 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 91 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 63 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 3 | ||||
-rw-r--r-- | src/systemctl/systemctl.c | 2 | ||||
-rw-r--r-- | src/test/test-cgroup-mask.c | 13 | ||||
-rw-r--r-- | test/parent.slice | 2 |
12 files changed, 428 insertions, 200 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 313a49a959..066f2cc19b 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -99,6 +99,31 @@ </refsect1> <refsect1> + <title>Unified and Legacy Control Group Hierarchies</title> + + <para>Unified control group hierarchy is the new version of kernel control group interface. Depending on the + resource type, there are differences in resource control capabilities. Also, because of interface changes, some + resource types have a separate set of options on the unified hierarchy.</para> + + <para> + <variablelist> + <varlistentry> + <term><option>IO</option></term> + <listitem> + <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname> + prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para> + </listitem> + </varlistentry> + </variablelist> + </para> + + <para>To ease the transition, there is best-effort translation between the two versions of settings. If all + settings of a unit for a given resource type are for the other hierarchy type, the settings are translated and + applied. If there are any valid settings for the hierarchy in use, all translations are disabled for the resource + type. Mixing the two types of settings on a unit can lead to confusing results.</para> + </refsect1> + + <refsect1> <title>Options</title> <para>Units of the types listed above can have settings @@ -337,6 +362,30 @@ </varlistentry> <varlistentry> + <term><varname>IOReadIOPSMax=<replaceable>device</replaceable> <replaceable>IOPS</replaceable></varname></term> + <term><varname>IOWriteIOPSMax=<replaceable>device</replaceable> <replaceable>IOPS</replaceable></varname></term> + + <listitem> + <para>Set the per-device overall block I/O IOs-Per-Second maximum limit for the executed processes, if the + unified control group hierarchy is used on the system. This limit is not work-conserving and the executed + processes are not allowed to use more even if the device has idle capacity. Takes a space-separated pair of + a file path and an IOPS value to specify the device specific IOPS. The file path may be a path to a block + device node, or as any other file in which case the backing block device of the file system of the file is + used. If the IOPS is suffixed with K, M, G, or T, the specified IOPS is parsed as KiloIOPS, MegaIOPS, + GigaIOPS, or TeraIOPS, respectively, to the base of 1000. (Example: + "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 1K"). This controls the <literal>io.max</literal> control + group attributes. Use this option multiple times to set IOPS limits for multiple devices. For details about + this control group attribute, see <ulink + url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>. + </para> + + <para>Implies <literal>IOAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>BlockIOAccounting=</varname></term> <listitem> diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index ff57cf30b7..7cdc97ee3c 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2269,6 +2269,22 @@ int cg_weight_parse(const char *s, uint64_t *ret) { return 0; } +const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = { + [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX, + [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX, +}; + +static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = { + [CGROUP_IO_RBPS_MAX] = "IOReadBandwidthMax", + [CGROUP_IO_WBPS_MAX] = "IOWriteBandwidthMax", + [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax", + [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax", +}; + +DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType); + int cg_cpu_shares_parse(const char *s, uint64_t *ret) { uint64_t u; int r; diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index a696c1fa60..4bb5291296 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -72,6 +72,22 @@ static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) { (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX); } +/* IO limits on unified hierarchy */ +typedef enum CGroupIOLimitType { + CGROUP_IO_RBPS_MAX, + CGROUP_IO_WBPS_MAX, + CGROUP_IO_RIOPS_MAX, + CGROUP_IO_WIOPS_MAX, + + _CGROUP_IO_LIMIT_TYPE_MAX, + _CGROUP_IO_LIMIT_TYPE_INVALID = -1 +} CGroupIOLimitType; + +extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX]; + +const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_; +CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_; + /* Special values for the cpu.shares attribute */ #define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1) #define CGROUP_CPU_SHARES_MIN UINT64_C(2) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 4f1637ffe9..8b8b2ac5ff 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -184,20 +184,16 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { LIST_FOREACH(device_limits, il, c->io_device_limits) { char buf[FORMAT_BYTES_MAX]; - - if (il->rbps_max != CGROUP_LIMIT_MAX) - fprintf(f, - "%sIOReadBandwidthMax=%s %s\n", - prefix, - il->path, - format_bytes(buf, sizeof(buf), il->rbps_max)); - - if (il->wbps_max != CGROUP_LIMIT_MAX) - fprintf(f, - "%sIOWriteBandwidthMax=%s %s\n", - prefix, - il->path, - format_bytes(buf, sizeof(buf), il->wbps_max)); + CGroupIOLimitType type; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + if (il->limits[type] != cgroup_io_limit_defaults[type]) + fprintf(f, + "%s%s=%s %s\n", + prefix, + cgroup_io_limit_type_to_string(type), + il->path, + format_bytes(buf, sizeof(buf), il->limits[type])); } LIST_FOREACH(device_weights, w, c->blockio_device_weights) @@ -210,12 +206,18 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { char buf[FORMAT_BYTES_MAX]; - fprintf(f, - "%s%s=%s %s\n", - prefix, - b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth", - b->path, - format_bytes(buf, sizeof(buf), b->bandwidth)); + if (b->rbps != CGROUP_LIMIT_MAX) + fprintf(f, + "%sBlockIOReadBandwidth=%s %s\n", + prefix, + b->path, + format_bytes(buf, sizeof(buf), b->rbps)); + if (b->wbps != CGROUP_LIMIT_MAX) + fprintf(f, + "%sBlockIOWriteBandwidth=%s %s\n", + prefix, + b->path, + format_bytes(buf, sizeof(buf), b->wbps)); } } @@ -356,6 +358,154 @@ fail: return -errno; } +static bool cgroup_context_has_io_config(CGroupContext *c) +{ + return c->io_accounting || + c->io_weight != CGROUP_WEIGHT_INVALID || + c->startup_io_weight != CGROUP_WEIGHT_INVALID || + c->io_device_weights || + c->io_device_limits; +} + +static bool cgroup_context_has_blockio_config(CGroupContext *c) +{ + return c->blockio_accounting || + c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->blockio_device_weights || + c->blockio_device_bandwidths; +} + +static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) +{ + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_io_weight != CGROUP_WEIGHT_INVALID) + return c->startup_io_weight; + else if (c->io_weight != CGROUP_WEIGHT_INVALID) + return c->io_weight; + else + return CGROUP_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state) +{ + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID) + return c->startup_blockio_weight; + else if (c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID) + return c->blockio_weight; + else + return CGROUP_BLKIO_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) +{ + return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) +{ + return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); +} + +static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) +{ + char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return; + + xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight); + r = cg_set_attribute("io", path, "io.weight", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight on %s: %m", path); +} + +static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) +{ + char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return; + + xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight); + r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight_device on %s: %m", path); +} + +static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) +{ + char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; + char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; + CGroupIOLimitType type; + dev_t dev; + unsigned n = 0; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return 0; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) { + if (limits[type] != cgroup_io_limit_defaults[type]) { + xsprintf(limit_bufs[type], "%" PRIu64, limits[type]); + n++; + } else { + xsprintf(limit_bufs[type], "%s", limits[type] == CGROUP_LIMIT_MAX ? "max" : "0"); + } + } + + xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), + limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], + limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); + r = cg_set_attribute("io", path, "io.max", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.max on %s: %m", path); + return n; +} + +static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) +{ + char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + unsigned n = 0; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return 0; + + if (rbps != CGROUP_LIMIT_MAX) + n++; + sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps); + r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.read_bps_device on %s: %m", path); + + if (wbps != CGROUP_LIMIT_MAX) + n++; + sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps); + r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.write_bps_device on %s: %m", path); + + return n; +} + void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { bool is_root; int r; @@ -405,19 +555,19 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } if (mask & CGROUP_MASK_IO) { - CGroupIODeviceWeight *w; - CGroupIODeviceLimit *l, *next; + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { - char buf[MAX(8+DECIMAL_STR_MAX(uint64_t)+1, - DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; - uint64_t weight = CGROUP_WEIGHT_DEFAULT; + char buf[8+DECIMAL_STR_MAX(uint64_t)+1]; + uint64_t weight; - if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && - c->startup_io_weight != CGROUP_WEIGHT_INVALID) - weight = c->startup_io_weight; - else if (c->io_weight != CGROUP_WEIGHT_INVALID) - weight = c->io_weight; + if (has_io) + weight = cgroup_context_io_weight(c, state); + else if (has_blockio) + weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); + else + weight = CGROUP_WEIGHT_DEFAULT; xsprintf(buf, "default %" PRIu64 "\n", weight); r = cg_set_attribute("io", path, "io.weight", buf); @@ -425,103 +575,99 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set io.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) { - dev_t dev; + if (has_io) { + CGroupIODeviceWeight *w; - r = lookup_block_device(w->path, &dev); - if (r < 0) - continue; + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_io_device_weight(path, w->path, w->weight); + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; - xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight); - r = cg_set_attribute("io", path, "io.weight", buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); } } - LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - char rbps_buf[DECIMAL_STR_MAX(uint64_t)] = "max"; - char wbps_buf[DECIMAL_STR_MAX(uint64_t)] = "max"; - char buf[DECIMAL_STR_MAX(dev_t)*2+2+(5+DECIMAL_STR_MAX(uint64_t)+1)*2]; - dev_t dev; - unsigned n = 0; - - r = lookup_block_device(l->path, &dev); - if (r < 0) - continue; + /* Apply limits and free ones without config. */ + if (has_io) { + CGroupIODeviceLimit *l, *next; - if (l->rbps_max != CGROUP_LIMIT_MAX) { - xsprintf(rbps_buf, "%" PRIu64, l->rbps_max); - n++; + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + cgroup_context_free_io_device_limit(c, l); } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; - if (l->wbps_max != CGROUP_LIMIT_MAX) { - xsprintf(wbps_buf, "%" PRIu64, l->wbps_max); - n++; - } + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; + CGroupIOLimitType type; - xsprintf(buf, "%u:%u rbps=%s wbps=%s\n", major(dev), minor(dev), rbps_buf, wbps_buf); - r = cg_set_attribute("io", path, "io.max", buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.max on %s: %m", path); + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + limits[type] = cgroup_io_limit_defaults[type]; - /* If @l contained no config, we just cleared the kernel - counterpart too. No reason to keep @l around. */ - if (!n) - cgroup_context_free_io_device_limit(c, l); + limits[CGROUP_IO_RBPS_MAX] = b->rbps; + limits[CGROUP_IO_WBPS_MAX] = b->wbps; + + if (!cgroup_apply_io_device_limit(path, b->path, limits)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } } } if (mask & CGROUP_MASK_BLKIO) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t)+1, - DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; - CGroupBlockIODeviceWeight *w; - CGroupBlockIODeviceBandwidth *b; + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { - sprintf(buf, "%" PRIu64 "\n", - IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->startup_blockio_weight : - c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->blockio_weight : CGROUP_BLKIO_WEIGHT_DEFAULT); + char buf[DECIMAL_STR_MAX(uint64_t)+1]; + uint64_t weight; + + if (has_blockio) + weight = cgroup_context_blkio_weight(c, state); + else if (has_io) + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); + else + weight = CGROUP_BLKIO_WEIGHT_DEFAULT; + + xsprintf(buf, "%" PRIu64 "\n", weight); r = cg_set_attribute("blkio", path, "blkio.weight", buf); if (r < 0) log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set blkio.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) { - dev_t dev; + if (has_blockio) { + CGroupBlockIODeviceWeight *w; - r = lookup_block_device(w->path, &dev); - if (r < 0) - continue; + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, w->weight); + } else if (has_io) { + CGroupIODeviceWeight *w; - sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight); - r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight_device on %s: %m", path); + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); } } - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { - const char *a; - dev_t dev; - - r = lookup_block_device(b->path, &dev); - if (r < 0) - continue; + /* Apply limits and free ones without config. */ + if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; - a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device"; + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } + } else if (has_io) { + CGroupIODeviceLimit *l, *next; - sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth); - r = cg_set_attribute("blkio", path, a, buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set %s on %s: %m", a, path); + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + cgroup_context_free_io_device_limit(c, l); + } } } @@ -638,19 +784,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; - if (c->io_accounting || - c->io_weight != CGROUP_WEIGHT_INVALID || - c->startup_io_weight != CGROUP_WEIGHT_INVALID || - c->io_device_weights || - c->io_device_limits) - mask |= CGROUP_MASK_IO; - - if (c->blockio_accounting || - c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->blockio_device_weights || - c->blockio_device_bandwidths) - mask |= CGROUP_MASK_BLKIO; + if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c)) + mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || c->memory_limit != (uint64_t) -1) @@ -1740,6 +1875,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { if (m == 0) return; + /* always invalidate compat pairs together */ + if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)) + m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; + if ((u->cgroup_realized_mask & m) == 0) return; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index a533923072..2b1edbafc4 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -23,6 +23,7 @@ #include "list.h" #include "time-util.h" +#include "cgroup-util.h" typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; @@ -64,8 +65,7 @@ struct CGroupIODeviceWeight { struct CGroupIODeviceLimit { LIST_FIELDS(CGroupIODeviceLimit, device_limits); char *path; - uint64_t rbps_max; - uint64_t wbps_max; + uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; }; struct CGroupBlockIODeviceWeight { @@ -77,8 +77,8 @@ struct CGroupBlockIODeviceWeight { struct CGroupBlockIODeviceBandwidth { LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths); char *path; - uint64_t bandwidth; - bool read; + uint64_t rbps; + uint64_t wbps; }; struct CGroupContext { diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index a2a4a6249c..eef1c47c14 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -80,17 +80,13 @@ static int property_get_io_device_limits( return r; LIST_FOREACH(device_limits, l, c->io_device_limits) { - uint64_t v; - - if (streq(property, "IOReadBandwidthMax")) - v = l->rbps_max; - else - v = l->wbps_max; + CGroupIOLimitType type; - if (v == CGROUP_LIMIT_MAX) + type = cgroup_io_limit_type_from_string(property); + if (type < 0 || l->limits[type] == cgroup_io_limit_defaults[type]) continue; - r = sd_bus_message_append(reply, "(st)", l->path, v); + r = sd_bus_message_append(reply, "(st)", l->path, l->limits[type]); if (r < 0) return r; } @@ -150,11 +146,17 @@ static int property_get_blockio_device_bandwidths( return r; LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + uint64_t v; + + if (streq(property, "BlockIOReadBandwidth")) + v = b->rbps; + else + v = b->wbps; - if (streq(property, "BlockIOReadBandwidth") != b->read) + if (v == CGROUP_LIMIT_MAX) continue; - r = sd_bus_message_append(reply, "(st)", b->path, b->bandwidth); + r = sd_bus_message_append(reply, "(st)", b->path, v); if (r < 0) return r; } @@ -217,6 +219,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0), SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0), @@ -273,6 +277,7 @@ int bus_cgroup_set_property( UnitSetPropertiesMode mode, sd_bus_error *error) { + CGroupIOLimitType iol_type; int r; assert(u); @@ -416,15 +421,11 @@ int bus_cgroup_set_property( return 1; - } else if (streq(name, "IOReadBandwidthMax") || streq(name, "IOWriteBandwidthMax")) { + } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) { const char *path; - bool read = true; unsigned n = 0; uint64_t u64; - if (streq(name, "IOWriteBandwidthMax")) - read = false; - r = sd_bus_message_enter_container(message, 'a', "(st)"); if (r < 0) return r; @@ -442,6 +443,8 @@ int bus_cgroup_set_property( } if (!a) { + CGroupIOLimitType type; + a = new0(CGroupIODeviceLimit, 1); if (!a) return -ENOMEM; @@ -452,16 +455,13 @@ int bus_cgroup_set_property( return -ENOMEM; } - a->rbps_max = CGROUP_LIMIT_MAX; - a->wbps_max = CGROUP_LIMIT_MAX; + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + a->limits[type] = cgroup_io_limit_defaults[type]; LIST_PREPEND(device_limits, c->io_device_limits, a); } - if (read) - a->rbps_max = u64; - else - a->wbps_max = u64; + a->limits[iol_type] = u64; } n++; @@ -481,10 +481,7 @@ int bus_cgroup_set_property( if (n == 0) { LIST_FOREACH(device_limits, a, c->io_device_limits) - if (read) - a->rbps_max = CGROUP_LIMIT_MAX; - else - a->wbps_max = CGROUP_LIMIT_MAX; + a->limits[iol_type] = cgroup_io_limit_defaults[iol_type]; } unit_invalidate_cgroup(u, CGROUP_MASK_IO); @@ -493,17 +490,10 @@ int bus_cgroup_set_property( if (!f) return -ENOMEM; - if (read) { - fputs("IOReadBandwidthMax=\n", f); - LIST_FOREACH(device_limits, a, c->io_device_limits) - if (a->rbps_max != CGROUP_LIMIT_MAX) - fprintf(f, "IOReadBandwidthMax=%s %" PRIu64 "\n", a->path, a->rbps_max); - } else { - fputs("IOWriteBandwidthMax=\n", f); - LIST_FOREACH(device_limits, a, c->io_device_limits) - if (a->wbps_max != CGROUP_LIMIT_MAX) - fprintf(f, "IOWriteBandwidthMax=%s %" PRIu64 "\n", a->path, a->wbps_max); - } + fprintf(f, "%s=\n", name); + LIST_FOREACH(device_limits, a, c->io_device_limits) + if (a->limits[iol_type] != cgroup_io_limit_defaults[iol_type]) + fprintf(f, "%s=%s %" PRIu64 "\n", name, a->path, a->limits[iol_type]); r = fflush_and_check(f); if (r < 0) @@ -667,7 +657,7 @@ int bus_cgroup_set_property( CGroupBlockIODeviceBandwidth *a = NULL, *b; LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { - if (path_equal(path, b->path) && read == b->read) { + if (path_equal(path, b->path)) { a = b; break; } @@ -678,7 +668,8 @@ int bus_cgroup_set_property( if (!a) return -ENOMEM; - a->read = read; + a->rbps = CGROUP_LIMIT_MAX; + a->wbps = CGROUP_LIMIT_MAX; a->path = strdup(path); if (!a->path) { free(a); @@ -688,7 +679,10 @@ int bus_cgroup_set_property( LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, a); } - a->bandwidth = u64; + if (read) + a->rbps = u64; + else + a->wbps = u64; } n++; @@ -701,15 +695,18 @@ int bus_cgroup_set_property( return r; if (mode != UNIT_CHECK) { - CGroupBlockIODeviceBandwidth *a, *next; + CGroupBlockIODeviceBandwidth *a; _cleanup_free_ char *buf = NULL; _cleanup_fclose_ FILE *f = NULL; size_t size = 0; if (n == 0) { - LIST_FOREACH_SAFE(device_bandwidths, a, next, c->blockio_device_bandwidths) - if (a->read == read) - cgroup_context_free_blockio_device_bandwidth(c, a); + LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) { + if (read) + a->rbps = CGROUP_LIMIT_MAX; + else + a->wbps = CGROUP_LIMIT_MAX; + } } unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO); @@ -721,13 +718,13 @@ int bus_cgroup_set_property( if (read) { fputs("BlockIOReadBandwidth=\n", f); LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) - if (a->read) - fprintf(f, "BlockIOReadBandwidth=%s %" PRIu64 "\n", a->path, a->bandwidth); + if (a->rbps != CGROUP_LIMIT_MAX) + fprintf(f, "BlockIOReadBandwidth=%s %" PRIu64 "\n", a->path, a->rbps); } else { fputs("BlockIOWriteBandwidth=\n", f); LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) - if (!a->read) - fprintf(f, "BlockIOWriteBandwidth=%s %" PRIu64 "\n", a->path, a->bandwidth); + if (a->wbps != CGROUP_LIMIT_MAX) + fprintf(f, "BlockIOWriteBandwidth=%s %" PRIu64 "\n", a->path, a->wbps); } r = fflush_and_check(f); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index ad45611d9d..8193418980 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -126,6 +126,8 @@ $1.StartupIOWeight, config_parse_io_weight, 0, $1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context) $1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) $1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight) $1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index cea615132a..86b4fb071b 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3023,9 +3023,9 @@ int config_parse_io_limit( _cleanup_free_ char *path = NULL; CGroupIODeviceLimit *l = NULL, *t; CGroupContext *c = data; + CGroupIOLimitType type; const char *limit; uint64_t num; - bool read; size_t n; int r; @@ -3033,14 +3033,12 @@ int config_parse_io_limit( assert(lvalue); assert(rvalue); - read = streq("IOReadBandwidthMax", lvalue); + type = cgroup_io_limit_type_from_string(lvalue); + assert(type >= 0); if (isempty(rvalue)) { LIST_FOREACH(device_limits, l, c->io_device_limits) - if (read) - l->rbps_max = CGROUP_LIMIT_MAX; - else - l->wbps_max = CGROUP_LIMIT_MAX; + l->limits[type] = cgroup_io_limit_defaults[type]; return 0; } @@ -3080,22 +3078,21 @@ int config_parse_io_limit( } if (!l) { + CGroupIOLimitType ttype; + l = new0(CGroupIODeviceLimit, 1); if (!l) return log_oom(); l->path = path; path = NULL; - l->rbps_max = CGROUP_LIMIT_MAX; - l->wbps_max = CGROUP_LIMIT_MAX; + for (ttype = 0; ttype < _CGROUP_IO_LIMIT_TYPE_MAX; ttype++) + l->limits[ttype] = cgroup_io_limit_defaults[ttype]; LIST_PREPEND(device_limits, c->io_device_limits, l); } - if (read) - l->rbps_max = num; - else - l->wbps_max = num; + l->limits[type] = num; return 0; } @@ -3211,7 +3208,7 @@ int config_parse_blockio_bandwidth( void *userdata) { _cleanup_free_ char *path = NULL; - CGroupBlockIODeviceBandwidth *b; + CGroupBlockIODeviceBandwidth *b = NULL, *t; CGroupContext *c = data; const char *bandwidth; uint64_t bytes; @@ -3226,12 +3223,10 @@ int config_parse_blockio_bandwidth( read = streq("BlockIOReadBandwidth", lvalue); if (isempty(rvalue)) { - CGroupBlockIODeviceBandwidth *next; - - LIST_FOREACH_SAFE (device_bandwidths, b, next, c->blockio_device_bandwidths) - if (b->read == read) - cgroup_context_free_blockio_device_bandwidth(c, b); - + LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + b->rbps = CGROUP_LIMIT_MAX; + b->wbps = CGROUP_LIMIT_MAX; + } return 0; } @@ -3259,16 +3254,30 @@ int config_parse_blockio_bandwidth( return 0; } - b = new0(CGroupBlockIODeviceBandwidth, 1); - if (!b) - return log_oom(); + LIST_FOREACH(device_bandwidths, t, c->blockio_device_bandwidths) { + if (path_equal(path, t->path)) { + b = t; + break; + } + } - b->path = path; - path = NULL; - b->bandwidth = bytes; - b->read = read; + if (!t) { + b = new0(CGroupBlockIODeviceBandwidth, 1); + if (!b) + return log_oom(); - LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, b); + b->path = path; + path = NULL; + b->rbps = CGROUP_LIMIT_MAX; + b->wbps = CGROUP_LIMIT_MAX; + + LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, b); + } + + if (read) + b->rbps = bytes; + else + b->wbps = bytes; return 0; } diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 9431dad411..f6559cd854 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -284,8 +284,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "a(ss)", 1, path, rwm); } - } else if (STR_IN_SET(field, "IOReadBandwidthMax", "IOWriteBandwidthMax", - "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { + } else if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { if (isempty(eq)) r = sd_bus_message_append(m, "v", "a(st)", 0); diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 53ab650ce2..b943c68e1b 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -4448,7 +4448,7 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte return 0; - } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IOReadBandwidthMax") || streq(name, "IOWriteBandwidthMax") || + } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (cgroup_io_limit_type_from_string(name) >= 0 || streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) { const char *path; uint64_t bandwidth; diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index 4677f7cbd9..a027eb0fd2 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -48,6 +48,7 @@ static int test_cgroup_mask(void) { m->default_cpu_accounting = m->default_memory_accounting = m->default_blockio_accounting = + m->default_io_accounting = m->default_tasks_accounting = false; m->default_tasks_max = (uint64_t) -1; @@ -76,7 +77,7 @@ static int test_cgroup_mask(void) { assert_se(unit_get_own_mask(daughter) == 0); assert_se(unit_get_own_mask(grandchild) == 0); assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY); - assert_se(unit_get_own_mask(parent) == CGROUP_MASK_BLKIO); + assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); assert_se(unit_get_own_mask(root) == 0); /* Verify aggregation of member masks */ @@ -85,23 +86,23 @@ static int test_cgroup_mask(void) { assert_se(unit_get_members_mask(grandchild) == 0); assert_se(unit_get_members_mask(parent_deep) == 0); assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of sibling masks. */ assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); assert_se(unit_get_siblings_mask(grandchild) == 0); assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of target masks. */ assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); assert_se(unit_get_target_mask(grandchild) == 0); assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); manager_free(m); diff --git a/test/parent.slice b/test/parent.slice index 0222f8eb47..a95f90392d 100644 --- a/test/parent.slice +++ b/test/parent.slice @@ -2,4 +2,4 @@ Description=Parent Slice [Slice] -BlockIOWeight=200 +IOWeight=200 |