diff options
author | Tejun Heo <htejun@fb.com> | 2016-05-18 17:35:12 -0700 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2016-05-18 17:35:12 -0700 |
commit | 538b48524cf48afc299ab78690bc03c18af67ede (patch) | |
tree | 70c5612292ae16f1485644bab7c80bdba365565e | |
parent | 64faf04c1f178804e066972ab0d6ff1181c0d373 (diff) |
core: translate between IO and BlockIO settings to ease transition
Due to the substantial interface changes in cgroup unified hierarchy, new IO
settings are introduced. Currently, IO settings apply only to unified
hierarchy and BlockIO to legacy. While the transition is necessary, it's
painful for users to have to provide configs for both. This patch implements
translation from one config set to another for configs which make sense.
* The translation takes place during application of the configs. Users won't
see IO or BlockIO settings appearing without being explicitly created.
* The translation takes place only if there is no config for the matching
cgroup hierarchy type at all.
While this doesn't provide comprehensive compatibility, it should considerably
ease transition to the new IO settings which are a superset of BlockIO
settings.
v2:
- Update test-cgroup-mask.c so that it accounts for the fact that
CGROUP_MASK_IO and CGROUP_MASK_BLKIO move together. Also, test/parent.slice
now sets IOWeight instead of BlockIOWeight.
-rw-r--r-- | man/systemd.resource-control.xml | 25 | ||||
-rw-r--r-- | src/core/cgroup.c | 146 | ||||
-rw-r--r-- | src/test/test-cgroup-mask.c | 13 | ||||
-rw-r--r-- | test/parent.slice | 2 |
4 files changed, 148 insertions, 38 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 8a95e1196b..066f2cc19b 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -99,6 +99,31 @@ </refsect1> <refsect1> + <title>Unified and Legacy Control Group Hierarchies</title> + + <para>Unified control group hierarchy is the new version of kernel control group interface. Depending on the + resource type, there are differences in resource control capabilities. Also, because of interface changes, some + resource types have a separate set of options on the unified hierarchy.</para> + + <para> + <variablelist> + <varlistentry> + <term><option>IO</option></term> + <listitem> + <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname> + prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para> + </listitem> + </varlistentry> + </variablelist> + </para> + + <para>To ease the transition, there is best-effort translation between the two versions of settings. If all + settings of a unit for a given resource type are for the other hierarchy type, the settings are translated and + applied. If there are any valid settings for the hierarchy in use, all translations are disabled for the resource + type. Mixing the two types of settings on a unit can lead to confusing results.</para> + </refsect1> + + <refsect1> <title>Options</title> <para>Units of the types listed above can have settings diff --git a/src/core/cgroup.c b/src/core/cgroup.c index ddc4f69ebe..8b8b2ac5ff 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -358,6 +358,24 @@ fail: return -errno; } +static bool cgroup_context_has_io_config(CGroupContext *c) +{ + return c->io_accounting || + c->io_weight != CGROUP_WEIGHT_INVALID || + c->startup_io_weight != CGROUP_WEIGHT_INVALID || + c->io_device_weights || + c->io_device_limits; +} + +static bool cgroup_context_has_blockio_config(CGroupContext *c) +{ + return c->blockio_accounting || + c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->blockio_device_weights || + c->blockio_device_bandwidths; +} + static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) { if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && @@ -380,6 +398,18 @@ static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state return CGROUP_BLKIO_WEIGHT_DEFAULT; } +static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) +{ + return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) +{ + return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); +} + static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; @@ -525,14 +555,19 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } if (mask & CGROUP_MASK_IO) { - CGroupIODeviceLimit *l, *next; + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { char buf[8+DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; - CGroupIODeviceWeight *w; - weight = cgroup_context_io_weight(c, state); + if (has_io) + weight = cgroup_context_io_weight(c, state); + else if (has_blockio) + weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); + else + weight = CGROUP_WEIGHT_DEFAULT; xsprintf(buf, "default %" PRIu64 "\n", weight); r = cg_set_attribute("io", path, "io.weight", buf); @@ -540,27 +575,62 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set io.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_io_device_weight(path, w->path, w->weight); + if (has_io) { + CGroupIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_io_device_weight(path, w->path, w->weight); + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); + } } /* Apply limits and free ones without config. */ - LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) - cgroup_context_free_io_device_limit(c, l); + if (has_io) { + CGroupIODeviceLimit *l, *next; + + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + cgroup_context_free_io_device_limit(c, l); + } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; + CGroupIOLimitType type; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + limits[type] = cgroup_io_limit_defaults[type]; + + limits[CGROUP_IO_RBPS_MAX] = b->rbps; + limits[CGROUP_IO_WBPS_MAX] = b->wbps; + + if (!cgroup_apply_io_device_limit(path, b->path, limits)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } } } if (mask & CGROUP_MASK_BLKIO) { - CGroupBlockIODeviceBandwidth *b, *next; + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { char buf[DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; - CGroupBlockIODeviceWeight *w; - weight = cgroup_context_blkio_weight(c, state); + if (has_blockio) + weight = cgroup_context_blkio_weight(c, state); + else if (has_io) + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); + else + weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); r = cg_set_attribute("blkio", path, "blkio.weight", buf); @@ -568,15 +638,36 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set blkio.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, w->weight); + if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, w->weight); + } else if (has_io) { + CGroupIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); + } } /* Apply limits and free ones without config. */ - LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) - cgroup_context_free_blockio_device_bandwidth(c, b); + if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } + } else if (has_io) { + CGroupIODeviceLimit *l, *next; + + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + cgroup_context_free_io_device_limit(c, l); + } } } @@ -693,19 +784,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; - if (c->io_accounting || - c->io_weight != CGROUP_WEIGHT_INVALID || - c->startup_io_weight != CGROUP_WEIGHT_INVALID || - c->io_device_weights || - c->io_device_limits) - mask |= CGROUP_MASK_IO; - - if (c->blockio_accounting || - c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->blockio_device_weights || - c->blockio_device_bandwidths) - mask |= CGROUP_MASK_BLKIO; + if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c)) + mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || c->memory_limit != (uint64_t) -1) @@ -1795,6 +1875,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { if (m == 0) return; + /* always invalidate compat pairs together */ + if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)) + m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; + if ((u->cgroup_realized_mask & m) == 0) return; diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index 4eb8fcd773..a025704722 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -48,6 +48,7 @@ static int test_cgroup_mask(void) { m->default_cpu_accounting = m->default_memory_accounting = m->default_blockio_accounting = + m->default_io_accounting = m->default_tasks_accounting = false; m->default_tasks_max = (uint64_t) -1; @@ -76,7 +77,7 @@ static int test_cgroup_mask(void) { assert_se(unit_get_own_mask(daughter) == 0); assert_se(unit_get_own_mask(grandchild) == 0); assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY); - assert_se(unit_get_own_mask(parent) == CGROUP_MASK_BLKIO); + assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); assert_se(unit_get_own_mask(root) == 0); /* Verify aggregation of member masks */ @@ -85,23 +86,23 @@ static int test_cgroup_mask(void) { assert_se(unit_get_members_mask(grandchild) == 0); assert_se(unit_get_members_mask(parent_deep) == 0); assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of sibling masks. */ assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); assert_se(unit_get_siblings_mask(grandchild) == 0); assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of target masks. */ assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); assert_se(unit_get_target_mask(grandchild) == 0); assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); manager_free(m); diff --git a/test/parent.slice b/test/parent.slice index 0222f8eb47..a95f90392d 100644 --- a/test/parent.slice +++ b/test/parent.slice @@ -2,4 +2,4 @@ Description=Parent Slice [Slice] -BlockIOWeight=200 +IOWeight=200 |