diff options
author | Tejun Heo <htejun@fb.com> | 2016-05-27 09:10:18 -0700 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2016-05-27 18:10:18 +0200 |
commit | da4d897e75e574911cb73ac91fdeef7d4fce8fbe (patch) | |
tree | cfa92fa7f77cdccd2f1807f205f795ae40a0a39d | |
parent | ffe54bf4bfb5a210861a3cde47ff07d9ffdebca7 (diff) |
core: add cgroup memory controller support on the unified hierarchy (#3315)
On the unified hierarchy, memory controller implements three control knobs -
low, high and max which enables more useable and versatile control over memory
usage. This patch implements support for the three control knobs.
* MemoryLow, MemoryHigh and MemoryMax are added for memory.low, memory.high and
memory.max, respectively.
* As all absolute limits on the unified hierarchy use "max" for no limit, make
memory limit parse functions accept "max" in addition to "infinity" and
document "max" for the new knobs.
* Implement compatibility translation between MemoryMax and MemoryLimit.
v2:
- Fixed missing else's in config_parse_memory_limit().
- Fixed missing newline when writing out drop-ins.
- Coding style updates to use "val > 0" instead of "val".
- Minor updates to documentation.
-rw-r--r-- | man/systemd.resource-control.xml | 71 | ||||
-rw-r--r-- | src/core/cgroup.c | 63 | ||||
-rw-r--r-- | src/core/cgroup.h | 4 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 28 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 3 | ||||
-rw-r--r-- | src/core/load-fragment.c | 25 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 6 | ||||
-rw-r--r-- | src/systemctl/systemctl.c | 39 |
8 files changed, 206 insertions, 33 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 066f2cc19b..570619a743 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -114,6 +114,13 @@ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para> </listitem> </varlistentry> + <varlistentry> + <term><option>Memory</option></term> + <listitem> + <para><varname>MemoryMax</varname> replaces <varname>MemoryLimit</varname>. <varname>MemoryLow</varname> + and <varname>MemoryHigh</varname> are effective only on unified hierarchy.</para> + </listitem> + </varlistentry> </variablelist> </para> @@ -213,6 +220,67 @@ </varlistentry> <varlistentry> + <term><varname>MemoryLow=<replaceable>bytes</replaceable></varname></term> + + <listitem> + <para>Specify the best-effort memory usage protection of the executed processes in this unit. If the memory + usages of this unit and all its ancestors are below their low boundaries, this unit's memory won't be + reclaimed as long as memory can be reclaimed from unprotected units.</para> + + <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. This controls the + <literal>memory.low</literal> control group attribute. For details about this control group attribute, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> + + <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>MemoryHigh=<replaceable>bytes</replaceable></varname></term> + + <listitem> + <para>Specify the high limit on memory usage of the executed processes in this unit. Memory usage may go + above the limit if unavoidable, but the processes are heavily slowed down and memory is taken away + aggressively in such cases. This is the main mechanism to control memory usage of a unit.</para> + + <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the + special value <literal>max</literal>, no memory limit is applied. This controls the + <literal>memory.high</literal> control group attribute. For details about this control group attribute, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> + + <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>MemoryMax=<replaceable>bytes</replaceable></varname></term> + + <listitem> + <para>Specify the absolute limit on memory usage of the executed processes in this unit. If memory usage + cannot be contained under the limit, out-of-memory killer is invoked inside the unit. It is recommended to + use <varname>MemoryHigh=</varname> as the main control mechanism and use <varname>MemoryMax=</varname> as the + last line of defense.</para> + + <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the + special value <literal>max</literal>, no memory limit is applied. This controls the + <literal>memory.max</literal> control group attribute. For details about this control group attribute, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> + + <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used. Use + <varname>MemoryLimit=</varname> on systems using the legacy control group hierarchy.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term> <listitem> @@ -230,6 +298,9 @@ url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para> <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the legacy control group hierarchy is used. Use + <varname>MemoryMax=</varname> on systems using the unified control group hierarchy.</para> </listitem> </varlistentry> diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 0fb63b1bd1..fbe69df4e9 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -46,7 +46,10 @@ void cgroup_context_init(CGroupContext *c) { c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; c->cpu_quota_per_sec_usec = USEC_INFINITY; - c->memory_limit = (uint64_t) -1; + c->memory_high = CGROUP_LIMIT_MAX; + c->memory_max = CGROUP_LIMIT_MAX; + + c->memory_limit = CGROUP_LIMIT_MAX; c->io_weight = CGROUP_WEIGHT_INVALID; c->startup_io_weight = CGROUP_WEIGHT_INVALID; @@ -147,6 +150,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" + "%sMemoryLow=%" PRIu64 "\n" + "%sMemoryHigh=%" PRIu64 "\n" + "%sMemoryMax=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n" "%sDevicePolicy=%s\n" @@ -163,6 +169,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, + prefix, c->memory_low, + prefix, c->memory_high, + prefix, c->memory_max, prefix, c->memory_limit, prefix, c->tasks_max, prefix, cgroup_device_policy_to_string(c->device_policy), @@ -496,6 +505,23 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de return n; } +static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { + return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; +} + +static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) { + char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; + int r; + + if (v != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", v); + + r = cg_set_attribute("memory", path, file, buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set %s on %s: %m", file, path); +} + void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { bool is_root; int r; @@ -662,26 +688,30 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (c->memory_limit != (uint64_t) -1) { - char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - - sprintf(buf, "%" PRIu64 "\n", c->memory_limit); + if (cg_unified() > 0) { + uint64_t max = c->memory_max; - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (cgroup_context_has_unified_memory_config(c)) + max = c->memory_max; else - r = cg_set_attribute("memory", path, "memory.max", buf); + max = c->memory_limit; + cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low); + cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high); + cgroup_apply_unified_memory_limit(path, "memory.max", max); } else { - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); + char buf[DECIMAL_STR_MAX(uint64_t) + 1]; + + if (c->memory_limit != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", c->memory_limit); else - r = cg_set_attribute("memory", path, "memory.max", "max"); - } + xsprintf(buf, "%" PRIu64 "\n", c->memory_max); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path); + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set memory.limit_in_bytes on %s: %m", path); + } } if ((mask & CGROUP_MASK_DEVICES) && !is_root) { @@ -778,7 +808,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || - c->memory_limit != (uint64_t) -1) + c->memory_limit != CGROUP_LIMIT_MAX || + cgroup_context_has_unified_memory_config(c)) mask |= CGROUP_MASK_MEMORY; if (c->device_allow || diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 2b1edbafc4..ff87adfba1 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -94,6 +94,10 @@ struct CGroupContext { LIST_HEAD(CGroupIODeviceWeight, io_device_weights); LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index d6053581f8..27050b4507 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -228,6 +228,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0), + SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), + SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), + SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0), SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), @@ -826,6 +829,31 @@ int bus_cgroup_set_property( return 1; + } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) { + uint64_t v; + + r = sd_bus_message_read(message, "t", &v); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + if (streq(name, "MemoryLow")) + c->memory_low = v; + else if (streq(name, "MemoryHigh")) + c->memory_high = v; + else + c->memory_max = v; + + unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); + + if (v == CGROUP_LIMIT_MAX) + unit_write_drop_in_private_format(u, mode, name, "%s=max\n", name); + else + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64 "\n", name, v); + } + + return 1; + } else if (streq(name, "MemoryLimit")) { uint64_t limit; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 8193418980..00bdc238ce 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -117,6 +117,9 @@ $1.CPUShares, config_parse_cpu_shares, 0, $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) +$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 86b4fb071b..09d3f65c77 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2793,21 +2793,26 @@ int config_parse_memory_limit( void *userdata) { CGroupContext *c = data; - uint64_t bytes; + uint64_t bytes = CGROUP_LIMIT_MAX; int r; - if (isempty(rvalue) || streq(rvalue, "infinity")) { - c->memory_limit = (uint64_t) -1; - return 0; + if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) { + r = parse_size(rvalue, 1024, &bytes); + if (r < 0 || bytes < 1) { + log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); + return 0; + } } - r = parse_size(rvalue, 1024, &bytes); - if (r < 0 || bytes < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); - return 0; - } + if (streq(lvalue, "MemoryLow")) + c->memory_low = bytes; + else if (streq(lvalue, "MemoryHigh")) + c->memory_high = bytes; + else if (streq(lvalue, "MemoryMax")) + c->memory_max = bytes; + else + c->memory_limit = bytes; - c->memory_limit = bytes; return 0; } diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index f68c4a41ac..502e98d9dc 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -166,11 +166,11 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "b", r); - } else if (streq(field, "MemoryLimit")) { + } else if (STR_IN_SET(field, "MemoryLow", "MemoryHigh", "MemoryMax", "MemoryLimit")) { uint64_t bytes; - if (isempty(eq) || streq(eq, "infinity")) - bytes = (uint64_t) -1; + if (isempty(eq) || streq(eq, "max") || streq(eq, "infinity")) + bytes = CGROUP_LIMIT_MAX; else { r = parse_size(eq, 1024, &bytes); if (r < 0) { diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 0500593d06..b2ee00fab3 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -3493,6 +3493,9 @@ typedef struct UnitStatusInfo { /* CGroup */ uint64_t memory_current; + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; uint64_t memory_limit; uint64_t cpu_usage_nsec; uint64_t tasks_current; @@ -3775,10 +3778,30 @@ static void print_status_info( printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current)); - if (i->memory_limit != (uint64_t) -1) - printf(" (limit: %s)\n", format_bytes(buf, sizeof(buf), i->memory_limit)); - else - printf("\n"); + if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX || + i->memory_limit != CGROUP_LIMIT_MAX) { + const char *prefix = ""; + + printf(" ("); + if (i->memory_low > 0) { + printf("%slow: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_low)); + prefix = " "; + } + if (i->memory_high != CGROUP_LIMIT_MAX) { + printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high)); + prefix = " "; + } + if (i->memory_max != CGROUP_LIMIT_MAX) { + printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max)); + prefix = " "; + } + if (i->memory_limit != CGROUP_LIMIT_MAX) { + printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit)); + prefix = " "; + } + printf(")"); + } + printf("\n"); } if (i->cpu_usage_nsec != (uint64_t) -1) { @@ -4007,6 +4030,12 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo * i->assert_timestamp = (usec_t) u; else if (streq(name, "MemoryCurrent")) i->memory_current = u; + else if (streq(name, "MemoryLow")) + i->memory_low = u; + else if (streq(name, "MemoryHigh")) + i->memory_high = u; + else if (streq(name, "MemoryMax")) + i->memory_max = u; else if (streq(name, "MemoryLimit")) i->memory_limit = u; else if (streq(name, "TasksCurrent")) @@ -4500,6 +4529,8 @@ static int show_one( _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; UnitStatusInfo info = { .memory_current = (uint64_t) -1, + .memory_high = CGROUP_LIMIT_MAX, + .memory_max = CGROUP_LIMIT_MAX, .memory_limit = (uint64_t) -1, .cpu_usage_nsec = (uint64_t) -1, .tasks_current = (uint64_t) -1, |