summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTejun Heo <htejun@fb.com>2016-05-27 09:10:18 -0700
committerLennart Poettering <lennart@poettering.net>2016-05-27 18:10:18 +0200
commitda4d897e75e574911cb73ac91fdeef7d4fce8fbe (patch)
treecfa92fa7f77cdccd2f1807f205f795ae40a0a39d
parentffe54bf4bfb5a210861a3cde47ff07d9ffdebca7 (diff)
core: add cgroup memory controller support on the unified hierarchy (#3315)
On the unified hierarchy, memory controller implements three control knobs - low, high and max which enables more useable and versatile control over memory usage. This patch implements support for the three control knobs. * MemoryLow, MemoryHigh and MemoryMax are added for memory.low, memory.high and memory.max, respectively. * As all absolute limits on the unified hierarchy use "max" for no limit, make memory limit parse functions accept "max" in addition to "infinity" and document "max" for the new knobs. * Implement compatibility translation between MemoryMax and MemoryLimit. v2: - Fixed missing else's in config_parse_memory_limit(). - Fixed missing newline when writing out drop-ins. - Coding style updates to use "val > 0" instead of "val". - Minor updates to documentation.
-rw-r--r--man/systemd.resource-control.xml71
-rw-r--r--src/core/cgroup.c63
-rw-r--r--src/core/cgroup.h4
-rw-r--r--src/core/dbus-cgroup.c28
-rw-r--r--src/core/load-fragment-gperf.gperf.m43
-rw-r--r--src/core/load-fragment.c25
-rw-r--r--src/shared/bus-unit-util.c6
-rw-r--r--src/systemctl/systemctl.c39
8 files changed, 206 insertions, 33 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 066f2cc19b..570619a743 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -114,6 +114,13 @@
prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>Memory</option></term>
+ <listitem>
+ <para><varname>MemoryMax</varname> replaces <varname>MemoryLimit</varname>. <varname>MemoryLow</varname>
+ and <varname>MemoryHigh</varname> are effective only on unified hierarchy.</para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
@@ -213,6 +220,67 @@
</varlistentry>
<varlistentry>
+ <term><varname>MemoryLow=<replaceable>bytes</replaceable></varname></term>
+
+ <listitem>
+ <para>Specify the best-effort memory usage protection of the executed processes in this unit. If the memory
+ usages of this unit and all its ancestors are below their low boundaries, this unit's memory won't be
+ reclaimed as long as memory can be reclaimed from unprotected units.</para>
+
+ <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is
+ parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. This controls the
+ <literal>memory.low</literal> control group attribute. For details about this control group attribute, see
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
+
+ <para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>MemoryHigh=<replaceable>bytes</replaceable></varname></term>
+
+ <listitem>
+ <para>Specify the high limit on memory usage of the executed processes in this unit. Memory usage may go
+ above the limit if unavoidable, but the processes are heavily slowed down and memory is taken away
+ aggressively in such cases. This is the main mechanism to control memory usage of a unit.</para>
+
+ <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is
+ parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the
+ special value <literal>max</literal>, no memory limit is applied. This controls the
+ <literal>memory.high</literal> control group attribute. For details about this control group attribute, see
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
+
+ <para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>MemoryMax=<replaceable>bytes</replaceable></varname></term>
+
+ <listitem>
+ <para>Specify the absolute limit on memory usage of the executed processes in this unit. If memory usage
+ cannot be contained under the limit, out-of-memory killer is invoked inside the unit. It is recommended to
+ use <varname>MemoryHigh=</varname> as the main control mechanism and use <varname>MemoryMax=</varname> as the
+ last line of defense.</para>
+
+ <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is
+ parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the
+ special value <literal>max</literal>, no memory limit is applied. This controls the
+ <literal>memory.max</literal> control group attribute. For details about this control group attribute, see
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
+
+ <para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used. Use
+ <varname>MemoryLimit=</varname> on systems using the legacy control group hierarchy.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
<listitem>
@@ -230,6 +298,9 @@
url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para>
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the legacy control group hierarchy is used. Use
+ <varname>MemoryMax=</varname> on systems using the unified control group hierarchy.</para>
</listitem>
</varlistentry>
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 0fb63b1bd1..fbe69df4e9 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -46,7 +46,10 @@ void cgroup_context_init(CGroupContext *c) {
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->cpu_quota_per_sec_usec = USEC_INFINITY;
- c->memory_limit = (uint64_t) -1;
+ c->memory_high = CGROUP_LIMIT_MAX;
+ c->memory_max = CGROUP_LIMIT_MAX;
+
+ c->memory_limit = CGROUP_LIMIT_MAX;
c->io_weight = CGROUP_WEIGHT_INVALID;
c->startup_io_weight = CGROUP_WEIGHT_INVALID;
@@ -147,6 +150,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
"%sStartupBlockIOWeight=%" PRIu64 "\n"
+ "%sMemoryLow=%" PRIu64 "\n"
+ "%sMemoryHigh=%" PRIu64 "\n"
+ "%sMemoryMax=%" PRIu64 "\n"
"%sMemoryLimit=%" PRIu64 "\n"
"%sTasksMax=%" PRIu64 "\n"
"%sDevicePolicy=%s\n"
@@ -163,6 +169,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->startup_io_weight,
prefix, c->blockio_weight,
prefix, c->startup_blockio_weight,
+ prefix, c->memory_low,
+ prefix, c->memory_high,
+ prefix, c->memory_max,
prefix, c->memory_limit,
prefix, c->tasks_max,
prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -496,6 +505,23 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de
return n;
}
+static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
+ return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+}
+
+static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max";
+ int r;
+
+ if (v != CGROUP_LIMIT_MAX)
+ xsprintf(buf, "%" PRIu64 "\n", v);
+
+ r = cg_set_attribute("memory", path, file, buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set %s on %s: %m", file, path);
+}
+
void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) {
bool is_root;
int r;
@@ -662,26 +688,30 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
}
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
- if (c->memory_limit != (uint64_t) -1) {
- char buf[DECIMAL_STR_MAX(uint64_t) + 1];
-
- sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
+ if (cg_unified() > 0) {
+ uint64_t max = c->memory_max;
- if (cg_unified() <= 0)
- r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
+ if (cgroup_context_has_unified_memory_config(c))
+ max = c->memory_max;
else
- r = cg_set_attribute("memory", path, "memory.max", buf);
+ max = c->memory_limit;
+ cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low);
+ cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high);
+ cgroup_apply_unified_memory_limit(path, "memory.max", max);
} else {
- if (cg_unified() <= 0)
- r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+
+ if (c->memory_limit != CGROUP_LIMIT_MAX)
+ xsprintf(buf, "%" PRIu64 "\n", c->memory_limit);
else
- r = cg_set_attribute("memory", path, "memory.max", "max");
- }
+ xsprintf(buf, "%" PRIu64 "\n", c->memory_max);
- if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);
+ r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set memory.limit_in_bytes on %s: %m", path);
+ }
}
if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
@@ -778,7 +808,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
if (c->memory_accounting ||
- c->memory_limit != (uint64_t) -1)
+ c->memory_limit != CGROUP_LIMIT_MAX ||
+ cgroup_context_has_unified_memory_config(c))
mask |= CGROUP_MASK_MEMORY;
if (c->device_allow ||
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 2b1edbafc4..ff87adfba1 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -94,6 +94,10 @@ struct CGroupContext {
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
+
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index d6053581f8..27050b4507 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -228,6 +228,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
+ SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
+ SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
+ SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -826,6 +829,31 @@ int bus_cgroup_set_property(
return 1;
+ } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
+ uint64_t v;
+
+ r = sd_bus_message_read(message, "t", &v);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ if (streq(name, "MemoryLow"))
+ c->memory_low = v;
+ else if (streq(name, "MemoryHigh"))
+ c->memory_high = v;
+ else
+ c->memory_max = v;
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
+
+ if (v == CGROUP_LIMIT_MAX)
+ unit_write_drop_in_private_format(u, mode, name, "%s=max\n", name);
+ else
+ unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64 "\n", name, v);
+ }
+
+ return 1;
+
} else if (streq(name, "MemoryLimit")) {
uint64_t limit;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 8193418980..00bdc238ce 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -117,6 +117,9 @@ $1.CPUShares, config_parse_cpu_shares, 0,
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
+$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 86b4fb071b..09d3f65c77 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2793,21 +2793,26 @@ int config_parse_memory_limit(
void *userdata) {
CGroupContext *c = data;
- uint64_t bytes;
+ uint64_t bytes = CGROUP_LIMIT_MAX;
int r;
- if (isempty(rvalue) || streq(rvalue, "infinity")) {
- c->memory_limit = (uint64_t) -1;
- return 0;
+ if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) {
+ r = parse_size(rvalue, 1024, &bytes);
+ if (r < 0 || bytes < 1) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
}
- r = parse_size(rvalue, 1024, &bytes);
- if (r < 0 || bytes < 1) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
- return 0;
- }
+ if (streq(lvalue, "MemoryLow"))
+ c->memory_low = bytes;
+ else if (streq(lvalue, "MemoryHigh"))
+ c->memory_high = bytes;
+ else if (streq(lvalue, "MemoryMax"))
+ c->memory_max = bytes;
+ else
+ c->memory_limit = bytes;
- c->memory_limit = bytes;
return 0;
}
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index f68c4a41ac..502e98d9dc 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -166,11 +166,11 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "b", r);
- } else if (streq(field, "MemoryLimit")) {
+ } else if (STR_IN_SET(field, "MemoryLow", "MemoryHigh", "MemoryMax", "MemoryLimit")) {
uint64_t bytes;
- if (isempty(eq) || streq(eq, "infinity"))
- bytes = (uint64_t) -1;
+ if (isempty(eq) || streq(eq, "max") || streq(eq, "infinity"))
+ bytes = CGROUP_LIMIT_MAX;
else {
r = parse_size(eq, 1024, &bytes);
if (r < 0) {
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 0500593d06..b2ee00fab3 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -3493,6 +3493,9 @@ typedef struct UnitStatusInfo {
/* CGroup */
uint64_t memory_current;
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
uint64_t memory_limit;
uint64_t cpu_usage_nsec;
uint64_t tasks_current;
@@ -3775,10 +3778,30 @@ static void print_status_info(
printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current));
- if (i->memory_limit != (uint64_t) -1)
- printf(" (limit: %s)\n", format_bytes(buf, sizeof(buf), i->memory_limit));
- else
- printf("\n");
+ if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX ||
+ i->memory_limit != CGROUP_LIMIT_MAX) {
+ const char *prefix = "";
+
+ printf(" (");
+ if (i->memory_low > 0) {
+ printf("%slow: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_low));
+ prefix = " ";
+ }
+ if (i->memory_high != CGROUP_LIMIT_MAX) {
+ printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high));
+ prefix = " ";
+ }
+ if (i->memory_max != CGROUP_LIMIT_MAX) {
+ printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max));
+ prefix = " ";
+ }
+ if (i->memory_limit != CGROUP_LIMIT_MAX) {
+ printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit));
+ prefix = " ";
+ }
+ printf(")");
+ }
+ printf("\n");
}
if (i->cpu_usage_nsec != (uint64_t) -1) {
@@ -4007,6 +4030,12 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
i->assert_timestamp = (usec_t) u;
else if (streq(name, "MemoryCurrent"))
i->memory_current = u;
+ else if (streq(name, "MemoryLow"))
+ i->memory_low = u;
+ else if (streq(name, "MemoryHigh"))
+ i->memory_high = u;
+ else if (streq(name, "MemoryMax"))
+ i->memory_max = u;
else if (streq(name, "MemoryLimit"))
i->memory_limit = u;
else if (streq(name, "TasksCurrent"))
@@ -4500,6 +4529,8 @@ static int show_one(
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
UnitStatusInfo info = {
.memory_current = (uint64_t) -1,
+ .memory_high = CGROUP_LIMIT_MAX,
+ .memory_max = CGROUP_LIMIT_MAX,
.memory_limit = (uint64_t) -1,
.cpu_usage_nsec = (uint64_t) -1,
.tasks_current = (uint64_t) -1,