diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/cgroup.c | 185 | ||||
-rw-r--r-- | src/core/cgroup.h | 1 | ||||
-rw-r--r-- | src/core/dbus-execute.c | 5 | ||||
-rw-r--r-- | src/core/execute.c | 53 | ||||
-rw-r--r-- | src/core/execute.h | 1 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 99 |
7 files changed, 237 insertions, 109 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index fbe69df4e9..f3e0c54b76 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -36,6 +36,22 @@ #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +static void cgroup_compat_warn(void) +{ + static bool cgroup_compat_warned = false; + + if (cgroup_compat_warned) + return; + + log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details."); + cgroup_compat_warned = true; +} + +#define log_cgroup_compat(unit, fmt, ...) do { \ + cgroup_compat_warn(); \ + log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \ + } while (0) + void cgroup_context_init(CGroupContext *c) { assert(c); @@ -413,7 +429,7 @@ static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) { CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); } -static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) { +static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; int r; @@ -423,13 +439,13 @@ static void cgroup_apply_io_device_weight(const char *path, const char *dev_path return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight); - r = cg_set_attribute("io", path, "io.weight", buf); + r = cg_set_attribute("io", u->cgroup_path, "io.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight: %m"); } -static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) { +static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; int r; @@ -439,13 +455,13 @@ static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_p return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight); - r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.weight_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight_device: %m"); } -static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) { +static unsigned cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) { char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; CGroupIOLimitType type; @@ -469,14 +485,14 @@ static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_p xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); - r = cg_set_attribute("io", path, "io.max", buf); + r = cg_set_attribute("io", u->cgroup_path, "io.max", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.max on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.max: %m"); return n; } -static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) { +static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; unsigned n = 0; @@ -489,18 +505,18 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de if (rbps != CGROUP_LIMIT_MAX) n++; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps); - r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.read_bps_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.read_bps_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.read_bps_device: %m"); if (wbps != CGROUP_LIMIT_MAX) n++; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps); - r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.write_bps_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.write_bps_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.write_bps_device: %m"); return n; } @@ -509,23 +525,30 @@ static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; } -static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) { +static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) { char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; int r; if (v != CGROUP_LIMIT_MAX) xsprintf(buf, "%" PRIu64 "\n", v); - r = cg_set_attribute("memory", path, file, buf); + r = cg_set_attribute("memory", u->cgroup_path, file, buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set %s on %s: %m", file, path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set %s: %m", file); } -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { +static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { + const char *path; + CGroupContext *c; bool is_root; int r; + assert(u); + + c = unit_get_cgroup_context(u); + path = u->cgroup_path; + assert(c); assert(path); @@ -551,14 +574,14 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT); r = cg_set_attribute("cpu", path, "cpu.shares", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.shares: %m"); sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_period_us: %m"); if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); @@ -566,8 +589,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } else r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_quota_us: %m"); } if (mask & CGROUP_MASK_IO) { @@ -580,29 +603,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (has_io) weight = cgroup_context_io_weight(c, state); - else if (has_blockio) - weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); - else + else if (has_blockio) { + uint64_t blkio_weight = cgroup_context_blkio_weight(c, state); + + weight = cgroup_weight_blkio_to_io(blkio_weight); + + log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64, + blkio_weight, weight); + } else weight = CGROUP_WEIGHT_DEFAULT; xsprintf(buf, "default %" PRIu64 "\n", weight); r = cg_set_attribute("io", path, "io.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight: %m"); if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_io_device_weight(path, w->path, w->weight); + cgroup_apply_io_device_weight(u, w->path, w->weight); } else if (has_blockio) { CGroupBlockIODeviceWeight *w; /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); + LIST_FOREACH(device_weights, w, c->blockio_device_weights) { + weight = cgroup_weight_blkio_to_io(w->weight); + + log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s", + w->weight, weight, w->path); + + cgroup_apply_io_device_weight(u, w->path, weight); + } } } @@ -611,7 +645,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + if (!cgroup_apply_io_device_limit(u, l->path, l->limits)) cgroup_context_free_io_device_limit(c, l); } } else if (has_blockio) { @@ -627,7 +661,10 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M limits[CGROUP_IO_RBPS_MAX] = b->rbps; limits[CGROUP_IO_WBPS_MAX] = b->wbps; - if (!cgroup_apply_io_device_limit(path, b->path, limits)) + log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s", + b->rbps, b->wbps, b->path); + + if (!cgroup_apply_io_device_limit(u, b->path, limits)) cgroup_context_free_blockio_device_bandwidth(c, b); } } @@ -643,29 +680,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (has_blockio) weight = cgroup_context_blkio_weight(c, state); - else if (has_io) + else if (has_io) { + uint64_t io_weight = cgroup_context_io_weight(c, state); + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); - else + + log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64, + io_weight, weight); + } else weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); r = cg_set_attribute("blkio", path, "blkio.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight: %m"); if (has_blockio) { CGroupBlockIODeviceWeight *w; /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, w->weight); + cgroup_apply_blkio_device_weight(u, w->path, w->weight); } else if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); + LIST_FOREACH(device_weights, w, c->io_device_weights) { + weight = cgroup_weight_io_to_blkio(w->weight); + + log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s", + w->weight, weight, w->path); + + cgroup_apply_blkio_device_weight(u, w->path, weight); + } } } @@ -674,14 +722,17 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupBlockIODeviceBandwidth *b, *next; LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) cgroup_context_free_blockio_device_bandwidth(c, b); } } else if (has_io) { CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s", + l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path); + + if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) cgroup_context_free_io_device_limit(c, l); } } @@ -693,24 +744,32 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (cgroup_context_has_unified_memory_config(c)) max = c->memory_max; - else + else { max = c->memory_limit; - cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low); - cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high); - cgroup_apply_unified_memory_limit(path, "memory.max", max); + if (max != CGROUP_LIMIT_MAX) + log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max); + } + + cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low); + cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high); + cgroup_apply_unified_memory_limit(u, "memory.max", max); } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; if (c->memory_limit != CGROUP_LIMIT_MAX) xsprintf(buf, "%" PRIu64 "\n", c->memory_limit); - else + else { xsprintf(buf, "%" PRIu64 "\n", c->memory_max); + if (c->memory_max != CGROUP_LIMIT_MAX) + log_cgroup_compat(u, "Applying MemoryMax %" PRIu64 " as MemoryLimit", c->memory_max); + } + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set memory.limit_in_bytes: %m"); } } @@ -726,8 +785,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M else r = cg_set_attribute("devices", path, "devices.allow", "a"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to reset devices.list on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to reset devices.list: %m"); if (c->device_policy == CGROUP_CLOSED || (c->device_policy == CGROUP_AUTO && c->device_allow)) { @@ -773,7 +832,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M else if (startswith(a->path, "char-")) whitelist_major(path, a->path + 5, 'c', acc); else - log_debug("Ignoring device %s while writing cgroup attribute.", a->path); + log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path); } } @@ -788,8 +847,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M r = cg_set_attribute("pids", path, "pids.max", "max"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set pids.max on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set pids.max: %m"); } } @@ -1224,7 +1283,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { return r; /* Finally, apply the necessary attributes. */ - cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state); + cgroup_context_apply(u, target_mask, state); return 0; } @@ -1355,7 +1414,7 @@ void unit_prune_cgroup(Unit *u) { r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice); if (r < 0) { - log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path); + log_unit_debug_errno(u, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path); return; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index ff87adfba1..f21409bd5d 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -125,7 +125,6 @@ struct CGroupContext { void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state); CGroupMask cgroup_context_get_mask(CGroupContext *c); diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index de29d5da04..4c88c41127 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -719,6 +719,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, runtime_directory_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; @@ -1056,7 +1057,7 @@ int bus_exec_context_set_transient_property( } else if (STR_IN_SET(name, "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "PrivateTmp", "PrivateDevices", "PrivateNetwork", - "NoNewPrivileges", "SyslogLevelPrefix")) { + "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1080,6 +1081,8 @@ int bus_exec_context_set_transient_property( c->no_new_privileges = b; else if (streq(name, "SyslogLevelPrefix")) c->syslog_level_prefix = b; + else if (streq(name, "MemoryDenyWriteExecute")) + c->memory_deny_write_execute = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } diff --git a/src/core/execute.c b/src/core/execute.c index 5eb3f13695..2cef70e668 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -25,6 +25,7 @@ #include <signal.h> #include <string.h> #include <sys/capability.h> +#include <sys/mman.h> #include <sys/personality.h> #include <sys/prctl.h> #include <sys/socket.h> @@ -1190,6 +1191,45 @@ finish: return r; } +static int apply_memory_deny_write_execute(const ExecContext *c) { + scmp_filter_ctx *seccomp; + int r; + + assert(c); + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_KILL, + SCMP_SYS(mmap), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) + goto finish; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_KILL, + SCMP_SYS(mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { @@ -1912,6 +1952,13 @@ static int exec_child( } } + if (context->memory_deny_write_execute) { + r = apply_memory_deny_write_execute(context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } if (use_syscall_filter) { r = apply_seccomp(context); if (r < 0) { @@ -2371,7 +2418,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sPrivateDevices: %s\n" "%sProtectHome: %s\n" "%sProtectSystem: %s\n" - "%sIgnoreSIGPIPE: %s\n", + "%sIgnoreSIGPIPE: %s\n" + "%sMemoryDenyWriteExecute: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", prefix, c->root_directory ? c->root_directory : "/", @@ -2381,7 +2429,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->private_devices), prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), - prefix, yes_no(c->ignore_sigpipe)); + prefix, yes_no(c->ignore_sigpipe), + prefix, yes_no(c->memory_deny_write_execute)); STRV_FOREACH(e, c->environment) fprintf(f, "%sEnvironment: %s\n", prefix, *e); diff --git a/src/core/execute.h b/src/core/execute.h index 41148bcea2..464869d226 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -197,6 +197,7 @@ struct ExecContext { bool ioprio_set:1; bool cpu_sched_set:1; bool no_new_privileges_set:1; + bool memory_deny_write_execute; }; #include "cgroup-util.h" diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 00bdc238ce..eb58586523 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -55,10 +55,12 @@ m4_ifdef(`HAVE_SECCOMP', `$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) $1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs) $1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context) +$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute) $1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)', `$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 09d3f65c77..b53301a147 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2396,6 +2396,55 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP +static int syscall_filter_parse_one( + const char *unit, + const char *filename, + unsigned line, + ExecContext *c, + bool invert, + const char *t, + bool warn) { + int r; + + if (*t == '@') { + const SystemCallFilterSet *set; + + for (set = syscall_filter_sets; set->set_name; set++) + if (streq(set->set_name, t)) { + const char *sys; + + NULSTR_FOREACH(sys, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); + if (r < 0) + return r; + } + break; + } + } else { + int id; + + id = seccomp_syscall_resolve_name(t); + if (id < 0) { + if (warn) + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + return 0; + } + + /* If we previously wanted to forbid a syscall and now + * we want to allow it, then remove it from the list + */ + if (!invert == c->syscall_whitelist) { + r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + if (r == 0) + return 0; + if (r < 0) + return log_oom(); + } else + set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + } + return 0; +} + int config_parse_syscall_filter( const char *unit, const char *filename, @@ -2408,13 +2457,6 @@ int config_parse_syscall_filter( void *data, void *userdata) { - static const char default_syscalls[] = - "execve\0" - "exit\0" - "exit_group\0" - "rt_sigreturn\0" - "sigreturn\0"; - ExecContext *c = data; Unit *u = userdata; bool invert = false; @@ -2448,53 +2490,26 @@ int config_parse_syscall_filter( /* Allow everything but the ones listed */ c->syscall_whitelist = false; else { - const char *i; - /* Allow nothing but the ones listed */ c->syscall_whitelist = true; /* Accept default syscalls if we are on a whitelist */ - NULSTR_FOREACH(i, default_syscalls) { - int id; - - id = seccomp_syscall_resolve_name(i); - if (id < 0) - continue; - - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); - if (r == 0) - continue; - if (r < 0) - return log_oom(); - } + r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false); + if (r < 0) + return r; } } FOREACH_WORD_QUOTED(word, l, rvalue, state) { _cleanup_free_ char *t = NULL; - int id; t = strndup(word, l); if (!t) return log_oom(); - id = seccomp_syscall_resolve_name(t); - if (id < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); - continue; - } - - /* If we previously wanted to forbid a syscall and now - * we want to allow it, then remove it from the list - */ - if (!invert == c->syscall_whitelist) { - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); - if (r == 0) - continue; - if (r < 0) - return log_oom(); - } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + if (r < 0) + return r; } if (!isempty(state)) log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); @@ -2796,7 +2811,7 @@ int config_parse_memory_limit( uint64_t bytes = CGROUP_LIMIT_MAX; int r; - if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) { + if (!isempty(rvalue) && !streq(rvalue, "infinity")) { r = parse_size(rvalue, 1024, &bytes); if (r < 0 || bytes < 1) { log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); @@ -3065,7 +3080,7 @@ int config_parse_io_limit( return 0; } - if (streq("max", limit)) { + if (streq("infinity", limit)) { num = CGROUP_LIMIT_MAX; } else { r = parse_size(limit, 1000, &num); |