diff options
author | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-09-14 18:33:57 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-09-14 18:33:57 -0400 |
commit | 3c72c8d3ee67388336aca58c5afa3fb93a9c24c0 (patch) | |
tree | d072df7fee0f5906fad88c08398b2fe887cbc064 /src/grp-system/libcore | |
parent | e51613a3291342c6006edda8783755fb8994fd75 (diff) | |
parent | 6ba6ca19507add38549e07058c57489a8cd98cd1 (diff) |
Merge branch 'notsystemd/postmove' into notsystemd/master
# Conflicts:
# src/grp-journal/systemd-journald/Makefile
# src/grp-login/systemd-logind/Makefile
# src/grp-machine/grp-import/systemd-export/Makefile
# src/grp-machine/grp-import/systemd-import/Makefile
# src/grp-machine/grp-import/systemd-pull/Makefile
# src/grp-machine/systemd-machined/Makefile
# src/grp-network/libnetworkd-core/Makefile
# src/grp-resolve/libbasic-dns/Makefile
# src/grp-resolve/systemd-resolved/Makefile
# src/grp-utils/systemd-path/Makefile
# src/libshared/src/Makefile
# src/libsystemd-network/include/systemd-network/sd-ndisc.h
# src/libsystemd/Makefile
# src/libsystemd/src/test.mk
# src/libudev/Makefile
# src/systemd-dbus1-generator/Makefile
# src/systemd-nspawn/nspawn.c
Signed-off-by: Luke Shumaker <lukeshu@sbcglobal.net>
Diffstat (limited to 'src/grp-system/libcore')
37 files changed, 1127 insertions, 619 deletions
diff --git a/src/grp-system/libcore/Makefile b/src/grp-system/libcore/Makefile index 1f19355412..b31d00fcdb 100644 --- a/src/grp-system/libcore/Makefile +++ b/src/grp-system/libcore/Makefile @@ -156,7 +156,7 @@ libcore_la_CFLAGS = \ $(SECCOMP_CFLAGS) libcore_la_LIBADD = \ - libshared.la \ + libsystemd-shared.la \ $(PAM_LIBS) \ $(AUDIT_LIBS) \ $(KMOD_LIBS) \ diff --git a/src/grp-system/libcore/automount.c b/src/grp-system/libcore/automount.c index 726a044030..30c30469ca 100644 --- a/src/grp-system/libcore/automount.c +++ b/src/grp-system/libcore/automount.c @@ -100,9 +100,6 @@ static void unmount_autofs(Automount *a) { if (a->pipe_fd < 0) return; - automount_send_ready(a, a->tokens, -EHOSTDOWN); - automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); - a->pipe_event_source = sd_event_source_unref(a->pipe_event_source); a->pipe_fd = safe_close(a->pipe_fd); @@ -111,6 +108,9 @@ static void unmount_autofs(Automount *a) { if (a->where && (UNIT(a)->manager->exit_code != MANAGER_RELOAD && UNIT(a)->manager->exit_code != MANAGER_REEXECUTE)) { + automount_send_ready(a, a->tokens, -EHOSTDOWN); + automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); + r = repeat_unmount(a->where, MNT_DETACH); if (r < 0) log_error_errno(r, "Failed to unmount: %m"); @@ -504,6 +504,20 @@ static void automount_trigger_notify(Unit *u, Unit *other) { automount_set_state(a, AUTOMOUNT_RUNNING); } + if (IN_SET(MOUNT(other)->state, + MOUNT_MOUNTING, MOUNT_MOUNTING_DONE, + MOUNT_MOUNTED, MOUNT_REMOUNTING, + MOUNT_MOUNTING_SIGTERM, MOUNT_MOUNTING_SIGKILL, + MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL, + MOUNT_UNMOUNTING_SIGTERM, MOUNT_UNMOUNTING_SIGKILL, + MOUNT_FAILED)) { + + (void) automount_send_ready(a, a->expire_tokens, -ENODEV); + } + + if (MOUNT(other)->state == MOUNT_DEAD) + (void) automount_send_ready(a, a->expire_tokens, 0); + /* The mount is in some unhappy state now, let's unfreeze any waiting clients */ if (IN_SET(MOUNT(other)->state, MOUNT_DEAD, MOUNT_UNMOUNTING, diff --git a/src/grp-system/libcore/busname.c b/src/grp-system/libcore/busname.c index 5d85c20730..f21a2d23c1 100644 --- a/src/grp-system/libcore/busname.c +++ b/src/grp-system/libcore/busname.c @@ -999,12 +999,7 @@ static int busname_get_timeout(Unit *u, usec_t *timeout) { } static bool busname_supported(void) { - static int supported = -1; - - if (supported < 0) - supported = is_kdbus_available(); - - return supported; + return false; } static int busname_control_pid(Unit *u) { diff --git a/src/grp-system/libcore/cgroup.c b/src/grp-system/libcore/cgroup.c index df850ec68f..0c1365f329 100644 --- a/src/grp-system/libcore/cgroup.c +++ b/src/grp-system/libcore/cgroup.c @@ -37,6 +37,21 @@ #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +static void cgroup_compat_warn(void) { + static bool cgroup_compat_warned = false; + + if (cgroup_compat_warned) + return; + + log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details."); + cgroup_compat_warned = true; +} + +#define log_cgroup_compat(unit, fmt, ...) do { \ + cgroup_compat_warn(); \ + log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \ + } while (false) + void cgroup_context_init(CGroupContext *c) { assert(c); @@ -47,7 +62,10 @@ void cgroup_context_init(CGroupContext *c) { c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; c->cpu_quota_per_sec_usec = USEC_INFINITY; - c->memory_limit = (uint64_t) -1; + c->memory_high = CGROUP_LIMIT_MAX; + c->memory_max = CGROUP_LIMIT_MAX; + + c->memory_limit = CGROUP_LIMIT_MAX; c->io_weight = CGROUP_WEIGHT_INVALID; c->startup_io_weight = CGROUP_WEIGHT_INVALID; @@ -148,6 +166,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" + "%sMemoryLow=%" PRIu64 "\n" + "%sMemoryHigh=%" PRIu64 "\n" + "%sMemoryMax=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n" "%sDevicePolicy=%s\n" @@ -164,6 +185,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, + prefix, c->memory_low, + prefix, c->memory_high, + prefix, c->memory_max, prefix, c->memory_limit, prefix, c->tasks_max, prefix, cgroup_device_policy_to_string(c->device_policy), @@ -405,7 +429,7 @@ static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) { CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); } -static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) { +static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; int r; @@ -415,13 +439,13 @@ static void cgroup_apply_io_device_weight(const char *path, const char *dev_path return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight); - r = cg_set_attribute("io", path, "io.weight", buf); + r = cg_set_attribute("io", u->cgroup_path, "io.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight: %m"); } -static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) { +static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; int r; @@ -431,13 +455,13 @@ static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_p return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight); - r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.weight_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight_device: %m"); } -static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) { +static unsigned cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) { char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; CGroupIOLimitType type; @@ -461,14 +485,14 @@ static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_p xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); - r = cg_set_attribute("io", path, "io.max", buf); + r = cg_set_attribute("io", u->cgroup_path, "io.max", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.max on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.max: %m"); return n; } -static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) { +static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; unsigned n = 0; @@ -481,26 +505,50 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de if (rbps != CGROUP_LIMIT_MAX) n++; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps); - r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.read_bps_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.read_bps_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.read_bps_device: %m"); if (wbps != CGROUP_LIMIT_MAX) n++; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps); - r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.write_bps_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.write_bps_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.write_bps_device: %m"); return n; } -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { +static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { + return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; +} + +static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) { + char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; + int r; + + if (v != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", v); + + r = cg_set_attribute("memory", u->cgroup_path, file, buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set %s: %m", file); +} + +static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { + const char *path; + CGroupContext *c; bool is_root; int r; + assert(u); + + c = unit_get_cgroup_context(u); + path = u->cgroup_path; + assert(c); assert(path); @@ -526,14 +574,14 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT); r = cg_set_attribute("cpu", path, "cpu.shares", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.shares: %m"); sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_period_us: %m"); if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); @@ -541,8 +589,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } else r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_quota_us: %m"); } if (mask & CGROUP_MASK_IO) { @@ -555,29 +603,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (has_io) weight = cgroup_context_io_weight(c, state); - else if (has_blockio) - weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); - else + else if (has_blockio) { + uint64_t blkio_weight = cgroup_context_blkio_weight(c, state); + + weight = cgroup_weight_blkio_to_io(blkio_weight); + + log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64, + blkio_weight, weight); + } else weight = CGROUP_WEIGHT_DEFAULT; xsprintf(buf, "default %" PRIu64 "\n", weight); r = cg_set_attribute("io", path, "io.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight: %m"); if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_io_device_weight(path, w->path, w->weight); + cgroup_apply_io_device_weight(u, w->path, w->weight); } else if (has_blockio) { CGroupBlockIODeviceWeight *w; /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); + LIST_FOREACH(device_weights, w, c->blockio_device_weights) { + weight = cgroup_weight_blkio_to_io(w->weight); + + log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s", + w->weight, weight, w->path); + + cgroup_apply_io_device_weight(u, w->path, weight); + } } } @@ -586,7 +645,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + if (!cgroup_apply_io_device_limit(u, l->path, l->limits)) cgroup_context_free_io_device_limit(c, l); } } else if (has_blockio) { @@ -602,7 +661,10 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M limits[CGROUP_IO_RBPS_MAX] = b->rbps; limits[CGROUP_IO_WBPS_MAX] = b->wbps; - if (!cgroup_apply_io_device_limit(path, b->path, limits)) + log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s", + b->rbps, b->wbps, b->path); + + if (!cgroup_apply_io_device_limit(u, b->path, limits)) cgroup_context_free_blockio_device_bandwidth(c, b); } } @@ -618,29 +680,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (has_blockio) weight = cgroup_context_blkio_weight(c, state); - else if (has_io) + else if (has_io) { + uint64_t io_weight = cgroup_context_io_weight(c, state); + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); - else + + log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64, + io_weight, weight); + } else weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); r = cg_set_attribute("blkio", path, "blkio.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight: %m"); if (has_blockio) { CGroupBlockIODeviceWeight *w; /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, w->weight); + cgroup_apply_blkio_device_weight(u, w->path, w->weight); } else if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); + LIST_FOREACH(device_weights, w, c->io_device_weights) { + weight = cgroup_weight_io_to_blkio(w->weight); + + log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s", + w->weight, weight, w->path); + + cgroup_apply_blkio_device_weight(u, w->path, weight); + } } } @@ -649,40 +722,59 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupBlockIODeviceBandwidth *b, *next; LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) cgroup_context_free_blockio_device_bandwidth(c, b); } } else if (has_io) { CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s", + l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path); + + if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) cgroup_context_free_io_device_limit(c, l); } } } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (c->memory_limit != (uint64_t) -1) { - char buf[DECIMAL_STR_MAX(uint64_t) + 1]; + if (cg_unified() > 0) { + uint64_t max = c->memory_max; - sprintf(buf, "%" PRIu64 "\n", c->memory_limit); + if (cgroup_context_has_unified_memory_config(c)) + max = c->memory_max; + else { + max = c->memory_limit; - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); - else - r = cg_set_attribute("memory", path, "memory.max", buf); + if (max != CGROUP_LIMIT_MAX) + log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max); + } + cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low); + cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high); + cgroup_apply_unified_memory_limit(u, "memory.max", max); } else { - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); + char buf[DECIMAL_STR_MAX(uint64_t) + 1]; + uint64_t val = c->memory_limit; + + if (val == CGROUP_LIMIT_MAX) { + val = c->memory_max; + + if (val != CGROUP_LIMIT_MAX) + log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max); + } + + if (val == CGROUP_LIMIT_MAX) + strncpy(buf, "-1\n", sizeof(buf)); else - r = cg_set_attribute("memory", path, "memory.max", "max"); - } + xsprintf(buf, "%" PRIu64 "\n", val); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path); + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set memory.limit_in_bytes: %m"); + } } if ((mask & CGROUP_MASK_DEVICES) && !is_root) { @@ -697,8 +789,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M else r = cg_set_attribute("devices", path, "devices.allow", "a"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to reset devices.list on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to reset devices.list: %m"); if (c->device_policy == CGROUP_CLOSED || (c->device_policy == CGROUP_AUTO && c->device_allow)) { @@ -709,7 +801,10 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M "/dev/random\0" "rwm\0" "/dev/urandom\0" "rwm\0" "/dev/tty\0" "rwm\0" - "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */ + "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */ + /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */ + "/run/systemd/inaccessible/chr\0" "rwm\0" + "/run/systemd/inaccessible/blk\0" "rwm\0"; const char *x, *y; @@ -744,7 +839,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M else if (startswith(a->path, "char-")) whitelist_major(path, a->path + 5, 'c', acc); else - log_debug("Ignoring device %s while writing cgroup attribute.", a->path); + log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path); } } @@ -759,8 +854,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M r = cg_set_attribute("pids", path, "pids.max", "max"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set pids.max on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set pids.max: %m"); } } @@ -779,7 +874,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || - c->memory_limit != (uint64_t) -1) + c->memory_limit != CGROUP_LIMIT_MAX || + cgroup_context_has_unified_memory_config(c)) mask |= CGROUP_MASK_MEMORY; if (c->device_allow || @@ -1044,7 +1140,7 @@ int unit_watch_cgroup(Unit *u) { /* Only applies to the unified hierarchy */ r = cg_unified(); if (r < 0) - return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m"); + return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); if (r == 0) return 0; @@ -1194,7 +1290,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { return r; /* Finally, apply the necessary attributes. */ - cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state); + cgroup_context_apply(u, target_mask, state); return 0; } @@ -1325,7 +1421,7 @@ void unit_prune_cgroup(Unit *u) { r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice); if (r < 0) { - log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path); + log_unit_debug_errno(u, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path); return; } @@ -1566,7 +1662,7 @@ int manager_setup_cgroup(Manager *m) { /* 3. Install agent */ if (unified) { - /* In the unified hierarchy we can can get + /* In the unified hierarchy we can get * cgroup empty notifications via inotify. */ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); @@ -1613,7 +1709,7 @@ int manager_setup_cgroup(Manager *m) { /* also, move all other userspace processes remaining * in the root cgroup into that scope. */ - r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false); + r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0); if (r < 0) log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m"); @@ -1634,7 +1730,7 @@ int manager_setup_cgroup(Manager *m) { return log_error_errno(r, "Failed to determine supported controllers: %m"); for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) - log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & c)); + log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c))); return 0; } diff --git a/src/grp-system/libcore/cgroup.h b/src/grp-system/libcore/cgroup.h index 36699fb21b..bf40c3cc74 100644 --- a/src/grp-system/libcore/cgroup.h +++ b/src/grp-system/libcore/cgroup.h @@ -94,6 +94,10 @@ struct CGroupContext { LIST_HEAD(CGroupIODeviceWeight, io_device_weights); LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; @@ -120,7 +124,6 @@ struct CGroupContext { void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state); CGroupMask cgroup_context_get_mask(CGroupContext *c); diff --git a/src/grp-system/libcore/dbus-cgroup.c b/src/grp-system/libcore/dbus-cgroup.c index d707ac51c3..da1c333043 100644 --- a/src/grp-system/libcore/dbus-cgroup.c +++ b/src/grp-system/libcore/dbus-cgroup.c @@ -229,6 +229,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0), + SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), + SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), + SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0), SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), @@ -624,7 +627,7 @@ int bus_cgroup_set_property( if (r < 0) return r; - if (CGROUP_BLKIO_WEIGHT_IS_OK(weight)) + if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight)) return sd_bus_error_set_errnof(error, EINVAL, "StartupBlockIOWeight value out of range"); if (mode != UNIT_CHECK) { @@ -639,7 +642,7 @@ int bus_cgroup_set_property( return 1; - } else if (streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth")) { + } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { const char *path; bool read = true; unsigned n = 0; @@ -827,12 +830,74 @@ int bus_cgroup_set_property( return 1; + } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) { + uint64_t v; + + r = sd_bus_message_read(message, "t", &v); + if (r < 0) + return r; + if (v <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); + + if (mode != UNIT_CHECK) { + if (streq(name, "MemoryLow")) + c->memory_low = v; + else if (streq(name, "MemoryHigh")) + c->memory_high = v; + else + c->memory_max = v; + + unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); + + if (v == CGROUP_LIMIT_MAX) + unit_write_drop_in_private_format(u, mode, name, "%s=infinity", name); + else + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64, name, v); + } + + return 1; + + } else if (STR_IN_SET(name, "MemoryLowScale", "MemoryHighScale", "MemoryMaxScale")) { + uint32_t raw; + uint64_t v; + + r = sd_bus_message_read(message, "u", &raw); + if (r < 0) + return r; + + v = physical_memory_scale(raw, UINT32_MAX); + if (v <= 0 || v == UINT64_MAX) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); + + if (mode != UNIT_CHECK) { + const char *e; + + /* Chop off suffix */ + assert_se(e = endswith(name, "Scale")); + name = strndupa(name, e - name); + + if (streq(name, "MemoryLow")) + c->memory_low = v; + else if (streq(name, "MemoryHigh")) + c->memory_high = v; + else + c->memory_max = v; + + unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name, + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); + } + + return 1; + } else if (streq(name, "MemoryLimit")) { uint64_t limit; r = sd_bus_message_read(message, "t", &limit); if (r < 0) return r; + if (limit <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); if (mode != UNIT_CHECK) { c->memory_limit = limit; @@ -846,6 +911,27 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "MemoryLimitScale")) { + uint64_t limit; + uint32_t raw; + + r = sd_bus_message_read(message, "u", &raw); + if (r < 0) + return r; + + limit = physical_memory_scale(raw, UINT32_MAX); + if (limit <= 0 || limit == UINT64_MAX) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); + + if (mode != UNIT_CHECK) { + c->memory_limit = limit; + unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); + unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); + } + + return 1; + } else if (streq(name, "DevicePolicy")) { const char *policy; CGroupDevicePolicy p; @@ -859,13 +945,9 @@ int bus_cgroup_set_property( return -EINVAL; if (mode != UNIT_CHECK) { - char *buf; - c->device_policy = p; unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES); - - buf = strjoina("DevicePolicy=", policy); - unit_write_drop_in_private(u, mode, name, buf); + unit_write_drop_in_private_format(u, mode, name, "DevicePolicy=%s", policy); } return 1; @@ -881,6 +963,7 @@ int bus_cgroup_set_property( while ((r = sd_bus_message_read(message, "(ss)", &path, &rwm)) > 0) { if ((!startswith(path, "/dev/") && + !startswith(path, "/run/systemd/inaccessible/") && !startswith(path, "block-") && !startswith(path, "char-")) || strpbrk(path, WHITESPACE)) @@ -980,6 +1063,8 @@ int bus_cgroup_set_property( r = sd_bus_message_read(message, "t", &limit); if (r < 0) return r; + if (limit <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); if (mode != UNIT_CHECK) { c->tasks_max = limit; @@ -992,6 +1077,26 @@ int bus_cgroup_set_property( } return 1; + } else if (streq(name, "TasksMaxScale")) { + uint64_t limit; + uint32_t raw; + + r = sd_bus_message_read(message, "u", &raw); + if (r < 0) + return r; + + limit = system_tasks_max_scale(raw, UINT32_MAX); + if (limit <= 0 || limit >= UINT64_MAX) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); + + if (mode != UNIT_CHECK) { + c->tasks_max = limit; + unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); + unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); + } + + return 1; } if (u->transient && u->load_state == UNIT_STUB) { diff --git a/src/grp-system/libcore/dbus-execute.c b/src/grp-system/libcore/dbus-execute.c index fe1d9da440..f7c217efc1 100644 --- a/src/grp-system/libcore/dbus-execute.c +++ b/src/grp-system/libcore/dbus-execute.c @@ -696,9 +696,12 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_dirs), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_dirs), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_dirs), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), @@ -720,6 +723,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, runtime_directory_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; @@ -843,7 +848,7 @@ int bus_exec_context_set_transient_property( else if (free_and_strdup(&c->user, uu) < 0) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "User=%s\n", uu); + unit_write_drop_in_private_format(u, mode, name, "User=%s", uu); } return 1; @@ -862,7 +867,7 @@ int bus_exec_context_set_transient_property( else if (free_and_strdup(&c->group, gg) < 0) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "Group=%s\n", gg); + unit_write_drop_in_private_format(u, mode, name, "Group=%s", gg); } return 1; @@ -880,7 +885,7 @@ int bus_exec_context_set_transient_property( else if (free_and_strdup(&c->syslog_identifier, id) < 0) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "SyslogIdentifier=%s\n", id); + unit_write_drop_in_private_format(u, mode, name, "SyslogIdentifier=%s", id); } return 1; @@ -896,7 +901,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->syslog_priority = (c->syslog_priority & LOG_FACMASK) | level; - unit_write_drop_in_private_format(u, mode, name, "SyslogLevel=%i\n", level); + unit_write_drop_in_private_format(u, mode, name, "SyslogLevel=%i", level); } return 1; @@ -912,7 +917,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->syslog_priority = (facility << 3) | LOG_PRI(c->syslog_priority); - unit_write_drop_in_private_format(u, mode, name, "SyslogFacility=%i\n", facility); + unit_write_drop_in_private_format(u, mode, name, "SyslogFacility=%i", facility); } return 1; @@ -928,7 +933,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->nice = n; - unit_write_drop_in_private_format(u, mode, name, "Nice=%i\n", n); + unit_write_drop_in_private_format(u, mode, name, "Nice=%i", n); } return 1; @@ -953,7 +958,7 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, s); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s); } return 1; @@ -1008,7 +1013,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->std_input = p; - unit_write_drop_in_private_format(u, mode, name, "StandardInput=%s\n", exec_input_to_string(p)); + unit_write_drop_in_private_format(u, mode, name, "StandardInput=%s", exec_input_to_string(p)); } return 1; @@ -1029,7 +1034,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->std_output = p; - unit_write_drop_in_private_format(u, mode, name, "StandardOutput=%s\n", exec_output_to_string(p)); + unit_write_drop_in_private_format(u, mode, name, "StandardOutput=%s", exec_output_to_string(p)); } return 1; @@ -1049,7 +1054,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->std_error = p; - unit_write_drop_in_private_format(u, mode, name, "StandardError=%s\n", exec_output_to_string(p)); + unit_write_drop_in_private_format(u, mode, name, "StandardError=%s", exec_output_to_string(p)); } return 1; @@ -1057,7 +1062,7 @@ int bus_exec_context_set_transient_property( } else if (STR_IN_SET(name, "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "PrivateTmp", "PrivateDevices", "PrivateNetwork", - "NoNewPrivileges", "SyslogLevelPrefix")) { + "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1081,8 +1086,12 @@ int bus_exec_context_set_transient_property( c->no_new_privileges = b; else if (streq(name, "SyslogLevelPrefix")) c->syslog_level_prefix = b; + else if (streq(name, "MemoryDenyWriteExecute")) + c->memory_deny_write_execute = b; + else if (streq(name, "RestrictRealtime")) + c->restrict_realtime = b; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, yes_no(b)); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } return 1; @@ -1100,7 +1109,7 @@ int bus_exec_context_set_transient_property( else if (free_and_strdup(&c->utmp_id, id) < 0) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "UtmpIdentifier=%s\n", strempty(id)); + unit_write_drop_in_private_format(u, mode, name, "UtmpIdentifier=%s", strempty(id)); } return 1; @@ -1120,7 +1129,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->utmp_mode = m; - unit_write_drop_in_private_format(u, mode, name, "UtmpMode=%s\n", exec_utmp_mode_to_string(m)); + unit_write_drop_in_private_format(u, mode, name, "UtmpMode=%s", exec_utmp_mode_to_string(m)); } return 1; @@ -1138,7 +1147,7 @@ int bus_exec_context_set_transient_property( else if (free_and_strdup(&c->pam_name, n) < 0) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "PAMName=%s\n", strempty(n)); + unit_write_drop_in_private_format(u, mode, name, "PAMName=%s", strempty(n)); } return 1; @@ -1160,7 +1169,7 @@ int bus_exec_context_set_transient_property( if (strv_length(l) == 0) { c->environment = strv_free(c->environment); - unit_write_drop_in_private_format(u, mode, name, "Environment=\n"); + unit_write_drop_in_private_format(u, mode, name, "Environment="); } else { e = strv_env_merge(2, c->environment, l); if (!e) @@ -1173,7 +1182,7 @@ int bus_exec_context_set_transient_property( if (!joined) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "Environment=%s\n", joined); + unit_write_drop_in_private_format(u, mode, name, "Environment=%s", joined); } } @@ -1189,7 +1198,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->timer_slack_nsec = n; - unit_write_drop_in_private_format(u, mode, name, "TimerSlackNSec=" NSEC_FMT "\n", n); + unit_write_drop_in_private_format(u, mode, name, "TimerSlackNSec=" NSEC_FMT, n); } return 1; @@ -1207,7 +1216,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->oom_score_adjust = oa; c->oom_score_adjust_set = true; - unit_write_drop_in_private_format(u, mode, name, "OOMScoreAdjust=%i\n", oa); + unit_write_drop_in_private_format(u, mode, name, "OOMScoreAdjust=%i", oa); } return 1; @@ -1229,7 +1238,7 @@ int bus_exec_context_set_transient_property( return -ENOMEM; STRV_FOREACH(i, c->environment_files) - fprintf(f, "EnvironmentFile=%s\n", *i); + fprintf(f, "EnvironmentFile=%s", *i); while ((r = sd_bus_message_enter_container(message, 'r', "sb")) > 0) { const char *path; @@ -1253,7 +1262,7 @@ int bus_exec_context_set_transient_property( if (!buf) return -ENOMEM; - fprintf(f, "EnvironmentFile=%s\n", buf); + fprintf(f, "EnvironmentFile=%s", buf); r = strv_consume(&l, buf); if (r < 0) @@ -1274,7 +1283,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { if (strv_isempty(l)) { c->environment_files = strv_free(c->environment_files); - unit_write_drop_in_private(u, mode, name, "EnvironmentFile=\n"); + unit_write_drop_in_private(u, mode, name, "EnvironmentFile="); } else { r = strv_extend_strv(&c->environment_files, l, true); if (r < 0) @@ -1300,7 +1309,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { if (strv_isempty(l)) { c->pass_environment = strv_free(c->pass_environment); - unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=\n"); + unit_write_drop_in_private_format(u, mode, name, "PassEnvironment="); } else { _cleanup_free_ char *joined = NULL; @@ -1312,14 +1321,14 @@ int bus_exec_context_set_transient_property( if (!joined) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=%s\n", joined); + unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=%s", joined); } } return 1; - } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories")) { - + } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories", + "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) { _cleanup_strv_free_ char **l = NULL; char ***dirs; char **p; @@ -1341,16 +1350,16 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { _cleanup_free_ char *joined = NULL; - if (streq(name, "ReadWriteDirectories")) - dirs = &c->read_write_dirs; - else if (streq(name, "ReadOnlyDirectories")) - dirs = &c->read_only_dirs; - else /* "InaccessibleDirectories" */ - dirs = &c->inaccessible_dirs; + if (STR_IN_SET(name, "ReadWriteDirectories", "ReadWritePaths")) + dirs = &c->read_write_paths; + else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths")) + dirs = &c->read_only_paths; + else /* "InaccessiblePaths" */ + dirs = &c->inaccessible_paths; if (strv_length(l) == 0) { *dirs = strv_free(*dirs); - unit_write_drop_in_private_format(u, mode, name, "%s=\n", name); + unit_write_drop_in_private_format(u, mode, name, "%s=", name); } else { r = strv_extend_strv(dirs, l, true); @@ -1361,7 +1370,7 @@ int bus_exec_context_set_transient_property( if (!joined) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, joined); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, joined); } } @@ -1389,7 +1398,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->protect_system = ps; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, s); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s); } return 1; @@ -1415,7 +1424,7 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { c->protect_home = ph; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, s); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s); } return 1; @@ -1438,7 +1447,7 @@ int bus_exec_context_set_transient_property( if (strv_isempty(l)) { c->runtime_directory = strv_free(c->runtime_directory); - unit_write_drop_in_private_format(u, mode, name, "%s=\n", name); + unit_write_drop_in_private_format(u, mode, name, "%s=", name); } else { r = strv_extend_strv(&c->runtime_directory, l, true); @@ -1449,7 +1458,7 @@ int bus_exec_context_set_transient_property( if (!joined) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, joined); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, joined); } } @@ -1468,7 +1477,7 @@ int bus_exec_context_set_transient_property( else if (free_and_strdup(&c->selinux_context, s) < 0) return -ENOMEM; - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, strempty(s)); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, strempty(s)); } return 1; @@ -1536,7 +1545,7 @@ int bus_exec_context_set_transient_property( return -ENOMEM; } - unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, f); + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, f); } return 1; diff --git a/src/grp-system/libcore/dbus-kill.c b/src/grp-system/libcore/dbus-kill.c index ba7a3f3b72..44bc2db7e2 100644 --- a/src/grp-system/libcore/dbus-kill.c +++ b/src/grp-system/libcore/dbus-kill.c @@ -64,7 +64,7 @@ int bus_kill_context_set_transient_property( if (mode != UNIT_CHECK) { c->kill_mode = k; - unit_write_drop_in_private_format(u, mode, name, "KillMode=%s\n", kill_mode_to_string(k)); + unit_write_drop_in_private_format(u, mode, name, "KillMode=%s", kill_mode_to_string(k)); } return 1; @@ -82,7 +82,7 @@ int bus_kill_context_set_transient_property( if (mode != UNIT_CHECK) { c->kill_signal = sig; - unit_write_drop_in_private_format(u, mode, name, "KillSignal=%s\n", signal_to_string(sig)); + unit_write_drop_in_private_format(u, mode, name, "KillSignal=%s", signal_to_string(sig)); } return 1; @@ -97,7 +97,7 @@ int bus_kill_context_set_transient_property( if (mode != UNIT_CHECK) { c->send_sighup = b; - unit_write_drop_in_private_format(u, mode, name, "SendSIGHUP=%s\n", yes_no(b)); + unit_write_drop_in_private_format(u, mode, name, "SendSIGHUP=%s", yes_no(b)); } return 1; @@ -112,7 +112,7 @@ int bus_kill_context_set_transient_property( if (mode != UNIT_CHECK) { c->send_sigkill = b; - unit_write_drop_in_private_format(u, mode, name, "SendSIGKILL=%s\n", yes_no(b)); + unit_write_drop_in_private_format(u, mode, name, "SendSIGKILL=%s", yes_no(b)); } return 1; diff --git a/src/grp-system/libcore/dbus-manager.c b/src/grp-system/libcore/dbus-manager.c index b2ca867987..c5684014d3 100644 --- a/src/grp-system/libcore/dbus-manager.c +++ b/src/grp-system/libcore/dbus-manager.c @@ -782,6 +782,7 @@ static int transient_unit_from_message( return r; /* Now load the missing bits of the unit we just created */ + unit_add_to_load_queue(u); manager_dispatch_load_queue(m); *unit = u; diff --git a/src/grp-system/libcore/dbus-scope.c b/src/grp-system/libcore/dbus-scope.c index 9ecc01d275..d319597c10 100644 --- a/src/grp-system/libcore/dbus-scope.c +++ b/src/grp-system/libcore/dbus-scope.c @@ -148,7 +148,7 @@ static int bus_scope_set_transient_property( if (r < 0) return r; - unit_write_drop_in_format(UNIT(s), mode, name, "[Scope]\nTimeoutStopSec="USEC_FMT"us\n", s->timeout_stop_usec); + unit_write_drop_in_private_format(UNIT(s), mode, name, "TimeoutStopSec="USEC_FMT"us", s->timeout_stop_usec); } else { r = sd_bus_message_skip(message, "t"); if (r < 0) @@ -226,5 +226,5 @@ int bus_scope_send_request_stop(Scope *s) { if (r < 0) return r; - return sd_bus_send_to(UNIT(s)->manager->api_bus, m, /* s->controller */ NULL, NULL); + return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL); } diff --git a/src/grp-system/libcore/dbus-service.c b/src/grp-system/libcore/dbus-service.c index a666c9db95..a3524815bd 100644 --- a/src/grp-system/libcore/dbus-service.c +++ b/src/grp-system/libcore/dbus-service.c @@ -103,7 +103,7 @@ static int bus_service_set_transient_property( if (mode != UNIT_CHECK) { s->remain_after_exit = b; - unit_write_drop_in_private_format(UNIT(s), mode, name, "RemainAfterExit=%s\n", yes_no(b)); + unit_write_drop_in_private_format(UNIT(s), mode, name, "RemainAfterExit=%s", yes_no(b)); } return 1; @@ -122,7 +122,7 @@ static int bus_service_set_transient_property( if (mode != UNIT_CHECK) { s->type = k; - unit_write_drop_in_private_format(UNIT(s), mode, name, "Type=%s\n", service_type_to_string(s->type)); + unit_write_drop_in_private_format(UNIT(s), mode, name, "Type=%s", service_type_to_string(s->type)); } return 1; @@ -135,7 +135,7 @@ static int bus_service_set_transient_property( if (mode != UNIT_CHECK) { s->runtime_max_usec = u; - unit_write_drop_in_private_format(UNIT(s), mode, name, "RuntimeMaxSec=" USEC_FMT "us\n", u); + unit_write_drop_in_private_format(UNIT(s), mode, name, "RuntimeMaxSec=" USEC_FMT "us", u); } return 1; diff --git a/src/grp-system/libcore/dbus-timer.c b/src/grp-system/libcore/dbus-timer.c index 02036de42f..ce454385f8 100644 --- a/src/grp-system/libcore/dbus-timer.c +++ b/src/grp-system/libcore/dbus-timer.c @@ -221,7 +221,7 @@ static int bus_timer_set_transient_property( if (mode != UNIT_CHECK) { char time[FORMAT_TIMESPAN_MAX]; - unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, format_timespan(time, sizeof(time), u, USEC_PER_MSEC)); + unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, format_timespan(time, sizeof(time), u, USEC_PER_MSEC)); v = new0(TimerValue, 1); if (!v) @@ -250,7 +250,7 @@ static int bus_timer_set_transient_property( if (r < 0) return r; - unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, str); + unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, str); v = new0(TimerValue, 1); if (!v) { @@ -278,7 +278,7 @@ static int bus_timer_set_transient_property( if (mode != UNIT_CHECK) { t->accuracy_usec = u; - unit_write_drop_in_private_format(UNIT(t), mode, name, "AccuracySec=" USEC_FMT "us\n", u); + unit_write_drop_in_private_format(UNIT(t), mode, name, "AccuracySec=" USEC_FMT "us", u); } return 1; @@ -292,7 +292,7 @@ static int bus_timer_set_transient_property( if (mode != UNIT_CHECK) { t->random_usec = u; - unit_write_drop_in_private_format(UNIT(t), mode, name, "RandomizedDelaySec=" USEC_FMT "us\n", u); + unit_write_drop_in_private_format(UNIT(t), mode, name, "RandomizedDelaySec=" USEC_FMT "us", u); } return 1; @@ -306,7 +306,7 @@ static int bus_timer_set_transient_property( if (mode != UNIT_CHECK) { t->wake_system = b; - unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, yes_no(b)); + unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, yes_no(b)); } return 1; @@ -320,7 +320,7 @@ static int bus_timer_set_transient_property( if (mode != UNIT_CHECK) { t->remain_after_elapse = b; - unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, yes_no(b)); + unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, yes_no(b)); } return 1; diff --git a/src/grp-system/libcore/dbus-unit.c b/src/grp-system/libcore/dbus-unit.c index 31fbb0d54d..7e2a63b434 100644 --- a/src/grp-system/libcore/dbus-unit.c +++ b/src/grp-system/libcore/dbus-unit.c @@ -1206,7 +1206,7 @@ static int bus_unit_set_transient_property( if (r < 0) return r; - unit_write_drop_in_format(u, mode, name, "[Unit]\nDescription=%s\n", d); + unit_write_drop_in_format(u, mode, name, "[Unit]\nDescription=%s", d); } return 1; @@ -1220,7 +1220,7 @@ static int bus_unit_set_transient_property( if (mode != UNIT_CHECK) { u->default_dependencies = b; - unit_write_drop_in_format(u, mode, name, "[Unit]\nDefaultDependencies=%s\n", yes_no(b)); + unit_write_drop_in_format(u, mode, name, "[Unit]\nDefaultDependencies=%s", yes_no(b)); } return 1; @@ -1258,7 +1258,7 @@ static int bus_unit_set_transient_property( if (r < 0) return r; - unit_write_drop_in_private_format(u, mode, name, "Slice=%s\n", s); + unit_write_drop_in_private_format(u, mode, name, "Slice=%s", s); } return 1; @@ -1306,7 +1306,7 @@ static int bus_unit_set_transient_property( if (!label) return -ENOMEM; - unit_write_drop_in_format(u, mode, label, "[Unit]\n%s=%s\n", name, other); + unit_write_drop_in_format(u, mode, label, "[Unit]\n%s=%s", name, other); } } diff --git a/src/grp-system/libcore/execute.c b/src/grp-system/libcore/execute.c index b19473c6a5..b73577817f 100644 --- a/src/grp-system/libcore/execute.c +++ b/src/grp-system/libcore/execute.c @@ -25,6 +25,7 @@ #include <signal.h> #include <string.h> #include <sys/capability.h> +#include <sys/mman.h> #include <sys/personality.h> #include <sys/prctl.h> #include <sys/socket.h> @@ -290,7 +291,15 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) { return r; } -static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) { +static int connect_logger_as( + Unit *unit, + const ExecContext *context, + ExecOutput output, + const char *ident, + int nfd, + uid_t uid, + gid_t gid) { + int fd, r; assert(context); @@ -311,7 +320,7 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons return -errno; } - fd_inc_sndbuf(fd, SNDBUF_SIZE); + (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); dprintf(fd, "%s\n" @@ -322,18 +331,18 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons "%i\n" "%i\n", context->syslog_identifier ? context->syslog_identifier : ident, - unit_id, + unit->id, context->syslog_priority, !!context->syslog_level_prefix, output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE, output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE, is_terminal_output(output)); - if (fd != nfd) { - r = dup2(fd, nfd) < 0 ? -errno : nfd; - safe_close(fd); - } else - r = nfd; + if (fd == nfd) + return nfd; + + r = dup2(fd, nfd) < 0 ? -errno : nfd; + safe_close(fd); return r; } @@ -447,7 +456,10 @@ static int setup_output( int fileno, int socket_fd, const char *ident, - uid_t uid, gid_t gid) { + uid_t uid, + gid_t gid, + dev_t *journal_stream_dev, + ino_t *journal_stream_ino) { ExecOutput o; ExecInput i; @@ -457,6 +469,8 @@ static int setup_output( assert(context); assert(params); assert(ident); + assert(journal_stream_dev); + assert(journal_stream_ino); if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) { @@ -532,10 +546,21 @@ static int setup_output( case EXEC_OUTPUT_KMSG_AND_CONSOLE: case EXEC_OUTPUT_JOURNAL: case EXEC_OUTPUT_JOURNAL_AND_CONSOLE: - r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid); + r = connect_logger_as(unit, context, o, ident, fileno, uid, gid); if (r < 0) { log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr"); r = open_null_as(O_WRONLY, fileno); + } else { + struct stat st; + + /* If we connected this fd to the journal via a stream, patch the device/inode into the passed + * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits + * services to detect whether they are connected to the journal or not. */ + + if (fstat(fileno, &st) >= 0) { + *journal_stream_dev = st.st_dev; + *journal_stream_ino = st.st_ino; + } } return r; @@ -553,6 +578,10 @@ static int chown_terminal(int fd, uid_t uid) { assert(fd >= 0); + /* Before we chown/chmod the TTY, let's ensure this is actually a tty */ + if (isatty(fd) < 1) + return 0; + /* This might fail. What matters are the results. */ (void) fchown(fd, uid, -1); (void) fchmod(fd, TTY_MODE); @@ -796,7 +825,7 @@ static int setup_pam( const char *user, uid_t uid, const char *tty, - char ***pam_env, + char ***env, int fds[], unsigned n_fds) { static const struct pam_conv conv = { @@ -808,14 +837,14 @@ static int setup_pam( pam_handle_t *handle = NULL; sigset_t old_ss; int pam_code = PAM_SUCCESS, r; - char **e = NULL; + char **nv, **e = NULL; bool close_session = false; pid_t pam_pid = 0, parent_pid; int flags = 0; assert(name); assert(user); - assert(pam_env); + assert(env); /* We set up PAM in the parent process, then fork. The child * will then stay around until killed via PR_GET_PDEATHSIG or @@ -843,6 +872,12 @@ static int setup_pam( goto fail; } + STRV_FOREACH(nv, *env) { + pam_code = pam_putenv(handle, *nv); + if (pam_code != PAM_SUCCESS) + goto fail; + } + pam_code = pam_acct_mgmt(handle, flags); if (pam_code != PAM_SUCCESS) goto fail; @@ -963,8 +998,8 @@ static int setup_pam( if (!barrier_place_and_sync(&barrier)) log_error("PAM initialization failed"); - *pam_env = e; - e = NULL; + strv_free(*env); + *env = e; return 0; @@ -1192,6 +1227,115 @@ finish: return r; } +static int apply_memory_deny_write_execute(const ExecContext *c) { + scmp_filter_ctx *seccomp; + int r; + + assert(c); + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mmap), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) + goto finish; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + +static int apply_restrict_realtime(const ExecContext *c) { + static const int permitted_policies[] = { + SCHED_OTHER, + SCHED_BATCH, + SCHED_IDLE, + }; + + scmp_filter_ctx *seccomp; + unsigned i; + int r, p, max_policy = 0; + + assert(c); + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + /* Determine the highest policy constant we want to allow */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] > max_policy) + max_policy = permitted_policies[i]; + + /* Go through all policies with lower values than that, and block them -- unless they appear in the + * whitelist. */ + for (p = 0; p < max_policy; p++) { + bool good = false; + + /* Check if this is in the whitelist. */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] == p) { + good = true; + break; + } + + if (good) + continue; + + /* Deny this policy */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_EQ, p)); + if (r < 0) + goto finish; + } + + /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are unsigned here, + * hence no need no check for < 0 values. */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_GT, max_policy)); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { @@ -1230,6 +1374,8 @@ static int build_environment( const char *home, const char *username, const char *shell, + dev_t journal_stream_dev, + ino_t journal_stream_ino, char ***ret) { _cleanup_strv_free_ char **our_env = NULL; @@ -1239,7 +1385,7 @@ static int build_environment( assert(c); assert(ret); - our_env = new0(char*, 11); + our_env = new0(char*, 12); if (!our_env) return -ENOMEM; @@ -1311,8 +1457,15 @@ static int build_environment( our_env[n_env++] = x; } + if (journal_stream_dev != 0 && journal_stream_ino != 0) { + if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0) + return -ENOMEM; + + our_env[n_env++] = x; + } + our_env[n_env++] = NULL; - assert(n_env <= 11); + assert(n_env <= 12); *ret = our_env; our_env = NULL; @@ -1356,9 +1509,9 @@ static bool exec_needs_mount_namespace( assert(context); assert(params); - if (!strv_isempty(context->read_write_dirs) || - !strv_isempty(context->read_only_dirs) || - !strv_isempty(context->inaccessible_dirs)) + if (!strv_isempty(context->read_write_paths) || + !strv_isempty(context->read_only_paths) || + !strv_isempty(context->inaccessible_paths)) return true; if (context->mount_flags != 0) @@ -1422,13 +1575,15 @@ static int exec_child( char **files_env, int *exit_status) { - _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL; + _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL; _cleanup_free_ char *mac_selinux_context_net = NULL; const char *username = NULL, *home = NULL, *shell = NULL, *wd; + dev_t journal_stream_dev = 0; + ino_t journal_stream_ino = 0; + bool needs_mount_namespace; uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; int i, r; - bool needs_mount_namespace; assert(unit); assert(command); @@ -1528,13 +1683,13 @@ static int exec_child( return r; } - r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid); + r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDOUT; return r; } - r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid); + r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDERR; return r; @@ -1673,9 +1828,43 @@ static int exec_child( } } + r = build_environment( + context, + params, + n_fds, + home, + username, + shell, + journal_stream_dev, + journal_stream_ino, + &our_env); + if (r < 0) { + *exit_status = EXIT_MEMORY; + return r; + } + + r = build_pass_environment(context, &pass_env); + if (r < 0) { + *exit_status = EXIT_MEMORY; + return r; + } + + accum_env = strv_env_merge(5, + params->environment, + our_env, + pass_env, + context->environment, + files_env, + NULL); + if (!accum_env) { + *exit_status = EXIT_MEMORY; + return -ENOMEM; + } + accum_env = strv_env_clean(accum_env); + umask(context->umask); - if (params->apply_permissions) { + if (params->apply_permissions && !command->privileged) { r = enforce_groups(context, username, gid); if (r < 0) { *exit_status = EXIT_GROUP; @@ -1709,7 +1898,7 @@ static int exec_child( #endif #ifdef HAVE_PAM if (context->pam_name && username) { - r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds); + r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { *exit_status = EXIT_PAM; return r; @@ -1746,9 +1935,9 @@ static int exec_child( r = setup_namespace( params->apply_chroot ? context->root_directory : NULL, - context->read_write_dirs, - context->read_only_dirs, - context->inaccessible_dirs, + context->read_write_paths, + context->read_only_paths, + context->inaccessible_paths, tmp, var, context->private_devices, @@ -1800,7 +1989,7 @@ static int exec_child( } #ifdef HAVE_SELINUX - if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) { + if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) { r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net); if (r < 0) { *exit_status = EXIT_SELINUX_CONTEXT; @@ -1825,7 +2014,7 @@ static int exec_child( return r; } - if (params->apply_permissions) { + if (params->apply_permissions && !command->privileged) { bool use_address_families = context->address_families_whitelist || !set_isempty(context->address_families); @@ -1835,10 +2024,20 @@ static int exec_child( int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { + if (!context->rlimit[i]) continue; - if (setrlimit_closest(i, context->rlimit[i]) < 0) { + r = setrlimit_closest(i, context->rlimit[i]); + if (r < 0) { + *exit_status = EXIT_LIMITS; + return r; + } + } + + /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */ + if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) { + if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) { *exit_status = EXIT_LIMITS; return -errno; } @@ -1899,7 +2098,7 @@ static int exec_child( } if (context->no_new_privileges || - (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter))) + (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter))) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; @@ -1914,6 +2113,22 @@ static int exec_child( } } + if (context->memory_deny_write_execute) { + r = apply_memory_deny_write_execute(context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + + if (context->restrict_realtime) { + r = apply_restrict_realtime(context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + if (use_syscall_filter) { r = apply_seccomp(context); if (r < 0) { @@ -1948,39 +2163,12 @@ static int exec_child( #endif } - r = build_environment(context, params, n_fds, home, username, shell, &our_env); - if (r < 0) { - *exit_status = EXIT_MEMORY; - return r; - } - - r = build_pass_environment(context, &pass_env); - if (r < 0) { - *exit_status = EXIT_MEMORY; - return r; - } - - final_env = strv_env_merge(6, - params->environment, - our_env, - pass_env, - context->environment, - files_env, - pam_env, - NULL); - if (!final_env) { - *exit_status = EXIT_MEMORY; - return -ENOMEM; - } - - final_argv = replace_env_argv(argv, final_env); + final_argv = replace_env_argv(argv, accum_env); if (!final_argv) { *exit_status = EXIT_MEMORY; return -ENOMEM; } - final_env = strv_env_clean(final_env); - if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { _cleanup_free_ char *line; @@ -1996,7 +2184,7 @@ static int exec_child( } } - execve(command->path, final_argv, final_env); + execve(command->path, final_argv, accum_env); *exit_status = EXIT_EXEC; return -errno; } @@ -2138,9 +2326,9 @@ void exec_context_done(ExecContext *c) { c->pam_name = mfree(c->pam_name); - c->read_only_dirs = strv_free(c->read_only_dirs); - c->read_write_dirs = strv_free(c->read_write_dirs); - c->inaccessible_dirs = strv_free(c->inaccessible_dirs); + c->read_only_paths = strv_free(c->read_only_paths); + c->read_write_paths = strv_free(c->read_write_paths); + c->inaccessible_paths = strv_free(c->inaccessible_paths); if (c->cpuset) CPU_FREE(c->cpuset); @@ -2373,7 +2561,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sPrivateDevices: %s\n" "%sProtectHome: %s\n" "%sProtectSystem: %s\n" - "%sIgnoreSIGPIPE: %s\n", + "%sIgnoreSIGPIPE: %s\n" + "%sMemoryDenyWriteExecute: %s\n" + "%sRestrictRealtime: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", prefix, c->root_directory ? c->root_directory : "/", @@ -2383,7 +2573,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->private_devices), prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), - prefix, yes_no(c->ignore_sigpipe)); + prefix, yes_no(c->ignore_sigpipe), + prefix, yes_no(c->memory_deny_write_execute), + prefix, yes_no(c->restrict_realtime)); STRV_FOREACH(e, c->environment) fprintf(f, "%sEnvironment: %s\n", prefix, *e); @@ -2542,21 +2734,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->pam_name) fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name); - if (strv_length(c->read_write_dirs) > 0) { - fprintf(f, "%sReadWriteDirs:", prefix); - strv_fprintf(f, c->read_write_dirs); + if (strv_length(c->read_write_paths) > 0) { + fprintf(f, "%sReadWritePaths:", prefix); + strv_fprintf(f, c->read_write_paths); fputs("\n", f); } - if (strv_length(c->read_only_dirs) > 0) { - fprintf(f, "%sReadOnlyDirs:", prefix); - strv_fprintf(f, c->read_only_dirs); + if (strv_length(c->read_only_paths) > 0) { + fprintf(f, "%sReadOnlyPaths:", prefix); + strv_fprintf(f, c->read_only_paths); fputs("\n", f); } - if (strv_length(c->inaccessible_dirs) > 0) { - fprintf(f, "%sInaccessibleDirs:", prefix); - strv_fprintf(f, c->inaccessible_dirs); + if (strv_length(c->inaccessible_paths) > 0) { + fprintf(f, "%sInaccessiblePaths:", prefix); + strv_fprintf(f, c->inaccessible_paths); fputs("\n", f); } @@ -2637,7 +2829,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { bool exec_context_maintains_privileges(ExecContext *c) { assert(c); - /* Returns true if the process forked off would run run under + /* Returns true if the process forked off would run under * an unchanged UID or as root. */ if (!c->user) @@ -2872,7 +3064,7 @@ int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) { return r; if (c->private_network && (*rt)->netns_storage_socket[0] < 0) { - if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0) + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0) return -errno; } diff --git a/src/grp-system/libcore/execute.h b/src/grp-system/libcore/execute.h index 18d6f0e4a3..cf5e7e4617 100644 --- a/src/grp-system/libcore/execute.h +++ b/src/grp-system/libcore/execute.h @@ -24,9 +24,10 @@ #include <stdio.h> #include <sys/capability.h> -#include "basic/fdset.h" +#include "basic/cgroup-util.h" #include "basic/list.h" #include "basic/missing.h" +#include "shared/fdset.h" typedef struct ExecCommand ExecCommand; typedef struct ExecContext ExecContext; @@ -82,7 +83,8 @@ struct ExecCommand { char **argv; ExecStatus exec_status; LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */ - bool ignore; + bool ignore:1; + bool privileged:1; }; struct ExecRuntime { @@ -130,7 +132,7 @@ struct ExecContext { bool ignore_sigpipe; - /* Since resolving these names might might involve socket + /* Since resolving these names might involve socket * connections and we don't want to deadlock ourselves these * names are resolved on execution only and in the child * process. */ @@ -152,7 +154,7 @@ struct ExecContext { bool smack_process_label_ignore; char *smack_process_label; - char **read_write_dirs, **read_only_dirs, **inaccessible_dirs; + char **read_write_paths, **read_only_paths, **inaccessible_paths; unsigned long mount_flags; uint64_t capability_bounding_set; @@ -193,6 +195,9 @@ struct ExecContext { char **runtime_directory; mode_t runtime_directory_mode; + bool memory_deny_write_execute; + bool restrict_realtime; + bool oom_score_adjust_set:1; bool nice_set:1; bool ioprio_set:1; @@ -200,10 +205,6 @@ struct ExecContext { bool no_new_privileges_set:1; }; -#include "basic/cgroup-util.h" - -#include "cgroup.h" - struct ExecParameters { char **argv; char **environment; @@ -234,6 +235,8 @@ struct ExecParameters { int stderr_fd; }; +#include "unit.h" + int exec_spawn(Unit *unit, ExecCommand *command, const ExecContext *context, diff --git a/src/grp-system/libcore/killall.c b/src/grp-system/libcore/killall.c index bc4d67ae21..27946f1f84 100644 --- a/src/grp-system/libcore/killall.c +++ b/src/grp-system/libcore/killall.c @@ -23,6 +23,7 @@ #include <unistd.h> #include "basic/alloc-util.h" +#include "basic/def.h" #include "basic/fd-util.h" #include "basic/formats-util.h" #include "basic/parse-util.h" @@ -34,8 +35,6 @@ #include "killall.h" -#define TIMEOUT_USEC (10 * USEC_PER_SEC) - static bool ignore_proc(pid_t pid, bool warn_rootfs) { _cleanup_fclose_ FILE *f = NULL; char c; @@ -81,7 +80,7 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) { get_process_comm(pid, &comm); if (r) - log_notice("Process " PID_FMT " (%s) has been been marked to be excluded from killing. It is " + log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is " "running from the root file system, and thus likely to block re-mounting of the " "root file system to read-only. Please consider moving it into an initrd file " "system instead.", pid, strna(comm)); @@ -100,7 +99,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) { if (set_isempty(pids)) return; - until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC; + until = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; for (;;) { struct timespec ts; int k; diff --git a/src/grp-system/libcore/kmod-setup.c b/src/grp-system/libcore/kmod-setup.c index a6fa6a32fb..c91c280e7d 100644 --- a/src/grp-system/libcore/kmod-setup.c +++ b/src/grp-system/libcore/kmod-setup.c @@ -65,9 +65,6 @@ int kmod_setup(void) { /* this should never be a module */ { "unix", "/proc/net/unix", true, true, NULL }, - /* IPC is needed before we bring up any other services */ - { "kdbus", "/sys/fs/kdbus", false, false, is_kdbus_wanted }, - #ifdef HAVE_LIBIPTC /* netfilter is needed by networkd, nspawn among others, and cannot be autoloaded */ { "ip_tables", "/proc/net/ip_tables_names", false, false, NULL }, diff --git a/src/grp-system/libcore/load-fragment-gperf.gperf.m4 b/src/grp-system/libcore/load-fragment-gperf.gperf.m4 index c9f6e6acf7..ac4598f4c2 100644 --- a/src/grp-system/libcore/load-fragment-gperf.gperf.m4 +++ b/src/grp-system/libcore/load-fragment-gperf.gperf.m4 @@ -55,10 +55,14 @@ m4_ifdef(`HAVE_SECCOMP', `$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) $1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs) $1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context) +$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute) +$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime) $1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)', `$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) @@ -76,9 +80,12 @@ $1.LimitMSGQUEUE, config_parse_limit, RLIMIT_MSGQ $1.LimitNICE, config_parse_limit, RLIMIT_NICE, offsetof($1, exec_context.rlimit) $1.LimitRTPRIO, config_parse_limit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit) $1.LimitRTTIME, config_parse_limit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit) -$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_dirs) -$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_dirs) -$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_dirs) +$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths) +$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths) +$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) +$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths) +$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths) +$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) @@ -117,6 +124,9 @@ $1.CPUShares, config_parse_cpu_shares, 0, $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) +$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) diff --git a/src/grp-system/libcore/load-fragment.c b/src/grp-system/libcore/load-fragment.c index bb12b6cbfe..9a6ffc8bff 100644 --- a/src/grp-system/libcore/load-fragment.c +++ b/src/grp-system/libcore/load-fragment.c @@ -599,7 +599,7 @@ int config_parse_exec( p = rvalue; do { _cleanup_free_ char *path = NULL, *firstword = NULL; - bool separate_argv0 = false, ignore = false; + bool separate_argv0 = false, ignore = false, privileged = false; _cleanup_free_ ExecCommand *nce = NULL; _cleanup_strv_free_ char **n = NULL; size_t nlen = 0, nbufsize = 0; @@ -613,14 +613,18 @@ int config_parse_exec( return 0; f = firstword; - for (i = 0; i < 2; i++) { - /* We accept an absolute path as first argument, or - * alternatively an absolute prefixed with @ to allow - * overriding of argv[0]. */ + for (i = 0; i < 3; i++) { + /* We accept an absolute path as first argument. + * If it's prefixed with - and the path doesn't exist, + * we ignore it instead of erroring out; + * if it's prefixed with @, we allow overriding of argv[0]; + * and if it's prefixed with !, it will be run with full privileges */ if (*f == '-' && !ignore) ignore = true; else if (*f == '@' && !separate_argv0) separate_argv0 = true; + else if (*f == '+' && !privileged) + privileged = true; else break; f++; @@ -718,6 +722,7 @@ int config_parse_exec( nce->argv = n; nce->path = path; nce->ignore = ignore; + nce->privileged = privileged; exec_command_append_list(e, nce); @@ -2399,6 +2404,55 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP +static int syscall_filter_parse_one( + const char *unit, + const char *filename, + unsigned line, + ExecContext *c, + bool invert, + const char *t, + bool warn) { + int r; + + if (*t == '@') { + const SystemCallFilterSet *set; + + for (set = syscall_filter_sets; set->set_name; set++) + if (streq(set->set_name, t)) { + const char *sys; + + NULSTR_FOREACH(sys, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); + if (r < 0) + return r; + } + break; + } + } else { + int id; + + id = seccomp_syscall_resolve_name(t); + if (id == __NR_SCMP_ERROR) { + if (warn) + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + return 0; + } + + /* If we previously wanted to forbid a syscall and now + * we want to allow it, then remove it from the list + */ + if (!invert == c->syscall_whitelist) { + r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + if (r == 0) + return 0; + if (r < 0) + return log_oom(); + } else + set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + } + return 0; +} + int config_parse_syscall_filter( const char *unit, const char *filename, @@ -2411,13 +2465,6 @@ int config_parse_syscall_filter( void *data, void *userdata) { - static const char default_syscalls[] = - "execve\0" - "exit\0" - "exit_group\0" - "rt_sigreturn\0" - "sigreturn\0"; - ExecContext *c = data; Unit *u = userdata; bool invert = false; @@ -2451,53 +2498,26 @@ int config_parse_syscall_filter( /* Allow everything but the ones listed */ c->syscall_whitelist = false; else { - const char *i; - /* Allow nothing but the ones listed */ c->syscall_whitelist = true; /* Accept default syscalls if we are on a whitelist */ - NULSTR_FOREACH(i, default_syscalls) { - int id; - - id = seccomp_syscall_resolve_name(i); - if (id < 0) - continue; - - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); - if (r == 0) - continue; - if (r < 0) - return log_oom(); - } + r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false); + if (r < 0) + return r; } } FOREACH_WORD_QUOTED(word, l, rvalue, state) { _cleanup_free_ char *t = NULL; - int id; t = strndup(word, l); if (!t) return log_oom(); - id = seccomp_syscall_resolve_name(t); - if (id < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); - continue; - } - - /* If we previously wanted to forbid a syscall and now - * we want to allow it, then remove it from the list - */ - if (!invert == c->syscall_whitelist) { - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); - if (r == 0) - continue; - if (r < 0) - return log_oom(); - } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + if (r < 0) + return r; } if (!isempty(state)) log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); @@ -2757,7 +2777,7 @@ int config_parse_cpu_quota( void *userdata) { CGroupContext *c = data; - double percent; + int r; assert(filename); assert(lvalue); @@ -2768,18 +2788,13 @@ int config_parse_cpu_quota( return 0; } - if (!endswith(rvalue, "%")) { - log_syntax(unit, LOG_ERR, filename, line, 0, "CPU quota '%s' not ending in '%%'. Ignoring.", rvalue); + r = parse_percent(rvalue); + if (r <= 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "CPU quota '%s' invalid. Ignoring.", rvalue); return 0; } - if (sscanf(rvalue, "%lf%%", &percent) != 1 || percent <= 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "CPU quota '%s' invalid. Ignoring.", rvalue); - return 0; - } - - c->cpu_quota_per_sec_usec = (usec_t) (percent * USEC_PER_SEC / 100); - + c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 100U; return 0; } @@ -2796,21 +2811,36 @@ int config_parse_memory_limit( void *userdata) { CGroupContext *c = data; - uint64_t bytes; + uint64_t bytes = CGROUP_LIMIT_MAX; int r; - if (isempty(rvalue) || streq(rvalue, "infinity")) { - c->memory_limit = (uint64_t) -1; - return 0; - } + if (!isempty(rvalue) && !streq(rvalue, "infinity")) { - r = parse_size(rvalue, 1024, &bytes); - if (r < 0 || bytes < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); - return 0; + r = parse_percent(rvalue); + if (r < 0) { + r = parse_size(rvalue, 1024, &bytes); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); + return 0; + } + } else + bytes = physical_memory_scale(r, 100U); + + if (bytes <= 0 || bytes >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range. Ignoring.", rvalue); + return 0; + } } - c->memory_limit = bytes; + if (streq(lvalue, "MemoryLow")) + c->memory_low = bytes; + else if (streq(lvalue, "MemoryHigh")) + c->memory_high = bytes; + else if (streq(lvalue, "MemoryMax")) + c->memory_max = bytes; + else + c->memory_limit = bytes; + return 0; } @@ -2834,9 +2864,18 @@ int config_parse_tasks_max( return 0; } - r = safe_atou64(rvalue, &u); - if (r < 0 || u < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + r = parse_percent(rvalue); + if (r < 0) { + r = safe_atou64(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + return 0; + } + } else + u = system_tasks_max_scale(r, 100U); + + if (u <= 0 || u >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue); return 0; } @@ -3063,7 +3102,7 @@ int config_parse_io_limit( return 0; } - if (streq("max", limit)) { + if (streq("infinity", limit)) { num = CGROUP_LIMIT_MAX; } else { r = parse_size(limit, 1000, &num); @@ -3567,7 +3606,7 @@ int config_parse_protect_home( assert(data); /* Our enum shall be a superset of booleans, hence first try - * to parse as as boolean, and then as enum */ + * to parse as boolean, and then as enum */ k = parse_boolean(rvalue); if (k > 0) @@ -3610,7 +3649,7 @@ int config_parse_protect_system( assert(data); /* Our enum shall be a superset of booleans, hence first try - * to parse as as boolean, and then as enum */ + * to parse as boolean, and then as enum */ k = parse_boolean(rvalue); if (k > 0) @@ -3726,7 +3765,7 @@ static int merge_by_names(Unit **u, Set *names, const char *id) { /* If the symlink name we are looking at is unit template, then we must search for instance of this template */ - if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE)) { + if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE) && (*u)->instance) { _cleanup_free_ char *instance = NULL; r = unit_name_replace_instance(k, (*u)->instance, &instance); @@ -3808,7 +3847,15 @@ static int load_from_path(Unit *u, const char *path) { if (r >= 0) break; filename = mfree(filename); - if (r != -ENOENT) + + /* ENOENT means that the file is missing or is a dangling symlink. + * ENOTDIR means that one of paths we expect to be is a directory + * is not a directory, we should just ignore that. + * EACCES means that the directory or file permissions are wrong. + */ + if (r == -EACCES) + log_debug_errno(r, "Cannot access \"%s\": %m", filename); + else if (!IN_SET(r, -ENOENT, -ENOTDIR)) return r; /* Empty the symlink names for the next run */ diff --git a/src/grp-system/libcore/machine-id-setup.c b/src/grp-system/libcore/machine-id-setup.c index 28722b46f0..0b65583686 100644 --- a/src/grp-system/libcore/machine-id-setup.c +++ b/src/grp-system/libcore/machine-id-setup.c @@ -17,11 +17,8 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -#include <errno.h> #include <fcntl.h> #include <sched.h> -#include <stdio.h> -#include <string.h> #include <sys/mount.h> #include <unistd.h> @@ -29,10 +26,7 @@ #include "basic/alloc-util.h" #include "basic/fd-util.h" -#include "basic/fileio.h" #include "basic/fs-util.h" -#include "basic/hexdecoct.h" -#include "basic/io-util.h" #include "basic/log.h" #include "basic/macro.h" #include "basic/mkdir.h" @@ -44,104 +38,27 @@ #include "basic/umask-util.h" #include "basic/util.h" #include "basic/virt.h" +#include "sd-id128/id128-util.h" #include "machine-id-setup.h" -static int shorten_uuid(char destination[34], const char source[36]) { - unsigned i, j; - - assert(destination); - assert(source); - - /* Converts a UUID into a machine ID, by lowercasing it and - * removing dashes. Validates everything. */ - - for (i = 0, j = 0; i < 36 && j < 32; i++) { - int t; - - t = unhexchar(source[i]); - if (t < 0) - continue; - - destination[j++] = hexchar(t); - } - - if (i != 36 || j != 32) - return -EINVAL; - - destination[32] = '\n'; - destination[33] = 0; - return 0; -} - -static int read_machine_id(int fd, char id[34]) { - char id_to_validate[34]; - int r; - - assert(fd >= 0); - assert(id); - - /* Reads a machine ID from a file, validates it, and returns - * it. The returned ID ends in a newline. */ - - r = loop_read_exact(fd, id_to_validate, 33, false); - if (r < 0) - return r; - - if (id_to_validate[32] != '\n') - return -EINVAL; - - id_to_validate[32] = 0; - - if (!id128_is_valid(id_to_validate)) - return -EINVAL; - - memcpy(id, id_to_validate, 32); - id[32] = '\n'; - id[33] = 0; - return 0; -} - -static int write_machine_id(int fd, const char id[34]) { - int r; - - assert(fd >= 0); - assert(id); - - if (lseek(fd, 0, SEEK_SET) < 0) - return -errno; - - r = loop_write(fd, id, 33, false); - if (r < 0) - return r; - - if (fsync(fd) < 0) - return -errno; - - return 0; -} - -static int generate_machine_id(char id[34], const char *root) { - int fd, r; - unsigned char *p; - sd_id128_t buf; - char *q; +static int generate_machine_id(const char *root, sd_id128_t *ret) { const char *dbus_machine_id; + _cleanup_close_ int fd = -1; + int r; - assert(id); - - dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id"); + assert(ret); /* First, try reading the D-Bus machine id, unless it is a symlink */ + dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id"); fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); if (fd >= 0) { - r = read_machine_id(fd, id); - safe_close(fd); - - if (r >= 0) { + if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) { log_info("Initializing machine ID from D-Bus machine ID."); return 0; } + + fd = safe_close(fd); } if (isempty(root)) { @@ -152,13 +69,10 @@ static int generate_machine_id(char id[34], const char *root) { if (detect_container() > 0) { _cleanup_free_ char *e = NULL; - r = getenv_for_pid(1, "container_uuid", &e); - if (r > 0) { - r = shorten_uuid(id, e); - if (r >= 0) { - log_info("Initializing machine ID from container UUID."); - return 0; - } + if (getenv_for_pid(1, "container_uuid", &e) > 0 && + sd_id128_from_string(e, ret) >= 0) { + log_info("Initializing machine ID from container UUID."); + return 0; } } else if (detect_vm() == VIRTUALIZATION_KVM) { @@ -167,51 +81,29 @@ static int generate_machine_id(char id[34], const char *root) { * running in qemu/kvm and a machine ID was passed in * via -uuid on the qemu/kvm command line */ - char uuid[36]; - - fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); - if (fd >= 0) { - r = loop_read_exact(fd, uuid, 36, false); - safe_close(fd); - - if (r >= 0) { - r = shorten_uuid(id, uuid); - if (r >= 0) { - log_info("Initializing machine ID from KVM UUID."); - return 0; - } - } + if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) { + log_info("Initializing machine ID from KVM UUID."); + return 0; } } } /* If that didn't work, generate a random machine id */ - r = sd_id128_randomize(&buf); + r = sd_id128_randomize(ret); if (r < 0) - return log_error_errno(r, "Failed to open /dev/urandom: %m"); - - for (p = buf.bytes, q = id; p < buf.bytes + sizeof(buf); p++, q += 2) { - q[0] = hexchar(*p >> 4); - q[1] = hexchar(*p & 15); - } - - id[32] = '\n'; - id[33] = 0; + return log_error_errno(r, "Failed to generate randomized : %m"); log_info("Initializing machine ID from random generator."); - return 0; } -int machine_id_setup(const char *root, sd_id128_t machine_id) { +int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) { const char *etc_machine_id, *run_machine_id; _cleanup_close_ int fd = -1; - bool writable = true; - char id[34]; /* 32 + \n + \0 */ + bool writable; int r; etc_machine_id = prefix_roota(root, "/etc/machine-id"); - run_machine_id = prefix_roota(root, "/run/machine-id"); RUN_WITH_UMASK(0000) { /* We create this 0444, to indicate that this isn't really @@ -219,7 +111,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { * will be owned by root it doesn't matter much, but maybe * people look. */ - mkdir_parents(etc_machine_id, 0755); + (void) mkdir_parents(etc_machine_id, 0755); fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); if (fd < 0) { int old_errno = errno; @@ -240,41 +132,41 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { } writable = false; - } + } else + writable = true; } - /* A machine id argument overrides all other machined-ids */ - if (!sd_id128_is_null(machine_id)) { - sd_id128_to_string(machine_id, id); - id[32] = '\n'; - id[33] = 0; - } else { - if (read_machine_id(fd, id) >= 0) - return 0; + /* A we got a valid machine ID argument, that's what counts */ + if (sd_id128_is_null(machine_id)) { - /* Hmm, so, the id currently stored is not useful, then let's - * generate one */ + /* Try to read any existing machine ID */ + if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) + return 0; - r = generate_machine_id(id, root); + /* Hmm, so, the id currently stored is not useful, then let's generate one */ + r = generate_machine_id(root, &machine_id); if (r < 0) return r; + + if (lseek(fd, 0, SEEK_SET) == (off_t) -1) + return log_error_errno(errno, "Failed to seek: %m"); } if (writable) - if (write_machine_id(fd, id) >= 0) - return 0; + if (id128_write_fd(fd, ID128_PLAIN, machine_id, true) >= 0) + goto finish; fd = safe_close(fd); - /* Hmm, we couldn't write it? So let's write it to - * /run/machine-id as a replacement */ + /* Hmm, we couldn't write it? So let's write it to /run/machine-id as a replacement */ - RUN_WITH_UMASK(0022) { - r = write_string_file(run_machine_id, id, WRITE_STRING_FILE_CREATE); - if (r < 0) { - (void) unlink(run_machine_id); - return log_error_errno(r, "Cannot write %s: %m", run_machine_id); - } + run_machine_id = prefix_roota(root, "/run/machine-id"); + + RUN_WITH_UMASK(0022) + r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false); + if (r < 0) { + (void) unlink(run_machine_id); + return log_error_errno(r, "Cannot write %s: %m", run_machine_id); } /* And now, let's mount it over */ @@ -287,7 +179,11 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { /* Mark the mount read-only */ if (mount(NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL) < 0) - log_warning_errno(errno, "Failed to make transient %s read-only: %m", etc_machine_id); + log_warning_errno(errno, "Failed to make transient %s read-only, ignoring: %m", etc_machine_id); + +finish: + if (ret) + *ret = machine_id; return 0; } @@ -295,16 +191,20 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { int machine_id_commit(const char *root) { _cleanup_close_ int fd = -1, initial_mntns_fd = -1; const char *etc_machine_id; - char id[34]; /* 32 + \n + \0 */ + sd_id128_t id; int r; + /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed + * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the + * original mount namespace, thus revealing the file that was just created. */ + etc_machine_id = prefix_roota(root, "/etc/machine-id"); r = path_is_mount_point(etc_machine_id, 0); if (r < 0) return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", etc_machine_id); if (r == 0) { - log_debug("%s is is not a mount point. Nothing to do.", etc_machine_id); + log_debug("%s is not a mount point. Nothing to do.", etc_machine_id); return 0; } @@ -313,10 +213,6 @@ int machine_id_commit(const char *root) { if (fd < 0) return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id); - r = read_machine_id(fd, id); - if (r < 0) - return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); - r = fd_is_temporary_fs(fd); if (r < 0) return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id); @@ -325,6 +221,10 @@ int machine_id_commit(const char *root) { return -EROFS; } + r = id128_read_fd(fd, ID128_PLAIN, &id); + if (r < 0) + return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); + fd = safe_close(fd); /* Store current mount namespace */ @@ -343,15 +243,9 @@ int machine_id_commit(const char *root) { return log_error_errno(errno, "Failed to unmount transient %s file in our private namespace: %m", etc_machine_id); /* Update a persistent version of etc_machine_id */ - fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); - if (fd < 0) - return log_error_errno(errno, "Cannot open for writing %s. This is mandatory to get a persistent machine-id: %m", etc_machine_id); - - r = write_machine_id(fd, id); + r = id128_write(etc_machine_id, ID128_PLAIN, id, true); if (r < 0) - return log_error_errno(r, "Cannot write %s: %m", etc_machine_id); - - fd = safe_close(fd); + return log_error_errno(r, "Cannot write %s. This is mandatory to get a persistent machine ID: %m", etc_machine_id); /* Return to initial namespace and proceed a lazy tmpfs unmount */ r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1); diff --git a/src/grp-system/libcore/machine-id-setup.h b/src/grp-system/libcore/machine-id-setup.h index c5dd8d7790..88830ecc42 100644 --- a/src/grp-system/libcore/machine-id-setup.h +++ b/src/grp-system/libcore/machine-id-setup.h @@ -22,4 +22,4 @@ #include <systemd/sd-id128.h> int machine_id_commit(const char *root); -int machine_id_setup(const char *root, sd_id128_t machine_id); +int machine_id_setup(const char *root, sd_id128_t requested, sd_id128_t *ret); diff --git a/src/grp-system/libcore/manager.c b/src/grp-system/libcore/manager.c index 29dbf092d7..26395bc2b3 100644 --- a/src/grp-system/libcore/manager.c +++ b/src/grp-system/libcore/manager.c @@ -137,23 +137,28 @@ static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned po if (pos > 1) { if (pos > 2) p = mempset(p, ' ', pos-2); - p = stpcpy(p, ANSI_RED); + if (log_get_show_color()) + p = stpcpy(p, ANSI_RED); *p++ = '*'; } if (pos > 0 && pos <= width) { - p = stpcpy(p, ANSI_HIGHLIGHT_RED); + if (log_get_show_color()) + p = stpcpy(p, ANSI_HIGHLIGHT_RED); *p++ = '*'; } - p = stpcpy(p, ANSI_NORMAL); + if (log_get_show_color()) + p = stpcpy(p, ANSI_NORMAL); if (pos < width) { - p = stpcpy(p, ANSI_RED); + if (log_get_show_color()) + p = stpcpy(p, ANSI_RED); *p++ = '*'; if (pos < width-1) p = mempset(p, ' ', width-1-pos); - strcpy(p, ANSI_NORMAL); + if (log_get_show_color()) + strcpy(p, ANSI_NORMAL); } } @@ -566,7 +571,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { m->exit_code = _MANAGER_EXIT_CODE_INVALID; m->default_timer_accuracy_usec = USEC_PER_MINUTE; m->default_tasks_accounting = true; - m->default_tasks_max = UINT64_C(512); + m->default_tasks_max = UINT64_MAX; #ifdef ENABLE_EFI if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) @@ -810,28 +815,6 @@ static int manager_setup_cgroups_agent(Manager *m) { return 0; } -static int manager_setup_kdbus(Manager *m) { - _cleanup_free_ char *p = NULL; - - assert(m); - - if (m->test_run || m->kdbus_fd >= 0) - return 0; - if (!is_kdbus_available()) - return -ESOCKTNOSUPPORT; - - m->kdbus_fd = bus_kernel_create_bus( - MANAGER_IS_SYSTEM(m) ? "system" : "user", - MANAGER_IS_SYSTEM(m), &p); - - if (m->kdbus_fd < 0) - return log_debug_errno(m->kdbus_fd, "Failed to set up kdbus: %m"); - - log_debug("Successfully set up kdbus on %s", p); - - return 0; -} - static int manager_connect_bus(Manager *m, bool reexecuting) { bool try_bus_connect; @@ -873,6 +856,19 @@ enum { _GC_OFFSET_MAX }; +static void unit_gc_mark_good(Unit *u, unsigned gc_marker) +{ + Iterator i; + Unit *other; + + u->gc_marker = gc_marker + GC_OFFSET_GOOD; + + /* Recursively mark referenced units as GOOD as well */ + SET_FOREACH(other, u->dependencies[UNIT_REFERENCES], i) + if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE) + unit_gc_mark_good(other, gc_marker); +} + static void unit_gc_sweep(Unit *u, unsigned gc_marker) { Iterator i; Unit *other; @@ -882,6 +878,7 @@ static void unit_gc_sweep(Unit *u, unsigned gc_marker) { if (u->gc_marker == gc_marker + GC_OFFSET_GOOD || u->gc_marker == gc_marker + GC_OFFSET_BAD || + u->gc_marker == gc_marker + GC_OFFSET_UNSURE || u->gc_marker == gc_marker + GC_OFFSET_IN_PATH) return; @@ -922,7 +919,7 @@ bad: return; good: - u->gc_marker = gc_marker + GC_OFFSET_GOOD; + unit_gc_mark_good(u, gc_marker); } static unsigned manager_dispatch_gc_queue(Manager *m) { @@ -1226,7 +1223,6 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { /* We might have deserialized the kdbus control fd, but if we * didn't, then let's create the bus now. */ - manager_setup_kdbus(m); manager_connect_bus(m, !!serialization); bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed); @@ -1611,9 +1607,9 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + _cleanup_fdset_free_ FDSet *fds = NULL; Manager *m = userdata; - char buf[NOTIFY_BUFFER_MAX+1]; struct iovec iovec = { .iov_base = buf, @@ -1721,16 +1717,28 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t } static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) { + uint64_t iteration; + assert(m); assert(u); assert(si); + sd_event_get_iteration(m->event, &iteration); + log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, u->id); unit_unwatch_pid(u, si->si_pid); - if (UNIT_VTABLE(u)->sigchld_event) - UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status); + if (UNIT_VTABLE(u)->sigchld_event) { + if (set_size(u->pids) <= 1 || + iteration != u->sigchldgen || + unit_main_pid(u) == si->si_pid || + unit_control_pid(u) == si->si_pid) { + UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status); + u->sigchldgen = iteration; + } else + log_debug("%s already issued a sigchld this iteration %" PRIu64 ", skipping. Pids still being watched %d", u->id, iteration, set_size(u->pids)); + } } static int manager_dispatch_sigchld(Manager *m) { diff --git a/src/grp-system/libcore/manager.h b/src/grp-system/libcore/manager.h index fedf39ddcd..252919c27f 100644 --- a/src/grp-system/libcore/manager.h +++ b/src/grp-system/libcore/manager.h @@ -27,10 +27,10 @@ #include <systemd/sd-event.h> #include "basic/cgroup-util.h" -#include "basic/fdset.h" #include "basic/hashmap.h" #include "basic/list.h" #include "basic/ratelimit.h" +#include "shared/fdset.h" /* Enforce upper limit how many names we allow */ #define MANAGER_MAX_NAMES 131072 /* 128K */ diff --git a/src/grp-system/libcore/mount-setup.c b/src/grp-system/libcore/mount-setup.c index e90f0e918a..0de1c63b3e 100644 --- a/src/grp-system/libcore/mount-setup.c +++ b/src/grp-system/libcore/mount-setup.c @@ -25,6 +25,7 @@ #include "basic/alloc-util.h" #include "basic/cgroup-util.h" +#include "basic/fs-util.h" #include "basic/label.h" #include "basic/log.h" #include "basic/macro.h" @@ -109,8 +110,6 @@ static const MountPoint mount_table[] = { { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, is_efi_boot, MNT_NONE }, #endif - { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - is_kdbus_wanted, MNT_IN_CONTAINER }, }; /* These are API file systems that might be mounted by other software, @@ -406,9 +405,16 @@ int mount_setup(bool loaded_policy) { * really needs to stay for good, otherwise software that * copied sd-daemon.c into their sources will misdetect * systemd. */ - mkdir_label("/run/systemd", 0755); - mkdir_label("/run/systemd/system", 0755); - mkdir_label("/run/systemd/inaccessible", 0000); + (void) mkdir_label("/run/systemd", 0755); + (void) mkdir_label("/run/systemd/system", 0755); + (void) mkdir_label("/run/systemd/inaccessible", 0000); + /* Set up inaccessible items */ + (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0); + (void) mkdir_label("/run/systemd/inaccessible/dir", 0000); + (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); + (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); + (void) mkfifo("/run/systemd/inaccessible/fifo", 0000); + (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0); return 0; } diff --git a/src/grp-system/libcore/mount.c b/src/grp-system/libcore/mount.c index d4af6c65f6..bc5f29692d 100644 --- a/src/grp-system/libcore/mount.c +++ b/src/grp-system/libcore/mount.c @@ -1707,6 +1707,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, /* This has just been unmounted by * somebody else, follow the state * change. */ + mount->result = MOUNT_SUCCESS; /* make sure we forget any earlier umount failures */ mount_enter_dead(mount, MOUNT_SUCCESS); break; diff --git a/src/grp-system/libcore/namespace.c b/src/grp-system/libcore/namespace.c index db60336a60..f76a0a7fbf 100644 --- a/src/grp-system/libcore/namespace.c +++ b/src/grp-system/libcore/namespace.c @@ -280,6 +280,7 @@ static int apply_mount( const char *what; int r; + struct stat target; assert(m); @@ -289,12 +290,21 @@ static int apply_mount( /* First, get rid of everything that is below if there * is anything... Then, overmount it with an - * inaccessible directory. */ + * inaccessible path. */ umount_recursive(m->path, 0); - what = "/run/systemd/inaccessible"; - break; + if (lstat(m->path, &target) < 0) { + if (m->ignore && errno == ENOENT) + return 0; + return -errno; + } + what = mode_to_inaccessible_node(target.st_mode); + if (!what) { + log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed"); + return -ELOOP; + } + break; case READONLY: case READWRITE: /* Nothing to mount here, we just later toggle the @@ -319,12 +329,14 @@ static int apply_mount( assert(what); r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) + if (r >= 0) { log_debug("Successfully mounted %s to %s", what, m->path); - else if (m->ignore && errno == ENOENT) - return 0; - - return r; + return r; + } else { + if (m->ignore && errno == ENOENT) + return 0; + return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); + } } static int make_read_only(BindMount *m) { @@ -337,7 +349,8 @@ static int make_read_only(BindMount *m) { else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { r = bind_remount_recursive(m->path, false); if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - r = mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL); + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else r = 0; @@ -349,9 +362,9 @@ static int make_read_only(BindMount *m) { int setup_namespace( const char* root_directory, - char** read_write_dirs, - char** read_only_dirs, - char** inaccessible_dirs, + char** read_write_paths, + char** read_only_paths, + char** inaccessible_paths, const char* tmp_dir, const char* var_tmp_dir, bool private_dev, @@ -370,9 +383,9 @@ int setup_namespace( return -errno; n = !!tmp_dir + !!var_tmp_dir + - strv_length(read_write_dirs) + - strv_length(read_only_dirs) + - strv_length(inaccessible_dirs) + + strv_length(read_write_paths) + + strv_length(read_only_paths) + + strv_length(inaccessible_paths) + private_dev + (protect_home != PROTECT_HOME_NO ? 3 : 0) + (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) + @@ -380,15 +393,15 @@ int setup_namespace( if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); - r = append_mounts(&m, read_write_dirs, READWRITE); + r = append_mounts(&m, read_write_paths, READWRITE); if (r < 0) return r; - r = append_mounts(&m, read_only_dirs, READONLY); + r = append_mounts(&m, read_only_paths, READONLY); if (r < 0) return r; - r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE); + r = append_mounts(&m, inaccessible_paths, INACCESSIBLE); if (r < 0) return r; @@ -631,7 +644,7 @@ int setup_netns(int netns_storage_socket[2]) { } fail: - lockf(netns_storage_socket[0], F_ULOCK, 0); + (void) lockf(netns_storage_socket[0], F_ULOCK, 0); return r; } diff --git a/src/grp-system/libcore/namespace.h b/src/grp-system/libcore/namespace.h index 03097327dd..1ae206efd1 100644 --- a/src/grp-system/libcore/namespace.h +++ b/src/grp-system/libcore/namespace.h @@ -40,9 +40,9 @@ typedef enum ProtectSystem { } ProtectSystem; int setup_namespace(const char *chroot, - char **read_write_dirs, - char **read_only_dirs, - char **inaccessible_dirs, + char **read_write_paths, + char **read_only_paths, + char **inaccessible_paths, const char *tmp_dir, const char *var_tmp_dir, bool private_dev, diff --git a/src/grp-system/libcore/scope.c b/src/grp-system/libcore/scope.c index c290529e14..8998f751f5 100644 --- a/src/grp-system/libcore/scope.c +++ b/src/grp-system/libcore/scope.c @@ -241,7 +241,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { /* If we have a controller set let's ask the controller nicely * to terminate the scope, instead of us going directly into - * SIGTERM beserk mode */ + * SIGTERM berserk mode */ if (state == SCOPE_STOP_SIGTERM) skip_signal = bus_scope_send_request_stop(s) > 0; @@ -249,7 +249,9 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { r = unit_kill_context( UNIT(s), &s->kill_context, - state != SCOPE_STOP_SIGTERM ? KILL_KILL : KILL_TERMINATE, + state != SCOPE_STOP_SIGTERM ? KILL_KILL : + s->was_abandoned ? KILL_TERMINATE_AND_LOG : + KILL_TERMINATE, -1, -1, false); if (r < 0) goto fail; @@ -370,6 +372,7 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) { assert(fds); unit_serialize_item(u, f, "state", scope_state_to_string(s->state)); + unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned)); return 0; } @@ -390,6 +393,14 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F else s->deserialized_state = state; + } else if (streq(key, "was-abandoned")) { + int k; + + k = parse_boolean(value); + if (k < 0) + log_unit_debug(u, "Failed to parse boolean value: %s", value); + else + s->was_abandoned = k; } else log_unit_debug(u, "Unknown serialization key: %s", key); @@ -429,8 +440,9 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { unit_tidy_watch_pids(u, 0, 0); unit_watch_all_pids(u); - /* If the PID set is empty now, then let's finish this off */ - if (set_isempty(u->pids)) + /* If the PID set is empty now, then let's finish this off + (On unified we use proper notifications) */ + if (cg_unified() <= 0 && set_isempty(u->pids)) scope_notify_cgroup_empty_event(u); } @@ -474,6 +486,7 @@ int scope_abandon(Scope *s) { if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED)) return -ESTALE; + s->was_abandoned = true; s->controller = mfree(s->controller); /* The client is no longer watching the remaining processes, diff --git a/src/grp-system/libcore/scope.h b/src/grp-system/libcore/scope.h index f0cb3bd3e2..eaf8e8b447 100644 --- a/src/grp-system/libcore/scope.h +++ b/src/grp-system/libcore/scope.h @@ -21,6 +21,7 @@ typedef struct Scope Scope; +#include "cgroup.h" #include "kill.h" #include "unit.h" @@ -44,6 +45,7 @@ struct Scope { usec_t timeout_stop_usec; char *controller; + bool was_abandoned; sd_event_source *timer_event_source; }; diff --git a/src/grp-system/libcore/selinux-access.c b/src/grp-system/libcore/selinux-access.c index ce2ef2db1c..f6dbfa64b7 100644 --- a/src/grp-system/libcore/selinux-access.c +++ b/src/grp-system/libcore/selinux-access.c @@ -192,7 +192,7 @@ int mac_selinux_generic_access_check( const char *tclass = NULL, *scon = NULL; struct audit_info audit_info = {}; _cleanup_free_ char *cl = NULL; - security_context_t fcon = NULL; + char *fcon = NULL; char **cmdline = NULL; int r = 0; diff --git a/src/grp-system/libcore/selinux-setup.c b/src/grp-system/libcore/selinux-setup.c index 314d8edbaa..08c61af146 100644 --- a/src/grp-system/libcore/selinux-setup.c +++ b/src/grp-system/libcore/selinux-setup.c @@ -45,7 +45,7 @@ int mac_selinux_setup(bool *loaded_policy) { #ifdef HAVE_SELINUX int enforce = 0; usec_t before_load, after_load; - security_context_t con; + char *con; int r; union selinux_callback cb; bool initialized = false; diff --git a/src/grp-system/libcore/service.c b/src/grp-system/libcore/service.c index 9b7f7bd68c..43d195bbba 100644 --- a/src/grp-system/libcore/service.c +++ b/src/grp-system/libcore/service.c @@ -201,16 +201,27 @@ static void service_stop_watchdog(Service *s) { s->watchdog_timestamp = DUAL_TIMESTAMP_NULL; } +static usec_t service_get_watchdog_usec(Service *s) { + assert(s); + + if (s->watchdog_override_enable) + return s->watchdog_override_usec; + else + return s->watchdog_usec; +} + static void service_start_watchdog(Service *s) { int r; + usec_t watchdog_usec; assert(s); - if (s->watchdog_usec <= 0) + watchdog_usec = service_get_watchdog_usec(s); + if (watchdog_usec == 0 || watchdog_usec == USEC_INFINITY) return; if (s->watchdog_event_source) { - r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, s->watchdog_usec)); + r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec)); if (r < 0) { log_unit_warning_errno(UNIT(s), r, "Failed to reset watchdog timer: %m"); return; @@ -222,7 +233,7 @@ static void service_start_watchdog(Service *s) { UNIT(s)->manager->event, &s->watchdog_event_source, CLOCK_MONOTONIC, - usec_add(s->watchdog_timestamp.monotonic, s->watchdog_usec), 0, + usec_add(s->watchdog_timestamp.monotonic, watchdog_usec), 0, service_dispatch_watchdog, s); if (r < 0) { log_unit_warning_errno(UNIT(s), r, "Failed to add watchdog timer: %m"); @@ -247,6 +258,17 @@ static void service_reset_watchdog(Service *s) { service_start_watchdog(s); } +static void service_reset_watchdog_timeout(Service *s, usec_t watchdog_override_usec) { + assert(s); + + s->watchdog_override_enable = true; + s->watchdog_override_usec = watchdog_override_usec; + service_reset_watchdog(s); + + log_unit_debug(UNIT(s), "watchdog_usec="USEC_FMT, s->watchdog_usec); + log_unit_debug(UNIT(s), "watchdog_override_usec="USEC_FMT, s->watchdog_override_usec); +} + static void service_fd_store_unlink(ServiceFDStore *fs) { if (!fs) @@ -575,20 +597,9 @@ static int service_setup_bus_name(Service *s) { if (!s->bus_name) return 0; - if (is_kdbus_available()) { - const char *n; - - n = strjoina(s->bus_name, ".busname"); - r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, n, NULL, true); - if (r < 0) - return log_unit_error_errno(UNIT(s), r, "Failed to add dependency to .busname unit: %m"); - - } else { - /* If kdbus is not available, we know the dbus socket is required, hence pull it in, and require it */ - r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true); - if (r < 0) - return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m"); - } + r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true); + if (r < 0) + return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m"); /* Regardless if kdbus is used or not, we always want to be ordered against dbus.socket if both are in the transaction. */ r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, NULL, true); @@ -1664,7 +1675,7 @@ static void service_kill_control_processes(Service *s) { return; p = strjoina(UNIT(s)->cgroup_path, "/control"); - cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL); + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, CGROUP_SIGCONT|CGROUP_IGNORE_SELF|CGROUP_REMOVE, NULL, NULL, NULL); } static void service_enter_start(Service *s) { @@ -2004,6 +2015,9 @@ static int service_start(Unit *u) { s->notify_state = NOTIFY_UNKNOWN; + s->watchdog_override_enable = false; + s->watchdog_override_usec = 0; + service_enter_start_pre(s); return 1; } @@ -2135,6 +2149,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart)); + if (s->watchdog_override_enable) + unit_serialize_item_format(u, f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec); + return 0; } @@ -2329,6 +2346,14 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, s->stderr_fd = fdset_remove(fds, fd); s->exec_context.stdio_as_fds = true; } + } else if (streq(key, "watchdog-override-usec")) { + usec_t watchdog_override_usec; + if (timestamp_deserialize(value, &watchdog_override_usec) < 0) + log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value); + else { + s->watchdog_override_enable = true; + s->watchdog_override_usec = watchdog_override_usec; + } } else log_unit_debug(u, "Unknown serialization key: %s", key); @@ -2801,8 +2826,9 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { unit_tidy_watch_pids(u, s->main_pid, s->control_pid); unit_watch_all_pids(u); - /* If the PID set is empty now, then let's finish this off */ - if (set_isempty(u->pids)) + /* If the PID set is empty now, then let's finish this off + (On unified we use proper notifications) */ + if (cg_unified() <= 0 && set_isempty(u->pids)) service_notify_cgroup_empty_event(u); } @@ -2906,12 +2932,15 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata) { Service *s = SERVICE(userdata); char t[FORMAT_TIMESPAN_MAX]; + usec_t watchdog_usec; assert(s); assert(source == s->watchdog_event_source); + watchdog_usec = service_get_watchdog_usec(s); + log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!", - format_timespan(t, sizeof(t), s->watchdog_usec, 1)); + format_timespan(t, sizeof(t), watchdog_usec, 1)); service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG); @@ -3048,6 +3077,15 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) service_add_fd_store_set(s, fds, name); } + e = strv_find_startswith(tags, "WATCHDOG_USEC="); + if (e) { + usec_t watchdog_override_usec; + if (safe_atou64(e, &watchdog_override_usec) < 0) + log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e); + else + service_reset_watchdog_timeout(s, watchdog_override_usec); + } + /* Notify clients about changed status or main pid */ if (notify_dbus) unit_add_to_dbus_queue(u); diff --git a/src/grp-system/libcore/service.h b/src/grp-system/libcore/service.h index aa7d1de8b6..4dcf5ecf78 100644 --- a/src/grp-system/libcore/service.h +++ b/src/grp-system/libcore/service.h @@ -122,6 +122,8 @@ struct Service { dual_timestamp watchdog_timestamp; usec_t watchdog_usec; + usec_t watchdog_override_usec; + bool watchdog_override_enable; sd_event_source *watchdog_event_source; ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX]; diff --git a/src/grp-system/libcore/socket.c b/src/grp-system/libcore/socket.c index 051cbdab8b..3e0b3e2e49 100644 --- a/src/grp-system/libcore/socket.c +++ b/src/grp-system/libcore/socket.c @@ -732,16 +732,16 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) { case AF_INET: { uint32_t - a = ntohl(local.in.sin_addr.s_addr), - b = ntohl(remote.in.sin_addr.s_addr); + a = be32toh(local.in.sin_addr.s_addr), + b = be32toh(remote.in.sin_addr.s_addr); if (asprintf(&r, "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u", nr, a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF, - ntohs(local.in.sin_port), + be16toh(local.in.sin_port), b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF, - ntohs(remote.in.sin_port)) < 0) + be16toh(remote.in.sin_port)) < 0) return -ENOMEM; break; @@ -762,9 +762,9 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) { "%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u", nr, a[0], a[1], a[2], a[3], - ntohs(local.in6.sin6_port), + be16toh(local.in6.sin6_port), b[0], b[1], b[2], b[3], - ntohs(remote.in6.sin6_port)) < 0) + be16toh(remote.in6.sin6_port)) < 0) return -ENOMEM; } else { char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN]; @@ -773,9 +773,9 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) { "%u-%s:%u-%s:%u", nr, inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)), - ntohs(local.in6.sin6_port), + be16toh(local.in6.sin6_port), inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)), - ntohs(remote.in6.sin6_port)) < 0) + be16toh(remote.in6.sin6_port)) < 0) return -ENOMEM; } diff --git a/src/grp-system/libcore/transaction.c b/src/grp-system/libcore/transaction.c index d19e19f978..aa57eee556 100644 --- a/src/grp-system/libcore/transaction.c +++ b/src/grp-system/libcore/transaction.c @@ -374,7 +374,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi delete = NULL; for (k = from; k; k = ((k->generation == generation && k->marker != k) ? k->marker : NULL)) { - /* logging for j not k here here to provide consistent narrative */ + /* logging for j not k here to provide consistent narrative */ log_unit_warning(j->unit, "Found dependency on %s/%s", k->unit->id, job_type_to_string(k->type)); @@ -393,7 +393,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi if (delete) { const char *status; - /* logging for j not k here here to provide consistent narrative */ + /* logging for j not k here to provide consistent narrative */ log_unit_warning(j->unit, "Breaking ordering cycle by deleting job %s/%s", delete->unit->id, job_type_to_string(delete->type)); @@ -592,6 +592,9 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) { HASHMAP_FOREACH(j, m->jobs, i) { assert(j->installed); + if (j->unit->ignore_on_isolate) + continue; + if (hashmap_get(tr->jobs, j->unit)) continue; diff --git a/src/grp-system/libcore/unit.c b/src/grp-system/libcore/unit.c index f539c3971d..aff4dbd2ca 100644 --- a/src/grp-system/libcore/unit.c +++ b/src/grp-system/libcore/unit.c @@ -101,6 +101,7 @@ Unit *unit_new(Manager *m, size_t size) { u->on_failure_job_mode = JOB_REPLACE; u->cgroup_inotify_wd = -1; u->job_timeout = USEC_INFINITY; + u->sigchldgen = 0; RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst); RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16); @@ -1683,7 +1684,7 @@ static void unit_check_unneeded(Unit *u) { if (unit_active_or_pending(other)) return; - /* If stopping a unit fails continously we might enter a stop + /* If stopping a unit fails continuously we might enter a stop * loop here, hence stop acting on the service being * unnecessary after a while. */ if (!ratelimit_test(&u->auto_stop_ratelimit)) { @@ -1728,7 +1729,7 @@ static void unit_check_binds_to(Unit *u) { if (!stop) return; - /* If stopping a unit fails continously we might enter a stop + /* If stopping a unit fails continuously we might enter a stop * loop here, hence stop acting on the service being * unnecessary after a while. */ if (!ratelimit_test(&u->auto_stop_ratelimit)) { @@ -3144,7 +3145,7 @@ int unit_kill_common( if (!pid_set) return -ENOMEM; - q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, false, false, pid_set); + q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, NULL, NULL); if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT) r = q; else @@ -3356,7 +3357,7 @@ static const char* unit_drop_in_dir(Unit *u, UnitSetPropertiesMode mode) { int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data) { _cleanup_free_ char *p = NULL, *q = NULL; - const char *dir, *prefixed; + const char *dir, *wrapped; int r; assert(u); @@ -3365,6 +3366,7 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co /* When this is a transient unit file in creation, then let's not create a new drop-in but instead * write to the transient unit file. */ fputs(data, u->transient_file); + fputc('\n', u->transient_file); return 0; } @@ -3375,15 +3377,17 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co if (!dir) return -EINVAL; - prefixed = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\" or an equivalent operation. Do not edit.\n", - data); + wrapped = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\"\n" + "# or an equivalent operation. Do not edit.\n", + data, + "\n"); r = drop_in_file(dir, u->id, 50, name, &p, &q); if (r < 0) return r; (void) mkdir_p(p, 0755); - r = write_string_file_atomic_label(q, prefixed); + r = write_string_file_atomic_label(q, wrapped); if (r < 0) return r; @@ -3502,7 +3506,6 @@ int unit_make_transient(Unit *u) { unit_add_to_dbus_queue(u); unit_add_to_gc_queue(u); - unit_add_to_load_queue(u); fputs("# This is a transient unit file, created programmatically via the systemd API. Do not edit.\n", u->transient_file); @@ -3510,6 +3513,43 @@ int unit_make_transient(Unit *u) { return 0; } +static void log_kill(pid_t pid, int sig, void *userdata) { + _cleanup_free_ char *comm = NULL; + + (void) get_process_comm(pid, &comm); + + /* Don't log about processes marked with brackets, under the assumption that these are temporary processes + only, like for example systemd's own PAM stub process. */ + if (comm && comm[0] == '(') + return; + + log_unit_notice(userdata, + "Killing process " PID_FMT " (%s) with signal SIG%s.", + pid, + strna(comm), + signal_to_string(sig)); +} + +static int operation_to_signal(KillContext *c, KillOperation k) { + assert(c); + + switch (k) { + + case KILL_TERMINATE: + case KILL_TERMINATE_AND_LOG: + return c->kill_signal; + + case KILL_KILL: + return SIGKILL; + + case KILL_ABORT: + return SIGABRT; + + default: + assert_not_reached("KillOperation unknown"); + } +} + int unit_kill_context( Unit *u, KillContext *c, @@ -3518,58 +3558,63 @@ int unit_kill_context( pid_t control_pid, bool main_pid_alien) { - bool wait_for_exit = false; + bool wait_for_exit = false, send_sighup; + cg_kill_log_func_t log_func; int sig, r; assert(u); assert(c); + /* Kill the processes belonging to this unit, in preparation for shutting the unit down. Returns > 0 if we + * killed something worth waiting for, 0 otherwise. */ + if (c->kill_mode == KILL_NONE) return 0; - switch (k) { - case KILL_KILL: - sig = SIGKILL; - break; - case KILL_ABORT: - sig = SIGABRT; - break; - case KILL_TERMINATE: - sig = c->kill_signal; - break; - default: - assert_not_reached("KillOperation unknown"); - } + sig = operation_to_signal(c, k); + + send_sighup = + c->send_sighup && + IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) && + sig != SIGHUP; + + log_func = + k != KILL_TERMINATE || + IN_SET(sig, SIGKILL, SIGABRT) ? log_kill : NULL; if (main_pid > 0) { - r = kill_and_sigcont(main_pid, sig); + if (log_func) + log_func(main_pid, sig, u); + r = kill_and_sigcont(main_pid, sig); if (r < 0 && r != -ESRCH) { _cleanup_free_ char *comm = NULL; - get_process_comm(main_pid, &comm); + (void) get_process_comm(main_pid, &comm); log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm)); } else { if (!main_pid_alien) wait_for_exit = true; - if (c->send_sighup && k == KILL_TERMINATE) + if (r != -ESRCH && send_sighup) (void) kill(main_pid, SIGHUP); } } if (control_pid > 0) { - r = kill_and_sigcont(control_pid, sig); + if (log_func) + log_func(control_pid, sig, u); + r = kill_and_sigcont(control_pid, sig); if (r < 0 && r != -ESRCH) { _cleanup_free_ char *comm = NULL; - get_process_comm(control_pid, &comm); + (void) get_process_comm(control_pid, &comm); log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm)); } else { wait_for_exit = true; - if (c->send_sighup && k == KILL_TERMINATE) + if (r != -ESRCH && send_sighup) (void) kill(control_pid, SIGHUP); } } @@ -3583,7 +3628,11 @@ int unit_kill_context( if (!pid_set) return -ENOMEM; - r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, k != KILL_TERMINATE, false, pid_set); + r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + sig, + CGROUP_SIGCONT|CGROUP_IGNORE_SELF, + pid_set, + log_func, u); if (r < 0) { if (r != -EAGAIN && r != -ESRCH && r != -ENOENT) log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path); @@ -3608,14 +3657,18 @@ int unit_kill_context( (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; - if (c->send_sighup && k != KILL_KILL) { + if (send_sighup) { set_free(pid_set); pid_set = unit_pid_set(main_pid, control_pid); if (!pid_set) return -ENOMEM; - cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, false, true, false, pid_set); + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + SIGHUP, + CGROUP_IGNORE_SELF, + pid_set, + NULL, NULL); } } } @@ -3788,7 +3841,7 @@ bool unit_is_pristine(Unit *u) { /* Check if the unit already exists or is already around, * in a number of different ways. Note that to cater for unit * types such as slice, we are generally fine with units that - * are marked UNIT_LOADED even even though nothing was + * are marked UNIT_LOADED even though nothing was * actually loaded, as those unit types don't require a file * on disk to validly load. */ diff --git a/src/grp-system/libcore/unit.h b/src/grp-system/libcore/unit.h index 72ceed1fef..3e25bfd32a 100644 --- a/src/grp-system/libcore/unit.h +++ b/src/grp-system/libcore/unit.h @@ -37,6 +37,7 @@ typedef struct UnitVTable UnitVTable; typedef enum KillOperation { KILL_TERMINATE, + KILL_TERMINATE_AND_LOG, KILL_KILL, KILL_ABORT, _KILL_OPERATION_MAX, @@ -163,6 +164,9 @@ struct Unit { * process SIGCHLD for */ Set *pids; + /* Used in sigchld event invocation to avoid repeat events being invoked */ + uint64_t sigchldgen; + /* Used during GC sweeps */ unsigned gc_marker; |