diff options
author | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-12-17 00:42:49 -0500 |
---|---|---|
committer | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-12-17 00:42:49 -0500 |
commit | ccef261c077d31dce02aa92e519b23b3a2a58303 (patch) | |
tree | 582fafc98c76e98e912f453fb55ca57f29273d57 /src/core | |
parent | 5d4922bba91c6d60b3b9f38fb29fda0f6ba8338d (diff) | |
parent | 5fb2a20a29c2cc0494d5a31e175a8e3ff0b2d3e2 (diff) |
Merge tag 'systemd/v232-4.parabola1' into systemd/parabola
Diffstat (limited to 'src/core')
56 files changed, 5659 insertions, 1270 deletions
diff --git a/src/core/automount.c b/src/core/automount.c index 4e9891569c..7d7a0a6e46 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) { return r; (void) sd_event_source_set_description(a->pipe_event_source, "automount-io"); + if (a->deserialized_state == AUTOMOUNT_RUNNING) { + r = automount_start_expire(a); + if (r < 0) + log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m"); + } } automount_set_state(a, a->deserialized_state); @@ -301,7 +306,7 @@ static void automount_dump(Unit *u, FILE *f, const char *prefix) { static void automount_enter_dead(Automount *a, AutomountResult f) { assert(a); - if (f != AUTOMOUNT_SUCCESS) + if (a->result == AUTOMOUNT_SUCCESS) a->result = f; automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD); @@ -795,6 +800,10 @@ static int automount_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + a->result = AUTOMOUNT_SUCCESS; automount_enter_waiting(a); return 1; @@ -1105,6 +1114,9 @@ const UnitVTable automount_vtable = { .reset_failed = automount_reset_failed, .bus_vtable = bus_automount_vtable, + .bus_set_property = bus_automount_set_property, + + .can_transient = true, .shutdown = automount_shutdown, .supported = automount_supported, diff --git a/src/core/busname.c b/src/core/busname.c index 730be2ee14..b96ec09e67 100644 --- a/src/core/busname.c +++ b/src/core/busname.c @@ -442,7 +442,7 @@ fail: static void busname_enter_dead(BusName *n, BusNameResult f) { assert(n); - if (f != BUSNAME_SUCCESS) + if (n->result == BUSNAME_SUCCESS) n->result = f; busname_set_state(n, n->result != BUSNAME_SUCCESS ? BUSNAME_FAILED : BUSNAME_DEAD); @@ -454,7 +454,7 @@ static void busname_enter_signal(BusName *n, BusNameState state, BusNameResult f assert(n); - if (f != BUSNAME_SUCCESS) + if (n->result == BUSNAME_SUCCESS) n->result = f; kill_context_init(&kill_context); @@ -639,6 +639,10 @@ static int busname_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + n->result = BUSNAME_SUCCESS; busname_enter_making(n); @@ -868,7 +872,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) { n->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = BUSNAME_SUCCESS; else if (code == CLD_EXITED) f = BUSNAME_FAILURE_EXIT_CODE; @@ -882,7 +886,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) { log_unit_full(u, f == BUSNAME_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0, "Control process exited, code=%s status=%i", sigchld_code_to_string(code), status); - if (f != BUSNAME_SUCCESS) + if (n->result == BUSNAME_SUCCESS) n->result = f; switch (n->state) { diff --git a/src/core/cgroup.c b/src/core/cgroup.c index c19e43f571..23a92f9651 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -57,12 +57,16 @@ void cgroup_context_init(CGroupContext *c) { /* Initialize everything to the kernel defaults, assuming the * structure is preinitialized to 0 */ + c->cpu_weight = CGROUP_WEIGHT_INVALID; + c->startup_cpu_weight = CGROUP_WEIGHT_INVALID; + c->cpu_quota_per_sec_usec = USEC_INFINITY; + c->cpu_shares = CGROUP_CPU_SHARES_INVALID; c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; - c->cpu_quota_per_sec_usec = USEC_INFINITY; c->memory_high = CGROUP_LIMIT_MAX; c->memory_max = CGROUP_LIMIT_MAX; + c->memory_swap_max = CGROUP_LIMIT_MAX; c->memory_limit = CGROUP_LIMIT_MAX; @@ -158,6 +162,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sBlockIOAccounting=%s\n" "%sMemoryAccounting=%s\n" "%sTasksAccounting=%s\n" + "%sCPUWeight=%" PRIu64 "\n" + "%sStartupCPUWeight=%" PRIu64 "\n" "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" @@ -168,6 +174,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sMemoryLow=%" PRIu64 "\n" "%sMemoryHigh=%" PRIu64 "\n" "%sMemoryMax=%" PRIu64 "\n" + "%sMemorySwapMax=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n" "%sDevicePolicy=%s\n" @@ -177,6 +184,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->blockio_accounting), prefix, yes_no(c->memory_accounting), prefix, yes_no(c->tasks_accounting), + prefix, c->cpu_weight, + prefix, c->startup_cpu_weight, prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), @@ -187,6 +196,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->memory_low, prefix, c->memory_high, prefix, c->memory_max, + prefix, c->memory_swap_max, prefix, c->memory_limit, prefix, c->tasks_max, prefix, cgroup_device_policy_to_string(c->device_policy), @@ -382,6 +392,95 @@ fail: return -errno; } +static bool cgroup_context_has_cpu_weight(CGroupContext *c) { + return c->cpu_weight != CGROUP_WEIGHT_INVALID || + c->startup_cpu_weight != CGROUP_WEIGHT_INVALID; +} + +static bool cgroup_context_has_cpu_shares(CGroupContext *c) { + return c->cpu_shares != CGROUP_CPU_SHARES_INVALID || + c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID; +} + +static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_cpu_weight != CGROUP_WEIGHT_INVALID) + return c->startup_cpu_weight; + else if (c->cpu_weight != CGROUP_WEIGHT_INVALID) + return c->cpu_weight; + else + return CGROUP_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID) + return c->startup_cpu_shares; + else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID) + return c->cpu_shares; + else + return CGROUP_CPU_SHARES_DEFAULT; +} + +static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) { + char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)]; + int r; + + xsprintf(buf, "%" PRIu64 "\n", weight); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.weight: %m"); + + if (quota != USEC_INFINITY) + xsprintf(buf, USEC_FMT " " USEC_FMT "\n", + quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC); + else + xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf); + + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.max: %m"); +} + +static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) { + char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; + int r; + + xsprintf(buf, "%" PRIu64 "\n", shares); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.shares: %m"); + + xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_period_us: %m"); + + if (quota != USEC_INFINITY) { + xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf); + } else + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1"); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_quota_us: %m"); +} + +static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) { + return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) { + return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX); +} + static bool cgroup_context_has_io_config(CGroupContext *c) { return c->io_accounting || c->io_weight != CGROUP_WEIGHT_INVALID || @@ -521,7 +620,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u } static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { - return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; + return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX; } static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) { @@ -566,30 +665,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { * and missing cgroups, i.e. EROFS and ENOENT. */ if ((mask & CGROUP_MASK_CPU) && !is_root) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; + bool has_weight = cgroup_context_has_cpu_weight(c); + bool has_shares = cgroup_context_has_cpu_shares(c); - sprintf(buf, "%" PRIu64 "\n", - IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares : - c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT); - r = cg_set_attribute("cpu", path, "cpu.shares", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares: %m"); + if (cg_all_unified() > 0) { + uint64_t weight; - sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); - r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us: %m"); + if (has_weight) + weight = cgroup_context_cpu_weight(c, state); + else if (has_shares) { + uint64_t shares = cgroup_context_cpu_shares(c, state); - if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { - sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); - r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf); - } else - r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us: %m"); + weight = cgroup_cpu_shares_to_weight(shares); + + log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s", + shares, weight, path); + } else + weight = CGROUP_WEIGHT_DEFAULT; + + cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec); + } else { + uint64_t shares; + + if (has_weight) { + uint64_t weight = cgroup_context_cpu_weight(c, state); + + shares = cgroup_cpu_weight_to_shares(weight); + + log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s", + weight, shares, path); + } else if (has_shares) + shares = cgroup_context_cpu_shares(c, state); + else + shares = CGROUP_CPU_SHARES_DEFAULT; + + cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec); + } } if (mask & CGROUP_MASK_IO) { @@ -677,16 +788,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { char buf[DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; - if (has_blockio) - weight = cgroup_context_blkio_weight(c, state); - else if (has_io) { + if (has_io) { uint64_t io_weight = cgroup_context_io_weight(c, state); weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64, io_weight, weight); - } else + } else if (has_blockio) + weight = cgroup_context_blkio_weight(c, state); + else weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); @@ -695,13 +806,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set blkio.weight: %m"); - if (has_blockio) { - CGroupBlockIODeviceWeight *w; - - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(u, w->path, w->weight); - } else if (has_io) { + if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ @@ -713,18 +818,17 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { cgroup_apply_blkio_device_weight(u, w->path, weight); } + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_blkio_device_weight(u, w->path, w->weight); } } /* Apply limits and free ones without config. */ - if (has_blockio) { - CGroupBlockIODeviceBandwidth *b, *next; - - LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) - cgroup_context_free_blockio_device_bandwidth(c, b); - } - } else if (has_io) { + if (has_io) { CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { @@ -734,16 +838,24 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) cgroup_context_free_io_device_limit(c, l); } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) + if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) + cgroup_context_free_blockio_device_bandwidth(c, b); } } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (cg_unified() > 0) { - uint64_t max = c->memory_max; + if (cg_all_unified() > 0) { + uint64_t max; + uint64_t swap_max = CGROUP_LIMIT_MAX; - if (cgroup_context_has_unified_memory_config(c)) + if (cgroup_context_has_unified_memory_config(c)) { max = c->memory_max; - else { + swap_max = c->memory_swap_max; + } else { max = c->memory_limit; if (max != CGROUP_LIMIT_MAX) @@ -753,16 +865,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low); cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high); cgroup_apply_unified_memory_limit(u, "memory.max", max); + cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max); } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - uint64_t val = c->memory_limit; + uint64_t val; - if (val == CGROUP_LIMIT_MAX) { + if (cgroup_context_has_unified_memory_config(c)) { val = c->memory_max; - - if (val != CGROUP_LIMIT_MAX) - log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max); - } + log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val); + } else + val = c->memory_limit; if (val == CGROUP_LIMIT_MAX) strncpy(buf, "-1\n", sizeof(buf)); @@ -844,7 +956,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { if ((mask & CGROUP_MASK_PIDS) && !is_root) { - if (c->tasks_max != (uint64_t) -1) { + if (c->tasks_max != CGROUP_LIMIT_MAX) { char buf[DECIMAL_STR_MAX(uint64_t) + 2]; sprintf(buf, "%" PRIu64 "\n", c->tasks_max); @@ -864,8 +976,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { /* Figure out which controllers we need */ if (c->cpu_accounting || - c->cpu_shares != CGROUP_CPU_SHARES_INVALID || - c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID || + cgroup_context_has_cpu_weight(c) || + cgroup_context_has_cpu_shares(c) || c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; @@ -911,7 +1023,7 @@ CGroupMask unit_get_own_mask(Unit *u) { e = unit_get_exec_context(u); if (!e || exec_context_maintains_privileges(e) || - cg_unified() > 0) + cg_all_unified() > 0) return _CGROUP_MASK_ALL; } @@ -1137,7 +1249,7 @@ int unit_watch_cgroup(Unit *u) { return 0; /* Only applies to the unified hierarchy */ - r = cg_unified(); + r = cg_unified(SYSTEMD_CGROUP_CONTROLLER); if (r < 0) return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); if (r == 0) @@ -1247,6 +1359,26 @@ int unit_attach_pids_to_cgroup(Unit *u) { return 0; } +static void cgroup_xattr_apply(Unit *u) { + char ids[SD_ID128_STRING_MAX]; + int r; + + assert(u); + + if (!MANAGER_IS_SYSTEM(u->manager)) + return; + + if (sd_id128_is_null(u->invocation_id)) + return; + + r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + "trusted.invocation_id", + sd_id128_to_string(u->invocation_id, ids), 32, + 0); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path); +} + static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) { assert(u); @@ -1290,6 +1422,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { /* Finally, apply the necessary attributes. */ cgroup_context_apply(u, target_mask, state); + cgroup_xattr_apply(u); return 0; } @@ -1416,6 +1549,8 @@ void unit_prune_cgroup(Unit *u) { if (!u->cgroup_path) return; + (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */ + is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE); r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice); @@ -1537,7 +1672,7 @@ int unit_watch_all_pids(Unit *u) { if (!u->cgroup_path) return -ENOENT; - if (cg_unified() > 0) /* On unified we can use proper notifications */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */ return 0; return unit_watch_pids_in_path(u, u->cgroup_path); @@ -1610,7 +1745,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, int manager_setup_cgroup(Manager *m) { _cleanup_free_ char *path = NULL; CGroupController c; - int r, unified; + int r, all_unified, systemd_unified; char *e; assert(m); @@ -1647,11 +1782,17 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Cannot find cgroup mount point: %m"); - unified = cg_unified(); - if (unified < 0) - return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m"); - if (unified > 0) + all_unified = cg_all_unified(); + systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); + + if (all_unified < 0 || systemd_unified < 0) + return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, + "Couldn't determine if we are running in the unified hierarchy: %m"); + + if (all_unified > 0) log_debug("Unified cgroup hierarchy is located at %s.", path); + else if (systemd_unified > 0) + log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path); else log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); @@ -1659,7 +1800,7 @@ int manager_setup_cgroup(Manager *m) { const char *scope_path; /* 3. Install agent */ - if (unified) { + if (systemd_unified) { /* In the unified hierarchy we can get * cgroup empty notifications via inotify. */ @@ -1719,7 +1860,7 @@ int manager_setup_cgroup(Manager *m) { return log_error_errno(errno, "Failed to open pin file: %m"); /* 6. Always enable hierarchical support if it exists... */ - if (!unified) + if (!all_unified) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); } @@ -1845,7 +1986,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; - if (cg_unified() <= 0) + if (cg_all_unified() <= 0) r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); else r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v); @@ -1890,18 +2031,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; - if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) - return -ENODATA; + if (cg_all_unified() > 0) { + const char *keys[] = { "usage_usec", NULL }; + _cleanup_free_ char *val = NULL; + uint64_t us; - r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); - if (r == -ENOENT) - return -ENODATA; - if (r < 0) - return r; + if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) + return -ENODATA; - r = safe_atou64(v, &ns); - if (r < 0) - return r; + r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val); + if (r < 0) + return r; + + r = safe_atou64(val, &us); + if (r < 0) + return r; + + ns = us * NSEC_PER_USEC; + } else { + if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) + return -ENODATA; + + r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); + if (r == -ENOENT) + return -ENODATA; + if (r < 0) + return r; + + r = safe_atou64(v, &ns); + if (r < 0) + return r; + } *ret = ns; return 0; @@ -1911,16 +2071,33 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) { nsec_t ns; int r; + assert(u); + + /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was + * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply + * call this function with a NULL return value. */ + r = unit_get_cpu_usage_raw(u, &ns); + if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) { + /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our + * cached value. */ + + if (ret) + *ret = u->cpu_usage_last; + return 0; + } if (r < 0) return r; - if (ns > u->cpuacct_usage_base) - ns -= u->cpuacct_usage_base; + if (ns > u->cpu_usage_base) + ns -= u->cpu_usage_base; else ns = 0; - *ret = ns; + u->cpu_usage_last = ns; + if (ret) + *ret = ns; + return 0; } @@ -1930,13 +2107,15 @@ int unit_reset_cpu_usage(Unit *u) { assert(u); + u->cpu_usage_last = NSEC_INFINITY; + r = unit_get_cpu_usage_raw(u, &ns); if (r < 0) { - u->cpuacct_usage_base = 0; + u->cpu_usage_base = 0; return r; } - u->cpuacct_usage_base = ns; + u->cpu_usage_base = ns; return 0; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index a57403e79f..4cd168f63e 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -89,6 +89,10 @@ struct CGroupContext { bool tasks_accounting; /* For unified hierarchy */ + uint64_t cpu_weight; + uint64_t startup_cpu_weight; + usec_t cpu_quota_per_sec_usec; + uint64_t io_weight; uint64_t startup_io_weight; LIST_HEAD(CGroupIODeviceWeight, io_device_weights); @@ -97,11 +101,11 @@ struct CGroupContext { uint64_t memory_low; uint64_t memory_high; uint64_t memory_max; + uint64_t memory_swap_max; /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; - usec_t cpu_quota_per_sec_usec; uint64_t blockio_weight; uint64_t startup_blockio_weight; diff --git a/src/core/dbus-automount.c b/src/core/dbus-automount.c index b2806ad86f..26212b3a95 100644 --- a/src/core/dbus-automount.c +++ b/src/core/dbus-automount.c @@ -32,3 +32,57 @@ const sd_bus_vtable bus_automount_vtable[] = { SD_BUS_PROPERTY("TimeoutIdleUSec", "t", bus_property_get_usec, offsetof(Automount, timeout_idle_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; + +static int bus_automount_set_transient_property( + Automount *a, + const char *name, + sd_bus_message *message, + UnitSetPropertiesMode mode, + sd_bus_error *error) { + + int r; + + assert(a); + assert(name); + assert(message); + + if (streq(name, "TimeoutIdleUSec")) { + usec_t timeout_idle_usec; + r = sd_bus_message_read(message, "t", &timeout_idle_usec); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + char time[FORMAT_TIMESPAN_MAX]; + + a->timeout_idle_usec = timeout_idle_usec; + unit_write_drop_in_format(UNIT(a), mode, name, "[Automount]\nTimeoutIdleSec=%s\n", + format_timespan(time, sizeof(time), timeout_idle_usec, USEC_PER_MSEC)); + } + } else + return 0; + + return 1; +} + +int bus_automount_set_property( + Unit *u, + const char *name, + sd_bus_message *message, + UnitSetPropertiesMode mode, + sd_bus_error *error) { + + Automount *a = AUTOMOUNT(u); + int r = 0; + + assert(a); + assert(name); + assert(message); + + if (u->transient && u->load_state == UNIT_STUB) + /* This is a transient unit, let's load a little more */ + + r = bus_automount_set_transient_property(a, name, message, mode, error); + + return r; +} diff --git a/src/core/dbus-automount.h b/src/core/dbus-automount.h index 7b51eb973a..f41adda2a6 100644 --- a/src/core/dbus-automount.h +++ b/src/core/dbus-automount.h @@ -21,3 +21,5 @@ extern const sd_bus_vtable bus_automount_vtable[]; + +int bus_automount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitSetPropertiesMode mode, sd_bus_error *error); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 85b0c86a2f..c4067a95bf 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0), SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0), + SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0), + SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0), SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), @@ -231,6 +233,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), + SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0), SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0), SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), @@ -303,6 +306,50 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "CPUWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->cpu_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "CPUWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight); + } + + return 1; + + } else if (streq(name, "StartupCPUWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->startup_cpu_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + + if (weight == CGROUP_CPU_SHARES_INVALID) + unit_write_drop_in_private(u, mode, name, "StartupCPUWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight); + } + + return 1; + } else if (streq(name, "CPUShares")) { uint64_t shares; @@ -829,7 +876,7 @@ int bus_cgroup_set_property( return 1; - } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) { + } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax")) { uint64_t v; r = sd_bus_message_read(message, "t", &v); @@ -843,6 +890,8 @@ int bus_cgroup_set_property( c->memory_low = v; else if (streq(name, "MemoryHigh")) c->memory_high = v; + else if (streq(name, "MemorySwapMax")) + c->memory_swap_max = v; else c->memory_max = v; diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 307c3d8e7a..1a7f770db1 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -44,6 +44,7 @@ #endif #include "strv.h" #include "syslog-util.h" +#include "user-util.h" #include "utf8.h" BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput); @@ -626,6 +627,53 @@ static int property_get_syslog_facility( return sd_bus_message_append(reply, "i", LOG_FAC(c->syslog_priority)); } +static int property_get_input_fdname( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + const char *name; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + name = exec_context_fdname(c, STDIN_FILENO); + + return sd_bus_message_append(reply, "s", name); +} + +static int property_get_output_fdname( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + const char *name = NULL; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + if (c->std_output == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardOutputFileDescriptorName")) + name = exec_context_fdname(c, STDOUT_FILENO); + else if (c->std_error == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardErrorFileDescriptorName")) + name = exec_context_fdname(c, STDERR_FILENO); + + return sd_bus_message_append(reply, "s", name); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -676,8 +724,11 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_input_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST), @@ -693,6 +744,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("AmbientCapabilities", "t", property_get_ambient_capabilities, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), @@ -703,8 +756,12 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectSystem", "s", bus_property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST), @@ -840,6 +897,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (!isempty(uu) && !valid_user_group_name_or_id(uu)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user name: %s", uu); + if (mode != UNIT_CHECK) { if (isempty(uu)) @@ -859,6 +919,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (!isempty(gg) && !valid_user_group_name_or_id(gg)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group name: %s", gg); + if (mode != UNIT_CHECK) { if (isempty(gg)) @@ -927,7 +990,7 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; - if (n < PRIO_MIN || n >= PRIO_MAX) + if (!nice_is_valid(n)) return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Nice value out of range"); if (mode != UNIT_CHECK) { @@ -1017,7 +1080,6 @@ int bus_exec_context_set_transient_property( return 1; - } else if (streq(name, "StandardOutput")) { const char *s; ExecOutput p; @@ -1059,9 +1121,46 @@ int bus_exec_context_set_transient_property( return 1; } else if (STR_IN_SET(name, + "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) { + const char *s; + + r = sd_bus_message_read(message, "s", &s); + if (r < 0) + return r; + + if (!fdname_is_valid(s)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name"); + + if (mode != UNIT_CHECK) { + if (streq(name, "StandardInputFileDescriptorName")) { + c->std_input = EXEC_INPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], s); + if (r < 0) + return r; + unit_write_drop_in_private_format(u, mode, name, "StandardInput=fd:%s", s); + } else if (streq(name, "StandardOutputFileDescriptorName")) { + c->std_output = EXEC_OUTPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], s); + if (r < 0) + return r; + unit_write_drop_in_private_format(u, mode, name, "StandardOutput=fd:%s", s); + } else if (streq(name, "StandardErrorFileDescriptorName")) { + c->std_error = EXEC_OUTPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], s); + if (r < 0) + return r; + unit_write_drop_in_private_format(u, mode, name, "StandardError=fd:%s", s); + } + } + + return 1; + + } else if (STR_IN_SET(name, "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", - "PrivateTmp", "PrivateDevices", "PrivateNetwork", - "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime")) { + "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", + "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", + "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", + "ProtectKernelModules", "ProtectControlGroups")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1081,6 +1180,8 @@ int bus_exec_context_set_transient_property( c->private_devices = b; else if (streq(name, "PrivateNetwork")) c->private_network = b; + else if (streq(name, "PrivateUsers")) + c->private_users = b; else if (streq(name, "NoNewPrivileges")) c->no_new_privileges = b; else if (streq(name, "SyslogLevelPrefix")) @@ -1089,6 +1190,16 @@ int bus_exec_context_set_transient_property( c->memory_deny_write_execute = b; else if (streq(name, "RestrictRealtime")) c->restrict_realtime = b; + else if (streq(name, "DynamicUser")) + c->dynamic_user = b; + else if (streq(name, "RemoveIPC")) + c->remove_ipc = b; + else if (streq(name, "ProtectKernelTunables")) + c->protect_kernel_tunables = b; + else if (streq(name, "ProtectKernelModules")) + c->protect_kernel_modules = b; + else if (streq(name, "ProtectControlGroups")) + c->protect_control_groups = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index d05968bd65..d7d3d3c8ce 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -43,9 +43,15 @@ #include "string-util.h" #include "strv.h" #include "syslog-util.h" +#include "user-util.h" #include "virt.h" #include "watchdog.h" +static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) { + return (runtime ? UNIT_FILE_RUNTIME : 0) | + (force ? UNIT_FILE_FORCE : 0); +} + static int property_get_version( sd_bus *bus, const char *path, @@ -463,6 +469,64 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu return sd_bus_reply_method_return(message, "o", path); } +static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_free_ char *path = NULL; + Manager *m = userdata; + sd_id128_t id; + const void *a; + Unit *u; + size_t sz; + int r; + + assert(message); + assert(m); + + /* Anyone can call this method */ + + r = sd_bus_message_read_array(message, 'y', &a, &sz); + if (r < 0) + return r; + if (sz == 0) + id = SD_ID128_NULL; + else if (sz == 16) + memcpy(&id, a, sz); + else + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID"); + + if (sd_id128_is_null(id)) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + pid_t pid; + + r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds); + if (r < 0) + return r; + + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return r; + + u = manager_get_unit_by_pid(m, pid); + if (!u) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid); + } else { + u = hashmap_get(m->units_by_invocation_id, &id); + if (!u) + return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id)); + } + + r = mac_selinux_unit_access_check(u, message, "status", error); + if (r < 0) + return r; + + /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead + * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */ + path = unit_dbus_path_invocation_id(u); + if (!path) + return -ENOMEM; + + return sd_bus_reply_method_return(message, "o", path); +} + static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { _cleanup_free_ char *path = NULL; Manager *m = userdata; @@ -642,6 +706,54 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s return bus_unit_method_set_properties(message, u, error); } +static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = manager_load_unit(m, name, NULL, error, &u); + if (r < 0) + return r; + + r = bus_unit_check_load_state(u, error); + if (r < 0) + return r; + + return bus_unit_method_ref(message, u, error); +} + +static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = manager_load_unit(m, name, NULL, error, &u); + if (r < 0) + return r; + + r = bus_unit_check_load_state(u, error); + if (r < 0) + return r; + + return bus_unit_method_unref(message, u, error); +} + static int reply_unit_info(sd_bus_message *reply, Unit *u) { _cleanup_free_ char *unit_path = NULL, *job_path = NULL; Unit *following; @@ -780,6 +892,13 @@ static int transient_unit_from_message( if (r < 0) return r; + /* If the client asked for it, automatically add a reference to this unit. */ + if (u->bus_track_add) { + r = bus_unit_track_add_sender(u, message); + if (r < 0) + return log_error_errno(r, "Failed to watch sender: %m"); + } + /* Now load the missing bits of the unit we just created */ unit_add_to_load_queue(u); manager_dispatch_load_queue(m); @@ -1511,8 +1630,8 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd } static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) { - uint8_t code; Manager *m = userdata; + uint8_t code; int r; assert(message); @@ -1534,6 +1653,61 @@ static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_ return sd_bus_reply_method_return(message, NULL); } +static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + uid_t uid; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read_basic(message, 's', &name); + if (r < 0) + return r; + + if (!MANAGER_IS_SYSTEM(m)) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance."); + if (!valid_user_group_name(name)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name); + + r = dynamic_user_lookup_name(m, name, &uid); + if (r == -ESRCH) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, "u", (uint32_t) uid); +} + +static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_free_ char *name = NULL; + Manager *m = userdata; + uid_t uid; + int r; + + assert(message); + assert(m); + + assert_cc(sizeof(uid) == sizeof(uint32_t)); + r = sd_bus_message_read_basic(message, 'u', &uid); + if (r < 0) + return r; + + if (!MANAGER_IS_SYSTEM(m)) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance."); + if (!uid_is_valid(uid)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid); + + r = dynamic_user_lookup_uid(m, uid, &name); + if (r == -ESRCH) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, "s", name); +} + static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; Manager *m = userdata; @@ -1779,13 +1953,14 @@ static int install_error( static int method_enable_unit_files_generic( sd_bus_message *message, Manager *m, - int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], bool force, UnitFileChange **changes, unsigned *n_changes), + int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes), bool carries_install_info, sd_bus_error *error) { _cleanup_strv_free_ char **l = NULL; UnitFileChange *changes = NULL; unsigned n_changes = 0; + UnitFileFlags flags; int runtime, force, r; assert(message); @@ -1799,13 +1974,15 @@ static int method_enable_unit_files_generic( if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + r = bus_verify_manage_unit_files_async(m, message, error); if (r < 0) return r; if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = call(m->unit_file_scope, runtime, NULL, l, force, &changes, &n_changes); + r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -1824,8 +2001,8 @@ static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bu return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error); } -static int unit_file_preset_without_mode(UnitFileScope scope, bool runtime, const char *root_dir, char **files, bool force, UnitFileChange **changes, unsigned *n_changes) { - return unit_file_preset(scope, runtime, root_dir, files, UNIT_FILE_PRESET_FULL, force, changes, n_changes); +static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, unsigned *n_changes) { + return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes); } static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) { @@ -1844,6 +2021,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use Manager *m = userdata; UnitFilePresetMode mm; int runtime, force, r; + UnitFileFlags flags; const char *mode; assert(message); @@ -1857,6 +2035,8 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + if (isempty(mode)) mm = UNIT_FILE_PRESET_FULL; else { @@ -1871,7 +2051,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_file_preset(m->unit_file_scope, runtime, NULL, l, mm, force, &changes, &n_changes); + r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -1881,7 +2061,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use static int method_disable_unit_files_generic( sd_bus_message *message, Manager *m, - int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes), + int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes), sd_bus_error *error) { _cleanup_strv_free_ char **l = NULL; @@ -1906,7 +2086,7 @@ static int method_disable_unit_files_generic( if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = call(m->unit_file_scope, runtime, NULL, l, &changes, &n_changes); + r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -1972,7 +2152,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_file_set_default(m->unit_file_scope, NULL, name, force, &changes, &n_changes); + r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -1985,6 +2165,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, Manager *m = userdata; UnitFilePresetMode mm; const char *mode; + UnitFileFlags flags; int force, runtime, r; assert(message); @@ -1998,6 +2179,8 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + if (isempty(mode)) mm = UNIT_FILE_PRESET_FULL; else { @@ -2012,7 +2195,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_file_preset_all(m->unit_file_scope, runtime, NULL, mm, force, &changes, &n_changes); + r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -2027,6 +2210,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd int runtime, force, r; char *target, *type; UnitDependency dep; + UnitFileFlags flags; assert(message); assert(m); @@ -2045,17 +2229,62 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + dep = unit_dependency_from_string(type); if (dep < 0) return -EINVAL; - r = unit_file_add_dependency(m->unit_file_scope, runtime, NULL, l, target, dep, force, &changes, &n_changes); + r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes); } +static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + UnitFileChange *changes = NULL; + unsigned n_changes = 0, i; + UnitFileFlags flags; + const char *name; + char **p; + int runtime, r; + + r = sd_bus_message_read(message, "sb", &name, &runtime); + if (r < 0) + return r; + + r = sd_bus_message_new_method_return(message, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s"); + if (r < 0) + return r; + + p = STRV_MAKE(name); + flags = UNIT_FILE_DRY_RUN | + (runtime ? UNIT_FILE_RUNTIME : 0); + + r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes); + if (r < 0) + return log_error_errno(r, "Failed to get file links for %s: %m", name); + + for (i = 0; i < n_changes; i++) + if (changes[i].type == UNIT_FILE_UNLINK) { + r = sd_bus_message_append(reply, "s", changes[i].path); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + return sd_bus_send(NULL, reply, NULL); +} + const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_VTABLE_START(0), @@ -2143,6 +2372,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED), @@ -2155,6 +2385,8 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED), @@ -2198,7 +2430,10 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_SIGNAL("UnitNew", "so", 0), SD_BUS_SIGNAL("UnitRemoved", "so", 0), diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c index 935db7c48b..76a7a7ce97 100644 --- a/src/core/dbus-mount.c +++ b/src/core/dbus-mount.c @@ -116,7 +116,11 @@ const sd_bus_vtable bus_mount_vtable[] = { SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), @@ -157,6 +161,9 @@ static int bus_mount_set_transient_property( if (!p) return -ENOMEM; + unit_write_drop_in_format(UNIT(m), mode, name, "[Mount]\n%s=%s\n", + name, new_property); + free(*property); *property = p; } diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index fab3677a01..61b83d2d62 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -36,7 +36,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess); -static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction); +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction); const sd_bus_vtable bus_service_vtable[] = { SD_BUS_VTABLE_START(0), @@ -50,12 +50,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST), BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0), - /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */ - SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Service, emergency_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST), @@ -70,6 +65,9 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), @@ -77,6 +75,12 @@ const sd_bus_vtable bus_service_vtable[] = { BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + + /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */ + SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), SD_BUS_VTABLE_END }; diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c index 961340608d..21adb64e15 100644 --- a/src/core/dbus-socket.c +++ b/src/core/dbus-socket.c @@ -137,6 +137,7 @@ const sd_bus_vtable bus_socket_vtable[] = { SD_BUS_PROPERTY("Symlinks", "as", NULL, offsetof(Socket, symlinks), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Mark", "i", bus_property_get_int, offsetof(Socket, mark), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MaxConnections", "u", bus_property_get_unsigned, offsetof(Socket, max_connections), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("MaxConnectionsPerSource", "u", bus_property_get_unsigned, offsetof(Socket, max_connections_per_source), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MessageQueueMaxMessages", "x", bus_property_get_long, offsetof(Socket, mq_maxmsg), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MessageQueueMessageSize", "x", bus_property_get_long, offsetof(Socket, mq_msgsize), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ReusePort", "b", bus_property_get_bool, offsetof(Socket, reuse_port), SD_BUS_VTABLE_PROPERTY_CONST), @@ -151,6 +152,8 @@ const sd_bus_vtable bus_socket_vtable[] = { SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c index 292f8738c6..85a2c26b98 100644 --- a/src/core/dbus-swap.c +++ b/src/core/dbus-swap.c @@ -84,6 +84,8 @@ const sd_bus_vtable bus_swap_vtable[] = { SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), SD_BUS_VTABLE_END diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index b55d2cf735..69e249c844 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -37,7 +37,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode); -static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction); +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction); static int property_get_names( sd_bus *bus, @@ -263,10 +263,7 @@ static int property_get_can_stop( assert(reply); assert(u); - /* On the lower levels we assume that every unit we can start - * we can also stop */ - - return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop); + return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop); } static int property_get_can_reload( @@ -418,6 +415,7 @@ static int bus_verify_manage_units_async_full( const char *verb, int capability, const char *polkit_message, + bool interactive, sd_bus_message *call, sd_bus_error *error) { @@ -433,7 +431,15 @@ static int bus_verify_manage_units_async_full( details[7] = GETTEXT_PACKAGE; } - return bus_verify_polkit_async(call, capability, "org.freedesktop.systemd1.manage-units", details, false, UID_INVALID, &u->manager->polkit_registry, error); + return bus_verify_polkit_async( + call, + capability, + "org.freedesktop.systemd1.manage-units", + details, + interactive, + UID_INVALID, + &u->manager->polkit_registry, + error); } int bus_unit_method_start_generic( @@ -486,6 +492,7 @@ int bus_unit_method_start_generic( verb, CAP_SYS_ADMIN, job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL, + true, message, error); if (r < 0) @@ -558,6 +565,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error * "kill", CAP_KILL, N_("Authentication is required to kill '$(unit)'."), + true, message, error); if (r < 0) @@ -588,6 +596,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus "reset-failed", CAP_SYS_ADMIN, N_("Authentication is required to reset the \"failed\" state of '$(unit)'."), + true, message, error); if (r < 0) @@ -620,6 +629,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b "set-property", CAP_SYS_ADMIN, N_("Authentication is required to set properties on '$(unit)'."), + true, message, error); if (r < 0) @@ -634,6 +644,53 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b return sd_bus_reply_method_return(message, NULL); } +int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Unit *u = userdata; + int r; + + assert(message); + assert(u); + + r = mac_selinux_unit_access_check(u, message, "start", error); + if (r < 0) + return r; + + r = bus_verify_manage_units_async_full( + u, + "ref", + CAP_SYS_ADMIN, + NULL, + false, + message, + error); + if (r < 0) + return r; + if (r == 0) + return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ + + r = bus_unit_track_add_sender(u, message); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, NULL); +} + +int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Unit *u = userdata; + int r; + + assert(message); + assert(u); + + r = bus_unit_track_remove_sender(u, message); + if (r == -EUNATCH) + return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet."); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, NULL); +} + const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_VTABLE_START(0), @@ -660,10 +717,6 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -694,7 +747,7 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), @@ -704,11 +757,12 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0), SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* obsolete alias name */ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0), SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED), @@ -720,7 +774,15 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED), + /* Obsolete properties or obsolete alias names */ + SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), SD_BUS_VTABLE_END }; @@ -1104,7 +1166,7 @@ void bus_unit_send_removed_signal(Unit *u) { int r; assert(u); - if (!u->sent_dbus_new_signal) + if (!u->sent_dbus_new_signal || u->in_dbus_queue) bus_unit_send_change_signal(u); if (!u->id) @@ -1317,6 +1379,29 @@ static int bus_unit_set_transient_property( return r; return 1; + + } else if (streq(name, "AddRef")) { + + int b; + + /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method + * on the Unit interface, and it's probably not a good idea to expose a property and a method on the + * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for + * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference + * dependency type, hence let's not confuse things with that. + * + * Note that we don't acually add the reference to the bus track. We do that only after the setup of + * the transient unit is complete, so that setting this property multiple times in the same transient + * unit creation call doesn't count as individual references. */ + + r = sd_bus_message_read(message, "b", &b); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) + u->bus_track_add = b; + + return 1; } return 0; @@ -1421,3 +1506,71 @@ int bus_unit_check_load_state(Unit *u, sd_bus_error *error) { return sd_bus_error_set_errnof(error, u->load_error, "Unit %s is not loaded properly: %m.", u->id); } + +static int bus_track_handler(sd_bus_track *t, void *userdata) { + Unit *u = userdata; + + assert(t); + assert(u); + + u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */ + + unit_add_to_gc_queue(u); + return 0; +} + +static int allocate_bus_track(Unit *u) { + int r; + + assert(u); + + if (u->bus_track) + return 0; + + r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_track_handler, u); + if (r < 0) + return r; + + r = sd_bus_track_set_recursive(u->bus_track, true); + if (r < 0) { + u->bus_track = sd_bus_track_unref(u->bus_track); + return r; + } + + return 0; +} + +int bus_unit_track_add_name(Unit *u, const char *name) { + int r; + + assert(u); + + r = allocate_bus_track(u); + if (r < 0) + return r; + + return sd_bus_track_add_name(u->bus_track, name); +} + +int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) { + int r; + + assert(u); + + r = allocate_bus_track(u); + if (r < 0) + return r; + + return sd_bus_track_add_sender(u->bus_track, m); +} + +int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) { + assert(u); + + /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an + * error */ + if (!u->bus_track) + return -EUNATCH; + + return sd_bus_track_remove_sender(u->bus_track, m); +} diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index 4db88dbebc..b280de7a1d 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -33,9 +33,15 @@ int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error); -int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error); int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitSetPropertiesMode mode, bool commit, sd_bus_error *error); int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error); int bus_unit_check_load_state(Unit *u, sd_bus_error *error); + +int bus_unit_track_add_name(Unit *u, const char *name); +int bus_unit_track_add_sender(Unit *u, sd_bus_message *m); +int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m); diff --git a/src/core/dbus.c b/src/core/dbus.c index 3422a02d68..070974fe66 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -964,10 +964,6 @@ static int bus_init_private(Manager *m) { if (m->private_listen_fd >= 0) return 0; - /* We don't need the private socket if we have kdbus */ - if (m->kdbus_fd >= 0) - return 0; - if (MANAGER_IS_SYSTEM(m)) { /* We want the private bus only when running as init */ @@ -1168,60 +1164,57 @@ int bus_foreach_bus( return ret; } -void bus_track_serialize(sd_bus_track *t, FILE *f) { +void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) { const char *n; assert(f); + assert(prefix); - for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) - fprintf(f, "subscribed=%s\n", n); -} + for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) { + int c, j; -int bus_track_deserialize_item(char ***l, const char *line) { - const char *e; - int r; - - assert(l); - assert(line); - - e = startswith(line, "subscribed="); - if (!e) - return 0; + c = sd_bus_track_count_name(t, n); - r = strv_extend(l, e); - if (r < 0) - return r; - - return 1; + for (j = 0; j < c; j++) { + fputs(prefix, f); + fputc('=', f); + fputs(n, f); + fputc('\n', f); + } + } } -int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l) { +int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) { + char **i; int r = 0; assert(m); assert(t); - assert(l); - if (!strv_isempty(*l) && m->api_bus) { - char **i; - - if (!*t) { - r = sd_bus_track_new(m->api_bus, t, NULL, NULL); - if (r < 0) - return r; - } + if (strv_isempty(l)) + return 0; - r = 0; - STRV_FOREACH(i, *l) { - int k; + if (!m->api_bus) + return 0; - k = sd_bus_track_add_name(*t, *i); - if (k < 0) - r = k; - } + if (!*t) { + r = sd_bus_track_new(m->api_bus, t, NULL, NULL); + if (r < 0) + return r; } - *l = strv_free(*l); + r = sd_bus_track_set_recursive(*t, recursive); + if (r < 0) + return r; + + r = 0; + STRV_FOREACH(i, l) { + int k; + + k = sd_bus_track_add_name(*t, *i); + if (k < 0) + r = k; + } return r; } diff --git a/src/core/dbus.h b/src/core/dbus.h index 6baaffbd75..a092ed9d76 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -28,9 +28,8 @@ void bus_done(Manager *m); int bus_fdset_add_all(Manager *m, FDSet *fds); -void bus_track_serialize(sd_bus_track *t, FILE *f); -int bus_track_deserialize_item(char ***l, const char *line); -int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l); +void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix); +int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l); int manager_sync_bus_names(Manager *m, sd_bus *bus); diff --git a/src/core/device.c b/src/core/device.c index 16e56efcc3..4b9e84aeb6 100644 --- a/src/core/device.c +++ b/src/core/device.c @@ -331,11 +331,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa if (!u) { delete = true; - u = unit_new(m, sizeof(Device)); - if (!u) - return log_oom(); - - r = unit_add_name(u, e); + r = unit_new_for_name(m, sizeof(Device), e, &u); if (r < 0) goto fail; @@ -369,7 +365,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa fail: log_unit_warning_errno(u, r, "Failed to set up device unit: %m"); - if (delete) + if (delete && u) unit_free(u); return r; @@ -464,6 +460,10 @@ static void device_update_found_one(Device *d, bool add, DeviceFound found, bool if (!now) return; + /* Didn't exist before, but does now? if so, generate a new invocation ID for it */ + if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND) + (void) unit_acquire_invocation_id(UNIT(d)); + if (d->found & DEVICE_FOUND_UDEV) /* When the device is known to udev we consider it * plugged. */ diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c new file mode 100644 index 0000000000..e1846e1adb --- /dev/null +++ b/src/core/dynamic-user.c @@ -0,0 +1,794 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <grp.h> +#include <pwd.h> +#include <sys/file.h> + +#include "dynamic-user.h" +#include "fd-util.h" +#include "fs-util.h" +#include "parse-util.h" +#include "random-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "user-util.h" +#include "fileio.h" + +/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */ +#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN) + +static DynamicUser* dynamic_user_free(DynamicUser *d) { + if (!d) + return NULL; + + if (d->manager) + (void) hashmap_remove(d->manager->dynamic_users, d->name); + + safe_close_pair(d->storage_socket); + return mfree(d); +} + +static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) { + DynamicUser *d = NULL; + int r; + + assert(m); + assert(name); + assert(storage_socket); + + r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops); + if (r < 0) + return r; + + d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1); + if (!d) + return -ENOMEM; + + strcpy(d->name, name); + + d->storage_socket[0] = storage_socket[0]; + d->storage_socket[1] = storage_socket[1]; + + r = hashmap_put(m->dynamic_users, d->name, d); + if (r < 0) { + free(d); + return r; + } + + d->manager = m; + + if (ret) + *ret = d; + + return 0; +} + +int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) { + _cleanup_close_pair_ int storage_socket[2] = { -1, -1 }; + DynamicUser *d; + int r; + + assert(m); + assert(name); + + /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for + * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really + * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do + * from PID 1. To allow the children and PID 1 share information about allocated UIDs we use an anonymous + * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the + * allocated UID number, plus an fd referencing the lock file for the UID + * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can + * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair + * may exist in three different states: + * + * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized. + * + * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a + * statically assigned UID by the same name, which we are reusing. + * + * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic + * UID and locked it in the file system, using the lock fd. + * + * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it + * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here: + * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in + * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX + * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous, + * nobody else could get any access to it except via our own fd) and we want to synchronize access between all + * processes that have access to it. */ + + d = hashmap_get(m->dynamic_users, name); + if (d) { + /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */ + d->n_ref++; + *ret = d; + return 0; + } + + if (!valid_user_group_name_or_id(name)) + return -EINVAL; + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0) + return -errno; + + r = dynamic_user_add(m, name, storage_socket, &d); + if (r < 0) + return r; + + storage_socket[0] = storage_socket[1] = -1; + + if (ret) { + d->n_ref++; + *ret = d; + } + + return 1; +} + +static int make_uid_symlinks(uid_t uid, const char *name, bool b) { + + char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1]; + const char *path2; + int r = 0, k; + + /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The + * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it + * would be its own client then). We hence keep these world-readable symlinks in place, so that the + * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go + * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks + * on them and as those may be taken by any user with read access we can't make them world-readable. */ + + xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid); + if (unlink(path1) < 0 && errno != ENOENT) + r = -errno; + + if (b && symlink(name, path1) < 0) { + k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1); + if (r == 0) + r = k; + } + + path2 = strjoina("/run/systemd/dynamic-uid/direct:", name); + if (unlink(path2) < 0 && errno != ENOENT) { + k = -errno; + if (r == 0) + r = k; + } + + if (b && symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0) { + k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path2); + if (r == 0) + r = k; + } + + return r; +} + +static int pick_uid(const char *name, uid_t *ret_uid) { + + static const uint8_t hash_key[] = { + 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5, + 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59 + }; + + unsigned n_tries = 100; + uid_t candidate; + int r; + + /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We start with a UID + * generated as hash from the user name. */ + candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key)); + + (void) mkdir("/run/systemd/dynamic-uid", 0755); + + for (;;) { + char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; + _cleanup_close_ int lock_fd = -1; + ssize_t l; + + if (--n_tries <= 0) /* Give up retrying eventually */ + return -EBUSY; + + if (!uid_is_dynamic(candidate)) + goto next; + + xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate); + + for (;;) { + struct stat st; + + lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600); + if (lock_fd < 0) + return -errno; + + r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */ + if (r < 0) { + if (errno == EBUSY || errno == EAGAIN) + goto next; /* already in use */ + + return -errno; + } + + if (fstat(lock_fd, &st) < 0) + return -errno; + if (st.st_nlink > 0) + break; + + /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and + * got the lock. Close it, and try again. */ + lock_fd = safe_close(lock_fd); + } + + /* Some superficial check whether this UID/GID might already be taken by some static user */ + if (getpwuid(candidate) || getgrgid((gid_t) candidate)) { + (void) unlink(lock_path); + goto next; + } + + /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */ + l = pwritev(lock_fd, + (struct iovec[2]) { + { .iov_base = (char*) name, .iov_len = strlen(name) }, + { .iov_base = (char[1]) { '\n' }, .iov_len = 1 } + }, 2, 0); + if (l < 0) { + (void) unlink(lock_path); + return -errno; + } + + (void) ftruncate(lock_fd, l); + (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */ + + *ret_uid = candidate; + r = lock_fd; + lock_fd = -1; + + return r; + + next: + /* Pick another random UID, and see if that works for us. */ + random_bytes(&candidate, sizeof(candidate)); + candidate = UID_CLAMP_INTO_RANGE(candidate); + } +} + +static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) { + uid_t uid = UID_INVALID; + struct iovec iov = { + .iov_base = &uid, + .iov_len = sizeof(uid), + }; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + struct cmsghdr *cmsg; + + ssize_t k; + int lock_fd = -1; + + assert(d); + assert(ret_uid); + assert(ret_lock_fd); + + /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock + * on the socket taken. */ + + k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC); + if (k < 0) + return -errno; + + cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); + if (cmsg) + lock_fd = *(int*) CMSG_DATA(cmsg); + else + cmsg_close_all(&mh); /* just in case... */ + + *ret_uid = uid; + *ret_lock_fd = lock_fd; + + return 0; +} + +static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) { + struct iovec iov = { + .iov_base = &uid, + .iov_len = sizeof(uid), + }; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + ssize_t k; + + assert(d); + + /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */ + + if (lock_fd >= 0) { + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int)); + + mh.msg_controllen = CMSG_SPACE(sizeof(int)); + } else { + mh.msg_control = NULL; + mh.msg_controllen = 0; + } + + k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL); + if (k < 0) + return -errno; + + return 0; +} + +static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) { + char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; + + if (lock_fd < 0) + return; + + xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid); + (void) unlink(lock_path); + + (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */ +} + +int dynamic_user_realize(DynamicUser *d, uid_t *ret) { + + _cleanup_close_ int etc_passwd_lock_fd = -1, uid_lock_fd = -1; + uid_t uid = UID_INVALID; + int r; + + assert(d); + + /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist + * yet. If it already exists its existing UID/GID will be reused. */ + + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + r = dynamic_user_pop(d, &uid, &uid_lock_fd); + if (r < 0) { + int new_uid_lock_fd; + uid_t new_uid; + + if (r != -EAGAIN) + goto finish; + + /* OK, nothing stored yet, let's try to find something useful. While we are working on this release the + * lock however, so that nobody else blocks on our NSS lookups. */ + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + + /* Let's see if a proper, static user or group by this name exists. Try to take the lock on + * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't + * take the lock, given that users can't be added there anyway in this case. */ + etc_passwd_lock_fd = take_etc_passwd_lock(NULL); + if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS) + return etc_passwd_lock_fd; + + /* First, let's parse this as numeric UID */ + r = parse_uid(d->name, &uid); + if (r < 0) { + struct passwd *p; + struct group *g; + + /* OK, this is not a numeric UID. Let's see if there's a user by this name */ + p = getpwnam(d->name); + if (p) + uid = p->pw_uid; + + /* Let's see if there's a group by this name */ + g = getgrnam(d->name); + if (g) { + /* If the UID/GID of the user/group of the same don't match, refuse operation */ + if (uid != UID_INVALID && uid != (uid_t) g->gr_gid) + return -EILSEQ; + + uid = (uid_t) g->gr_gid; + } + } + + if (uid == UID_INVALID) { + /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */ + + uid_lock_fd = pick_uid(d->name, &uid); + if (uid_lock_fd < 0) + return uid_lock_fd; + } + + /* So, we found a working UID/lock combination. Let's see if we actually still need it. */ + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) { + unlink_uid_lock(uid_lock_fd, uid, d->name); + return -errno; + } + + r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd); + if (r < 0) { + if (r != -EAGAIN) { + /* OK, something bad happened, let's get rid of the bits we acquired. */ + unlink_uid_lock(uid_lock_fd, uid, d->name); + goto finish; + } + + /* Great! Nothing is stored here, still. Store our newly acquired data. */ + } else { + /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we + * acquired, and use what's stored now. */ + + unlink_uid_lock(uid_lock_fd, uid, d->name); + safe_close(uid_lock_fd); + + uid = new_uid; + uid_lock_fd = new_uid_lock_fd; + } + } + + /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already + * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID + * dynamically right here, push that in along with the lock fd for it. */ + r = dynamic_user_push(d, uid, uid_lock_fd); + if (r < 0) + goto finish; + + *ret = uid; + r = 0; + +finish: + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + return r; +} + +int dynamic_user_current(DynamicUser *d, uid_t *ret) { + _cleanup_close_ int lock_fd = -1; + uid_t uid; + int r; + + assert(d); + assert(ret); + + /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */ + + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + r = dynamic_user_pop(d, &uid, &lock_fd); + if (r < 0) + goto finish; + + r = dynamic_user_push(d, uid, lock_fd); + if (r < 0) + goto finish; + + *ret = uid; + r = 0; + +finish: + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + return r; +} + +DynamicUser* dynamic_user_ref(DynamicUser *d) { + if (!d) + return NULL; + + assert(d->n_ref > 0); + d->n_ref++; + + return d; +} + +DynamicUser* dynamic_user_unref(DynamicUser *d) { + if (!d) + return NULL; + + /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed + * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries + * with no references, which is commonly the case right before a daemon reload. */ + + assert(d->n_ref > 0); + d->n_ref--; + + return NULL; +} + +static int dynamic_user_close(DynamicUser *d) { + _cleanup_close_ int lock_fd = -1; + uid_t uid; + int r; + + /* Release the user ID, by releasing the lock on it, and emptying the storage socket. After this the user is + * unrealized again, much like it was after it the DynamicUser object was first allocated. */ + + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + r = dynamic_user_pop(d, &uid, &lock_fd); + if (r == -EAGAIN) { + /* User wasn't realized yet, nothing to do. */ + r = 0; + goto finish; + } + if (r < 0) + goto finish; + + /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */ + unlink_uid_lock(lock_fd, uid, d->name); + r = 1; + +finish: + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + return r; +} + +DynamicUser* dynamic_user_destroy(DynamicUser *d) { + if (!d) + return NULL; + + /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last + * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that + * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload + * cycle, where the dynamic users should not be destroyed, but our datastructures should. */ + + dynamic_user_unref(d); + + if (d->n_ref > 0) + return NULL; + + (void) dynamic_user_close(d); + return dynamic_user_free(d); +} + +int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) { + DynamicUser *d; + Iterator i; + + assert(m); + assert(f); + assert(fds); + + /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */ + + HASHMAP_FOREACH(d, m->dynamic_users, i) { + int copy0, copy1; + + copy0 = fdset_put_dup(fds, d->storage_socket[0]); + if (copy0 < 0) + return copy0; + + copy1 = fdset_put_dup(fds, d->storage_socket[1]); + if (copy1 < 0) + return copy1; + + fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1); + } + + return 0; +} + +void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) { + _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL; + int r, fd0, fd1; + + assert(m); + assert(value); + assert(fds); + + /* Parse the serialization again, after a daemon reload */ + + r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL); + if (r != 3 || !isempty(value)) { + log_debug("Unable to parse dynamic user line."); + return; + } + + if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) { + log_debug("Unable to process dynamic user fd specification."); + return; + } + + if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) { + log_debug("Unable to process dynamic user fd specification."); + return; + } + + r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL); + if (r < 0) { + log_debug_errno(r, "Failed to add dynamic user: %m"); + return; + } + + (void) fdset_remove(fds, fd0); + (void) fdset_remove(fds, fd1); +} + +void dynamic_user_vacuum(Manager *m, bool close_user) { + DynamicUser *d; + Iterator i; + + assert(m); + + /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users + * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which + * might not be referenced anymore. */ + + HASHMAP_FOREACH(d, m->dynamic_users, i) { + if (d->n_ref > 0) + continue; + + if (close_user) { + log_debug("Removing orphaned dynamic user %s", d->name); + (void) dynamic_user_close(d); + } + + dynamic_user_free(d); + } +} + +int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) { + char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; + _cleanup_free_ char *user = NULL; + uid_t check_uid; + int r; + + assert(m); + assert(ret); + + /* A friendly way to translate a dynamic user's UID into a name. */ + if (!uid_is_dynamic(uid)) + return -ESRCH; + + xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid); + r = read_one_line_file(lock_path, &user); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* The lock file might be stale, hence let's verify the data before we return it */ + r = dynamic_user_lookup_name(m, user, &check_uid); + if (r < 0) + return r; + if (check_uid != uid) /* lock file doesn't match our own idea */ + return -ESRCH; + + *ret = user; + user = NULL; + + return 0; +} + +int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) { + DynamicUser *d; + int r; + + assert(m); + assert(name); + assert(ret); + + /* A friendly call for translating a dynamic user's name into its UID */ + + d = hashmap_get(m->dynamic_users, name); + if (!d) + return -ESRCH; + + r = dynamic_user_current(d, ret); + if (r == -EAGAIN) /* not realized yet? */ + return -ESRCH; + + return r; +} + +int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) { + bool acquired = false; + int r; + + assert(creds); + assert(m); + + /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some + * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user + * and group. This call allocates a pair. */ + + if (!creds->user && user) { + r = dynamic_user_acquire(m, user, &creds->user); + if (r < 0) + return r; + + acquired = true; + } + + if (!creds->group) { + + if (creds->user && (!group || streq_ptr(user, group))) + creds->group = dynamic_user_ref(creds->user); + else { + r = dynamic_user_acquire(m, group, &creds->group); + if (r < 0) { + if (acquired) + creds->user = dynamic_user_unref(creds->user); + return r; + } + } + } + + return 0; +} + +int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid) { + uid_t u = UID_INVALID; + gid_t g = GID_INVALID; + int r; + + assert(creds); + assert(uid); + assert(gid); + + /* Realize both the referenced user and group */ + + if (creds->user) { + r = dynamic_user_realize(creds->user, &u); + if (r < 0) + return r; + } + + if (creds->group && creds->group != creds->user) { + r = dynamic_user_realize(creds->group, &g); + if (r < 0) + return r; + } else + g = u; + + *uid = u; + *gid = g; + + return 0; +} + +void dynamic_creds_unref(DynamicCreds *creds) { + assert(creds); + + creds->user = dynamic_user_unref(creds->user); + creds->group = dynamic_user_unref(creds->group); +} + +void dynamic_creds_destroy(DynamicCreds *creds) { + assert(creds); + + creds->user = dynamic_user_destroy(creds->user); + creds->group = dynamic_user_destroy(creds->group); +} diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h new file mode 100644 index 0000000000..0b8bce1a72 --- /dev/null +++ b/src/core/dynamic-user.h @@ -0,0 +1,66 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +typedef struct DynamicUser DynamicUser; + +typedef struct DynamicCreds { + /* A combination of a dynamic user and group */ + DynamicUser *user; + DynamicUser *group; +} DynamicCreds; + +#include "manager.h" + +/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't + * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you + * need to allocated two of these objects. DynamicCreds below makes that easy. */ +struct DynamicUser { + int n_ref; + Manager *manager; + + /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock + * file fd locking the user ID we picked. */ + int storage_socket[2]; + + char name[]; +}; + +int dynamic_user_acquire(Manager *m, const char *name, DynamicUser **ret); + +int dynamic_user_realize(DynamicUser *d, uid_t *ret); +int dynamic_user_current(DynamicUser *d, uid_t *ret); + +DynamicUser* dynamic_user_ref(DynamicUser *d); +DynamicUser* dynamic_user_unref(DynamicUser *d); +DynamicUser* dynamic_user_destroy(DynamicUser *d); + +int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds); +void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds); +void dynamic_user_vacuum(Manager *m, bool close_user); + +int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret); +int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret); + +int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group); +int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid); + +void dynamic_creds_unref(DynamicCreds *creds); +void dynamic_creds_destroy(DynamicCreds *creds); diff --git a/src/core/failure-action.c b/src/core/emergency-action.c index ddae46190f..90232bc57a 100644 --- a/src/core/failure-action.c +++ b/src/core/emergency-action.c @@ -23,59 +23,60 @@ #include "bus-error.h" #include "bus-util.h" -#include "failure-action.h" +#include "emergency-action.h" #include "special.h" #include "string-table.h" #include "terminal-util.h" -static void log_and_status(Manager *m, const char *message) { - log_warning("%s", message); +static void log_and_status(Manager *m, const char *message, const char *reason) { + log_warning("%s: %s", message, reason); manager_status_printf(m, STATUS_TYPE_EMERGENCY, ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL, - "%s", message); + "%s: %s", message, reason); } -int failure_action( +int emergency_action( Manager *m, - FailureAction action, - const char *reboot_arg) { + EmergencyAction action, + const char *reboot_arg, + const char *reason) { assert(m); assert(action >= 0); - assert(action < _FAILURE_ACTION_MAX); + assert(action < _EMERGENCY_ACTION_MAX); - if (action == FAILURE_ACTION_NONE) + if (action == EMERGENCY_ACTION_NONE) return -ECANCELED; if (!MANAGER_IS_SYSTEM(m)) { /* Downgrade all options to simply exiting if we run * in user mode */ - log_warning("Exiting as result of failure."); + log_warning("Exiting: %s", reason); m->exit_code = MANAGER_EXIT; return -ECANCELED; } switch (action) { - case FAILURE_ACTION_REBOOT: - log_and_status(m, "Rebooting as result of failure."); + case EMERGENCY_ACTION_REBOOT: + log_and_status(m, "Rebooting", reason); (void) update_reboot_parameter_and_warn(reboot_arg); (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); break; - case FAILURE_ACTION_REBOOT_FORCE: - log_and_status(m, "Forcibly rebooting as result of failure."); + case EMERGENCY_ACTION_REBOOT_FORCE: + log_and_status(m, "Forcibly rebooting", reason); (void) update_reboot_parameter_and_warn(reboot_arg); m->exit_code = MANAGER_REBOOT; break; - case FAILURE_ACTION_REBOOT_IMMEDIATE: - log_and_status(m, "Rebooting immediately as result of failure."); + case EMERGENCY_ACTION_REBOOT_IMMEDIATE: + log_and_status(m, "Rebooting immediately", reason); sync(); @@ -89,18 +90,18 @@ int failure_action( reboot(RB_AUTOBOOT); break; - case FAILURE_ACTION_POWEROFF: - log_and_status(m, "Powering off as result of failure."); + case EMERGENCY_ACTION_POWEROFF: + log_and_status(m, "Powering off", reason); (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); break; - case FAILURE_ACTION_POWEROFF_FORCE: - log_and_status(m, "Forcibly powering off as result of failure."); + case EMERGENCY_ACTION_POWEROFF_FORCE: + log_and_status(m, "Forcibly powering off", reason); m->exit_code = MANAGER_POWEROFF; break; - case FAILURE_ACTION_POWEROFF_IMMEDIATE: - log_and_status(m, "Powering off immediately as result of failure."); + case EMERGENCY_ACTION_POWEROFF_IMMEDIATE: + log_and_status(m, "Powering off immediately", reason); sync(); @@ -109,19 +110,19 @@ int failure_action( break; default: - assert_not_reached("Unknown failure action"); + assert_not_reached("Unknown emergency action"); } return -ECANCELED; } -static const char* const failure_action_table[_FAILURE_ACTION_MAX] = { - [FAILURE_ACTION_NONE] = "none", - [FAILURE_ACTION_REBOOT] = "reboot", - [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force", - [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate", - [FAILURE_ACTION_POWEROFF] = "poweroff", - [FAILURE_ACTION_POWEROFF_FORCE] = "poweroff-force", - [FAILURE_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate" +static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = { + [EMERGENCY_ACTION_NONE] = "none", + [EMERGENCY_ACTION_REBOOT] = "reboot", + [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force", + [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate", + [EMERGENCY_ACTION_POWEROFF] = "poweroff", + [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force", + [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate" }; -DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction); +DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction); diff --git a/src/core/failure-action.h b/src/core/emergency-action.h index 1adac4ad5c..8804b59752 100644 --- a/src/core/failure-action.h +++ b/src/core/emergency-action.h @@ -20,22 +20,22 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -typedef enum FailureAction { - FAILURE_ACTION_NONE, - FAILURE_ACTION_REBOOT, - FAILURE_ACTION_REBOOT_FORCE, - FAILURE_ACTION_REBOOT_IMMEDIATE, - FAILURE_ACTION_POWEROFF, - FAILURE_ACTION_POWEROFF_FORCE, - FAILURE_ACTION_POWEROFF_IMMEDIATE, - _FAILURE_ACTION_MAX, - _FAILURE_ACTION_INVALID = -1 -} FailureAction; +typedef enum EmergencyAction { + EMERGENCY_ACTION_NONE, + EMERGENCY_ACTION_REBOOT, + EMERGENCY_ACTION_REBOOT_FORCE, + EMERGENCY_ACTION_REBOOT_IMMEDIATE, + EMERGENCY_ACTION_POWEROFF, + EMERGENCY_ACTION_POWEROFF_FORCE, + EMERGENCY_ACTION_POWEROFF_IMMEDIATE, + _EMERGENCY_ACTION_MAX, + _EMERGENCY_ACTION_INVALID = -1 +} EmergencyAction; #include "macro.h" #include "manager.h" -int failure_action(Manager *m, FailureAction action, const char *reboot_arg); +int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason); -const char* failure_action_to_string(FailureAction i) _const_; -FailureAction failure_action_from_string(const char *s) _pure_; +const char* emergency_action_to_string(EmergencyAction i) _const_; +EmergencyAction emergency_action_from_string(const char *s) _pure_; diff --git a/src/core/execute.c b/src/core/execute.c index 7c178b97c3..85ee82c3e1 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -25,11 +25,14 @@ #include <signal.h> #include <string.h> #include <sys/capability.h> +#include <sys/eventfd.h> #include <sys/mman.h> #include <sys/personality.h> #include <sys/prctl.h> +#include <sys/shm.h> #include <sys/socket.h> #include <sys/stat.h> +#include <sys/types.h> #include <sys/un.h> #include <unistd.h> #include <utmpx.h> @@ -90,6 +93,7 @@ #include "selinux-util.h" #include "signal-util.h" #include "smack-util.h" +#include "special.h" #include "string-table.h" #include "string-util.h" #include "strv.h" @@ -219,12 +223,36 @@ static void exec_context_tty_reset(const ExecContext *context, const ExecParamet (void) vt_disallocate(path); } +static bool is_terminal_input(ExecInput i) { + return IN_SET(i, + EXEC_INPUT_TTY, + EXEC_INPUT_TTY_FORCE, + EXEC_INPUT_TTY_FAIL); +} + static bool is_terminal_output(ExecOutput o) { - return - o == EXEC_OUTPUT_TTY || - o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || - o == EXEC_OUTPUT_KMSG_AND_CONSOLE || - o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE; + return IN_SET(o, + EXEC_OUTPUT_TTY, + EXEC_OUTPUT_SYSLOG_AND_CONSOLE, + EXEC_OUTPUT_KMSG_AND_CONSOLE, + EXEC_OUTPUT_JOURNAL_AND_CONSOLE); +} + +static bool exec_context_needs_term(const ExecContext *c) { + assert(c); + + /* Return true if the execution context suggests we should set $TERM to something useful. */ + + if (is_terminal_input(c->std_input)) + return true; + + if (is_terminal_output(c->std_output)) + return true; + + if (is_terminal_output(c->std_error)) + return true; + + return !!c->tty_path; } static int open_null_as(int flags, int nfd) { @@ -363,13 +391,6 @@ static int open_terminal_as(const char *path, mode_t mode, int nfd) { return r; } -static bool is_terminal_input(ExecInput i) { - return - i == EXEC_INPUT_TTY || - i == EXEC_INPUT_TTY_FORCE || - i == EXEC_INPUT_TTY_FAIL; -} - static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) { if (is_terminal_input(std_input) && !apply_tty_stdin) @@ -392,7 +413,8 @@ static int fixup_output(ExecOutput std_output, int socket_fd) { static int setup_input( const ExecContext *context, const ExecParameters *params, - int socket_fd) { + int socket_fd, + int named_iofds[3]) { ExecInput i; @@ -410,7 +432,7 @@ static int setup_input( return STDIN_FILENO; } - i = fixup_input(context->std_input, socket_fd, params->apply_tty_stdin); + i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN); switch (i) { @@ -442,6 +464,10 @@ static int setup_input( case EXEC_INPUT_SOCKET: return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO; + case EXEC_INPUT_NAMED_FD: + (void) fd_nonblock(named_iofds[STDIN_FILENO], false); + return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO; + default: assert_not_reached("Unknown input type"); } @@ -453,6 +479,7 @@ static int setup_output( const ExecParameters *params, int fileno, int socket_fd, + int named_iofds[3], const char *ident, uid_t uid, gid_t gid, @@ -485,7 +512,7 @@ static int setup_output( return STDERR_FILENO; } - i = fixup_input(context->std_input, socket_fd, params->apply_tty_stdin); + i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN); o = fixup_output(context->std_output, socket_fd); if (fileno == STDERR_FILENO) { @@ -504,7 +531,7 @@ static int setup_output( return fileno; /* Duplicate from stdout if possible */ - if (e == o || e == EXEC_OUTPUT_INHERIT) + if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT) return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno; o = e; @@ -566,6 +593,10 @@ static int setup_output( assert(socket_fd >= 0); return dup2(socket_fd, fileno) < 0 ? -errno : fileno; + case EXEC_OUTPUT_NAMED_FD: + (void) fd_nonblock(named_iofds[fileno], false); + return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno; + default: assert_not_reached("Unknown error type"); } @@ -701,73 +732,157 @@ static int ask_for_confirmation(char *response, char **argv) { return r; } -static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) { - bool keep_groups = false; +static int get_fixed_user(const ExecContext *c, const char **user, + uid_t *uid, gid_t *gid, + const char **home, const char **shell) { int r; + const char *name; - assert(context); + assert(c); + + if (!c->user) + return 0; + + /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway + * (i.e. are "/" or "/bin/nologin"). */ + + name = c->user; + r = get_user_creds_clean(&name, uid, gid, home, shell); + if (r < 0) + return r; - /* Lookup and set GID and supplementary group list. Here too - * we avoid NSS lookups for gid=0. */ + *user = name; + return 0; +} - if (context->group || username) { +static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) { + int r; + const char *name; + + assert(c); + + if (!c->group) + return 0; + + name = c->group; + r = get_group_creds(&name, gid); + if (r < 0) + return r; + + *group = name; + return 0; +} + +static int get_supplementary_groups(const ExecContext *c, const char *user, + const char *group, gid_t gid, + gid_t **supplementary_gids, int *ngids) { + char **i; + int r, k = 0; + int ngroups_max; + bool keep_groups = false; + gid_t *groups = NULL; + _cleanup_free_ gid_t *l_gids = NULL; + + assert(c); + + /* + * If user is given, then lookup GID and supplementary groups list. + * We avoid NSS lookups for gid=0. Also we have to initialize groups + * here and as early as possible so we keep the list of supplementary + * groups of the caller. + */ + if (user && gid_is_valid(gid) && gid != 0) { /* First step, initialize groups from /etc/groups */ - if (username && gid != 0) { - if (initgroups(username, gid) < 0) - return -errno; + if (initgroups(user, gid) < 0) + return -errno; - keep_groups = true; - } + keep_groups = true; + } - /* Second step, set our gids */ - if (setresgid(gid, gid, gid) < 0) + if (!c->supplementary_groups) + return 0; + + /* + * If SupplementaryGroups= was passed then NGROUPS_MAX has to + * be positive, otherwise fail. + */ + errno = 0; + ngroups_max = (int) sysconf(_SC_NGROUPS_MAX); + if (ngroups_max <= 0) { + if (errno > 0) return -errno; + else + return -EOPNOTSUPP; /* For all other values */ } - if (context->supplementary_groups) { - int ngroups_max, k; - gid_t *gids; - char **i; + l_gids = new(gid_t, ngroups_max); + if (!l_gids) + return -ENOMEM; - /* Final step, initialize any manually set supplementary groups */ - assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0); + if (keep_groups) { + /* + * Lookup the list of groups that the user belongs to, we + * avoid NSS lookups here too for gid=0. + */ + k = ngroups_max; + if (getgrouplist(user, gid, l_gids, &k) < 0) + return -EINVAL; + } else + k = 0; - if (!(gids = new(gid_t, ngroups_max))) - return -ENOMEM; + STRV_FOREACH(i, c->supplementary_groups) { + const char *g; - if (keep_groups) { - k = getgroups(ngroups_max, gids); - if (k < 0) { - free(gids); - return -errno; - } - } else - k = 0; + if (k >= ngroups_max) + return -E2BIG; - STRV_FOREACH(i, context->supplementary_groups) { - const char *g; + g = *i; + r = get_group_creds(&g, l_gids+k); + if (r < 0) + return r; - if (k >= ngroups_max) { - free(gids); - return -E2BIG; - } + k++; + } - g = *i; - r = get_group_creds(&g, gids+k); - if (r < 0) { - free(gids); - return r; - } + /* + * Sets ngids to zero to drop all supplementary groups, happens + * when we are under root and SupplementaryGroups= is empty. + */ + if (k == 0) { + *ngids = 0; + return 0; + } - k++; - } + /* Otherwise get the final list of supplementary groups */ + groups = memdup(l_gids, sizeof(gid_t) * k); + if (!groups) + return -ENOMEM; - if (setgroups(k, gids) < 0) { - free(gids); - return -errno; - } + *supplementary_gids = groups; + *ngids = k; + + groups = NULL; + + return 0; +} + +static int enforce_groups(const ExecContext *context, gid_t gid, + gid_t *supplementary_gids, int ngids) { + int r; + + assert(context); + + /* Handle SupplementaryGroups= even if it is empty */ + if (context->supplementary_groups) { + r = maybe_setgroups(ngids, supplementary_gids); + if (r < 0) + return r; + } - free(gids); + if (gid_is_valid(gid)) { + /* Then set our gids */ + if (setresgid(gid, gid, gid) < 0) + return -errno; } return 0; @@ -776,6 +891,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ static int enforce_user(const ExecContext *context, uid_t uid) { assert(context); + if (!uid_is_valid(uid)) + return 0; + /* Sets (but doesn't look up) the uid and make sure we keep the * capabilities while doing so. */ @@ -818,14 +936,19 @@ static int null_conv( return PAM_CONV_ERR; } +#endif + static int setup_pam( const char *name, const char *user, uid_t uid, + gid_t gid, const char *tty, char ***env, int fds[], unsigned n_fds) { +#ifdef HAVE_PAM + static const struct pam_conv conv = { .conv = null_conv, .appdata_ptr = NULL @@ -925,8 +1048,14 @@ static int setup_pam( * and this will make PR_SET_PDEATHSIG work in most cases. * If this fails, ignore the error - but expect sd-pam threads * to fail to exit normally */ + + r = maybe_setgroups(0, NULL); + if (r < 0) + log_warning_errno(r, "Failed to setgroups() in sd-pam: %m"); + if (setresgid(gid, gid, gid) < 0) + log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m"); if (setresuid(uid, uid, uid) < 0) - log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m"); + log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m"); (void) ignore_signals(SIGPIPE, -1); @@ -1019,8 +1148,10 @@ fail: closelog(); return r; -} +#else + return 0; #endif +} static void rename_process_from_path(const char *path) { char process_name[11]; @@ -1055,15 +1186,29 @@ static void rename_process_from_path(const char *path) { #ifdef HAVE_SECCOMP -static int apply_seccomp(const ExecContext *c) { +static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { + + if (is_seccomp_available()) + return false; + + log_open(); + log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); + log_close(); + return true; +} + +static int apply_seccomp(const Unit* u, const ExecContext *c) { uint32_t negative_action, action; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; void *id; int r; assert(c); + if (skip_seccomp_unavailable(u, "syscall filtering")) + return 0; + negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno); seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW); @@ -1104,20 +1249,23 @@ finish: return r; } -static int apply_address_families(const ExecContext *c) { - scmp_filter_ctx *seccomp; +static int apply_address_families(const Unit* u, const ExecContext *c) { + scmp_filter_ctx seccomp; Iterator i; int r; +#if defined(__i386__) + return 0; +#endif + assert(c); - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; + if (skip_seccomp_unavailable(u, "RestrictAddressFamilies=")) + return 0; - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; if (c->address_families_whitelist) { int af, first = 0, last = 0; @@ -1214,10 +1362,6 @@ static int apply_address_families(const ExecContext *c) { } } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1225,15 +1369,18 @@ finish: return r; } -static int apply_memory_deny_write_execute(const ExecContext *c) { - scmp_filter_ctx *seccomp; +static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) { + scmp_filter_ctx seccomp; int r; assert(c); - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; + if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute=")) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; r = seccomp_rule_add( seccomp, @@ -1253,7 +1400,12 @@ static int apply_memory_deny_write_execute(const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(shmat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); if (r < 0) goto finish; @@ -1264,22 +1416,25 @@ finish: return r; } -static int apply_restrict_realtime(const ExecContext *c) { +static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { static const int permitted_policies[] = { SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, }; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; unsigned i; int r, p, max_policy = 0; assert(c); - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; + if (skip_seccomp_unavailable(u, "RestrictRealtime=")) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) @@ -1323,7 +1478,34 @@ static int apply_restrict_realtime(const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + +static int apply_protect_sysctl(const Unit *u, const ExecContext *c) { + scmp_filter_ctx seccomp; + int r; + + assert(c); + + /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but + * let's protect even those systems where this is left on in the kernel. */ + + if (skip_seccomp_unavailable(u, "ProtectKernelTunables=")) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(_sysctl), + 0); if (r < 0) goto finish; @@ -1334,12 +1516,33 @@ finish: return r; } +static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) { + assert(c); + + /* Turn off module syscalls on ProtectKernelModules=yes */ + + if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) + return 0; + + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); +} + +static int apply_private_devices(const Unit *u, const ExecContext *c) { + assert(c); + + /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ + + if (skip_seccomp_unavailable(u, "PrivateDevices=")) + return 0; + + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { assert(idle_pipe); - idle_pipe[1] = safe_close(idle_pipe[1]); idle_pipe[2] = safe_close(idle_pipe[2]); @@ -1366,6 +1569,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) { } static int build_environment( + Unit *u, const ExecContext *c, const ExecParameters *p, unsigned n_fds, @@ -1380,10 +1584,11 @@ static int build_environment( unsigned n_env = 0; char *x; + assert(u); assert(c); assert(ret); - our_env = new0(char*, 12); + our_env = new0(char*, 14); if (!our_env) return -ENOMEM; @@ -1408,7 +1613,7 @@ static int build_environment( our_env[n_env++] = x; } - if (p->watchdog_usec > 0) { + if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) { if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0) return -ENOMEM; our_env[n_env++] = x; @@ -1418,6 +1623,16 @@ static int build_environment( our_env[n_env++] = x; } + /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic + * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but + * check the database directly. */ + if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) { + x = strdup("SYSTEMD_NSS_BYPASS_BUS=1"); + if (!x) + return -ENOMEM; + our_env[n_env++] = x; + } + if (home) { x = strappend("HOME=", home); if (!x) @@ -1444,12 +1659,28 @@ static int build_environment( our_env[n_env++] = x; } - if (is_terminal_input(c->std_input) || - c->std_output == EXEC_OUTPUT_TTY || - c->std_error == EXEC_OUTPUT_TTY || - c->tty_path) { + if (!sd_id128_is_null(u->invocation_id)) { + if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0) + return -ENOMEM; + + our_env[n_env++] = x; + } + + if (exec_context_needs_term(c)) { + const char *tty_path, *term = NULL; + + tty_path = exec_context_tty_path(c); + + /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit + * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager + * passes to PID 1 ends up all the way in the console login shown. */ - x = strdup(default_term_for_tty(exec_context_tty_path(c))); + if (path_equal(tty_path, "/dev/console") && getppid() == 1) + term = getenv("TERM"); + if (!term) + term = default_term_for_tty(tty_path); + + x = strappend("TERM=", term); if (!x) return -ENOMEM; our_env[n_env++] = x; @@ -1520,20 +1751,382 @@ static bool exec_needs_mount_namespace( if (context->private_devices || context->protect_system != PROTECT_SYSTEM_NO || - context->protect_home != PROTECT_HOME_NO) + context->protect_home != PROTECT_HOME_NO || + context->protect_kernel_tunables || + context->protect_kernel_modules || + context->protect_control_groups) return true; return false; } +static int setup_private_users(uid_t uid, gid_t gid) { + _cleanup_free_ char *uid_map = NULL, *gid_map = NULL; + _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 }; + _cleanup_close_ int unshare_ready_fd = -1; + _cleanup_(sigkill_waitp) pid_t pid = 0; + uint64_t c = 1; + siginfo_t si; + ssize_t n; + int r; + + /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to + * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which + * we however lack after opening the user namespace. To work around this we fork() a temporary child process, + * which waits for the parent to create the new user namespace while staying in the original namespace. The + * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and + * continues execution normally. */ + + if (uid != 0 && uid_is_valid(uid)) + asprintf(&uid_map, + "0 0 1\n" /* Map root → root */ + UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */ + uid, uid); + else + uid_map = strdup("0 0 1\n"); /* The case where the above is the same */ + if (!uid_map) + return -ENOMEM; + + if (gid != 0 && gid_is_valid(gid)) + asprintf(&gid_map, + "0 0 1\n" /* Map root → root */ + GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */ + gid, gid); + else + gid_map = strdup("0 0 1\n"); /* The case where the above is the same */ + if (!gid_map) + return -ENOMEM; + + /* Create a communication channel so that the parent can tell the child when it finished creating the user + * namespace. */ + unshare_ready_fd = eventfd(0, EFD_CLOEXEC); + if (unshare_ready_fd < 0) + return -errno; + + /* Create a communication channel so that the child can tell the parent a proper error code in case it + * failed. */ + if (pipe2(errno_pipe, O_CLOEXEC) < 0) + return -errno; + + pid = fork(); + if (pid < 0) + return -errno; + + if (pid == 0) { + _cleanup_close_ int fd = -1; + const char *a; + pid_t ppid; + + /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from + * here, after the parent opened its own user namespace. */ + + ppid = getppid(); + errno_pipe[0] = safe_close(errno_pipe[0]); + + /* Wait until the parent unshared the user namespace */ + if (read(unshare_ready_fd, &c, sizeof(c)) < 0) { + r = -errno; + goto child_fail; + } + + /* Disable the setgroups() system call in the child user namespace, for good. */ + a = procfs_file_alloca(ppid, "setgroups"); + fd = open(a, O_WRONLY|O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) { + r = -errno; + goto child_fail; + } + + /* If the file is missing the kernel is too old, let's continue anyway. */ + } else { + if (write(fd, "deny\n", 5) < 0) { + r = -errno; + goto child_fail; + } + + fd = safe_close(fd); + } + + /* First write the GID map */ + a = procfs_file_alloca(ppid, "gid_map"); + fd = open(a, O_WRONLY|O_CLOEXEC); + if (fd < 0) { + r = -errno; + goto child_fail; + } + if (write(fd, gid_map, strlen(gid_map)) < 0) { + r = -errno; + goto child_fail; + } + fd = safe_close(fd); + + /* The write the UID map */ + a = procfs_file_alloca(ppid, "uid_map"); + fd = open(a, O_WRONLY|O_CLOEXEC); + if (fd < 0) { + r = -errno; + goto child_fail; + } + if (write(fd, uid_map, strlen(uid_map)) < 0) { + r = -errno; + goto child_fail; + } + + _exit(EXIT_SUCCESS); + + child_fail: + (void) write(errno_pipe[1], &r, sizeof(r)); + _exit(EXIT_FAILURE); + } + + errno_pipe[1] = safe_close(errno_pipe[1]); + + if (unshare(CLONE_NEWUSER) < 0) + return -errno; + + /* Let the child know that the namespace is ready now */ + if (write(unshare_ready_fd, &c, sizeof(c)) < 0) + return -errno; + + /* Try to read an error code from the child */ + n = read(errno_pipe[0], &r, sizeof(r)); + if (n < 0) + return -errno; + if (n == sizeof(r)) { /* an error code was sent to us */ + if (r < 0) + return r; + return -EIO; + } + if (n != 0) /* on success we should have read 0 bytes */ + return -EIO; + + r = wait_for_terminate(pid, &si); + if (r < 0) + return r; + pid = 0; + + /* If something strange happened with the child, let's consider this fatal, too */ + if (si.si_code != CLD_EXITED || si.si_status != 0) + return -EIO; + + return 0; +} + +static int setup_runtime_directory( + const ExecContext *context, + const ExecParameters *params, + uid_t uid, + gid_t gid) { + + char **rt; + int r; + + assert(context); + assert(params); + + STRV_FOREACH(rt, context->runtime_directory) { + _cleanup_free_ char *p; + + p = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!p) + return -ENOMEM; + + r = mkdir_p_label(p, context->runtime_directory_mode); + if (r < 0) + return r; + + r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); + if (r < 0) + return r; + } + + return 0; +} + +static int setup_smack( + const ExecContext *context, + const ExecCommand *command) { + +#ifdef HAVE_SMACK + int r; + + assert(context); + assert(command); + + if (!mac_smack_use()) + return 0; + + if (context->smack_process_label) { + r = mac_smack_apply_pid(0, context->smack_process_label); + if (r < 0) + return r; + } +#ifdef SMACK_DEFAULT_PROCESS_LABEL + else { + _cleanup_free_ char *exec_label = NULL; + + r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); + if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) + return r; + + r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); + if (r < 0) + return r; + } +#endif +#endif + + return 0; +} + +static int compile_read_write_paths( + const ExecContext *context, + const ExecParameters *params, + char ***ret) { + + _cleanup_strv_free_ char **l = NULL; + char **rt; + + /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all + * runtime directories. */ + + if (strv_isempty(context->read_write_paths) && + strv_isempty(context->runtime_directory)) { + *ret = NULL; /* NOP if neither is set */ + return 0; + } + + l = strv_copy(context->read_write_paths); + if (!l) + return -ENOMEM; + + STRV_FOREACH(rt, context->runtime_directory) { + char *s; + + s = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!s) + return -ENOMEM; + + if (strv_consume(&l, s) < 0) + return -ENOMEM; + } + + *ret = l; + l = NULL; + + return 0; +} + +static int apply_mount_namespace(Unit *u, const ExecContext *context, + const ExecParameters *params, + ExecRuntime *runtime) { + int r; + _cleanup_free_ char **rw = NULL; + char *tmp = NULL, *var = NULL; + const char *root_dir = NULL; + NameSpaceInfo ns_info = { + .private_dev = context->private_devices, + .protect_control_groups = context->protect_control_groups, + .protect_kernel_tunables = context->protect_kernel_tunables, + .protect_kernel_modules = context->protect_kernel_modules, + }; + + assert(context); + + /* The runtime struct only contains the parent of the private /tmp, + * which is non-accessible to world users. Inside of it there's a /tmp + * that is sticky, and that's the one we want to use here. */ + + if (context->private_tmp && runtime) { + if (runtime->tmp_dir) + tmp = strjoina(runtime->tmp_dir, "/tmp"); + if (runtime->var_tmp_dir) + var = strjoina(runtime->var_tmp_dir, "/tmp"); + } + + r = compile_read_write_paths(context, params, &rw); + if (r < 0) + return r; + + if (params->flags & EXEC_APPLY_CHROOT) + root_dir = context->root_directory; + + r = setup_namespace(root_dir, &ns_info, rw, + context->read_only_paths, + context->inaccessible_paths, + tmp, + var, + context->protect_home, + context->protect_system, + context->mount_flags); + + /* If we couldn't set up the namespace this is probably due to a + * missing capability. In this case, silently proceeed. */ + if (IN_SET(r, -EPERM, -EACCES)) { + log_open(); + log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); + log_close(); + r = 0; + } + + return r; +} + +static int apply_working_directory(const ExecContext *context, + const ExecParameters *params, + const char *home, + const bool needs_mount_ns) { + const char *d; + const char *wd; + + assert(context); + + if (context->working_directory_home) + wd = home; + else if (context->working_directory) + wd = context->working_directory; + else + wd = "/"; + + if (params->flags & EXEC_APPLY_CHROOT) { + if (!needs_mount_ns && context->root_directory) + if (chroot(context->root_directory) < 0) + return -errno; + + d = wd; + } else + d = strjoina(strempty(context->root_directory), "/", strempty(wd)); + + if (chdir(d) < 0 && !context->working_directory_missing_ok) + return -errno; + + return 0; +} + +static void append_socket_pair(int *array, unsigned *n, int pair[2]) { + assert(array); + assert(n); + + if (!pair) + return; + + if (pair[0] >= 0) + array[(*n)++] = pair[0]; + if (pair[1] >= 0) + array[(*n)++] = pair[1]; +} + static int close_remaining_fds( const ExecParameters *params, ExecRuntime *runtime, + DynamicCreds *dcreds, + int user_lookup_fd, int socket_fd, int *fds, unsigned n_fds) { unsigned n_dont_close = 0; - int dont_close[n_fds + 7]; + int dont_close[n_fds + 12]; assert(params); @@ -1551,37 +2144,109 @@ static int close_remaining_fds( n_dont_close += n_fds; } - if (runtime) { - if (runtime->netns_storage_socket[0] >= 0) - dont_close[n_dont_close++] = runtime->netns_storage_socket[0]; - if (runtime->netns_storage_socket[1] >= 0) - dont_close[n_dont_close++] = runtime->netns_storage_socket[1]; + if (runtime) + append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket); + + if (dcreds) { + if (dcreds->user) + append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket); + if (dcreds->group) + append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket); } + if (user_lookup_fd >= 0) + dont_close[n_dont_close++] = user_lookup_fd; + return close_all_fds(dont_close, n_dont_close); } +static bool context_has_address_families(const ExecContext *c) { + assert(c); + + return c->address_families_whitelist || + !set_isempty(c->address_families); +} + +static bool context_has_syscall_filters(const ExecContext *c) { + assert(c); + + return c->syscall_whitelist || + !set_isempty(c->syscall_filter) || + !set_isempty(c->syscall_archs); +} + +static bool context_has_no_new_privileges(const ExecContext *c) { + assert(c); + + if (c->no_new_privileges) + return true; + + if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */ + return false; + + return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */ + c->memory_deny_write_execute || + c->restrict_realtime || + c->protect_kernel_tunables || + c->protect_kernel_modules || + c->private_devices || + context_has_syscall_filters(c); +} + +static int send_user_lookup( + Unit *unit, + int user_lookup_fd, + uid_t uid, + gid_t gid) { + + assert(unit); + + /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID + * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was + * specified. */ + + if (user_lookup_fd < 0) + return 0; + + if (!uid_is_valid(uid) && !gid_is_valid(gid)) + return 0; + + if (writev(user_lookup_fd, + (struct iovec[]) { + { .iov_base = &uid, .iov_len = sizeof(uid) }, + { .iov_base = &gid, .iov_len = sizeof(gid) }, + { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0) + return -errno; + + return 0; +} + static int exec_child( Unit *unit, ExecCommand *command, const ExecContext *context, const ExecParameters *params, ExecRuntime *runtime, + DynamicCreds *dcreds, char **argv, int socket_fd, + int named_iofds[3], int *fds, unsigned n_fds, char **files_env, + int user_lookup_fd, int *exit_status) { _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL; _cleanup_free_ char *mac_selinux_context_net = NULL; - const char *username = NULL, *home = NULL, *shell = NULL, *wd; + _cleanup_free_ gid_t *supplementary_gids = NULL; + const char *username = NULL, *groupname = NULL; + const char *home = NULL, *shell = NULL; dev_t journal_stream_dev = 0; ino_t journal_stream_ino = 0; bool needs_mount_namespace; uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; - int i, r; + int i, r, ngids = 0; assert(unit); assert(command); @@ -1617,7 +2282,7 @@ static int exec_child( log_forget_fds(); - r = close_remaining_fds(params, runtime, socket_fd, fds, n_fds); + r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds); if (r < 0) { *exit_status = EXIT_FDS; return r; @@ -1631,7 +2296,7 @@ static int exec_child( exec_context_tty_reset(context, params); - if (params->confirm_spawn) { + if (params->flags & EXEC_CONFIRM_SPAWN) { char response; r = ask_for_confirmation(&response, argv); @@ -1650,44 +2315,76 @@ static int exec_child( } } - if (context->user) { - username = context->user; - r = get_user_creds(&username, &uid, &gid, &home, &shell); + if (context->dynamic_user && dcreds) { + + /* Make sure we bypass our own NSS module for any NSS checks */ + if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) { + *exit_status = EXIT_USER; + return -errno; + } + + r = dynamic_creds_realize(dcreds, &uid, &gid); if (r < 0) { *exit_status = EXIT_USER; return r; } - } - if (context->group) { - const char *g = context->group; + if (!uid_is_valid(uid) || !gid_is_valid(gid)) { + *exit_status = EXIT_USER; + return -ESRCH; + } + + if (dcreds->user) + username = dcreds->user->name; + + } else { + r = get_fixed_user(context, &username, &uid, &gid, &home, &shell); + if (r < 0) { + *exit_status = EXIT_USER; + return r; + } - r = get_group_creds(&g, &gid); + r = get_fixed_group(context, &groupname, &gid); if (r < 0) { *exit_status = EXIT_GROUP; return r; } } + /* Initialize user supplementary groups and get SupplementaryGroups= ones */ + r = get_supplementary_groups(context, username, groupname, gid, + &supplementary_gids, &ngids); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; + } + + r = send_user_lookup(unit, user_lookup_fd, uid, gid); + if (r < 0) { + *exit_status = EXIT_USER; + return r; + } + + user_lookup_fd = safe_close(user_lookup_fd); /* If a socket is connected to STDIN/STDOUT/STDERR, we * must sure to drop O_NONBLOCK */ if (socket_fd >= 0) (void) fd_nonblock(socket_fd, false); - r = setup_input(context, params, socket_fd); + r = setup_input(context, params, socket_fd, named_iofds); if (r < 0) { *exit_status = EXIT_STDIN; return r; } - r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); + r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDOUT; return r; } - r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); + r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDERR; return r; @@ -1774,7 +2471,7 @@ static int exec_child( USER_PROCESS, username ? "root" : context->user); - if (context->user && is_terminal_input(context->std_input)) { + if (context->user) { r = chown_terminal(STDIN_FILENO, uid); if (r < 0) { *exit_status = EXIT_STDIN; @@ -1801,32 +2498,15 @@ static int exec_child( } if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) { - char **rt; - - STRV_FOREACH(rt, context->runtime_directory) { - _cleanup_free_ char *p; - - p = strjoin(params->runtime_prefix, "/", *rt, NULL); - if (!p) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return -ENOMEM; - } - - r = mkdir_p_label(p, context->runtime_directory_mode); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } - - r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } + r = setup_runtime_directory(context, params, uid, gid); + if (r < 0) { + *exit_status = EXIT_RUNTIME_DIRECTORY; + return r; } } r = build_environment( + unit, context, params, n_fds, @@ -1860,49 +2540,16 @@ static int exec_child( } accum_env = strv_env_clean(accum_env); - umask(context->umask); + (void) umask(context->umask); - if (params->apply_permissions && !command->privileged) { - r = enforce_groups(context, username, gid); - if (r < 0) { - *exit_status = EXIT_GROUP; - return r; - } -#ifdef HAVE_SMACK - if (context->smack_process_label) { - r = mac_smack_apply_pid(0, context->smack_process_label); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#ifdef SMACK_DEFAULT_PROCESS_LABEL - else { - _cleanup_free_ char *exec_label = NULL; - - r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); - if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - - r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#endif -#endif -#ifdef HAVE_PAM + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { if (context->pam_name && username) { - r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds); + r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { *exit_status = EXIT_PAM; return r; } } -#endif } if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) { @@ -1914,80 +2561,37 @@ static int exec_child( } needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); - if (needs_mount_namespace) { - char *tmp = NULL, *var = NULL; - - /* The runtime struct only contains the parent - * of the private /tmp, which is - * non-accessible to world users. Inside of it - * there's a /tmp that is sticky, and that's - * the one we want to use here. */ - - if (context->private_tmp && runtime) { - if (runtime->tmp_dir) - tmp = strjoina(runtime->tmp_dir, "/tmp"); - if (runtime->var_tmp_dir) - var = strjoina(runtime->var_tmp_dir, "/tmp"); - } - - r = setup_namespace( - params->apply_chroot ? context->root_directory : NULL, - context->read_write_paths, - context->read_only_paths, - context->inaccessible_paths, - tmp, - var, - context->private_devices, - context->protect_home, - context->protect_system, - context->mount_flags); - - /* If we couldn't set up the namespace this is - * probably due to a missing capability. In this case, - * silently proceeed. */ - if (r == -EPERM || r == -EACCES) { - log_open(); - log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); - log_close(); - } else if (r < 0) { + r = apply_mount_namespace(unit, context, params, runtime); + if (r < 0) { *exit_status = EXIT_NAMESPACE; return r; } } - if (context->working_directory_home) - wd = home; - else if (context->working_directory) - wd = context->working_directory; - else - wd = "/"; - - if (params->apply_chroot) { - if (!needs_mount_namespace && context->root_directory) - if (chroot(context->root_directory) < 0) { - *exit_status = EXIT_CHROOT; - return -errno; - } - - if (chdir(wd) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; - } - } else { - const char *d; + /* Apply just after mount namespace setup */ + r = apply_working_directory(context, params, home, needs_mount_namespace); + if (r < 0) { + *exit_status = EXIT_CHROOT; + return r; + } - d = strjoina(strempty(context->root_directory), "/", strempty(wd)); - if (chdir(d) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; + /* Drop groups as early as possbile */ + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { + r = enforce_groups(context, gid, supplementary_gids, ngids); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; } } #ifdef HAVE_SELINUX - if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) { + if ((params->flags & EXEC_APPLY_PERMISSIONS) && + mac_selinux_use() && + params->selinux_context_net && + socket_fd >= 0 && + !command->privileged) { + r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net); if (r < 0) { *exit_status = EXIT_SELINUX_CONTEXT; @@ -1996,6 +2600,14 @@ static int exec_child( } #endif + if ((params->flags & EXEC_APPLY_PERMISSIONS) && context->private_users) { + r = setup_private_users(uid, gid); + if (r < 0) { + *exit_status = EXIT_USER; + return r; + } + } + /* We repeat the fd closing here, to make sure that * nothing is leaked from the PAM modules. Note that * we are more aggressive this time since socket_fd @@ -2012,13 +2624,8 @@ static int exec_child( return r; } - if (params->apply_permissions && !command->privileged) { + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - bool use_address_families = context->address_families_whitelist || - !set_isempty(context->address_families); - bool use_syscall_filter = context->syscall_whitelist || - !set_isempty(context->syscall_filter) || - !set_isempty(context->syscall_archs); int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { @@ -2085,6 +2692,41 @@ static int exec_child( } } + /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to + * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires + * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls + * are restricted. */ + +#ifdef HAVE_SELINUX + if (mac_selinux_use()) { + char *exec_context = mac_selinux_context_net ?: context->selinux_context; + + if (exec_context) { + r = setexeccon(exec_context); + if (r < 0) { + *exit_status = EXIT_SELINUX_CONTEXT; + return r; + } + } + } +#endif + + r = setup_smack(context, command); + if (r < 0) { + *exit_status = EXIT_SMACK_PROCESS_LABEL; + return r; + } + +#ifdef HAVE_APPARMOR + if (context->apparmor_profile && mac_apparmor_use()) { + r = aa_change_onexec(context->apparmor_profile); + if (r < 0 && !context->apparmor_profile_ignore) { + *exit_status = EXIT_APPARMOR_PROFILE; + return -errno; + } + } +#endif + /* PR_GET_SECUREBITS is not privileged, while * PR_SET_SECUREBITS is. So to suppress * potential EPERMs we'll try not to call @@ -2095,16 +2737,15 @@ static int exec_child( return -errno; } - if (context->no_new_privileges || - (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter))) + if (context_has_no_new_privileges(context)) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; } #ifdef HAVE_SECCOMP - if (use_address_families) { - r = apply_address_families(context); + if (context_has_address_families(context)) { + r = apply_address_families(unit, context); if (r < 0) { *exit_status = EXIT_ADDRESS_FAMILIES; return r; @@ -2112,7 +2753,7 @@ static int exec_child( } if (context->memory_deny_write_execute) { - r = apply_memory_deny_write_execute(context); + r = apply_memory_deny_write_execute(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; return r; @@ -2120,42 +2761,44 @@ static int exec_child( } if (context->restrict_realtime) { - r = apply_restrict_realtime(context); + r = apply_restrict_realtime(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; return r; } } - if (use_syscall_filter) { - r = apply_seccomp(context); + if (context->protect_kernel_tunables) { + r = apply_protect_sysctl(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; return r; } } -#endif -#ifdef HAVE_SELINUX - if (mac_selinux_use()) { - char *exec_context = mac_selinux_context_net ?: context->selinux_context; + if (context->protect_kernel_modules) { + r = apply_protect_kernel_modules(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } - if (exec_context) { - r = setexeccon(exec_context); - if (r < 0) { - *exit_status = EXIT_SELINUX_CONTEXT; - return r; - } + if (context->private_devices) { + r = apply_private_devices(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; } } -#endif -#ifdef HAVE_APPARMOR - if (context->apparmor_profile && mac_apparmor_use()) { - r = aa_change_onexec(context->apparmor_profile); - if (r < 0 && !context->apparmor_profile_ignore) { - *exit_status = EXIT_APPARMOR_PROFILE; - return -errno; + /* This really should remain the last step before the execve(), to make sure our own code is unaffected + * by the filter as little as possible. */ + if (context_has_syscall_filters(context)) { + r = apply_seccomp(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; } } #endif @@ -2192,12 +2835,14 @@ int exec_spawn(Unit *unit, const ExecContext *context, const ExecParameters *params, ExecRuntime *runtime, + DynamicCreds *dcreds, pid_t *ret) { _cleanup_strv_free_ char **files_env = NULL; int *fds = NULL; unsigned n_fds = 0; _cleanup_free_ char *line = NULL; int socket_fd, r; + int named_iofds[3] = { -1, -1, -1 }; char **argv; pid_t pid; @@ -2224,6 +2869,10 @@ int exec_spawn(Unit *unit, n_fds = params->n_fds; } + r = exec_context_named_iofds(unit, context, params, named_iofds); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m"); + r = exec_context_load_environment(unit, context, &files_env); if (r < 0) return log_unit_error_errno(unit, r, "Failed to load environment files: %m"); @@ -2250,10 +2899,13 @@ int exec_spawn(Unit *unit, context, params, runtime, + dcreds, argv, socket_fd, + named_iofds, fds, n_fds, files_env, + unit->manager->user_lookup_fds[1], &exit_status); if (r < 0) { log_open(); @@ -2313,6 +2965,9 @@ void exec_context_done(ExecContext *c) { for (l = 0; l < ELEMENTSOF(c->rlimit); l++) c->rlimit[l] = mfree(c->rlimit[l]); + for (l = 0; l < 3; l++) + c->stdio_fdname[l] = mfree(c->stdio_fdname[l]); + c->working_directory = mfree(c->working_directory); c->root_directory = mfree(c->root_directory); c->tty_path = mfree(c->tty_path); @@ -2411,6 +3066,56 @@ static void invalid_env(const char *p, void *userdata) { log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path); } +const char* exec_context_fdname(const ExecContext *c, int fd_index) { + assert(c); + + switch (fd_index) { + case STDIN_FILENO: + if (c->std_input != EXEC_INPUT_NAMED_FD) + return NULL; + return c->stdio_fdname[STDIN_FILENO] ?: "stdin"; + case STDOUT_FILENO: + if (c->std_output != EXEC_OUTPUT_NAMED_FD) + return NULL; + return c->stdio_fdname[STDOUT_FILENO] ?: "stdout"; + case STDERR_FILENO: + if (c->std_error != EXEC_OUTPUT_NAMED_FD) + return NULL; + return c->stdio_fdname[STDERR_FILENO] ?: "stderr"; + default: + return NULL; + } +} + +int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) { + unsigned i, targets; + const char *stdio_fdname[3]; + + assert(c); + assert(p); + + targets = (c->std_input == EXEC_INPUT_NAMED_FD) + + (c->std_output == EXEC_OUTPUT_NAMED_FD) + + (c->std_error == EXEC_OUTPUT_NAMED_FD); + + for (i = 0; i < 3; i++) + stdio_fdname[i] = exec_context_fdname(c, i); + + for (i = 0; i < p->n_fds && targets > 0; i++) + if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) { + named_iofds[STDIN_FILENO] = p->fds[i]; + targets--; + } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) { + named_iofds[STDOUT_FILENO] = p->fds[i]; + targets--; + } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) { + named_iofds[STDERR_FILENO] = p->fds[i]; + targets--; + } + + return (targets == 0 ? 0 : -ENOENT); +} + int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) { char **i, **r = NULL; @@ -2555,8 +3260,12 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sRootDirectory: %s\n" "%sNonBlocking: %s\n" "%sPrivateTmp: %s\n" - "%sPrivateNetwork: %s\n" "%sPrivateDevices: %s\n" + "%sProtectKernelTunables: %s\n" + "%sProtectKernelModules: %s\n" + "%sProtectControlGroups: %s\n" + "%sPrivateNetwork: %s\n" + "%sPrivateUsers: %s\n" "%sProtectHome: %s\n" "%sProtectSystem: %s\n" "%sIgnoreSIGPIPE: %s\n" @@ -2567,8 +3276,12 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, c->root_directory ? c->root_directory : "/", prefix, yes_no(c->non_blocking), prefix, yes_no(c->private_tmp), - prefix, yes_no(c->private_network), prefix, yes_no(c->private_devices), + prefix, yes_no(c->protect_kernel_tunables), + prefix, yes_no(c->protect_kernel_modules), + prefix, yes_no(c->protect_control_groups), + prefix, yes_no(c->private_network), + prefix, yes_no(c->private_users), prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), prefix, yes_no(c->ignore_sigpipe), @@ -2723,6 +3436,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->group) fprintf(f, "%sGroup: %s\n", prefix, c->group); + fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user)); + if (strv_length(c->supplementary_groups) > 0) { fprintf(f, "%sSupplementaryGroups:", prefix); strv_fprintf(f, c->supplementary_groups); @@ -2882,12 +3597,12 @@ void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) { "%sPID: "PID_FMT"\n", prefix, s->pid); - if (s->start_timestamp.realtime > 0) + if (dual_timestamp_is_set(&s->start_timestamp)) fprintf(f, "%sStart Timestamp: %s\n", prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime)); - if (s->exit_timestamp.realtime > 0) + if (dual_timestamp_is_set(&s->exit_timestamp)) fprintf(f, "%sExit Timestamp: %s\n" "%sExit Code: %s\n" @@ -2908,7 +3623,8 @@ char *exec_command_line(char **argv) { STRV_FOREACH(a, argv) k += strlen(*a)+3; - if (!(n = new(char, k))) + n = new(char, k); + if (!n) return NULL; p = n; @@ -3097,9 +3813,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r) { free(r->tmp_dir); free(r->var_tmp_dir); safe_close_pair(r->netns_storage_socket); - free(r); - - return NULL; + return mfree(r); } int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) { @@ -3255,7 +3969,8 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_TTY] = "tty", [EXEC_INPUT_TTY_FORCE] = "tty-force", [EXEC_INPUT_TTY_FAIL] = "tty-fail", - [EXEC_INPUT_SOCKET] = "socket" + [EXEC_INPUT_SOCKET] = "socket", + [EXEC_INPUT_NAMED_FD] = "fd", }; DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput); @@ -3270,7 +3985,8 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = { [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console", [EXEC_OUTPUT_JOURNAL] = "journal", [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console", - [EXEC_OUTPUT_SOCKET] = "socket" + [EXEC_OUTPUT_SOCKET] = "socket", + [EXEC_OUTPUT_NAMED_FD] = "fd", }; DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput); diff --git a/src/core/execute.h b/src/core/execute.h index 189c4d0999..c7d0f7761e 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -50,6 +50,7 @@ typedef enum ExecInput { EXEC_INPUT_TTY_FORCE, EXEC_INPUT_TTY_FAIL, EXEC_INPUT_SOCKET, + EXEC_INPUT_NAMED_FD, _EXEC_INPUT_MAX, _EXEC_INPUT_INVALID = -1 } ExecInput; @@ -65,6 +66,7 @@ typedef enum ExecOutput { EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE, EXEC_OUTPUT_SOCKET, + EXEC_OUTPUT_NAMED_FD, _EXEC_OUTPUT_MAX, _EXEC_OUTPUT_INVALID = -1 } ExecOutput; @@ -92,6 +94,8 @@ struct ExecRuntime { char *tmp_dir; char *var_tmp_dir; + /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network + * namespace. */ int netns_storage_socket[2]; }; @@ -118,6 +122,7 @@ struct ExecContext { ExecInput std_input; ExecOutput std_output; ExecOutput std_error; + char *stdio_fdname[3]; nsec_t timer_slack_nsec; @@ -169,11 +174,18 @@ struct ExecContext { bool private_tmp; bool private_network; bool private_devices; + bool private_users; ProtectSystem protect_system; ProtectHome protect_home; + bool protect_kernel_tunables; + bool protect_kernel_modules; + bool protect_control_groups; bool no_new_privileges; + bool dynamic_user; + bool remove_ipc; + /* This is not exposed to the user but available * internally. We need it to make sure that whenever we spawn * /usr/bin/mount it is run in the same process group as us so @@ -204,6 +216,19 @@ struct ExecContext { bool no_new_privileges_set:1; }; +typedef enum ExecFlags { + EXEC_CONFIRM_SPAWN = 1U << 0, + EXEC_APPLY_PERMISSIONS = 1U << 1, + EXEC_APPLY_CHROOT = 1U << 2, + EXEC_APPLY_TTY_STDIN = 1U << 3, + + /* The following are not used by execute.c, but by consumers internally */ + EXEC_PASS_FDS = 1U << 4, + EXEC_IS_CONTROL = 1U << 5, + EXEC_SETENV_RESULT = 1U << 6, + EXEC_SET_WATCHDOG = 1U << 7, +} ExecFlags; + struct ExecParameters { char **argv; char **environment; @@ -212,11 +237,7 @@ struct ExecParameters { char **fd_names; unsigned n_fds; - bool apply_permissions:1; - bool apply_chroot:1; - bool apply_tty_stdin:1; - - bool confirm_spawn:1; + ExecFlags flags; bool selinux_context_net:1; bool cgroup_delegate:1; @@ -235,12 +256,14 @@ struct ExecParameters { }; #include "unit.h" +#include "dynamic-user.h" int exec_spawn(Unit *unit, ExecCommand *command, const ExecContext *context, const ExecParameters *exec_params, ExecRuntime *runtime, + DynamicCreds *dynamic_creds, pid_t *ret); void exec_command_done(ExecCommand *c); @@ -264,6 +287,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix); int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_root); int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l); +int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]); +const char* exec_context_fdname(const ExecContext *c, int fd_index); bool exec_context_may_touch_console(ExecContext *c); bool exec_context_maintains_privileges(ExecContext *c); diff --git a/src/core/job.c b/src/core/job.c index 7557874d4d..ac6910a906 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR } static void job_print_status_message(Unit *u, JobType t, JobResult result) { - static struct { + static const struct { const char *color, *word; } const statuses[_JOB_RESULT_MAX] = { - [JOB_DONE] = {ANSI_GREEN, " OK "}, - [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "}, - [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"}, - [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"}, - [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "}, - [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"}, - [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"}, + [JOB_DONE] = { ANSI_GREEN, " OK " }, + [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " }, + [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" }, + [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" }, + [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " }, + [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" }, + [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" }, }; const char *format; @@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { if (!format) return; + /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */ DISABLE_WARNING_FORMAT_NONLITERAL; - xsprintf(buf, format, unit_description(u)); + snprintf(buf, sizeof(buf), format, unit_description(u)); REENABLE_WARNING; switch (t) { @@ -927,7 +928,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user u = j->unit; job_finish_and_invalidate(j, JOB_TIMEOUT, true, false); - failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg); + emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out"); return 0; } @@ -997,7 +998,10 @@ char *job_dbus_path(Job *j) { return p; } -int job_serialize(Job *j, FILE *f, FDSet *fds) { +int job_serialize(Job *j, FILE *f) { + assert(j); + assert(f); + fprintf(f, "job-id=%u\n", j->id); fprintf(f, "job-type=%s\n", job_type_to_string(j->type)); fprintf(f, "job-state=%s\n", job_state_to_string(j->state)); @@ -1008,15 +1012,16 @@ int job_serialize(Job *j, FILE *f, FDSet *fds) { if (j->begin_usec > 0) fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec); - bus_track_serialize(j->clients, f); + bus_track_serialize(j->clients, f, "subscribed"); /* End marker */ fputc('\n', f); return 0; } -int job_deserialize(Job *j, FILE *f, FDSet *fds) { +int job_deserialize(Job *j, FILE *f) { assert(j); + assert(f); for (;;) { char line[LINE_MAX], *l, *v; @@ -1106,7 +1111,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) { } else if (streq(l, "subscribed")) { if (strv_extend(&j->deserialized_clients, v) < 0) - return log_oom(); + log_oom(); } } } @@ -1118,9 +1123,8 @@ int job_coldplug(Job *j) { /* After deserialization is complete and the bus connection * set up again, let's start watching our subscribers again */ - r = bus_track_coldplug(j->manager, &j->clients, &j->deserialized_clients); - if (r < 0) - return r; + (void) bus_track_coldplug(j->manager, &j->clients, false, j->deserialized_clients); + j->deserialized_clients = strv_free(j->deserialized_clients); if (j->state == JOB_WAITING) job_add_to_run_queue(j); diff --git a/src/core/job.h b/src/core/job.h index d359e8bb3e..85368f0d30 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -177,8 +177,8 @@ Job* job_install(Job *j); int job_install_deserialized(Job *j); void job_uninstall(Job *j); void job_dump(Job *j, FILE*f, const char *prefix); -int job_serialize(Job *j, FILE *f, FDSet *fds); -int job_deserialize(Job *j, FILE *f, FDSet *fds); +int job_serialize(Job *j, FILE *f); +int job_deserialize(Job *j, FILE *f); int job_coldplug(Job *j); JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 6a5c16a000..af2f9d960b 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -19,9 +19,9 @@ m4_dnl Define the context options only once m4_define(`EXEC_CONTEXT_CONFIG_ITEMS', `$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context) $1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory) -$1.User, config_parse_unit_string_printf, 0, offsetof($1, exec_context.user) -$1.Group, config_parse_unit_string_printf, 0, offsetof($1, exec_context.group) -$1.SupplementaryGroups, config_parse_strv, 0, offsetof($1, exec_context.supplementary_groups) +$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user) +$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group) +$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups) $1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context) $1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context) $1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context) @@ -34,9 +34,10 @@ $1.UMask, config_parse_mode, 0, $1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment) $1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files) $1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment) -$1.StandardInput, config_parse_input, 0, offsetof($1, exec_context.std_input) -$1.StandardOutput, config_parse_output, 0, offsetof($1, exec_context.std_output) -$1.StandardError, config_parse_output, 0, offsetof($1, exec_context.std_error) +$1.DynamicUser, config_parse_bool, 0, offsetof($1, exec_context.dynamic_user) +$1.StandardInput, config_parse_exec_input, 0, offsetof($1, exec_context) +$1.StandardOutput, config_parse_exec_output, 0, offsetof($1, exec_context) +$1.StandardError, config_parse_exec_output, 0, offsetof($1, exec_context) $1.TTYPath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.tty_path) $1.TTYReset, config_parse_bool, 0, offsetof($1, exec_context.tty_reset) $1.TTYVHangup, config_parse_bool, 0, offsetof($1, exec_context.tty_vhangup) @@ -87,8 +88,12 @@ $1.ReadWritePaths, config_parse_namespace_path_strv, 0, $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths) $1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) -$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) +$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) +$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules) +$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups) +$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) +$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users) $1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context) $1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context) $1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context) @@ -120,6 +125,8 @@ $1.KillSignal, config_parse_signal, 0, m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS', `$1.Slice, config_parse_unit_slice, 0, 0 $1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting) +$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight) +$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight) $1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares) $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) @@ -127,6 +134,7 @@ $1.MemoryAccounting, config_parse_bool, 0, $1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemorySwapMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) @@ -180,13 +188,13 @@ Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0, Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate) Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0 Unit.JobTimeoutSec, config_parse_sec_fix_0, 0, offsetof(Unit, job_timeout) -Unit.JobTimeoutAction, config_parse_failure_action, 0, offsetof(Unit, job_timeout_action) +Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action) Unit.JobTimeoutRebootArgument, config_parse_string, 0, offsetof(Unit, job_timeout_reboot_arg) Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_limit.interval) m4_dnl The following is a legacy alias name for compatibility Unit.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval) Unit.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst) -Unit.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action) +Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action) Unit.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg) Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions) Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions) @@ -243,9 +251,9 @@ Service.WatchdogSec, config_parse_sec, 0, m4_dnl The following three only exist for compatibility, they moved into Unit, see above Service.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval) Service.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst) -Service.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action) +Service.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action) Service.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg) -Service.FailureAction, config_parse_failure_action, 0, offsetof(Service, failure_action) +Service.FailureAction, config_parse_emergency_action, 0, offsetof(Service, emergency_action) Service.Type, config_parse_service_type, 0, offsetof(Service, type) Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart) Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only) @@ -285,13 +293,14 @@ Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command) Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command) Socket.TimeoutSec, config_parse_sec, 0, offsetof(Socket, timeout_usec) -Socket.SocketUser, config_parse_unit_string_printf, 0, offsetof(Socket, user) -Socket.SocketGroup, config_parse_unit_string_printf, 0, offsetof(Socket, group) +Socket.SocketUser, config_parse_user_group, 0, offsetof(Socket, user) +Socket.SocketGroup, config_parse_user_group, 0, offsetof(Socket, group) Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode) Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode) Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept) Socket.Writable, config_parse_bool, 0, offsetof(Socket, writable) Socket.MaxConnections, config_parse_unsigned, 0, offsetof(Socket, max_connections) +Socket.MaxConnectionsPerSource, config_parse_unsigned, 0, offsetof(Socket, max_connections_per_source) Socket.KeepAlive, config_parse_bool, 0, offsetof(Socket, keep_alive) Socket.KeepAliveTimeSec, config_parse_sec, 0, offsetof(Socket, keep_alive_time) Socket.KeepAliveIntervalSec, config_parse_sec, 0, offsetof(Socket, keep_alive_interval) @@ -350,6 +359,8 @@ Mount.Type, config_parse_string, 0, Mount.TimeoutSec, config_parse_sec, 0, offsetof(Mount, timeout_usec) Mount.DirectoryMode, config_parse_mode, 0, offsetof(Mount, directory_mode) Mount.SloppyOptions, config_parse_bool, 0, offsetof(Mount, sloppy_options) +Mount.LazyUnmount, config_parse_bool, 0, offsetof(Mount, lazy_unmount) +Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount) EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index a36953f766..cbc826809e 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -64,6 +64,7 @@ #include "unit-name.h" #include "unit-printf.h" #include "unit.h" +#include "user-util.h" #include "utf8.h" #include "web-util.h" @@ -490,16 +491,17 @@ int config_parse_socket_bind(const char *unit, return 0; } -int config_parse_exec_nice(const char *unit, - const char *filename, - unsigned line, - const char *section, - unsigned section_line, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +int config_parse_exec_nice( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { ExecContext *c = data; int priority, r; @@ -509,14 +511,13 @@ int config_parse_exec_nice(const char *unit, assert(rvalue); assert(data); - r = safe_atoi(rvalue, &priority); + r = parse_nice(rvalue, &priority); if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority, ignoring: %s", rvalue); - return 0; - } + if (r == -ERANGE) + log_syntax(unit, LOG_ERR, filename, line, r, "Nice priority out of range, ignoring: %s", rvalue); + else + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority, ignoring: %s", rvalue); - if (priority < PRIO_MIN || priority >= PRIO_MAX) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Nice priority out of range, ignoring: %s", rvalue); return 0; } @@ -775,8 +776,104 @@ int config_parse_socket_bindtodevice( return 0; } -DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output specifier"); -DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input literal specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output literal specifier"); + +int config_parse_exec_input(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + ExecContext *c = data; + const char *name; + int r; + + assert(data); + assert(filename); + assert(line); + assert(rvalue); + + name = startswith(rvalue, "fd:"); + if (name) { + /* Strip prefix and validate fd name */ + if (!fdname_is_valid(name)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name); + return 0; + } + c->std_input = EXEC_INPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], name); + if (r < 0) + log_oom(); + return r; + } else { + ExecInput ei = exec_input_from_string(rvalue); + if (ei == _EXEC_INPUT_INVALID) + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue); + else + c->std_input = ei; + return 0; + } +} + +int config_parse_exec_output(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + ExecContext *c = data; + ExecOutput eo; + const char *name; + int r; + + assert(data); + assert(filename); + assert(line); + assert(lvalue); + assert(rvalue); + + name = startswith(rvalue, "fd:"); + if (name) { + /* Strip prefix and validate fd name */ + if (!fdname_is_valid(name)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name); + return 0; + } + eo = EXEC_OUTPUT_NAMED_FD; + } else { + eo = exec_output_from_string(rvalue); + if (eo == _EXEC_OUTPUT_INVALID) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue); + return 0; + } + } + + if (streq(lvalue, "StandardOutput")) { + c->std_output = eo; + r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], name); + if (r < 0) + log_oom(); + return r; + } else if (streq(lvalue, "StandardError")) { + c->std_error = eo; + r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], name); + if (r < 0) + log_oom(); + return r; + } else { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output property, ignoring: %s", lvalue); + return 0; + } +} int config_parse_exec_io_class(const char *unit, const char *filename, @@ -1337,10 +1434,13 @@ int config_parse_timer(const char *unit, void *userdata) { Timer *t = data; - usec_t u = 0; + usec_t usec = 0; TimerValue *v; TimerBase b; CalendarSpec *c = NULL; + Unit *u = userdata; + _cleanup_free_ char *k = NULL; + int r; assert(filename); assert(lvalue); @@ -1359,14 +1459,20 @@ int config_parse_timer(const char *unit, return 0; } + r = unit_full_printf(u, rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue); + return 0; + } + if (b == TIMER_CALENDAR) { - if (calendar_spec_from_string(rvalue, &c) < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", rvalue); + if (calendar_spec_from_string(k, &c) < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k); return 0; } } else { - if (parse_sec(rvalue, &u) < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", rvalue); + if (parse_sec(k, &usec) < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k); return 0; } } @@ -1378,7 +1484,7 @@ int config_parse_timer(const char *unit, } v->base = b; - v->value = u; + v->value = usec; v->calendar_spec = c; LIST_PREPEND(value, t->values, v); @@ -1581,11 +1687,7 @@ int config_parse_fdname( return 0; } - free(s->fdname); - s->fdname = p; - p = NULL; - - return 0; + return free_and_replace(s->fdname, p); } int config_parse_service_sockets( @@ -1763,6 +1865,123 @@ int config_parse_sec_fix_0( return 0; } +int config_parse_user_group( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char **user = data, *n; + Unit *u = userdata; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (isempty(rvalue)) + n = NULL; + else { + _cleanup_free_ char *k = NULL; + + r = unit_full_printf(u, rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue); + return 0; + } + + if (!valid_user_group_name_or_id(k)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k); + return 0; + } + + n = k; + k = NULL; + } + + free(*user); + *user = n; + + return 0; +} + +int config_parse_user_group_strv( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char ***users = data; + Unit *u = userdata; + const char *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (isempty(rvalue)) { + char **empty; + + empty = new0(char*, 1); + if (!empty) + return log_oom(); + + strv_free(*users); + *users = empty; + + return 0; + } + + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL, *k = NULL; + + r = extract_first_word(&p, &word, WHITESPACE, 0); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } + + r = unit_full_printf(u, word, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", word); + continue; + } + + if (!valid_user_group_name_or_id(k)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k); + continue; + } + + r = strv_push(users, k); + if (r < 0) + return log_oom(); + + k = NULL; + } + + return 0; +} + int config_parse_busname_service( const char *unit, const char *filename, @@ -1925,9 +2144,7 @@ int config_parse_working_directory( return 0; } - free(c->working_directory); - c->working_directory = k; - k = NULL; + free_and_replace(c->working_directory, k); c->working_directory_home = false; } @@ -2306,7 +2523,7 @@ int config_parse_unit_condition_null( } DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier"); -DEFINE_CONFIG_PARSE_ENUM(config_parse_failure_action, failure_action, FailureAction, "Failed to parse failure action specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier"); int config_parse_unit_requires_mounts_for( const char *unit, @@ -2401,6 +2618,7 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP + static int syscall_filter_parse_one( const char *unit, const char *filename, @@ -2411,27 +2629,29 @@ static int syscall_filter_parse_one( bool warn) { int r; - if (*t == '@') { - const SystemCallFilterSet *set; + if (t[0] == '@') { + const SyscallFilterSet *set; + const char *i; - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, t)) { - const char *sys; + set = syscall_filter_set_find(t); + if (!set) { + if (warn) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t); + return 0; + } - NULSTR_FOREACH(sys, set->value) { - r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); - if (r < 0) - return r; - } - break; - } + NULSTR_FOREACH(i, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false); + if (r < 0) + return r; + } } else { int id; id = seccomp_syscall_resolve_name(t); if (id == __NR_SCMP_ERROR) { if (warn) - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t); return 0; } @@ -2445,8 +2665,9 @@ static int syscall_filter_parse_one( if (r < 0) return log_oom(); } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); } + return 0; } @@ -2465,8 +2686,7 @@ int config_parse_syscall_filter( ExecContext *c = data; Unit *u = userdata; bool invert = false; - const char *word, *state; - size_t l; + const char *p; int r; assert(filename); @@ -2505,24 +2725,24 @@ int config_parse_syscall_filter( } } - FOREACH_WORD_QUOTED(word, l, rvalue, state) { - _cleanup_free_ char *t = NULL; + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL; - t = strndup(word, l); - if (!t) + r = extract_first_word(&p, &word, NULL, 0); + if (r == 0) + break; + if (r == -ENOMEM) return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } - r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true); if (r < 0) return r; } - if (!isempty(state)) - log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); - - /* Turn on NNP, but only if it wasn't configured explicitly - * before, and only if we are in user mode. */ - if (!c->no_new_privileges_set && MANAGER_IS_USER(u->manager)) - c->no_new_privileges = true; return 0; } @@ -2733,6 +2953,34 @@ int config_parse_unit_slice( DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy"); +int config_parse_cpu_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *weight = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = cg_weight_parse(rvalue, weight); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue); + return 0; + } + + return 0; +} + int config_parse_cpu_shares( const char *unit, const char *filename, @@ -2785,7 +3033,7 @@ int config_parse_cpu_quota( return 0; } - r = parse_percent(rvalue); + r = parse_percent_unbounded(rvalue); if (r <= 0) { log_syntax(unit, LOG_ERR, filename, line, r, "CPU quota '%s' invalid. Ignoring.", rvalue); return 0; @@ -2835,8 +3083,12 @@ int config_parse_memory_limit( c->memory_high = bytes; else if (streq(lvalue, "MemoryMax")) c->memory_max = bytes; - else + else if (streq(lvalue, "MemorySwapMax")) + c->memory_swap_max = bytes; + else if (streq(lvalue, "MemoryLimit")) c->memory_limit = bytes; + else + return -EINVAL; return 0; } @@ -2853,30 +3105,36 @@ int config_parse_tasks_max( void *data, void *userdata) { - uint64_t *tasks_max = data, u; + uint64_t *tasks_max = data, v; + Unit *u = userdata; int r; - if (isempty(rvalue) || streq(rvalue, "infinity")) { - *tasks_max = (uint64_t) -1; + if (isempty(rvalue)) { + *tasks_max = u->manager->default_tasks_max; + return 0; + } + + if (streq(rvalue, "infinity")) { + *tasks_max = CGROUP_LIMIT_MAX; return 0; } r = parse_percent(rvalue); if (r < 0) { - r = safe_atou64(rvalue, &u); + r = safe_atou64(rvalue, &v); if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); return 0; } } else - u = system_tasks_max_scale(r, 100U); + v = system_tasks_max_scale(r, 100U); - if (u <= 0 || u >= UINT64_MAX) { + if (v <= 0 || v >= UINT64_MAX) { log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue); return 0; } - *tasks_max = u; + *tasks_max = v; return 0; } @@ -2919,9 +3177,7 @@ int config_parse_device_allow( if (!path) return log_oom(); - if (!startswith(path, "/dev/") && - !startswith(path, "block-") && - !startswith(path, "char-")) { + if (!is_deviceallow_pattern(path)) { log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path); return 0; } @@ -3576,7 +3832,7 @@ int config_parse_no_new_privileges( return 0; } - c->no_new_privileges = !!k; + c->no_new_privileges = k; c->no_new_privileges_set = true; return 0; @@ -4026,8 +4282,8 @@ void unit_dump_config_items(FILE *f) { { config_parse_exec_cpu_affinity, "CPUAFFINITY" }, { config_parse_mode, "MODE" }, { config_parse_unit_env_file, "FILE" }, - { config_parse_output, "OUTPUT" }, - { config_parse_input, "INPUT" }, + { config_parse_exec_output, "OUTPUT" }, + { config_parse_exec_input, "INPUT" }, { config_parse_log_facility, "FACILITY" }, { config_parse_log_level, "LEVEL" }, { config_parse_exec_secure_bits, "SECUREBITS" }, @@ -4062,7 +4318,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_unit_slice, "SLICE" }, { config_parse_documentation, "URL" }, { config_parse_service_timeout, "SECONDS" }, - { config_parse_failure_action, "ACTION" }, + { config_parse_emergency_action, "ACTION" }, { config_parse_set_status, "STATUS" }, { config_parse_service_sockets, "SOCKETS" }, { config_parse_environ, "ENVIRON" }, @@ -4073,6 +4329,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_address_families, "FAMILIES" }, #endif { config_parse_cpu_shares, "SHARES" }, + { config_parse_cpu_weight, "WEIGHT" }, { config_parse_memory_limit, "LIMIT" }, { config_parse_device_allow, "DEVICE" }, { config_parse_device_policy, "POLICY" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index b36a2e3a02..c05f205c37 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -45,7 +45,9 @@ int config_parse_service_timeout(const char *unit, const char *filename, unsigne int config_parse_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_service_restart(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_socket_bindtodevice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_exec_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_exec_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_io_class(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_io_priority(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); @@ -73,7 +75,7 @@ int config_parse_unit_condition_string(const char *unit, const char *filename, u int config_parse_unit_condition_null(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -int config_parse_failure_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_emergency_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_syscall_archs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); @@ -81,6 +83,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); @@ -111,6 +114,8 @@ int config_parse_exec_utmp_mode(const char *unit, const char *filename, unsigned int config_parse_working_directory(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_fdname(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/main.c b/src/core/main.c index 33e22e37dc..f07ed71b31 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -72,6 +72,9 @@ #include "process-util.h" #include "raw-clone.h" #include "rlimit-util.h" +#ifdef HAVE_SECCOMP +#include "seccomp-util.h" +#endif #include "selinux-setup.h" #include "selinux-util.h" #include "signal-util.h" @@ -86,14 +89,14 @@ #include "user-util.h" #include "virt.h" #include "watchdog.h" +#include "emergency-action.h" static enum { ACTION_RUN, ACTION_HELP, ACTION_VERSION, ACTION_TEST, - ACTION_DUMP_CONFIGURATION_ITEMS, - ACTION_DONE + ACTION_DUMP_CONFIGURATION_ITEMS } arg_action = ACTION_RUN; static char *arg_default_unit = NULL; static bool arg_system = false; @@ -129,6 +132,7 @@ static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; +static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; noreturn static void freeze_or_reboot(void) { @@ -200,7 +204,7 @@ noreturn static void crash(int sig) { pid, sigchld_code_to_string(status.si_code), status.si_status, strna(status.si_code == CLD_EXITED - ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL) + ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL) : signal_to_string(status.si_status))); else log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid); @@ -304,7 +308,7 @@ static int set_machine_id(const char *m) { return 0; } -static int parse_proc_cmdline_item(const char *key, const char *value) { +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { int r; @@ -700,6 +704,7 @@ static int parse_config_file(void) { { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting }, { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting }, { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max }, + { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action }, {} }; @@ -713,7 +718,7 @@ static int parse_config_file(void) { CONF_PATHS_NULSTR("systemd/system.conf.d") : CONF_PATHS_NULSTR("systemd/user.conf.d"); - config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL); + config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL); /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY * like everywhere else. */ @@ -994,10 +999,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_MACHINE_ID: r = set_machine_id(optarg); - if (r < 0) { - log_error("MachineID '%s' is not valid.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "MachineID '%s' is not valid.", optarg); break; case 'h': @@ -1120,7 +1123,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { * later when transitioning from the initrd to the main * systemd or suchlike. */ if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) - return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m"); + return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); /* Make sure forked processes get the default kernel setting */ if (!arg_default_rlimit[RLIMIT_NOFILE]) { @@ -1137,7 +1140,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { nl.rlim_cur = nl.rlim_max = 64*1024; r = setrlimit_closest(RLIMIT_NOFILE, &nl); if (r < 0) - return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m"); + return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m"); return 0; } @@ -1187,6 +1190,9 @@ static int enforce_syscall_archs(Set *archs) { void *id; int r; + if (!is_seccomp_available()) + return 0; + seccomp = seccomp_init(SCMP_ACT_ALLOW); if (!seccomp) return log_oom(); @@ -1319,7 +1325,7 @@ static int fixup_environment(void) { return r; if (r == 0) { - term = strdup(default_term_for_tty("/dev/console") + 5); + term = strdup(default_term_for_tty("/dev/console")); if (!term) return -ENOMEM; } @@ -1415,12 +1421,12 @@ int main(int argc, char *argv[]) { if (mac_selinux_setup(&loaded_policy) < 0) { error_message = "Failed to load SELinux policy"; goto finish; - } else if (ima_setup() < 0) { - error_message = "Failed to load IMA policy"; - goto finish; } else if (mac_smack_setup(&loaded_policy) < 0) { error_message = "Failed to load SMACK policy"; goto finish; + } else if (ima_setup() < 0) { + error_message = "Failed to load IMA policy"; + goto finish; } dual_timestamp_get(&security_finish_timestamp); } @@ -1506,7 +1512,8 @@ int main(int argc, char *argv[]) { if (getpid() == 1) { /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit) * will process core dumps for system services by default. */ - (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)); + if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0) + log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m"); /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored * until the systemd-coredump tool is enabled via sysctl. */ @@ -1524,15 +1531,9 @@ int main(int argc, char *argv[]) { * need to do that for user instances since they never log * into the console. */ log_show_color(colors_enabled()); - make_null_stdio(); - } - - /* Initialize default unit */ - r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET); - if (r < 0) { - log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET); - error_message = "Failed to set default unit"; - goto finish; + r = make_null_stdio(); + if (r < 0) + log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m"); } r = initialize_join_controllers(); @@ -1560,7 +1561,7 @@ int main(int argc, char *argv[]) { (void) reset_all_signal_handlers(); (void) ignore_signals(SIGNALS_IGNORE, -1); - arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */ + arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U); if (parse_config_file() < 0) { error_message = "Failed to parse config file"; @@ -1568,7 +1569,7 @@ int main(int argc, char *argv[]) { } if (arg_system) { - r = parse_proc_cmdline(parse_proc_cmdline_item); + r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false); if (r < 0) log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); } @@ -1582,6 +1583,16 @@ int main(int argc, char *argv[]) { goto finish; } + /* Initialize default unit */ + if (!arg_default_unit) { + arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET); + if (!arg_default_unit) { + r = log_oom(); + error_message = "Failed to set default unit"; + goto finish; + } + } + if (arg_action == ACTION_TEST && geteuid() == 0) { log_error("Don't run test mode as root."); @@ -1602,11 +1613,10 @@ int main(int argc, char *argv[]) { goto finish; } - if (arg_action == ACTION_TEST) - skip_setup = true; - - if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) + if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) { pager_open(arg_no_pager, false); + skip_setup = true; + } if (arg_action == ACTION_HELP) { retval = help(); @@ -1615,12 +1625,10 @@ int main(int argc, char *argv[]) { retval = version(); goto finish; } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) { + pager_open(arg_no_pager, false); unit_dump_config_items(stdout); retval = EXIT_SUCCESS; goto finish; - } else if (arg_action == ACTION_DONE) { - retval = EXIT_SUCCESS; - goto finish; } if (!arg_system && @@ -1766,10 +1774,10 @@ int main(int argc, char *argv[]) { log_warning_errno(errno, "Failed to make us a subreaper: %m"); if (arg_system) { - bump_rlimit_nofile(&saved_rlimit_nofile); + (void) bump_rlimit_nofile(&saved_rlimit_nofile); if (empty_etc) { - r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0); + r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0); if (r < 0) log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m"); else @@ -1792,6 +1800,7 @@ int main(int argc, char *argv[]) { m->initrd_timestamp = initrd_timestamp; m->security_start_timestamp = security_start_timestamp; m->security_finish_timestamp = security_finish_timestamp; + m->cad_burst_action = arg_cad_burst_action; manager_set_defaults(m); manager_set_show_status(m, arg_show_status); diff --git a/src/core/manager.c b/src/core/manager.c index 85bf858992..ffccfdcd5e 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -45,6 +45,7 @@ #include "bus-error.h" #include "bus-kernel.h" #include "bus-util.h" +#include "clean-ipc.h" #include "dbus-job.h" #include "dbus-manager.h" #include "dbus-unit.h" @@ -81,6 +82,7 @@ #include "transaction.h" #include "umask-util.h" #include "unit-name.h" +#include "user-util.h" #include "util.h" #include "virt.h" #include "watchdog.h" @@ -98,6 +100,7 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); +static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata); static int manager_dispatch_run_queue(sd_event_source *source, void *userdata); static int manager_run_generators(Manager *m); @@ -519,6 +522,7 @@ static void manager_clean_environment(Manager *m) { "LISTEN_FDNAMES", "WATCHDOG_PID", "WATCHDOG_USEC", + "INVOCATION_ID", NULL); } @@ -553,7 +557,6 @@ static int manager_default_environment(Manager *m) { return 0; } - int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { Manager *m; int r; @@ -580,17 +583,25 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { if (MANAGER_IS_SYSTEM(m)) { m->unit_log_field = "UNIT="; m->unit_log_format_string = "UNIT=%s"; + + m->invocation_log_field = "INVOCATION_ID="; + m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR; } else { m->unit_log_field = "USER_UNIT="; m->unit_log_format_string = "USER_UNIT=%s"; + + m->invocation_log_field = "USER_INVOCATION_ID="; + m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR; } m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd = - m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd = + m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd = m->ask_password_inotify_fd = -1; + m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1; + m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */ m->have_ask_password = -EINVAL; /* we don't know */ @@ -657,9 +668,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { goto fail; } - /* Note that we set up neither kdbus, nor the notify fd - * here. We do that after deserialization, since they might - * have gotten serialized across the reexec. */ + /* Note that we do not set up the notify fd here. We do that after deserialization, + * since they might have gotten serialized across the reexec. */ m->taint_usr = dir_is_empty("/usr") > 0; @@ -767,7 +777,7 @@ static int manager_setup_cgroups_agent(Manager *m) { if (!MANAGER_IS_SYSTEM(m)) return 0; - if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */ return 0; if (m->cgroups_agent_fd < 0) { @@ -813,6 +823,59 @@ static int manager_setup_cgroups_agent(Manager *m) { return 0; } +static int manager_setup_user_lookup_fd(Manager *m) { + int r; + + assert(m); + + /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID + * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation, + * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked + * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects + * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes, + * hence we establish this communication channel so that forked off processes can pass their UID/GID + * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple + * datagram, along with their unit name, so that we can share one communication socket pair among all units for + * this purpose. + * + * You might wonder why we need a communication channel for this that is independent of the usual notification + * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET + * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user + * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available. + * + * Note that this function is called under two circumstances: when we first initialize (in which case we + * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload + * (in which case the socket pair already exists but we still need to allocate the event source for it). */ + + if (m->user_lookup_fds[0] < 0) { + + /* Free all secondary fields */ + safe_close_pair(m->user_lookup_fds); + m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source); + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0) + return log_error_errno(errno, "Failed to allocate user lookup socket: %m"); + + (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE); + } + + if (!m->user_lookup_event_source) { + r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m); + if (r < 0) + return log_error_errno(errno, "Failed to allocate user lookup event source: %m"); + + /* Process even earlier than the notify event source, so that we always know first about valid UID/GID + * resolutions */ + r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8); + if (r < 0) + return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m"); + + (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup"); + } + + return 0; +} + static int manager_connect_bus(Manager *m, bool reexecuting) { bool try_bus_connect; @@ -822,7 +885,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) { return 0; try_bus_connect = - m->kdbus_fd >= 0 || reexecuting || (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")); @@ -854,8 +916,7 @@ enum { _GC_OFFSET_MAX }; -static void unit_gc_mark_good(Unit *u, unsigned gc_marker) -{ +static void unit_gc_mark_good(Unit *u, unsigned gc_marker) { Iterator i; Unit *other; @@ -1004,7 +1065,11 @@ Manager* manager_free(Manager *m) { bus_done(m); + dynamic_user_vacuum(m, false); + hashmap_free(m->dynamic_users); + hashmap_free(m->units); + hashmap_free(m->units_by_invocation_id); hashmap_free(m->jobs); hashmap_free(m->watch_pids1); hashmap_free(m->watch_pids2); @@ -1019,12 +1084,13 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->time_change_event_source); sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->run_queue_event_source); + sd_event_source_unref(m->user_lookup_event_source); safe_close(m->signal_fd); safe_close(m->notify_fd); safe_close(m->cgroups_agent_fd); safe_close(m->time_change_fd); - safe_close(m->kdbus_fd); + safe_close_pair(m->user_lookup_fds); manager_close_ask_password(m); @@ -1050,8 +1116,10 @@ Manager* manager_free(Manager *m) { assert(hashmap_isempty(m->units_requiring_mounts_for)); hashmap_free(m->units_requiring_mounts_for); - free(m); - return NULL; + hashmap_free(m->uid_refs); + hashmap_free(m->gid_refs); + + return mfree(m); } void manager_enumerate(Manager *m) { @@ -1175,9 +1243,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { return r; /* Make sure the transient directory always exists, so that it remains in the search path */ - r = mkdir_p_label(m->lookup_paths.transient, 0755); - if (r < 0) - return r; + if (!m->test_run) { + r = mkdir_p_label(m->lookup_paths.transient, 0755); + if (r < 0) + return r; + } dual_timestamp_get(&m->generators_start_timestamp); r = manager_run_generators(m); @@ -1219,14 +1289,26 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { if (q < 0 && r == 0) r = q; - /* We might have deserialized the kdbus control fd, but if we - * didn't, then let's create the bus now. */ - manager_connect_bus(m, !!serialization); - bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed); + q = manager_setup_user_lookup_fd(m); + if (q < 0 && r == 0) + r = q; + + /* Let's connect to the bus now. */ + (void) manager_connect_bus(m, !!serialization); + + (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed); + m->deserialized_subscribed = strv_free(m->deserialized_subscribed); /* Third, fire things up! */ manager_coldplug(m); + /* Release any dynamic users no longer referenced */ + dynamic_user_vacuum(m, true); + + /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ + manager_vacuum_uid_refs(m); + manager_vacuum_gid_refs(m); + if (serialization) { assert(m->n_reloading > 0); m->n_reloading--; @@ -1599,8 +1681,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const if (UNIT_VTABLE(u)->notify_message) UNIT_VTABLE(u)->notify_message(u, pid, tags, fds); - else - log_unit_debug(u, "Got notification message for unit. Ignoring."); + else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { + _cleanup_free_ char *x = NULL, *y = NULL; + + x = cescape(buf); + if (x) + y = ellipsize(x, 20, 90); + log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y)); + } } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { @@ -1626,7 +1714,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t struct cmsghdr *cmsg; struct ucred *ucred = NULL; - bool found = false; Unit *u1, *u2, *u3; int r, *fd_array = NULL; unsigned n_fds = 0; @@ -1640,16 +1727,15 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } - n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC); if (n < 0) { - if (!IN_SET(errno, EAGAIN, EINTR)) - log_error("Failed to receive notification message: %m"); + if (IN_SET(errno, EAGAIN, EINTR)) + return 0; /* Spurious wakeup, try again */ - /* It's not an option to return an error here since it - * would disable the notification handler entirely. Services - * wouldn't be able to send the WATCHDOG message for - * example... */ - return 0; + /* If this is any other, real error, then let's stop processing this socket. This of course means we + * won't take notification messages anymore, but that's still better than busy looping around this: + * being woken up over and over again but being unable to actually read the message off the socket. */ + return log_error_errno(errno, "Failed to receive notification message: %m"); } CMSG_FOREACH(cmsg, &msghdr) { @@ -1682,40 +1768,40 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } - if ((size_t) n >= sizeof(buf)) { + if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) { log_warning("Received notify message exceeded maximum size. Ignoring."); return 0; } - /* The message should be a string. Here we make sure it's NUL-terminated, - * but only the part until first NUL will be used anyway. */ + /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one + * trailing NUL byte in the message, but don't expect it. */ + if (n > 1 && memchr(buf, 0, n-1)) { + log_warning("Received notify message with embedded NUL bytes. Ignoring."); + return 0; + } + + /* Make sure it's NUL-terminated. */ buf[n] = 0; /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); - if (u1) { + if (u1) manager_invoke_notify_message(m, u1, ucred->pid, buf, fds); - found = true; - } u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); - if (u2 && u2 != u1) { + if (u2 && u2 != u1) manager_invoke_notify_message(m, u2, ucred->pid, buf, fds); - found = true; - } u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); - if (u3 && u3 != u2 && u3 != u1) { + if (u3 && u3 != u2 && u3 != u1) manager_invoke_notify_message(m, u3, ucred->pid, buf, fds); - found = true; - } - if (!found) + if (!u1 && !u2 && !u3) log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid); if (fdset_size(fds) > 0) - log_warning("Got auxiliary fds with notification message, closing all."); + log_warning("Got extra auxiliary fds with notification message, closing them."); return 0; } @@ -1820,6 +1906,18 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) { return r; } +static void manager_handle_ctrl_alt_del(Manager *m) { + /* If the user presses C-A-D more than + * 7 times within 2s, we reboot/shutdown immediately, + * unless it was disabled in system.conf */ + + if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE) + manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); + else + emergency_action(m, m->cad_burst_action, NULL, + "Ctrl-Alt-Del was pressed more than 7 times within 2s"); +} + static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; ssize_t n; @@ -1838,14 +1936,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t for (;;) { n = read(m->signal_fd, &sfsi, sizeof(sfsi)); if (n != sizeof(sfsi)) { + if (n >= 0) { + log_warning("Truncated read from signal fd (%zu bytes)!", n); + return 0; + } - if (n >= 0) - return -EIO; - - if (errno == EINTR || errno == EAGAIN) + if (IN_SET(errno, EINTR, EAGAIN)) break; - return -errno; + /* We return an error here, which will kill this handler, + * to avoid a busy loop on read error. */ + return log_error_errno(errno, "Reading from signal fd failed: %m"); } log_received_signal(sfsi.ssi_signo == SIGCHLD || @@ -1871,19 +1972,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t case SIGINT: if (MANAGER_IS_SYSTEM(m)) { - - /* If the user presses C-A-D more than - * 7 times within 2s, we reboot - * immediately. */ - - if (ratelimit_test(&m->ctrl_alt_del_ratelimit)) - manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); - else { - log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately."); - status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately."); - m->exit_code = MANAGER_REBOOT; - } - + manager_handle_ctrl_alt_del(m); break; } @@ -2169,6 +2258,7 @@ int manager_loop(Manager *m) { int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) { _cleanup_free_ char *n = NULL; + sd_id128_t invocation_id; Unit *u; int r; @@ -2180,12 +2270,25 @@ int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, if (r < 0) return r; + /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it + * as invocation ID. */ + r = sd_id128_from_string(n, &invocation_id); + if (r >= 0) { + u = hashmap_get(m->units_by_invocation_id, &invocation_id); + if (u) { + *_u = u; + return 0; + } + + return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id)); + } + + /* If this didn't work, we use the suffix as unit name. */ r = manager_load_unit(m, n, NULL, e, &u); if (r < 0) return r; *_u = u; - return 0; } @@ -2397,17 +2500,28 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { fprintf(f, "cgroups-agent-fd=%i\n", copy); } - if (m->kdbus_fd >= 0) { - int copy; + if (m->user_lookup_fds[0] >= 0) { + int copy0, copy1; - copy = fdset_put_dup(fds, m->kdbus_fd); - if (copy < 0) - return copy; + copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]); + if (copy0 < 0) + return copy0; + + copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]); + if (copy1 < 0) + return copy1; - fprintf(f, "kdbus-fd=%i\n", copy); + fprintf(f, "user-lookup=%i %i\n", copy0, copy1); } - bus_track_serialize(m->subscribed, f); + bus_track_serialize(m->subscribed, f, "subscribed"); + + r = dynamic_user_serialize(m, f, fds); + if (r < 0) + return r; + + manager_serialize_uid_refs(m, f); + manager_serialize_gid_refs(m, f); fputc('\n', f); @@ -2575,25 +2689,31 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { m->cgroups_agent_fd = fdset_remove(fds, fd); } - } else if (startswith(l, "kdbus-fd=")) { - int fd; + } else if (startswith(l, "user-lookup=")) { + int fd0, fd1; - if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) - log_debug("Failed to parse kdbus fd: %s", l + 9); + if (sscanf(l + 12, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1)) + log_debug("Failed to parse user lookup fd: %s", l + 12); else { - safe_close(m->kdbus_fd); - m->kdbus_fd = fdset_remove(fds, fd); + m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source); + safe_close_pair(m->user_lookup_fds); + m->user_lookup_fds[0] = fdset_remove(fds, fd0); + m->user_lookup_fds[1] = fdset_remove(fds, fd1); } - } else { - int k; + } else if (startswith(l, "dynamic-user=")) + dynamic_user_deserialize_one(m, l + 13, fds); + else if (startswith(l, "destroy-ipc-uid=")) + manager_deserialize_uid_refs_one(m, l + 16); + else if (startswith(l, "destroy-ipc-gid=")) + manager_deserialize_gid_refs_one(m, l + 16); + else if (startswith(l, "subscribed=")) { - k = bus_track_deserialize_item(&m->deserialized_subscribed, l); - if (k < 0) - log_debug_errno(k, "Failed to deserialize bus tracker object: %m"); - else if (k == 0) - log_debug("Unknown serialization item '%s'", l); - } + if (strv_extend(&m->deserialized_subscribed, l+11) < 0) + log_oom(); + + } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */ + log_debug("Unknown serialization item '%s'", l); } for (;;) { @@ -2666,6 +2786,9 @@ int manager_reload(Manager *m) { manager_clear_jobs_and_units(m); lookup_paths_flush_generator(&m->lookup_paths); lookup_paths_free(&m->lookup_paths); + dynamic_user_vacuum(m, false); + m->uid_refs = hashmap_free(m->uid_refs); + m->gid_refs = hashmap_free(m->gid_refs); q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL); if (q < 0 && r >= 0) @@ -2699,9 +2822,20 @@ int manager_reload(Manager *m) { if (q < 0 && r >= 0) r = q; + q = manager_setup_user_lookup_fd(m); + if (q < 0 && r >= 0) + r = q; + /* Third, fire things up! */ manager_coldplug(m); + /* Release any dynamic users no longer referenced */ + dynamic_user_vacuum(m, true); + + /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ + manager_vacuum_uid_refs(m); + manager_vacuum_gid_refs(m); + /* Sync current state of bus names with our set of listening units */ if (m->api_bus) manager_sync_bus_names(m, m->api_bus); @@ -2947,7 +3081,7 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) { m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1); if (!m->rlimit[i]) - return -ENOMEM; + return log_oom(); } return 0; @@ -3135,6 +3269,300 @@ ManagerState manager_state(Manager *m) { return MANAGER_RUNNING; } +#define DESTROY_IPC_FLAG (UINT32_C(1) << 31) + +static void manager_unref_uid_internal( + Manager *m, + Hashmap **uid_refs, + uid_t uid, + bool destroy_now, + int (*_clean_ipc)(uid_t uid)) { + + uint32_t c, n; + + assert(m); + assert(uid_refs); + assert(uid_is_valid(uid)); + assert(_clean_ipc); + + /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption + * that uid_t and gid_t are actually defined the same way, with the same validity rules. + * + * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest + * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID + * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added + * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (uid == 0) /* We don't keep track of root, and will never destroy it */ + return; + + c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid))); + + n = c & ~DESTROY_IPC_FLAG; + assert(n > 0); + n--; + + if (destroy_now && n == 0) { + hashmap_remove(*uid_refs, UID_TO_PTR(uid)); + + if (c & DESTROY_IPC_FLAG) { + log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.", + _clean_ipc == clean_ipc_by_uid ? "UID" : "GID", + uid); + (void) _clean_ipc(uid); + } + } else { + c = n | (c & DESTROY_IPC_FLAG); + assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0); + } +} + +void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) { + manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid); +} + +void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) { + manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid); +} + +static int manager_ref_uid_internal( + Manager *m, + Hashmap **uid_refs, + uid_t uid, + bool clean_ipc) { + + uint32_t c, n; + int r; + + assert(m); + assert(uid_refs); + assert(uid_is_valid(uid)); + + /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption + * that uid_t and gid_t are actually defined the same way, with the same validity rules. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (uid == 0) /* We don't keep track of root, and will never destroy it */ + return 0; + + r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops); + if (r < 0) + return r; + + c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid))); + + n = c & ~DESTROY_IPC_FLAG; + n++; + + if (n & DESTROY_IPC_FLAG) /* check for overflow */ + return -EOVERFLOW; + + c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0); + + return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)); +} + +int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) { + return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc); +} + +int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) { + return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc); +} + +static void manager_vacuum_uid_refs_internal( + Manager *m, + Hashmap **uid_refs, + int (*_clean_ipc)(uid_t uid)) { + + Iterator i; + void *p, *k; + + assert(m); + assert(uid_refs); + assert(_clean_ipc); + + HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) { + uint32_t c, n; + uid_t uid; + + uid = PTR_TO_UID(k); + c = PTR_TO_UINT32(p); + + n = c & ~DESTROY_IPC_FLAG; + if (n > 0) + continue; + + if (c & DESTROY_IPC_FLAG) { + log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.", + _clean_ipc == clean_ipc_by_uid ? "UID" : "GID", + uid); + (void) _clean_ipc(uid); + } + + assert_se(hashmap_remove(*uid_refs, k) == p); + } +} + +void manager_vacuum_uid_refs(Manager *m) { + manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid); +} + +void manager_vacuum_gid_refs(Manager *m) { + manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid); +} + +static void manager_serialize_uid_refs_internal( + Manager *m, + FILE *f, + Hashmap **uid_refs, + const char *field_name) { + + Iterator i; + void *p, *k; + + assert(m); + assert(f); + assert(uid_refs); + assert(field_name); + + /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter + * of it is better rebuild after a reload/reexec. */ + + HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) { + uint32_t c; + uid_t uid; + + uid = PTR_TO_UID(k); + c = PTR_TO_UINT32(p); + + if (!(c & DESTROY_IPC_FLAG)) + continue; + + fprintf(f, "%s=" UID_FMT "\n", field_name, uid); + } +} + +void manager_serialize_uid_refs(Manager *m, FILE *f) { + manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid"); +} + +void manager_serialize_gid_refs(Manager *m, FILE *f) { + manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid"); +} + +static void manager_deserialize_uid_refs_one_internal( + Manager *m, + Hashmap** uid_refs, + const char *value) { + + uid_t uid; + uint32_t c; + int r; + + assert(m); + assert(uid_refs); + assert(value); + + r = parse_uid(value, &uid); + if (r < 0 || uid == 0) { + log_debug("Unable to parse UID reference serialization"); + return; + } + + r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops); + if (r < 0) { + log_oom(); + return; + } + + c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid))); + if (c & DESTROY_IPC_FLAG) + return; + + c |= DESTROY_IPC_FLAG; + + r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)); + if (r < 0) { + log_debug("Failed to add UID reference entry"); + return; + } +} + +void manager_deserialize_uid_refs_one(Manager *m, const char *value) { + manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value); +} + +void manager_deserialize_gid_refs_one(Manager *m, const char *value) { + manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value); +} + +int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + struct buffer { + uid_t uid; + gid_t gid; + char unit_name[UNIT_NAME_MAX+1]; + } _packed_ buffer; + + Manager *m = userdata; + ssize_t l; + size_t n; + Unit *u; + + assert_se(source); + assert_se(m); + + /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID + * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the + * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */ + + l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT); + if (l < 0) { + if (errno == EINTR || errno == EAGAIN) + return 0; + + return log_error_errno(errno, "Failed to read from user lookup fd: %m"); + } + + if ((size_t) l <= offsetof(struct buffer, unit_name)) { + log_warning("Received too short user lookup message, ignoring."); + return 0; + } + + if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) { + log_warning("Received too long user lookup message, ignoring."); + return 0; + } + + if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) { + log_warning("Got user lookup message with invalid UID/GID pair, ignoring."); + return 0; + } + + n = (size_t) l - offsetof(struct buffer, unit_name); + if (memchr(buffer.unit_name, 0, n)) { + log_warning("Received lookup message with embedded NUL character, ignoring."); + return 0; + } + + buffer.unit_name[n] = 0; + u = manager_get_unit(m, buffer.unit_name); + if (!u) { + log_debug("Got user lookup message but unit doesn't exist, ignoring."); + return 0; + } + + log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid); + + unit_notify_user_lookup(u, buffer.uid, buffer.gid); + return 0; +} + static const char *const manager_state_table[_MANAGER_STATE_MAX] = { [MANAGER_INITIALIZING] = "initializing", [MANAGER_STARTING] = "starting", diff --git a/src/core/manager.h b/src/core/manager.h index 6ed15c1a41..35172fdba9 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -81,6 +81,7 @@ struct Manager { /* Active jobs and units */ Hashmap *units; /* name string => Unit object n:1 */ + Hashmap *units_by_invocation_id; Hashmap *jobs; /* job id => Job object 1:1 */ /* To make it easy to iterate through the units of a specific @@ -143,6 +144,9 @@ struct Manager { sd_event_source *jobs_in_progress_event_source; + int user_lookup_fds[2]; + sd_event_source *user_lookup_event_source; + UnitFileScope unit_file_scope; LookupPaths lookup_paths; Set *unit_path_cache; @@ -234,7 +238,6 @@ struct Manager { bool dispatching_dbus_queue:1; bool taint_usr:1; - bool test_run:1; /* If non-zero, exit with the following value when the systemd @@ -292,18 +295,26 @@ struct Manager { * value where Unit objects are contained. */ Hashmap *units_requiring_mounts_for; - /* Reference to the kdbus bus control fd */ - int kdbus_fd; - /* Used for processing polkit authorization responses */ Hashmap *polkit_registry; - /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */ + /* Dynamic users/groups, indexed by their name */ + Hashmap *dynamic_users; + + /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */ + Hashmap *uid_refs; + Hashmap *gid_refs; + + /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */ RateLimit ctrl_alt_del_ratelimit; + EmergencyAction cad_burst_action; const char *unit_log_field; const char *unit_log_format_string; + const char *invocation_log_field; + const char *invocation_log_format_string; + int first_boot; /* tri-state */ }; @@ -375,5 +386,20 @@ ManagerState manager_state(Manager *m); int manager_update_failed_units(Manager *m, Unit *u, bool failed); +void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now); +int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc); + +void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now); +int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now); + +void manager_vacuum_uid_refs(Manager *m); +void manager_vacuum_gid_refs(Manager *m); + +void manager_serialize_uid_refs(Manager *m, FILE *f); +void manager_deserialize_uid_refs_one(Manager *m, const char *value); + +void manager_serialize_gid_refs(Manager *m, FILE *f); +void manager_deserialize_gid_refs_one(Manager *m, const char *value); + const char *manager_state_to_string(ManagerState m) _const_; ManagerState manager_state_from_string(const char *s) _pure_; diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 5d8ab0ec70..ca63a93e8b 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -99,10 +99,12 @@ static const MountPoint mount_table[] = { cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_IN_CONTAINER }, + cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #ifdef ENABLE_EFI diff --git a/src/core/mount.c b/src/core/mount.c index fda4d65d6f..d749e49df5 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -159,17 +159,6 @@ static void mount_init(Unit *u) { m->timeout_usec = u->manager->default_timeout_start_usec; m->directory_mode = 0755; - if (unit_has_name(u, "-.mount")) { - /* Don't allow start/stop for root directory */ - u->refuse_manual_start = true; - u->refuse_manual_stop = true; - } else { - /* The stdio/kmsg bridge socket is on /, in order to avoid a - * dep loop, don't use kmsg logging for -.mount */ - m->exec_context.std_output = u->manager->default_std_output; - m->exec_context.std_error = u->manager->default_std_error; - } - /* We need to make sure that /usr/bin/mount is always called * in the same process group as us, so that the autofs kernel * side doesn't send us another mount request while we are @@ -245,6 +234,8 @@ static void mount_done(Unit *u) { exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX); m->control_command = NULL; + dynamic_creds_unref(&m->dynamic_creds); + mount_unwatch_control_pid(m); m->timer_event_source = sd_event_source_unref(m->timer_event_source); @@ -482,6 +473,7 @@ static int mount_add_default_dependencies(Mount *m) { static int mount_verify(Mount *m) { _cleanup_free_ char *e = NULL; + MountParameters *p; int r; assert(m); @@ -506,7 +498,8 @@ static int mount_verify(Mount *m) { return -EINVAL; } - if (UNIT(m)->fragment_path && !m->parameters_fragment.what) { + p = get_mount_parameters_fragment(m); + if (p && !p->what) { log_unit_error(UNIT(m), "What= setting is missing. Refusing."); return -EBADMSG; } @@ -573,6 +566,25 @@ static int mount_add_extras(Mount *m) { return 0; } +static int mount_load_root_mount(Unit *u) { + assert(u); + + if (!unit_has_name(u, SPECIAL_ROOT_MOUNT)) + return 0; + + u->perpetual = true; + u->default_dependencies = false; + + /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */ + MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL; + MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL; + + if (!u->description) + u->description = strdup("Root Mount"); + + return 1; +} + static int mount_load(Unit *u) { Mount *m = MOUNT(u); int r; @@ -580,11 +592,14 @@ static int mount_load(Unit *u) { assert(u); assert(u->load_state == UNIT_STUB); - if (m->from_proc_self_mountinfo) + r = mount_load_root_mount(u); + if (r < 0) + return r; + + if (m->from_proc_self_mountinfo || u->perpetual) r = unit_load_fragment_and_dropin_optional(u); else r = unit_load_fragment_and_dropin(u); - if (r < 0) return r; @@ -648,6 +663,9 @@ static int mount_coldplug(Unit *u) { return r; } + if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED)) + (void) unit_setup_dynamic_creds(u); + mount_set_state(m, new_state); return 0; } @@ -670,7 +688,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) { "%sOptions: %s\n" "%sFrom /proc/self/mountinfo: %s\n" "%sFrom fragment: %s\n" - "%sDirectoryMode: %04o\n", + "%sDirectoryMode: %04o\n" + "%sSloppyOptions: %s\n" + "%sLazyUnmount: %s\n" + "%sForceUnmount: %s\n", prefix, mount_state_to_string(m->state), prefix, mount_result_to_string(m->result), prefix, m->where, @@ -679,7 +700,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) { prefix, p ? strna(p->options) : "n/a", prefix, yes_no(m->from_proc_self_mountinfo), prefix, yes_no(m->from_fragment), - prefix, m->directory_mode); + prefix, m->directory_mode, + prefix, yes_no(m->sloppy_options), + prefix, yes_no(m->lazy_unmount), + prefix, yes_no(m->force_unmount)); if (m->control_pid > 0) fprintf(f, @@ -694,12 +718,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { pid_t pid; int r; ExecParameters exec_params = { - .apply_permissions = true, - .apply_chroot = true, - .apply_tty_stdin = true, - .stdin_fd = -1, - .stdout_fd = -1, - .stderr_fd = -1, + .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, + .stdin_fd = -1, + .stdout_fd = -1, + .stderr_fd = -1, }; assert(m); @@ -716,12 +738,16 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; + r = unit_setup_dynamic_creds(UNIT(m)); + if (r < 0) + return r; + r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec)); if (r < 0) return r; exec_params.environment = UNIT(m)->manager->environment; - exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn; + exec_params.flags |= UNIT(m)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0; exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported; exec_params.cgroup_path = UNIT(m)->cgroup_path; exec_params.cgroup_delegate = m->cgroup_context.delegate; @@ -732,6 +758,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { &m->exec_context, &exec_params, m->exec_runtime, + &m->dynamic_creds, &pid); if (r < 0) return r; @@ -749,21 +776,25 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { static void mount_enter_dead(Mount *m, MountResult f) { assert(m); - if (f != MOUNT_SUCCESS) + if (m->result == MOUNT_SUCCESS) m->result = f; + mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD); + exec_runtime_destroy(m->exec_runtime); m->exec_runtime = exec_runtime_unref(m->exec_runtime); exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager)); - mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD); + unit_unref_uid_gid(UNIT(m), true); + + dynamic_creds_destroy(&m->dynamic_creds); } static void mount_enter_mounted(Mount *m, MountResult f) { assert(m); - if (f != MOUNT_SUCCESS) + if (m->result == MOUNT_SUCCESS) m->result = f; mount_set_state(m, MOUNT_MOUNTED); @@ -774,7 +805,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) { assert(m); - if (f != MOUNT_SUCCESS) + if (m->result == MOUNT_SUCCESS) m->result = f; r = unit_kill_context( @@ -810,7 +841,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) { fail: log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m"); - if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL) + if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL)) mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES); else mount_enter_dead(m, MOUNT_FAILURE_RESOURCES); @@ -832,6 +863,10 @@ static void mount_enter_unmounting(Mount *m) { m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT; r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL); + if (r >= 0 && m->lazy_unmount) + r = exec_command_append(m->control_command, "-l", NULL); + if (r >= 0 && m->force_unmount) + r = exec_command_append(m->control_command, "-f", NULL); if (r < 0) goto fail; @@ -850,11 +885,6 @@ fail: mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES); } -static int mount_get_opts(Mount *m, char **ret) { - return fstab_filter_options(m->parameters_fragment.options, - "nofail\0" "noauto\0" "auto\0", NULL, NULL, ret); -} - static void mount_enter_mounting(Mount *m) { int r; MountParameters *p; @@ -877,19 +907,18 @@ static void mount_enter_mounting(Mount *m) { if (p && mount_is_bind(p)) (void) mkdir_p_label(p->what, m->directory_mode); - if (m->from_fragment) { + if (p) { _cleanup_free_ char *opts = NULL; - r = mount_get_opts(m, &opts); + r = fstab_filter_options(p->options, "nofail\0" "noauto\0" "auto\0", NULL, NULL, &opts); if (r < 0) goto fail; - r = exec_command_set(m->control_command, MOUNT_PATH, - m->parameters_fragment.what, m->where, NULL); + r = exec_command_set(m->control_command, MOUNT_PATH, p->what, m->where, NULL); if (r >= 0 && m->sloppy_options) r = exec_command_append(m->control_command, "-s", NULL); - if (r >= 0 && m->parameters_fragment.fstype) - r = exec_command_append(m->control_command, "-t", m->parameters_fragment.fstype, NULL); + if (r >= 0 && p->fstype) + r = exec_command_append(m->control_command, "-t", p->fstype, NULL); if (r >= 0 && !isempty(opts)) r = exec_command_append(m->control_command, "-o", opts, NULL); } else @@ -915,27 +944,29 @@ fail: static void mount_enter_remounting(Mount *m) { int r; + MountParameters *p; assert(m); m->control_command_id = MOUNT_EXEC_REMOUNT; m->control_command = m->exec_command + MOUNT_EXEC_REMOUNT; - if (m->from_fragment) { + p = get_mount_parameters_fragment(m); + if (p) { const char *o; - if (m->parameters_fragment.options) - o = strjoina("remount,", m->parameters_fragment.options); + if (p->options) + o = strjoina("remount,", p->options); else o = "remount"; r = exec_command_set(m->control_command, MOUNT_PATH, - m->parameters_fragment.what, m->where, + p->what, m->where, "-o", o, NULL); if (r >= 0 && m->sloppy_options) r = exec_command_append(m->control_command, "-s", NULL); - if (r >= 0 && m->parameters_fragment.fstype) - r = exec_command_append(m->control_command, "-t", m->parameters_fragment.fstype, NULL); + if (r >= 0 && p->fstype) + r = exec_command_append(m->control_command, "-t", p->fstype, NULL); } else r = -ENOENT; @@ -966,18 +997,19 @@ static int mount_start(Unit *u) { /* We cannot fulfill this request right now, try again later * please! */ - if (m->state == MOUNT_UNMOUNTING || - m->state == MOUNT_UNMOUNTING_SIGTERM || - m->state == MOUNT_UNMOUNTING_SIGKILL || - m->state == MOUNT_MOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGKILL) + if (IN_SET(m->state, + MOUNT_UNMOUNTING, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_MOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGKILL)) return -EAGAIN; /* Already on it! */ if (m->state == MOUNT_MOUNTING) return 0; - assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED); + assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED)); r = unit_start_limit_test(u); if (r < 0) { @@ -985,6 +1017,10 @@ static int mount_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + m->result = MOUNT_SUCCESS; m->reload_result = MOUNT_SUCCESS; m->reset_cpu_usage = true; @@ -999,19 +1035,21 @@ static int mount_stop(Unit *u) { assert(m); /* Already on it */ - if (m->state == MOUNT_UNMOUNTING || - m->state == MOUNT_UNMOUNTING_SIGKILL || - m->state == MOUNT_UNMOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGKILL) + if (IN_SET(m->state, + MOUNT_UNMOUNTING, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGKILL)) return 0; - assert(m->state == MOUNT_MOUNTING || - m->state == MOUNT_MOUNTING_DONE || - m->state == MOUNT_MOUNTED || - m->state == MOUNT_REMOUNTING || - m->state == MOUNT_REMOUNTING_SIGTERM || - m->state == MOUNT_REMOUNTING_SIGKILL); + assert(IN_SET(m->state, + MOUNT_MOUNTING, + MOUNT_MOUNTING_DONE, + MOUNT_MOUNTED, + MOUNT_REMOUNTING, + MOUNT_REMOUNTING_SIGTERM, + MOUNT_REMOUNTING_SIGKILL)); mount_enter_unmounting(m); return 1; @@ -1139,7 +1177,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { m->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = MOUNT_SUCCESS; else if (code == CLD_EXITED) f = MOUNT_FAILURE_EXIT_CODE; @@ -1150,7 +1188,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { else assert_not_reached("Unknown code"); - if (f != MOUNT_SUCCESS) + if (m->result == MOUNT_SUCCESS) m->result = f; if (m->control_command) { @@ -1177,9 +1215,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { case MOUNT_MOUNTING_SIGKILL: case MOUNT_MOUNTING_SIGTERM: - if (f == MOUNT_SUCCESS) - mount_enter_mounted(m, f); - else if (m->from_proc_self_mountinfo) + if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo) + /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are + * happy. If we see the first condition first, we should see the the second condition + * immediately after – or /bin/mount lies to us and is broken. */ mount_enter_mounted(m, f); else mount_enter_dead(m, f); @@ -1365,11 +1404,7 @@ static int mount_setup_unit( if (!u) { delete = true; - u = unit_new(m, sizeof(Mount)); - if (!u) - return log_oom(); - - r = unit_add_name(u, e); + r = unit_new_for_name(m, sizeof(Mount), e, &u); if (r < 0) goto fail; @@ -1456,17 +1491,9 @@ static int mount_setup_unit( MOUNT(u)->from_proc_self_mountinfo = true; - free(p->what); - p->what = w; - w = NULL; - - free(p->options); - p->options = o; - o = NULL; - - free(p->fstype); - p->fstype = f; - f = NULL; + free_and_replace(p->what, w); + free_and_replace(p->options, o); + free_and_replace(p->fstype, f); if (load_extras) { r = mount_add_extras(MOUNT(u)); @@ -1572,11 +1599,46 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) { return 1; } +static int synthesize_root_mount(Manager *m) { + Unit *u; + int r; + + assert(m); + + /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's + * unconditionally synthesize it here and mark it as perpetual. */ + + u = manager_get_unit(m, SPECIAL_ROOT_MOUNT); + if (!u) { + r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u); + if (r < 0) + return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m"); + } + + u->perpetual = true; + MOUNT(u)->deserialized_state = MOUNT_MOUNTED; + + unit_add_to_load_queue(u); + unit_add_to_dbus_queue(u); + + return 0; +} + +static bool mount_is_mounted(Mount *m) { + assert(m); + + return UNIT(m)->perpetual || m->is_mounted; +} + static void mount_enumerate(Manager *m) { int r; assert(m); + r = synthesize_root_mount(m); + if (r < 0) + goto fail; + mnt_init_debug(0); if (!m->mount_monitor) { @@ -1683,7 +1745,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) { Mount *mount = MOUNT(u); - if (!mount->is_mounted) { + if (!mount_is_mounted(mount)) { /* A mount point is not around right now. It * might be gone, or might never have @@ -1722,9 +1784,10 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, case MOUNT_DEAD: case MOUNT_FAILED: - /* This has just been mounted by - * somebody else, follow the state - * change. */ + + /* This has just been mounted by somebody else, follow the state change, but let's + * generate a new invocation ID for this implicitly and automatically. */ + (void) unit_acquire_invocation_id(UNIT(mount)); mount_enter_mounted(mount, MOUNT_SUCCESS); break; @@ -1743,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, } } - if (mount->is_mounted && + if (mount_is_mounted(mount) && mount->from_proc_self_mountinfo && mount->parameters_proc_self_mountinfo.what) { @@ -1817,6 +1880,7 @@ const UnitVTable mount_vtable = { .cgroup_context_offset = offsetof(Mount, cgroup_context), .kill_context_offset = offsetof(Mount, kill_context), .exec_runtime_offset = offsetof(Mount, exec_runtime), + .dynamic_creds_offset = offsetof(Mount, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/mount.h b/src/core/mount.h index da529c44f4..9f7326ba6a 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -21,8 +21,8 @@ typedef struct Mount Mount; -#include "execute.h" #include "kill.h" +#include "dynamic-user.h" typedef enum MountExecCommand { MOUNT_EXEC_MOUNT, @@ -71,6 +71,9 @@ struct Mount { bool sloppy_options; + bool lazy_unmount; + bool force_unmount; + MountResult result; MountResult reload_result; @@ -85,6 +88,7 @@ struct Mount { CGroupContext cgroup_context; ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; MountState state, deserialized_state; diff --git a/src/core/namespace.c b/src/core/namespace.c index 52a2505d94..1195e9a854 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "dev-setup.h" #include "fd-util.h" +#include "fs-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -53,61 +54,245 @@ typedef enum MountMode { PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV, - READWRITE + READWRITE, } MountMode; typedef struct BindMount { - const char *path; + const char *path; /* stack memory, doesn't need to be freed explicitly */ + char *chased; /* malloc()ed memory, needs to be freed */ MountMode mode; - bool done; - bool ignore; + bool ignore; /* Ignore if path does not exist */ } BindMount; +typedef struct TargetMount { + const char *path; + MountMode mode; + bool ignore; /* Ignore if path does not exist */ +} TargetMount; + +/* + * The following Protect tables are to protect paths and mark some of them + * READONLY, in case a path is covered by an option from another table, then + * it is marked READWRITE in the current one, and the more restrictive mode is + * applied from that other table. This way all options can be combined in a + * safe and comprehensible way for users. + */ + +/* ProtectKernelTunables= option and the related filesystem APIs */ +static const TargetMount protect_kernel_tunables_table[] = { + { "/proc/sys", READONLY, false }, + { "/proc/sysrq-trigger", READONLY, true }, + { "/proc/latency_stats", READONLY, true }, + { "/proc/mtrr", READONLY, true }, + { "/proc/apm", READONLY, true }, + { "/proc/acpi", READONLY, true }, + { "/proc/timer_stats", READONLY, true }, + { "/proc/asound", READONLY, true }, + { "/proc/bus", READONLY, true }, + { "/proc/fs", READONLY, true }, + { "/proc/irq", READONLY, true }, + { "/sys", READONLY, false }, + { "/sys/kernel/debug", READONLY, true }, + { "/sys/kernel/tracing", READONLY, true }, + { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */ +}; + +/* ProtectKernelModules= option */ +static const TargetMount protect_kernel_modules_table[] = { +#ifdef HAVE_SPLIT_USR + { "/lib/modules", INACCESSIBLE, true }, +#endif + { "/usr/lib/modules", INACCESSIBLE, true }, +}; + +/* + * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of + * system should be protected by ProtectSystem= + */ +static const TargetMount protect_home_read_only_table[] = { + { "/home", READONLY, true }, + { "/run/user", READONLY, true }, + { "/root", READONLY, true }, +}; + +/* ProtectHome=yes table */ +static const TargetMount protect_home_yes_table[] = { + { "/home", INACCESSIBLE, true }, + { "/run/user", INACCESSIBLE, true }, + { "/root", INACCESSIBLE, true }, +}; + +/* ProtectSystem=yes table */ +static const TargetMount protect_system_yes_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, +}; + +/* ProtectSystem=full includes ProtectSystem=yes */ +static const TargetMount protect_system_full_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, + { "/etc", READONLY, false }, +}; + +/* + * ProtectSystem=strict table. In this strict mode, we mount everything + * read-only, except for /proc, /dev, /sys which are the kernel API VFS, + * which are left writable, but PrivateDevices= + ProtectKernelTunables= + * protect those, and these options should be fully orthogonal. + * (And of course /home and friends are also left writable, as ProtectHome= + * shall manage those, orthogonally). + */ +static const TargetMount protect_system_strict_table[] = { + { "/", READONLY, false }, + { "/proc", READWRITE, false }, /* ProtectKernelTunables= */ + { "/sys", READWRITE, false }, /* ProtectKernelTunables= */ + { "/dev", READWRITE, false }, /* PrivateDevices= */ + { "/home", READWRITE, true }, /* ProtectHome= */ + { "/run/user", READWRITE, true }, /* ProtectHome= */ + { "/root", READWRITE, true }, /* ProtectHome= */ +}; + +static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) { + (*p)->path = path; + (*p)->mode = mode; + (*p)->ignore = ignore; +} + static int append_mounts(BindMount **p, char **strv, MountMode mode) { char **i; assert(p); STRV_FOREACH(i, strv) { + bool ignore = false; - (*p)->ignore = false; - (*p)->done = false; - - if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') { - (*p)->ignore = true; + if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) { (*i)++; + ignore = true; } if (!path_is_absolute(*i)) return -EINVAL; - (*p)->path = *i; - (*p)->mode = mode; + set_bind_mount(p, *i, mode, ignore); (*p)++; } return 0; } -static int mount_path_compare(const void *a, const void *b) { - const BindMount *p = a, *q = b; - int d; +static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) { + unsigned i; - d = path_compare(p->path, q->path); + assert(p); + assert(mounts); + + for (i = 0; i < size; i++) { + /* + * Here we assume that the ignore field is set during + * declaration we do not support "-" at the beginning. + */ + const TargetMount *m = &mounts[i]; + const char *path = prefix_roota(root_directory, m->path); + + if (!path_is_absolute(path)) + return -EINVAL; + + set_bind_mount(p, path, m->mode, m->ignore); + (*p)++; + } + + return 0; +} + +static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_tunables_table, + ELEMENTSOF(protect_kernel_tunables_table)); +} + +static int append_protect_kernel_modules(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_modules_table, + ELEMENTSOF(protect_kernel_modules_table)); +} + +static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) { + int r = 0; + + assert(p); + + if (protect_home == PROTECT_HOME_NO) + return 0; + + switch (protect_home) { + case PROTECT_HOME_READ_ONLY: + r = append_target_mounts(p, root_directory, protect_home_read_only_table, + ELEMENTSOF(protect_home_read_only_table)); + break; + case PROTECT_HOME_YES: + r = append_target_mounts(p, root_directory, protect_home_yes_table, + ELEMENTSOF(protect_home_yes_table)); + break; + default: + r = -EINVAL; + break; + } + + return r; +} - if (d == 0) { - /* If the paths are equal, check the mode */ - if (p->mode < q->mode) - return -1; +static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) { + int r = 0; - if (p->mode > q->mode) - return 1; + assert(p); + if (protect_system == PROTECT_SYSTEM_NO) return 0; + + switch (protect_system) { + case PROTECT_SYSTEM_STRICT: + r = append_target_mounts(p, root_directory, protect_system_strict_table, + ELEMENTSOF(protect_system_strict_table)); + break; + case PROTECT_SYSTEM_YES: + r = append_target_mounts(p, root_directory, protect_system_yes_table, + ELEMENTSOF(protect_system_yes_table)); + break; + case PROTECT_SYSTEM_FULL: + r = append_target_mounts(p, root_directory, protect_system_full_table, + ELEMENTSOF(protect_system_full_table)); + break; + default: + r = -EINVAL; + break; } + return r; +} + +static int mount_path_compare(const void *a, const void *b) { + const BindMount *p = a, *q = b; + int d; + /* If the paths are not equal, then order prefixes first */ - return d; + d = path_compare(p->path, q->path); + if (d != 0) + return d; + + /* If the paths are equal, check the mode */ + if (p->mode < q->mode) + return -1; + + if (p->mode > q->mode) + return 1; + + return 0; } static void drop_duplicates(BindMount *m, unsigned *n) { @@ -116,16 +301,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) { assert(m); assert(n); + /* Drops duplicate entries. Expects that the array is properly ordered already. */ + for (f = m, t = m, previous = NULL; f < m+*n; f++) { - /* The first one wins */ - if (previous && path_equal(f->path, previous->path)) + /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare() + * above. */ + if (previous && path_equal(f->path, previous->path)) { + log_debug("%s is duplicate.", f->path); continue; + } *t = *f; - previous = t; + t++; + } + + *n = t - m; +} + +static void drop_inaccessible(BindMount *m, unsigned *n) { + BindMount *f, *t; + const char *clear = NULL; + assert(m); + assert(n); + + /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly + * ordered already. */ + + for (f = m, t = m; f < m+*n; f++) { + + /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop + * it, as inaccessible paths really should drop the entire subtree. */ + if (clear && path_startswith(f->path, clear)) { + log_debug("%s is masked by %s.", f->path, clear); + continue; + } + + clear = f->mode == INACCESSIBLE ? f->path : NULL; + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_nop(BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the + * list is ordered by prefixes. */ + + for (f = m, t = m; f < m+*n; f++) { + + /* Only suppress such subtrees for READONLY and READWRITE entries */ + if (IN_SET(f->mode, READONLY, READWRITE)) { + BindMount *p; + bool found = false; + + /* Now let's find the first parent of the entry we are looking at. */ + for (p = t-1; p >= m; p--) { + if (path_startswith(f->path, p->path)) { + found = true; + break; + } + } + + /* We found it, let's see if it's the same mode, if so, we can drop this entry */ + if (found && p->mode == f->mode) { + log_debug("%s is redundant by %s", f->path, p->path); + continue; + } + } + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + if (!root_directory) + return; + + /* Drops all mounts that are outside of the root directory. */ + + for (f = m, t = m; f < m+*n; f++) { + + if (!path_startswith(f->path, root_directory)) { + log_debug("%s is outside of root directory.", f->path); + continue; + } + + *t = *f; t++; } @@ -278,24 +557,23 @@ static int apply_mount( const char *what; int r; - struct stat target; assert(m); + log_debug("Applying namespace mount on %s", m->path); + switch (m->mode) { - case INACCESSIBLE: + case INACCESSIBLE: { + struct stat target; /* First, get rid of everything that is below if there * is anything... Then, overmount it with an * inaccessible path. */ - umount_recursive(m->path, 0); + (void) umount_recursive(m->path, 0); - if (lstat(m->path, &target) < 0) { - if (m->ignore && errno == ENOENT) - return 0; - return -errno; - } + if (lstat(m->path, &target) < 0) + return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path); what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -303,11 +581,20 @@ static int apply_mount( return -ELOOP; } break; + } + case READONLY: case READWRITE: - /* Nothing to mount here, we just later toggle the - * MS_RDONLY bit for the mount point */ - return 0; + + r = path_is_mount_point(m->path, 0); + if (r < 0) + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); + if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ + return 0; + + /* This isn't a mount point yet, let's make it one. */ + what = m->path; + break; case PRIVATE_TMP: what = tmp_dir; @@ -326,68 +613,133 @@ static int apply_mount( assert(what); - r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) { - log_debug("Successfully mounted %s to %s", what, m->path); - return r; - } else { - if (m->ignore && errno == ENOENT) - return 0; + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); - } + + log_debug("Successfully mounted %s to %s", what, m->path); + return 0; } -static int make_read_only(BindMount *m) { - int r; +static int make_read_only(BindMount *m, char **blacklist) { + int r = 0; assert(m); if (IN_SET(m->mode, INACCESSIBLE, READONLY)) - r = bind_remount_recursive(m->path, true); - else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { - r = bind_remount_recursive(m->path, false); - if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + r = bind_remount_recursive(m->path, true, blacklist); + else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else - r = 0; - - if (m->ignore && r == -ENOENT) return 0; + /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only + * already stays this way. This improves compatibility with container managers, where we won't attempt to undo + * read-only mounts already applied. */ + return r; } +static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + int r; + + assert(m); + assert(n); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we + * can't resolve the path, and which have been marked for such removal. */ + + for (f = m, t = m; f < m+*n; f++) { + + r = chase_symlinks(f->path, root_directory, &f->chased); + if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */ + continue; + if (r < 0) + return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path); + + if (path_equal(f->path, f->chased)) + f->chased = mfree(f->chased); + else { + log_debug("Chased %s → %s", f->path, f->chased); + f->path = f->chased; + } + + *t = *f; + t++; + } + + *n = t - m; + return 0; +} + +static unsigned namespace_calculate_mounts( + const NameSpaceInfo *ns_info, + char** read_write_paths, + char** read_only_paths, + char** inaccessible_paths, + const char* tmp_dir, + const char* var_tmp_dir, + ProtectHome protect_home, + ProtectSystem protect_system) { + + unsigned protect_home_cnt; + unsigned protect_system_cnt = + (protect_system == PROTECT_SYSTEM_STRICT ? + ELEMENTSOF(protect_system_strict_table) : + ((protect_system == PROTECT_SYSTEM_FULL) ? + ELEMENTSOF(protect_system_full_table) : + ((protect_system == PROTECT_SYSTEM_YES) ? + ELEMENTSOF(protect_system_yes_table) : 0))); + + protect_home_cnt = + (protect_home == PROTECT_HOME_YES ? + ELEMENTSOF(protect_home_yes_table) : + ((protect_home == PROTECT_HOME_READ_ONLY) ? + ELEMENTSOF(protect_home_read_only_table) : 0)); + + return !!tmp_dir + !!var_tmp_dir + + strv_length(read_write_paths) + + strv_length(read_only_paths) + + strv_length(inaccessible_paths) + + ns_info->private_dev + + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + + (ns_info->protect_control_groups ? 1 : 0) + + (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) + + protect_home_cnt + protect_system_cnt; +} + int setup_namespace( const char* root_directory, + const NameSpaceInfo *ns_info, char** read_write_paths, char** read_only_paths, char** inaccessible_paths, const char* tmp_dir, const char* var_tmp_dir, - bool private_dev, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags) { BindMount *m, *mounts = NULL; + bool make_slave = false; unsigned n; int r = 0; if (mount_flags == 0) mount_flags = MS_SHARED; - if (unshare(CLONE_NEWNS) < 0) - return -errno; + n = namespace_calculate_mounts(ns_info, + read_write_paths, + read_only_paths, + inaccessible_paths, + tmp_dir, var_tmp_dir, + protect_home, protect_system); - n = !!tmp_dir + !!var_tmp_dir + - strv_length(read_write_paths) + - strv_length(read_only_paths) + - strv_length(inaccessible_paths) + - private_dev + - (protect_home != PROTECT_HOME_NO ? 3 : 0) + - (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) + - (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0); + /* Set mount slave mode */ + if (root_directory || n > 0) + make_slave = true; if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); @@ -415,100 +767,127 @@ int setup_namespace( m++; } - if (private_dev) { + if (ns_info->private_dev) { m->path = prefix_roota(root_directory, "/dev"); m->mode = PRIVATE_DEV; m++; } - if (protect_home != PROTECT_HOME_NO) { - const char *home_dir, *run_user_dir, *root_dir; - - home_dir = prefix_roota(root_directory, "/home"); - home_dir = strjoina("-", home_dir); - run_user_dir = prefix_roota(root_directory, "/run/user"); - run_user_dir = strjoina("-", run_user_dir); - root_dir = prefix_roota(root_directory, "/root"); - root_dir = strjoina("-", root_dir); - - r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir), - protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE); + if (ns_info->protect_kernel_tunables) { + r = append_protect_kernel_tunables(&m, root_directory); if (r < 0) return r; } - if (protect_system != PROTECT_SYSTEM_NO) { - const char *usr_dir, *boot_dir, *etc_dir; - - usr_dir = prefix_roota(root_directory, "/usr"); - boot_dir = prefix_roota(root_directory, "/boot"); - boot_dir = strjoina("-", boot_dir); - etc_dir = prefix_roota(root_directory, "/etc"); - - r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL - ? STRV_MAKE(usr_dir, boot_dir, etc_dir) - : STRV_MAKE(usr_dir, boot_dir), READONLY); + if (ns_info->protect_kernel_modules) { + r = append_protect_kernel_modules(&m, root_directory); if (r < 0) return r; } + if (ns_info->protect_control_groups) { + m->path = prefix_roota(root_directory, "/sys/fs/cgroup"); + m->mode = READONLY; + m++; + } + + r = append_protect_home(&m, root_directory, protect_home); + if (r < 0) + return r; + + r = append_protect_system(&m, root_directory, protect_system); + if (r < 0) + return r; + assert(mounts + n == m); + /* Resolve symlinks manually first, as mount() will always follow them relative to the host's + * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit + * racy. */ + r = chase_all_symlinks(root_directory, mounts, &n); + if (r < 0) + goto finish; + qsort(mounts, n, sizeof(BindMount), mount_path_compare); + drop_duplicates(mounts, &n); + drop_outside_root(root_directory, mounts, &n); + drop_inaccessible(mounts, &n); + drop_nop(mounts, &n); } - if (n > 0 || root_directory) { + if (unshare(CLONE_NEWNS) < 0) { + r = -errno; + goto finish; + } + + if (make_slave) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - return -errno; + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (root_directory) { - /* Turn directory into bind mount */ - if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) - return -errno; + /* Turn directory into bind mount, if it isn't one yet */ + r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW); + if (r < 0) + goto finish; + if (r == 0) { + if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } + } } if (n > 0) { + char **blacklist; + unsigned j; + + /* First round, add in all special mounts we need */ for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto fail; + goto finish; } + /* Create a blacklist we can pass to bind_mount_recursive() */ + blacklist = newa(char*, n+1); + for (j = 0; j < n; j++) + blacklist[j] = (char*) mounts[j].path; + blacklist[j] = NULL; + + /* Second round, flip the ro bits if necessary. */ for (m = mounts; m < mounts + n; ++m) { - r = make_read_only(m); + r = make_read_only(m, blacklist); if (r < 0) - goto fail; + goto finish; } } if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - - /* at this point, we cannot rollback */ if (r < 0) - return r; + goto finish; } /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ - if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - /* at this point, we cannot rollback */ - return -errno; + if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } - return 0; + r = 0; -fail: - if (n > 0) { - for (m = mounts; m < mounts + n; ++m) - if (m->done) - (void) umount2(m->path, MNT_DETACH); - } +finish: + for (m = mounts; m < mounts + n; m++) + free(m->chased); return r; } @@ -658,6 +1037,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { [PROTECT_SYSTEM_NO] = "no", [PROTECT_SYSTEM_YES] = "yes", [PROTECT_SYSTEM_FULL] = "full", + [PROTECT_SYSTEM_STRICT] = "strict", }; DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem); diff --git a/src/core/namespace.h b/src/core/namespace.h index 1aedf5f208..6310638e9a 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -4,6 +4,7 @@ This file is part of systemd. Copyright 2010 Lennart Poettering + Copyright 2016 Djalal Harouni systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -19,6 +20,8 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ +typedef struct NameSpaceInfo NameSpaceInfo; + #include <stdbool.h> #include "macro.h" @@ -35,17 +38,25 @@ typedef enum ProtectSystem { PROTECT_SYSTEM_NO, PROTECT_SYSTEM_YES, PROTECT_SYSTEM_FULL, + PROTECT_SYSTEM_STRICT, _PROTECT_SYSTEM_MAX, _PROTECT_SYSTEM_INVALID = -1 } ProtectSystem; +struct NameSpaceInfo { + bool private_dev:1; + bool protect_control_groups:1; + bool protect_kernel_tunables:1; + bool protect_kernel_modules:1; +}; + int setup_namespace(const char *chroot, + const NameSpaceInfo *ns_info, char **read_write_paths, char **read_only_paths, char **inaccessible_paths, const char *tmp_dir, const char *var_tmp_dir, - bool private_dev, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags); diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index 3c64f20872..a61677e645 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -54,6 +54,10 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="GetUnitByInvocationID"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="LoadUnit"/> <allow send_destination="org.freedesktop.systemd1" @@ -90,6 +94,10 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="GetUnitFileLinks"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="ListJobs"/> <allow send_destination="org.freedesktop.systemd1" @@ -108,6 +116,14 @@ send_interface="org.freedesktop.systemd1.Manager" send_member="GetDefaultTarget"/> + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" + send_member="LookupDynamicUserByName"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" + send_member="LookupDynamicUserByUID"/> + <!-- Managed via polkit or other criteria --> <allow send_destination="org.freedesktop.systemd1" @@ -176,6 +192,14 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="RefUnit"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" + send_member="UnrefUnit"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="EnableUnitFiles"/> <allow send_destination="org.freedesktop.systemd1" diff --git a/src/core/path.c b/src/core/path.c index 0dd0d375d8..83f794be89 100644 --- a/src/core/path.c +++ b/src/core/path.c @@ -454,7 +454,7 @@ static int path_coldplug(Unit *u) { static void path_enter_dead(Path *p, PathResult f) { assert(p); - if (f != PATH_SUCCESS) + if (p->result == PATH_SUCCESS) p->result = f; path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD); @@ -577,6 +577,10 @@ static int path_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + path_mkdir(p); p->result = PATH_SUCCESS; diff --git a/src/core/scope.c b/src/core/scope.c index b45e238974..d6e1f8e392 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -147,6 +147,32 @@ static int scope_verify(Scope *s) { return 0; } +static int scope_load_init_scope(Unit *u) { + assert(u); + + if (!unit_has_name(u, SPECIAL_INIT_SCOPE)) + return 0; + + u->transient = true; + u->perpetual = true; + + /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we + * synthesize it here, instead of relying on the unit file on disk. */ + + u->default_dependencies = false; + u->ignore_on_isolate = true; + + SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14; + + /* Prettify things, if we can. */ + if (!u->description) + u->description = strdup("System and Service Manager"); + if (!u->documentation) + (void) strv_extend(&u->documentation, "man:systemd(1)"); + + return 1; +} + static int scope_load(Unit *u) { Scope *s = SCOPE(u); int r; @@ -158,6 +184,9 @@ static int scope_load(Unit *u) { /* Refuse to load non-transient scope units, but allow them while reloading. */ return -ENOENT; + r = scope_load_init_scope(u); + if (r < 0) + return r; r = unit_load_fragment_and_dropin_optional(u); if (r < 0) return r; @@ -221,7 +250,7 @@ static void scope_dump(Unit *u, FILE *f, const char *prefix) { static void scope_enter_dead(Scope *s, ScopeResult f) { assert(s); - if (f != SCOPE_SUCCESS) + if (s->result == SCOPE_SUCCESS) s->result = f; scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD); @@ -233,7 +262,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { assert(s); - if (f != SCOPE_SUCCESS) + if (s->result == SCOPE_SUCCESS) s->result = f; unit_watch_all_pids(UNIT(s)); @@ -298,6 +327,10 @@ static int scope_start(Unit *u) { if (!u->transient && !MANAGER_IS_RELOADING(u->manager)) return -ENOENT; + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + (void) unit_realize_cgroup(u); (void) unit_reset_cpu_usage(u); @@ -441,7 +474,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified() <= 0 && set_isempty(u->pids)) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) scope_notify_cgroup_empty_event(u); } @@ -530,34 +563,16 @@ static void scope_enumerate(Manager *m) { u = manager_get_unit(m, SPECIAL_INIT_SCOPE); if (!u) { - u = unit_new(m, sizeof(Scope)); - if (!u) { - log_oom(); - return; - } - - r = unit_add_name(u, SPECIAL_INIT_SCOPE); + r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u); if (r < 0) { - unit_free(u); - log_error_errno(r, "Failed to add init.scope name"); + log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m"); return; } } u->transient = true; - u->default_dependencies = false; - u->no_gc = true; - u->ignore_on_isolate = true; - u->refuse_manual_start = true; - u->refuse_manual_stop = true; + u->perpetual = true; SCOPE(u)->deserialized_state = SCOPE_RUNNING; - SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14; - - /* Prettify things, if we can. */ - if (!u->description) - u->description = strdup("System and Service Manager"); - if (!u->documentation) - (void) strv_extend(&u->documentation, "man:systemd(1)"); unit_add_to_load_queue(u); unit_add_to_dbus_queue(u); diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h index 8f1f058a32..f46370d020 100644 --- a/src/core/selinux-access.h +++ b/src/core/selinux-access.h @@ -33,7 +33,7 @@ int mac_selinux_generic_access_check(sd_bus_message *message, const char *path, #define mac_selinux_unit_access_check(unit, message, permission, error) \ ({ \ - Unit *_unit = (unit); \ + const Unit *_unit = (unit); \ mac_selinux_generic_access_check((message), _unit->source_path ?: _unit->fragment_path, (permission), (error)); \ }) diff --git a/src/core/service.c b/src/core/service.c index afb198507b..a7274a758f 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) { free(fs); } -static void service_release_resources(Unit *u) { +static void service_release_fd_store(Service *s) { + assert(s); + + log_unit_debug(UNIT(s), "Releasing all stored fds"); + while (s->fd_store) + service_fd_store_unlink(s->fd_store); + + assert(s->n_fd_store == 0); +} + +static void service_release_resources(Unit *u, bool inactive) { Service *s = SERVICE(u); assert(s); @@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) { if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0) return; - log_unit_debug(u, "Releasing all resources."); + log_unit_debug(u, "Releasing resources."); s->stdin_fd = safe_close(s->stdin_fd); s->stdout_fd = safe_close(s->stdout_fd); s->stderr_fd = safe_close(s->stderr_fd); - while (s->fd_store) - service_fd_store_unlink(s->fd_store); - - assert(s->n_fd_store == 0); + if (inactive) + service_release_fd_store(s); } static void service_done(Unit *u) { @@ -322,6 +330,8 @@ static void service_done(Unit *u) { s->control_command = NULL; s->main_command = NULL; + dynamic_creds_unref(&s->dynamic_creds); + exit_status_set_free(&s->restart_prevent_status); exit_status_set_free(&s->restart_force_status); exit_status_set_free(&s->success_status); @@ -340,6 +350,7 @@ static void service_done(Unit *u) { s->bus_name_owner = mfree(s->bus_name_owner); service_close_socket_fd(s); + s->peer = socket_peer_unref(s->peer); unit_ref_unset(&s->accept_socket); @@ -347,7 +358,7 @@ static void service_done(Unit *u) { s->timer_event_source = sd_event_source_unref(s->timer_event_source); - service_release_resources(u); + service_release_resources(u, true); } static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) { @@ -357,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us assert(fs); /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */ + log_unit_debug(UNIT(fs->service), + "Received %s on stored fd %d (%s), closing.", + revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP", + fs->fd, strna(fs->fdname)); service_fd_store_unlink(fs); return 0; } @@ -365,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) { ServiceFDStore *fs; int r; + /* fd is always consumed if we return >= 0 */ + assert(s); assert(fd >= 0); if (s->n_fd_store >= s->n_fd_store_max) - return 0; + return -EXFULL; /* Our store is full. + * Use this errno rather than E[NM]FILE to distinguish from + * the case where systemd itself hits the file limit. */ LIST_FOREACH(fd_store, fs, s->fd_store) { r = same_fd(fs->fd, fd); if (r < 0) return r; if (r > 0) { - /* Already included */ safe_close(fd); - return 1; + return 0; /* fd already included */ } } @@ -406,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) { LIST_PREPEND(fd_store, s->fd_store, fs); s->n_fd_store++; - return 1; + return 1; /* fd newly stored */ } static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { @@ -414,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { assert(s); - if (fdset_size(fds) <= 0) - return 0; - - while (s->n_fd_store < s->n_fd_store_max) { + while (fdset_size(fds) > 0) { _cleanup_close_ int fd = -1; fd = fdset_steal_first(fds); @@ -425,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { break; r = service_add_fd_store(s, fd, name); + if (r == -EXFULL) + return log_unit_warning_errno(UNIT(s), r, + "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.", + s->n_fd_store_max); if (r < 0) - return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m"); - if (r > 0) { - log_unit_debug(UNIT(s), "Added fd to fd store."); - fd = -1; - } + return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m"); + if (r > 0) + log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name)); + fd = -1; } - if (fdset_size(fds) > 0) - log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max); - return 0; } @@ -758,6 +773,11 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { prefix, s->bus_name, prefix, yes_no(s->bus_name_good)); + if (UNIT_ISSET(s->accept_socket)) + fprintf(f, + "%sAccept Socket: %s\n", + prefix, UNIT_DEREF(s->accept_socket)->id); + kill_context_dump(&s->kill_context, f, prefix); exec_context_dump(&s->exec_context, f, prefix); @@ -1030,6 +1050,23 @@ static int service_coldplug(Unit *u) { if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) service_start_watchdog(s); + if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) + (void) unit_setup_dynamic_creds(u); + + if (UNIT_ISSET(s->accept_socket)) { + Socket* socket = SOCKET(UNIT_DEREF(s->accept_socket)); + + if (socket->max_connections_per_source > 0) { + SocketPeer *peer; + + /* Make a best-effort attempt at bumping the connection count */ + if (socket_acquire_peer(socket, s->socket_fd, &peer) > 0) { + socket_peer_unref(s->peer); + s->peer = peer; + } + } + } + service_set_state(s, s->deserialized_state); return 0; } @@ -1146,11 +1183,7 @@ static int service_spawn( Service *s, ExecCommand *c, usec_t timeout, - bool pass_fds, - bool apply_permissions, - bool apply_chroot, - bool apply_tty_stdin, - bool is_control, + ExecFlags flags, pid_t *_pid) { _cleanup_strv_free_ char **argv = NULL, **final_env = NULL, **our_env = NULL, **fd_names = NULL; @@ -1160,12 +1193,10 @@ static int service_spawn( pid_t pid; ExecParameters exec_params = { - .apply_permissions = apply_permissions, - .apply_chroot = apply_chroot, - .apply_tty_stdin = apply_tty_stdin, - .stdin_fd = -1, - .stdout_fd = -1, - .stderr_fd = -1, + .flags = flags, + .stdin_fd = -1, + .stdout_fd = -1, + .stderr_fd = -1, }; int r; @@ -1174,6 +1205,14 @@ static int service_spawn( assert(c); assert(_pid); + if (flags & EXEC_IS_CONTROL) { + /* If this is a control process, mask the permissions/chroot application if this is requested. */ + if (s->permissions_start_only) + exec_params.flags &= ~EXEC_APPLY_PERMISSIONS; + if (s->root_directory_start_only) + exec_params.flags &= ~EXEC_APPLY_CHROOT; + } + (void) unit_realize_cgroup(UNIT(s)); if (s->reset_cpu_usage) { (void) unit_reset_cpu_usage(UNIT(s)); @@ -1184,7 +1223,11 @@ static int service_spawn( if (r < 0) return r; - if (pass_fds || + r = unit_setup_dynamic_creds(UNIT(s)); + if (r < 0) + return r; + + if ((flags & EXEC_PASS_FDS) || s->exec_context.std_input == EXEC_INPUT_SOCKET || s->exec_context.std_output == EXEC_OUTPUT_SOCKET || s->exec_context.std_error == EXEC_OUTPUT_SOCKET) { @@ -1194,6 +1237,7 @@ static int service_spawn( return r; n_fds = r; + log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds); } r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout)); @@ -1204,11 +1248,11 @@ static int service_spawn( if (r < 0) return r; - our_env = new0(char*, 6); + our_env = new0(char*, 9); if (!our_env) return -ENOMEM; - if (is_control ? s->notify_access == NOTIFY_ALL : s->notify_access != NOTIFY_NONE) + if ((flags & EXEC_IS_CONTROL) ? s->notify_access == NOTIFY_ALL : s->notify_access != NOTIFY_NONE) if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0) return -ENOMEM; @@ -1216,7 +1260,7 @@ static int service_spawn( if (asprintf(our_env + n_env++, "MAINPID="PID_FMT, s->main_pid) < 0) return -ENOMEM; - if (!MANAGER_IS_SYSTEM(UNIT(s)->manager)) + if (MANAGER_IS_USER(UNIT(s)->manager)) if (asprintf(our_env + n_env++, "MANAGERPID="PID_FMT, getpid()) < 0) return -ENOMEM; @@ -1225,10 +1269,16 @@ static int service_spawn( socklen_t salen = sizeof(sa); r = getpeername(s->socket_fd, &sa.sa, &salen); - if (r < 0) - return -errno; + if (r < 0) { + r = -errno; - if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) { + /* ENOTCONN is legitimate if the endpoint disappeared on shutdown. + * This connection is over, but the socket unit lives on. */ + if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST)) + return r; + } + + if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) { _cleanup_free_ char *addr = NULL; char *t; int port; @@ -1252,22 +1302,40 @@ static int service_spawn( } } + if (flags & EXEC_SETENV_RESULT) { + if (asprintf(our_env + n_env++, "SERVICE_RESULT=%s", service_result_to_string(s->result)) < 0) + return -ENOMEM; + + if (s->main_exec_status.pid > 0 && + dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) { + if (asprintf(our_env + n_env++, "EXIT_CODE=%s", sigchld_code_to_string(s->main_exec_status.code)) < 0) + return -ENOMEM; + + if (s->main_exec_status.code == CLD_EXITED) + r = asprintf(our_env + n_env++, "EXIT_STATUS=%i", s->main_exec_status.status); + else + r = asprintf(our_env + n_env++, "EXIT_STATUS=%s", signal_to_string(s->main_exec_status.status)); + if (r < 0) + return -ENOMEM; + } + } + final_env = strv_env_merge(2, UNIT(s)->manager->environment, our_env, NULL); if (!final_env) return -ENOMEM; - if (is_control && UNIT(s)->cgroup_path) { + if ((flags & EXEC_IS_CONTROL) && UNIT(s)->cgroup_path) { path = strjoina(UNIT(s)->cgroup_path, "/control"); (void) cg_create(SYSTEMD_CGROUP_CONTROLLER, path); } else path = UNIT(s)->cgroup_path; exec_params.argv = argv; + exec_params.environment = final_env; exec_params.fds = fds; exec_params.fd_names = fd_names; exec_params.n_fds = n_fds; - exec_params.environment = final_env; - exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn; + exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0; exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported; exec_params.cgroup_path = path; exec_params.cgroup_delegate = s->cgroup_context.delegate; @@ -1285,6 +1353,7 @@ static int service_spawn( &s->exec_context, &exec_params, s->exec_runtime, + &s->dynamic_creds, &pid); if (r < 0) return r; @@ -1392,14 +1461,14 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) int r; assert(s); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; service_set_state(s, s->result != SERVICE_SUCCESS ? SERVICE_FAILED : SERVICE_DEAD); if (s->result != SERVICE_SUCCESS) { log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result)); - failure_action(UNIT(s)->manager, s->failure_action, UNIT(s)->reboot_arg); + emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed"); } if (allow_restart && service_shall_restart(s)) { @@ -1418,9 +1487,15 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) exec_runtime_destroy(s->exec_runtime); s->exec_runtime = exec_runtime_unref(s->exec_runtime); - /* Also, remove the runtime directory in */ + /* Also, remove the runtime directory */ exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); + /* Get rid of the IPC bits of the user */ + unit_unref_uid_gid(UNIT(s), true); + + /* Release the user, and destroy it if we are the only remaining owner */ + dynamic_creds_destroy(&s->dynamic_creds); + /* Try to delete the pid file. At this point it will be * out-of-date, and some software might be confused by it, so * let's remove it. */ @@ -1438,7 +1513,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { int r; assert(s); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; service_unwatch_control_pid(s); @@ -1451,11 +1526,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { r = service_spawn(s, s->control_command, s->timeout_stop_usec, - false, - !s->permissions_start_only, - !s->root_directory_start_only, - true, - true, + EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT, &s->control_pid); if (r < 0) goto fail; @@ -1495,7 +1566,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f assert(s); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; unit_watch_all_pids(UNIT(s)); @@ -1553,7 +1624,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { assert(s); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; service_unwatch_control_pid(s); @@ -1566,11 +1637,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { r = service_spawn(s, s->control_command, s->timeout_stop_usec, - false, - !s->permissions_start_only, - !s->root_directory_start_only, - false, - true, + EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT, &s->control_pid); if (r < 0) goto fail; @@ -1609,7 +1676,7 @@ static bool service_good(Service *s) { static void service_enter_running(Service *s, ServiceResult f) { assert(s); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; service_unwatch_control_pid(s); @@ -1647,11 +1714,7 @@ static void service_enter_start_post(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - false, - !s->permissions_start_only, - !s->root_directory_start_only, - false, - true, + EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL, &s->control_pid); if (r < 0) goto fail; @@ -1706,7 +1769,15 @@ static void service_enter_start(Service *s) { } if (!c) { - assert(s->type == SERVICE_ONESHOT); + if (s->type != SERVICE_ONESHOT) { + /* There's no command line configured for the main command? Hmm, that is strange. This can only + * happen if the configuration changes at runtime. In this case, let's enter a failure + * state. */ + log_unit_error(UNIT(s), "There's no 'start' task anymore we could start: %m"); + r = -ENXIO; + goto fail; + } + service_enter_start_post(s); return; } @@ -1721,11 +1792,7 @@ static void service_enter_start(Service *s) { r = service_spawn(s, c, timeout, - true, - true, - true, - true, - false, + EXEC_PASS_FDS|EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG, &pid); if (r < 0) goto fail; @@ -1784,11 +1851,7 @@ static void service_enter_start_pre(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - false, - !s->permissions_start_only, - !s->root_directory_start_only, - true, - true, + EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN, &s->control_pid); if (r < 0) goto fail; @@ -1863,11 +1926,7 @@ static void service_enter_reload(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - false, - !s->permissions_start_only, - !s->root_directory_start_only, - false, - true, + EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL, &s->control_pid); if (r < 0) goto fail; @@ -1905,12 +1964,9 @@ static void service_run_next_control(Service *s) { r = service_spawn(s, s->control_command, timeout, - false, - !s->permissions_start_only, - !s->root_directory_start_only, - s->control_command_id == SERVICE_EXEC_START_PRE || - s->control_command_id == SERVICE_EXEC_STOP_POST, - true, + EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL| + (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)| + (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0), &s->control_pid); if (r < 0) goto fail; @@ -1948,11 +2004,7 @@ static void service_run_next_main(Service *s) { r = service_spawn(s, s->main_command, s->timeout_start_usec, - true, - true, - true, - true, - false, + EXEC_PASS_FDS|EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG, &pid); if (r < 0) goto fail; @@ -2002,6 +2054,10 @@ static int service_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; s->main_pid_known = false; @@ -2116,6 +2172,12 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { if (r < 0) return r; + if (UNIT_ISSET(s->accept_socket)) { + r = unit_serialize_item(u, f, "accept-socket", UNIT_DEREF(s->accept_socket)->id); + if (r < 0) + return r; + } + r = unit_serialize_item_fd(u, f, fds, "socket-fd", s->socket_fd); if (r < 0) return r; @@ -2246,6 +2308,17 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, s->control_command_id = id; s->control_command = s->exec_command[id]; } + } else if (streq(key, "accept-socket")) { + Unit *socket; + + r = manager_load_unit(u->manager, value, NULL, NULL, &socket); + if (r < 0) + log_unit_debug_errno(u, r, "Failed to load accept-socket unit: %s", value); + else { + unit_ref_set(&s->accept_socket, socket); + SOCKET(socket)->n_connections++; + } + } else if (streq(key, "socket-fd")) { int fd; @@ -2276,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, r = service_add_fd_store(s, fd, t); if (r < 0) log_unit_error_errno(u, r, "Failed to add fd to store: %m"); - else if (r > 0) + else fdset_remove(fds, fd); } @@ -2552,8 +2625,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { assert(s); assert(pid >= 0); - if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) : - is_clean_exit_lsb(code, status, &s->success_status)) + if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status)) f = SERVICE_SUCCESS; else if (code == CLD_EXITED) f = SERVICE_FAILURE_EXIT_CODE; @@ -2595,7 +2667,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { f = SERVICE_SUCCESS; } - log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE, + /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure + * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption + * that the service already logged the reason at a higher log level on its own. However, if the service + * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise + * our log level to WARNING then. */ + + log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : + (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING), LOG_UNIT_ID(u), LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s", sigchld_code_to_string(code), status, @@ -2606,7 +2685,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { "EXIT_STATUS=%i", status, NULL); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; if (s->main_command && @@ -2687,7 +2766,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { "Control process exited, code=%s status=%i", sigchld_code_to_string(code), status); - if (f != SERVICE_SUCCESS) + if (s->result == SERVICE_SUCCESS) s->result = f; /* Immediately get rid of the cgroup, so that the @@ -2827,7 +2906,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified() <= 0 && set_isempty(u->pids)) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) service_notify_cgroup_empty_event(u); } @@ -3037,9 +3116,7 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) if (!streq_ptr(s->status_text, t)) { - free(s->status_text); - s->status_text = t; - t = NULL; + free_and_replace(s->status_text, t); notify_dbus = true; } @@ -3323,6 +3400,7 @@ const UnitVTable service_vtable = { .cgroup_context_offset = offsetof(Service, cgroup_context), .kill_context_offset = offsetof(Service, kill_context), .exec_runtime_offset = offsetof(Service, exec_runtime), + .dynamic_creds_offset = offsetof(Service, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/service.h b/src/core/service.h index cfef375b03..2869144fcb 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -148,9 +148,11 @@ struct Service { /* Runtime data of the execution context */ ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; pid_t main_pid, control_pid; int socket_fd; + SocketPeer *peer; bool socket_fd_selinux_context_net; bool permissions_start_only; @@ -176,7 +178,7 @@ struct Service { char *status_text; int status_errno; - FailureAction failure_action; + EmergencyAction emergency_action; UnitRef accept_socket; diff --git a/src/core/show-status.c b/src/core/show-status.c index 59ebdc7219..65f9cb888a 100644 --- a/src/core/show-status.c +++ b/src/core/show-status.c @@ -61,6 +61,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char if (vasprintf(&s, format, ap) < 0) return log_oom(); + /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This + * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely, + * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier + * for us so that we don't have to recover from hangups and suchlike triggered on the console. */ + fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC); if (fd < 0) return fd; diff --git a/src/core/slice.c b/src/core/slice.c index c7700b8857..ed5d3fd701 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -130,6 +130,28 @@ static int slice_verify(Slice *s) { return 0; } +static int slice_load_root_slice(Unit *u) { + assert(u); + + if (!unit_has_name(u, SPECIAL_ROOT_SLICE)) + return 0; + + u->perpetual = true; + + /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its + * special semantics we synthesize it here, instead of relying on the unit file on disk. */ + + u->default_dependencies = false; + u->ignore_on_isolate = true; + + if (!u->description) + u->description = strdup("Root Slice"); + if (!u->documentation) + u->documentation = strv_new("man:systemd.special(7)", NULL); + + return 1; +} + static int slice_load(Unit *u) { Slice *s = SLICE(u); int r; @@ -137,6 +159,9 @@ static int slice_load(Unit *u) { assert(s); assert(u->load_state == UNIT_STUB); + r = slice_load_root_slice(u); + if (r < 0) + return r; r = unit_load_fragment_and_dropin_optional(u); if (r < 0) return r; @@ -187,10 +212,15 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) { static int slice_start(Unit *u) { Slice *t = SLICE(u); + int r; assert(t); assert(t->state == SLICE_DEAD); + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + (void) unit_realize_cgroup(u); (void) unit_reset_cpu_usage(u); @@ -269,32 +299,16 @@ static void slice_enumerate(Manager *m) { u = manager_get_unit(m, SPECIAL_ROOT_SLICE); if (!u) { - u = unit_new(m, sizeof(Slice)); - if (!u) { - log_oom(); - return; - } - - r = unit_add_name(u, SPECIAL_ROOT_SLICE); + r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u); if (r < 0) { - unit_free(u); - log_error_errno(r, "Failed to add -.slice name"); + log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m"); return; } } - u->default_dependencies = false; - u->no_gc = true; - u->ignore_on_isolate = true; - u->refuse_manual_start = true; - u->refuse_manual_stop = true; + u->perpetual = true; SLICE(u)->deserialized_state = SLICE_ACTIVE; - if (!u->description) - u->description = strdup("Root Slice"); - if (!u->documentation) - (void) strv_extend(&u->documentation, "man:systemd.special(7)"); - unit_add_to_load_queue(u); unit_add_to_dbus_queue(u); } diff --git a/src/core/socket.c b/src/core/socket.c index e098055885..0b1c4acfec 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -57,6 +57,14 @@ #include "unit-printf.h" #include "unit.h" #include "user-util.h" +#include "in-addr-util.h" + +struct SocketPeer { + unsigned n_ref; + + Socket *socket; + union sockaddr_union peer; +}; static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = { [SOCKET_DEAD] = UNIT_INACTIVE, @@ -141,15 +149,23 @@ void socket_free_ports(Socket *s) { static void socket_done(Unit *u) { Socket *s = SOCKET(u); + SocketPeer *p; assert(s); socket_free_ports(s); + while ((p = set_steal_first(s->peers_by_address))) + p->socket = NULL; + + s->peers_by_address = set_free(s->peers_by_address); + s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX); s->control_command = NULL; + dynamic_creds_unref(&s->dynamic_creds); + socket_unwatch_control_pid(s); unit_ref_unset(&s->service); @@ -466,6 +482,40 @@ static int socket_verify(Socket *s) { return 0; } +static void peer_address_hash_func(const void *p, struct siphash *state) { + const SocketPeer *s = p; + + assert(s); + assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6)); + + if (s->peer.sa.sa_family == AF_INET) + siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state); + else + siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state); +} + +static int peer_address_compare_func(const void *a, const void *b) { + const SocketPeer *x = a, *y = b; + + if (x->peer.sa.sa_family < y->peer.sa.sa_family) + return -1; + if (x->peer.sa.sa_family > y->peer.sa.sa_family) + return 1; + + switch(x->peer.sa.sa_family) { + case AF_INET: + return memcmp(&x->peer.in.sin_addr, &y->peer.in.sin_addr, sizeof(x->peer.in.sin_addr)); + case AF_INET6: + return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr)); + } + assert_not_reached("Black sheep in the family!"); +} + +const struct hash_ops peer_address_hash_ops = { + .hash = peer_address_hash_func, + .compare = peer_address_compare_func +}; + static int socket_load(Unit *u) { Socket *s = SOCKET(u); int r; @@ -473,6 +523,10 @@ static int socket_load(Unit *u) { assert(u); assert(u->load_state == UNIT_STUB); + r = set_ensure_allocated(&s->peers_by_address, &peer_address_hash_ops); + if (r < 0) + return r; + r = unit_load_fragment_and_dropin(u); if (r < 0) return r; @@ -487,6 +541,87 @@ static int socket_load(Unit *u) { return socket_verify(s); } +static SocketPeer *socket_peer_new(void) { + SocketPeer *p; + + p = new0(SocketPeer, 1); + if (!p) + return NULL; + + p->n_ref = 1; + + return p; +} + +SocketPeer *socket_peer_ref(SocketPeer *p) { + if (!p) + return NULL; + + assert(p->n_ref > 0); + p->n_ref++; + + return p; +} + +SocketPeer *socket_peer_unref(SocketPeer *p) { + if (!p) + return NULL; + + assert(p->n_ref > 0); + + p->n_ref--; + + if (p->n_ref > 0) + return NULL; + + if (p->socket) + set_remove(p->socket->peers_by_address, p); + + return mfree(p); +} + +int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) { + _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL; + SocketPeer sa = {}, *i; + socklen_t salen = sizeof(sa.peer); + int r; + + assert(fd >= 0); + assert(s); + + r = getpeername(fd, &sa.peer.sa, &salen); + if (r < 0) + return log_error_errno(errno, "getpeername failed: %m"); + + if (!IN_SET(sa.peer.sa.sa_family, AF_INET, AF_INET6)) { + *p = NULL; + return 0; + } + + i = set_get(s->peers_by_address, &sa); + if (i) { + *p = socket_peer_ref(i); + return 1; + } + + remote = socket_peer_new(); + if (!remote) + return log_oom(); + + remote->peer = sa.peer; + + r = set_put(s->peers_by_address, remote); + if (r < 0) + return r; + + remote->socket = s; + + *p = remote; + remote = NULL; + + return 1; +} + _const_ static const char* listen_lookup(int family, int type) { if (family == AF_NETLINK) @@ -1199,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) { static int usbffs_dispatch_eps(SocketPort *p) { _cleanup_free_ struct dirent **ent = NULL; - _cleanup_free_ char *path = NULL; int r, i, n, k; - path = dirname_malloc(p->path); - if (!path) - return -ENOMEM; - - r = scandir(path, &ent, usbffs_select_ep, alphasort); + r = scandir(p->path, &ent, usbffs_select_ep, alphasort); if (r < 0) return -errno; @@ -1221,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) { for (i = 0; i < n; ++i) { _cleanup_free_ char *ep = NULL; - ep = path_make_absolute(ent[i]->d_name, path); + ep = path_make_absolute(ent[i]->d_name, p->path); if (!ep) return -ENOMEM; @@ -1602,6 +1732,9 @@ static int socket_coldplug(Unit *u) { return r; } + if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED)) + (void) unit_setup_dynamic_creds(u); + socket_set_state(s, s->deserialized_state); return 0; } @@ -1611,12 +1744,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { pid_t pid; int r; ExecParameters exec_params = { - .apply_permissions = true, - .apply_chroot = true, - .apply_tty_stdin = true, - .stdin_fd = -1, - .stdout_fd = -1, - .stderr_fd = -1, + .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, + .stdin_fd = -1, + .stdout_fd = -1, + .stderr_fd = -1, }; assert(s); @@ -1633,6 +1764,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; + r = unit_setup_dynamic_creds(UNIT(s)); + if (r < 0) + return r; + r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec)); if (r < 0) return r; @@ -1643,7 +1778,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { exec_params.argv = argv; exec_params.environment = UNIT(s)->manager->environment; - exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn; + exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0; exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported; exec_params.cgroup_path = UNIT(s)->cgroup_path; exec_params.cgroup_delegate = s->cgroup_context.delegate; @@ -1654,6 +1789,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { &s->exec_context, &exec_params, s->exec_runtime, + &s->dynamic_creds, &pid); if (r < 0) return r; @@ -1754,15 +1890,19 @@ fail: static void socket_enter_dead(Socket *s, SocketResult f) { assert(s); - if (f != SOCKET_SUCCESS) + if (s->result == SOCKET_SUCCESS) s->result = f; + socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD); + exec_runtime_destroy(s->exec_runtime); s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); - socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD); + unit_unref_uid_gid(UNIT(s), true); + + dynamic_creds_destroy(&s->dynamic_creds); } static void socket_enter_signal(Socket *s, SocketState state, SocketResult f); @@ -1771,7 +1911,7 @@ static void socket_enter_stop_post(Socket *s, SocketResult f) { int r; assert(s); - if (f != SOCKET_SUCCESS) + if (s->result == SOCKET_SUCCESS) s->result = f; socket_unwatch_control_pid(s); @@ -1799,7 +1939,7 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) { assert(s); - if (f != SOCKET_SUCCESS) + if (s->result == SOCKET_SUCCESS) s->result = f; r = unit_kill_context( @@ -1843,7 +1983,7 @@ static void socket_enter_stop_pre(Socket *s, SocketResult f) { int r; assert(s); - if (f != SOCKET_SUCCESS) + if (s->result == SOCKET_SUCCESS) s->result = f; socket_unwatch_control_pid(s); @@ -2038,14 +2178,34 @@ static void socket_enter_running(Socket *s, int cfd) { socket_set_state(s, SOCKET_RUNNING); } else { _cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL; + _cleanup_(socket_peer_unrefp) SocketPeer *p = NULL; Service *service; if (s->n_connections >= s->max_connections) { - log_unit_warning(UNIT(s), "Too many incoming connections (%u), refusing connection attempt.", s->n_connections); + log_unit_warning(UNIT(s), "Too many incoming connections (%u), dropping connection.", + s->n_connections); safe_close(cfd); return; } + if (s->max_connections_per_source > 0) { + r = socket_acquire_peer(s, cfd, &p); + if (r < 0) { + safe_close(cfd); + return; + } else if (r > 0 && p->n_ref > s->max_connections_per_source) { + _cleanup_free_ char *t = NULL; + + sockaddr_pretty(&p->peer.sa, FAMILY_ADDRESS_SIZE(p->peer.sa.sa_family), true, false, &t); + + log_unit_warning(UNIT(s), + "Too many incoming connections (%u) from source %s, dropping connection.", + p->n_ref, strnull(t)); + safe_close(cfd); + return; + } + } + r = socket_instantiate_service(s); if (r < 0) goto fail; @@ -2087,6 +2247,9 @@ static void socket_enter_running(Socket *s, int cfd) { cfd = -1; /* We passed ownership of the fd to the service now. Forget it here. */ s->n_connections++; + service->peer = p; /* Pass ownership of the peer reference */ + p = NULL; + r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(service), JOB_REPLACE, &error, NULL); if (r < 0) { /* We failed to activate the new service, but it still exists. Let's make sure the service @@ -2191,11 +2354,14 @@ static int socket_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + s->result = SOCKET_SUCCESS; s->reset_cpu_usage = true; socket_enter_start_pre(s); - return 1; } @@ -2286,6 +2452,11 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) { return 0; } +static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) { + safe_close(p->fd); + p->fd = fdset_remove(fds, fd); +} + static int socket_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) { Socket *s = SOCKET(u); @@ -2340,18 +2511,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd)) log_unit_debug(u, "Failed to parse fifo value: %s", value); - else { - + else LIST_FOREACH(port, p, s->ports) if (p->type == SOCKET_FIFO && - path_equal_or_files_same(p->path, value+skip)) + path_equal_or_files_same(p->path, value+skip)) { + socket_port_take_fd(p, fds, fd); break; - - if (p) { - safe_close(p->fd); - p->fd = fdset_remove(fds, fd); - } - } + } } else if (streq(key, "special")) { int fd, skip = 0; @@ -2359,18 +2525,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd)) log_unit_debug(u, "Failed to parse special value: %s", value); - else { - + else LIST_FOREACH(port, p, s->ports) if (p->type == SOCKET_SPECIAL && - path_equal_or_files_same(p->path, value+skip)) + path_equal_or_files_same(p->path, value+skip)) { + socket_port_take_fd(p, fds, fd); break; - - if (p) { - safe_close(p->fd); - p->fd = fdset_remove(fds, fd); - } - } + } } else if (streq(key, "mqueue")) { int fd, skip = 0; @@ -2378,18 +2539,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd)) log_unit_debug(u, "Failed to parse mqueue value: %s", value); - else { - + else LIST_FOREACH(port, p, s->ports) if (p->type == SOCKET_MQUEUE && - streq(p->path, value+skip)) + streq(p->path, value+skip)) { + socket_port_take_fd(p, fds, fd); break; - - if (p) { - safe_close(p->fd); - p->fd = fdset_remove(fds, fd); - } - } + } } else if (streq(key, "socket")) { int fd, type, skip = 0; @@ -2397,17 +2553,12 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, if (sscanf(value, "%i %i %n", &fd, &type, &skip) < 2 || fd < 0 || type < 0 || !fdset_contains(fds, fd)) log_unit_debug(u, "Failed to parse socket value: %s", value); - else { - + else LIST_FOREACH(port, p, s->ports) - if (socket_address_is(&p->address, value+skip, type)) + if (socket_address_is(&p->address, value+skip, type)) { + socket_port_take_fd(p, fds, fd); break; - - if (p) { - safe_close(p->fd); - p->fd = fdset_remove(fds, fd); - } - } + } } else if (streq(key, "netlink")) { int fd, skip = 0; @@ -2415,17 +2566,12 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd)) log_unit_debug(u, "Failed to parse socket value: %s", value); - else { - + else LIST_FOREACH(port, p, s->ports) - if (socket_address_is_netlink(&p->address, value+skip)) + if (socket_address_is_netlink(&p->address, value+skip)) { + socket_port_take_fd(p, fds, fd); break; - - if (p) { - safe_close(p->fd); - p->fd = fdset_remove(fds, fd); - } - } + } } else if (streq(key, "ffs")) { int fd, skip = 0; @@ -2433,18 +2579,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value, if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd)) log_unit_debug(u, "Failed to parse ffs value: %s", value); - else { - + else LIST_FOREACH(port, p, s->ports) if (p->type == SOCKET_USB_FUNCTION && - path_equal_or_files_same(p->path, value+skip)) + path_equal_or_files_same(p->path, value+skip)) { + socket_port_take_fd(p, fds, fd); break; - - if (p) { - safe_close(p->fd); - p->fd = fdset_remove(fds, fd); - } - } + } } else log_unit_debug(UNIT(s), "Unknown serialization key: %s", key); @@ -2605,7 +2746,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) { s->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = SOCKET_SUCCESS; else if (code == CLD_EXITED) f = SOCKET_FAILURE_EXIT_CODE; @@ -2627,7 +2768,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) { "Control process exited, code=%s status=%i", sigchld_code_to_string(code), status); - if (f != SOCKET_SUCCESS) + if (s->result == SOCKET_SUCCESS) s->result = f; if (s->control_command && @@ -2930,6 +3071,7 @@ const UnitVTable socket_vtable = { .cgroup_context_offset = offsetof(Socket, cgroup_context), .kill_context_offset = offsetof(Socket, kill_context), .exec_runtime_offset = offsetof(Socket, exec_runtime), + .dynamic_creds_offset = offsetof(Socket, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/socket.h b/src/core/socket.h index 0f1ac69c6f..89f4664510 100644 --- a/src/core/socket.h +++ b/src/core/socket.h @@ -20,6 +20,7 @@ ***/ typedef struct Socket Socket; +typedef struct SocketPeer SocketPeer; #include "mount.h" #include "service.h" @@ -79,9 +80,12 @@ struct Socket { LIST_HEAD(SocketPort, ports); + Set *peers_by_address; + unsigned n_accepted; unsigned n_connections; unsigned max_connections; + unsigned max_connections_per_source; unsigned backlog; unsigned keep_alive_cnt; @@ -94,7 +98,9 @@ struct Socket { ExecContext exec_context; KillContext kill_context; CGroupContext cgroup_context; + ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; /* For Accept=no sockets refers to the one service we'll activate. For Accept=yes sockets is either NULL, or filled @@ -162,6 +168,12 @@ struct Socket { RateLimit trigger_limit; }; +SocketPeer *socket_peer_ref(SocketPeer *p); +SocketPeer *socket_peer_unref(SocketPeer *p); +int socket_acquire_peer(Socket *s, int fd, SocketPeer **p); + +DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref); + /* Called from the service code when collecting fds */ int socket_collect_fds(Socket *s, int **fds); diff --git a/src/core/swap.c b/src/core/swap.c index a532b15be8..2228a254bb 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -153,6 +153,8 @@ static void swap_done(Unit *u) { exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX); s->control_command = NULL; + dynamic_creds_unref(&s->dynamic_creds); + swap_unwatch_control_pid(s); s->timer_event_source = sd_event_source_unref(s->timer_event_source); @@ -379,11 +381,7 @@ static int swap_setup_unit( if (!u) { delete = true; - u = unit_new(m, sizeof(Swap)); - if (!u) - return log_oom(); - - r = unit_add_name(u, e); + r = unit_new_for_name(m, sizeof(Swap), e, &u); if (r < 0) goto fail; @@ -553,6 +551,9 @@ static int swap_coldplug(Unit *u) { return r; } + if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED)) + (void) unit_setup_dynamic_creds(u); + swap_set_state(s, new_state); return 0; } @@ -606,12 +607,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { pid_t pid; int r; ExecParameters exec_params = { - .apply_permissions = true, - .apply_chroot = true, - .apply_tty_stdin = true, - .stdin_fd = -1, - .stdout_fd = -1, - .stderr_fd = -1, + .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN, + .stdin_fd = -1, + .stdout_fd = -1, + .stderr_fd = -1, }; assert(s); @@ -628,12 +627,16 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { if (r < 0) goto fail; + r = unit_setup_dynamic_creds(UNIT(s)); + if (r < 0) + return r; + r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec)); if (r < 0) goto fail; exec_params.environment = UNIT(s)->manager->environment; - exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn; + exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0; exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported; exec_params.cgroup_path = UNIT(s)->cgroup_path; exec_params.cgroup_delegate = s->cgroup_context.delegate; @@ -644,6 +647,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { &s->exec_context, &exec_params, s->exec_runtime, + &s->dynamic_creds, &pid); if (r < 0) goto fail; @@ -665,21 +669,25 @@ fail: static void swap_enter_dead(Swap *s, SwapResult f) { assert(s); - if (f != SWAP_SUCCESS) + if (s->result == SWAP_SUCCESS) s->result = f; + swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD); + exec_runtime_destroy(s->exec_runtime); s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); - swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD); + unit_unref_uid_gid(UNIT(s), true); + + dynamic_creds_destroy(&s->dynamic_creds); } static void swap_enter_active(Swap *s, SwapResult f) { assert(s); - if (f != SWAP_SUCCESS) + if (s->result == SWAP_SUCCESS) s->result = f; swap_set_state(s, SWAP_ACTIVE); @@ -690,7 +698,7 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) { assert(s); - if (f != SWAP_SUCCESS) + if (s->result == SWAP_SUCCESS) s->result = f; r = unit_kill_context( @@ -849,6 +857,10 @@ static int swap_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + s->result = SWAP_SUCCESS; s->reset_cpu_usage = true; @@ -976,7 +988,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) { s->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = SWAP_SUCCESS; else if (code == CLD_EXITED) f = SWAP_FAILURE_EXIT_CODE; @@ -987,7 +999,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) { else assert_not_reached("Unknown code"); - if (f != SWAP_SUCCESS) + if (s->result == SWAP_SUCCESS) s->result = f; if (s->control_command) { @@ -1177,6 +1189,7 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v case SWAP_DEAD: case SWAP_FAILED: + (void) unit_acquire_invocation_id(UNIT(swap)); swap_enter_active(swap, SWAP_SUCCESS); break; @@ -1466,6 +1479,7 @@ const UnitVTable swap_vtable = { .cgroup_context_offset = offsetof(Swap, cgroup_context), .kill_context_offset = offsetof(Swap, kill_context), .exec_runtime_offset = offsetof(Swap, exec_runtime), + .dynamic_creds_offset = offsetof(Swap, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/swap.h b/src/core/swap.h index fbf66debdc..b0ef50f1e8 100644 --- a/src/core/swap.h +++ b/src/core/swap.h @@ -82,6 +82,7 @@ struct Swap { CGroupContext cgroup_context; ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; SwapState state, deserialized_state; diff --git a/src/core/system.conf b/src/core/system.conf index c6bb050aac..746572b7ff 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -21,6 +21,7 @@ #CrashChangeVT=no #CrashShell=no #CrashReboot=no +#CtrlAltDelBurstAction=reboot-force #CPUAffinity=1 2 #JoinControllers=cpu,cpuacct net_cls,net_prio #RuntimeWatchdogSec=0 diff --git a/src/core/target.c b/src/core/target.c index 61a91aad07..765c1f3fa4 100644 --- a/src/core/target.c +++ b/src/core/target.c @@ -124,10 +124,15 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) { static int target_start(Unit *u) { Target *t = TARGET(u); + int r; assert(t); assert(t->state == TARGET_DEAD); + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + target_set_state(t, TARGET_ACTIVE); return 1; } diff --git a/src/core/timer.c b/src/core/timer.c index 3206296f09..2469a517ea 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -261,6 +261,8 @@ static void timer_set_state(Timer *t, TimerState state) { if (state != TIMER_WAITING) { t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source); t->realtime_event_source = sd_event_source_unref(t->realtime_event_source); + t->next_elapse_monotonic_or_boottime = USEC_INFINITY; + t->next_elapse_realtime = USEC_INFINITY; } if (state != old_state) @@ -291,7 +293,7 @@ static int timer_coldplug(Unit *u) { static void timer_enter_dead(Timer *t, TimerResult f) { assert(t); - if (f != TIMER_SUCCESS) + if (t->result == TIMER_SUCCESS) t->result = f; timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD); @@ -616,6 +618,10 @@ static int timer_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + t->last_trigger = DUAL_TIMESTAMP_NULL; /* Reenable all timers that depend on unit activation time */ @@ -632,7 +638,7 @@ static int timer_start(Unit *u) { /* The timer has never run before, * make sure a stamp file exists. */ - touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID); + (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID); } t->result = TIMER_SUCCESS; diff --git a/src/core/transaction.c b/src/core/transaction.c index 8370b864fb..e22e3b30c2 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -1085,10 +1085,8 @@ Transaction *transaction_new(bool irreversible) { return NULL; tr->jobs = hashmap_new(NULL); - if (!tr->jobs) { - free(tr); - return NULL; - } + if (!tr->jobs) + return mfree(tr); tr->irreversible = irreversible; diff --git a/src/core/umount.c b/src/core/umount.c index c21a2be54e..1e5459ed80 100644 --- a/src/core/umount.c +++ b/src/core/umount.c @@ -375,7 +375,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e /* If we are in a container, don't attempt to read-only mount anything as that brings no real benefits, but might confuse the host, as we remount - the superblock here, not the bind mound. */ + the superblock here, not the bind mount. */ if (detect_container() <= 0) { _cleanup_free_ char *options = NULL; /* MS_REMOUNT requires that the data parameter diff --git a/src/core/unit.c b/src/core/unit.c index 4934a0e56f..e664e23892 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -37,6 +37,7 @@ #include "execute.h" #include "fileio-label.h" #include "formats-util.h" +#include "id128-util.h" #include "load-dropin.h" #include "load-fragment.h" #include "log.h" @@ -87,10 +88,8 @@ Unit *unit_new(Manager *m, size_t size) { return NULL; u->names = set_new(&string_hash_ops); - if (!u->names) { - free(u); - return NULL; - } + if (!u->names) + return mfree(u); u->manager = m; u->type = _UNIT_TYPE_INVALID; @@ -100,7 +99,9 @@ Unit *unit_new(Manager *m, size_t size) { u->on_failure_job_mode = JOB_REPLACE; u->cgroup_inotify_wd = -1; u->job_timeout = USEC_INFINITY; - u->sigchldgen = 0; + u->ref_uid = UID_INVALID; + u->ref_gid = GID_INVALID; + u->cpu_usage_last = NSEC_INFINITY; RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst); RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16); @@ -108,11 +109,29 @@ Unit *unit_new(Manager *m, size_t size) { return u; } +int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) { + Unit *u; + int r; + + u = unit_new(m, size); + if (!u) + return -ENOMEM; + + r = unit_add_name(u, name); + if (r < 0) { + unit_free(u); + return r; + } + + *ret = u; + return r; +} + bool unit_has_name(Unit *u, const char *name) { assert(u); assert(name); - return !!set_get(u->names, (char*) name); + return set_contains(u->names, (char*) name); } static void unit_init(Unit *u) { @@ -301,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) { bool unit_check_gc(Unit *u) { UnitActiveState state; + bool inactive; assert(u); if (u->job) @@ -310,24 +330,28 @@ bool unit_check_gc(Unit *u) { return true; state = unit_active_state(u); + inactive = state == UNIT_INACTIVE; /* If the unit is inactive and failed and no job is queued for * it, then release its runtime resources */ if (UNIT_IS_INACTIVE_OR_FAILED(state) && UNIT_VTABLE(u)->release_resources) - UNIT_VTABLE(u)->release_resources(u); + UNIT_VTABLE(u)->release_resources(u, inactive); /* But we keep the unit object around for longer when it is * referenced or configured to not be gc'ed */ - if (state != UNIT_INACTIVE) + if (!inactive) return true; - if (u->no_gc) + if (u->perpetual) return true; if (u->refs) return true; + if (sd_bus_track_count(u->bus_track) > 0) + return true; + if (UNIT_VTABLE(u)->check_gc) if (UNIT_VTABLE(u)->check_gc(u)) return true; @@ -508,11 +532,17 @@ void unit_free(Unit *u) { sd_bus_slot_unref(u->match_bus_slot); + sd_bus_track_unref(u->bus_track); + u->deserialized_refs = strv_free(u->deserialized_refs); + unit_free_requires_mounts_for(u); SET_FOREACH(t, u->names, i) hashmap_remove_value(u->manager->units, t, u); + if (!sd_id128_is_null(u->invocation_id)) + hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u); + if (u->job) { Job *j = u->job; job_uninstall(j); @@ -550,6 +580,8 @@ void unit_free(Unit *u) { unit_release_cgroup(u); + unit_unref_uid_gid(u, false); + (void) manager_update_failed_units(u->manager, u, false); set_remove(u->manager->startup_units, u); @@ -846,18 +878,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) { return r; } - if (c->std_output != EXEC_OUTPUT_KMSG && - c->std_output != EXEC_OUTPUT_SYSLOG && - c->std_output != EXEC_OUTPUT_JOURNAL && - c->std_output != EXEC_OUTPUT_KMSG_AND_CONSOLE && - c->std_output != EXEC_OUTPUT_SYSLOG_AND_CONSOLE && - c->std_output != EXEC_OUTPUT_JOURNAL_AND_CONSOLE && - c->std_error != EXEC_OUTPUT_KMSG && - c->std_error != EXEC_OUTPUT_SYSLOG && - c->std_error != EXEC_OUTPUT_JOURNAL && - c->std_error != EXEC_OUTPUT_KMSG_AND_CONSOLE && - c->std_error != EXEC_OUTPUT_JOURNAL_AND_CONSOLE && - c->std_error != EXEC_OUTPUT_SYSLOG_AND_CONSOLE) + if (!IN_SET(c->std_output, + EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE, + EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE, + EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) && + !IN_SET(c->std_error, + EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE, + EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE, + EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE)) return 0; /* If syslog or kernel logging is requested, make sure our own @@ -894,6 +922,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { Unit *following; _cleanup_set_free_ Set *following_set = NULL; int r; + const char *n; assert(u); assert(u->type >= 0); @@ -915,6 +944,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { "%s\tGC Check Good: %s\n" "%s\tNeed Daemon Reload: %s\n" "%s\tTransient: %s\n" + "%s\tPerpetual: %s\n" "%s\tSlice: %s\n" "%s\tCGroup: %s\n" "%s\tCGroup realized: %s\n" @@ -933,6 +963,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { prefix, yes_no(unit_check_gc(u)), prefix, yes_no(unit_need_daemon_reload(u)), prefix, yes_no(u->transient), + prefix, yes_no(u->perpetual), prefix, strna(unit_slice_name(u)), prefix, strna(u->cgroup_path), prefix, yes_no(u->cgroup_realized), @@ -942,6 +973,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { SET_FOREACH(t, u->names, i) fprintf(f, "%s\tName: %s\n", prefix, t); + if (!sd_id128_is_null(u->invocation_id)) + fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n", + prefix, SD_ID128_FORMAT_VAL(u->invocation_id)); + STRV_FOREACH(j, u->documentation) fprintf(f, "%s\tDocumentation: %s\n", prefix, *j); @@ -969,8 +1004,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { if (u->job_timeout != USEC_INFINITY) fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0)); - if (u->job_timeout_action != FAILURE_ACTION_NONE) - fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action)); + if (u->job_timeout_action != EMERGENCY_ACTION_NONE) + fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action)); if (u->job_timeout_reboot_arg) fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg); @@ -1035,13 +1070,14 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { else if (u->load_state == UNIT_ERROR) fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error)); + for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track)) + fprintf(f, "%s\tBus Ref: %s\n", prefix, n); if (u->job) job_dump(u->job, f, prefix2); if (u->nop_job) job_dump(u->nop_job, f, prefix2); - } /* Common implementation for multiple backends */ @@ -1436,7 +1472,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) { format = unit_get_status_message_format(u, t); DISABLE_WARNING_FORMAT_NONLITERAL; - xsprintf(buf, format, unit_description(u)); + snprintf(buf, sizeof buf, format, unit_description(u)); REENABLE_WARNING; mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING : @@ -1476,7 +1512,7 @@ int unit_start_limit_test(Unit *u) { log_unit_warning(u, "Start request repeated too quickly."); u->start_limit_hit = true; - return failure_action(u->manager, u->start_limit_action, u->reboot_arg); + return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed"); } /* Errors: @@ -1602,6 +1638,18 @@ int unit_stop(Unit *u) { return UNIT_VTABLE(u)->stop(u); } +bool unit_can_stop(Unit *u) { + assert(u); + + if (!unit_supported(u)) + return false; + + if (u->perpetual) + return false; + + return !!UNIT_VTABLE(u)->stop; +} + /* Errors: * -EBADR: This unit type does not support reloading. * -ENOEXEC: Unit is not started. @@ -2136,13 +2184,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) { case JOB_VERIFY_ACTIVE: case JOB_START: - case JOB_STOP: case JOB_NOP: + /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not + * startable by us but may appear due to external events, and it thus makes sense to permit enqueing + * jobs for it. */ return true; + case JOB_STOP: + /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to + * external events), hence it makes no sense to permit enqueing such a request either. */ + return !u->perpetual; + case JOB_RESTART: case JOB_TRY_RESTART: - return unit_can_start(u); + return unit_can_stop(u) && unit_can_start(u); case JOB_RELOAD: case JOB_TRY_RELOAD: @@ -2212,6 +2267,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen return 0; } + if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) { + log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id); + return 0; + } + r = set_ensure_allocated(&u->dependencies[d], NULL); if (r < 0) return r; @@ -2374,6 +2434,15 @@ char *unit_dbus_path(Unit *u) { return unit_dbus_path_from_name(u->id); } +char *unit_dbus_path_invocation_id(Unit *u) { + assert(u); + + if (sd_id128_is_null(u->invocation_id)) + return NULL; + + return unit_dbus_path_from_name(u->invocation_id_string); +} + int unit_set_slice(Unit *u, Unit *slice) { assert(u); assert(slice); @@ -2608,21 +2677,34 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result)); unit_serialize_item(u, f, "transient", yes_no(u->transient)); - unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base); + + unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base); + if (u->cpu_usage_last != NSEC_INFINITY) + unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last); if (u->cgroup_path) unit_serialize_item(u, f, "cgroup", u->cgroup_path); unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized)); + if (uid_is_valid(u->ref_uid)) + unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid); + if (gid_is_valid(u->ref_gid)) + unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid); + + if (!sd_id128_is_null(u->invocation_id)) + unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)); + + bus_track_serialize(u->bus_track, f, "ref"); + if (serialize_jobs) { if (u->job) { fprintf(f, "job\n"); - job_serialize(u->job, f, fds); + job_serialize(u->job, f); } if (u->nop_job) { fprintf(f, "job\n"); - job_serialize(u->nop_job, f, fds); + job_serialize(u->nop_job, f); } } @@ -2752,7 +2834,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { if (!j) return log_oom(); - r = job_deserialize(j, f, fds); + r = job_deserialize(j, f); if (r < 0) { job_free(j); return r; @@ -2824,11 +2906,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; - } else if (streq(l, "cpuacct-usage-base")) { + } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) { + + r = safe_atou64(v, &u->cpu_usage_base); + if (r < 0) + log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v); + + continue; + + } else if (streq(l, "cpu-usage-last")) { - r = safe_atou64(v, &u->cpuacct_usage_base); + r = safe_atou64(v, &u->cpu_usage_last); if (r < 0) - log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v); + log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v); continue; @@ -2851,6 +2941,47 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { u->cgroup_realized = b; continue; + + } else if (streq(l, "ref-uid")) { + uid_t uid; + + r = parse_uid(v, &uid); + if (r < 0) + log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v); + else + unit_ref_uid_gid(u, uid, GID_INVALID); + + continue; + + } else if (streq(l, "ref-gid")) { + gid_t gid; + + r = parse_gid(v, &gid); + if (r < 0) + log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v); + else + unit_ref_uid_gid(u, UID_INVALID, gid); + + } else if (streq(l, "ref")) { + + r = strv_extend(&u->deserialized_refs, v); + if (r < 0) + log_oom(); + + continue; + } else if (streq(l, "invocation-id")) { + sd_id128_t id; + + r = sd_id128_from_string(v, &id); + if (r < 0) + log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v); + else { + r = unit_set_invocation_id(u, id); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m"); + } + + continue; } if (unit_can_serialize(u)) { @@ -2925,7 +3056,8 @@ int unit_add_node_link(Unit *u, const char *what, bool wants, UnitDependency dep } int unit_coldplug(Unit *u) { - int r = 0, q = 0; + int r = 0, q; + char **i; assert(u); @@ -2936,21 +3068,29 @@ int unit_coldplug(Unit *u) { u->coldplugged = true; - if (UNIT_VTABLE(u)->coldplug) - r = UNIT_VTABLE(u)->coldplug(u); + STRV_FOREACH(i, u->deserialized_refs) { + q = bus_unit_track_add_name(u, *i); + if (q < 0 && r >= 0) + r = q; + } + u->deserialized_refs = strv_free(u->deserialized_refs); - if (u->job) - q = job_coldplug(u->job); + if (UNIT_VTABLE(u)->coldplug) { + q = UNIT_VTABLE(u)->coldplug(u); + if (q < 0 && r >= 0) + r = q; + } - if (r < 0) - return r; - if (q < 0) - return q; + if (u->job) { + q = job_coldplug(u->job); + if (q < 0 && r >= 0) + r = q; + } - return 0; + return r; } -static bool fragment_mtime_newer(const char *path, usec_t mtime) { +static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) { struct stat st; if (!path) @@ -2960,12 +3100,12 @@ static bool fragment_mtime_newer(const char *path, usec_t mtime) { /* What, cannot access this anymore? */ return true; - if (mtime > 0) + if (path_masked) + /* For masked files check if they are still so */ + return !null_or_empty(&st); + else /* For non-empty files check the mtime */ return timespec_load(&st.st_mtim) > mtime; - else if (!null_or_empty(&st)) - /* For masked files check if they are still so */ - return true; return false; } @@ -2976,18 +3116,22 @@ bool unit_need_daemon_reload(Unit *u) { assert(u); - if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime)) + /* For unit files, we allow masking… */ + if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime, + u->load_state == UNIT_MASKED)) return true; - if (fragment_mtime_newer(u->source_path, u->source_mtime)) + /* Source paths should not be masked… */ + if (fragment_mtime_newer(u->source_path, u->source_mtime, false)) return true; (void) unit_find_dropin_paths(u, &t); if (!strv_equal(u->dropin_paths, t)) return true; + /* … any drop-ins that are masked are simply omitted from the list. */ STRV_FOREACH(path, u->dropin_paths) - if (fragment_mtime_newer(*path, u->dropin_mtime)) + if (fragment_mtime_newer(*path, u->dropin_mtime, false)) return true; return false; @@ -3224,6 +3368,33 @@ void unit_ref_unset(UnitRef *ref) { ref->unit = NULL; } +static int user_from_unit_name(Unit *u, char **ret) { + + static const uint8_t hash_key[] = { + 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96, + 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec + }; + + _cleanup_free_ char *n = NULL; + int r; + + r = unit_name_to_prefix(u->id, &n); + if (r < 0) + return r; + + if (valid_user_group_name(n)) { + *ret = n; + n = NULL; + return 0; + } + + /* If we can't use the unit name as a user name, then let's hash it and use that */ + if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0) + return -ENOMEM; + + return 0; +} + int unit_patch_contexts(Unit *u) { CGroupContext *cc; ExecContext *ec; @@ -3267,7 +3438,33 @@ int unit_patch_contexts(Unit *u) { ec->no_new_privileges = true; if (ec->private_devices) - ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD); + ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO)); + + if (ec->protect_kernel_modules) + ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE); + + if (ec->dynamic_user) { + if (!ec->user) { + r = user_from_unit_name(u, &ec->user); + if (r < 0) + return r; + } + + if (!ec->group) { + ec->group = strdup(ec->user); + if (!ec->group) + return -ENOMEM; + } + + /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID + * around in the file system or on IPC objects. Hence enforce a strict sandbox. */ + + ec->private_tmp = true; + ec->remove_ipc = true; + ec->protect_system = PROTECT_SYSTEM_STRICT; + if (ec->protect_home == PROTECT_HOME_NO) + ec->protect_home = PROTECT_HOME_READ_ONLY; + } } cc = unit_get_cgroup_context(u); @@ -3652,7 +3849,7 @@ int unit_kill_context( * there we get proper events. Hence rely on * them.*/ - if (cg_unified() > 0 || + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 || (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; @@ -3776,6 +3973,26 @@ int unit_setup_exec_runtime(Unit *u) { return exec_runtime_make(rt, unit_get_exec_context(u), u->id); } +int unit_setup_dynamic_creds(Unit *u) { + ExecContext *ec; + DynamicCreds *dcreds; + size_t offset; + + assert(u); + + offset = UNIT_VTABLE(u)->dynamic_creds_offset; + assert(offset > 0); + dcreds = (DynamicCreds*) ((uint8_t*) u + offset); + + ec = unit_get_exec_context(u); + assert(ec); + + if (!ec->dynamic_user) + return 0; + + return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group); +} + bool unit_type_supported(UnitType t) { if (_unlikely_(t < 0)) return false; @@ -3869,3 +4086,198 @@ pid_t unit_main_pid(Unit *u) { return 0; } + +static void unit_unref_uid_internal( + Unit *u, + uid_t *ref_uid, + bool destroy_now, + void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) { + + assert(u); + assert(ref_uid); + assert(_manager_unref_uid); + + /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and + * gid_t are actually the same time, with the same validity rules. + * + * Drops a reference to UID/GID from a unit. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (!uid_is_valid(*ref_uid)) + return; + + _manager_unref_uid(u->manager, *ref_uid, destroy_now); + *ref_uid = UID_INVALID; +} + +void unit_unref_uid(Unit *u, bool destroy_now) { + unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid); +} + +void unit_unref_gid(Unit *u, bool destroy_now) { + unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid); +} + +static int unit_ref_uid_internal( + Unit *u, + uid_t *ref_uid, + uid_t uid, + bool clean_ipc, + int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) { + + int r; + + assert(u); + assert(ref_uid); + assert(uid_is_valid(uid)); + assert(_manager_ref_uid); + + /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t + * are actually the same type, and have the same validity rules. + * + * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a + * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter + * drops to zero. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (*ref_uid == uid) + return 0; + + if (uid_is_valid(*ref_uid)) /* Already set? */ + return -EBUSY; + + r = _manager_ref_uid(u->manager, uid, clean_ipc); + if (r < 0) + return r; + + *ref_uid = uid; + return 1; +} + +int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) { + return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid); +} + +int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) { + return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid); +} + +static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) { + int r = 0, q = 0; + + assert(u); + + /* Reference both a UID and a GID in one go. Either references both, or neither. */ + + if (uid_is_valid(uid)) { + r = unit_ref_uid(u, uid, clean_ipc); + if (r < 0) + return r; + } + + if (gid_is_valid(gid)) { + q = unit_ref_gid(u, gid, clean_ipc); + if (q < 0) { + if (r > 0) + unit_unref_uid(u, false); + + return q; + } + } + + return r > 0 || q > 0; +} + +int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) { + ExecContext *c; + int r; + + assert(u); + + c = unit_get_exec_context(u); + + r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false); + if (r < 0) + return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m"); + + return r; +} + +void unit_unref_uid_gid(Unit *u, bool destroy_now) { + assert(u); + + unit_unref_uid(u, destroy_now); + unit_unref_gid(u, destroy_now); +} + +void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) { + int r; + + assert(u); + + /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names + * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC + * objects when no service references the UID/GID anymore. */ + + r = unit_ref_uid_gid(u, uid, gid); + if (r > 0) + bus_unit_send_change_signal(u); +} + +int unit_set_invocation_id(Unit *u, sd_id128_t id) { + int r; + + assert(u); + + /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */ + + if (sd_id128_equal(u->invocation_id, id)) + return 0; + + if (!sd_id128_is_null(u->invocation_id)) + (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u); + + if (sd_id128_is_null(id)) { + r = 0; + goto reset; + } + + r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops); + if (r < 0) + goto reset; + + u->invocation_id = id; + sd_id128_to_string(id, u->invocation_id_string); + + r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u); + if (r < 0) + goto reset; + + return 0; + +reset: + u->invocation_id = SD_ID128_NULL; + u->invocation_id_string[0] = 0; + return r; +} + +int unit_acquire_invocation_id(Unit *u) { + sd_id128_t id; + int r; + + assert(u); + + r = sd_id128_randomize(&id); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m"); + + r = unit_set_invocation_id(u, id); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m"); + + return 0; +} diff --git a/src/core/unit.h b/src/core/unit.h index 1eabfa51e2..991543664b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -29,7 +29,7 @@ typedef struct UnitRef UnitRef; typedef struct UnitStatusMessageFormats UnitStatusMessageFormats; #include "condition.h" -#include "failure-action.h" +#include "emergency-action.h" #include "install.h" #include "list.h" #include "unit-name.h" @@ -108,9 +108,13 @@ struct Unit { /* The slot used for watching NameOwnerChanged signals */ sd_bus_slot *match_bus_slot; + /* References to this unit from clients */ + sd_bus_track *bus_track; + char **deserialized_refs; + /* Job timeout and action to take */ usec_t job_timeout; - FailureAction job_timeout_action; + EmergencyAction job_timeout_action; char *job_timeout_reboot_arg; /* References to this */ @@ -174,18 +178,23 @@ struct Unit { /* Put a ratelimit on unit starting */ RateLimit start_limit; - FailureAction start_limit_action; + EmergencyAction start_limit_action; char *reboot_arg; /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */ RateLimit auto_stop_ratelimit; + /* Reference to a specific UID/GID */ + uid_t ref_uid; + gid_t ref_gid; + /* Cached unit file state and preset */ UnitFileState unit_file_state; int unit_file_preset; - /* Where the cpuacct.usage cgroup counter was at the time the unit was started */ - nsec_t cpuacct_usage_base; + /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */ + nsec_t cpu_usage_base; + nsec_t cpu_usage_last; /* the most recently read value */ /* Counterparts in the cgroup filesystem */ char *cgroup_path; @@ -195,11 +204,13 @@ struct Unit { CGroupMask cgroup_members_mask; int cgroup_inotify_wd; - uint32_t cgroup_netclass_id; - /* How to start OnFailure units */ JobMode on_failure_job_mode; + /* The current invocation ID */ + sd_id128_t invocation_id; + char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */ + /* Garbage collect us we nobody wants or requires us anymore */ bool stop_when_unneeded; @@ -225,6 +236,9 @@ struct Unit { /* Is this a transient unit? */ bool transient; + /* Is this a unit that is always running and cannot be stopped? */ + bool perpetual; + bool in_load_queue:1; bool in_dbus_queue:1; bool in_cleanup_queue:1; @@ -233,8 +247,6 @@ struct Unit { bool sent_dbus_new_signal:1; - bool no_gc:1; - bool in_audit:1; bool cgroup_realized:1; @@ -245,6 +257,9 @@ struct Unit { /* Did we already invoke unit_coldplug() for this unit? */ bool coldplugged:1; + + /* For transient units: whether to add a bus track reference after creating the unit */ + bool bus_track_add:1; }; struct UnitStatusMessageFormats { @@ -291,6 +306,10 @@ struct UnitVTable { * that */ size_t exec_runtime_offset; + /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type + * has that. */ + size_t dynamic_creds_offset; + /* The name of the configuration file section with the private settings of this unit */ const char *private_section; @@ -354,7 +373,7 @@ struct UnitVTable { /* When the unit is not running and no job for it queued we * shall release its runtime resources */ - void (*release_resources)(Unit *u); + void (*release_resources)(Unit *u, bool inactive); /* Invoked on every child that died */ void (*sigchld_event)(Unit *u, pid_t pid, int code, int status); @@ -369,8 +388,7 @@ struct UnitVTable { /* Called whenever a process of this unit sends us a message */ void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds); - /* Called whenever a name this Unit registered for comes or - * goes away. */ + /* Called whenever a name this Unit registered for comes or goes away. */ void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner); /* Called for each property that is being set */ @@ -463,6 +481,7 @@ DEFINE_CAST(SCOPE, Scope); Unit *unit_new(Manager *m, size_t size); void unit_free(Unit *u); +int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret); int unit_add_name(Unit *u, const char *name); int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference); @@ -507,6 +526,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix); bool unit_can_reload(Unit *u) _pure_; bool unit_can_start(Unit *u) _pure_; +bool unit_can_stop(Unit *u) _pure_; bool unit_can_isolate(Unit *u) _pure_; int unit_start(Unit *u); @@ -533,6 +553,7 @@ bool unit_job_is_applicable(Unit *u, JobType j); int set_unit_path(const char *p); char *unit_dbus_path(Unit *u); +char *unit_dbus_path_invocation_id(Unit *u); int unit_load_related_unit(Unit *u, const char *type, Unit **_found); @@ -589,6 +610,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) _pure_; ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_; int unit_setup_exec_runtime(Unit *u); +int unit_setup_dynamic_creds(Unit *u); int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data); int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *format, ...) _printf_(4,5); @@ -618,12 +640,26 @@ int unit_fail_if_symlink(Unit *u, const char* where); int unit_start_limit_test(Unit *u); +void unit_unref_uid(Unit *u, bool destroy_now); +int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc); + +void unit_unref_gid(Unit *u, bool destroy_now); +int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc); + +int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid); +void unit_unref_uid_gid(Unit *u, bool destroy_now); + +void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid); + +int unit_set_invocation_id(Unit *u, sd_id128_t id); +int unit_acquire_invocation_id(Unit *u); + /* Macros which append UNIT= or USER_UNIT= to the message */ #define log_unit_full(unit, level, error, ...) \ ({ \ - Unit *_u = (unit); \ - _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \ + const Unit *_u = (unit); \ + _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \ log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ }) |