diff options
Diffstat (limited to 'src/core')
51 files changed, 3780 insertions, 1054 deletions
diff --git a/src/core/automount.c b/src/core/automount.c index 00295cf769..7d7a0a6e46 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) { return r; (void) sd_event_source_set_description(a->pipe_event_source, "automount-io"); + if (a->deserialized_state == AUTOMOUNT_RUNNING) { + r = automount_start_expire(a); + if (r < 0) + log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m"); + } } automount_set_state(a, a->deserialized_state); @@ -795,6 +800,10 @@ static int automount_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + a->result = AUTOMOUNT_SUCCESS; automount_enter_waiting(a); return 1; diff --git a/src/core/busname.c b/src/core/busname.c index 7952cd31aa..b96ec09e67 100644 --- a/src/core/busname.c +++ b/src/core/busname.c @@ -639,6 +639,10 @@ static int busname_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + n->result = BUSNAME_SUCCESS; busname_enter_making(n); @@ -868,7 +872,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) { n->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = BUSNAME_SUCCESS; else if (code == CLD_EXITED) f = BUSNAME_FAILURE_EXIT_CODE; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index c19e43f571..23a92f9651 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -57,12 +57,16 @@ void cgroup_context_init(CGroupContext *c) { /* Initialize everything to the kernel defaults, assuming the * structure is preinitialized to 0 */ + c->cpu_weight = CGROUP_WEIGHT_INVALID; + c->startup_cpu_weight = CGROUP_WEIGHT_INVALID; + c->cpu_quota_per_sec_usec = USEC_INFINITY; + c->cpu_shares = CGROUP_CPU_SHARES_INVALID; c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; - c->cpu_quota_per_sec_usec = USEC_INFINITY; c->memory_high = CGROUP_LIMIT_MAX; c->memory_max = CGROUP_LIMIT_MAX; + c->memory_swap_max = CGROUP_LIMIT_MAX; c->memory_limit = CGROUP_LIMIT_MAX; @@ -158,6 +162,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sBlockIOAccounting=%s\n" "%sMemoryAccounting=%s\n" "%sTasksAccounting=%s\n" + "%sCPUWeight=%" PRIu64 "\n" + "%sStartupCPUWeight=%" PRIu64 "\n" "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" @@ -168,6 +174,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sMemoryLow=%" PRIu64 "\n" "%sMemoryHigh=%" PRIu64 "\n" "%sMemoryMax=%" PRIu64 "\n" + "%sMemorySwapMax=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n" "%sDevicePolicy=%s\n" @@ -177,6 +184,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->blockio_accounting), prefix, yes_no(c->memory_accounting), prefix, yes_no(c->tasks_accounting), + prefix, c->cpu_weight, + prefix, c->startup_cpu_weight, prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), @@ -187,6 +196,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->memory_low, prefix, c->memory_high, prefix, c->memory_max, + prefix, c->memory_swap_max, prefix, c->memory_limit, prefix, c->tasks_max, prefix, cgroup_device_policy_to_string(c->device_policy), @@ -382,6 +392,95 @@ fail: return -errno; } +static bool cgroup_context_has_cpu_weight(CGroupContext *c) { + return c->cpu_weight != CGROUP_WEIGHT_INVALID || + c->startup_cpu_weight != CGROUP_WEIGHT_INVALID; +} + +static bool cgroup_context_has_cpu_shares(CGroupContext *c) { + return c->cpu_shares != CGROUP_CPU_SHARES_INVALID || + c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID; +} + +static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_cpu_weight != CGROUP_WEIGHT_INVALID) + return c->startup_cpu_weight; + else if (c->cpu_weight != CGROUP_WEIGHT_INVALID) + return c->cpu_weight; + else + return CGROUP_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID) + return c->startup_cpu_shares; + else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID) + return c->cpu_shares; + else + return CGROUP_CPU_SHARES_DEFAULT; +} + +static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) { + char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)]; + int r; + + xsprintf(buf, "%" PRIu64 "\n", weight); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.weight: %m"); + + if (quota != USEC_INFINITY) + xsprintf(buf, USEC_FMT " " USEC_FMT "\n", + quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC); + else + xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf); + + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.max: %m"); +} + +static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) { + char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; + int r; + + xsprintf(buf, "%" PRIu64 "\n", shares); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.shares: %m"); + + xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_period_us: %m"); + + if (quota != USEC_INFINITY) { + xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf); + } else + r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1"); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_quota_us: %m"); +} + +static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) { + return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) { + return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX); +} + static bool cgroup_context_has_io_config(CGroupContext *c) { return c->io_accounting || c->io_weight != CGROUP_WEIGHT_INVALID || @@ -521,7 +620,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u } static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { - return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; + return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX; } static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) { @@ -566,30 +665,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { * and missing cgroups, i.e. EROFS and ENOENT. */ if ((mask & CGROUP_MASK_CPU) && !is_root) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1]; + bool has_weight = cgroup_context_has_cpu_weight(c); + bool has_shares = cgroup_context_has_cpu_shares(c); - sprintf(buf, "%" PRIu64 "\n", - IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares : - c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT); - r = cg_set_attribute("cpu", path, "cpu.shares", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares: %m"); + if (cg_all_unified() > 0) { + uint64_t weight; - sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); - r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us: %m"); + if (has_weight) + weight = cgroup_context_cpu_weight(c, state); + else if (has_shares) { + uint64_t shares = cgroup_context_cpu_shares(c, state); - if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { - sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); - r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf); - } else - r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us: %m"); + weight = cgroup_cpu_shares_to_weight(shares); + + log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s", + shares, weight, path); + } else + weight = CGROUP_WEIGHT_DEFAULT; + + cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec); + } else { + uint64_t shares; + + if (has_weight) { + uint64_t weight = cgroup_context_cpu_weight(c, state); + + shares = cgroup_cpu_weight_to_shares(weight); + + log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s", + weight, shares, path); + } else if (has_shares) + shares = cgroup_context_cpu_shares(c, state); + else + shares = CGROUP_CPU_SHARES_DEFAULT; + + cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec); + } } if (mask & CGROUP_MASK_IO) { @@ -677,16 +788,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { char buf[DECIMAL_STR_MAX(uint64_t)+1]; uint64_t weight; - if (has_blockio) - weight = cgroup_context_blkio_weight(c, state); - else if (has_io) { + if (has_io) { uint64_t io_weight = cgroup_context_io_weight(c, state); weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64, io_weight, weight); - } else + } else if (has_blockio) + weight = cgroup_context_blkio_weight(c, state); + else weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); @@ -695,13 +806,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set blkio.weight: %m"); - if (has_blockio) { - CGroupBlockIODeviceWeight *w; - - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(u, w->path, w->weight); - } else if (has_io) { + if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ @@ -713,18 +818,17 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { cgroup_apply_blkio_device_weight(u, w->path, weight); } + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_blkio_device_weight(u, w->path, w->weight); } } /* Apply limits and free ones without config. */ - if (has_blockio) { - CGroupBlockIODeviceBandwidth *b, *next; - - LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) - cgroup_context_free_blockio_device_bandwidth(c, b); - } - } else if (has_io) { + if (has_io) { CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { @@ -734,16 +838,24 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) cgroup_context_free_io_device_limit(c, l); } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) + if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) + cgroup_context_free_blockio_device_bandwidth(c, b); } } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (cg_unified() > 0) { - uint64_t max = c->memory_max; + if (cg_all_unified() > 0) { + uint64_t max; + uint64_t swap_max = CGROUP_LIMIT_MAX; - if (cgroup_context_has_unified_memory_config(c)) + if (cgroup_context_has_unified_memory_config(c)) { max = c->memory_max; - else { + swap_max = c->memory_swap_max; + } else { max = c->memory_limit; if (max != CGROUP_LIMIT_MAX) @@ -753,16 +865,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low); cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high); cgroup_apply_unified_memory_limit(u, "memory.max", max); + cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max); } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - uint64_t val = c->memory_limit; + uint64_t val; - if (val == CGROUP_LIMIT_MAX) { + if (cgroup_context_has_unified_memory_config(c)) { val = c->memory_max; - - if (val != CGROUP_LIMIT_MAX) - log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max); - } + log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val); + } else + val = c->memory_limit; if (val == CGROUP_LIMIT_MAX) strncpy(buf, "-1\n", sizeof(buf)); @@ -844,7 +956,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { if ((mask & CGROUP_MASK_PIDS) && !is_root) { - if (c->tasks_max != (uint64_t) -1) { + if (c->tasks_max != CGROUP_LIMIT_MAX) { char buf[DECIMAL_STR_MAX(uint64_t) + 2]; sprintf(buf, "%" PRIu64 "\n", c->tasks_max); @@ -864,8 +976,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { /* Figure out which controllers we need */ if (c->cpu_accounting || - c->cpu_shares != CGROUP_CPU_SHARES_INVALID || - c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID || + cgroup_context_has_cpu_weight(c) || + cgroup_context_has_cpu_shares(c) || c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; @@ -911,7 +1023,7 @@ CGroupMask unit_get_own_mask(Unit *u) { e = unit_get_exec_context(u); if (!e || exec_context_maintains_privileges(e) || - cg_unified() > 0) + cg_all_unified() > 0) return _CGROUP_MASK_ALL; } @@ -1137,7 +1249,7 @@ int unit_watch_cgroup(Unit *u) { return 0; /* Only applies to the unified hierarchy */ - r = cg_unified(); + r = cg_unified(SYSTEMD_CGROUP_CONTROLLER); if (r < 0) return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); if (r == 0) @@ -1247,6 +1359,26 @@ int unit_attach_pids_to_cgroup(Unit *u) { return 0; } +static void cgroup_xattr_apply(Unit *u) { + char ids[SD_ID128_STRING_MAX]; + int r; + + assert(u); + + if (!MANAGER_IS_SYSTEM(u->manager)) + return; + + if (sd_id128_is_null(u->invocation_id)) + return; + + r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + "trusted.invocation_id", + sd_id128_to_string(u->invocation_id, ids), 32, + 0); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path); +} + static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) { assert(u); @@ -1290,6 +1422,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { /* Finally, apply the necessary attributes. */ cgroup_context_apply(u, target_mask, state); + cgroup_xattr_apply(u); return 0; } @@ -1416,6 +1549,8 @@ void unit_prune_cgroup(Unit *u) { if (!u->cgroup_path) return; + (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */ + is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE); r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice); @@ -1537,7 +1672,7 @@ int unit_watch_all_pids(Unit *u) { if (!u->cgroup_path) return -ENOENT; - if (cg_unified() > 0) /* On unified we can use proper notifications */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */ return 0; return unit_watch_pids_in_path(u, u->cgroup_path); @@ -1610,7 +1745,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, int manager_setup_cgroup(Manager *m) { _cleanup_free_ char *path = NULL; CGroupController c; - int r, unified; + int r, all_unified, systemd_unified; char *e; assert(m); @@ -1647,11 +1782,17 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Cannot find cgroup mount point: %m"); - unified = cg_unified(); - if (unified < 0) - return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m"); - if (unified > 0) + all_unified = cg_all_unified(); + systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); + + if (all_unified < 0 || systemd_unified < 0) + return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, + "Couldn't determine if we are running in the unified hierarchy: %m"); + + if (all_unified > 0) log_debug("Unified cgroup hierarchy is located at %s.", path); + else if (systemd_unified > 0) + log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path); else log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); @@ -1659,7 +1800,7 @@ int manager_setup_cgroup(Manager *m) { const char *scope_path; /* 3. Install agent */ - if (unified) { + if (systemd_unified) { /* In the unified hierarchy we can get * cgroup empty notifications via inotify. */ @@ -1719,7 +1860,7 @@ int manager_setup_cgroup(Manager *m) { return log_error_errno(errno, "Failed to open pin file: %m"); /* 6. Always enable hierarchical support if it exists... */ - if (!unified) + if (!all_unified) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); } @@ -1845,7 +1986,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; - if (cg_unified() <= 0) + if (cg_all_unified() <= 0) r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); else r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v); @@ -1890,18 +2031,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; - if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) - return -ENODATA; + if (cg_all_unified() > 0) { + const char *keys[] = { "usage_usec", NULL }; + _cleanup_free_ char *val = NULL; + uint64_t us; - r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); - if (r == -ENOENT) - return -ENODATA; - if (r < 0) - return r; + if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) + return -ENODATA; - r = safe_atou64(v, &ns); - if (r < 0) - return r; + r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val); + if (r < 0) + return r; + + r = safe_atou64(val, &us); + if (r < 0) + return r; + + ns = us * NSEC_PER_USEC; + } else { + if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) + return -ENODATA; + + r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); + if (r == -ENOENT) + return -ENODATA; + if (r < 0) + return r; + + r = safe_atou64(v, &ns); + if (r < 0) + return r; + } *ret = ns; return 0; @@ -1911,16 +2071,33 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) { nsec_t ns; int r; + assert(u); + + /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was + * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply + * call this function with a NULL return value. */ + r = unit_get_cpu_usage_raw(u, &ns); + if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) { + /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our + * cached value. */ + + if (ret) + *ret = u->cpu_usage_last; + return 0; + } if (r < 0) return r; - if (ns > u->cpuacct_usage_base) - ns -= u->cpuacct_usage_base; + if (ns > u->cpu_usage_base) + ns -= u->cpu_usage_base; else ns = 0; - *ret = ns; + u->cpu_usage_last = ns; + if (ret) + *ret = ns; + return 0; } @@ -1930,13 +2107,15 @@ int unit_reset_cpu_usage(Unit *u) { assert(u); + u->cpu_usage_last = NSEC_INFINITY; + r = unit_get_cpu_usage_raw(u, &ns); if (r < 0) { - u->cpuacct_usage_base = 0; + u->cpu_usage_base = 0; return r; } - u->cpuacct_usage_base = ns; + u->cpu_usage_base = ns; return 0; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index a57403e79f..4cd168f63e 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -89,6 +89,10 @@ struct CGroupContext { bool tasks_accounting; /* For unified hierarchy */ + uint64_t cpu_weight; + uint64_t startup_cpu_weight; + usec_t cpu_quota_per_sec_usec; + uint64_t io_weight; uint64_t startup_io_weight; LIST_HEAD(CGroupIODeviceWeight, io_device_weights); @@ -97,11 +101,11 @@ struct CGroupContext { uint64_t memory_low; uint64_t memory_high; uint64_t memory_max; + uint64_t memory_swap_max; /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; - usec_t cpu_quota_per_sec_usec; uint64_t blockio_weight; uint64_t startup_blockio_weight; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 85b0c86a2f..c4067a95bf 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0), SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0), + SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0), + SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0), SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), @@ -231,6 +233,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), + SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0), SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0), SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), @@ -303,6 +306,50 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "CPUWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->cpu_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "CPUWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight); + } + + return 1; + + } else if (streq(name, "StartupCPUWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->startup_cpu_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_CPU); + + if (weight == CGROUP_CPU_SHARES_INVALID) + unit_write_drop_in_private(u, mode, name, "StartupCPUWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight); + } + + return 1; + } else if (streq(name, "CPUShares")) { uint64_t shares; @@ -829,7 +876,7 @@ int bus_cgroup_set_property( return 1; - } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) { + } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax")) { uint64_t v; r = sd_bus_message_read(message, "t", &v); @@ -843,6 +890,8 @@ int bus_cgroup_set_property( c->memory_low = v; else if (streq(name, "MemoryHigh")) c->memory_high = v; + else if (streq(name, "MemorySwapMax")) + c->memory_swap_max = v; else c->memory_max = v; diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index e35d3ccd2e..1a7f770db1 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -627,6 +627,53 @@ static int property_get_syslog_facility( return sd_bus_message_append(reply, "i", LOG_FAC(c->syslog_priority)); } +static int property_get_input_fdname( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + const char *name; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + name = exec_context_fdname(c, STDIN_FILENO); + + return sd_bus_message_append(reply, "s", name); +} + +static int property_get_output_fdname( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + const char *name = NULL; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + if (c->std_output == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardOutputFileDescriptorName")) + name = exec_context_fdname(c, STDOUT_FILENO); + else if (c->std_error == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardErrorFileDescriptorName")) + name = exec_context_fdname(c, STDERR_FILENO); + + return sd_bus_message_append(reply, "s", name); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -677,8 +724,11 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_input_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST), @@ -695,6 +745,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), @@ -706,6 +757,9 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1026,7 +1080,6 @@ int bus_exec_context_set_transient_property( return 1; - } else if (streq(name, "StandardOutput")) { const char *s; ExecOutput p; @@ -1068,10 +1121,46 @@ int bus_exec_context_set_transient_property( return 1; } else if (STR_IN_SET(name, + "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) { + const char *s; + + r = sd_bus_message_read(message, "s", &s); + if (r < 0) + return r; + + if (!fdname_is_valid(s)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name"); + + if (mode != UNIT_CHECK) { + if (streq(name, "StandardInputFileDescriptorName")) { + c->std_input = EXEC_INPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], s); + if (r < 0) + return r; + unit_write_drop_in_private_format(u, mode, name, "StandardInput=fd:%s", s); + } else if (streq(name, "StandardOutputFileDescriptorName")) { + c->std_output = EXEC_OUTPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], s); + if (r < 0) + return r; + unit_write_drop_in_private_format(u, mode, name, "StandardOutput=fd:%s", s); + } else if (streq(name, "StandardErrorFileDescriptorName")) { + c->std_error = EXEC_OUTPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], s); + if (r < 0) + return r; + unit_write_drop_in_private_format(u, mode, name, "StandardError=fd:%s", s); + } + } + + return 1; + + } else if (STR_IN_SET(name, "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", - "RestrictRealtime", "DynamicUser")) { + "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", + "ProtectKernelModules", "ProtectControlGroups")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1103,6 +1192,14 @@ int bus_exec_context_set_transient_property( c->restrict_realtime = b; else if (streq(name, "DynamicUser")) c->dynamic_user = b; + else if (streq(name, "RemoveIPC")) + c->remove_ipc = b; + else if (streq(name, "ProtectKernelTunables")) + c->protect_kernel_tunables = b; + else if (streq(name, "ProtectKernelModules")) + c->protect_kernel_modules = b; + else if (streq(name, "ProtectControlGroups")) + c->protect_control_groups = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index ef05a75a8b..d7d3d3c8ce 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -47,6 +47,11 @@ #include "virt.h" #include "watchdog.h" +static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) { + return (runtime ? UNIT_FILE_RUNTIME : 0) | + (force ? UNIT_FILE_FORCE : 0); +} + static int property_get_version( sd_bus *bus, const char *path, @@ -464,6 +469,64 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu return sd_bus_reply_method_return(message, "o", path); } +static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_free_ char *path = NULL; + Manager *m = userdata; + sd_id128_t id; + const void *a; + Unit *u; + size_t sz; + int r; + + assert(message); + assert(m); + + /* Anyone can call this method */ + + r = sd_bus_message_read_array(message, 'y', &a, &sz); + if (r < 0) + return r; + if (sz == 0) + id = SD_ID128_NULL; + else if (sz == 16) + memcpy(&id, a, sz); + else + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID"); + + if (sd_id128_is_null(id)) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + pid_t pid; + + r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds); + if (r < 0) + return r; + + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return r; + + u = manager_get_unit_by_pid(m, pid); + if (!u) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid); + } else { + u = hashmap_get(m->units_by_invocation_id, &id); + if (!u) + return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id)); + } + + r = mac_selinux_unit_access_check(u, message, "status", error); + if (r < 0) + return r; + + /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead + * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */ + path = unit_dbus_path_invocation_id(u); + if (!path) + return -ENOMEM; + + return sd_bus_reply_method_return(message, "o", path); +} + static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { _cleanup_free_ char *path = NULL; Manager *m = userdata; @@ -643,6 +706,54 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s return bus_unit_method_set_properties(message, u, error); } +static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = manager_load_unit(m, name, NULL, error, &u); + if (r < 0) + return r; + + r = bus_unit_check_load_state(u, error); + if (r < 0) + return r; + + return bus_unit_method_ref(message, u, error); +} + +static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + Unit *u; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read(message, "s", &name); + if (r < 0) + return r; + + r = manager_load_unit(m, name, NULL, error, &u); + if (r < 0) + return r; + + r = bus_unit_check_load_state(u, error); + if (r < 0) + return r; + + return bus_unit_method_unref(message, u, error); +} + static int reply_unit_info(sd_bus_message *reply, Unit *u) { _cleanup_free_ char *unit_path = NULL, *job_path = NULL; Unit *following; @@ -781,6 +892,13 @@ static int transient_unit_from_message( if (r < 0) return r; + /* If the client asked for it, automatically add a reference to this unit. */ + if (u->bus_track_add) { + r = bus_unit_track_add_sender(u, message); + if (r < 0) + return log_error_errno(r, "Failed to watch sender: %m"); + } + /* Now load the missing bits of the unit we just created */ unit_add_to_load_queue(u); manager_dispatch_load_queue(m); @@ -1835,13 +1953,14 @@ static int install_error( static int method_enable_unit_files_generic( sd_bus_message *message, Manager *m, - int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], bool force, UnitFileChange **changes, unsigned *n_changes), + int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes), bool carries_install_info, sd_bus_error *error) { _cleanup_strv_free_ char **l = NULL; UnitFileChange *changes = NULL; unsigned n_changes = 0; + UnitFileFlags flags; int runtime, force, r; assert(message); @@ -1855,13 +1974,15 @@ static int method_enable_unit_files_generic( if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + r = bus_verify_manage_unit_files_async(m, message, error); if (r < 0) return r; if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = call(m->unit_file_scope, runtime, NULL, l, force, &changes, &n_changes); + r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -1880,8 +2001,8 @@ static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bu return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error); } -static int unit_file_preset_without_mode(UnitFileScope scope, bool runtime, const char *root_dir, char **files, bool force, UnitFileChange **changes, unsigned *n_changes) { - return unit_file_preset(scope, runtime, root_dir, files, UNIT_FILE_PRESET_FULL, force, changes, n_changes); +static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, unsigned *n_changes) { + return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes); } static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) { @@ -1900,6 +2021,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use Manager *m = userdata; UnitFilePresetMode mm; int runtime, force, r; + UnitFileFlags flags; const char *mode; assert(message); @@ -1913,6 +2035,8 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + if (isempty(mode)) mm = UNIT_FILE_PRESET_FULL; else { @@ -1927,7 +2051,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_file_preset(m->unit_file_scope, runtime, NULL, l, mm, force, &changes, &n_changes); + r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -1937,7 +2061,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use static int method_disable_unit_files_generic( sd_bus_message *message, Manager *m, - int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes), + int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes), sd_bus_error *error) { _cleanup_strv_free_ char **l = NULL; @@ -1962,7 +2086,7 @@ static int method_disable_unit_files_generic( if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = call(m->unit_file_scope, runtime, NULL, l, &changes, &n_changes); + r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -2028,7 +2152,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_file_set_default(m->unit_file_scope, NULL, name, force, &changes, &n_changes); + r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -2041,6 +2165,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, Manager *m = userdata; UnitFilePresetMode mm; const char *mode; + UnitFileFlags flags; int force, runtime, r; assert(message); @@ -2054,6 +2179,8 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + if (isempty(mode)) mm = UNIT_FILE_PRESET_FULL; else { @@ -2068,7 +2195,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata, if (r == 0) return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ - r = unit_file_preset_all(m->unit_file_scope, runtime, NULL, mm, force, &changes, &n_changes); + r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); @@ -2083,6 +2210,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd int runtime, force, r; char *target, *type; UnitDependency dep; + UnitFileFlags flags; assert(message); assert(m); @@ -2101,17 +2229,62 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd if (r < 0) return r; + flags = unit_file_bools_to_flags(runtime, force); + dep = unit_dependency_from_string(type); if (dep < 0) return -EINVAL; - r = unit_file_add_dependency(m->unit_file_scope, runtime, NULL, l, target, dep, force, &changes, &n_changes); + r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes); if (r < 0) return install_error(error, r, changes, n_changes); return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes); } +static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + UnitFileChange *changes = NULL; + unsigned n_changes = 0, i; + UnitFileFlags flags; + const char *name; + char **p; + int runtime, r; + + r = sd_bus_message_read(message, "sb", &name, &runtime); + if (r < 0) + return r; + + r = sd_bus_message_new_method_return(message, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s"); + if (r < 0) + return r; + + p = STRV_MAKE(name); + flags = UNIT_FILE_DRY_RUN | + (runtime ? UNIT_FILE_RUNTIME : 0); + + r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes); + if (r < 0) + return log_error_errno(r, "Failed to get file links for %s: %m", name); + + for (i = 0; i < n_changes; i++) + if (changes[i].type == UNIT_FILE_UNLINK) { + r = sd_bus_message_append(reply, "s", changes[i].path); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + return sd_bus_send(NULL, reply, NULL); +} + const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_VTABLE_START(0), @@ -2199,6 +2372,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED), @@ -2211,6 +2385,8 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED), @@ -2254,6 +2430,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED), diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c index b4bbee0648..76a7a7ce97 100644 --- a/src/core/dbus-mount.c +++ b/src/core/dbus-mount.c @@ -116,7 +116,11 @@ const sd_bus_vtable bus_mount_vtable[] = { SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index fab3677a01..61b83d2d62 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -36,7 +36,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess); -static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction); +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction); const sd_bus_vtable bus_service_vtable[] = { SD_BUS_VTABLE_START(0), @@ -50,12 +50,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST), BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0), - /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */ - SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Service, emergency_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST), @@ -70,6 +65,9 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), @@ -77,6 +75,12 @@ const sd_bus_vtable bus_service_vtable[] = { BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + + /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */ + SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), SD_BUS_VTABLE_END }; diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c index 9a071a1355..21adb64e15 100644 --- a/src/core/dbus-socket.c +++ b/src/core/dbus-socket.c @@ -152,6 +152,8 @@ const sd_bus_vtable bus_socket_vtable[] = { SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c index 292f8738c6..85a2c26b98 100644 --- a/src/core/dbus-swap.c +++ b/src/core/dbus-swap.c @@ -84,6 +84,8 @@ const sd_bus_vtable bus_swap_vtable[] = { SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), SD_BUS_VTABLE_END diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index b55d2cf735..69e249c844 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -37,7 +37,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode); -static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction); +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction); static int property_get_names( sd_bus *bus, @@ -263,10 +263,7 @@ static int property_get_can_stop( assert(reply); assert(u); - /* On the lower levels we assume that every unit we can start - * we can also stop */ - - return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop); + return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop); } static int property_get_can_reload( @@ -418,6 +415,7 @@ static int bus_verify_manage_units_async_full( const char *verb, int capability, const char *polkit_message, + bool interactive, sd_bus_message *call, sd_bus_error *error) { @@ -433,7 +431,15 @@ static int bus_verify_manage_units_async_full( details[7] = GETTEXT_PACKAGE; } - return bus_verify_polkit_async(call, capability, "org.freedesktop.systemd1.manage-units", details, false, UID_INVALID, &u->manager->polkit_registry, error); + return bus_verify_polkit_async( + call, + capability, + "org.freedesktop.systemd1.manage-units", + details, + interactive, + UID_INVALID, + &u->manager->polkit_registry, + error); } int bus_unit_method_start_generic( @@ -486,6 +492,7 @@ int bus_unit_method_start_generic( verb, CAP_SYS_ADMIN, job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL, + true, message, error); if (r < 0) @@ -558,6 +565,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error * "kill", CAP_KILL, N_("Authentication is required to kill '$(unit)'."), + true, message, error); if (r < 0) @@ -588,6 +596,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus "reset-failed", CAP_SYS_ADMIN, N_("Authentication is required to reset the \"failed\" state of '$(unit)'."), + true, message, error); if (r < 0) @@ -620,6 +629,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b "set-property", CAP_SYS_ADMIN, N_("Authentication is required to set properties on '$(unit)'."), + true, message, error); if (r < 0) @@ -634,6 +644,53 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b return sd_bus_reply_method_return(message, NULL); } +int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Unit *u = userdata; + int r; + + assert(message); + assert(u); + + r = mac_selinux_unit_access_check(u, message, "start", error); + if (r < 0) + return r; + + r = bus_verify_manage_units_async_full( + u, + "ref", + CAP_SYS_ADMIN, + NULL, + false, + message, + error); + if (r < 0) + return r; + if (r == 0) + return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ + + r = bus_unit_track_add_sender(u, message); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, NULL); +} + +int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Unit *u = userdata; + int r; + + assert(message); + assert(u); + + r = bus_unit_track_remove_sender(u, message); + if (r == -EUNATCH) + return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet."); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, NULL); +} + const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_VTABLE_START(0), @@ -660,10 +717,6 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), - SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -694,7 +747,7 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), @@ -704,11 +757,12 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0), SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* obsolete alias name */ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0), SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED), @@ -720,7 +774,15 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED), + /* Obsolete properties or obsolete alias names */ + SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), SD_BUS_VTABLE_END }; @@ -1104,7 +1166,7 @@ void bus_unit_send_removed_signal(Unit *u) { int r; assert(u); - if (!u->sent_dbus_new_signal) + if (!u->sent_dbus_new_signal || u->in_dbus_queue) bus_unit_send_change_signal(u); if (!u->id) @@ -1317,6 +1379,29 @@ static int bus_unit_set_transient_property( return r; return 1; + + } else if (streq(name, "AddRef")) { + + int b; + + /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method + * on the Unit interface, and it's probably not a good idea to expose a property and a method on the + * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for + * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference + * dependency type, hence let's not confuse things with that. + * + * Note that we don't acually add the reference to the bus track. We do that only after the setup of + * the transient unit is complete, so that setting this property multiple times in the same transient + * unit creation call doesn't count as individual references. */ + + r = sd_bus_message_read(message, "b", &b); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) + u->bus_track_add = b; + + return 1; } return 0; @@ -1421,3 +1506,71 @@ int bus_unit_check_load_state(Unit *u, sd_bus_error *error) { return sd_bus_error_set_errnof(error, u->load_error, "Unit %s is not loaded properly: %m.", u->id); } + +static int bus_track_handler(sd_bus_track *t, void *userdata) { + Unit *u = userdata; + + assert(t); + assert(u); + + u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */ + + unit_add_to_gc_queue(u); + return 0; +} + +static int allocate_bus_track(Unit *u) { + int r; + + assert(u); + + if (u->bus_track) + return 0; + + r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_track_handler, u); + if (r < 0) + return r; + + r = sd_bus_track_set_recursive(u->bus_track, true); + if (r < 0) { + u->bus_track = sd_bus_track_unref(u->bus_track); + return r; + } + + return 0; +} + +int bus_unit_track_add_name(Unit *u, const char *name) { + int r; + + assert(u); + + r = allocate_bus_track(u); + if (r < 0) + return r; + + return sd_bus_track_add_name(u->bus_track, name); +} + +int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) { + int r; + + assert(u); + + r = allocate_bus_track(u); + if (r < 0) + return r; + + return sd_bus_track_add_sender(u->bus_track, m); +} + +int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) { + assert(u); + + /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an + * error */ + if (!u->bus_track) + return -EUNATCH; + + return sd_bus_track_remove_sender(u->bus_track, m); +} diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index 4db88dbebc..b280de7a1d 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -33,9 +33,15 @@ int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error); -int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error); int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitSetPropertiesMode mode, bool commit, sd_bus_error *error); int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error); int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error); +int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error); int bus_unit_check_load_state(Unit *u, sd_bus_error *error); + +int bus_unit_track_add_name(Unit *u, const char *name); +int bus_unit_track_add_sender(Unit *u, sd_bus_message *m); +int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m); diff --git a/src/core/dbus.c b/src/core/dbus.c index 3422a02d68..070974fe66 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -964,10 +964,6 @@ static int bus_init_private(Manager *m) { if (m->private_listen_fd >= 0) return 0; - /* We don't need the private socket if we have kdbus */ - if (m->kdbus_fd >= 0) - return 0; - if (MANAGER_IS_SYSTEM(m)) { /* We want the private bus only when running as init */ @@ -1168,60 +1164,57 @@ int bus_foreach_bus( return ret; } -void bus_track_serialize(sd_bus_track *t, FILE *f) { +void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) { const char *n; assert(f); + assert(prefix); - for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) - fprintf(f, "subscribed=%s\n", n); -} + for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) { + int c, j; -int bus_track_deserialize_item(char ***l, const char *line) { - const char *e; - int r; - - assert(l); - assert(line); - - e = startswith(line, "subscribed="); - if (!e) - return 0; + c = sd_bus_track_count_name(t, n); - r = strv_extend(l, e); - if (r < 0) - return r; - - return 1; + for (j = 0; j < c; j++) { + fputs(prefix, f); + fputc('=', f); + fputs(n, f); + fputc('\n', f); + } + } } -int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l) { +int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) { + char **i; int r = 0; assert(m); assert(t); - assert(l); - if (!strv_isempty(*l) && m->api_bus) { - char **i; - - if (!*t) { - r = sd_bus_track_new(m->api_bus, t, NULL, NULL); - if (r < 0) - return r; - } + if (strv_isempty(l)) + return 0; - r = 0; - STRV_FOREACH(i, *l) { - int k; + if (!m->api_bus) + return 0; - k = sd_bus_track_add_name(*t, *i); - if (k < 0) - r = k; - } + if (!*t) { + r = sd_bus_track_new(m->api_bus, t, NULL, NULL); + if (r < 0) + return r; } - *l = strv_free(*l); + r = sd_bus_track_set_recursive(*t, recursive); + if (r < 0) + return r; + + r = 0; + STRV_FOREACH(i, l) { + int k; + + k = sd_bus_track_add_name(*t, *i); + if (k < 0) + r = k; + } return r; } diff --git a/src/core/dbus.h b/src/core/dbus.h index 6baaffbd75..a092ed9d76 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -28,9 +28,8 @@ void bus_done(Manager *m); int bus_fdset_add_all(Manager *m, FDSet *fds); -void bus_track_serialize(sd_bus_track *t, FILE *f); -int bus_track_deserialize_item(char ***l, const char *line); -int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l); +void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix); +int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l); int manager_sync_bus_names(Manager *m, sd_bus *bus); diff --git a/src/core/device.c b/src/core/device.c index 16e56efcc3..4b9e84aeb6 100644 --- a/src/core/device.c +++ b/src/core/device.c @@ -331,11 +331,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa if (!u) { delete = true; - u = unit_new(m, sizeof(Device)); - if (!u) - return log_oom(); - - r = unit_add_name(u, e); + r = unit_new_for_name(m, sizeof(Device), e, &u); if (r < 0) goto fail; @@ -369,7 +365,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa fail: log_unit_warning_errno(u, r, "Failed to set up device unit: %m"); - if (delete) + if (delete && u) unit_free(u); return r; @@ -464,6 +460,10 @@ static void device_update_found_one(Device *d, bool add, DeviceFound found, bool if (!now) return; + /* Didn't exist before, but does now? if so, generate a new invocation ID for it */ + if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND) + (void) unit_acquire_invocation_id(UNIT(d)); + if (d->found & DEVICE_FOUND_UDEV) /* When the device is known to udev we consider it * plugged. */ diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c index 8035bee231..e1846e1adb 100644 --- a/src/core/dynamic-user.c +++ b/src/core/dynamic-user.c @@ -31,14 +31,8 @@ #include "user-util.h" #include "fileio.h" -/* Let's pick a UIDs within the 16bit range, so that we are compatible with containers using 16bit user namespacing. At - * least on Fedora normal users are allocated until UID 60000, hence do not allocate from below this. Also stay away - * from the upper end of the range as that is often used for overflow/nobody users. */ -#define UID_PICK_MIN ((uid_t) UINT32_C(0x0000EF00)) -#define UID_PICK_MAX ((uid_t) UINT32_C(0x0000FFEF)) - /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */ -#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (UID_PICK_MAX - UID_PICK_MIN + 1)) + UID_PICK_MIN) +#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN) static DynamicUser* dynamic_user_free(DynamicUser *d) { if (!d) @@ -48,9 +42,7 @@ static DynamicUser* dynamic_user_free(DynamicUser *d) { (void) hashmap_remove(d->manager->dynamic_users, d->name); safe_close_pair(d->storage_socket); - free(d); - - return NULL; + return mfree(d); } static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) { @@ -150,6 +142,45 @@ int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) { return 1; } +static int make_uid_symlinks(uid_t uid, const char *name, bool b) { + + char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1]; + const char *path2; + int r = 0, k; + + /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The + * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it + * would be its own client then). We hence keep these world-readable symlinks in place, so that the + * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go + * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks + * on them and as those may be taken by any user with read access we can't make them world-readable. */ + + xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid); + if (unlink(path1) < 0 && errno != ENOENT) + r = -errno; + + if (b && symlink(name, path1) < 0) { + k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1); + if (r == 0) + r = k; + } + + path2 = strjoina("/run/systemd/dynamic-uid/direct:", name); + if (unlink(path2) < 0 && errno != ENOENT) { + k = -errno; + if (r == 0) + r = k; + } + + if (b && symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0) { + k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path2); + if (r == 0) + r = k; + } + + return r; +} + static int pick_uid(const char *name, uid_t *ret_uid) { static const uint8_t hash_key[] = { @@ -175,7 +206,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) { if (--n_tries <= 0) /* Give up retrying eventually */ return -EBUSY; - if (candidate < UID_PICK_MIN || candidate > UID_PICK_MAX) + if (!uid_is_dynamic(candidate)) goto next; xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate); @@ -200,7 +231,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) { if (st.st_nlink > 0) break; - /* Oh, bummer, we got got the lock, but the file was unlinked between the time we opened it and + /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and * got the lock. Close it, and try again. */ lock_fd = safe_close(lock_fd); } @@ -223,6 +254,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) { } (void) ftruncate(lock_fd, l); + (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */ *ret_uid = candidate; r = lock_fd; @@ -324,14 +356,16 @@ static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) { return 0; } -static void unlink_uid_lock(int lock_fd, uid_t uid) { +static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) { char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; if (lock_fd < 0) return; xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid); - (void) unlink_noerrno(lock_path); + (void) unlink(lock_path); + + (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */ } int dynamic_user_realize(DynamicUser *d, uid_t *ret) { @@ -399,7 +433,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) { /* So, we found a working UID/lock combination. Let's see if we actually still need it. */ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) { - unlink_uid_lock(uid_lock_fd, uid); + unlink_uid_lock(uid_lock_fd, uid, d->name); return -errno; } @@ -407,7 +441,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) { if (r < 0) { if (r != -EAGAIN) { /* OK, something bad happened, let's get rid of the bits we acquired. */ - unlink_uid_lock(uid_lock_fd, uid); + unlink_uid_lock(uid_lock_fd, uid, d->name); goto finish; } @@ -416,7 +450,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) { /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we * acquired, and use what's stored now. */ - unlink_uid_lock(uid_lock_fd, uid); + unlink_uid_lock(uid_lock_fd, uid, d->name); safe_close(uid_lock_fd); uid = new_uid; @@ -513,7 +547,7 @@ static int dynamic_user_close(DynamicUser *d) { goto finish; /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */ - unlink_uid_lock(lock_fd, uid); + unlink_uid_lock(lock_fd, uid, d->name); r = 1; finish: @@ -634,11 +668,8 @@ int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) { assert(m); assert(ret); - /* A friendly way to translate a dynamic user's UID into a his name. */ - - if (uid < UID_PICK_MIN) - return -ESRCH; - if (uid > UID_PICK_MAX) + /* A friendly way to translate a dynamic user's UID into a name. */ + if (!uid_is_dynamic(uid)) return -ESRCH; xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid); diff --git a/src/core/failure-action.c b/src/core/emergency-action.c index ddae46190f..90232bc57a 100644 --- a/src/core/failure-action.c +++ b/src/core/emergency-action.c @@ -23,59 +23,60 @@ #include "bus-error.h" #include "bus-util.h" -#include "failure-action.h" +#include "emergency-action.h" #include "special.h" #include "string-table.h" #include "terminal-util.h" -static void log_and_status(Manager *m, const char *message) { - log_warning("%s", message); +static void log_and_status(Manager *m, const char *message, const char *reason) { + log_warning("%s: %s", message, reason); manager_status_printf(m, STATUS_TYPE_EMERGENCY, ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL, - "%s", message); + "%s: %s", message, reason); } -int failure_action( +int emergency_action( Manager *m, - FailureAction action, - const char *reboot_arg) { + EmergencyAction action, + const char *reboot_arg, + const char *reason) { assert(m); assert(action >= 0); - assert(action < _FAILURE_ACTION_MAX); + assert(action < _EMERGENCY_ACTION_MAX); - if (action == FAILURE_ACTION_NONE) + if (action == EMERGENCY_ACTION_NONE) return -ECANCELED; if (!MANAGER_IS_SYSTEM(m)) { /* Downgrade all options to simply exiting if we run * in user mode */ - log_warning("Exiting as result of failure."); + log_warning("Exiting: %s", reason); m->exit_code = MANAGER_EXIT; return -ECANCELED; } switch (action) { - case FAILURE_ACTION_REBOOT: - log_and_status(m, "Rebooting as result of failure."); + case EMERGENCY_ACTION_REBOOT: + log_and_status(m, "Rebooting", reason); (void) update_reboot_parameter_and_warn(reboot_arg); (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); break; - case FAILURE_ACTION_REBOOT_FORCE: - log_and_status(m, "Forcibly rebooting as result of failure."); + case EMERGENCY_ACTION_REBOOT_FORCE: + log_and_status(m, "Forcibly rebooting", reason); (void) update_reboot_parameter_and_warn(reboot_arg); m->exit_code = MANAGER_REBOOT; break; - case FAILURE_ACTION_REBOOT_IMMEDIATE: - log_and_status(m, "Rebooting immediately as result of failure."); + case EMERGENCY_ACTION_REBOOT_IMMEDIATE: + log_and_status(m, "Rebooting immediately", reason); sync(); @@ -89,18 +90,18 @@ int failure_action( reboot(RB_AUTOBOOT); break; - case FAILURE_ACTION_POWEROFF: - log_and_status(m, "Powering off as result of failure."); + case EMERGENCY_ACTION_POWEROFF: + log_and_status(m, "Powering off", reason); (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL); break; - case FAILURE_ACTION_POWEROFF_FORCE: - log_and_status(m, "Forcibly powering off as result of failure."); + case EMERGENCY_ACTION_POWEROFF_FORCE: + log_and_status(m, "Forcibly powering off", reason); m->exit_code = MANAGER_POWEROFF; break; - case FAILURE_ACTION_POWEROFF_IMMEDIATE: - log_and_status(m, "Powering off immediately as result of failure."); + case EMERGENCY_ACTION_POWEROFF_IMMEDIATE: + log_and_status(m, "Powering off immediately", reason); sync(); @@ -109,19 +110,19 @@ int failure_action( break; default: - assert_not_reached("Unknown failure action"); + assert_not_reached("Unknown emergency action"); } return -ECANCELED; } -static const char* const failure_action_table[_FAILURE_ACTION_MAX] = { - [FAILURE_ACTION_NONE] = "none", - [FAILURE_ACTION_REBOOT] = "reboot", - [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force", - [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate", - [FAILURE_ACTION_POWEROFF] = "poweroff", - [FAILURE_ACTION_POWEROFF_FORCE] = "poweroff-force", - [FAILURE_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate" +static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = { + [EMERGENCY_ACTION_NONE] = "none", + [EMERGENCY_ACTION_REBOOT] = "reboot", + [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force", + [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate", + [EMERGENCY_ACTION_POWEROFF] = "poweroff", + [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force", + [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate" }; -DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction); +DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction); diff --git a/src/core/failure-action.h b/src/core/emergency-action.h index 1adac4ad5c..8804b59752 100644 --- a/src/core/failure-action.h +++ b/src/core/emergency-action.h @@ -20,22 +20,22 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -typedef enum FailureAction { - FAILURE_ACTION_NONE, - FAILURE_ACTION_REBOOT, - FAILURE_ACTION_REBOOT_FORCE, - FAILURE_ACTION_REBOOT_IMMEDIATE, - FAILURE_ACTION_POWEROFF, - FAILURE_ACTION_POWEROFF_FORCE, - FAILURE_ACTION_POWEROFF_IMMEDIATE, - _FAILURE_ACTION_MAX, - _FAILURE_ACTION_INVALID = -1 -} FailureAction; +typedef enum EmergencyAction { + EMERGENCY_ACTION_NONE, + EMERGENCY_ACTION_REBOOT, + EMERGENCY_ACTION_REBOOT_FORCE, + EMERGENCY_ACTION_REBOOT_IMMEDIATE, + EMERGENCY_ACTION_POWEROFF, + EMERGENCY_ACTION_POWEROFF_FORCE, + EMERGENCY_ACTION_POWEROFF_IMMEDIATE, + _EMERGENCY_ACTION_MAX, + _EMERGENCY_ACTION_INVALID = -1 +} EmergencyAction; #include "macro.h" #include "manager.h" -int failure_action(Manager *m, FailureAction action, const char *reboot_arg); +int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason); -const char* failure_action_to_string(FailureAction i) _const_; -FailureAction failure_action_from_string(const char *s) _pure_; +const char* emergency_action_to_string(EmergencyAction i) _const_; +EmergencyAction emergency_action_from_string(const char *s) _pure_; diff --git a/src/core/execute.c b/src/core/execute.c index 6019df7ea6..85ee82c3e1 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -29,8 +29,10 @@ #include <sys/mman.h> #include <sys/personality.h> #include <sys/prctl.h> +#include <sys/shm.h> #include <sys/socket.h> #include <sys/stat.h> +#include <sys/types.h> #include <sys/un.h> #include <unistd.h> #include <utmpx.h> @@ -91,6 +93,7 @@ #include "selinux-util.h" #include "signal-util.h" #include "smack-util.h" +#include "special.h" #include "string-table.h" #include "string-util.h" #include "strv.h" @@ -410,7 +413,8 @@ static int fixup_output(ExecOutput std_output, int socket_fd) { static int setup_input( const ExecContext *context, const ExecParameters *params, - int socket_fd) { + int socket_fd, + int named_iofds[3]) { ExecInput i; @@ -460,6 +464,10 @@ static int setup_input( case EXEC_INPUT_SOCKET: return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO; + case EXEC_INPUT_NAMED_FD: + (void) fd_nonblock(named_iofds[STDIN_FILENO], false); + return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO; + default: assert_not_reached("Unknown input type"); } @@ -471,6 +479,7 @@ static int setup_output( const ExecParameters *params, int fileno, int socket_fd, + int named_iofds[3], const char *ident, uid_t uid, gid_t gid, @@ -522,7 +531,7 @@ static int setup_output( return fileno; /* Duplicate from stdout if possible */ - if (e == o || e == EXEC_OUTPUT_INHERIT) + if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT) return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno; o = e; @@ -584,6 +593,10 @@ static int setup_output( assert(socket_fd >= 0); return dup2(socket_fd, fileno) < 0 ? -errno : fileno; + case EXEC_OUTPUT_NAMED_FD: + (void) fd_nonblock(named_iofds[fileno], false); + return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno; + default: assert_not_reached("Unknown error type"); } @@ -719,73 +732,157 @@ static int ask_for_confirmation(char *response, char **argv) { return r; } -static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) { - bool keep_groups = false; +static int get_fixed_user(const ExecContext *c, const char **user, + uid_t *uid, gid_t *gid, + const char **home, const char **shell) { int r; + const char *name; - assert(context); + assert(c); + + if (!c->user) + return 0; + + /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway + * (i.e. are "/" or "/bin/nologin"). */ + + name = c->user; + r = get_user_creds_clean(&name, uid, gid, home, shell); + if (r < 0) + return r; - /* Lookup and set GID and supplementary group list. Here too - * we avoid NSS lookups for gid=0. */ + *user = name; + return 0; +} - if (context->group || username) { +static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) { + int r; + const char *name; + + assert(c); + + if (!c->group) + return 0; + + name = c->group; + r = get_group_creds(&name, gid); + if (r < 0) + return r; + + *group = name; + return 0; +} + +static int get_supplementary_groups(const ExecContext *c, const char *user, + const char *group, gid_t gid, + gid_t **supplementary_gids, int *ngids) { + char **i; + int r, k = 0; + int ngroups_max; + bool keep_groups = false; + gid_t *groups = NULL; + _cleanup_free_ gid_t *l_gids = NULL; + + assert(c); + + /* + * If user is given, then lookup GID and supplementary groups list. + * We avoid NSS lookups for gid=0. Also we have to initialize groups + * here and as early as possible so we keep the list of supplementary + * groups of the caller. + */ + if (user && gid_is_valid(gid) && gid != 0) { /* First step, initialize groups from /etc/groups */ - if (username && gid != 0) { - if (initgroups(username, gid) < 0) - return -errno; + if (initgroups(user, gid) < 0) + return -errno; - keep_groups = true; - } + keep_groups = true; + } - /* Second step, set our gids */ - if (setresgid(gid, gid, gid) < 0) + if (!c->supplementary_groups) + return 0; + + /* + * If SupplementaryGroups= was passed then NGROUPS_MAX has to + * be positive, otherwise fail. + */ + errno = 0; + ngroups_max = (int) sysconf(_SC_NGROUPS_MAX); + if (ngroups_max <= 0) { + if (errno > 0) return -errno; + else + return -EOPNOTSUPP; /* For all other values */ } - if (context->supplementary_groups) { - int ngroups_max, k; - gid_t *gids; - char **i; + l_gids = new(gid_t, ngroups_max); + if (!l_gids) + return -ENOMEM; - /* Final step, initialize any manually set supplementary groups */ - assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0); + if (keep_groups) { + /* + * Lookup the list of groups that the user belongs to, we + * avoid NSS lookups here too for gid=0. + */ + k = ngroups_max; + if (getgrouplist(user, gid, l_gids, &k) < 0) + return -EINVAL; + } else + k = 0; - if (!(gids = new(gid_t, ngroups_max))) - return -ENOMEM; + STRV_FOREACH(i, c->supplementary_groups) { + const char *g; - if (keep_groups) { - k = getgroups(ngroups_max, gids); - if (k < 0) { - free(gids); - return -errno; - } - } else - k = 0; + if (k >= ngroups_max) + return -E2BIG; - STRV_FOREACH(i, context->supplementary_groups) { - const char *g; + g = *i; + r = get_group_creds(&g, l_gids+k); + if (r < 0) + return r; - if (k >= ngroups_max) { - free(gids); - return -E2BIG; - } + k++; + } - g = *i; - r = get_group_creds(&g, gids+k); - if (r < 0) { - free(gids); - return r; - } + /* + * Sets ngids to zero to drop all supplementary groups, happens + * when we are under root and SupplementaryGroups= is empty. + */ + if (k == 0) { + *ngids = 0; + return 0; + } - k++; - } + /* Otherwise get the final list of supplementary groups */ + groups = memdup(l_gids, sizeof(gid_t) * k); + if (!groups) + return -ENOMEM; - if (setgroups(k, gids) < 0) { - free(gids); - return -errno; - } + *supplementary_gids = groups; + *ngids = k; + + groups = NULL; + + return 0; +} + +static int enforce_groups(const ExecContext *context, gid_t gid, + gid_t *supplementary_gids, int ngids) { + int r; - free(gids); + assert(context); + + /* Handle SupplementaryGroups= even if it is empty */ + if (context->supplementary_groups) { + r = maybe_setgroups(ngids, supplementary_gids); + if (r < 0) + return r; + } + + if (gid_is_valid(gid)) { + /* Then set our gids */ + if (setresgid(gid, gid, gid) < 0) + return -errno; } return 0; @@ -794,6 +891,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ static int enforce_user(const ExecContext *context, uid_t uid) { assert(context); + if (!uid_is_valid(uid)) + return 0; + /* Sets (but doesn't look up) the uid and make sure we keep the * capabilities while doing so. */ @@ -836,14 +936,19 @@ static int null_conv( return PAM_CONV_ERR; } +#endif + static int setup_pam( const char *name, const char *user, uid_t uid, + gid_t gid, const char *tty, char ***env, int fds[], unsigned n_fds) { +#ifdef HAVE_PAM + static const struct pam_conv conv = { .conv = null_conv, .appdata_ptr = NULL @@ -943,8 +1048,14 @@ static int setup_pam( * and this will make PR_SET_PDEATHSIG work in most cases. * If this fails, ignore the error - but expect sd-pam threads * to fail to exit normally */ + + r = maybe_setgroups(0, NULL); + if (r < 0) + log_warning_errno(r, "Failed to setgroups() in sd-pam: %m"); + if (setresgid(gid, gid, gid) < 0) + log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m"); if (setresuid(uid, uid, uid) < 0) - log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m"); + log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m"); (void) ignore_signals(SIGPIPE, -1); @@ -1037,8 +1148,10 @@ fail: closelog(); return r; -} +#else + return 0; #endif +} static void rename_process_from_path(const char *path) { char process_name[11]; @@ -1073,15 +1186,29 @@ static void rename_process_from_path(const char *path) { #ifdef HAVE_SECCOMP -static int apply_seccomp(const ExecContext *c) { +static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { + + if (is_seccomp_available()) + return false; + + log_open(); + log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); + log_close(); + return true; +} + +static int apply_seccomp(const Unit* u, const ExecContext *c) { uint32_t negative_action, action; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; void *id; int r; assert(c); + if (skip_seccomp_unavailable(u, "syscall filtering")) + return 0; + negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno); seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW); @@ -1122,20 +1249,23 @@ finish: return r; } -static int apply_address_families(const ExecContext *c) { - scmp_filter_ctx *seccomp; +static int apply_address_families(const Unit* u, const ExecContext *c) { + scmp_filter_ctx seccomp; Iterator i; int r; +#if defined(__i386__) + return 0; +#endif + assert(c); - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; + if (skip_seccomp_unavailable(u, "RestrictAddressFamilies=")) + return 0; - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; if (c->address_families_whitelist) { int af, first = 0, last = 0; @@ -1232,10 +1362,6 @@ static int apply_address_families(const ExecContext *c) { } } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1243,15 +1369,18 @@ finish: return r; } -static int apply_memory_deny_write_execute(const ExecContext *c) { - scmp_filter_ctx *seccomp; +static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) { + scmp_filter_ctx seccomp; int r; assert(c); - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; + if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute=")) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; r = seccomp_rule_add( seccomp, @@ -1271,7 +1400,12 @@ static int apply_memory_deny_write_execute(const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(shmat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); if (r < 0) goto finish; @@ -1282,22 +1416,25 @@ finish: return r; } -static int apply_restrict_realtime(const ExecContext *c) { +static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { static const int permitted_policies[] = { SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, }; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; unsigned i; int r, p, max_policy = 0; assert(c); - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; + if (skip_seccomp_unavailable(u, "RestrictRealtime=")) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) @@ -1341,7 +1478,34 @@ static int apply_restrict_realtime(const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + +static int apply_protect_sysctl(const Unit *u, const ExecContext *c) { + scmp_filter_ctx seccomp; + int r; + + assert(c); + + /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but + * let's protect even those systems where this is left on in the kernel. */ + + if (skip_seccomp_unavailable(u, "ProtectKernelTunables=")) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(_sysctl), + 0); if (r < 0) goto finish; @@ -1352,12 +1516,33 @@ finish: return r; } +static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) { + assert(c); + + /* Turn off module syscalls on ProtectKernelModules=yes */ + + if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) + return 0; + + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); +} + +static int apply_private_devices(const Unit *u, const ExecContext *c) { + assert(c); + + /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ + + if (skip_seccomp_unavailable(u, "PrivateDevices=")) + return 0; + + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { assert(idle_pipe); - idle_pipe[1] = safe_close(idle_pipe[1]); idle_pipe[2] = safe_close(idle_pipe[2]); @@ -1384,6 +1569,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) { } static int build_environment( + Unit *u, const ExecContext *c, const ExecParameters *p, unsigned n_fds, @@ -1398,10 +1584,11 @@ static int build_environment( unsigned n_env = 0; char *x; + assert(u); assert(c); assert(ret); - our_env = new0(char*, 12); + our_env = new0(char*, 14); if (!our_env) return -ENOMEM; @@ -1436,6 +1623,16 @@ static int build_environment( our_env[n_env++] = x; } + /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic + * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but + * check the database directly. */ + if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) { + x = strdup("SYSTEMD_NSS_BYPASS_BUS=1"); + if (!x) + return -ENOMEM; + our_env[n_env++] = x; + } + if (home) { x = strappend("HOME=", home); if (!x) @@ -1462,6 +1659,13 @@ static int build_environment( our_env[n_env++] = x; } + if (!sd_id128_is_null(u->invocation_id)) { + if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0) + return -ENOMEM; + + our_env[n_env++] = x; + } + if (exec_context_needs_term(c)) { const char *tty_path, *term = NULL; @@ -1547,7 +1751,10 @@ static bool exec_needs_mount_namespace( if (context->private_devices || context->protect_system != PROTECT_SYSTEM_NO || - context->protect_home != PROTECT_HOME_NO) + context->protect_home != PROTECT_HOME_NO || + context->protect_kernel_tunables || + context->protect_kernel_modules || + context->protect_control_groups) return true; return false; @@ -1574,9 +1781,9 @@ static int setup_private_users(uid_t uid, gid_t gid) { asprintf(&uid_map, "0 0 1\n" /* Map root → root */ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */ - uid, uid); /* The case where the above is the same */ + uid, uid); else - uid_map = strdup("0 0 1\n"); + uid_map = strdup("0 0 1\n"); /* The case where the above is the same */ if (!uid_map) return -ENOMEM; @@ -1706,6 +1913,197 @@ static int setup_private_users(uid_t uid, gid_t gid) { return 0; } +static int setup_runtime_directory( + const ExecContext *context, + const ExecParameters *params, + uid_t uid, + gid_t gid) { + + char **rt; + int r; + + assert(context); + assert(params); + + STRV_FOREACH(rt, context->runtime_directory) { + _cleanup_free_ char *p; + + p = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!p) + return -ENOMEM; + + r = mkdir_p_label(p, context->runtime_directory_mode); + if (r < 0) + return r; + + r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); + if (r < 0) + return r; + } + + return 0; +} + +static int setup_smack( + const ExecContext *context, + const ExecCommand *command) { + +#ifdef HAVE_SMACK + int r; + + assert(context); + assert(command); + + if (!mac_smack_use()) + return 0; + + if (context->smack_process_label) { + r = mac_smack_apply_pid(0, context->smack_process_label); + if (r < 0) + return r; + } +#ifdef SMACK_DEFAULT_PROCESS_LABEL + else { + _cleanup_free_ char *exec_label = NULL; + + r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); + if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) + return r; + + r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); + if (r < 0) + return r; + } +#endif +#endif + + return 0; +} + +static int compile_read_write_paths( + const ExecContext *context, + const ExecParameters *params, + char ***ret) { + + _cleanup_strv_free_ char **l = NULL; + char **rt; + + /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all + * runtime directories. */ + + if (strv_isempty(context->read_write_paths) && + strv_isempty(context->runtime_directory)) { + *ret = NULL; /* NOP if neither is set */ + return 0; + } + + l = strv_copy(context->read_write_paths); + if (!l) + return -ENOMEM; + + STRV_FOREACH(rt, context->runtime_directory) { + char *s; + + s = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!s) + return -ENOMEM; + + if (strv_consume(&l, s) < 0) + return -ENOMEM; + } + + *ret = l; + l = NULL; + + return 0; +} + +static int apply_mount_namespace(Unit *u, const ExecContext *context, + const ExecParameters *params, + ExecRuntime *runtime) { + int r; + _cleanup_free_ char **rw = NULL; + char *tmp = NULL, *var = NULL; + const char *root_dir = NULL; + NameSpaceInfo ns_info = { + .private_dev = context->private_devices, + .protect_control_groups = context->protect_control_groups, + .protect_kernel_tunables = context->protect_kernel_tunables, + .protect_kernel_modules = context->protect_kernel_modules, + }; + + assert(context); + + /* The runtime struct only contains the parent of the private /tmp, + * which is non-accessible to world users. Inside of it there's a /tmp + * that is sticky, and that's the one we want to use here. */ + + if (context->private_tmp && runtime) { + if (runtime->tmp_dir) + tmp = strjoina(runtime->tmp_dir, "/tmp"); + if (runtime->var_tmp_dir) + var = strjoina(runtime->var_tmp_dir, "/tmp"); + } + + r = compile_read_write_paths(context, params, &rw); + if (r < 0) + return r; + + if (params->flags & EXEC_APPLY_CHROOT) + root_dir = context->root_directory; + + r = setup_namespace(root_dir, &ns_info, rw, + context->read_only_paths, + context->inaccessible_paths, + tmp, + var, + context->protect_home, + context->protect_system, + context->mount_flags); + + /* If we couldn't set up the namespace this is probably due to a + * missing capability. In this case, silently proceeed. */ + if (IN_SET(r, -EPERM, -EACCES)) { + log_open(); + log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); + log_close(); + r = 0; + } + + return r; +} + +static int apply_working_directory(const ExecContext *context, + const ExecParameters *params, + const char *home, + const bool needs_mount_ns) { + const char *d; + const char *wd; + + assert(context); + + if (context->working_directory_home) + wd = home; + else if (context->working_directory) + wd = context->working_directory; + else + wd = "/"; + + if (params->flags & EXEC_APPLY_CHROOT) { + if (!needs_mount_ns && context->root_directory) + if (chroot(context->root_directory) < 0) + return -errno; + + d = wd; + } else + d = strjoina(strempty(context->root_directory), "/", strempty(wd)); + + if (chdir(d) < 0 && !context->working_directory_missing_ok) + return -errno; + + return 0; +} + static void append_socket_pair(int *array, unsigned *n, int pair[2]) { assert(array); assert(n); @@ -1723,11 +2121,12 @@ static int close_remaining_fds( const ExecParameters *params, ExecRuntime *runtime, DynamicCreds *dcreds, + int user_lookup_fd, int socket_fd, int *fds, unsigned n_fds) { unsigned n_dont_close = 0; - int dont_close[n_fds + 11]; + int dont_close[n_fds + 12]; assert(params); @@ -1755,9 +2154,73 @@ static int close_remaining_fds( append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket); } + if (user_lookup_fd >= 0) + dont_close[n_dont_close++] = user_lookup_fd; + return close_all_fds(dont_close, n_dont_close); } +static bool context_has_address_families(const ExecContext *c) { + assert(c); + + return c->address_families_whitelist || + !set_isempty(c->address_families); +} + +static bool context_has_syscall_filters(const ExecContext *c) { + assert(c); + + return c->syscall_whitelist || + !set_isempty(c->syscall_filter) || + !set_isempty(c->syscall_archs); +} + +static bool context_has_no_new_privileges(const ExecContext *c) { + assert(c); + + if (c->no_new_privileges) + return true; + + if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */ + return false; + + return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */ + c->memory_deny_write_execute || + c->restrict_realtime || + c->protect_kernel_tunables || + c->protect_kernel_modules || + c->private_devices || + context_has_syscall_filters(c); +} + +static int send_user_lookup( + Unit *unit, + int user_lookup_fd, + uid_t uid, + gid_t gid) { + + assert(unit); + + /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID + * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was + * specified. */ + + if (user_lookup_fd < 0) + return 0; + + if (!uid_is_valid(uid) && !gid_is_valid(gid)) + return 0; + + if (writev(user_lookup_fd, + (struct iovec[]) { + { .iov_base = &uid, .iov_len = sizeof(uid) }, + { .iov_base = &gid, .iov_len = sizeof(gid) }, + { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0) + return -errno; + + return 0; +} + static int exec_child( Unit *unit, ExecCommand *command, @@ -1767,19 +2230,23 @@ static int exec_child( DynamicCreds *dcreds, char **argv, int socket_fd, + int named_iofds[3], int *fds, unsigned n_fds, char **files_env, + int user_lookup_fd, int *exit_status) { _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL; _cleanup_free_ char *mac_selinux_context_net = NULL; - const char *username = NULL, *home = NULL, *shell = NULL, *wd; + _cleanup_free_ gid_t *supplementary_gids = NULL; + const char *username = NULL, *groupname = NULL; + const char *home = NULL, *shell = NULL; dev_t journal_stream_dev = 0; ino_t journal_stream_ino = 0; bool needs_mount_namespace; uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; - int i, r; + int i, r, ngids = 0; assert(unit); assert(command); @@ -1815,7 +2282,7 @@ static int exec_child( log_forget_fds(); - r = close_remaining_fds(params, runtime, dcreds, socket_fd, fds, n_fds); + r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds); if (r < 0) { *exit_status = EXIT_FDS; return r; @@ -1862,7 +2329,7 @@ static int exec_child( return r; } - if (uid == UID_INVALID || gid == GID_INVALID) { + if (!uid_is_valid(uid) || !gid_is_valid(gid)) { *exit_status = EXIT_USER; return -ESRCH; } @@ -1871,55 +2338,53 @@ static int exec_child( username = dcreds->user->name; } else { - if (context->user) { - username = context->user; - r = get_user_creds(&username, &uid, &gid, &home, &shell); - if (r < 0) { - *exit_status = EXIT_USER; - return r; - } - - /* Don't set $HOME or $SHELL if they are are not particularly enlightening anyway. */ - if (isempty(home) || path_equal(home, "/")) - home = NULL; + r = get_fixed_user(context, &username, &uid, &gid, &home, &shell); + if (r < 0) { + *exit_status = EXIT_USER; + return r; + } - if (isempty(shell) || PATH_IN_SET(shell, - "/bin/nologin", - "/sbin/nologin", - "/usr/bin/nologin", - "/usr/sbin/nologin")) - shell = NULL; + r = get_fixed_group(context, &groupname, &gid); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; } + } - if (context->group) { - const char *g = context->group; + /* Initialize user supplementary groups and get SupplementaryGroups= ones */ + r = get_supplementary_groups(context, username, groupname, gid, + &supplementary_gids, &ngids); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; + } - r = get_group_creds(&g, &gid); - if (r < 0) { - *exit_status = EXIT_GROUP; - return r; - } - } + r = send_user_lookup(unit, user_lookup_fd, uid, gid); + if (r < 0) { + *exit_status = EXIT_USER; + return r; } + user_lookup_fd = safe_close(user_lookup_fd); + /* If a socket is connected to STDIN/STDOUT/STDERR, we * must sure to drop O_NONBLOCK */ if (socket_fd >= 0) (void) fd_nonblock(socket_fd, false); - r = setup_input(context, params, socket_fd); + r = setup_input(context, params, socket_fd, named_iofds); if (r < 0) { *exit_status = EXIT_STDIN; return r; } - r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); + r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDOUT; return r; } - r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); + r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDERR; return r; @@ -2006,7 +2471,7 @@ static int exec_child( USER_PROCESS, username ? "root" : context->user); - if (context->user && is_terminal_input(context->std_input)) { + if (context->user) { r = chown_terminal(STDIN_FILENO, uid); if (r < 0) { *exit_status = EXIT_STDIN; @@ -2033,32 +2498,15 @@ static int exec_child( } if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) { - char **rt; - - STRV_FOREACH(rt, context->runtime_directory) { - _cleanup_free_ char *p; - - p = strjoin(params->runtime_prefix, "/", *rt, NULL); - if (!p) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return -ENOMEM; - } - - r = mkdir_p_label(p, context->runtime_directory_mode); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } - - r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } + r = setup_runtime_directory(context, params, uid, gid); + if (r < 0) { + *exit_status = EXIT_RUNTIME_DIRECTORY; + return r; } } r = build_environment( + unit, context, params, n_fds, @@ -2092,49 +2540,16 @@ static int exec_child( } accum_env = strv_env_clean(accum_env); - umask(context->umask); + (void) umask(context->umask); if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - r = enforce_groups(context, username, gid); - if (r < 0) { - *exit_status = EXIT_GROUP; - return r; - } -#ifdef HAVE_SMACK - if (context->smack_process_label) { - r = mac_smack_apply_pid(0, context->smack_process_label); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#ifdef SMACK_DEFAULT_PROCESS_LABEL - else { - _cleanup_free_ char *exec_label = NULL; - - r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); - if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - - r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#endif -#endif -#ifdef HAVE_PAM if (context->pam_name && username) { - r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds); + r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { *exit_status = EXIT_PAM; return r; } } -#endif } if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) { @@ -2146,75 +2561,27 @@ static int exec_child( } needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); - if (needs_mount_namespace) { - char *tmp = NULL, *var = NULL; - - /* The runtime struct only contains the parent - * of the private /tmp, which is - * non-accessible to world users. Inside of it - * there's a /tmp that is sticky, and that's - * the one we want to use here. */ - - if (context->private_tmp && runtime) { - if (runtime->tmp_dir) - tmp = strjoina(runtime->tmp_dir, "/tmp"); - if (runtime->var_tmp_dir) - var = strjoina(runtime->var_tmp_dir, "/tmp"); - } - - r = setup_namespace( - (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL, - context->read_write_paths, - context->read_only_paths, - context->inaccessible_paths, - tmp, - var, - context->private_devices, - context->protect_home, - context->protect_system, - context->mount_flags); - - /* If we couldn't set up the namespace this is - * probably due to a missing capability. In this case, - * silently proceeed. */ - if (r == -EPERM || r == -EACCES) { - log_open(); - log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); - log_close(); - } else if (r < 0) { + r = apply_mount_namespace(unit, context, params, runtime); + if (r < 0) { *exit_status = EXIT_NAMESPACE; return r; } } - if (context->working_directory_home) - wd = home; - else if (context->working_directory) - wd = context->working_directory; - else - wd = "/"; - - if (params->flags & EXEC_APPLY_CHROOT) { - if (!needs_mount_namespace && context->root_directory) - if (chroot(context->root_directory) < 0) { - *exit_status = EXIT_CHROOT; - return -errno; - } - - if (chdir(wd) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; - } - } else { - const char *d; + /* Apply just after mount namespace setup */ + r = apply_working_directory(context, params, home, needs_mount_namespace); + if (r < 0) { + *exit_status = EXIT_CHROOT; + return r; + } - d = strjoina(strempty(context->root_directory), "/", strempty(wd)); - if (chdir(d) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; + /* Drop groups as early as possbile */ + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { + r = enforce_groups(context, gid, supplementary_gids, ngids); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; } } @@ -2259,11 +2626,6 @@ static int exec_child( if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - bool use_address_families = context->address_families_whitelist || - !set_isempty(context->address_families); - bool use_syscall_filter = context->syscall_whitelist || - !set_isempty(context->syscall_filter) || - !set_isempty(context->syscall_archs); int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { @@ -2330,6 +2692,41 @@ static int exec_child( } } + /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to + * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires + * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls + * are restricted. */ + +#ifdef HAVE_SELINUX + if (mac_selinux_use()) { + char *exec_context = mac_selinux_context_net ?: context->selinux_context; + + if (exec_context) { + r = setexeccon(exec_context); + if (r < 0) { + *exit_status = EXIT_SELINUX_CONTEXT; + return r; + } + } + } +#endif + + r = setup_smack(context, command); + if (r < 0) { + *exit_status = EXIT_SMACK_PROCESS_LABEL; + return r; + } + +#ifdef HAVE_APPARMOR + if (context->apparmor_profile && mac_apparmor_use()) { + r = aa_change_onexec(context->apparmor_profile); + if (r < 0 && !context->apparmor_profile_ignore) { + *exit_status = EXIT_APPARMOR_PROFILE; + return -errno; + } + } +#endif + /* PR_GET_SECUREBITS is not privileged, while * PR_SET_SECUREBITS is. So to suppress * potential EPERMs we'll try not to call @@ -2340,16 +2737,15 @@ static int exec_child( return -errno; } - if (context->no_new_privileges || - (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter))) + if (context_has_no_new_privileges(context)) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; } #ifdef HAVE_SECCOMP - if (use_address_families) { - r = apply_address_families(context); + if (context_has_address_families(context)) { + r = apply_address_families(unit, context); if (r < 0) { *exit_status = EXIT_ADDRESS_FAMILIES; return r; @@ -2357,7 +2753,7 @@ static int exec_child( } if (context->memory_deny_write_execute) { - r = apply_memory_deny_write_execute(context); + r = apply_memory_deny_write_execute(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; return r; @@ -2365,42 +2761,44 @@ static int exec_child( } if (context->restrict_realtime) { - r = apply_restrict_realtime(context); + r = apply_restrict_realtime(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; return r; } } - if (use_syscall_filter) { - r = apply_seccomp(context); + if (context->protect_kernel_tunables) { + r = apply_protect_sysctl(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; return r; } } -#endif -#ifdef HAVE_SELINUX - if (mac_selinux_use()) { - char *exec_context = mac_selinux_context_net ?: context->selinux_context; + if (context->protect_kernel_modules) { + r = apply_protect_kernel_modules(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } - if (exec_context) { - r = setexeccon(exec_context); - if (r < 0) { - *exit_status = EXIT_SELINUX_CONTEXT; - return r; - } + if (context->private_devices) { + r = apply_private_devices(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; } } -#endif -#ifdef HAVE_APPARMOR - if (context->apparmor_profile && mac_apparmor_use()) { - r = aa_change_onexec(context->apparmor_profile); - if (r < 0 && !context->apparmor_profile_ignore) { - *exit_status = EXIT_APPARMOR_PROFILE; - return -errno; + /* This really should remain the last step before the execve(), to make sure our own code is unaffected + * by the filter as little as possible. */ + if (context_has_syscall_filters(context)) { + r = apply_seccomp(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; } } #endif @@ -2444,6 +2842,7 @@ int exec_spawn(Unit *unit, int *fds = NULL; unsigned n_fds = 0; _cleanup_free_ char *line = NULL; int socket_fd, r; + int named_iofds[3] = { -1, -1, -1 }; char **argv; pid_t pid; @@ -2470,6 +2869,10 @@ int exec_spawn(Unit *unit, n_fds = params->n_fds; } + r = exec_context_named_iofds(unit, context, params, named_iofds); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m"); + r = exec_context_load_environment(unit, context, &files_env); if (r < 0) return log_unit_error_errno(unit, r, "Failed to load environment files: %m"); @@ -2499,8 +2902,10 @@ int exec_spawn(Unit *unit, dcreds, argv, socket_fd, + named_iofds, fds, n_fds, files_env, + unit->manager->user_lookup_fds[1], &exit_status); if (r < 0) { log_open(); @@ -2560,6 +2965,9 @@ void exec_context_done(ExecContext *c) { for (l = 0; l < ELEMENTSOF(c->rlimit); l++) c->rlimit[l] = mfree(c->rlimit[l]); + for (l = 0; l < 3; l++) + c->stdio_fdname[l] = mfree(c->stdio_fdname[l]); + c->working_directory = mfree(c->working_directory); c->root_directory = mfree(c->root_directory); c->tty_path = mfree(c->tty_path); @@ -2658,6 +3066,56 @@ static void invalid_env(const char *p, void *userdata) { log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path); } +const char* exec_context_fdname(const ExecContext *c, int fd_index) { + assert(c); + + switch (fd_index) { + case STDIN_FILENO: + if (c->std_input != EXEC_INPUT_NAMED_FD) + return NULL; + return c->stdio_fdname[STDIN_FILENO] ?: "stdin"; + case STDOUT_FILENO: + if (c->std_output != EXEC_OUTPUT_NAMED_FD) + return NULL; + return c->stdio_fdname[STDOUT_FILENO] ?: "stdout"; + case STDERR_FILENO: + if (c->std_error != EXEC_OUTPUT_NAMED_FD) + return NULL; + return c->stdio_fdname[STDERR_FILENO] ?: "stderr"; + default: + return NULL; + } +} + +int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) { + unsigned i, targets; + const char *stdio_fdname[3]; + + assert(c); + assert(p); + + targets = (c->std_input == EXEC_INPUT_NAMED_FD) + + (c->std_output == EXEC_OUTPUT_NAMED_FD) + + (c->std_error == EXEC_OUTPUT_NAMED_FD); + + for (i = 0; i < 3; i++) + stdio_fdname[i] = exec_context_fdname(c, i); + + for (i = 0; i < p->n_fds && targets > 0; i++) + if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) { + named_iofds[STDIN_FILENO] = p->fds[i]; + targets--; + } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) { + named_iofds[STDOUT_FILENO] = p->fds[i]; + targets--; + } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) { + named_iofds[STDERR_FILENO] = p->fds[i]; + targets--; + } + + return (targets == 0 ? 0 : -ENOENT); +} + int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) { char **i, **r = NULL; @@ -2803,6 +3261,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sNonBlocking: %s\n" "%sPrivateTmp: %s\n" "%sPrivateDevices: %s\n" + "%sProtectKernelTunables: %s\n" + "%sProtectKernelModules: %s\n" + "%sProtectControlGroups: %s\n" "%sPrivateNetwork: %s\n" "%sPrivateUsers: %s\n" "%sProtectHome: %s\n" @@ -2816,6 +3277,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->non_blocking), prefix, yes_no(c->private_tmp), prefix, yes_no(c->private_devices), + prefix, yes_no(c->protect_kernel_tunables), + prefix, yes_no(c->protect_kernel_modules), + prefix, yes_no(c->protect_control_groups), prefix, yes_no(c->private_network), prefix, yes_no(c->private_users), prefix, protect_home_to_string(c->protect_home), @@ -3159,7 +3623,8 @@ char *exec_command_line(char **argv) { STRV_FOREACH(a, argv) k += strlen(*a)+3; - if (!(n = new(char, k))) + n = new(char, k); + if (!n) return NULL; p = n; @@ -3348,9 +3813,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r) { free(r->tmp_dir); free(r->var_tmp_dir); safe_close_pair(r->netns_storage_socket); - free(r); - - return NULL; + return mfree(r); } int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) { @@ -3506,7 +3969,8 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_TTY] = "tty", [EXEC_INPUT_TTY_FORCE] = "tty-force", [EXEC_INPUT_TTY_FAIL] = "tty-fail", - [EXEC_INPUT_SOCKET] = "socket" + [EXEC_INPUT_SOCKET] = "socket", + [EXEC_INPUT_NAMED_FD] = "fd", }; DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput); @@ -3521,7 +3985,8 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = { [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console", [EXEC_OUTPUT_JOURNAL] = "journal", [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console", - [EXEC_OUTPUT_SOCKET] = "socket" + [EXEC_OUTPUT_SOCKET] = "socket", + [EXEC_OUTPUT_NAMED_FD] = "fd", }; DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput); diff --git a/src/core/execute.h b/src/core/execute.h index 106154f81a..c7d0f7761e 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -50,6 +50,7 @@ typedef enum ExecInput { EXEC_INPUT_TTY_FORCE, EXEC_INPUT_TTY_FAIL, EXEC_INPUT_SOCKET, + EXEC_INPUT_NAMED_FD, _EXEC_INPUT_MAX, _EXEC_INPUT_INVALID = -1 } ExecInput; @@ -65,6 +66,7 @@ typedef enum ExecOutput { EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE, EXEC_OUTPUT_SOCKET, + EXEC_OUTPUT_NAMED_FD, _EXEC_OUTPUT_MAX, _EXEC_OUTPUT_INVALID = -1 } ExecOutput; @@ -120,6 +122,7 @@ struct ExecContext { ExecInput std_input; ExecOutput std_output; ExecOutput std_error; + char *stdio_fdname[3]; nsec_t timer_slack_nsec; @@ -174,10 +177,14 @@ struct ExecContext { bool private_users; ProtectSystem protect_system; ProtectHome protect_home; + bool protect_kernel_tunables; + bool protect_kernel_modules; + bool protect_control_groups; bool no_new_privileges; bool dynamic_user; + bool remove_ipc; /* This is not exposed to the user but available * internally. We need it to make sure that whenever we spawn @@ -280,6 +287,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix); int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_root); int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l); +int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]); +const char* exec_context_fdname(const ExecContext *c, int fd_index); bool exec_context_may_touch_console(ExecContext *c); bool exec_context_maintains_privileges(ExecContext *c); diff --git a/src/core/job.c b/src/core/job.c index 7557874d4d..ac6910a906 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR } static void job_print_status_message(Unit *u, JobType t, JobResult result) { - static struct { + static const struct { const char *color, *word; } const statuses[_JOB_RESULT_MAX] = { - [JOB_DONE] = {ANSI_GREEN, " OK "}, - [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "}, - [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"}, - [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"}, - [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "}, - [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"}, - [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"}, + [JOB_DONE] = { ANSI_GREEN, " OK " }, + [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " }, + [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" }, + [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" }, + [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " }, + [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" }, + [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" }, }; const char *format; @@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { if (!format) return; + /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */ DISABLE_WARNING_FORMAT_NONLITERAL; - xsprintf(buf, format, unit_description(u)); + snprintf(buf, sizeof(buf), format, unit_description(u)); REENABLE_WARNING; switch (t) { @@ -927,7 +928,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user u = j->unit; job_finish_and_invalidate(j, JOB_TIMEOUT, true, false); - failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg); + emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out"); return 0; } @@ -997,7 +998,10 @@ char *job_dbus_path(Job *j) { return p; } -int job_serialize(Job *j, FILE *f, FDSet *fds) { +int job_serialize(Job *j, FILE *f) { + assert(j); + assert(f); + fprintf(f, "job-id=%u\n", j->id); fprintf(f, "job-type=%s\n", job_type_to_string(j->type)); fprintf(f, "job-state=%s\n", job_state_to_string(j->state)); @@ -1008,15 +1012,16 @@ int job_serialize(Job *j, FILE *f, FDSet *fds) { if (j->begin_usec > 0) fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec); - bus_track_serialize(j->clients, f); + bus_track_serialize(j->clients, f, "subscribed"); /* End marker */ fputc('\n', f); return 0; } -int job_deserialize(Job *j, FILE *f, FDSet *fds) { +int job_deserialize(Job *j, FILE *f) { assert(j); + assert(f); for (;;) { char line[LINE_MAX], *l, *v; @@ -1106,7 +1111,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) { } else if (streq(l, "subscribed")) { if (strv_extend(&j->deserialized_clients, v) < 0) - return log_oom(); + log_oom(); } } } @@ -1118,9 +1123,8 @@ int job_coldplug(Job *j) { /* After deserialization is complete and the bus connection * set up again, let's start watching our subscribers again */ - r = bus_track_coldplug(j->manager, &j->clients, &j->deserialized_clients); - if (r < 0) - return r; + (void) bus_track_coldplug(j->manager, &j->clients, false, j->deserialized_clients); + j->deserialized_clients = strv_free(j->deserialized_clients); if (j->state == JOB_WAITING) job_add_to_run_queue(j); diff --git a/src/core/job.h b/src/core/job.h index d359e8bb3e..85368f0d30 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -177,8 +177,8 @@ Job* job_install(Job *j); int job_install_deserialized(Job *j); void job_uninstall(Job *j); void job_dump(Job *j, FILE*f, const char *prefix); -int job_serialize(Job *j, FILE *f, FDSet *fds); -int job_deserialize(Job *j, FILE *f, FDSet *fds); +int job_serialize(Job *j, FILE *f); +int job_deserialize(Job *j, FILE *f); int job_coldplug(Job *j); JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 251155b428..af2f9d960b 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -35,9 +35,9 @@ $1.Environment, config_parse_environ, 0, $1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files) $1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment) $1.DynamicUser, config_parse_bool, 0, offsetof($1, exec_context.dynamic_user) -$1.StandardInput, config_parse_input, 0, offsetof($1, exec_context.std_input) -$1.StandardOutput, config_parse_output, 0, offsetof($1, exec_context.std_output) -$1.StandardError, config_parse_output, 0, offsetof($1, exec_context.std_error) +$1.StandardInput, config_parse_exec_input, 0, offsetof($1, exec_context) +$1.StandardOutput, config_parse_exec_output, 0, offsetof($1, exec_context) +$1.StandardError, config_parse_exec_output, 0, offsetof($1, exec_context) $1.TTYPath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.tty_path) $1.TTYReset, config_parse_bool, 0, offsetof($1, exec_context.tty_reset) $1.TTYVHangup, config_parse_bool, 0, offsetof($1, exec_context.tty_vhangup) @@ -89,6 +89,9 @@ $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, $1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) +$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) +$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules) +$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users) $1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context) @@ -122,6 +125,8 @@ $1.KillSignal, config_parse_signal, 0, m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS', `$1.Slice, config_parse_unit_slice, 0, 0 $1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting) +$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight) +$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight) $1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares) $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) @@ -129,6 +134,7 @@ $1.MemoryAccounting, config_parse_bool, 0, $1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemorySwapMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) @@ -182,13 +188,13 @@ Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0, Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate) Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0 Unit.JobTimeoutSec, config_parse_sec_fix_0, 0, offsetof(Unit, job_timeout) -Unit.JobTimeoutAction, config_parse_failure_action, 0, offsetof(Unit, job_timeout_action) +Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action) Unit.JobTimeoutRebootArgument, config_parse_string, 0, offsetof(Unit, job_timeout_reboot_arg) Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_limit.interval) m4_dnl The following is a legacy alias name for compatibility Unit.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval) Unit.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst) -Unit.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action) +Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action) Unit.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg) Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions) Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions) @@ -245,9 +251,9 @@ Service.WatchdogSec, config_parse_sec, 0, m4_dnl The following three only exist for compatibility, they moved into Unit, see above Service.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval) Service.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst) -Service.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action) +Service.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action) Service.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg) -Service.FailureAction, config_parse_failure_action, 0, offsetof(Service, failure_action) +Service.FailureAction, config_parse_emergency_action, 0, offsetof(Service, emergency_action) Service.Type, config_parse_service_type, 0, offsetof(Service, type) Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart) Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only) @@ -353,6 +359,8 @@ Mount.Type, config_parse_string, 0, Mount.TimeoutSec, config_parse_sec, 0, offsetof(Mount, timeout_usec) Mount.DirectoryMode, config_parse_mode, 0, offsetof(Mount, directory_mode) Mount.SloppyOptions, config_parse_bool, 0, offsetof(Mount, sloppy_options) +Mount.LazyUnmount, config_parse_bool, 0, offsetof(Mount, lazy_unmount) +Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount) EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 420f368689..cbc826809e 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -776,8 +776,104 @@ int config_parse_socket_bindtodevice( return 0; } -DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output specifier"); -DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input literal specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output literal specifier"); + +int config_parse_exec_input(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + ExecContext *c = data; + const char *name; + int r; + + assert(data); + assert(filename); + assert(line); + assert(rvalue); + + name = startswith(rvalue, "fd:"); + if (name) { + /* Strip prefix and validate fd name */ + if (!fdname_is_valid(name)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name); + return 0; + } + c->std_input = EXEC_INPUT_NAMED_FD; + r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], name); + if (r < 0) + log_oom(); + return r; + } else { + ExecInput ei = exec_input_from_string(rvalue); + if (ei == _EXEC_INPUT_INVALID) + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue); + else + c->std_input = ei; + return 0; + } +} + +int config_parse_exec_output(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + ExecContext *c = data; + ExecOutput eo; + const char *name; + int r; + + assert(data); + assert(filename); + assert(line); + assert(lvalue); + assert(rvalue); + + name = startswith(rvalue, "fd:"); + if (name) { + /* Strip prefix and validate fd name */ + if (!fdname_is_valid(name)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name); + return 0; + } + eo = EXEC_OUTPUT_NAMED_FD; + } else { + eo = exec_output_from_string(rvalue); + if (eo == _EXEC_OUTPUT_INVALID) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue); + return 0; + } + } + + if (streq(lvalue, "StandardOutput")) { + c->std_output = eo; + r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], name); + if (r < 0) + log_oom(); + return r; + } else if (streq(lvalue, "StandardError")) { + c->std_error = eo; + r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], name); + if (r < 0) + log_oom(); + return r; + } else { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output property, ignoring: %s", lvalue); + return 0; + } +} int config_parse_exec_io_class(const char *unit, const char *filename, @@ -1338,10 +1434,13 @@ int config_parse_timer(const char *unit, void *userdata) { Timer *t = data; - usec_t u = 0; + usec_t usec = 0; TimerValue *v; TimerBase b; CalendarSpec *c = NULL; + Unit *u = userdata; + _cleanup_free_ char *k = NULL; + int r; assert(filename); assert(lvalue); @@ -1360,14 +1459,20 @@ int config_parse_timer(const char *unit, return 0; } + r = unit_full_printf(u, rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue); + return 0; + } + if (b == TIMER_CALENDAR) { - if (calendar_spec_from_string(rvalue, &c) < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", rvalue); + if (calendar_spec_from_string(k, &c) < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k); return 0; } } else { - if (parse_sec(rvalue, &u) < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", rvalue); + if (parse_sec(k, &usec) < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k); return 0; } } @@ -1379,7 +1484,7 @@ int config_parse_timer(const char *unit, } v->base = b; - v->value = u; + v->value = usec; v->calendar_spec = c; LIST_PREPEND(value, t->values, v); @@ -1582,11 +1687,7 @@ int config_parse_fdname( return 0; } - free(s->fdname); - s->fdname = p; - p = NULL; - - return 0; + return free_and_replace(s->fdname, p); } int config_parse_service_sockets( @@ -2043,9 +2144,7 @@ int config_parse_working_directory( return 0; } - free(c->working_directory); - c->working_directory = k; - k = NULL; + free_and_replace(c->working_directory, k); c->working_directory_home = false; } @@ -2424,7 +2523,7 @@ int config_parse_unit_condition_null( } DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier"); -DEFINE_CONFIG_PARSE_ENUM(config_parse_failure_action, failure_action, FailureAction, "Failed to parse failure action specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier"); int config_parse_unit_requires_mounts_for( const char *unit, @@ -2519,6 +2618,7 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP + static int syscall_filter_parse_one( const char *unit, const char *filename, @@ -2529,27 +2629,29 @@ static int syscall_filter_parse_one( bool warn) { int r; - if (*t == '@') { - const SystemCallFilterSet *set; + if (t[0] == '@') { + const SyscallFilterSet *set; + const char *i; - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, t)) { - const char *sys; + set = syscall_filter_set_find(t); + if (!set) { + if (warn) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t); + return 0; + } - NULSTR_FOREACH(sys, set->value) { - r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); - if (r < 0) - return r; - } - break; - } + NULSTR_FOREACH(i, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false); + if (r < 0) + return r; + } } else { int id; id = seccomp_syscall_resolve_name(t); if (id == __NR_SCMP_ERROR) { if (warn) - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t); return 0; } @@ -2563,8 +2665,9 @@ static int syscall_filter_parse_one( if (r < 0) return log_oom(); } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); } + return 0; } @@ -2583,8 +2686,7 @@ int config_parse_syscall_filter( ExecContext *c = data; Unit *u = userdata; bool invert = false; - const char *word, *state; - size_t l; + const char *p; int r; assert(filename); @@ -2623,24 +2725,24 @@ int config_parse_syscall_filter( } } - FOREACH_WORD_QUOTED(word, l, rvalue, state) { - _cleanup_free_ char *t = NULL; + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL; - t = strndup(word, l); - if (!t) + r = extract_first_word(&p, &word, NULL, 0); + if (r == 0) + break; + if (r == -ENOMEM) return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } - r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true); if (r < 0) return r; } - if (!isempty(state)) - log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); - - /* Turn on NNP, but only if it wasn't configured explicitly - * before, and only if we are in user mode. */ - if (!c->no_new_privileges_set && MANAGER_IS_USER(u->manager)) - c->no_new_privileges = true; return 0; } @@ -2851,6 +2953,34 @@ int config_parse_unit_slice( DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy"); +int config_parse_cpu_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *weight = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = cg_weight_parse(rvalue, weight); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue); + return 0; + } + + return 0; +} + int config_parse_cpu_shares( const char *unit, const char *filename, @@ -2953,8 +3083,12 @@ int config_parse_memory_limit( c->memory_high = bytes; else if (streq(lvalue, "MemoryMax")) c->memory_max = bytes; - else + else if (streq(lvalue, "MemorySwapMax")) + c->memory_swap_max = bytes; + else if (streq(lvalue, "MemoryLimit")) c->memory_limit = bytes; + else + return -EINVAL; return 0; } @@ -2971,30 +3105,36 @@ int config_parse_tasks_max( void *data, void *userdata) { - uint64_t *tasks_max = data, u; + uint64_t *tasks_max = data, v; + Unit *u = userdata; int r; - if (isempty(rvalue) || streq(rvalue, "infinity")) { - *tasks_max = (uint64_t) -1; + if (isempty(rvalue)) { + *tasks_max = u->manager->default_tasks_max; + return 0; + } + + if (streq(rvalue, "infinity")) { + *tasks_max = CGROUP_LIMIT_MAX; return 0; } r = parse_percent(rvalue); if (r < 0) { - r = safe_atou64(rvalue, &u); + r = safe_atou64(rvalue, &v); if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); return 0; } } else - u = system_tasks_max_scale(r, 100U); + v = system_tasks_max_scale(r, 100U); - if (u <= 0 || u >= UINT64_MAX) { + if (v <= 0 || v >= UINT64_MAX) { log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue); return 0; } - *tasks_max = u; + *tasks_max = v; return 0; } @@ -3037,9 +3177,7 @@ int config_parse_device_allow( if (!path) return log_oom(); - if (!startswith(path, "/dev/") && - !startswith(path, "block-") && - !startswith(path, "char-")) { + if (!is_deviceallow_pattern(path)) { log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path); return 0; } @@ -3694,7 +3832,7 @@ int config_parse_no_new_privileges( return 0; } - c->no_new_privileges = !!k; + c->no_new_privileges = k; c->no_new_privileges_set = true; return 0; @@ -4144,8 +4282,8 @@ void unit_dump_config_items(FILE *f) { { config_parse_exec_cpu_affinity, "CPUAFFINITY" }, { config_parse_mode, "MODE" }, { config_parse_unit_env_file, "FILE" }, - { config_parse_output, "OUTPUT" }, - { config_parse_input, "INPUT" }, + { config_parse_exec_output, "OUTPUT" }, + { config_parse_exec_input, "INPUT" }, { config_parse_log_facility, "FACILITY" }, { config_parse_log_level, "LEVEL" }, { config_parse_exec_secure_bits, "SECUREBITS" }, @@ -4180,7 +4318,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_unit_slice, "SLICE" }, { config_parse_documentation, "URL" }, { config_parse_service_timeout, "SECONDS" }, - { config_parse_failure_action, "ACTION" }, + { config_parse_emergency_action, "ACTION" }, { config_parse_set_status, "STATUS" }, { config_parse_service_sockets, "SOCKETS" }, { config_parse_environ, "ENVIRON" }, @@ -4191,6 +4329,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_address_families, "FAMILIES" }, #endif { config_parse_cpu_shares, "SHARES" }, + { config_parse_cpu_weight, "WEIGHT" }, { config_parse_memory_limit, "LIMIT" }, { config_parse_device_allow, "DEVICE" }, { config_parse_device_policy, "POLICY" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 213bce55a7..c05f205c37 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -45,7 +45,9 @@ int config_parse_service_timeout(const char *unit, const char *filename, unsigne int config_parse_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_service_restart(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_socket_bindtodevice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_exec_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_exec_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_io_class(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_io_priority(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); @@ -73,7 +75,7 @@ int config_parse_unit_condition_string(const char *unit, const char *filename, u int config_parse_unit_condition_null(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -int config_parse_failure_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_emergency_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_syscall_archs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); @@ -81,6 +83,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/main.c b/src/core/main.c index 35b0a43901..f07ed71b31 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -72,6 +72,9 @@ #include "process-util.h" #include "raw-clone.h" #include "rlimit-util.h" +#ifdef HAVE_SECCOMP +#include "seccomp-util.h" +#endif #include "selinux-setup.h" #include "selinux-util.h" #include "signal-util.h" @@ -86,6 +89,7 @@ #include "user-util.h" #include "virt.h" #include "watchdog.h" +#include "emergency-action.h" static enum { ACTION_RUN, @@ -128,6 +132,7 @@ static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; +static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; noreturn static void freeze_or_reboot(void) { @@ -199,7 +204,7 @@ noreturn static void crash(int sig) { pid, sigchld_code_to_string(status.si_code), status.si_status, strna(status.si_code == CLD_EXITED - ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL) + ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL) : signal_to_string(status.si_status))); else log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid); @@ -303,7 +308,7 @@ static int set_machine_id(const char *m) { return 0; } -static int parse_proc_cmdline_item(const char *key, const char *value) { +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { int r; @@ -699,6 +704,7 @@ static int parse_config_file(void) { { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting }, { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting }, { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max }, + { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action }, {} }; @@ -712,7 +718,7 @@ static int parse_config_file(void) { CONF_PATHS_NULSTR("systemd/system.conf.d") : CONF_PATHS_NULSTR("systemd/user.conf.d"); - config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL); + config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL); /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY * like everywhere else. */ @@ -993,10 +999,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_MACHINE_ID: r = set_machine_id(optarg); - if (r < 0) { - log_error("MachineID '%s' is not valid.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "MachineID '%s' is not valid.", optarg); break; case 'h': @@ -1119,7 +1123,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { * later when transitioning from the initrd to the main * systemd or suchlike. */ if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) - return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m"); + return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); /* Make sure forked processes get the default kernel setting */ if (!arg_default_rlimit[RLIMIT_NOFILE]) { @@ -1136,7 +1140,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { nl.rlim_cur = nl.rlim_max = 64*1024; r = setrlimit_closest(RLIMIT_NOFILE, &nl); if (r < 0) - return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m"); + return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m"); return 0; } @@ -1186,6 +1190,9 @@ static int enforce_syscall_archs(Set *archs) { void *id; int r; + if (!is_seccomp_available()) + return 0; + seccomp = seccomp_init(SCMP_ACT_ALLOW); if (!seccomp) return log_oom(); @@ -1505,7 +1512,8 @@ int main(int argc, char *argv[]) { if (getpid() == 1) { /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit) * will process core dumps for system services by default. */ - (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)); + if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0) + log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m"); /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored * until the systemd-coredump tool is enabled via sysctl. */ @@ -1523,15 +1531,9 @@ int main(int argc, char *argv[]) { * need to do that for user instances since they never log * into the console. */ log_show_color(colors_enabled()); - make_null_stdio(); - } - - /* Initialize default unit */ - r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET); - if (r < 0) { - log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET); - error_message = "Failed to set default unit"; - goto finish; + r = make_null_stdio(); + if (r < 0) + log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m"); } r = initialize_join_controllers(); @@ -1559,7 +1561,7 @@ int main(int argc, char *argv[]) { (void) reset_all_signal_handlers(); (void) ignore_signals(SIGNALS_IGNORE, -1); - arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */ + arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U); if (parse_config_file() < 0) { error_message = "Failed to parse config file"; @@ -1567,7 +1569,7 @@ int main(int argc, char *argv[]) { } if (arg_system) { - r = parse_proc_cmdline(parse_proc_cmdline_item); + r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false); if (r < 0) log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); } @@ -1581,6 +1583,16 @@ int main(int argc, char *argv[]) { goto finish; } + /* Initialize default unit */ + if (!arg_default_unit) { + arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET); + if (!arg_default_unit) { + r = log_oom(); + error_message = "Failed to set default unit"; + goto finish; + } + } + if (arg_action == ACTION_TEST && geteuid() == 0) { log_error("Don't run test mode as root."); @@ -1601,11 +1613,10 @@ int main(int argc, char *argv[]) { goto finish; } - if (arg_action == ACTION_TEST) - skip_setup = true; - - if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) + if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) { pager_open(arg_no_pager, false); + skip_setup = true; + } if (arg_action == ACTION_HELP) { retval = help(); @@ -1763,10 +1774,10 @@ int main(int argc, char *argv[]) { log_warning_errno(errno, "Failed to make us a subreaper: %m"); if (arg_system) { - bump_rlimit_nofile(&saved_rlimit_nofile); + (void) bump_rlimit_nofile(&saved_rlimit_nofile); if (empty_etc) { - r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0); + r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0); if (r < 0) log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m"); else @@ -1789,6 +1800,7 @@ int main(int argc, char *argv[]) { m->initrd_timestamp = initrd_timestamp; m->security_start_timestamp = security_start_timestamp; m->security_finish_timestamp = security_finish_timestamp; + m->cad_burst_action = arg_cad_burst_action; manager_set_defaults(m); manager_set_show_status(m, arg_show_status); diff --git a/src/core/manager.c b/src/core/manager.c index bb2000d860..ffccfdcd5e 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -45,6 +45,7 @@ #include "bus-error.h" #include "bus-kernel.h" #include "bus-util.h" +#include "clean-ipc.h" #include "dbus-job.h" #include "dbus-manager.h" #include "dbus-unit.h" @@ -81,6 +82,7 @@ #include "transaction.h" #include "umask-util.h" #include "unit-name.h" +#include "user-util.h" #include "util.h" #include "virt.h" #include "watchdog.h" @@ -98,6 +100,7 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); +static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata); static int manager_dispatch_run_queue(sd_event_source *source, void *userdata); static int manager_run_generators(Manager *m); @@ -519,6 +522,7 @@ static void manager_clean_environment(Manager *m) { "LISTEN_FDNAMES", "WATCHDOG_PID", "WATCHDOG_USEC", + "INVOCATION_ID", NULL); } @@ -579,17 +583,25 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { if (MANAGER_IS_SYSTEM(m)) { m->unit_log_field = "UNIT="; m->unit_log_format_string = "UNIT=%s"; + + m->invocation_log_field = "INVOCATION_ID="; + m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR; } else { m->unit_log_field = "USER_UNIT="; m->unit_log_format_string = "USER_UNIT=%s"; + + m->invocation_log_field = "USER_INVOCATION_ID="; + m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR; } m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd = - m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd = + m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd = m->ask_password_inotify_fd = -1; + m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1; + m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */ m->have_ask_password = -EINVAL; /* we don't know */ @@ -656,9 +668,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { goto fail; } - /* Note that we set up neither kdbus, nor the notify fd - * here. We do that after deserialization, since they might - * have gotten serialized across the reexec. */ + /* Note that we do not set up the notify fd here. We do that after deserialization, + * since they might have gotten serialized across the reexec. */ m->taint_usr = dir_is_empty("/usr") > 0; @@ -766,7 +777,7 @@ static int manager_setup_cgroups_agent(Manager *m) { if (!MANAGER_IS_SYSTEM(m)) return 0; - if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */ return 0; if (m->cgroups_agent_fd < 0) { @@ -812,6 +823,59 @@ static int manager_setup_cgroups_agent(Manager *m) { return 0; } +static int manager_setup_user_lookup_fd(Manager *m) { + int r; + + assert(m); + + /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID + * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation, + * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked + * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects + * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes, + * hence we establish this communication channel so that forked off processes can pass their UID/GID + * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple + * datagram, along with their unit name, so that we can share one communication socket pair among all units for + * this purpose. + * + * You might wonder why we need a communication channel for this that is independent of the usual notification + * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET + * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user + * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available. + * + * Note that this function is called under two circumstances: when we first initialize (in which case we + * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload + * (in which case the socket pair already exists but we still need to allocate the event source for it). */ + + if (m->user_lookup_fds[0] < 0) { + + /* Free all secondary fields */ + safe_close_pair(m->user_lookup_fds); + m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source); + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0) + return log_error_errno(errno, "Failed to allocate user lookup socket: %m"); + + (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE); + } + + if (!m->user_lookup_event_source) { + r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m); + if (r < 0) + return log_error_errno(errno, "Failed to allocate user lookup event source: %m"); + + /* Process even earlier than the notify event source, so that we always know first about valid UID/GID + * resolutions */ + r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8); + if (r < 0) + return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m"); + + (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup"); + } + + return 0; +} + static int manager_connect_bus(Manager *m, bool reexecuting) { bool try_bus_connect; @@ -821,7 +885,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) { return 0; try_bus_connect = - m->kdbus_fd >= 0 || reexecuting || (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")); @@ -853,8 +916,7 @@ enum { _GC_OFFSET_MAX }; -static void unit_gc_mark_good(Unit *u, unsigned gc_marker) -{ +static void unit_gc_mark_good(Unit *u, unsigned gc_marker) { Iterator i; Unit *other; @@ -1007,6 +1069,7 @@ Manager* manager_free(Manager *m) { hashmap_free(m->dynamic_users); hashmap_free(m->units); + hashmap_free(m->units_by_invocation_id); hashmap_free(m->jobs); hashmap_free(m->watch_pids1); hashmap_free(m->watch_pids2); @@ -1021,12 +1084,13 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->time_change_event_source); sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->run_queue_event_source); + sd_event_source_unref(m->user_lookup_event_source); safe_close(m->signal_fd); safe_close(m->notify_fd); safe_close(m->cgroups_agent_fd); safe_close(m->time_change_fd); - safe_close(m->kdbus_fd); + safe_close_pair(m->user_lookup_fds); manager_close_ask_password(m); @@ -1052,8 +1116,10 @@ Manager* manager_free(Manager *m) { assert(hashmap_isempty(m->units_requiring_mounts_for)); hashmap_free(m->units_requiring_mounts_for); - free(m); - return NULL; + hashmap_free(m->uid_refs); + hashmap_free(m->gid_refs); + + return mfree(m); } void manager_enumerate(Manager *m) { @@ -1177,9 +1243,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { return r; /* Make sure the transient directory always exists, so that it remains in the search path */ - r = mkdir_p_label(m->lookup_paths.transient, 0755); - if (r < 0) - return r; + if (!m->test_run) { + r = mkdir_p_label(m->lookup_paths.transient, 0755); + if (r < 0) + return r; + } dual_timestamp_get(&m->generators_start_timestamp); r = manager_run_generators(m); @@ -1221,10 +1289,15 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { if (q < 0 && r == 0) r = q; - /* We might have deserialized the kdbus control fd, but if we - * didn't, then let's create the bus now. */ - manager_connect_bus(m, !!serialization); - bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed); + q = manager_setup_user_lookup_fd(m); + if (q < 0 && r == 0) + r = q; + + /* Let's connect to the bus now. */ + (void) manager_connect_bus(m, !!serialization); + + (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed); + m->deserialized_subscribed = strv_free(m->deserialized_subscribed); /* Third, fire things up! */ manager_coldplug(m); @@ -1232,6 +1305,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { /* Release any dynamic users no longer referenced */ dynamic_user_vacuum(m, true); + /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ + manager_vacuum_uid_refs(m); + manager_vacuum_gid_refs(m); + if (serialization) { assert(m->n_reloading > 0); m->n_reloading--; @@ -1604,8 +1681,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const if (UNIT_VTABLE(u)->notify_message) UNIT_VTABLE(u)->notify_message(u, pid, tags, fds); - else - log_unit_debug(u, "Got notification message for unit. Ignoring."); + else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { + _cleanup_free_ char *x = NULL, *y = NULL; + + x = cescape(buf); + if (x) + y = ellipsize(x, 20, 90); + log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y)); + } } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { @@ -1631,7 +1714,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t struct cmsghdr *cmsg; struct ucred *ucred = NULL; - bool found = false; Unit *u1, *u2, *u3; int r, *fd_array = NULL; unsigned n_fds = 0; @@ -1645,16 +1727,15 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } - n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC); if (n < 0) { - if (!IN_SET(errno, EAGAIN, EINTR)) - log_error("Failed to receive notification message: %m"); + if (IN_SET(errno, EAGAIN, EINTR)) + return 0; /* Spurious wakeup, try again */ - /* It's not an option to return an error here since it - * would disable the notification handler entirely. Services - * wouldn't be able to send the WATCHDOG message for - * example... */ - return 0; + /* If this is any other, real error, then let's stop processing this socket. This of course means we + * won't take notification messages anymore, but that's still better than busy looping around this: + * being woken up over and over again but being unable to actually read the message off the socket. */ + return log_error_errno(errno, "Failed to receive notification message: %m"); } CMSG_FOREACH(cmsg, &msghdr) { @@ -1687,40 +1768,40 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } - if ((size_t) n >= sizeof(buf)) { + if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) { log_warning("Received notify message exceeded maximum size. Ignoring."); return 0; } - /* The message should be a string. Here we make sure it's NUL-terminated, - * but only the part until first NUL will be used anyway. */ + /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one + * trailing NUL byte in the message, but don't expect it. */ + if (n > 1 && memchr(buf, 0, n-1)) { + log_warning("Received notify message with embedded NUL bytes. Ignoring."); + return 0; + } + + /* Make sure it's NUL-terminated. */ buf[n] = 0; /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); - if (u1) { + if (u1) manager_invoke_notify_message(m, u1, ucred->pid, buf, fds); - found = true; - } u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); - if (u2 && u2 != u1) { + if (u2 && u2 != u1) manager_invoke_notify_message(m, u2, ucred->pid, buf, fds); - found = true; - } u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); - if (u3 && u3 != u2 && u3 != u1) { + if (u3 && u3 != u2 && u3 != u1) manager_invoke_notify_message(m, u3, ucred->pid, buf, fds); - found = true; - } - if (!found) + if (!u1 && !u2 && !u3) log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid); if (fdset_size(fds) > 0) - log_warning("Got auxiliary fds with notification message, closing all."); + log_warning("Got extra auxiliary fds with notification message, closing them."); return 0; } @@ -1825,6 +1906,18 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) { return r; } +static void manager_handle_ctrl_alt_del(Manager *m) { + /* If the user presses C-A-D more than + * 7 times within 2s, we reboot/shutdown immediately, + * unless it was disabled in system.conf */ + + if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE) + manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); + else + emergency_action(m, m->cad_burst_action, NULL, + "Ctrl-Alt-Del was pressed more than 7 times within 2s"); +} + static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; ssize_t n; @@ -1843,14 +1936,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t for (;;) { n = read(m->signal_fd, &sfsi, sizeof(sfsi)); if (n != sizeof(sfsi)) { + if (n >= 0) { + log_warning("Truncated read from signal fd (%zu bytes)!", n); + return 0; + } - if (n >= 0) - return -EIO; - - if (errno == EINTR || errno == EAGAIN) + if (IN_SET(errno, EINTR, EAGAIN)) break; - return -errno; + /* We return an error here, which will kill this handler, + * to avoid a busy loop on read error. */ + return log_error_errno(errno, "Reading from signal fd failed: %m"); } log_received_signal(sfsi.ssi_signo == SIGCHLD || @@ -1876,19 +1972,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t case SIGINT: if (MANAGER_IS_SYSTEM(m)) { - - /* If the user presses C-A-D more than - * 7 times within 2s, we reboot - * immediately. */ - - if (ratelimit_test(&m->ctrl_alt_del_ratelimit)) - manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); - else { - log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately."); - status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately."); - m->exit_code = MANAGER_REBOOT; - } - + manager_handle_ctrl_alt_del(m); break; } @@ -2174,6 +2258,7 @@ int manager_loop(Manager *m) { int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) { _cleanup_free_ char *n = NULL; + sd_id128_t invocation_id; Unit *u; int r; @@ -2185,12 +2270,25 @@ int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, if (r < 0) return r; + /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it + * as invocation ID. */ + r = sd_id128_from_string(n, &invocation_id); + if (r >= 0) { + u = hashmap_get(m->units_by_invocation_id, &invocation_id); + if (u) { + *_u = u; + return 0; + } + + return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id)); + } + + /* If this didn't work, we use the suffix as unit name. */ r = manager_load_unit(m, n, NULL, e, &u); if (r < 0) return r; *_u = u; - return 0; } @@ -2402,22 +2500,29 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { fprintf(f, "cgroups-agent-fd=%i\n", copy); } - if (m->kdbus_fd >= 0) { - int copy; + if (m->user_lookup_fds[0] >= 0) { + int copy0, copy1; - copy = fdset_put_dup(fds, m->kdbus_fd); - if (copy < 0) - return copy; + copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]); + if (copy0 < 0) + return copy0; + + copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]); + if (copy1 < 0) + return copy1; - fprintf(f, "kdbus-fd=%i\n", copy); + fprintf(f, "user-lookup=%i %i\n", copy0, copy1); } - bus_track_serialize(m->subscribed, f); + bus_track_serialize(m->subscribed, f, "subscribed"); r = dynamic_user_serialize(m, f, fds); if (r < 0) return r; + manager_serialize_uid_refs(m, f); + manager_serialize_gid_refs(m, f); + fputc('\n', f); HASHMAP_FOREACH_KEY(u, t, m->units, i) { @@ -2584,27 +2689,31 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { m->cgroups_agent_fd = fdset_remove(fds, fd); } - } else if (startswith(l, "kdbus-fd=")) { - int fd; + } else if (startswith(l, "user-lookup=")) { + int fd0, fd1; - if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) - log_debug("Failed to parse kdbus fd: %s", l + 9); + if (sscanf(l + 12, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1)) + log_debug("Failed to parse user lookup fd: %s", l + 12); else { - safe_close(m->kdbus_fd); - m->kdbus_fd = fdset_remove(fds, fd); + m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source); + safe_close_pair(m->user_lookup_fds); + m->user_lookup_fds[0] = fdset_remove(fds, fd0); + m->user_lookup_fds[1] = fdset_remove(fds, fd1); } } else if (startswith(l, "dynamic-user=")) dynamic_user_deserialize_one(m, l + 13, fds); - else { - int k; - - k = bus_track_deserialize_item(&m->deserialized_subscribed, l); - if (k < 0) - log_debug_errno(k, "Failed to deserialize bus tracker object: %m"); - else if (k == 0) - log_debug("Unknown serialization item '%s'", l); - } + else if (startswith(l, "destroy-ipc-uid=")) + manager_deserialize_uid_refs_one(m, l + 16); + else if (startswith(l, "destroy-ipc-gid=")) + manager_deserialize_gid_refs_one(m, l + 16); + else if (startswith(l, "subscribed=")) { + + if (strv_extend(&m->deserialized_subscribed, l+11) < 0) + log_oom(); + + } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */ + log_debug("Unknown serialization item '%s'", l); } for (;;) { @@ -2678,6 +2787,8 @@ int manager_reload(Manager *m) { lookup_paths_flush_generator(&m->lookup_paths); lookup_paths_free(&m->lookup_paths); dynamic_user_vacuum(m, false); + m->uid_refs = hashmap_free(m->uid_refs); + m->gid_refs = hashmap_free(m->gid_refs); q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL); if (q < 0 && r >= 0) @@ -2711,12 +2822,20 @@ int manager_reload(Manager *m) { if (q < 0 && r >= 0) r = q; + q = manager_setup_user_lookup_fd(m); + if (q < 0 && r >= 0) + r = q; + /* Third, fire things up! */ manager_coldplug(m); /* Release any dynamic users no longer referenced */ dynamic_user_vacuum(m, true); + /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */ + manager_vacuum_uid_refs(m); + manager_vacuum_gid_refs(m); + /* Sync current state of bus names with our set of listening units */ if (m->api_bus) manager_sync_bus_names(m, m->api_bus); @@ -2962,7 +3081,7 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) { m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1); if (!m->rlimit[i]) - return -ENOMEM; + return log_oom(); } return 0; @@ -3150,6 +3269,300 @@ ManagerState manager_state(Manager *m) { return MANAGER_RUNNING; } +#define DESTROY_IPC_FLAG (UINT32_C(1) << 31) + +static void manager_unref_uid_internal( + Manager *m, + Hashmap **uid_refs, + uid_t uid, + bool destroy_now, + int (*_clean_ipc)(uid_t uid)) { + + uint32_t c, n; + + assert(m); + assert(uid_refs); + assert(uid_is_valid(uid)); + assert(_clean_ipc); + + /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption + * that uid_t and gid_t are actually defined the same way, with the same validity rules. + * + * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest + * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID + * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added + * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (uid == 0) /* We don't keep track of root, and will never destroy it */ + return; + + c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid))); + + n = c & ~DESTROY_IPC_FLAG; + assert(n > 0); + n--; + + if (destroy_now && n == 0) { + hashmap_remove(*uid_refs, UID_TO_PTR(uid)); + + if (c & DESTROY_IPC_FLAG) { + log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.", + _clean_ipc == clean_ipc_by_uid ? "UID" : "GID", + uid); + (void) _clean_ipc(uid); + } + } else { + c = n | (c & DESTROY_IPC_FLAG); + assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0); + } +} + +void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) { + manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid); +} + +void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) { + manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid); +} + +static int manager_ref_uid_internal( + Manager *m, + Hashmap **uid_refs, + uid_t uid, + bool clean_ipc) { + + uint32_t c, n; + int r; + + assert(m); + assert(uid_refs); + assert(uid_is_valid(uid)); + + /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption + * that uid_t and gid_t are actually defined the same way, with the same validity rules. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (uid == 0) /* We don't keep track of root, and will never destroy it */ + return 0; + + r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops); + if (r < 0) + return r; + + c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid))); + + n = c & ~DESTROY_IPC_FLAG; + n++; + + if (n & DESTROY_IPC_FLAG) /* check for overflow */ + return -EOVERFLOW; + + c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0); + + return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)); +} + +int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) { + return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc); +} + +int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) { + return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc); +} + +static void manager_vacuum_uid_refs_internal( + Manager *m, + Hashmap **uid_refs, + int (*_clean_ipc)(uid_t uid)) { + + Iterator i; + void *p, *k; + + assert(m); + assert(uid_refs); + assert(_clean_ipc); + + HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) { + uint32_t c, n; + uid_t uid; + + uid = PTR_TO_UID(k); + c = PTR_TO_UINT32(p); + + n = c & ~DESTROY_IPC_FLAG; + if (n > 0) + continue; + + if (c & DESTROY_IPC_FLAG) { + log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.", + _clean_ipc == clean_ipc_by_uid ? "UID" : "GID", + uid); + (void) _clean_ipc(uid); + } + + assert_se(hashmap_remove(*uid_refs, k) == p); + } +} + +void manager_vacuum_uid_refs(Manager *m) { + manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid); +} + +void manager_vacuum_gid_refs(Manager *m) { + manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid); +} + +static void manager_serialize_uid_refs_internal( + Manager *m, + FILE *f, + Hashmap **uid_refs, + const char *field_name) { + + Iterator i; + void *p, *k; + + assert(m); + assert(f); + assert(uid_refs); + assert(field_name); + + /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter + * of it is better rebuild after a reload/reexec. */ + + HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) { + uint32_t c; + uid_t uid; + + uid = PTR_TO_UID(k); + c = PTR_TO_UINT32(p); + + if (!(c & DESTROY_IPC_FLAG)) + continue; + + fprintf(f, "%s=" UID_FMT "\n", field_name, uid); + } +} + +void manager_serialize_uid_refs(Manager *m, FILE *f) { + manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid"); +} + +void manager_serialize_gid_refs(Manager *m, FILE *f) { + manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid"); +} + +static void manager_deserialize_uid_refs_one_internal( + Manager *m, + Hashmap** uid_refs, + const char *value) { + + uid_t uid; + uint32_t c; + int r; + + assert(m); + assert(uid_refs); + assert(value); + + r = parse_uid(value, &uid); + if (r < 0 || uid == 0) { + log_debug("Unable to parse UID reference serialization"); + return; + } + + r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops); + if (r < 0) { + log_oom(); + return; + } + + c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid))); + if (c & DESTROY_IPC_FLAG) + return; + + c |= DESTROY_IPC_FLAG; + + r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)); + if (r < 0) { + log_debug("Failed to add UID reference entry"); + return; + } +} + +void manager_deserialize_uid_refs_one(Manager *m, const char *value) { + manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value); +} + +void manager_deserialize_gid_refs_one(Manager *m, const char *value) { + manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value); +} + +int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + struct buffer { + uid_t uid; + gid_t gid; + char unit_name[UNIT_NAME_MAX+1]; + } _packed_ buffer; + + Manager *m = userdata; + ssize_t l; + size_t n; + Unit *u; + + assert_se(source); + assert_se(m); + + /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID + * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the + * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */ + + l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT); + if (l < 0) { + if (errno == EINTR || errno == EAGAIN) + return 0; + + return log_error_errno(errno, "Failed to read from user lookup fd: %m"); + } + + if ((size_t) l <= offsetof(struct buffer, unit_name)) { + log_warning("Received too short user lookup message, ignoring."); + return 0; + } + + if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) { + log_warning("Received too long user lookup message, ignoring."); + return 0; + } + + if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) { + log_warning("Got user lookup message with invalid UID/GID pair, ignoring."); + return 0; + } + + n = (size_t) l - offsetof(struct buffer, unit_name); + if (memchr(buffer.unit_name, 0, n)) { + log_warning("Received lookup message with embedded NUL character, ignoring."); + return 0; + } + + buffer.unit_name[n] = 0; + u = manager_get_unit(m, buffer.unit_name); + if (!u) { + log_debug("Got user lookup message but unit doesn't exist, ignoring."); + return 0; + } + + log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid); + + unit_notify_user_lookup(u, buffer.uid, buffer.gid); + return 0; +} + static const char *const manager_state_table[_MANAGER_STATE_MAX] = { [MANAGER_INITIALIZING] = "initializing", [MANAGER_STARTING] = "starting", diff --git a/src/core/manager.h b/src/core/manager.h index c681d5dc46..35172fdba9 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -81,6 +81,7 @@ struct Manager { /* Active jobs and units */ Hashmap *units; /* name string => Unit object n:1 */ + Hashmap *units_by_invocation_id; Hashmap *jobs; /* job id => Job object 1:1 */ /* To make it easy to iterate through the units of a specific @@ -143,6 +144,9 @@ struct Manager { sd_event_source *jobs_in_progress_event_source; + int user_lookup_fds[2]; + sd_event_source *user_lookup_event_source; + UnitFileScope unit_file_scope; LookupPaths lookup_paths; Set *unit_path_cache; @@ -234,7 +238,6 @@ struct Manager { bool dispatching_dbus_queue:1; bool taint_usr:1; - bool test_run:1; /* If non-zero, exit with the following value when the systemd @@ -292,21 +295,26 @@ struct Manager { * value where Unit objects are contained. */ Hashmap *units_requiring_mounts_for; - /* Reference to the kdbus bus control fd */ - int kdbus_fd; - /* Used for processing polkit authorization responses */ Hashmap *polkit_registry; /* Dynamic users/groups, indexed by their name */ Hashmap *dynamic_users; - /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */ + /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */ + Hashmap *uid_refs; + Hashmap *gid_refs; + + /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */ RateLimit ctrl_alt_del_ratelimit; + EmergencyAction cad_burst_action; const char *unit_log_field; const char *unit_log_format_string; + const char *invocation_log_field; + const char *invocation_log_format_string; + int first_boot; /* tri-state */ }; @@ -378,5 +386,20 @@ ManagerState manager_state(Manager *m); int manager_update_failed_units(Manager *m, Unit *u, bool failed); +void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now); +int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc); + +void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now); +int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now); + +void manager_vacuum_uid_refs(Manager *m); +void manager_vacuum_gid_refs(Manager *m); + +void manager_serialize_uid_refs(Manager *m, FILE *f); +void manager_deserialize_uid_refs_one(Manager *m, const char *value); + +void manager_serialize_gid_refs(Manager *m, FILE *f); +void manager_deserialize_gid_refs_one(Manager *m, const char *value); + const char *manager_state_to_string(ManagerState m) _const_; ManagerState manager_state_from_string(const char *s) _pure_; diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 5d8ab0ec70..ca63a93e8b 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -99,10 +99,12 @@ static const MountPoint mount_table[] = { cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_IN_CONTAINER }, + cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER }, { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, #ifdef ENABLE_EFI diff --git a/src/core/mount.c b/src/core/mount.c index f3ccf6d48a..d749e49df5 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -159,17 +159,6 @@ static void mount_init(Unit *u) { m->timeout_usec = u->manager->default_timeout_start_usec; m->directory_mode = 0755; - if (unit_has_name(u, "-.mount")) { - /* Don't allow start/stop for root directory */ - u->refuse_manual_start = true; - u->refuse_manual_stop = true; - } else { - /* The stdio/kmsg bridge socket is on /, in order to avoid a - * dep loop, don't use kmsg logging for -.mount */ - m->exec_context.std_output = u->manager->default_std_output; - m->exec_context.std_error = u->manager->default_std_error; - } - /* We need to make sure that /usr/bin/mount is always called * in the same process group as us, so that the autofs kernel * side doesn't send us another mount request while we are @@ -577,6 +566,25 @@ static int mount_add_extras(Mount *m) { return 0; } +static int mount_load_root_mount(Unit *u) { + assert(u); + + if (!unit_has_name(u, SPECIAL_ROOT_MOUNT)) + return 0; + + u->perpetual = true; + u->default_dependencies = false; + + /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */ + MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL; + MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL; + + if (!u->description) + u->description = strdup("Root Mount"); + + return 1; +} + static int mount_load(Unit *u) { Mount *m = MOUNT(u); int r; @@ -584,11 +592,14 @@ static int mount_load(Unit *u) { assert(u); assert(u->load_state == UNIT_STUB); - if (m->from_proc_self_mountinfo) + r = mount_load_root_mount(u); + if (r < 0) + return r; + + if (m->from_proc_self_mountinfo || u->perpetual) r = unit_load_fragment_and_dropin_optional(u); else r = unit_load_fragment_and_dropin(u); - if (r < 0) return r; @@ -677,7 +688,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) { "%sOptions: %s\n" "%sFrom /proc/self/mountinfo: %s\n" "%sFrom fragment: %s\n" - "%sDirectoryMode: %04o\n", + "%sDirectoryMode: %04o\n" + "%sSloppyOptions: %s\n" + "%sLazyUnmount: %s\n" + "%sForceUnmount: %s\n", prefix, mount_state_to_string(m->state), prefix, mount_result_to_string(m->result), prefix, m->where, @@ -686,7 +700,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) { prefix, p ? strna(p->options) : "n/a", prefix, yes_no(m->from_proc_self_mountinfo), prefix, yes_no(m->from_fragment), - prefix, m->directory_mode); + prefix, m->directory_mode, + prefix, yes_no(m->sloppy_options), + prefix, yes_no(m->lazy_unmount), + prefix, yes_no(m->force_unmount)); if (m->control_pid > 0) fprintf(f, @@ -769,6 +786,8 @@ static void mount_enter_dead(Mount *m, MountResult f) { exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager)); + unit_unref_uid_gid(UNIT(m), true); + dynamic_creds_destroy(&m->dynamic_creds); } @@ -822,7 +841,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) { fail: log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m"); - if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL) + if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL)) mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES); else mount_enter_dead(m, MOUNT_FAILURE_RESOURCES); @@ -844,6 +863,10 @@ static void mount_enter_unmounting(Mount *m) { m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT; r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL); + if (r >= 0 && m->lazy_unmount) + r = exec_command_append(m->control_command, "-l", NULL); + if (r >= 0 && m->force_unmount) + r = exec_command_append(m->control_command, "-f", NULL); if (r < 0) goto fail; @@ -974,18 +997,19 @@ static int mount_start(Unit *u) { /* We cannot fulfill this request right now, try again later * please! */ - if (m->state == MOUNT_UNMOUNTING || - m->state == MOUNT_UNMOUNTING_SIGTERM || - m->state == MOUNT_UNMOUNTING_SIGKILL || - m->state == MOUNT_MOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGKILL) + if (IN_SET(m->state, + MOUNT_UNMOUNTING, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_MOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGKILL)) return -EAGAIN; /* Already on it! */ if (m->state == MOUNT_MOUNTING) return 0; - assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED); + assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED)); r = unit_start_limit_test(u); if (r < 0) { @@ -993,6 +1017,10 @@ static int mount_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + m->result = MOUNT_SUCCESS; m->reload_result = MOUNT_SUCCESS; m->reset_cpu_usage = true; @@ -1007,19 +1035,21 @@ static int mount_stop(Unit *u) { assert(m); /* Already on it */ - if (m->state == MOUNT_UNMOUNTING || - m->state == MOUNT_UNMOUNTING_SIGKILL || - m->state == MOUNT_UNMOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGKILL) + if (IN_SET(m->state, + MOUNT_UNMOUNTING, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGKILL)) return 0; - assert(m->state == MOUNT_MOUNTING || - m->state == MOUNT_MOUNTING_DONE || - m->state == MOUNT_MOUNTED || - m->state == MOUNT_REMOUNTING || - m->state == MOUNT_REMOUNTING_SIGTERM || - m->state == MOUNT_REMOUNTING_SIGKILL); + assert(IN_SET(m->state, + MOUNT_MOUNTING, + MOUNT_MOUNTING_DONE, + MOUNT_MOUNTED, + MOUNT_REMOUNTING, + MOUNT_REMOUNTING_SIGTERM, + MOUNT_REMOUNTING_SIGKILL)); mount_enter_unmounting(m); return 1; @@ -1147,7 +1177,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { m->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = MOUNT_SUCCESS; else if (code == CLD_EXITED) f = MOUNT_FAILURE_EXIT_CODE; @@ -1185,9 +1215,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { case MOUNT_MOUNTING_SIGKILL: case MOUNT_MOUNTING_SIGTERM: - if (f == MOUNT_SUCCESS) - mount_enter_mounted(m, f); - else if (m->from_proc_self_mountinfo) + if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo) + /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are + * happy. If we see the first condition first, we should see the the second condition + * immediately after – or /bin/mount lies to us and is broken. */ mount_enter_mounted(m, f); else mount_enter_dead(m, f); @@ -1373,11 +1404,7 @@ static int mount_setup_unit( if (!u) { delete = true; - u = unit_new(m, sizeof(Mount)); - if (!u) - return log_oom(); - - r = unit_add_name(u, e); + r = unit_new_for_name(m, sizeof(Mount), e, &u); if (r < 0) goto fail; @@ -1464,17 +1491,9 @@ static int mount_setup_unit( MOUNT(u)->from_proc_self_mountinfo = true; - free(p->what); - p->what = w; - w = NULL; - - free(p->options); - p->options = o; - o = NULL; - - free(p->fstype); - p->fstype = f; - f = NULL; + free_and_replace(p->what, w); + free_and_replace(p->options, o); + free_and_replace(p->fstype, f); if (load_extras) { r = mount_add_extras(MOUNT(u)); @@ -1580,11 +1599,46 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) { return 1; } +static int synthesize_root_mount(Manager *m) { + Unit *u; + int r; + + assert(m); + + /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's + * unconditionally synthesize it here and mark it as perpetual. */ + + u = manager_get_unit(m, SPECIAL_ROOT_MOUNT); + if (!u) { + r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u); + if (r < 0) + return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m"); + } + + u->perpetual = true; + MOUNT(u)->deserialized_state = MOUNT_MOUNTED; + + unit_add_to_load_queue(u); + unit_add_to_dbus_queue(u); + + return 0; +} + +static bool mount_is_mounted(Mount *m) { + assert(m); + + return UNIT(m)->perpetual || m->is_mounted; +} + static void mount_enumerate(Manager *m) { int r; assert(m); + r = synthesize_root_mount(m); + if (r < 0) + goto fail; + mnt_init_debug(0); if (!m->mount_monitor) { @@ -1691,7 +1745,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) { Mount *mount = MOUNT(u); - if (!mount->is_mounted) { + if (!mount_is_mounted(mount)) { /* A mount point is not around right now. It * might be gone, or might never have @@ -1730,9 +1784,10 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, case MOUNT_DEAD: case MOUNT_FAILED: - /* This has just been mounted by - * somebody else, follow the state - * change. */ + + /* This has just been mounted by somebody else, follow the state change, but let's + * generate a new invocation ID for this implicitly and automatically. */ + (void) unit_acquire_invocation_id(UNIT(mount)); mount_enter_mounted(mount, MOUNT_SUCCESS); break; @@ -1751,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, } } - if (mount->is_mounted && + if (mount_is_mounted(mount) && mount->from_proc_self_mountinfo && mount->parameters_proc_self_mountinfo.what) { diff --git a/src/core/mount.h b/src/core/mount.h index ac27b518cc..9f7326ba6a 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -71,6 +71,9 @@ struct Mount { bool sloppy_options; + bool lazy_unmount; + bool force_unmount; + MountResult result; MountResult reload_result; diff --git a/src/core/namespace.c b/src/core/namespace.c index 52a2505d94..1195e9a854 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "dev-setup.h" #include "fd-util.h" +#include "fs-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -53,61 +54,245 @@ typedef enum MountMode { PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV, - READWRITE + READWRITE, } MountMode; typedef struct BindMount { - const char *path; + const char *path; /* stack memory, doesn't need to be freed explicitly */ + char *chased; /* malloc()ed memory, needs to be freed */ MountMode mode; - bool done; - bool ignore; + bool ignore; /* Ignore if path does not exist */ } BindMount; +typedef struct TargetMount { + const char *path; + MountMode mode; + bool ignore; /* Ignore if path does not exist */ +} TargetMount; + +/* + * The following Protect tables are to protect paths and mark some of them + * READONLY, in case a path is covered by an option from another table, then + * it is marked READWRITE in the current one, and the more restrictive mode is + * applied from that other table. This way all options can be combined in a + * safe and comprehensible way for users. + */ + +/* ProtectKernelTunables= option and the related filesystem APIs */ +static const TargetMount protect_kernel_tunables_table[] = { + { "/proc/sys", READONLY, false }, + { "/proc/sysrq-trigger", READONLY, true }, + { "/proc/latency_stats", READONLY, true }, + { "/proc/mtrr", READONLY, true }, + { "/proc/apm", READONLY, true }, + { "/proc/acpi", READONLY, true }, + { "/proc/timer_stats", READONLY, true }, + { "/proc/asound", READONLY, true }, + { "/proc/bus", READONLY, true }, + { "/proc/fs", READONLY, true }, + { "/proc/irq", READONLY, true }, + { "/sys", READONLY, false }, + { "/sys/kernel/debug", READONLY, true }, + { "/sys/kernel/tracing", READONLY, true }, + { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */ +}; + +/* ProtectKernelModules= option */ +static const TargetMount protect_kernel_modules_table[] = { +#ifdef HAVE_SPLIT_USR + { "/lib/modules", INACCESSIBLE, true }, +#endif + { "/usr/lib/modules", INACCESSIBLE, true }, +}; + +/* + * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of + * system should be protected by ProtectSystem= + */ +static const TargetMount protect_home_read_only_table[] = { + { "/home", READONLY, true }, + { "/run/user", READONLY, true }, + { "/root", READONLY, true }, +}; + +/* ProtectHome=yes table */ +static const TargetMount protect_home_yes_table[] = { + { "/home", INACCESSIBLE, true }, + { "/run/user", INACCESSIBLE, true }, + { "/root", INACCESSIBLE, true }, +}; + +/* ProtectSystem=yes table */ +static const TargetMount protect_system_yes_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, +}; + +/* ProtectSystem=full includes ProtectSystem=yes */ +static const TargetMount protect_system_full_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, + { "/etc", READONLY, false }, +}; + +/* + * ProtectSystem=strict table. In this strict mode, we mount everything + * read-only, except for /proc, /dev, /sys which are the kernel API VFS, + * which are left writable, but PrivateDevices= + ProtectKernelTunables= + * protect those, and these options should be fully orthogonal. + * (And of course /home and friends are also left writable, as ProtectHome= + * shall manage those, orthogonally). + */ +static const TargetMount protect_system_strict_table[] = { + { "/", READONLY, false }, + { "/proc", READWRITE, false }, /* ProtectKernelTunables= */ + { "/sys", READWRITE, false }, /* ProtectKernelTunables= */ + { "/dev", READWRITE, false }, /* PrivateDevices= */ + { "/home", READWRITE, true }, /* ProtectHome= */ + { "/run/user", READWRITE, true }, /* ProtectHome= */ + { "/root", READWRITE, true }, /* ProtectHome= */ +}; + +static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) { + (*p)->path = path; + (*p)->mode = mode; + (*p)->ignore = ignore; +} + static int append_mounts(BindMount **p, char **strv, MountMode mode) { char **i; assert(p); STRV_FOREACH(i, strv) { + bool ignore = false; - (*p)->ignore = false; - (*p)->done = false; - - if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') { - (*p)->ignore = true; + if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) { (*i)++; + ignore = true; } if (!path_is_absolute(*i)) return -EINVAL; - (*p)->path = *i; - (*p)->mode = mode; + set_bind_mount(p, *i, mode, ignore); (*p)++; } return 0; } -static int mount_path_compare(const void *a, const void *b) { - const BindMount *p = a, *q = b; - int d; +static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) { + unsigned i; - d = path_compare(p->path, q->path); + assert(p); + assert(mounts); + + for (i = 0; i < size; i++) { + /* + * Here we assume that the ignore field is set during + * declaration we do not support "-" at the beginning. + */ + const TargetMount *m = &mounts[i]; + const char *path = prefix_roota(root_directory, m->path); + + if (!path_is_absolute(path)) + return -EINVAL; + + set_bind_mount(p, path, m->mode, m->ignore); + (*p)++; + } + + return 0; +} + +static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_tunables_table, + ELEMENTSOF(protect_kernel_tunables_table)); +} + +static int append_protect_kernel_modules(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_modules_table, + ELEMENTSOF(protect_kernel_modules_table)); +} + +static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) { + int r = 0; + + assert(p); + + if (protect_home == PROTECT_HOME_NO) + return 0; + + switch (protect_home) { + case PROTECT_HOME_READ_ONLY: + r = append_target_mounts(p, root_directory, protect_home_read_only_table, + ELEMENTSOF(protect_home_read_only_table)); + break; + case PROTECT_HOME_YES: + r = append_target_mounts(p, root_directory, protect_home_yes_table, + ELEMENTSOF(protect_home_yes_table)); + break; + default: + r = -EINVAL; + break; + } + + return r; +} - if (d == 0) { - /* If the paths are equal, check the mode */ - if (p->mode < q->mode) - return -1; +static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) { + int r = 0; - if (p->mode > q->mode) - return 1; + assert(p); + if (protect_system == PROTECT_SYSTEM_NO) return 0; + + switch (protect_system) { + case PROTECT_SYSTEM_STRICT: + r = append_target_mounts(p, root_directory, protect_system_strict_table, + ELEMENTSOF(protect_system_strict_table)); + break; + case PROTECT_SYSTEM_YES: + r = append_target_mounts(p, root_directory, protect_system_yes_table, + ELEMENTSOF(protect_system_yes_table)); + break; + case PROTECT_SYSTEM_FULL: + r = append_target_mounts(p, root_directory, protect_system_full_table, + ELEMENTSOF(protect_system_full_table)); + break; + default: + r = -EINVAL; + break; } + return r; +} + +static int mount_path_compare(const void *a, const void *b) { + const BindMount *p = a, *q = b; + int d; + /* If the paths are not equal, then order prefixes first */ - return d; + d = path_compare(p->path, q->path); + if (d != 0) + return d; + + /* If the paths are equal, check the mode */ + if (p->mode < q->mode) + return -1; + + if (p->mode > q->mode) + return 1; + + return 0; } static void drop_duplicates(BindMount *m, unsigned *n) { @@ -116,16 +301,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) { assert(m); assert(n); + /* Drops duplicate entries. Expects that the array is properly ordered already. */ + for (f = m, t = m, previous = NULL; f < m+*n; f++) { - /* The first one wins */ - if (previous && path_equal(f->path, previous->path)) + /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare() + * above. */ + if (previous && path_equal(f->path, previous->path)) { + log_debug("%s is duplicate.", f->path); continue; + } *t = *f; - previous = t; + t++; + } + + *n = t - m; +} + +static void drop_inaccessible(BindMount *m, unsigned *n) { + BindMount *f, *t; + const char *clear = NULL; + assert(m); + assert(n); + + /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly + * ordered already. */ + + for (f = m, t = m; f < m+*n; f++) { + + /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop + * it, as inaccessible paths really should drop the entire subtree. */ + if (clear && path_startswith(f->path, clear)) { + log_debug("%s is masked by %s.", f->path, clear); + continue; + } + + clear = f->mode == INACCESSIBLE ? f->path : NULL; + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_nop(BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the + * list is ordered by prefixes. */ + + for (f = m, t = m; f < m+*n; f++) { + + /* Only suppress such subtrees for READONLY and READWRITE entries */ + if (IN_SET(f->mode, READONLY, READWRITE)) { + BindMount *p; + bool found = false; + + /* Now let's find the first parent of the entry we are looking at. */ + for (p = t-1; p >= m; p--) { + if (path_startswith(f->path, p->path)) { + found = true; + break; + } + } + + /* We found it, let's see if it's the same mode, if so, we can drop this entry */ + if (found && p->mode == f->mode) { + log_debug("%s is redundant by %s", f->path, p->path); + continue; + } + } + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + if (!root_directory) + return; + + /* Drops all mounts that are outside of the root directory. */ + + for (f = m, t = m; f < m+*n; f++) { + + if (!path_startswith(f->path, root_directory)) { + log_debug("%s is outside of root directory.", f->path); + continue; + } + + *t = *f; t++; } @@ -278,24 +557,23 @@ static int apply_mount( const char *what; int r; - struct stat target; assert(m); + log_debug("Applying namespace mount on %s", m->path); + switch (m->mode) { - case INACCESSIBLE: + case INACCESSIBLE: { + struct stat target; /* First, get rid of everything that is below if there * is anything... Then, overmount it with an * inaccessible path. */ - umount_recursive(m->path, 0); + (void) umount_recursive(m->path, 0); - if (lstat(m->path, &target) < 0) { - if (m->ignore && errno == ENOENT) - return 0; - return -errno; - } + if (lstat(m->path, &target) < 0) + return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path); what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -303,11 +581,20 @@ static int apply_mount( return -ELOOP; } break; + } + case READONLY: case READWRITE: - /* Nothing to mount here, we just later toggle the - * MS_RDONLY bit for the mount point */ - return 0; + + r = path_is_mount_point(m->path, 0); + if (r < 0) + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); + if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ + return 0; + + /* This isn't a mount point yet, let's make it one. */ + what = m->path; + break; case PRIVATE_TMP: what = tmp_dir; @@ -326,68 +613,133 @@ static int apply_mount( assert(what); - r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) { - log_debug("Successfully mounted %s to %s", what, m->path); - return r; - } else { - if (m->ignore && errno == ENOENT) - return 0; + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); - } + + log_debug("Successfully mounted %s to %s", what, m->path); + return 0; } -static int make_read_only(BindMount *m) { - int r; +static int make_read_only(BindMount *m, char **blacklist) { + int r = 0; assert(m); if (IN_SET(m->mode, INACCESSIBLE, READONLY)) - r = bind_remount_recursive(m->path, true); - else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { - r = bind_remount_recursive(m->path, false); - if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + r = bind_remount_recursive(m->path, true, blacklist); + else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else - r = 0; - - if (m->ignore && r == -ENOENT) return 0; + /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only + * already stays this way. This improves compatibility with container managers, where we won't attempt to undo + * read-only mounts already applied. */ + return r; } +static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + int r; + + assert(m); + assert(n); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we + * can't resolve the path, and which have been marked for such removal. */ + + for (f = m, t = m; f < m+*n; f++) { + + r = chase_symlinks(f->path, root_directory, &f->chased); + if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */ + continue; + if (r < 0) + return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path); + + if (path_equal(f->path, f->chased)) + f->chased = mfree(f->chased); + else { + log_debug("Chased %s → %s", f->path, f->chased); + f->path = f->chased; + } + + *t = *f; + t++; + } + + *n = t - m; + return 0; +} + +static unsigned namespace_calculate_mounts( + const NameSpaceInfo *ns_info, + char** read_write_paths, + char** read_only_paths, + char** inaccessible_paths, + const char* tmp_dir, + const char* var_tmp_dir, + ProtectHome protect_home, + ProtectSystem protect_system) { + + unsigned protect_home_cnt; + unsigned protect_system_cnt = + (protect_system == PROTECT_SYSTEM_STRICT ? + ELEMENTSOF(protect_system_strict_table) : + ((protect_system == PROTECT_SYSTEM_FULL) ? + ELEMENTSOF(protect_system_full_table) : + ((protect_system == PROTECT_SYSTEM_YES) ? + ELEMENTSOF(protect_system_yes_table) : 0))); + + protect_home_cnt = + (protect_home == PROTECT_HOME_YES ? + ELEMENTSOF(protect_home_yes_table) : + ((protect_home == PROTECT_HOME_READ_ONLY) ? + ELEMENTSOF(protect_home_read_only_table) : 0)); + + return !!tmp_dir + !!var_tmp_dir + + strv_length(read_write_paths) + + strv_length(read_only_paths) + + strv_length(inaccessible_paths) + + ns_info->private_dev + + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + + (ns_info->protect_control_groups ? 1 : 0) + + (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) + + protect_home_cnt + protect_system_cnt; +} + int setup_namespace( const char* root_directory, + const NameSpaceInfo *ns_info, char** read_write_paths, char** read_only_paths, char** inaccessible_paths, const char* tmp_dir, const char* var_tmp_dir, - bool private_dev, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags) { BindMount *m, *mounts = NULL; + bool make_slave = false; unsigned n; int r = 0; if (mount_flags == 0) mount_flags = MS_SHARED; - if (unshare(CLONE_NEWNS) < 0) - return -errno; + n = namespace_calculate_mounts(ns_info, + read_write_paths, + read_only_paths, + inaccessible_paths, + tmp_dir, var_tmp_dir, + protect_home, protect_system); - n = !!tmp_dir + !!var_tmp_dir + - strv_length(read_write_paths) + - strv_length(read_only_paths) + - strv_length(inaccessible_paths) + - private_dev + - (protect_home != PROTECT_HOME_NO ? 3 : 0) + - (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) + - (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0); + /* Set mount slave mode */ + if (root_directory || n > 0) + make_slave = true; if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); @@ -415,100 +767,127 @@ int setup_namespace( m++; } - if (private_dev) { + if (ns_info->private_dev) { m->path = prefix_roota(root_directory, "/dev"); m->mode = PRIVATE_DEV; m++; } - if (protect_home != PROTECT_HOME_NO) { - const char *home_dir, *run_user_dir, *root_dir; - - home_dir = prefix_roota(root_directory, "/home"); - home_dir = strjoina("-", home_dir); - run_user_dir = prefix_roota(root_directory, "/run/user"); - run_user_dir = strjoina("-", run_user_dir); - root_dir = prefix_roota(root_directory, "/root"); - root_dir = strjoina("-", root_dir); - - r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir), - protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE); + if (ns_info->protect_kernel_tunables) { + r = append_protect_kernel_tunables(&m, root_directory); if (r < 0) return r; } - if (protect_system != PROTECT_SYSTEM_NO) { - const char *usr_dir, *boot_dir, *etc_dir; - - usr_dir = prefix_roota(root_directory, "/usr"); - boot_dir = prefix_roota(root_directory, "/boot"); - boot_dir = strjoina("-", boot_dir); - etc_dir = prefix_roota(root_directory, "/etc"); - - r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL - ? STRV_MAKE(usr_dir, boot_dir, etc_dir) - : STRV_MAKE(usr_dir, boot_dir), READONLY); + if (ns_info->protect_kernel_modules) { + r = append_protect_kernel_modules(&m, root_directory); if (r < 0) return r; } + if (ns_info->protect_control_groups) { + m->path = prefix_roota(root_directory, "/sys/fs/cgroup"); + m->mode = READONLY; + m++; + } + + r = append_protect_home(&m, root_directory, protect_home); + if (r < 0) + return r; + + r = append_protect_system(&m, root_directory, protect_system); + if (r < 0) + return r; + assert(mounts + n == m); + /* Resolve symlinks manually first, as mount() will always follow them relative to the host's + * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit + * racy. */ + r = chase_all_symlinks(root_directory, mounts, &n); + if (r < 0) + goto finish; + qsort(mounts, n, sizeof(BindMount), mount_path_compare); + drop_duplicates(mounts, &n); + drop_outside_root(root_directory, mounts, &n); + drop_inaccessible(mounts, &n); + drop_nop(mounts, &n); } - if (n > 0 || root_directory) { + if (unshare(CLONE_NEWNS) < 0) { + r = -errno; + goto finish; + } + + if (make_slave) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - return -errno; + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (root_directory) { - /* Turn directory into bind mount */ - if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) - return -errno; + /* Turn directory into bind mount, if it isn't one yet */ + r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW); + if (r < 0) + goto finish; + if (r == 0) { + if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } + } } if (n > 0) { + char **blacklist; + unsigned j; + + /* First round, add in all special mounts we need */ for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto fail; + goto finish; } + /* Create a blacklist we can pass to bind_mount_recursive() */ + blacklist = newa(char*, n+1); + for (j = 0; j < n; j++) + blacklist[j] = (char*) mounts[j].path; + blacklist[j] = NULL; + + /* Second round, flip the ro bits if necessary. */ for (m = mounts; m < mounts + n; ++m) { - r = make_read_only(m); + r = make_read_only(m, blacklist); if (r < 0) - goto fail; + goto finish; } } if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - - /* at this point, we cannot rollback */ if (r < 0) - return r; + goto finish; } /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ - if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - /* at this point, we cannot rollback */ - return -errno; + if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } - return 0; + r = 0; -fail: - if (n > 0) { - for (m = mounts; m < mounts + n; ++m) - if (m->done) - (void) umount2(m->path, MNT_DETACH); - } +finish: + for (m = mounts; m < mounts + n; m++) + free(m->chased); return r; } @@ -658,6 +1037,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { [PROTECT_SYSTEM_NO] = "no", [PROTECT_SYSTEM_YES] = "yes", [PROTECT_SYSTEM_FULL] = "full", + [PROTECT_SYSTEM_STRICT] = "strict", }; DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem); diff --git a/src/core/namespace.h b/src/core/namespace.h index 1aedf5f208..6310638e9a 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -4,6 +4,7 @@ This file is part of systemd. Copyright 2010 Lennart Poettering + Copyright 2016 Djalal Harouni systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -19,6 +20,8 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ +typedef struct NameSpaceInfo NameSpaceInfo; + #include <stdbool.h> #include "macro.h" @@ -35,17 +38,25 @@ typedef enum ProtectSystem { PROTECT_SYSTEM_NO, PROTECT_SYSTEM_YES, PROTECT_SYSTEM_FULL, + PROTECT_SYSTEM_STRICT, _PROTECT_SYSTEM_MAX, _PROTECT_SYSTEM_INVALID = -1 } ProtectSystem; +struct NameSpaceInfo { + bool private_dev:1; + bool protect_control_groups:1; + bool protect_kernel_tunables:1; + bool protect_kernel_modules:1; +}; + int setup_namespace(const char *chroot, + const NameSpaceInfo *ns_info, char **read_write_paths, char **read_only_paths, char **inaccessible_paths, const char *tmp_dir, const char *var_tmp_dir, - bool private_dev, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags); diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf index 14f6aec029..a61677e645 100644 --- a/src/core/org.freedesktop.systemd1.conf +++ b/src/core/org.freedesktop.systemd1.conf @@ -54,6 +54,10 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="GetUnitByInvocationID"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="LoadUnit"/> <allow send_destination="org.freedesktop.systemd1" @@ -90,6 +94,10 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="GetUnitFileLinks"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="ListJobs"/> <allow send_destination="org.freedesktop.systemd1" @@ -184,6 +192,14 @@ <allow send_destination="org.freedesktop.systemd1" send_interface="org.freedesktop.systemd1.Manager" + send_member="RefUnit"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" + send_member="UnrefUnit"/> + + <allow send_destination="org.freedesktop.systemd1" + send_interface="org.freedesktop.systemd1.Manager" send_member="EnableUnitFiles"/> <allow send_destination="org.freedesktop.systemd1" diff --git a/src/core/path.c b/src/core/path.c index 10f9b06974..83f794be89 100644 --- a/src/core/path.c +++ b/src/core/path.c @@ -577,6 +577,10 @@ static int path_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + path_mkdir(p); p->result = PATH_SUCCESS; diff --git a/src/core/scope.c b/src/core/scope.c index b278aed3d6..d6e1f8e392 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -147,6 +147,32 @@ static int scope_verify(Scope *s) { return 0; } +static int scope_load_init_scope(Unit *u) { + assert(u); + + if (!unit_has_name(u, SPECIAL_INIT_SCOPE)) + return 0; + + u->transient = true; + u->perpetual = true; + + /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we + * synthesize it here, instead of relying on the unit file on disk. */ + + u->default_dependencies = false; + u->ignore_on_isolate = true; + + SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14; + + /* Prettify things, if we can. */ + if (!u->description) + u->description = strdup("System and Service Manager"); + if (!u->documentation) + (void) strv_extend(&u->documentation, "man:systemd(1)"); + + return 1; +} + static int scope_load(Unit *u) { Scope *s = SCOPE(u); int r; @@ -158,6 +184,9 @@ static int scope_load(Unit *u) { /* Refuse to load non-transient scope units, but allow them while reloading. */ return -ENOENT; + r = scope_load_init_scope(u); + if (r < 0) + return r; r = unit_load_fragment_and_dropin_optional(u); if (r < 0) return r; @@ -298,6 +327,10 @@ static int scope_start(Unit *u) { if (!u->transient && !MANAGER_IS_RELOADING(u->manager)) return -ENOENT; + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + (void) unit_realize_cgroup(u); (void) unit_reset_cpu_usage(u); @@ -441,7 +474,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified() <= 0 && set_isempty(u->pids)) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) scope_notify_cgroup_empty_event(u); } @@ -530,34 +563,16 @@ static void scope_enumerate(Manager *m) { u = manager_get_unit(m, SPECIAL_INIT_SCOPE); if (!u) { - u = unit_new(m, sizeof(Scope)); - if (!u) { - log_oom(); - return; - } - - r = unit_add_name(u, SPECIAL_INIT_SCOPE); + r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u); if (r < 0) { - unit_free(u); - log_error_errno(r, "Failed to add init.scope name"); + log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m"); return; } } u->transient = true; - u->default_dependencies = false; - u->no_gc = true; - u->ignore_on_isolate = true; - u->refuse_manual_start = true; - u->refuse_manual_stop = true; + u->perpetual = true; SCOPE(u)->deserialized_state = SCOPE_RUNNING; - SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14; - - /* Prettify things, if we can. */ - if (!u->description) - u->description = strdup("System and Service Manager"); - if (!u->documentation) - (void) strv_extend(&u->documentation, "man:systemd(1)"); unit_add_to_load_queue(u); unit_add_to_dbus_queue(u); diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h index 8f1f058a32..f46370d020 100644 --- a/src/core/selinux-access.h +++ b/src/core/selinux-access.h @@ -33,7 +33,7 @@ int mac_selinux_generic_access_check(sd_bus_message *message, const char *path, #define mac_selinux_unit_access_check(unit, message, permission, error) \ ({ \ - Unit *_unit = (unit); \ + const Unit *_unit = (unit); \ mac_selinux_generic_access_check((message), _unit->source_path ?: _unit->fragment_path, (permission), (error)); \ }) diff --git a/src/core/service.c b/src/core/service.c index 4a37702f52..a7274a758f 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) { free(fs); } -static void service_release_resources(Unit *u) { +static void service_release_fd_store(Service *s) { + assert(s); + + log_unit_debug(UNIT(s), "Releasing all stored fds"); + while (s->fd_store) + service_fd_store_unlink(s->fd_store); + + assert(s->n_fd_store == 0); +} + +static void service_release_resources(Unit *u, bool inactive) { Service *s = SERVICE(u); assert(s); @@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) { if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0) return; - log_unit_debug(u, "Releasing all resources."); + log_unit_debug(u, "Releasing resources."); s->stdin_fd = safe_close(s->stdin_fd); s->stdout_fd = safe_close(s->stdout_fd); s->stderr_fd = safe_close(s->stderr_fd); - while (s->fd_store) - service_fd_store_unlink(s->fd_store); - - assert(s->n_fd_store == 0); + if (inactive) + service_release_fd_store(s); } static void service_done(Unit *u) { @@ -350,7 +358,7 @@ static void service_done(Unit *u) { s->timer_event_source = sd_event_source_unref(s->timer_event_source); - service_release_resources(u); + service_release_resources(u, true); } static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) { @@ -360,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us assert(fs); /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */ + log_unit_debug(UNIT(fs->service), + "Received %s on stored fd %d (%s), closing.", + revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP", + fs->fd, strna(fs->fdname)); service_fd_store_unlink(fs); return 0; } @@ -368,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) { ServiceFDStore *fs; int r; + /* fd is always consumed if we return >= 0 */ + assert(s); assert(fd >= 0); if (s->n_fd_store >= s->n_fd_store_max) - return 0; + return -EXFULL; /* Our store is full. + * Use this errno rather than E[NM]FILE to distinguish from + * the case where systemd itself hits the file limit. */ LIST_FOREACH(fd_store, fs, s->fd_store) { r = same_fd(fs->fd, fd); if (r < 0) return r; if (r > 0) { - /* Already included */ safe_close(fd); - return 1; + return 0; /* fd already included */ } } @@ -409,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) { LIST_PREPEND(fd_store, s->fd_store, fs); s->n_fd_store++; - return 1; + return 1; /* fd newly stored */ } static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { @@ -417,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { assert(s); - if (fdset_size(fds) <= 0) - return 0; - - while (s->n_fd_store < s->n_fd_store_max) { + while (fdset_size(fds) > 0) { _cleanup_close_ int fd = -1; fd = fdset_steal_first(fds); @@ -428,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { break; r = service_add_fd_store(s, fd, name); + if (r == -EXFULL) + return log_unit_warning_errno(UNIT(s), r, + "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.", + s->n_fd_store_max); if (r < 0) - return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m"); - if (r > 0) { - log_unit_debug(UNIT(s), "Added fd to fd store."); - fd = -1; - } + return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m"); + if (r > 0) + log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name)); + fd = -1; } - if (fdset_size(fds) > 0) - log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max); - return 0; } @@ -1225,6 +1237,7 @@ static int service_spawn( return r; n_fds = r; + log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds); } r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout)); @@ -1256,10 +1269,16 @@ static int service_spawn( socklen_t salen = sizeof(sa); r = getpeername(s->socket_fd, &sa.sa, &salen); - if (r < 0) - return -errno; + if (r < 0) { + r = -errno; - if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) { + /* ENOTCONN is legitimate if the endpoint disappeared on shutdown. + * This connection is over, but the socket unit lives on. */ + if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST)) + return r; + } + + if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) { _cleanup_free_ char *addr = NULL; char *t; int port; @@ -1449,7 +1468,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) if (s->result != SERVICE_SUCCESS) { log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result)); - failure_action(UNIT(s)->manager, s->failure_action, UNIT(s)->reboot_arg); + emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed"); } if (allow_restart && service_shall_restart(s)) { @@ -1471,6 +1490,9 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) /* Also, remove the runtime directory */ exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); + /* Get rid of the IPC bits of the user */ + unit_unref_uid_gid(UNIT(s), true); + /* Release the user, and destroy it if we are the only remaining owner */ dynamic_creds_destroy(&s->dynamic_creds); @@ -1747,7 +1769,15 @@ static void service_enter_start(Service *s) { } if (!c) { - assert(s->type == SERVICE_ONESHOT); + if (s->type != SERVICE_ONESHOT) { + /* There's no command line configured for the main command? Hmm, that is strange. This can only + * happen if the configuration changes at runtime. In this case, let's enter a failure + * state. */ + log_unit_error(UNIT(s), "There's no 'start' task anymore we could start: %m"); + r = -ENXIO; + goto fail; + } + service_enter_start_post(s); return; } @@ -2024,6 +2054,10 @@ static int service_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; s->main_pid_known = false; @@ -2315,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, r = service_add_fd_store(s, fd, t); if (r < 0) log_unit_error_errno(u, r, "Failed to add fd to store: %m"); - else if (r > 0) + else fdset_remove(fds, fd); } @@ -2591,8 +2625,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { assert(s); assert(pid >= 0); - if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) : - is_clean_exit_lsb(code, status, &s->success_status)) + if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status)) f = SERVICE_SUCCESS; else if (code == CLD_EXITED) f = SERVICE_FAILURE_EXIT_CODE; @@ -2634,7 +2667,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { f = SERVICE_SUCCESS; } - log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE, + /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure + * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption + * that the service already logged the reason at a higher log level on its own. However, if the service + * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise + * our log level to WARNING then. */ + + log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : + (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING), LOG_UNIT_ID(u), LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s", sigchld_code_to_string(code), status, @@ -2866,7 +2906,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ - if (cg_unified() <= 0 && set_isempty(u->pids)) + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids)) service_notify_cgroup_empty_event(u); } @@ -3076,9 +3116,7 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) if (!streq_ptr(s->status_text, t)) { - free(s->status_text); - s->status_text = t; - t = NULL; + free_and_replace(s->status_text, t); notify_dbus = true; } diff --git a/src/core/service.h b/src/core/service.h index 888007cc0b..2869144fcb 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -178,7 +178,7 @@ struct Service { char *status_text; int status_errno; - FailureAction failure_action; + EmergencyAction emergency_action; UnitRef accept_socket; diff --git a/src/core/show-status.c b/src/core/show-status.c index 59ebdc7219..65f9cb888a 100644 --- a/src/core/show-status.c +++ b/src/core/show-status.c @@ -61,6 +61,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char if (vasprintf(&s, format, ap) < 0) return log_oom(); + /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This + * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely, + * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier + * for us so that we don't have to recover from hangups and suchlike triggered on the console. */ + fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC); if (fd < 0) return fd; diff --git a/src/core/slice.c b/src/core/slice.c index c7700b8857..ed5d3fd701 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -130,6 +130,28 @@ static int slice_verify(Slice *s) { return 0; } +static int slice_load_root_slice(Unit *u) { + assert(u); + + if (!unit_has_name(u, SPECIAL_ROOT_SLICE)) + return 0; + + u->perpetual = true; + + /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its + * special semantics we synthesize it here, instead of relying on the unit file on disk. */ + + u->default_dependencies = false; + u->ignore_on_isolate = true; + + if (!u->description) + u->description = strdup("Root Slice"); + if (!u->documentation) + u->documentation = strv_new("man:systemd.special(7)", NULL); + + return 1; +} + static int slice_load(Unit *u) { Slice *s = SLICE(u); int r; @@ -137,6 +159,9 @@ static int slice_load(Unit *u) { assert(s); assert(u->load_state == UNIT_STUB); + r = slice_load_root_slice(u); + if (r < 0) + return r; r = unit_load_fragment_and_dropin_optional(u); if (r < 0) return r; @@ -187,10 +212,15 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) { static int slice_start(Unit *u) { Slice *t = SLICE(u); + int r; assert(t); assert(t->state == SLICE_DEAD); + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + (void) unit_realize_cgroup(u); (void) unit_reset_cpu_usage(u); @@ -269,32 +299,16 @@ static void slice_enumerate(Manager *m) { u = manager_get_unit(m, SPECIAL_ROOT_SLICE); if (!u) { - u = unit_new(m, sizeof(Slice)); - if (!u) { - log_oom(); - return; - } - - r = unit_add_name(u, SPECIAL_ROOT_SLICE); + r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u); if (r < 0) { - unit_free(u); - log_error_errno(r, "Failed to add -.slice name"); + log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m"); return; } } - u->default_dependencies = false; - u->no_gc = true; - u->ignore_on_isolate = true; - u->refuse_manual_start = true; - u->refuse_manual_stop = true; + u->perpetual = true; SLICE(u)->deserialized_state = SLICE_ACTIVE; - if (!u->description) - u->description = strdup("Root Slice"); - if (!u->documentation) - (void) strv_extend(&u->documentation, "man:systemd.special(7)"); - unit_add_to_load_queue(u); unit_add_to_dbus_queue(u); } diff --git a/src/core/socket.c b/src/core/socket.c index 50872e8366..0b1c4acfec 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1334,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) { static int usbffs_dispatch_eps(SocketPort *p) { _cleanup_free_ struct dirent **ent = NULL; - _cleanup_free_ char *path = NULL; int r, i, n, k; - path = dirname_malloc(p->path); - if (!path) - return -ENOMEM; - - r = scandir(path, &ent, usbffs_select_ep, alphasort); + r = scandir(p->path, &ent, usbffs_select_ep, alphasort); if (r < 0) return -errno; @@ -1356,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) { for (i = 0; i < n; ++i) { _cleanup_free_ char *ep = NULL; - ep = path_make_absolute(ent[i]->d_name, path); + ep = path_make_absolute(ent[i]->d_name, p->path); if (!ep) return -ENOMEM; @@ -1905,6 +1900,8 @@ static void socket_enter_dead(Socket *s, SocketResult f) { exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); + unit_unref_uid_gid(UNIT(s), true); + dynamic_creds_destroy(&s->dynamic_creds); } @@ -2357,11 +2354,14 @@ static int socket_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + s->result = SOCKET_SUCCESS; s->reset_cpu_usage = true; socket_enter_start_pre(s); - return 1; } @@ -2746,7 +2746,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) { s->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = SOCKET_SUCCESS; else if (code == CLD_EXITED) f = SOCKET_FAILURE_EXIT_CODE; diff --git a/src/core/swap.c b/src/core/swap.c index 2c802da3b5..2228a254bb 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -381,11 +381,7 @@ static int swap_setup_unit( if (!u) { delete = true; - u = unit_new(m, sizeof(Swap)); - if (!u) - return log_oom(); - - r = unit_add_name(u, e); + r = unit_new_for_name(m, sizeof(Swap), e, &u); if (r < 0) goto fail; @@ -683,6 +679,8 @@ static void swap_enter_dead(Swap *s, SwapResult f) { exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); + unit_unref_uid_gid(UNIT(s), true); + dynamic_creds_destroy(&s->dynamic_creds); } @@ -859,6 +857,10 @@ static int swap_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + s->result = SWAP_SUCCESS; s->reset_cpu_usage = true; @@ -986,7 +988,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) { s->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = SWAP_SUCCESS; else if (code == CLD_EXITED) f = SWAP_FAILURE_EXIT_CODE; @@ -1187,6 +1189,7 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v case SWAP_DEAD: case SWAP_FAILED: + (void) unit_acquire_invocation_id(UNIT(swap)); swap_enter_active(swap, SWAP_SUCCESS); break; diff --git a/src/core/system.conf b/src/core/system.conf index c6bb050aac..746572b7ff 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -21,6 +21,7 @@ #CrashChangeVT=no #CrashShell=no #CrashReboot=no +#CtrlAltDelBurstAction=reboot-force #CPUAffinity=1 2 #JoinControllers=cpu,cpuacct net_cls,net_prio #RuntimeWatchdogSec=0 diff --git a/src/core/target.c b/src/core/target.c index 61a91aad07..765c1f3fa4 100644 --- a/src/core/target.c +++ b/src/core/target.c @@ -124,10 +124,15 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) { static int target_start(Unit *u) { Target *t = TARGET(u); + int r; assert(t); assert(t->state == TARGET_DEAD); + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + target_set_state(t, TARGET_ACTIVE); return 1; } diff --git a/src/core/timer.c b/src/core/timer.c index e2b43f02f8..2469a517ea 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -261,6 +261,8 @@ static void timer_set_state(Timer *t, TimerState state) { if (state != TIMER_WAITING) { t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source); t->realtime_event_source = sd_event_source_unref(t->realtime_event_source); + t->next_elapse_monotonic_or_boottime = USEC_INFINITY; + t->next_elapse_realtime = USEC_INFINITY; } if (state != old_state) @@ -616,6 +618,10 @@ static int timer_start(Unit *u) { return r; } + r = unit_acquire_invocation_id(u); + if (r < 0) + return r; + t->last_trigger = DUAL_TIMESTAMP_NULL; /* Reenable all timers that depend on unit activation time */ @@ -632,7 +638,7 @@ static int timer_start(Unit *u) { /* The timer has never run before, * make sure a stamp file exists. */ - touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID); + (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID); } t->result = TIMER_SUCCESS; diff --git a/src/core/transaction.c b/src/core/transaction.c index 8370b864fb..e22e3b30c2 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -1085,10 +1085,8 @@ Transaction *transaction_new(bool irreversible) { return NULL; tr->jobs = hashmap_new(NULL); - if (!tr->jobs) { - free(tr); - return NULL; - } + if (!tr->jobs) + return mfree(tr); tr->irreversible = irreversible; diff --git a/src/core/umount.c b/src/core/umount.c index c21a2be54e..1e5459ed80 100644 --- a/src/core/umount.c +++ b/src/core/umount.c @@ -375,7 +375,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e /* If we are in a container, don't attempt to read-only mount anything as that brings no real benefits, but might confuse the host, as we remount - the superblock here, not the bind mound. */ + the superblock here, not the bind mount. */ if (detect_container() <= 0) { _cleanup_free_ char *options = NULL; /* MS_REMOUNT requires that the data parameter diff --git a/src/core/unit.c b/src/core/unit.c index ff7c562fba..e664e23892 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -37,6 +37,7 @@ #include "execute.h" #include "fileio-label.h" #include "formats-util.h" +#include "id128-util.h" #include "load-dropin.h" #include "load-fragment.h" #include "log.h" @@ -87,10 +88,8 @@ Unit *unit_new(Manager *m, size_t size) { return NULL; u->names = set_new(&string_hash_ops); - if (!u->names) { - free(u); - return NULL; - } + if (!u->names) + return mfree(u); u->manager = m; u->type = _UNIT_TYPE_INVALID; @@ -100,7 +99,9 @@ Unit *unit_new(Manager *m, size_t size) { u->on_failure_job_mode = JOB_REPLACE; u->cgroup_inotify_wd = -1; u->job_timeout = USEC_INFINITY; - u->sigchldgen = 0; + u->ref_uid = UID_INVALID; + u->ref_gid = GID_INVALID; + u->cpu_usage_last = NSEC_INFINITY; RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst); RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16); @@ -108,11 +109,29 @@ Unit *unit_new(Manager *m, size_t size) { return u; } +int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) { + Unit *u; + int r; + + u = unit_new(m, size); + if (!u) + return -ENOMEM; + + r = unit_add_name(u, name); + if (r < 0) { + unit_free(u); + return r; + } + + *ret = u; + return r; +} + bool unit_has_name(Unit *u, const char *name) { assert(u); assert(name); - return !!set_get(u->names, (char*) name); + return set_contains(u->names, (char*) name); } static void unit_init(Unit *u) { @@ -301,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) { bool unit_check_gc(Unit *u) { UnitActiveState state; + bool inactive; assert(u); if (u->job) @@ -310,24 +330,28 @@ bool unit_check_gc(Unit *u) { return true; state = unit_active_state(u); + inactive = state == UNIT_INACTIVE; /* If the unit is inactive and failed and no job is queued for * it, then release its runtime resources */ if (UNIT_IS_INACTIVE_OR_FAILED(state) && UNIT_VTABLE(u)->release_resources) - UNIT_VTABLE(u)->release_resources(u); + UNIT_VTABLE(u)->release_resources(u, inactive); /* But we keep the unit object around for longer when it is * referenced or configured to not be gc'ed */ - if (state != UNIT_INACTIVE) + if (!inactive) return true; - if (u->no_gc) + if (u->perpetual) return true; if (u->refs) return true; + if (sd_bus_track_count(u->bus_track) > 0) + return true; + if (UNIT_VTABLE(u)->check_gc) if (UNIT_VTABLE(u)->check_gc(u)) return true; @@ -508,11 +532,17 @@ void unit_free(Unit *u) { sd_bus_slot_unref(u->match_bus_slot); + sd_bus_track_unref(u->bus_track); + u->deserialized_refs = strv_free(u->deserialized_refs); + unit_free_requires_mounts_for(u); SET_FOREACH(t, u->names, i) hashmap_remove_value(u->manager->units, t, u); + if (!sd_id128_is_null(u->invocation_id)) + hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u); + if (u->job) { Job *j = u->job; job_uninstall(j); @@ -550,6 +580,8 @@ void unit_free(Unit *u) { unit_release_cgroup(u); + unit_unref_uid_gid(u, false); + (void) manager_update_failed_units(u->manager, u, false); set_remove(u->manager->startup_units, u); @@ -846,18 +878,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) { return r; } - if (c->std_output != EXEC_OUTPUT_KMSG && - c->std_output != EXEC_OUTPUT_SYSLOG && - c->std_output != EXEC_OUTPUT_JOURNAL && - c->std_output != EXEC_OUTPUT_KMSG_AND_CONSOLE && - c->std_output != EXEC_OUTPUT_SYSLOG_AND_CONSOLE && - c->std_output != EXEC_OUTPUT_JOURNAL_AND_CONSOLE && - c->std_error != EXEC_OUTPUT_KMSG && - c->std_error != EXEC_OUTPUT_SYSLOG && - c->std_error != EXEC_OUTPUT_JOURNAL && - c->std_error != EXEC_OUTPUT_KMSG_AND_CONSOLE && - c->std_error != EXEC_OUTPUT_JOURNAL_AND_CONSOLE && - c->std_error != EXEC_OUTPUT_SYSLOG_AND_CONSOLE) + if (!IN_SET(c->std_output, + EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE, + EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE, + EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) && + !IN_SET(c->std_error, + EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE, + EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE, + EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE)) return 0; /* If syslog or kernel logging is requested, make sure our own @@ -894,6 +922,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { Unit *following; _cleanup_set_free_ Set *following_set = NULL; int r; + const char *n; assert(u); assert(u->type >= 0); @@ -915,6 +944,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { "%s\tGC Check Good: %s\n" "%s\tNeed Daemon Reload: %s\n" "%s\tTransient: %s\n" + "%s\tPerpetual: %s\n" "%s\tSlice: %s\n" "%s\tCGroup: %s\n" "%s\tCGroup realized: %s\n" @@ -933,6 +963,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { prefix, yes_no(unit_check_gc(u)), prefix, yes_no(unit_need_daemon_reload(u)), prefix, yes_no(u->transient), + prefix, yes_no(u->perpetual), prefix, strna(unit_slice_name(u)), prefix, strna(u->cgroup_path), prefix, yes_no(u->cgroup_realized), @@ -942,6 +973,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { SET_FOREACH(t, u->names, i) fprintf(f, "%s\tName: %s\n", prefix, t); + if (!sd_id128_is_null(u->invocation_id)) + fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n", + prefix, SD_ID128_FORMAT_VAL(u->invocation_id)); + STRV_FOREACH(j, u->documentation) fprintf(f, "%s\tDocumentation: %s\n", prefix, *j); @@ -969,8 +1004,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { if (u->job_timeout != USEC_INFINITY) fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0)); - if (u->job_timeout_action != FAILURE_ACTION_NONE) - fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action)); + if (u->job_timeout_action != EMERGENCY_ACTION_NONE) + fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action)); if (u->job_timeout_reboot_arg) fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg); @@ -1035,13 +1070,14 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { else if (u->load_state == UNIT_ERROR) fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error)); + for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track)) + fprintf(f, "%s\tBus Ref: %s\n", prefix, n); if (u->job) job_dump(u->job, f, prefix2); if (u->nop_job) job_dump(u->nop_job, f, prefix2); - } /* Common implementation for multiple backends */ @@ -1436,7 +1472,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) { format = unit_get_status_message_format(u, t); DISABLE_WARNING_FORMAT_NONLITERAL; - xsprintf(buf, format, unit_description(u)); + snprintf(buf, sizeof buf, format, unit_description(u)); REENABLE_WARNING; mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING : @@ -1476,7 +1512,7 @@ int unit_start_limit_test(Unit *u) { log_unit_warning(u, "Start request repeated too quickly."); u->start_limit_hit = true; - return failure_action(u->manager, u->start_limit_action, u->reboot_arg); + return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed"); } /* Errors: @@ -1602,6 +1638,18 @@ int unit_stop(Unit *u) { return UNIT_VTABLE(u)->stop(u); } +bool unit_can_stop(Unit *u) { + assert(u); + + if (!unit_supported(u)) + return false; + + if (u->perpetual) + return false; + + return !!UNIT_VTABLE(u)->stop; +} + /* Errors: * -EBADR: This unit type does not support reloading. * -ENOEXEC: Unit is not started. @@ -2136,13 +2184,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) { case JOB_VERIFY_ACTIVE: case JOB_START: - case JOB_STOP: case JOB_NOP: + /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not + * startable by us but may appear due to external events, and it thus makes sense to permit enqueing + * jobs for it. */ return true; + case JOB_STOP: + /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to + * external events), hence it makes no sense to permit enqueing such a request either. */ + return !u->perpetual; + case JOB_RESTART: case JOB_TRY_RESTART: - return unit_can_start(u); + return unit_can_stop(u) && unit_can_start(u); case JOB_RELOAD: case JOB_TRY_RELOAD: @@ -2212,6 +2267,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen return 0; } + if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) { + log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id); + return 0; + } + r = set_ensure_allocated(&u->dependencies[d], NULL); if (r < 0) return r; @@ -2374,6 +2434,15 @@ char *unit_dbus_path(Unit *u) { return unit_dbus_path_from_name(u->id); } +char *unit_dbus_path_invocation_id(Unit *u) { + assert(u); + + if (sd_id128_is_null(u->invocation_id)) + return NULL; + + return unit_dbus_path_from_name(u->invocation_id_string); +} + int unit_set_slice(Unit *u, Unit *slice) { assert(u); assert(slice); @@ -2608,21 +2677,34 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result)); unit_serialize_item(u, f, "transient", yes_no(u->transient)); - unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base); + + unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base); + if (u->cpu_usage_last != NSEC_INFINITY) + unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last); if (u->cgroup_path) unit_serialize_item(u, f, "cgroup", u->cgroup_path); unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized)); + if (uid_is_valid(u->ref_uid)) + unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid); + if (gid_is_valid(u->ref_gid)) + unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid); + + if (!sd_id128_is_null(u->invocation_id)) + unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)); + + bus_track_serialize(u->bus_track, f, "ref"); + if (serialize_jobs) { if (u->job) { fprintf(f, "job\n"); - job_serialize(u->job, f, fds); + job_serialize(u->job, f); } if (u->nop_job) { fprintf(f, "job\n"); - job_serialize(u->nop_job, f, fds); + job_serialize(u->nop_job, f); } } @@ -2752,7 +2834,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { if (!j) return log_oom(); - r = job_deserialize(j, f, fds); + r = job_deserialize(j, f); if (r < 0) { job_free(j); return r; @@ -2824,11 +2906,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; - } else if (streq(l, "cpuacct-usage-base")) { + } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) { - r = safe_atou64(v, &u->cpuacct_usage_base); + r = safe_atou64(v, &u->cpu_usage_base); if (r < 0) - log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v); + log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v); + + continue; + + } else if (streq(l, "cpu-usage-last")) { + + r = safe_atou64(v, &u->cpu_usage_last); + if (r < 0) + log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v); continue; @@ -2851,6 +2941,47 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { u->cgroup_realized = b; continue; + + } else if (streq(l, "ref-uid")) { + uid_t uid; + + r = parse_uid(v, &uid); + if (r < 0) + log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v); + else + unit_ref_uid_gid(u, uid, GID_INVALID); + + continue; + + } else if (streq(l, "ref-gid")) { + gid_t gid; + + r = parse_gid(v, &gid); + if (r < 0) + log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v); + else + unit_ref_uid_gid(u, UID_INVALID, gid); + + } else if (streq(l, "ref")) { + + r = strv_extend(&u->deserialized_refs, v); + if (r < 0) + log_oom(); + + continue; + } else if (streq(l, "invocation-id")) { + sd_id128_t id; + + r = sd_id128_from_string(v, &id); + if (r < 0) + log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v); + else { + r = unit_set_invocation_id(u, id); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m"); + } + + continue; } if (unit_can_serialize(u)) { @@ -2925,7 +3056,8 @@ int unit_add_node_link(Unit *u, const char *what, bool wants, UnitDependency dep } int unit_coldplug(Unit *u) { - int r = 0, q = 0; + int r = 0, q; + char **i; assert(u); @@ -2936,21 +3068,29 @@ int unit_coldplug(Unit *u) { u->coldplugged = true; - if (UNIT_VTABLE(u)->coldplug) - r = UNIT_VTABLE(u)->coldplug(u); + STRV_FOREACH(i, u->deserialized_refs) { + q = bus_unit_track_add_name(u, *i); + if (q < 0 && r >= 0) + r = q; + } + u->deserialized_refs = strv_free(u->deserialized_refs); + + if (UNIT_VTABLE(u)->coldplug) { + q = UNIT_VTABLE(u)->coldplug(u); + if (q < 0 && r >= 0) + r = q; + } - if (u->job) + if (u->job) { q = job_coldplug(u->job); + if (q < 0 && r >= 0) + r = q; + } - if (r < 0) - return r; - if (q < 0) - return q; - - return 0; + return r; } -static bool fragment_mtime_newer(const char *path, usec_t mtime) { +static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) { struct stat st; if (!path) @@ -2960,12 +3100,12 @@ static bool fragment_mtime_newer(const char *path, usec_t mtime) { /* What, cannot access this anymore? */ return true; - if (mtime > 0) + if (path_masked) + /* For masked files check if they are still so */ + return !null_or_empty(&st); + else /* For non-empty files check the mtime */ return timespec_load(&st.st_mtim) > mtime; - else if (!null_or_empty(&st)) - /* For masked files check if they are still so */ - return true; return false; } @@ -2976,18 +3116,22 @@ bool unit_need_daemon_reload(Unit *u) { assert(u); - if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime)) + /* For unit files, we allow masking… */ + if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime, + u->load_state == UNIT_MASKED)) return true; - if (fragment_mtime_newer(u->source_path, u->source_mtime)) + /* Source paths should not be masked… */ + if (fragment_mtime_newer(u->source_path, u->source_mtime, false)) return true; (void) unit_find_dropin_paths(u, &t); if (!strv_equal(u->dropin_paths, t)) return true; + /* … any drop-ins that are masked are simply omitted from the list. */ STRV_FOREACH(path, u->dropin_paths) - if (fragment_mtime_newer(*path, u->dropin_mtime)) + if (fragment_mtime_newer(*path, u->dropin_mtime, false)) return true; return false; @@ -3294,7 +3438,10 @@ int unit_patch_contexts(Unit *u) { ec->no_new_privileges = true; if (ec->private_devices) - ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD); + ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO)); + + if (ec->protect_kernel_modules) + ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE); if (ec->dynamic_user) { if (!ec->user) { @@ -3309,7 +3456,14 @@ int unit_patch_contexts(Unit *u) { return -ENOMEM; } + /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID + * around in the file system or on IPC objects. Hence enforce a strict sandbox. */ + ec->private_tmp = true; + ec->remove_ipc = true; + ec->protect_system = PROTECT_SYSTEM_STRICT; + if (ec->protect_home == PROTECT_HOME_NO) + ec->protect_home = PROTECT_HOME_READ_ONLY; } } @@ -3695,7 +3849,7 @@ int unit_kill_context( * there we get proper events. Hence rely on * them.*/ - if (cg_unified() > 0 || + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 || (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; @@ -3932,3 +4086,198 @@ pid_t unit_main_pid(Unit *u) { return 0; } + +static void unit_unref_uid_internal( + Unit *u, + uid_t *ref_uid, + bool destroy_now, + void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) { + + assert(u); + assert(ref_uid); + assert(_manager_unref_uid); + + /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and + * gid_t are actually the same time, with the same validity rules. + * + * Drops a reference to UID/GID from a unit. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (!uid_is_valid(*ref_uid)) + return; + + _manager_unref_uid(u->manager, *ref_uid, destroy_now); + *ref_uid = UID_INVALID; +} + +void unit_unref_uid(Unit *u, bool destroy_now) { + unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid); +} + +void unit_unref_gid(Unit *u, bool destroy_now) { + unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid); +} + +static int unit_ref_uid_internal( + Unit *u, + uid_t *ref_uid, + uid_t uid, + bool clean_ipc, + int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) { + + int r; + + assert(u); + assert(ref_uid); + assert(uid_is_valid(uid)); + assert(_manager_ref_uid); + + /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t + * are actually the same type, and have the same validity rules. + * + * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a + * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter + * drops to zero. */ + + assert_cc(sizeof(uid_t) == sizeof(gid_t)); + assert_cc(UID_INVALID == (uid_t) GID_INVALID); + + if (*ref_uid == uid) + return 0; + + if (uid_is_valid(*ref_uid)) /* Already set? */ + return -EBUSY; + + r = _manager_ref_uid(u->manager, uid, clean_ipc); + if (r < 0) + return r; + + *ref_uid = uid; + return 1; +} + +int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) { + return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid); +} + +int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) { + return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid); +} + +static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) { + int r = 0, q = 0; + + assert(u); + + /* Reference both a UID and a GID in one go. Either references both, or neither. */ + + if (uid_is_valid(uid)) { + r = unit_ref_uid(u, uid, clean_ipc); + if (r < 0) + return r; + } + + if (gid_is_valid(gid)) { + q = unit_ref_gid(u, gid, clean_ipc); + if (q < 0) { + if (r > 0) + unit_unref_uid(u, false); + + return q; + } + } + + return r > 0 || q > 0; +} + +int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) { + ExecContext *c; + int r; + + assert(u); + + c = unit_get_exec_context(u); + + r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false); + if (r < 0) + return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m"); + + return r; +} + +void unit_unref_uid_gid(Unit *u, bool destroy_now) { + assert(u); + + unit_unref_uid(u, destroy_now); + unit_unref_gid(u, destroy_now); +} + +void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) { + int r; + + assert(u); + + /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names + * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC + * objects when no service references the UID/GID anymore. */ + + r = unit_ref_uid_gid(u, uid, gid); + if (r > 0) + bus_unit_send_change_signal(u); +} + +int unit_set_invocation_id(Unit *u, sd_id128_t id) { + int r; + + assert(u); + + /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */ + + if (sd_id128_equal(u->invocation_id, id)) + return 0; + + if (!sd_id128_is_null(u->invocation_id)) + (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u); + + if (sd_id128_is_null(id)) { + r = 0; + goto reset; + } + + r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops); + if (r < 0) + goto reset; + + u->invocation_id = id; + sd_id128_to_string(id, u->invocation_id_string); + + r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u); + if (r < 0) + goto reset; + + return 0; + +reset: + u->invocation_id = SD_ID128_NULL; + u->invocation_id_string[0] = 0; + return r; +} + +int unit_acquire_invocation_id(Unit *u) { + sd_id128_t id; + int r; + + assert(u); + + r = sd_id128_randomize(&id); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m"); + + r = unit_set_invocation_id(u, id); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m"); + + return 0; +} diff --git a/src/core/unit.h b/src/core/unit.h index 47eb8d50a6..991543664b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -29,7 +29,7 @@ typedef struct UnitRef UnitRef; typedef struct UnitStatusMessageFormats UnitStatusMessageFormats; #include "condition.h" -#include "failure-action.h" +#include "emergency-action.h" #include "install.h" #include "list.h" #include "unit-name.h" @@ -108,9 +108,13 @@ struct Unit { /* The slot used for watching NameOwnerChanged signals */ sd_bus_slot *match_bus_slot; + /* References to this unit from clients */ + sd_bus_track *bus_track; + char **deserialized_refs; + /* Job timeout and action to take */ usec_t job_timeout; - FailureAction job_timeout_action; + EmergencyAction job_timeout_action; char *job_timeout_reboot_arg; /* References to this */ @@ -174,18 +178,23 @@ struct Unit { /* Put a ratelimit on unit starting */ RateLimit start_limit; - FailureAction start_limit_action; + EmergencyAction start_limit_action; char *reboot_arg; /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */ RateLimit auto_stop_ratelimit; + /* Reference to a specific UID/GID */ + uid_t ref_uid; + gid_t ref_gid; + /* Cached unit file state and preset */ UnitFileState unit_file_state; int unit_file_preset; - /* Where the cpuacct.usage cgroup counter was at the time the unit was started */ - nsec_t cpuacct_usage_base; + /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */ + nsec_t cpu_usage_base; + nsec_t cpu_usage_last; /* the most recently read value */ /* Counterparts in the cgroup filesystem */ char *cgroup_path; @@ -195,11 +204,13 @@ struct Unit { CGroupMask cgroup_members_mask; int cgroup_inotify_wd; - uint32_t cgroup_netclass_id; - /* How to start OnFailure units */ JobMode on_failure_job_mode; + /* The current invocation ID */ + sd_id128_t invocation_id; + char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */ + /* Garbage collect us we nobody wants or requires us anymore */ bool stop_when_unneeded; @@ -225,6 +236,9 @@ struct Unit { /* Is this a transient unit? */ bool transient; + /* Is this a unit that is always running and cannot be stopped? */ + bool perpetual; + bool in_load_queue:1; bool in_dbus_queue:1; bool in_cleanup_queue:1; @@ -233,8 +247,6 @@ struct Unit { bool sent_dbus_new_signal:1; - bool no_gc:1; - bool in_audit:1; bool cgroup_realized:1; @@ -245,6 +257,9 @@ struct Unit { /* Did we already invoke unit_coldplug() for this unit? */ bool coldplugged:1; + + /* For transient units: whether to add a bus track reference after creating the unit */ + bool bus_track_add:1; }; struct UnitStatusMessageFormats { @@ -358,7 +373,7 @@ struct UnitVTable { /* When the unit is not running and no job for it queued we * shall release its runtime resources */ - void (*release_resources)(Unit *u); + void (*release_resources)(Unit *u, bool inactive); /* Invoked on every child that died */ void (*sigchld_event)(Unit *u, pid_t pid, int code, int status); @@ -373,8 +388,7 @@ struct UnitVTable { /* Called whenever a process of this unit sends us a message */ void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds); - /* Called whenever a name this Unit registered for comes or - * goes away. */ + /* Called whenever a name this Unit registered for comes or goes away. */ void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner); /* Called for each property that is being set */ @@ -467,6 +481,7 @@ DEFINE_CAST(SCOPE, Scope); Unit *unit_new(Manager *m, size_t size); void unit_free(Unit *u); +int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret); int unit_add_name(Unit *u, const char *name); int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference); @@ -511,6 +526,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix); bool unit_can_reload(Unit *u) _pure_; bool unit_can_start(Unit *u) _pure_; +bool unit_can_stop(Unit *u) _pure_; bool unit_can_isolate(Unit *u) _pure_; int unit_start(Unit *u); @@ -537,6 +553,7 @@ bool unit_job_is_applicable(Unit *u, JobType j); int set_unit_path(const char *p); char *unit_dbus_path(Unit *u); +char *unit_dbus_path_invocation_id(Unit *u); int unit_load_related_unit(Unit *u, const char *type, Unit **_found); @@ -623,12 +640,26 @@ int unit_fail_if_symlink(Unit *u, const char* where); int unit_start_limit_test(Unit *u); +void unit_unref_uid(Unit *u, bool destroy_now); +int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc); + +void unit_unref_gid(Unit *u, bool destroy_now); +int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc); + +int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid); +void unit_unref_uid_gid(Unit *u, bool destroy_now); + +void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid); + +int unit_set_invocation_id(Unit *u, sd_id128_t id); +int unit_acquire_invocation_id(Unit *u); + /* Macros which append UNIT= or USER_UNIT= to the message */ #define log_unit_full(unit, level, error, ...) \ ({ \ - Unit *_u = (unit); \ - _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \ + const Unit *_u = (unit); \ + _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \ log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ }) |