51 files changed, 3780 insertions, 1054 deletions
diff --git a/src/core/automount.c b/src/core/automount.c
index 00295cf769..7d7a0a6e46 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) {
                                 return r;
 
                         (void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+                        if (a->deserialized_state == AUTOMOUNT_RUNNING) {
+                                r = automount_start_expire(a);
+                                if (r < 0)
+                                        log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+                        }
                 }
 
                 automount_set_state(a, a->deserialized_state);
@@ -795,6 +800,10 @@ static int automount_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         a->result = AUTOMOUNT_SUCCESS;
         automount_enter_waiting(a);
         return 1;
diff --git a/src/core/busname.c b/src/core/busname.c
index 7952cd31aa..b96ec09e67 100644
--- a/src/core/busname.c
+++ b/src/core/busname.c
@@ -639,6 +639,10 @@ static int busname_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         n->result = BUSNAME_SUCCESS;
         busname_enter_making(n);
 
@@ -868,7 +872,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         n->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = BUSNAME_SUCCESS;
         else if (code == CLD_EXITED)
                 f = BUSNAME_FAILURE_EXIT_CODE;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c19e43f571..23a92f9651 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -57,12 +57,16 @@ void cgroup_context_init(CGroupContext *c) {
         /* Initialize everything to the kernel defaults, assuming the
          * structure is preinitialized to 0 */
 
+        c->cpu_weight = CGROUP_WEIGHT_INVALID;
+        c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
+        c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
         c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
         c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
-        c->cpu_quota_per_sec_usec = USEC_INFINITY;
 
         c->memory_high = CGROUP_LIMIT_MAX;
         c->memory_max = CGROUP_LIMIT_MAX;
+        c->memory_swap_max = CGROUP_LIMIT_MAX;
 
         c->memory_limit = CGROUP_LIMIT_MAX;
 
@@ -158,6 +162,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sBlockIOAccounting=%s\n"
                 "%sMemoryAccounting=%s\n"
                 "%sTasksAccounting=%s\n"
+                "%sCPUWeight=%" PRIu64 "\n"
+                "%sStartupCPUWeight=%" PRIu64 "\n"
                 "%sCPUShares=%" PRIu64 "\n"
                 "%sStartupCPUShares=%" PRIu64 "\n"
                 "%sCPUQuotaPerSecSec=%s\n"
@@ -168,6 +174,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sMemoryLow=%" PRIu64 "\n"
                 "%sMemoryHigh=%" PRIu64 "\n"
                 "%sMemoryMax=%" PRIu64 "\n"
+                "%sMemorySwapMax=%" PRIu64 "\n"
                 "%sMemoryLimit=%" PRIu64 "\n"
                 "%sTasksMax=%" PRIu64 "\n"
                 "%sDevicePolicy=%s\n"
@@ -177,6 +184,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->blockio_accounting),
                 prefix, yes_no(c->memory_accounting),
                 prefix, yes_no(c->tasks_accounting),
+                prefix, c->cpu_weight,
+                prefix, c->startup_cpu_weight,
                 prefix, c->cpu_shares,
                 prefix, c->startup_cpu_shares,
                 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -187,6 +196,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, c->memory_low,
                 prefix, c->memory_high,
                 prefix, c->memory_max,
+                prefix, c->memory_swap_max,
                 prefix, c->memory_limit,
                 prefix, c->tasks_max,
                 prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -382,6 +392,95 @@ fail:
         return -errno;
 }
 
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+        return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+                c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+        return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+                c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+        if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+            c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+                return c->startup_cpu_weight;
+        else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+                return c->cpu_weight;
+        else
+                return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+        if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+            c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+                return c->startup_cpu_shares;
+        else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+                return c->cpu_shares;
+        else
+                return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+        char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
+        int r;
+
+        xsprintf(buf, "%" PRIu64 "\n", weight);
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.weight: %m");
+
+        if (quota != USEC_INFINITY)
+                xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+                         quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+        else
+                xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
+
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.max: %m");
+}
+
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+        char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+        int r;
+
+        xsprintf(buf, "%" PRIu64 "\n", shares);
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.shares: %m");
+
+        xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.cfs_period_us: %m");
+
+        if (quota != USEC_INFINITY) {
+                xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+                r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+        } else
+                r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.cfs_quota_us: %m");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+        return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+                     CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+        return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+                     CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
 static bool cgroup_context_has_io_config(CGroupContext *c) {
         return c->io_accounting ||
                 c->io_weight != CGROUP_WEIGHT_INVALID ||
@@ -521,7 +620,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u
 }
 
 static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
-        return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+        return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
 }
 
 static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
@@ -566,30 +665,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
          * and missing cgroups, i.e. EROFS and ENOENT. */
 
         if ((mask & CGROUP_MASK_CPU) && !is_root) {
-                char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+                bool has_weight = cgroup_context_has_cpu_weight(c);
+                bool has_shares = cgroup_context_has_cpu_shares(c);
 
-                sprintf(buf, "%" PRIu64 "\n",
-                        IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
-                        c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
-                r = cg_set_attribute("cpu", path, "cpu.shares", buf);
-                if (r < 0)
-                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
-                                      "Failed to set cpu.shares: %m");
+                if (cg_all_unified() > 0) {
+                        uint64_t weight;
 
-                sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
-                r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
-                if (r < 0)
-                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
-                                      "Failed to set cpu.cfs_period_us: %m");
+                        if (has_weight)
+                                weight = cgroup_context_cpu_weight(c, state);
+                        else if (has_shares) {
+                                uint64_t shares = cgroup_context_cpu_shares(c, state);
 
-                if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
-                        sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
-                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
-                } else
-                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
-                if (r < 0)
-                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
-                                      "Failed to set cpu.cfs_quota_us: %m");
+                                weight = cgroup_cpu_shares_to_weight(shares);
+
+                                log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+                                                  shares, weight, path);
+                        } else
+                                weight = CGROUP_WEIGHT_DEFAULT;
+
+                        cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+                } else {
+                        uint64_t shares;
+
+                        if (has_weight) {
+                                uint64_t weight = cgroup_context_cpu_weight(c, state);
+
+                                shares = cgroup_cpu_weight_to_shares(weight);
+
+                                log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+                                                  weight, shares, path);
+                        } else if (has_shares)
+                                shares = cgroup_context_cpu_shares(c, state);
+                        else
+                                shares = CGROUP_CPU_SHARES_DEFAULT;
+
+                        cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+                }
         }
 
         if (mask & CGROUP_MASK_IO) {
@@ -677,16 +788,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         char buf[DECIMAL_STR_MAX(uint64_t)+1];
                         uint64_t weight;
 
-                        if (has_blockio)
-                                weight = cgroup_context_blkio_weight(c, state);
-                        else if (has_io) {
+                        if (has_io) {
                                 uint64_t io_weight = cgroup_context_io_weight(c, state);
 
                                 weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
 
                                 log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
                                                   io_weight, weight);
-                        } else
+                        } else if (has_blockio)
+                                weight = cgroup_context_blkio_weight(c, state);
+                        else
                                 weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
 
                         xsprintf(buf, "%" PRIu64 "\n", weight);
@@ -695,13 +806,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set blkio.weight: %m");
 
-                        if (has_blockio) {
-                                CGroupBlockIODeviceWeight *w;
-
-                                /* FIXME: no way to reset this list */
-                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
-                                        cgroup_apply_blkio_device_weight(u, w->path, w->weight);
-                        } else if (has_io) {
+                        if (has_io) {
                                 CGroupIODeviceWeight *w;
 
                                 /* FIXME: no way to reset this list */
@@ -713,18 +818,17 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
 
                                         cgroup_apply_blkio_device_weight(u, w->path, weight);
                                 }
+                        } else if (has_blockio) {
+                                CGroupBlockIODeviceWeight *w;
+
+                                /* FIXME: no way to reset this list */
+                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+                                        cgroup_apply_blkio_device_weight(u, w->path, w->weight);
                         }
                 }
 
                 /* Apply limits and free ones without config. */
-                if (has_blockio) {
-                        CGroupBlockIODeviceBandwidth *b, *next;
-
-                        LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
-                                if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
-                                        cgroup_context_free_blockio_device_bandwidth(c, b);
-                        }
-                } else if (has_io) {
+                if (has_io) {
                         CGroupIODeviceLimit *l, *next;
 
                         LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
@@ -734,16 +838,24 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
                                         cgroup_context_free_io_device_limit(c, l);
                         }
+                } else if (has_blockio) {
+                        CGroupBlockIODeviceBandwidth *b, *next;
+
+                        LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths)
+                                if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
+                                        cgroup_context_free_blockio_device_bandwidth(c, b);
                 }
         }
 
         if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
-                if (cg_unified() > 0) {
-                        uint64_t max = c->memory_max;
+                if (cg_all_unified() > 0) {
+                        uint64_t max;
+                        uint64_t swap_max = CGROUP_LIMIT_MAX;
 
-                        if (cgroup_context_has_unified_memory_config(c))
+                        if (cgroup_context_has_unified_memory_config(c)) {
                                 max = c->memory_max;
-                        else {
+                                swap_max = c->memory_swap_max;
+                        } else {
                                 max = c->memory_limit;
 
                                 if (max != CGROUP_LIMIT_MAX)
@@ -753,16 +865,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
                         cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
                         cgroup_apply_unified_memory_limit(u, "memory.max", max);
+                        cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
                 } else {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
-                        uint64_t val = c->memory_limit;
+                        uint64_t val;
 
-                        if (val == CGROUP_LIMIT_MAX) {
+                        if (cgroup_context_has_unified_memory_config(c)) {
                                 val = c->memory_max;
-
-                                if (val != CGROUP_LIMIT_MAX)
-                                        log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
-                        }
+                                log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val);
+                        } else
+                                val = c->memory_limit;
 
                         if (val == CGROUP_LIMIT_MAX)
                                 strncpy(buf, "-1\n", sizeof(buf));
@@ -844,7 +956,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
 
         if ((mask & CGROUP_MASK_PIDS) && !is_root) {
 
-                if (c->tasks_max != (uint64_t) -1) {
+                if (c->tasks_max != CGROUP_LIMIT_MAX) {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 2];
 
                         sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
@@ -864,8 +976,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
         /* Figure out which controllers we need */
 
         if (c->cpu_accounting ||
-            c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
-            c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+            cgroup_context_has_cpu_weight(c) ||
+            cgroup_context_has_cpu_shares(c) ||
             c->cpu_quota_per_sec_usec != USEC_INFINITY)
                 mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
 
@@ -911,7 +1023,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
                 e = unit_get_exec_context(u);
                 if (!e ||
                     exec_context_maintains_privileges(e) ||
-                    cg_unified() > 0)
+                    cg_all_unified() > 0)
                         return _CGROUP_MASK_ALL;
         }
 
@@ -1137,7 +1249,7 @@ int unit_watch_cgroup(Unit *u) {
                 return 0;
 
         /* Only applies to the unified hierarchy */
-        r = cg_unified();
+        r = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
         if (r < 0)
                 return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
         if (r == 0)
@@ -1247,6 +1359,26 @@ int unit_attach_pids_to_cgroup(Unit *u) {
         return 0;
 }
 
+static void cgroup_xattr_apply(Unit *u) {
+        char ids[SD_ID128_STRING_MAX];
+        int r;
+
+        assert(u);
+
+        if (!MANAGER_IS_SYSTEM(u->manager))
+                return;
+
+        if (sd_id128_is_null(u->invocation_id))
+                return;
+
+        r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+                         "trusted.invocation_id",
+                         sd_id128_to_string(u->invocation_id, ids), 32,
+                         0);
+        if (r < 0)
+                log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
 static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
         assert(u);
 
@@ -1290,6 +1422,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
 
         /* Finally, apply the necessary attributes. */
         cgroup_context_apply(u, target_mask, state);
+        cgroup_xattr_apply(u);
 
         return 0;
 }
@@ -1416,6 +1549,8 @@ void unit_prune_cgroup(Unit *u) {
         if (!u->cgroup_path)
                 return;
 
+        (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
@@ -1537,7 +1672,7 @@ int unit_watch_all_pids(Unit *u) {
         if (!u->cgroup_path)
                 return -ENOENT;
 
-        if (cg_unified() > 0) /* On unified we can use proper notifications */
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */
                 return 0;
 
         return unit_watch_pids_in_path(u, u->cgroup_path);
@@ -1610,7 +1745,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
 int manager_setup_cgroup(Manager *m) {
         _cleanup_free_ char *path = NULL;
         CGroupController c;
-        int r, unified;
+        int r, all_unified, systemd_unified;
         char *e;
 
         assert(m);
@@ -1647,11 +1782,17 @@ int manager_setup_cgroup(Manager *m) {
         if (r < 0)
                 return log_error_errno(r, "Cannot find cgroup mount point: %m");
 
-        unified = cg_unified();
-        if (unified < 0)
-                return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
-        if (unified > 0)
+        all_unified = cg_all_unified();
+        systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+
+        if (all_unified < 0 || systemd_unified < 0)
+                return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
+                                       "Couldn't determine if we are running in the unified hierarchy: %m");
+
+        if (all_unified > 0)
                 log_debug("Unified cgroup hierarchy is located at %s.", path);
+        else if (systemd_unified > 0)
+                log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
         else
                 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
 
@@ -1659,7 +1800,7 @@ int manager_setup_cgroup(Manager *m) {
                 const char *scope_path;
 
                 /* 3. Install agent */
-                if (unified) {
+                if (systemd_unified) {
 
                         /* In the unified hierarchy we can get
                          * cgroup empty notifications via inotify. */
@@ -1719,7 +1860,7 @@ int manager_setup_cgroup(Manager *m) {
                         return log_error_errno(errno, "Failed to open pin file: %m");
 
                 /* 6.  Always enable hierarchical support if it exists... */
-                if (!unified)
+                if (!all_unified)
                         (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
         }
 
@@ -1845,7 +1986,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
         if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
                 return -ENODATA;
 
-        if (cg_unified() <= 0)
+        if (cg_all_unified() <= 0)
                 r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
         else
                 r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
@@ -1890,18 +2031,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
         if (!u->cgroup_path)
                 return -ENODATA;
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
-                return -ENODATA;
+        if (cg_all_unified() > 0) {
+                const char *keys[] = { "usage_usec", NULL };
+                _cleanup_free_ char *val = NULL;
+                uint64_t us;
 
-        r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
-        if (r == -ENOENT)
-                return -ENODATA;
-        if (r < 0)
-                return r;
+                if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
+                        return -ENODATA;
 
-        r = safe_atou64(v, &ns);
-        if (r < 0)
-                return r;
+                r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+                if (r < 0)
+                        return r;
+
+                r = safe_atou64(val, &us);
+                if (r < 0)
+                        return r;
+
+                ns = us * NSEC_PER_USEC;
+        } else {
+                if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
+                        return -ENODATA;
+
+                r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+                if (r == -ENOENT)
+                        return -ENODATA;
+                if (r < 0)
+                        return r;
+
+                r = safe_atou64(v, &ns);
+                if (r < 0)
+                        return r;
+        }
 
         *ret = ns;
         return 0;
@@ -1911,16 +2071,33 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
         nsec_t ns;
         int r;
 
+        assert(u);
+
+        /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
+         * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
+         * call this function with a NULL return value. */
+
         r = unit_get_cpu_usage_raw(u, &ns);
+        if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+                /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
+                 * cached value. */
+
+                if (ret)
+                        *ret = u->cpu_usage_last;
+                return 0;
+        }
         if (r < 0)
                 return r;
 
-        if (ns > u->cpuacct_usage_base)
-                ns -= u->cpuacct_usage_base;
+        if (ns > u->cpu_usage_base)
+                ns -= u->cpu_usage_base;
         else
                 ns = 0;
 
-        *ret = ns;
+        u->cpu_usage_last = ns;
+        if (ret)
+                *ret = ns;
+
         return 0;
 }
 
@@ -1930,13 +2107,15 @@ int unit_reset_cpu_usage(Unit *u) {
 
         assert(u);
 
+        u->cpu_usage_last = NSEC_INFINITY;
+
         r = unit_get_cpu_usage_raw(u, &ns);
         if (r < 0) {
-                u->cpuacct_usage_base = 0;
+                u->cpu_usage_base = 0;
                 return r;
         }
 
-        u->cpuacct_usage_base = ns;
+        u->cpu_usage_base = ns;
         return 0;
 }
 
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index a57403e79f..4cd168f63e 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -89,6 +89,10 @@ struct CGroupContext {
         bool tasks_accounting;
 
         /* For unified hierarchy */
+        uint64_t cpu_weight;
+        uint64_t startup_cpu_weight;
+        usec_t cpu_quota_per_sec_usec;
+
         uint64_t io_weight;
         uint64_t startup_io_weight;
         LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@@ -97,11 +101,11 @@ struct CGroupContext {
         uint64_t memory_low;
         uint64_t memory_high;
         uint64_t memory_max;
+        uint64_t memory_swap_max;
 
         /* For legacy hierarchies */
         uint64_t cpu_shares;
         uint64_t startup_cpu_shares;
-        usec_t cpu_quota_per_sec_usec;
 
         uint64_t blockio_weight;
         uint64_t startup_blockio_weight;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 85b0c86a2f..c4067a95bf 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
         SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+        SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+        SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
         SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
         SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
         SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
@@ -231,6 +233,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
         SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
         SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
+        SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0),
         SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
         SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
         SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -303,6 +306,50 @@ int bus_cgroup_set_property(
 
                 return 1;
 
+        } else if (streq(name, "CPUWeight")) {
+                uint64_t weight;
+
+                r = sd_bus_message_read(message, "t", &weight);
+                if (r < 0)
+                        return r;
+
+                if (!CGROUP_WEIGHT_IS_OK(weight))
+                        return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range");
+
+                if (mode != UNIT_CHECK) {
+                        c->cpu_weight = weight;
+                        unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+                        if (weight == CGROUP_WEIGHT_INVALID)
+                                unit_write_drop_in_private(u, mode, name, "CPUWeight=");
+                        else
+                                unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight);
+                }
+
+                return 1;
+
+        } else if (streq(name, "StartupCPUWeight")) {
+                uint64_t weight;
+
+                r = sd_bus_message_read(message, "t", &weight);
+                if (r < 0)
+                        return r;
+
+                if (!CGROUP_WEIGHT_IS_OK(weight))
+                        return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range");
+
+                if (mode != UNIT_CHECK) {
+                        c->startup_cpu_weight = weight;
+                        unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+                        if (weight == CGROUP_CPU_SHARES_INVALID)
+                                unit_write_drop_in_private(u, mode, name, "StartupCPUWeight=");
+                        else
+                                unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight);
+                }
+
+                return 1;
+
         } else if (streq(name, "CPUShares")) {
                 uint64_t shares;
 
@@ -829,7 +876,7 @@ int bus_cgroup_set_property(
 
                 return 1;
 
-        } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
+        } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax")) {
                 uint64_t v;
 
                 r = sd_bus_message_read(message, "t", &v);
@@ -843,6 +890,8 @@ int bus_cgroup_set_property(
                                 c->memory_low = v;
                         else if (streq(name, "MemoryHigh"))
                                 c->memory_high = v;
+                        else if (streq(name, "MemorySwapMax"))
+                                c->memory_swap_max = v;
                         else
                                 c->memory_max = v;
 
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index e35d3ccd2e..1a7f770db1 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -627,6 +627,53 @@ static int property_get_syslog_facility(
         return sd_bus_message_append(reply, "i", LOG_FAC(c->syslog_priority));
 }
 
+static int property_get_input_fdname(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        const char *name;
+
+        assert(bus);
+        assert(c);
+        assert(property);
+        assert(reply);
+
+        name = exec_context_fdname(c, STDIN_FILENO);
+
+        return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_output_fdname(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        const char *name = NULL;
+
+        assert(bus);
+        assert(c);
+        assert(property);
+        assert(reply);
+
+        if (c->std_output == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardOutputFileDescriptorName"))
+                name = exec_context_fdname(c, STDOUT_FILENO);
+        else if (c->std_error == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardErrorFileDescriptorName"))
+                name = exec_context_fdname(c, STDERR_FILENO);
+
+        return sd_bus_message_append(reply, "s", name);
+}
+
 const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -677,8 +724,11 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_input_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -695,6 +745,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
@@ -706,6 +757,9 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1026,7 +1080,6 @@ int bus_exec_context_set_transient_property(
 
                 return 1;
 
-
         } else if (streq(name, "StandardOutput")) {
                 const char *s;
                 ExecOutput p;
@@ -1068,10 +1121,46 @@ int bus_exec_context_set_transient_property(
                 return 1;
 
         } else if (STR_IN_SET(name,
+                              "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) {
+                const char *s;
+
+                r = sd_bus_message_read(message, "s", &s);
+                if (r < 0)
+                        return r;
+
+                if (!fdname_is_valid(s))
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name");
+
+                if (mode != UNIT_CHECK) {
+                        if (streq(name, "StandardInputFileDescriptorName")) {
+                                c->std_input = EXEC_INPUT_NAMED_FD;
+                                r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], s);
+                                if (r < 0)
+                                        return r;
+                                unit_write_drop_in_private_format(u, mode, name, "StandardInput=fd:%s", s);
+                        } else if (streq(name, "StandardOutputFileDescriptorName")) {
+                                c->std_output = EXEC_OUTPUT_NAMED_FD;
+                                r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], s);
+                                if (r < 0)
+                                        return r;
+                                unit_write_drop_in_private_format(u, mode, name, "StandardOutput=fd:%s", s);
+                        } else if (streq(name, "StandardErrorFileDescriptorName")) {
+                                c->std_error = EXEC_OUTPUT_NAMED_FD;
+                                r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], s);
+                                if (r < 0)
+                                        return r;
+                                unit_write_drop_in_private_format(u, mode, name, "StandardError=fd:%s", s);
+                        }
+                }
+
+                return 1;
+
+        } else if (STR_IN_SET(name,
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
                               "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser")) {
+                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
+                              "ProtectKernelModules", "ProtectControlGroups")) {
                 int b;
 
                 r = sd_bus_message_read(message, "b", &b);
@@ -1103,6 +1192,14 @@ int bus_exec_context_set_transient_property(
                                 c->restrict_realtime = b;
                         else if (streq(name, "DynamicUser"))
                                 c->dynamic_user = b;
+                        else if (streq(name, "RemoveIPC"))
+                                c->remove_ipc = b;
+                        else if (streq(name, "ProtectKernelTunables"))
+                                c->protect_kernel_tunables = b;
+                        else if (streq(name, "ProtectKernelModules"))
+                                c->protect_kernel_modules = b;
+                        else if (streq(name, "ProtectControlGroups"))
+                                c->protect_control_groups = b;
 
                         unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
                 }
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index ef05a75a8b..d7d3d3c8ce 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -47,6 +47,11 @@
 #include "virt.h"
 #include "watchdog.h"
 
+static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
+        return (runtime ? UNIT_FILE_RUNTIME : 0) |
+               (force   ? UNIT_FILE_FORCE   : 0);
+}
+
 static int property_get_version(
                 sd_bus *bus,
                 const char *path,
@@ -464,6 +469,64 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu
         return sd_bus_reply_method_return(message, "o", path);
 }
 
+static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        _cleanup_free_ char *path = NULL;
+        Manager *m = userdata;
+        sd_id128_t id;
+        const void *a;
+        Unit *u;
+        size_t sz;
+        int r;
+
+        assert(message);
+        assert(m);
+
+        /* Anyone can call this method */
+
+        r = sd_bus_message_read_array(message, 'y', &a, &sz);
+        if (r < 0)
+                return r;
+        if (sz == 0)
+                id = SD_ID128_NULL;
+        else if (sz == 16)
+                memcpy(&id, a, sz);
+        else
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
+
+        if (sd_id128_is_null(id)) {
+                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+                pid_t pid;
+
+                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_creds_get_pid(creds, &pid);
+                if (r < 0)
+                        return r;
+
+                u = manager_get_unit_by_pid(m, pid);
+                if (!u)
+                        return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid);
+        } else {
+                u = hashmap_get(m->units_by_invocation_id, &id);
+                if (!u)
+                        return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id));
+        }
+
+        r = mac_selinux_unit_access_check(u, message, "status", error);
+        if (r < 0)
+                return r;
+
+        /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead
+         * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */
+        path = unit_dbus_path_invocation_id(u);
+        if (!path)
+                return -ENOMEM;
+
+        return sd_bus_reply_method_return(message, "o", path);
+}
+
 static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
         _cleanup_free_ char *path = NULL;
         Manager *m = userdata;
@@ -643,6 +706,54 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s
         return bus_unit_method_set_properties(message, u, error);
 }
 
+static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Manager *m = userdata;
+        const char *name;
+        Unit *u;
+        int r;
+
+        assert(message);
+        assert(m);
+
+        r = sd_bus_message_read(message, "s", &name);
+        if (r < 0)
+                return r;
+
+        r = manager_load_unit(m, name, NULL, error, &u);
+        if (r < 0)
+                return r;
+
+        r = bus_unit_check_load_state(u, error);
+        if (r < 0)
+                return r;
+
+        return bus_unit_method_ref(message, u, error);
+}
+
+static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Manager *m = userdata;
+        const char *name;
+        Unit *u;
+        int r;
+
+        assert(message);
+        assert(m);
+
+        r = sd_bus_message_read(message, "s", &name);
+        if (r < 0)
+                return r;
+
+        r = manager_load_unit(m, name, NULL, error, &u);
+        if (r < 0)
+                return r;
+
+        r = bus_unit_check_load_state(u, error);
+        if (r < 0)
+                return r;
+
+        return bus_unit_method_unref(message, u, error);
+}
+
 static int reply_unit_info(sd_bus_message *reply, Unit *u) {
         _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
         Unit *following;
@@ -781,6 +892,13 @@ static int transient_unit_from_message(
         if (r < 0)
                 return r;
 
+        /* If the client asked for it, automatically add a reference to this unit. */
+        if (u->bus_track_add) {
+                r = bus_unit_track_add_sender(u, message);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to watch sender: %m");
+        }
+
         /* Now load the missing bits of the unit we just created */
         unit_add_to_load_queue(u);
         manager_dispatch_load_queue(m);
@@ -1835,13 +1953,14 @@ static int install_error(
 static int method_enable_unit_files_generic(
                 sd_bus_message *message,
                 Manager *m,
-                int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], bool force, UnitFileChange **changes, unsigned *n_changes),
+                int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
                 bool carries_install_info,
                 sd_bus_error *error) {
 
         _cleanup_strv_free_ char **l = NULL;
         UnitFileChange *changes = NULL;
         unsigned n_changes = 0;
+        UnitFileFlags flags;
         int runtime, force, r;
 
         assert(message);
@@ -1855,13 +1974,15 @@ static int method_enable_unit_files_generic(
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         r = bus_verify_manage_unit_files_async(m, message, error);
         if (r < 0)
                 return r;
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = call(m->unit_file_scope, runtime, NULL, l, force, &changes, &n_changes);
+        r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -1880,8 +2001,8 @@ static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bu
         return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error);
 }
 
-static int unit_file_preset_without_mode(UnitFileScope scope, bool runtime, const char *root_dir, char **files, bool force, UnitFileChange **changes, unsigned *n_changes) {
-        return unit_file_preset(scope, runtime, root_dir, files, UNIT_FILE_PRESET_FULL, force, changes, n_changes);
+static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, unsigned *n_changes) {
+        return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes);
 }
 
 static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -1900,6 +2021,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
         Manager *m = userdata;
         UnitFilePresetMode mm;
         int runtime, force, r;
+        UnitFileFlags flags;
         const char *mode;
 
         assert(message);
@@ -1913,6 +2035,8 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         if (isempty(mode))
                 mm = UNIT_FILE_PRESET_FULL;
         else {
@@ -1927,7 +2051,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = unit_file_preset(m->unit_file_scope, runtime, NULL, l, mm, force, &changes, &n_changes);
+        r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -1937,7 +2061,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
 static int method_disable_unit_files_generic(
                 sd_bus_message *message,
                 Manager *m,
-                int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
+                int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
                 sd_bus_error *error) {
 
         _cleanup_strv_free_ char **l = NULL;
@@ -1962,7 +2086,7 @@ static int method_disable_unit_files_generic(
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = call(m->unit_file_scope, runtime, NULL, l, &changes, &n_changes);
+        r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2028,7 +2152,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = unit_file_set_default(m->unit_file_scope, NULL, name, force, &changes, &n_changes);
+        r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2041,6 +2165,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
         Manager *m = userdata;
         UnitFilePresetMode mm;
         const char *mode;
+        UnitFileFlags flags;
         int force, runtime, r;
 
         assert(message);
@@ -2054,6 +2179,8 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         if (isempty(mode))
                 mm = UNIT_FILE_PRESET_FULL;
         else {
@@ -2068,7 +2195,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = unit_file_preset_all(m->unit_file_scope, runtime, NULL, mm, force, &changes, &n_changes);
+        r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2083,6 +2210,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
         int runtime, force, r;
         char *target, *type;
         UnitDependency dep;
+        UnitFileFlags flags;
 
         assert(message);
         assert(m);
@@ -2101,17 +2229,62 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         dep = unit_dependency_from_string(type);
         if (dep < 0)
                 return -EINVAL;
 
-        r = unit_file_add_dependency(m->unit_file_scope, runtime, NULL, l, target, dep, force, &changes, &n_changes);
+        r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
         return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes);
 }
 
+static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+        UnitFileChange *changes = NULL;
+        unsigned n_changes = 0, i;
+        UnitFileFlags flags;
+        const char *name;
+        char **p;
+        int runtime, r;
+
+        r = sd_bus_message_read(message, "sb", &name, &runtime);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_new_method_return(message, &reply);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s");
+        if (r < 0)
+                return r;
+
+        p = STRV_MAKE(name);
+        flags = UNIT_FILE_DRY_RUN |
+                (runtime ? UNIT_FILE_RUNTIME : 0);
+
+        r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+        for (i = 0; i < n_changes; i++)
+                if (changes[i].type == UNIT_FILE_UNLINK) {
+                        r = sd_bus_message_append(reply, "s", changes[i].path);
+                        if (r < 0)
+                                return r;
+                }
+
+        r = sd_bus_message_close_container(reply);
+        if (r < 0)
+                return r;
+
+        return sd_bus_send(NULL, reply, NULL);
+}
+
 const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_VTABLE_START(0),
 
@@ -2199,6 +2372,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
 
         SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -2211,6 +2385,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -2254,6 +2430,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED),
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index b4bbee0648..76a7a7ce97 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -116,7 +116,11 @@ const sd_bus_vtable bus_mount_vtable[] = {
         SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index fab3677a01..61b83d2d62 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -36,7 +36,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
 
 const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_VTABLE_START(0),
@@ -50,12 +50,7 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
-        /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
-        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Service, emergency_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -70,6 +65,9 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
         BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -77,6 +75,12 @@ const sd_bus_vtable bus_service_vtable[] = {
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+
+        /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
+        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_VTABLE_END
 };
 
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index 9a071a1355..21adb64e15 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -152,6 +152,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
         SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c
index 292f8738c6..85a2c26b98 100644
--- a/src/core/dbus-swap.c
+++ b/src/core/dbus-swap.c
@@ -84,6 +84,8 @@ const sd_bus_vtable bus_swap_vtable[] = {
         SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         SD_BUS_VTABLE_END
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index b55d2cf735..69e249c844 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -37,7 +37,7 @@
 
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
 
 static int property_get_names(
                 sd_bus *bus,
@@ -263,10 +263,7 @@ static int property_get_can_stop(
         assert(reply);
         assert(u);
 
-        /* On the lower levels we assume that every unit we can start
-         * we can also stop */
-
-        return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop);
+        return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop);
 }
 
 static int property_get_can_reload(
@@ -418,6 +415,7 @@ static int bus_verify_manage_units_async_full(
                 const char *verb,
                 int capability,
                 const char *polkit_message,
+                bool interactive,
                 sd_bus_message *call,
                 sd_bus_error *error) {
 
@@ -433,7 +431,15 @@ static int bus_verify_manage_units_async_full(
                 details[7] = GETTEXT_PACKAGE;
         }
 
-        return bus_verify_polkit_async(call, capability, "org.freedesktop.systemd1.manage-units", details, false, UID_INVALID, &u->manager->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        capability,
+                        "org.freedesktop.systemd1.manage-units",
+                        details,
+                        interactive,
+                        UID_INVALID,
+                        &u->manager->polkit_registry,
+                        error);
 }
 
 int bus_unit_method_start_generic(
@@ -486,6 +492,7 @@ int bus_unit_method_start_generic(
                         verb,
                         CAP_SYS_ADMIN,
                         job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL,
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -558,6 +565,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *
                         "kill",
                         CAP_KILL,
                         N_("Authentication is required to kill '$(unit)'."),
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -588,6 +596,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus
                         "reset-failed",
                         CAP_SYS_ADMIN,
                         N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -620,6 +629,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
                         "set-property",
                         CAP_SYS_ADMIN,
                         N_("Authentication is required to set properties on '$(unit)'."),
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -634,6 +644,53 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
         return sd_bus_reply_method_return(message, NULL);
 }
 
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Unit *u = userdata;
+        int r;
+
+        assert(message);
+        assert(u);
+
+        r = mac_selinux_unit_access_check(u, message, "start", error);
+        if (r < 0)
+                return r;
+
+        r = bus_verify_manage_units_async_full(
+                        u,
+                        "ref",
+                        CAP_SYS_ADMIN,
+                        NULL,
+                        false,
+                        message,
+                        error);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+        r = bus_unit_track_add_sender(u, message);
+        if (r < 0)
+                return r;
+
+        return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Unit *u = userdata;
+        int r;
+
+        assert(message);
+        assert(u);
+
+        r = bus_unit_track_remove_sender(u, message);
+        if (r == -EUNATCH)
+                return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet.");
+        if (r < 0)
+                return r;
+
+        return sd_bus_reply_method_return(message, NULL);
+}
+
 const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_VTABLE_START(0),
 
@@ -660,10 +717,6 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
         SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -694,7 +747,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
@@ -704,11 +757,12 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0),
         SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* obsolete alias name */
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0),
 
         SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -720,7 +774,15 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED),
 
+        /* Obsolete properties or obsolete alias names */
+        SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_VTABLE_END
 };
 
@@ -1104,7 +1166,7 @@ void bus_unit_send_removed_signal(Unit *u) {
         int r;
         assert(u);
 
-        if (!u->sent_dbus_new_signal)
+        if (!u->sent_dbus_new_signal || u->in_dbus_queue)
                 bus_unit_send_change_signal(u);
 
         if (!u->id)
@@ -1317,6 +1379,29 @@ static int bus_unit_set_transient_property(
                         return r;
 
                 return 1;
+
+        } else if (streq(name, "AddRef")) {
+
+                int b;
+
+                /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method
+                 * on the Unit interface, and it's probably not a good idea to expose a property and a method on the
+                 * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for
+                 * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference
+                 * dependency type, hence let's not confuse things with that.
+                 *
+                 * Note that we don't acually add the reference to the bus track. We do that only after the setup of
+                 * the transient unit is complete, so that setting this property multiple times in the same transient
+                 * unit creation call doesn't count as individual references. */
+
+                r = sd_bus_message_read(message, "b", &b);
+                if (r < 0)
+                        return r;
+
+                if (mode != UNIT_CHECK)
+                        u->bus_track_add = b;
+
+                return 1;
         }
 
         return 0;
@@ -1421,3 +1506,71 @@ int bus_unit_check_load_state(Unit *u, sd_bus_error *error) {
 
         return sd_bus_error_set_errnof(error, u->load_error, "Unit %s is not loaded properly: %m.", u->id);
 }
+
+static int bus_track_handler(sd_bus_track *t, void *userdata) {
+        Unit *u = userdata;
+
+        assert(t);
+        assert(u);
+
+        u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+
+        unit_add_to_gc_queue(u);
+        return 0;
+}
+
+static int allocate_bus_track(Unit *u) {
+        int r;
+
+        assert(u);
+
+        if (u->bus_track)
+                return 0;
+
+        r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_track_handler, u);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_track_set_recursive(u->bus_track, true);
+        if (r < 0) {
+                u->bus_track = sd_bus_track_unref(u->bus_track);
+                return r;
+        }
+
+        return 0;
+}
+
+int bus_unit_track_add_name(Unit *u, const char *name) {
+        int r;
+
+        assert(u);
+
+        r = allocate_bus_track(u);
+        if (r < 0)
+                return r;
+
+        return sd_bus_track_add_name(u->bus_track, name);
+}
+
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) {
+        int r;
+
+        assert(u);
+
+        r = allocate_bus_track(u);
+        if (r < 0)
+                return r;
+
+        return sd_bus_track_add_sender(u->bus_track, m);
+}
+
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) {
+        assert(u);
+
+        /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an
+         * error */
+        if (!u->bus_track)
+                return -EUNATCH;
+
+        return sd_bus_track_remove_sender(u->bus_track, m);
+}
diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h
index 4db88dbebc..b280de7a1d 100644
--- a/src/core/dbus-unit.h
+++ b/src/core/dbus-unit.h
@@ -33,9 +33,15 @@ int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_
 int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
 int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error);
 
-int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
 int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitSetPropertiesMode mode, bool commit, sd_bus_error *error);
 int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error);
 int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error);
 
+int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
 int bus_unit_check_load_state(Unit *u, sd_bus_error *error);
+
+int bus_unit_track_add_name(Unit *u, const char *name);
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m);
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m);
diff --git a/src/core/dbus.c b/src/core/dbus.c
index 3422a02d68..070974fe66 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -964,10 +964,6 @@ static int bus_init_private(Manager *m) {
         if (m->private_listen_fd >= 0)
                 return 0;
 
-        /* We don't need the private socket if we have kdbus */
-        if (m->kdbus_fd >= 0)
-                return 0;
-
         if (MANAGER_IS_SYSTEM(m)) {
 
                 /* We want the private bus only when running as init */
@@ -1168,60 +1164,57 @@ int bus_foreach_bus(
         return ret;
 }
 
-void bus_track_serialize(sd_bus_track *t, FILE *f) {
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
         const char *n;
 
         assert(f);
+        assert(prefix);
 
-        for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t))
-                fprintf(f, "subscribed=%s\n", n);
-}
+        for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) {
+                int c, j;
 
-int bus_track_deserialize_item(char ***l, const char *line) {
-        const char *e;
-        int r;
-
-        assert(l);
-        assert(line);
-
-        e = startswith(line, "subscribed=");
-        if (!e)
-                return 0;
+                c = sd_bus_track_count_name(t, n);
 
-        r = strv_extend(l, e);
-        if (r < 0)
-                return r;
-
-        return 1;
+                for (j = 0; j < c; j++) {
+                        fputs(prefix, f);
+                        fputc('=', f);
+                        fputs(n, f);
+                        fputc('\n', f);
+                }
+        }
 }
 
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l) {
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
+        char **i;
         int r = 0;
 
         assert(m);
         assert(t);
-        assert(l);
 
-        if (!strv_isempty(*l) && m->api_bus) {
-                char **i;
-
-                if (!*t) {
-                        r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
-                        if (r < 0)
-                                return r;
-                }
+        if (strv_isempty(l))
+                return 0;
 
-                r = 0;
-                STRV_FOREACH(i, *l) {
-                        int k;
+        if (!m->api_bus)
+                return 0;
 
-                        k = sd_bus_track_add_name(*t, *i);
-                        if (k < 0)
-                                r = k;
-                }
+        if (!*t) {
+                r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
+                if (r < 0)
+                        return r;
         }
 
-        *l = strv_free(*l);
+        r = sd_bus_track_set_recursive(*t, recursive);
+        if (r < 0)
+                return r;
+
+        r = 0;
+        STRV_FOREACH(i, l) {
+                int k;
+
+                k = sd_bus_track_add_name(*t, *i);
+                if (k < 0)
+                        r = k;
+        }
 
         return r;
 }
diff --git a/src/core/dbus.h b/src/core/dbus.h
index 6baaffbd75..a092ed9d76 100644
--- a/src/core/dbus.h
+++ b/src/core/dbus.h
@@ -28,9 +28,8 @@ void bus_done(Manager *m);
 
 int bus_fdset_add_all(Manager *m, FDSet *fds);
 
-void bus_track_serialize(sd_bus_track *t, FILE *f);
-int bus_track_deserialize_item(char ***l, const char *line);
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l);
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix);
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l);
 
 int manager_sync_bus_names(Manager *m, sd_bus *bus);
 
diff --git a/src/core/device.c b/src/core/device.c
index 16e56efcc3..4b9e84aeb6 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -331,11 +331,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
         if (!u) {
                 delete = true;
 
-                u = unit_new(m, sizeof(Device));
-                if (!u)
-                        return log_oom();
-
-                r = unit_add_name(u, e);
+                r = unit_new_for_name(m, sizeof(Device), e, &u);
                 if (r < 0)
                         goto fail;
 
@@ -369,7 +365,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
 fail:
         log_unit_warning_errno(u, r, "Failed to set up device unit: %m");
 
-        if (delete)
+        if (delete && u)
                 unit_free(u);
 
         return r;
@@ -464,6 +460,10 @@ static void device_update_found_one(Device *d, bool add, DeviceFound found, bool
         if (!now)
                 return;
 
+        /* Didn't exist before, but does now? if so, generate a new invocation ID for it */
+        if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND)
+                (void) unit_acquire_invocation_id(UNIT(d));
+
         if (d->found & DEVICE_FOUND_UDEV)
                 /* When the device is known to udev we consider it
                  * plugged. */
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 8035bee231..e1846e1adb 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -31,14 +31,8 @@
 #include "user-util.h"
 #include "fileio.h"
 
-/* Let's pick a UIDs within the 16bit range, so that we are compatible with containers using 16bit user namespacing. At
- * least on Fedora normal users are allocated until UID 60000, hence do not allocate from below this. Also stay away
- * from the upper end of the range as that is often used for overflow/nobody users. */
-#define UID_PICK_MIN ((uid_t) UINT32_C(0x0000EF00))
-#define UID_PICK_MAX ((uid_t) UINT32_C(0x0000FFEF))
-
 /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
-#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (UID_PICK_MAX - UID_PICK_MIN + 1)) + UID_PICK_MIN)
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
 
 static DynamicUser* dynamic_user_free(DynamicUser *d) {
         if (!d)
@@ -48,9 +42,7 @@ static DynamicUser* dynamic_user_free(DynamicUser *d) {
                 (void) hashmap_remove(d->manager->dynamic_users, d->name);
 
         safe_close_pair(d->storage_socket);
-        free(d);
-
-        return NULL;
+        return mfree(d);
 }
 
 static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) {
@@ -150,6 +142,45 @@ int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
         return 1;
 }
 
+static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
+
+        char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
+        const char *path2;
+        int r = 0, k;
+
+        /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
+         * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
+         * would be its own client then). We hence keep these world-readable symlinks in place, so that the
+         * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go
+         * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks
+         * on them and as those may be taken by any user with read access we can't make them world-readable. */
+
+        xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+        if (unlink(path1) < 0 && errno != ENOENT)
+                r = -errno;
+
+        if (b && symlink(name, path1) < 0) {
+                k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1);
+                if (r == 0)
+                        r = k;
+        }
+
+        path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
+        if (unlink(path2) < 0 && errno != ENOENT) {
+                k = -errno;
+                if (r == 0)
+                        r = k;
+        }
+
+        if (b && symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0) {
+                k = log_warning_errno(errno,  "Failed to symlink \"%s\": %m", path2);
+                if (r == 0)
+                        r = k;
+        }
+
+        return r;
+}
+
 static int pick_uid(const char *name, uid_t *ret_uid) {
 
         static const uint8_t hash_key[] = {
@@ -175,7 +206,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                 if (--n_tries <= 0) /* Give up retrying eventually */
                         return -EBUSY;
 
-                if (candidate < UID_PICK_MIN || candidate > UID_PICK_MAX)
+                if (!uid_is_dynamic(candidate))
                         goto next;
 
                 xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
@@ -200,7 +231,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                         if (st.st_nlink > 0)
                                 break;
 
-                        /* Oh, bummer, we got got the lock, but the file was unlinked between the time we opened it and
+                        /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and
                          * got the lock. Close it, and try again. */
                         lock_fd = safe_close(lock_fd);
                 }
@@ -223,6 +254,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                 }
 
                 (void) ftruncate(lock_fd, l);
+                (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */
 
                 *ret_uid = candidate;
                 r = lock_fd;
@@ -324,14 +356,16 @@ static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
         return 0;
 }
 
-static void unlink_uid_lock(int lock_fd, uid_t uid) {
+static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
         char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
 
         if (lock_fd < 0)
                 return;
 
         xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
-        (void) unlink_noerrno(lock_path);
+        (void) unlink(lock_path);
+
+        (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */
 }
 
 int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
@@ -399,7 +433,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
 
                 /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
                 if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) {
-                        unlink_uid_lock(uid_lock_fd, uid);
+                        unlink_uid_lock(uid_lock_fd, uid, d->name);
                         return -errno;
                 }
 
@@ -407,7 +441,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
                 if (r < 0) {
                         if (r != -EAGAIN) {
                                 /* OK, something bad happened, let's get rid of the bits we acquired. */
-                                unlink_uid_lock(uid_lock_fd, uid);
+                                unlink_uid_lock(uid_lock_fd, uid, d->name);
                                 goto finish;
                         }
 
@@ -416,7 +450,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
                         /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
                          * acquired, and use what's stored now. */
 
-                        unlink_uid_lock(uid_lock_fd, uid);
+                        unlink_uid_lock(uid_lock_fd, uid, d->name);
                         safe_close(uid_lock_fd);
 
                         uid = new_uid;
@@ -513,7 +547,7 @@ static int dynamic_user_close(DynamicUser *d) {
                 goto finish;
 
         /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
-        unlink_uid_lock(lock_fd, uid);
+        unlink_uid_lock(lock_fd, uid, d->name);
         r = 1;
 
 finish:
@@ -634,11 +668,8 @@ int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
         assert(m);
         assert(ret);
 
-        /* A friendly way to translate a dynamic user's UID into a his name. */
-
-        if (uid < UID_PICK_MIN)
-                return -ESRCH;
-        if (uid > UID_PICK_MAX)
+        /* A friendly way to translate a dynamic user's UID into a name. */
+        if (!uid_is_dynamic(uid))
                 return -ESRCH;
 
         xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
diff --git a/src/core/failure-action.c b/src/core/emergency-action.c
index ddae46190f..90232bc57a 100644
--- a/src/core/failure-action.c
+++ b/src/core/emergency-action.c
@@ -23,59 +23,60 @@
 
 #include "bus-error.h"
 #include "bus-util.h"
-#include "failure-action.h"
+#include "emergency-action.h"
 #include "special.h"
 #include "string-table.h"
 #include "terminal-util.h"
 
-static void log_and_status(Manager *m, const char *message) {
-        log_warning("%s", message);
+static void log_and_status(Manager *m, const char *message, const char *reason) {
+        log_warning("%s: %s", message, reason);
         manager_status_printf(m, STATUS_TYPE_EMERGENCY,
                               ANSI_HIGHLIGHT_RED " !!  " ANSI_NORMAL,
-                              "%s", message);
+                              "%s: %s", message, reason);
 }
 
-int failure_action(
+int emergency_action(
                 Manager *m,
-                FailureAction action,
-                const char *reboot_arg) {
+                EmergencyAction action,
+                const char *reboot_arg,
+                const char *reason) {
 
         assert(m);
         assert(action >= 0);
-        assert(action < _FAILURE_ACTION_MAX);
+        assert(action < _EMERGENCY_ACTION_MAX);
 
-        if (action == FAILURE_ACTION_NONE)
+        if (action == EMERGENCY_ACTION_NONE)
                 return -ECANCELED;
 
         if (!MANAGER_IS_SYSTEM(m)) {
                 /* Downgrade all options to simply exiting if we run
                  * in user mode */
 
-                log_warning("Exiting as result of failure.");
+                log_warning("Exiting: %s", reason);
                 m->exit_code = MANAGER_EXIT;
                 return -ECANCELED;
         }
 
         switch (action) {
 
-        case FAILURE_ACTION_REBOOT:
-                log_and_status(m, "Rebooting as result of failure.");
+        case EMERGENCY_ACTION_REBOOT:
+                log_and_status(m, "Rebooting", reason);
 
                 (void) update_reboot_parameter_and_warn(reboot_arg);
                 (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
 
                 break;
 
-        case FAILURE_ACTION_REBOOT_FORCE:
-                log_and_status(m, "Forcibly rebooting as result of failure.");
+        case EMERGENCY_ACTION_REBOOT_FORCE:
+                log_and_status(m, "Forcibly rebooting", reason);
 
                 (void) update_reboot_parameter_and_warn(reboot_arg);
                 m->exit_code = MANAGER_REBOOT;
 
                 break;
 
-        case FAILURE_ACTION_REBOOT_IMMEDIATE:
-                log_and_status(m, "Rebooting immediately as result of failure.");
+        case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
+                log_and_status(m, "Rebooting immediately", reason);
 
                 sync();
 
@@ -89,18 +90,18 @@ int failure_action(
                 reboot(RB_AUTOBOOT);
                 break;
 
-        case FAILURE_ACTION_POWEROFF:
-                log_and_status(m, "Powering off as result of failure.");
+        case EMERGENCY_ACTION_POWEROFF:
+                log_and_status(m, "Powering off", reason);
                 (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
                 break;
 
-        case FAILURE_ACTION_POWEROFF_FORCE:
-                log_and_status(m, "Forcibly powering off as result of failure.");
+        case EMERGENCY_ACTION_POWEROFF_FORCE:
+                log_and_status(m, "Forcibly powering off", reason);
                 m->exit_code = MANAGER_POWEROFF;
                 break;
 
-        case FAILURE_ACTION_POWEROFF_IMMEDIATE:
-                log_and_status(m, "Powering off immediately as result of failure.");
+        case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
+                log_and_status(m, "Powering off immediately", reason);
 
                 sync();
 
@@ -109,19 +110,19 @@ int failure_action(
                 break;
 
         default:
-                assert_not_reached("Unknown failure action");
+                assert_not_reached("Unknown emergency action");
         }
 
         return -ECANCELED;
 }
 
-static const char* const failure_action_table[_FAILURE_ACTION_MAX] = {
-        [FAILURE_ACTION_NONE] = "none",
-        [FAILURE_ACTION_REBOOT] = "reboot",
-        [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force",
-        [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
-        [FAILURE_ACTION_POWEROFF] = "poweroff",
-        [FAILURE_ACTION_POWEROFF_FORCE] = "poweroff-force",
-        [FAILURE_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
+static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
+        [EMERGENCY_ACTION_NONE] = "none",
+        [EMERGENCY_ACTION_REBOOT] = "reboot",
+        [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force",
+        [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
+        [EMERGENCY_ACTION_POWEROFF] = "poweroff",
+        [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
+        [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
 };
-DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction);
+DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
diff --git a/src/core/failure-action.h b/src/core/emergency-action.h
index 1adac4ad5c..8804b59752 100644
--- a/src/core/failure-action.h
+++ b/src/core/emergency-action.h
@@ -20,22 +20,22 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
-typedef enum FailureAction {
-        FAILURE_ACTION_NONE,
-        FAILURE_ACTION_REBOOT,
-        FAILURE_ACTION_REBOOT_FORCE,
-        FAILURE_ACTION_REBOOT_IMMEDIATE,
-        FAILURE_ACTION_POWEROFF,
-        FAILURE_ACTION_POWEROFF_FORCE,
-        FAILURE_ACTION_POWEROFF_IMMEDIATE,
-        _FAILURE_ACTION_MAX,
-        _FAILURE_ACTION_INVALID = -1
-} FailureAction;
+typedef enum EmergencyAction {
+        EMERGENCY_ACTION_NONE,
+        EMERGENCY_ACTION_REBOOT,
+        EMERGENCY_ACTION_REBOOT_FORCE,
+        EMERGENCY_ACTION_REBOOT_IMMEDIATE,
+        EMERGENCY_ACTION_POWEROFF,
+        EMERGENCY_ACTION_POWEROFF_FORCE,
+        EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+        _EMERGENCY_ACTION_MAX,
+        _EMERGENCY_ACTION_INVALID = -1
+} EmergencyAction;
 
 #include "macro.h"
 #include "manager.h"
 
-int failure_action(Manager *m, FailureAction action, const char *reboot_arg);
+int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason);
 
-const char* failure_action_to_string(FailureAction i) _const_;
-FailureAction failure_action_from_string(const char *s) _pure_;
+const char* emergency_action_to_string(EmergencyAction i) _const_;
+EmergencyAction emergency_action_from_string(const char *s) _pure_;
diff --git a/src/core/execute.c b/src/core/execute.c
index 6019df7ea6..85ee82c3e1 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -29,8 +29,10 @@
 #include <sys/mman.h>
 #include <sys/personality.h>
 #include <sys/prctl.h>
+#include <sys/shm.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 #include <sys/un.h>
 #include <unistd.h>
 #include <utmpx.h>
@@ -91,6 +93,7 @@
 #include "selinux-util.h"
 #include "signal-util.h"
 #include "smack-util.h"
+#include "special.h"
 #include "string-table.h"
 #include "string-util.h"
 #include "strv.h"
@@ -410,7 +413,8 @@ static int fixup_output(ExecOutput std_output, int socket_fd) {
 static int setup_input(
                 const ExecContext *context,
                 const ExecParameters *params,
-                int socket_fd) {
+                int socket_fd,
+                int named_iofds[3]) {
 
         ExecInput i;
 
@@ -460,6 +464,10 @@ static int setup_input(
         case EXEC_INPUT_SOCKET:
                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 
+        case EXEC_INPUT_NAMED_FD:
+                (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
+                return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
         default:
                 assert_not_reached("Unknown input type");
         }
@@ -471,6 +479,7 @@ static int setup_output(
                 const ExecParameters *params,
                 int fileno,
                 int socket_fd,
+                int named_iofds[3],
                 const char *ident,
                 uid_t uid,
                 gid_t gid,
@@ -522,7 +531,7 @@ static int setup_output(
                         return fileno;
 
                 /* Duplicate from stdout if possible */
-                if (e == o || e == EXEC_OUTPUT_INHERIT)
+                if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
 
                 o = e;
@@ -584,6 +593,10 @@ static int setup_output(
                 assert(socket_fd >= 0);
                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
 
+        case EXEC_OUTPUT_NAMED_FD:
+                (void) fd_nonblock(named_iofds[fileno], false);
+                return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
+
         default:
                 assert_not_reached("Unknown error type");
         }
@@ -719,73 +732,157 @@ static int ask_for_confirmation(char *response, char **argv) {
         return r;
 }
 
-static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
-        bool keep_groups = false;
+static int get_fixed_user(const ExecContext *c, const char **user,
+                          uid_t *uid, gid_t *gid,
+                          const char **home, const char **shell) {
         int r;
+        const char *name;
 
-        assert(context);
+        assert(c);
+
+        if (!c->user)
+                return 0;
+
+        /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
+         * (i.e. are "/" or "/bin/nologin"). */
+
+        name = c->user;
+        r = get_user_creds_clean(&name, uid, gid, home, shell);
+        if (r < 0)
+                return r;
 
-        /* Lookup and set GID and supplementary group list. Here too
-         * we avoid NSS lookups for gid=0. */
+        *user = name;
+        return 0;
+}
 
-        if (context->group || username) {
+static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
+        int r;
+        const char *name;
+
+        assert(c);
+
+        if (!c->group)
+                return 0;
+
+        name = c->group;
+        r = get_group_creds(&name, gid);
+        if (r < 0)
+                return r;
+
+        *group = name;
+        return 0;
+}
+
+static int get_supplementary_groups(const ExecContext *c, const char *user,
+                                    const char *group, gid_t gid,
+                                    gid_t **supplementary_gids, int *ngids) {
+        char **i;
+        int r, k = 0;
+        int ngroups_max;
+        bool keep_groups = false;
+        gid_t *groups = NULL;
+        _cleanup_free_ gid_t *l_gids = NULL;
+
+        assert(c);
+
+        /*
+         * If user is given, then lookup GID and supplementary groups list.
+         * We avoid NSS lookups for gid=0. Also we have to initialize groups
+         * here and as early as possible so we keep the list of supplementary
+         * groups of the caller.
+         */
+        if (user && gid_is_valid(gid) && gid != 0) {
                 /* First step, initialize groups from /etc/groups */
-                if (username && gid != 0) {
-                        if (initgroups(username, gid) < 0)
-                                return -errno;
+                if (initgroups(user, gid) < 0)
+                        return -errno;
 
-                        keep_groups = true;
-                }
+                keep_groups = true;
+        }
 
-                /* Second step, set our gids */
-                if (setresgid(gid, gid, gid) < 0)
+        if (!c->supplementary_groups)
+                return 0;
+
+        /*
+         * If SupplementaryGroups= was passed then NGROUPS_MAX has to
+         * be positive, otherwise fail.
+         */
+        errno = 0;
+        ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
+        if (ngroups_max <= 0) {
+                if (errno > 0)
                         return -errno;
+                else
+                        return -EOPNOTSUPP; /* For all other values */
         }
 
-        if (context->supplementary_groups) {
-                int ngroups_max, k;
-                gid_t *gids;
-                char **i;
+        l_gids = new(gid_t, ngroups_max);
+        if (!l_gids)
+                return -ENOMEM;
 
-                /* Final step, initialize any manually set supplementary groups */
-                assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
+        if (keep_groups) {
+                /*
+                 * Lookup the list of groups that the user belongs to, we
+                 * avoid NSS lookups here too for gid=0.
+                 */
+                k = ngroups_max;
+                if (getgrouplist(user, gid, l_gids, &k) < 0)
+                        return -EINVAL;
+        } else
+                k = 0;
 
-                if (!(gids = new(gid_t, ngroups_max)))
-                        return -ENOMEM;
+        STRV_FOREACH(i, c->supplementary_groups) {
+                const char *g;
 
-                if (keep_groups) {
-                        k = getgroups(ngroups_max, gids);
-                        if (k < 0) {
-                                free(gids);
-                                return -errno;
-                        }
-                } else
-                        k = 0;
+                if (k >= ngroups_max)
+                        return -E2BIG;
 
-                STRV_FOREACH(i, context->supplementary_groups) {
-                        const char *g;
+                g = *i;
+                r = get_group_creds(&g, l_gids+k);
+                if (r < 0)
+                        return r;
 
-                        if (k >= ngroups_max) {
-                                free(gids);
-                                return -E2BIG;
-                        }
+                k++;
+        }
 
-                        g = *i;
-                        r = get_group_creds(&g, gids+k);
-                        if (r < 0) {
-                                free(gids);
-                                return r;
-                        }
+        /*
+         * Sets ngids to zero to drop all supplementary groups, happens
+         * when we are under root and SupplementaryGroups= is empty.
+         */
+        if (k == 0) {
+                *ngids = 0;
+                return 0;
+        }
 
-                        k++;
-                }
+        /* Otherwise get the final list of supplementary groups */
+        groups = memdup(l_gids, sizeof(gid_t) * k);
+        if (!groups)
+                return -ENOMEM;
 
-                if (setgroups(k, gids) < 0) {
-                        free(gids);
-                        return -errno;
-                }
+        *supplementary_gids = groups;
+        *ngids = k;
+
+        groups = NULL;
+
+        return 0;
+}
+
+static int enforce_groups(const ExecContext *context, gid_t gid,
+                          gid_t *supplementary_gids, int ngids) {
+        int r;
 
-                free(gids);
+        assert(context);
+
+        /* Handle SupplementaryGroups= even if it is empty */
+        if (context->supplementary_groups) {
+                r = maybe_setgroups(ngids, supplementary_gids);
+                if (r < 0)
+                        return r;
+        }
+
+        if (gid_is_valid(gid)) {
+                /* Then set our gids */
+                if (setresgid(gid, gid, gid) < 0)
+                        return -errno;
         }
 
         return 0;
@@ -794,6 +891,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_
 static int enforce_user(const ExecContext *context, uid_t uid) {
         assert(context);
 
+        if (!uid_is_valid(uid))
+                return 0;
+
         /* Sets (but doesn't look up) the uid and make sure we keep the
          * capabilities while doing so. */
 
@@ -836,14 +936,19 @@ static int null_conv(
         return PAM_CONV_ERR;
 }
 
+#endif
+
 static int setup_pam(
                 const char *name,
                 const char *user,
                 uid_t uid,
+                gid_t gid,
                 const char *tty,
                 char ***env,
                 int fds[], unsigned n_fds) {
 
+#ifdef HAVE_PAM
+
         static const struct pam_conv conv = {
                 .conv = null_conv,
                 .appdata_ptr = NULL
@@ -943,8 +1048,14 @@ static int setup_pam(
                  * and this will make PR_SET_PDEATHSIG work in most cases.
                  * If this fails, ignore the error - but expect sd-pam threads
                  * to fail to exit normally */
+
+                r = maybe_setgroups(0, NULL);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
+                if (setresgid(gid, gid, gid) < 0)
+                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                 if (setresuid(uid, uid, uid) < 0)
-                        log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
+                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
 
                 (void) ignore_signals(SIGPIPE, -1);
 
@@ -1037,8 +1148,10 @@ fail:
         closelog();
 
         return r;
-}
+#else
+        return 0;
 #endif
+}
 
 static void rename_process_from_path(const char *path) {
         char process_name[11];
@@ -1073,15 +1186,29 @@ static void rename_process_from_path(const char *path) {
 
 #ifdef HAVE_SECCOMP
 
-static int apply_seccomp(const ExecContext *c) {
+static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
+
+        if (is_seccomp_available())
+                return false;
+
+        log_open();
+        log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+        log_close();
+        return true;
+}
+
+static int apply_seccomp(const Unit* u, const ExecContext *c) {
         uint32_t negative_action, action;
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         Iterator i;
         void *id;
         int r;
 
         assert(c);
 
+        if (skip_seccomp_unavailable(u, "syscall filtering"))
+                return 0;
+
         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
 
         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
@@ -1122,20 +1249,23 @@ finish:
         return r;
 }
 
-static int apply_address_families(const ExecContext *c) {
-        scmp_filter_ctx *seccomp;
+static int apply_address_families(const Unit* u, const ExecContext *c) {
+        scmp_filter_ctx seccomp;
         Iterator i;
         int r;
 
+#if defined(__i386__)
+        return 0;
+#endif
+
         assert(c);
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
+        if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
+                return 0;
 
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         if (c->address_families_whitelist) {
                 int af, first = 0, last = 0;
@@ -1232,10 +1362,6 @@ static int apply_address_families(const ExecContext *c) {
                 }
         }
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
@@ -1243,15 +1369,18 @@ finish:
         return r;
 }
 
-static int apply_memory_deny_write_execute(const ExecContext *c) {
-        scmp_filter_ctx *seccomp;
+static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
+        scmp_filter_ctx seccomp;
         int r;
 
         assert(c);
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
+        if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
+                return 0;
+
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+        if (r < 0)
+                return r;
 
         r = seccomp_rule_add(
                         seccomp,
@@ -1271,7 +1400,12 @@ static int apply_memory_deny_write_execute(const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        r = seccomp_rule_add(
+                        seccomp,
+                        SCMP_ACT_ERRNO(EPERM),
+                        SCMP_SYS(shmat),
+                        1,
+                        SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
         if (r < 0)
                 goto finish;
 
@@ -1282,22 +1416,25 @@ finish:
         return r;
 }
 
-static int apply_restrict_realtime(const ExecContext *c) {
+static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
         static const int permitted_policies[] = {
                 SCHED_OTHER,
                 SCHED_BATCH,
                 SCHED_IDLE,
         };
 
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         unsigned i;
         int r, p, max_policy = 0;
 
         assert(c);
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
+        if (skip_seccomp_unavailable(u, "RestrictRealtime="))
+                return 0;
+
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+        if (r < 0)
+                return r;
 
         /* Determine the highest policy constant we want to allow */
         for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
@@ -1341,7 +1478,34 @@ static int apply_restrict_realtime(const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
+        return r;
+}
+
+static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
+        scmp_filter_ctx seccomp;
+        int r;
+
+        assert(c);
+
+        /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
+         * let's protect even those systems where this is left on in the kernel. */
+
+        if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
+                return 0;
+
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+        if (r < 0)
+                return r;
+
+        r = seccomp_rule_add(
+                        seccomp,
+                        SCMP_ACT_ERRNO(EPERM),
+                        SCMP_SYS(_sysctl),
+                        0);
         if (r < 0)
                 goto finish;
 
@@ -1352,12 +1516,33 @@ finish:
         return r;
 }
 
+static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
+        assert(c);
+
+        /* Turn off module syscalls on ProtectKernelModules=yes */
+
+        if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+                return 0;
+
+        return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
+}
+
+static int apply_private_devices(const Unit *u, const ExecContext *c) {
+        assert(c);
+
+        /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
+
+        if (skip_seccomp_unavailable(u, "PrivateDevices="))
+                return 0;
+
+        return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
+}
+
 #endif
 
 static void do_idle_pipe_dance(int idle_pipe[4]) {
         assert(idle_pipe);
 
-
         idle_pipe[1] = safe_close(idle_pipe[1]);
         idle_pipe[2] = safe_close(idle_pipe[2]);
 
@@ -1384,6 +1569,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
 }
 
 static int build_environment(
+                Unit *u,
                 const ExecContext *c,
                 const ExecParameters *p,
                 unsigned n_fds,
@@ -1398,10 +1584,11 @@ static int build_environment(
         unsigned n_env = 0;
         char *x;
 
+        assert(u);
         assert(c);
         assert(ret);
 
-        our_env = new0(char*, 12);
+        our_env = new0(char*, 14);
         if (!our_env)
                 return -ENOMEM;
 
@@ -1436,6 +1623,16 @@ static int build_environment(
                 our_env[n_env++] = x;
         }
 
+        /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
+         * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
+         * check the database directly. */
+        if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
+                x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
+                if (!x)
+                        return -ENOMEM;
+                our_env[n_env++] = x;
+        }
+
         if (home) {
                 x = strappend("HOME=", home);
                 if (!x)
@@ -1462,6 +1659,13 @@ static int build_environment(
                 our_env[n_env++] = x;
         }
 
+        if (!sd_id128_is_null(u->invocation_id)) {
+                if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
+                        return -ENOMEM;
+
+                our_env[n_env++] = x;
+        }
+
         if (exec_context_needs_term(c)) {
                 const char *tty_path, *term = NULL;
 
@@ -1547,7 +1751,10 @@ static bool exec_needs_mount_namespace(
 
         if (context->private_devices ||
             context->protect_system != PROTECT_SYSTEM_NO ||
-            context->protect_home != PROTECT_HOME_NO)
+            context->protect_home != PROTECT_HOME_NO ||
+            context->protect_kernel_tunables ||
+            context->protect_kernel_modules ||
+            context->protect_control_groups)
                 return true;
 
         return false;
@@ -1574,9 +1781,9 @@ static int setup_private_users(uid_t uid, gid_t gid) {
                 asprintf(&uid_map,
                          "0 0 1\n"                      /* Map root → root */
                          UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
-                         uid, uid);                     /* The case where the above is the same */
+                         uid, uid);
         else
-                uid_map = strdup("0 0 1\n");
+                uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
         if (!uid_map)
                 return -ENOMEM;
 
@@ -1706,6 +1913,197 @@ static int setup_private_users(uid_t uid, gid_t gid) {
         return 0;
 }
 
+static int setup_runtime_directory(
+                const ExecContext *context,
+                const ExecParameters *params,
+                uid_t uid,
+                gid_t gid) {
+
+        char **rt;
+        int r;
+
+        assert(context);
+        assert(params);
+
+        STRV_FOREACH(rt, context->runtime_directory) {
+                _cleanup_free_ char *p;
+
+                p = strjoin(params->runtime_prefix, "/", *rt, NULL);
+                if (!p)
+                        return -ENOMEM;
+
+                r = mkdir_p_label(p, context->runtime_directory_mode);
+                if (r < 0)
+                        return r;
+
+                r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+static int setup_smack(
+                const ExecContext *context,
+                const ExecCommand *command) {
+
+#ifdef HAVE_SMACK
+        int r;
+
+        assert(context);
+        assert(command);
+
+        if (!mac_smack_use())
+                return 0;
+
+        if (context->smack_process_label) {
+                r = mac_smack_apply_pid(0, context->smack_process_label);
+                if (r < 0)
+                        return r;
+        }
+#ifdef SMACK_DEFAULT_PROCESS_LABEL
+        else {
+                _cleanup_free_ char *exec_label = NULL;
+
+                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
+                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
+                        return r;
+
+                r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
+                if (r < 0)
+                        return r;
+        }
+#endif
+#endif
+
+        return 0;
+}
+
+static int compile_read_write_paths(
+                const ExecContext *context,
+                const ExecParameters *params,
+                char ***ret) {
+
+        _cleanup_strv_free_ char **l = NULL;
+        char **rt;
+
+        /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all
+         * runtime directories. */
+
+        if (strv_isempty(context->read_write_paths) &&
+            strv_isempty(context->runtime_directory)) {
+                *ret = NULL; /* NOP if neither is set */
+                return 0;
+        }
+
+        l = strv_copy(context->read_write_paths);
+        if (!l)
+                return -ENOMEM;
+
+        STRV_FOREACH(rt, context->runtime_directory) {
+                char *s;
+
+                s = strjoin(params->runtime_prefix, "/", *rt, NULL);
+                if (!s)
+                        return -ENOMEM;
+
+                if (strv_consume(&l, s) < 0)
+                        return -ENOMEM;
+        }
+
+        *ret = l;
+        l = NULL;
+
+        return 0;
+}
+
+static int apply_mount_namespace(Unit *u, const ExecContext *context,
+                                 const ExecParameters *params,
+                                 ExecRuntime *runtime) {
+        int r;
+        _cleanup_free_ char **rw = NULL;
+        char *tmp = NULL, *var = NULL;
+        const char *root_dir = NULL;
+        NameSpaceInfo ns_info = {
+                .private_dev = context->private_devices,
+                .protect_control_groups = context->protect_control_groups,
+                .protect_kernel_tunables = context->protect_kernel_tunables,
+                .protect_kernel_modules = context->protect_kernel_modules,
+        };
+
+        assert(context);
+
+        /* The runtime struct only contains the parent of the private /tmp,
+         * which is non-accessible to world users. Inside of it there's a /tmp
+         * that is sticky, and that's the one we want to use here. */
+
+        if (context->private_tmp && runtime) {
+                if (runtime->tmp_dir)
+                        tmp = strjoina(runtime->tmp_dir, "/tmp");
+                if (runtime->var_tmp_dir)
+                        var = strjoina(runtime->var_tmp_dir, "/tmp");
+        }
+
+        r = compile_read_write_paths(context, params, &rw);
+        if (r < 0)
+                return r;
+
+        if (params->flags & EXEC_APPLY_CHROOT)
+                root_dir = context->root_directory;
+
+        r = setup_namespace(root_dir, &ns_info, rw,
+                            context->read_only_paths,
+                            context->inaccessible_paths,
+                            tmp,
+                            var,
+                            context->protect_home,
+                            context->protect_system,
+                            context->mount_flags);
+
+        /* If we couldn't set up the namespace this is probably due to a
+         * missing capability. In this case, silently proceeed. */
+        if (IN_SET(r, -EPERM, -EACCES)) {
+                log_open();
+                log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
+                log_close();
+                r = 0;
+        }
+
+        return r;
+}
+
+static int apply_working_directory(const ExecContext *context,
+                                   const ExecParameters *params,
+                                   const char *home,
+                                   const bool needs_mount_ns) {
+        const char *d;
+        const char *wd;
+
+        assert(context);
+
+        if (context->working_directory_home)
+                wd = home;
+        else if (context->working_directory)
+                wd = context->working_directory;
+        else
+                wd = "/";
+
+        if (params->flags & EXEC_APPLY_CHROOT) {
+                if (!needs_mount_ns && context->root_directory)
+                        if (chroot(context->root_directory) < 0)
+                                return -errno;
+
+                d = wd;
+        } else
+                d = strjoina(strempty(context->root_directory), "/", strempty(wd));
+
+        if (chdir(d) < 0 && !context->working_directory_missing_ok)
+                return -errno;
+
+        return 0;
+}
+
 static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
         assert(array);
         assert(n);
@@ -1723,11 +2121,12 @@ static int close_remaining_fds(
                 const ExecParameters *params,
                 ExecRuntime *runtime,
                 DynamicCreds *dcreds,
+                int user_lookup_fd,
                 int socket_fd,
                 int *fds, unsigned n_fds) {
 
         unsigned n_dont_close = 0;
-        int dont_close[n_fds + 11];
+        int dont_close[n_fds + 12];
 
         assert(params);
 
@@ -1755,9 +2154,73 @@ static int close_remaining_fds(
                         append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
         }
 
+        if (user_lookup_fd >= 0)
+                dont_close[n_dont_close++] = user_lookup_fd;
+
         return close_all_fds(dont_close, n_dont_close);
 }
 
+static bool context_has_address_families(const ExecContext *c) {
+        assert(c);
+
+        return c->address_families_whitelist ||
+                !set_isempty(c->address_families);
+}
+
+static bool context_has_syscall_filters(const ExecContext *c) {
+        assert(c);
+
+        return c->syscall_whitelist ||
+                !set_isempty(c->syscall_filter) ||
+                !set_isempty(c->syscall_archs);
+}
+
+static bool context_has_no_new_privileges(const ExecContext *c) {
+        assert(c);
+
+        if (c->no_new_privileges)
+                return true;
+
+        if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+                return false;
+
+        return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
+                c->memory_deny_write_execute ||
+                c->restrict_realtime ||
+                c->protect_kernel_tunables ||
+                c->protect_kernel_modules ||
+                c->private_devices ||
+                context_has_syscall_filters(c);
+}
+
+static int send_user_lookup(
+                Unit *unit,
+                int user_lookup_fd,
+                uid_t uid,
+                gid_t gid) {
+
+        assert(unit);
+
+        /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
+         * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
+         * specified. */
+
+        if (user_lookup_fd < 0)
+                return 0;
+
+        if (!uid_is_valid(uid) && !gid_is_valid(gid))
+                return 0;
+
+        if (writev(user_lookup_fd,
+               (struct iovec[]) {
+                           { .iov_base = &uid, .iov_len = sizeof(uid) },
+                           { .iov_base = &gid, .iov_len = sizeof(gid) },
+                           { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+                return -errno;
+
+        return 0;
+}
+
 static int exec_child(
                 Unit *unit,
                 ExecCommand *command,
@@ -1767,19 +2230,23 @@ static int exec_child(
                 DynamicCreds *dcreds,
                 char **argv,
                 int socket_fd,
+                int named_iofds[3],
                 int *fds, unsigned n_fds,
                 char **files_env,
+                int user_lookup_fd,
                 int *exit_status) {
 
         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
         _cleanup_free_ char *mac_selinux_context_net = NULL;
-        const char *username = NULL, *home = NULL, *shell = NULL, *wd;
+        _cleanup_free_ gid_t *supplementary_gids = NULL;
+        const char *username = NULL, *groupname = NULL;
+        const char *home = NULL, *shell = NULL;
         dev_t journal_stream_dev = 0;
         ino_t journal_stream_ino = 0;
         bool needs_mount_namespace;
         uid_t uid = UID_INVALID;
         gid_t gid = GID_INVALID;
-        int i, r;
+        int i, r, ngids = 0;
 
         assert(unit);
         assert(command);
@@ -1815,7 +2282,7 @@ static int exec_child(
 
         log_forget_fds();
 
-        r = close_remaining_fds(params, runtime, dcreds, socket_fd, fds, n_fds);
+        r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
                 return r;
@@ -1862,7 +2329,7 @@ static int exec_child(
                         return r;
                 }
 
-                if (uid == UID_INVALID || gid == GID_INVALID) {
+                if (!uid_is_valid(uid) || !gid_is_valid(gid)) {
                         *exit_status = EXIT_USER;
                         return -ESRCH;
                 }
@@ -1871,55 +2338,53 @@ static int exec_child(
                         username = dcreds->user->name;
 
         } else {
-                if (context->user) {
-                        username = context->user;
-                        r = get_user_creds(&username, &uid, &gid, &home, &shell);
-                        if (r < 0) {
-                                *exit_status = EXIT_USER;
-                                return r;
-                        }
-
-                        /* Don't set $HOME or $SHELL if they are are not particularly enlightening anyway. */
-                        if (isempty(home) || path_equal(home, "/"))
-                                home = NULL;
+                r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
+                if (r < 0) {
+                        *exit_status = EXIT_USER;
+                        return r;
+                }
 
-                        if (isempty(shell) || PATH_IN_SET(shell,
-                                                          "/bin/nologin",
-                                                          "/sbin/nologin",
-                                                          "/usr/bin/nologin",
-                                                          "/usr/sbin/nologin"))
-                                shell = NULL;
+                r = get_fixed_group(context, &groupname, &gid);
+                if (r < 0) {
+                        *exit_status = EXIT_GROUP;
+                        return r;
                 }
+        }
 
-                if (context->group) {
-                        const char *g = context->group;
+        /* Initialize user supplementary groups and get SupplementaryGroups= ones */
+        r = get_supplementary_groups(context, username, groupname, gid,
+                                     &supplementary_gids, &ngids);
+        if (r < 0) {
+                *exit_status = EXIT_GROUP;
+                return r;
+        }
 
-                        r = get_group_creds(&g, &gid);
-                        if (r < 0) {
-                                *exit_status = EXIT_GROUP;
-                                return r;
-                        }
-                }
+        r = send_user_lookup(unit, user_lookup_fd, uid, gid);
+        if (r < 0) {
+                *exit_status = EXIT_USER;
+                return r;
         }
 
+        user_lookup_fd = safe_close(user_lookup_fd);
+
         /* If a socket is connected to STDIN/STDOUT/STDERR, we
          * must sure to drop O_NONBLOCK */
         if (socket_fd >= 0)
                 (void) fd_nonblock(socket_fd, false);
 
-        r = setup_input(context, params, socket_fd);
+        r = setup_input(context, params, socket_fd, named_iofds);
         if (r < 0) {
                 *exit_status = EXIT_STDIN;
                 return r;
         }
 
-        r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+        r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDOUT;
                 return r;
         }
 
-        r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+        r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDERR;
                 return r;
@@ -2006,7 +2471,7 @@ static int exec_child(
                                       USER_PROCESS,
                                       username ? "root" : context->user);
 
-        if (context->user && is_terminal_input(context->std_input)) {
+        if (context->user) {
                 r = chown_terminal(STDIN_FILENO, uid);
                 if (r < 0) {
                         *exit_status = EXIT_STDIN;
@@ -2033,32 +2498,15 @@ static int exec_child(
         }
 
         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
-                char **rt;
-
-                STRV_FOREACH(rt, context->runtime_directory) {
-                        _cleanup_free_ char *p;
-
-                        p = strjoin(params->runtime_prefix, "/", *rt, NULL);
-                        if (!p) {
-                                *exit_status = EXIT_RUNTIME_DIRECTORY;
-                                return -ENOMEM;
-                        }
-
-                        r = mkdir_p_label(p, context->runtime_directory_mode);
-                        if (r < 0) {
-                                *exit_status = EXIT_RUNTIME_DIRECTORY;
-                                return r;
-                        }
-
-                        r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
-                        if (r < 0) {
-                                *exit_status = EXIT_RUNTIME_DIRECTORY;
-                                return r;
-                        }
+                r = setup_runtime_directory(context, params, uid, gid);
+                if (r < 0) {
+                        *exit_status = EXIT_RUNTIME_DIRECTORY;
+                        return r;
                 }
         }
 
         r = build_environment(
+                        unit,
                         context,
                         params,
                         n_fds,
@@ -2092,49 +2540,16 @@ static int exec_child(
         }
         accum_env = strv_env_clean(accum_env);
 
-        umask(context->umask);
+        (void) umask(context->umask);
 
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
-                r = enforce_groups(context, username, gid);
-                if (r < 0) {
-                        *exit_status = EXIT_GROUP;
-                        return r;
-                }
-#ifdef HAVE_SMACK
-                if (context->smack_process_label) {
-                        r = mac_smack_apply_pid(0, context->smack_process_label);
-                        if (r < 0) {
-                                *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                return r;
-                        }
-                }
-#ifdef SMACK_DEFAULT_PROCESS_LABEL
-                else {
-                        _cleanup_free_ char *exec_label = NULL;
-
-                        r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
-                        if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
-                                *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                return r;
-                        }
-
-                        r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
-                        if (r < 0) {
-                                *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                return r;
-                        }
-                }
-#endif
-#endif
-#ifdef HAVE_PAM
                 if (context->pam_name && username) {
-                        r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
+                        r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
                         if (r < 0) {
                                 *exit_status = EXIT_PAM;
                                 return r;
                         }
                 }
-#endif
         }
 
         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
@@ -2146,75 +2561,27 @@ static int exec_child(
         }
 
         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
-
         if (needs_mount_namespace) {
-                char *tmp = NULL, *var = NULL;
-
-                /* The runtime struct only contains the parent
-                 * of the private /tmp, which is
-                 * non-accessible to world users. Inside of it
-                 * there's a /tmp that is sticky, and that's
-                 * the one we want to use here. */
-
-                if (context->private_tmp && runtime) {
-                        if (runtime->tmp_dir)
-                                tmp = strjoina(runtime->tmp_dir, "/tmp");
-                        if (runtime->var_tmp_dir)
-                                var = strjoina(runtime->var_tmp_dir, "/tmp");
-                }
-
-                r = setup_namespace(
-                                (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
-                                context->read_write_paths,
-                                context->read_only_paths,
-                                context->inaccessible_paths,
-                                tmp,
-                                var,
-                                context->private_devices,
-                                context->protect_home,
-                                context->protect_system,
-                                context->mount_flags);
-
-                /* If we couldn't set up the namespace this is
-                 * probably due to a missing capability. In this case,
-                 * silently proceeed. */
-                if (r == -EPERM || r == -EACCES) {
-                        log_open();
-                        log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
-                        log_close();
-                } else if (r < 0) {
+                r = apply_mount_namespace(unit, context, params, runtime);
+                if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
                         return r;
                 }
         }
 
-        if (context->working_directory_home)
-                wd = home;
-        else if (context->working_directory)
-                wd = context->working_directory;
-        else
-                wd = "/";
-
-        if (params->flags & EXEC_APPLY_CHROOT) {
-                if (!needs_mount_namespace && context->root_directory)
-                        if (chroot(context->root_directory) < 0) {
-                                *exit_status = EXIT_CHROOT;
-                                return -errno;
-                        }
-
-                if (chdir(wd) < 0 &&
-                    !context->working_directory_missing_ok) {
-                        *exit_status = EXIT_CHDIR;
-                        return -errno;
-                }
-        } else {
-                const char *d;
+        /* Apply just after mount namespace setup */
+        r = apply_working_directory(context, params, home, needs_mount_namespace);
+        if (r < 0) {
+                *exit_status = EXIT_CHROOT;
+                return r;
+        }
 
-                d = strjoina(strempty(context->root_directory), "/", strempty(wd));
-                if (chdir(d) < 0 &&
-                    !context->working_directory_missing_ok) {
-                        *exit_status = EXIT_CHDIR;
-                        return -errno;
+        /* Drop groups as early as possbile */
+        if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
+                r = enforce_groups(context, gid, supplementary_gids, ngids);
+                if (r < 0) {
+                        *exit_status = EXIT_GROUP;
+                        return r;
                 }
         }
 
@@ -2259,11 +2626,6 @@ static int exec_child(
 
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
 
-                bool use_address_families = context->address_families_whitelist ||
-                        !set_isempty(context->address_families);
-                bool use_syscall_filter = context->syscall_whitelist ||
-                        !set_isempty(context->syscall_filter) ||
-                        !set_isempty(context->syscall_archs);
                 int secure_bits = context->secure_bits;
 
                 for (i = 0; i < _RLIMIT_MAX; i++) {
@@ -2330,6 +2692,41 @@ static int exec_child(
                         }
                 }
 
+                /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+                 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+                 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+                 * are restricted. */
+
+#ifdef HAVE_SELINUX
+                if (mac_selinux_use()) {
+                        char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+                        if (exec_context) {
+                                r = setexeccon(exec_context);
+                                if (r < 0) {
+                                        *exit_status = EXIT_SELINUX_CONTEXT;
+                                        return r;
+                                }
+                        }
+                }
+#endif
+
+                r = setup_smack(context, command);
+                if (r < 0) {
+                        *exit_status = EXIT_SMACK_PROCESS_LABEL;
+                        return r;
+                }
+
+#ifdef HAVE_APPARMOR
+                if (context->apparmor_profile && mac_apparmor_use()) {
+                        r = aa_change_onexec(context->apparmor_profile);
+                        if (r < 0 && !context->apparmor_profile_ignore) {
+                                *exit_status = EXIT_APPARMOR_PROFILE;
+                                return -errno;
+                        }
+                }
+#endif
+
                 /* PR_GET_SECUREBITS is not privileged, while
                  * PR_SET_SECUREBITS is. So to suppress
                  * potential EPERMs we'll try not to call
@@ -2340,16 +2737,15 @@ static int exec_child(
                                 return -errno;
                         }
 
-                if (context->no_new_privileges ||
-                    (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter)))
+                if (context_has_no_new_privileges(context))
                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
                                 return -errno;
                         }
 
 #ifdef HAVE_SECCOMP
-                if (use_address_families) {
-                        r = apply_address_families(context);
+                if (context_has_address_families(context)) {
+                        r = apply_address_families(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_ADDRESS_FAMILIES;
                                 return r;
@@ -2357,7 +2753,7 @@ static int exec_child(
                 }
 
                 if (context->memory_deny_write_execute) {
-                        r = apply_memory_deny_write_execute(context);
+                        r = apply_memory_deny_write_execute(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
                                 return r;
@@ -2365,42 +2761,44 @@ static int exec_child(
                 }
 
                 if (context->restrict_realtime) {
-                        r = apply_restrict_realtime(context);
+                        r = apply_restrict_realtime(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
                                 return r;
                         }
                 }
 
-                if (use_syscall_filter) {
-                        r = apply_seccomp(context);
+                if (context->protect_kernel_tunables) {
+                        r = apply_protect_sysctl(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
                                 return r;
                         }
                 }
-#endif
 
-#ifdef HAVE_SELINUX
-                if (mac_selinux_use()) {
-                        char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+                if (context->protect_kernel_modules) {
+                        r = apply_protect_kernel_modules(unit, context);
+                        if (r < 0) {
+                                *exit_status = EXIT_SECCOMP;
+                                return r;
+                        }
+                }
 
-                        if (exec_context) {
-                                r = setexeccon(exec_context);
-                                if (r < 0) {
-                                        *exit_status = EXIT_SELINUX_CONTEXT;
-                                        return r;
-                                }
+                if (context->private_devices) {
+                        r = apply_private_devices(unit, context);
+                        if (r < 0) {
+                                *exit_status = EXIT_SECCOMP;
+                                return r;
                         }
                 }
-#endif
 
-#ifdef HAVE_APPARMOR
-                if (context->apparmor_profile && mac_apparmor_use()) {
-                        r = aa_change_onexec(context->apparmor_profile);
-                        if (r < 0 && !context->apparmor_profile_ignore) {
-                                *exit_status = EXIT_APPARMOR_PROFILE;
-                                return -errno;
+                /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+                 * by the filter as little as possible. */
+                if (context_has_syscall_filters(context)) {
+                        r = apply_seccomp(unit, context);
+                        if (r < 0) {
+                                *exit_status = EXIT_SECCOMP;
+                                return r;
                         }
                 }
 #endif
@@ -2444,6 +2842,7 @@ int exec_spawn(Unit *unit,
         int *fds = NULL; unsigned n_fds = 0;
         _cleanup_free_ char *line = NULL;
         int socket_fd, r;
+        int named_iofds[3] = { -1, -1, -1 };
         char **argv;
         pid_t pid;
 
@@ -2470,6 +2869,10 @@ int exec_spawn(Unit *unit,
                 n_fds = params->n_fds;
         }
 
+        r = exec_context_named_iofds(unit, context, params, named_iofds);
+        if (r < 0)
+                return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
+
         r = exec_context_load_environment(unit, context, &files_env);
         if (r < 0)
                 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
@@ -2499,8 +2902,10 @@ int exec_spawn(Unit *unit,
                                dcreds,
                                argv,
                                socket_fd,
+                               named_iofds,
                                fds, n_fds,
                                files_env,
+                               unit->manager->user_lookup_fds[1],
                                &exit_status);
                 if (r < 0) {
                         log_open();
@@ -2560,6 +2965,9 @@ void exec_context_done(ExecContext *c) {
         for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
                 c->rlimit[l] = mfree(c->rlimit[l]);
 
+        for (l = 0; l < 3; l++)
+                c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
+
         c->working_directory = mfree(c->working_directory);
         c->root_directory = mfree(c->root_directory);
         c->tty_path = mfree(c->tty_path);
@@ -2658,6 +3066,56 @@ static void invalid_env(const char *p, void *userdata) {
         log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
 }
 
+const char* exec_context_fdname(const ExecContext *c, int fd_index) {
+        assert(c);
+
+        switch (fd_index) {
+        case STDIN_FILENO:
+                if (c->std_input != EXEC_INPUT_NAMED_FD)
+                        return NULL;
+                return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
+        case STDOUT_FILENO:
+                if (c->std_output != EXEC_OUTPUT_NAMED_FD)
+                        return NULL;
+                return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
+        case STDERR_FILENO:
+                if (c->std_error != EXEC_OUTPUT_NAMED_FD)
+                        return NULL;
+                return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
+        default:
+                return NULL;
+        }
+}
+
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
+        unsigned i, targets;
+        const char *stdio_fdname[3];
+
+        assert(c);
+        assert(p);
+
+        targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
+                  (c->std_output == EXEC_OUTPUT_NAMED_FD) +
+                  (c->std_error == EXEC_OUTPUT_NAMED_FD);
+
+        for (i = 0; i < 3; i++)
+                stdio_fdname[i] = exec_context_fdname(c, i);
+
+        for (i = 0; i < p->n_fds && targets > 0; i++)
+                if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
+                        named_iofds[STDIN_FILENO] = p->fds[i];
+                        targets--;
+                } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
+                        named_iofds[STDOUT_FILENO] = p->fds[i];
+                        targets--;
+                } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
+                        named_iofds[STDERR_FILENO] = p->fds[i];
+                        targets--;
+                }
+
+        return (targets == 0 ? 0 : -ENOENT);
+}
+
 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
         char **i, **r = NULL;
 
@@ -2803,6 +3261,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 "%sNonBlocking: %s\n"
                 "%sPrivateTmp: %s\n"
                 "%sPrivateDevices: %s\n"
+                "%sProtectKernelTunables: %s\n"
+                "%sProtectKernelModules: %s\n"
+                "%sProtectControlGroups: %s\n"
                 "%sPrivateNetwork: %s\n"
                 "%sPrivateUsers: %s\n"
                 "%sProtectHome: %s\n"
@@ -2816,6 +3277,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->non_blocking),
                 prefix, yes_no(c->private_tmp),
                 prefix, yes_no(c->private_devices),
+                prefix, yes_no(c->protect_kernel_tunables),
+                prefix, yes_no(c->protect_kernel_modules),
+                prefix, yes_no(c->protect_control_groups),
                 prefix, yes_no(c->private_network),
                 prefix, yes_no(c->private_users),
                 prefix, protect_home_to_string(c->protect_home),
@@ -3159,7 +3623,8 @@ char *exec_command_line(char **argv) {
         STRV_FOREACH(a, argv)
                 k += strlen(*a)+3;
 
-        if (!(n = new(char, k)))
+        n = new(char, k);
+        if (!n)
                 return NULL;
 
         p = n;
@@ -3348,9 +3813,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
         free(r->tmp_dir);
         free(r->var_tmp_dir);
         safe_close_pair(r->netns_storage_socket);
-        free(r);
-
-        return NULL;
+        return mfree(r);
 }
 
 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
@@ -3506,7 +3969,8 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
         [EXEC_INPUT_TTY] = "tty",
         [EXEC_INPUT_TTY_FORCE] = "tty-force",
         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
-        [EXEC_INPUT_SOCKET] = "socket"
+        [EXEC_INPUT_SOCKET] = "socket",
+        [EXEC_INPUT_NAMED_FD] = "fd",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
@@ -3521,7 +3985,8 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
         [EXEC_OUTPUT_JOURNAL] = "journal",
         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
-        [EXEC_OUTPUT_SOCKET] = "socket"
+        [EXEC_OUTPUT_SOCKET] = "socket",
+        [EXEC_OUTPUT_NAMED_FD] = "fd",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
diff --git a/src/core/execute.h b/src/core/execute.h
index 106154f81a..c7d0f7761e 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -50,6 +50,7 @@ typedef enum ExecInput {
         EXEC_INPUT_TTY_FORCE,
         EXEC_INPUT_TTY_FAIL,
         EXEC_INPUT_SOCKET,
+        EXEC_INPUT_NAMED_FD,
         _EXEC_INPUT_MAX,
         _EXEC_INPUT_INVALID = -1
 } ExecInput;
@@ -65,6 +66,7 @@ typedef enum ExecOutput {
         EXEC_OUTPUT_JOURNAL,
         EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
         EXEC_OUTPUT_SOCKET,
+        EXEC_OUTPUT_NAMED_FD,
         _EXEC_OUTPUT_MAX,
         _EXEC_OUTPUT_INVALID = -1
 } ExecOutput;
@@ -120,6 +122,7 @@ struct ExecContext {
         ExecInput std_input;
         ExecOutput std_output;
         ExecOutput std_error;
+        char *stdio_fdname[3];
 
         nsec_t timer_slack_nsec;
 
@@ -174,10 +177,14 @@ struct ExecContext {
         bool private_users;
         ProtectSystem protect_system;
         ProtectHome protect_home;
+        bool protect_kernel_tunables;
+        bool protect_kernel_modules;
+        bool protect_control_groups;
 
         bool no_new_privileges;
 
         bool dynamic_user;
+        bool remove_ipc;
 
         /* This is not exposed to the user but available
          * internally. We need it to make sure that whenever we spawn
@@ -280,6 +287,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix);
 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_root);
 
 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l);
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
+const char* exec_context_fdname(const ExecContext *c, int fd_index);
 
 bool exec_context_may_touch_console(ExecContext *c);
 bool exec_context_maintains_privileges(ExecContext *c);
diff --git a/src/core/job.c b/src/core/job.c
index 7557874d4d..ac6910a906 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
 }
 
 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
-        static struct {
+        static const struct {
                 const char *color, *word;
         } const statuses[_JOB_RESULT_MAX] = {
-                [JOB_DONE]        = {ANSI_GREEN,            "  OK  "},
-                [JOB_TIMEOUT]     = {ANSI_HIGHLIGHT_RED,    " TIME "},
-                [JOB_FAILED]      = {ANSI_HIGHLIGHT_RED,    "FAILED"},
-                [JOB_DEPENDENCY]  = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"},
-                [JOB_SKIPPED]     = {ANSI_HIGHLIGHT,        " INFO "},
-                [JOB_ASSERT]      = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"},
-                [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"},
+                [JOB_DONE]        = { ANSI_GREEN,            "  OK  " },
+                [JOB_TIMEOUT]     = { ANSI_HIGHLIGHT_RED,    " TIME " },
+                [JOB_FAILED]      = { ANSI_HIGHLIGHT_RED,    "FAILED" },
+                [JOB_DEPENDENCY]  = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+                [JOB_SKIPPED]     = { ANSI_HIGHLIGHT,        " INFO " },
+                [JOB_ASSERT]      = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+                [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
         };
 
         const char *format;
@@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
         if (!format)
                 return;
 
+        /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */
         DISABLE_WARNING_FORMAT_NONLITERAL;
-        xsprintf(buf, format, unit_description(u));
+        snprintf(buf, sizeof(buf), format, unit_description(u));
         REENABLE_WARNING;
 
         switch (t) {
@@ -927,7 +928,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
         u = j->unit;
         job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
 
-        failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
+        emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out");
 
         return 0;
 }
@@ -997,7 +998,10 @@ char *job_dbus_path(Job *j) {
         return p;
 }
 
-int job_serialize(Job *j, FILE *f, FDSet *fds) {
+int job_serialize(Job *j, FILE *f) {
+        assert(j);
+        assert(f);
+
         fprintf(f, "job-id=%u\n", j->id);
         fprintf(f, "job-type=%s\n", job_type_to_string(j->type));
         fprintf(f, "job-state=%s\n", job_state_to_string(j->state));
@@ -1008,15 +1012,16 @@ int job_serialize(Job *j, FILE *f, FDSet *fds) {
         if (j->begin_usec > 0)
                 fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec);
 
-        bus_track_serialize(j->clients, f);
+        bus_track_serialize(j->clients, f, "subscribed");
 
         /* End marker */
         fputc('\n', f);
         return 0;
 }
 
-int job_deserialize(Job *j, FILE *f, FDSet *fds) {
+int job_deserialize(Job *j, FILE *f) {
         assert(j);
+        assert(f);
 
         for (;;) {
                 char line[LINE_MAX], *l, *v;
@@ -1106,7 +1111,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) {
                 } else if (streq(l, "subscribed")) {
 
                         if (strv_extend(&j->deserialized_clients, v) < 0)
-                                return log_oom();
+                                log_oom();
                 }
         }
 }
@@ -1118,9 +1123,8 @@ int job_coldplug(Job *j) {
 
         /* After deserialization is complete and the bus connection
          * set up again, let's start watching our subscribers again */
-        r = bus_track_coldplug(j->manager, &j->clients, &j->deserialized_clients);
-        if (r < 0)
-                return r;
+        (void) bus_track_coldplug(j->manager, &j->clients, false, j->deserialized_clients);
+        j->deserialized_clients = strv_free(j->deserialized_clients);
 
         if (j->state == JOB_WAITING)
                 job_add_to_run_queue(j);
diff --git a/src/core/job.h b/src/core/job.h
index d359e8bb3e..85368f0d30 100644
--- a/src/core/job.h
+++ b/src/core/job.h
@@ -177,8 +177,8 @@ Job* job_install(Job *j);
 int job_install_deserialized(Job *j);
 void job_uninstall(Job *j);
 void job_dump(Job *j, FILE*f, const char *prefix);
-int job_serialize(Job *j, FILE *f, FDSet *fds);
-int job_deserialize(Job *j, FILE *f, FDSet *fds);
+int job_serialize(Job *j, FILE *f);
+int job_deserialize(Job *j, FILE *f);
 int job_coldplug(Job *j);
 
 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 251155b428..af2f9d960b 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -35,9 +35,9 @@ $1.Environment,                  config_parse_environ,               0,
 $1.EnvironmentFile,              config_parse_unit_env_file,         0,                             offsetof($1, exec_context.environment_files)
 $1.PassEnvironment,              config_parse_pass_environ,          0,                             offsetof($1, exec_context.pass_environment)
 $1.DynamicUser,                  config_parse_bool,                  0,                             offsetof($1, exec_context.dynamic_user)
-$1.StandardInput,                config_parse_input,                 0,                             offsetof($1, exec_context.std_input)
-$1.StandardOutput,               config_parse_output,                0,                             offsetof($1, exec_context.std_output)
-$1.StandardError,                config_parse_output,                0,                             offsetof($1, exec_context.std_error)
+$1.StandardInput,                config_parse_exec_input,            0,                             offsetof($1, exec_context)
+$1.StandardOutput,               config_parse_exec_output,           0,                             offsetof($1, exec_context)
+$1.StandardError,                config_parse_exec_output,           0,                             offsetof($1, exec_context)
 $1.TTYPath,                      config_parse_unit_path_printf,      0,                             offsetof($1, exec_context.tty_path)
 $1.TTYReset,                     config_parse_bool,                  0,                             offsetof($1, exec_context.tty_reset)
 $1.TTYVHangup,                   config_parse_bool,                  0,                             offsetof($1, exec_context.tty_vhangup)
@@ -89,6 +89,9 @@ $1.ReadOnlyPaths,                config_parse_namespace_path_strv,   0,
 $1.InaccessiblePaths,            config_parse_namespace_path_strv,   0,                             offsetof($1, exec_context.inaccessible_paths)
 $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)
 $1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)
+$1.ProtectKernelTunables,        config_parse_bool,                  0,                             offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules,         config_parse_bool,                  0,                             offsetof($1, exec_context.protect_kernel_modules)
+$1.ProtectControlGroups,         config_parse_bool,                  0,                             offsetof($1, exec_context.protect_control_groups)
 $1.PrivateNetwork,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_network)
 $1.PrivateUsers,                 config_parse_bool,                  0,                             offsetof($1, exec_context.private_users)
 $1.ProtectSystem,                config_parse_protect_system,        0,                             offsetof($1, exec_context)
@@ -122,6 +125,8 @@ $1.KillSignal,                   config_parse_signal,                0,
 m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
 `$1.Slice,                       config_parse_unit_slice,            0,                             0
 $1.CPUAccounting,                config_parse_bool,                  0,                             offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight,                    config_parse_cpu_weight,            0,                             offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight,             config_parse_cpu_weight,            0,                             offsetof($1, cgroup_context.startup_cpu_weight)
 $1.CPUShares,                    config_parse_cpu_shares,            0,                             offsetof($1, cgroup_context.cpu_shares)
 $1.StartupCPUShares,             config_parse_cpu_shares,            0,                             offsetof($1, cgroup_context.startup_cpu_shares)
 $1.CPUQuota,                     config_parse_cpu_quota,             0,                             offsetof($1, cgroup_context)
@@ -129,6 +134,7 @@ $1.MemoryAccounting,             config_parse_bool,                  0,
 $1.MemoryLow,                    config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryHigh,                   config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryMax,                    config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
+$1.MemorySwapMax,                config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryLimit,                  config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.DeviceAllow,                  config_parse_device_allow,          0,                             offsetof($1, cgroup_context)
 $1.DevicePolicy,                 config_parse_device_policy,         0,                             offsetof($1, cgroup_context.device_policy)
@@ -182,13 +188,13 @@ Unit.OnFailureIsolate,           config_parse_job_mode_isolate,      0,
 Unit.IgnoreOnIsolate,            config_parse_bool,                  0,                             offsetof(Unit, ignore_on_isolate)
 Unit.IgnoreOnSnapshot,           config_parse_warn_compat,           DISABLED_LEGACY,               0
 Unit.JobTimeoutSec,              config_parse_sec_fix_0,             0,                             offsetof(Unit, job_timeout)
-Unit.JobTimeoutAction,           config_parse_failure_action,        0,                             offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutAction,           config_parse_emergency_action,      0,                             offsetof(Unit, job_timeout_action)
 Unit.JobTimeoutRebootArgument,   config_parse_string,                0,                             offsetof(Unit, job_timeout_reboot_arg)
 Unit.StartLimitIntervalSec,      config_parse_sec,                   0,                             offsetof(Unit, start_limit.interval)
 m4_dnl The following is a legacy alias name for compatibility
 Unit.StartLimitInterval,         config_parse_sec,                   0,                             offsetof(Unit, start_limit.interval)
 Unit.StartLimitBurst,            config_parse_unsigned,              0,                             offsetof(Unit, start_limit.burst)
-Unit.StartLimitAction,           config_parse_failure_action,        0,                             offsetof(Unit, start_limit_action)
+Unit.StartLimitAction,           config_parse_emergency_action,      0,                             offsetof(Unit, start_limit_action)
 Unit.RebootArgument,             config_parse_string,                0,                             offsetof(Unit, reboot_arg)
 Unit.ConditionPathExists,        config_parse_unit_condition_path,   CONDITION_PATH_EXISTS,         offsetof(Unit, conditions)
 Unit.ConditionPathExistsGlob,    config_parse_unit_condition_path,   CONDITION_PATH_EXISTS_GLOB,    offsetof(Unit, conditions)
@@ -245,9 +251,9 @@ Service.WatchdogSec,             config_parse_sec,                   0,
 m4_dnl The following three only exist for compatibility, they moved into Unit, see above
 Service.StartLimitInterval,      config_parse_sec,                   0,                             offsetof(Unit, start_limit.interval)
 Service.StartLimitBurst,         config_parse_unsigned,              0,                             offsetof(Unit, start_limit.burst)
-Service.StartLimitAction,        config_parse_failure_action,        0,                             offsetof(Unit, start_limit_action)
+Service.StartLimitAction,        config_parse_emergency_action,      0,                             offsetof(Unit, start_limit_action)
 Service.RebootArgument,          config_parse_string,                0,                             offsetof(Unit, reboot_arg)
-Service.FailureAction,           config_parse_failure_action,        0,                             offsetof(Service, failure_action)
+Service.FailureAction,           config_parse_emergency_action,      0,                             offsetof(Service, emergency_action)
 Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
 Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
 Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
@@ -353,6 +359,8 @@ Mount.Type,                      config_parse_string,                0,
 Mount.TimeoutSec,                config_parse_sec,                   0,                             offsetof(Mount, timeout_usec)
 Mount.DirectoryMode,             config_parse_mode,                  0,                             offsetof(Mount, directory_mode)
 Mount.SloppyOptions,             config_parse_bool,                  0,                             offsetof(Mount, sloppy_options)
+Mount.LazyUnmount,               config_parse_bool,                  0,                             offsetof(Mount, lazy_unmount)
+Mount.ForceUnmount,              config_parse_bool,                  0,                             offsetof(Mount, force_unmount)
 EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
 CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
 KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 420f368689..cbc826809e 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -776,8 +776,104 @@ int config_parse_socket_bindtodevice(
         return 0;
 }
 
-DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input literal specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output literal specifier");
+
+int config_parse_exec_input(const char *unit,
+                            const char *filename,
+                            unsigned line,
+                            const char *section,
+                            unsigned section_line,
+                            const char *lvalue,
+                            int ltype,
+                            const char *rvalue,
+                            void *data,
+                            void *userdata) {
+        ExecContext *c = data;
+        const char *name;
+        int r;
+
+        assert(data);
+        assert(filename);
+        assert(line);
+        assert(rvalue);
+
+        name = startswith(rvalue, "fd:");
+        if (name) {
+                /* Strip prefix and validate fd name */
+                if (!fdname_is_valid(name)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+                        return 0;
+                }
+                c->std_input = EXEC_INPUT_NAMED_FD;
+                r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], name);
+                if (r < 0)
+                        log_oom();
+                return r;
+        } else {
+                ExecInput ei = exec_input_from_string(rvalue);
+                if (ei == _EXEC_INPUT_INVALID)
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue);
+                else
+                        c->std_input = ei;
+                return 0;
+        }
+}
+
+int config_parse_exec_output(const char *unit,
+                             const char *filename,
+                             unsigned line,
+                             const char *section,
+                             unsigned section_line,
+                             const char *lvalue,
+                             int ltype,
+                             const char *rvalue,
+                             void *data,
+                             void *userdata) {
+        ExecContext *c = data;
+        ExecOutput eo;
+        const char *name;
+        int r;
+
+        assert(data);
+        assert(filename);
+        assert(line);
+        assert(lvalue);
+        assert(rvalue);
+
+        name = startswith(rvalue, "fd:");
+        if (name) {
+                /* Strip prefix and validate fd name */
+                if (!fdname_is_valid(name)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+                        return 0;
+                }
+                eo = EXEC_OUTPUT_NAMED_FD;
+        } else {
+                eo = exec_output_from_string(rvalue);
+                if (eo == _EXEC_OUTPUT_INVALID) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue);
+                        return 0;
+                }
+        }
+
+        if (streq(lvalue, "StandardOutput")) {
+                c->std_output = eo;
+                r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], name);
+                if (r < 0)
+                        log_oom();
+                return r;
+        } else if (streq(lvalue, "StandardError")) {
+                c->std_error = eo;
+                r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], name);
+                if (r < 0)
+                        log_oom();
+                return r;
+        } else {
+                log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output property, ignoring: %s", lvalue);
+                return 0;
+        }
+}
 
 int config_parse_exec_io_class(const char *unit,
                                const char *filename,
@@ -1338,10 +1434,13 @@ int config_parse_timer(const char *unit,
                        void *userdata) {
 
         Timer *t = data;
-        usec_t u = 0;
+        usec_t usec = 0;
         TimerValue *v;
         TimerBase b;
         CalendarSpec *c = NULL;
+        Unit *u = userdata;
+        _cleanup_free_ char *k = NULL;
+        int r;
 
         assert(filename);
         assert(lvalue);
@@ -1360,14 +1459,20 @@ int config_parse_timer(const char *unit,
                 return 0;
         }
 
+        r = unit_full_printf(u, rvalue, &k);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+                return 0;
+        }
+
         if (b == TIMER_CALENDAR) {
-                if (calendar_spec_from_string(rvalue, &c) < 0) {
-                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", rvalue);
+                if (calendar_spec_from_string(k, &c) < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k);
                         return 0;
                 }
         } else {
-                if (parse_sec(rvalue, &u) < 0) {
-                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", rvalue);
+                if (parse_sec(k, &usec) < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k);
                         return 0;
                 }
         }
@@ -1379,7 +1484,7 @@ int config_parse_timer(const char *unit,
         }
 
         v->base = b;
-        v->value = u;
+        v->value = usec;
         v->calendar_spec = c;
 
         LIST_PREPEND(value, t->values, v);
@@ -1582,11 +1687,7 @@ int config_parse_fdname(
                 return 0;
         }
 
-        free(s->fdname);
-        s->fdname = p;
-        p = NULL;
-
-        return 0;
+        return free_and_replace(s->fdname, p);
 }
 
 int config_parse_service_sockets(
@@ -2043,9 +2144,7 @@ int config_parse_working_directory(
                         return 0;
                 }
 
-                free(c->working_directory);
-                c->working_directory = k;
-                k = NULL;
+                free_and_replace(c->working_directory, k);
 
                 c->working_directory_home = false;
         }
@@ -2424,7 +2523,7 @@ int config_parse_unit_condition_null(
 }
 
 DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_failure_action, failure_action, FailureAction, "Failed to parse failure action specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier");
 
 int config_parse_unit_requires_mounts_for(
                 const char *unit,
@@ -2519,6 +2618,7 @@ int config_parse_documentation(const char *unit,
 }
 
 #ifdef HAVE_SECCOMP
+
 static int syscall_filter_parse_one(
                 const char *unit,
                 const char *filename,
@@ -2529,27 +2629,29 @@ static int syscall_filter_parse_one(
                 bool warn) {
         int r;
 
-        if (*t == '@') {
-                const SystemCallFilterSet *set;
+        if (t[0] == '@') {
+                const SyscallFilterSet *set;
+                const char *i;
 
-                for (set = syscall_filter_sets; set->set_name; set++)
-                        if (streq(set->set_name, t)) {
-                                const char *sys;
+                set = syscall_filter_set_find(t);
+                if (!set) {
+                        if (warn)
+                                log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
+                        return 0;
+                }
 
-                                NULSTR_FOREACH(sys, set->value) {
-                                        r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
-                                        if (r < 0)
-                                                return r;
-                                }
-                                break;
-                        }
+                NULSTR_FOREACH(i, set->value) {
+                        r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
+                        if (r < 0)
+                                return r;
+                }
         } else {
                 int id;
 
                 id = seccomp_syscall_resolve_name(t);
                 if (id == __NR_SCMP_ERROR)  {
                         if (warn)
-                                log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+                                log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
                         return 0;
                 }
 
@@ -2563,8 +2665,9 @@ static int syscall_filter_parse_one(
                         if (r < 0)
                                 return log_oom();
                 } else
-                        set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+                        (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
         }
+
         return 0;
 }
 
@@ -2583,8 +2686,7 @@ int config_parse_syscall_filter(
         ExecContext *c = data;
         Unit *u = userdata;
         bool invert = false;
-        const char *word, *state;
-        size_t l;
+        const char *p;
         int r;
 
         assert(filename);
@@ -2623,24 +2725,24 @@ int config_parse_syscall_filter(
                 }
         }
 
-        FOREACH_WORD_QUOTED(word, l, rvalue, state) {
-                _cleanup_free_ char *t = NULL;
+        p = rvalue;
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
 
-                t = strndup(word, l);
-                if (!t)
+                r = extract_first_word(&p, &word, NULL, 0);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
                         return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+                        break;
+                }
 
-                r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+                r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
                 if (r < 0)
                         return r;
         }
-        if (!isempty(state))
-                log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
-
-        /* Turn on NNP, but only if it wasn't configured explicitly
-         * before, and only if we are in user mode. */
-        if (!c->no_new_privileges_set && MANAGER_IS_USER(u->manager))
-                c->no_new_privileges = true;
 
         return 0;
 }
@@ -2851,6 +2953,34 @@ int config_parse_unit_slice(
 
 DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
 
+int config_parse_cpu_weight(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint64_t *weight = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        r = cg_weight_parse(rvalue, weight);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue);
+                return 0;
+        }
+
+        return 0;
+}
+
 int config_parse_cpu_shares(
                 const char *unit,
                 const char *filename,
@@ -2953,8 +3083,12 @@ int config_parse_memory_limit(
                 c->memory_high = bytes;
         else if (streq(lvalue, "MemoryMax"))
                 c->memory_max = bytes;
-        else
+        else if (streq(lvalue, "MemorySwapMax"))
+                c->memory_swap_max = bytes;
+        else if (streq(lvalue, "MemoryLimit"))
                 c->memory_limit = bytes;
+        else
+                return -EINVAL;
 
         return 0;
 }
@@ -2971,30 +3105,36 @@ int config_parse_tasks_max(
                 void *data,
                 void *userdata) {
 
-        uint64_t *tasks_max = data, u;
+        uint64_t *tasks_max = data, v;
+        Unit *u = userdata;
         int r;
 
-        if (isempty(rvalue) || streq(rvalue, "infinity")) {
-                *tasks_max = (uint64_t) -1;
+        if (isempty(rvalue)) {
+                *tasks_max = u->manager->default_tasks_max;
+                return 0;
+        }
+
+        if (streq(rvalue, "infinity")) {
+                *tasks_max = CGROUP_LIMIT_MAX;
                 return 0;
         }
 
         r = parse_percent(rvalue);
         if (r < 0) {
-                r = safe_atou64(rvalue, &u);
+                r = safe_atou64(rvalue, &v);
                 if (r < 0) {
                         log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
                         return 0;
                 }
         } else
-                u = system_tasks_max_scale(r, 100U);
+                v = system_tasks_max_scale(r, 100U);
 
-        if (u <= 0 || u >= UINT64_MAX) {
+        if (v <= 0 || v >= UINT64_MAX) {
                 log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
                 return 0;
         }
 
-        *tasks_max = u;
+        *tasks_max = v;
         return 0;
 }
 
@@ -3037,9 +3177,7 @@ int config_parse_device_allow(
         if (!path)
                 return log_oom();
 
-        if (!startswith(path, "/dev/") &&
-            !startswith(path, "block-") &&
-            !startswith(path, "char-")) {
+        if (!is_deviceallow_pattern(path)) {
                 log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path);
                 return 0;
         }
@@ -3694,7 +3832,7 @@ int config_parse_no_new_privileges(
                 return 0;
         }
 
-        c->no_new_privileges = !!k;
+        c->no_new_privileges = k;
         c->no_new_privileges_set = true;
 
         return 0;
@@ -4144,8 +4282,8 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_exec_cpu_affinity,     "CPUAFFINITY" },
                 { config_parse_mode,                  "MODE" },
                 { config_parse_unit_env_file,         "FILE" },
-                { config_parse_output,                "OUTPUT" },
-                { config_parse_input,                 "INPUT" },
+                { config_parse_exec_output,           "OUTPUT" },
+                { config_parse_exec_input,            "INPUT" },
                 { config_parse_log_facility,          "FACILITY" },
                 { config_parse_log_level,             "LEVEL" },
                 { config_parse_exec_secure_bits,      "SECUREBITS" },
@@ -4180,7 +4318,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_unit_slice,            "SLICE" },
                 { config_parse_documentation,         "URL" },
                 { config_parse_service_timeout,       "SECONDS" },
-                { config_parse_failure_action,        "ACTION" },
+                { config_parse_emergency_action,      "ACTION" },
                 { config_parse_set_status,            "STATUS" },
                 { config_parse_service_sockets,       "SOCKETS" },
                 { config_parse_environ,               "ENVIRON" },
@@ -4191,6 +4329,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_address_families,      "FAMILIES" },
 #endif
                 { config_parse_cpu_shares,            "SHARES" },
+                { config_parse_cpu_weight,            "WEIGHT" },
                 { config_parse_memory_limit,          "LIMIT" },
                 { config_parse_device_allow,          "DEVICE" },
                 { config_parse_device_policy,         "POLICY" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 213bce55a7..c05f205c37 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -45,7 +45,9 @@ int config_parse_service_timeout(const char *unit, const char *filename, unsigne
 int config_parse_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_service_restart(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_socket_bindtodevice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_io_class(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_io_priority(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -73,7 +75,7 @@ int config_parse_unit_condition_string(const char *unit, const char *filename, u
 int config_parse_unit_condition_null(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_failure_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_emergency_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_syscall_archs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -81,6 +83,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned
 int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/main.c b/src/core/main.c
index 35b0a43901..f07ed71b31 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -72,6 +72,9 @@
 #include "process-util.h"
 #include "raw-clone.h"
 #include "rlimit-util.h"
+#ifdef HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
 #include "selinux-setup.h"
 #include "selinux-util.h"
 #include "signal-util.h"
@@ -86,6 +89,7 @@
 #include "user-util.h"
 #include "virt.h"
 #include "watchdog.h"
+#include "emergency-action.h"
 
 static enum {
         ACTION_RUN,
@@ -128,6 +132,7 @@ static bool arg_default_memory_accounting = false;
 static bool arg_default_tasks_accounting = true;
 static uint64_t arg_default_tasks_max = UINT64_MAX;
 static sd_id128_t arg_machine_id = {};
+static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
 
 noreturn static void freeze_or_reboot(void) {
 
@@ -199,7 +204,7 @@ noreturn static void crash(int sig) {
                                               pid, sigchld_code_to_string(status.si_code),
                                               status.si_status,
                                               strna(status.si_code == CLD_EXITED
-                                                    ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
+                                                    ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
                                                     : signal_to_string(status.si_status)));
                         else
                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
@@ -303,7 +308,7 @@ static int set_machine_id(const char *m) {
         return 0;
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 
         int r;
 
@@ -699,6 +704,7 @@ static int parse_config_file(void) {
                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
                 { "Manager", "DefaultTasksAccounting",    config_parse_bool,             0, &arg_default_tasks_accounting          },
                 { "Manager", "DefaultTasksMax",           config_parse_tasks_max,        0, &arg_default_tasks_max                 },
+                { "Manager", "CtrlAltDelBurstAction",     config_parse_emergency_action, 0, &arg_cad_burst_action                  },
                 {}
         };
 
@@ -712,7 +718,7 @@ static int parse_config_file(void) {
                 CONF_PATHS_NULSTR("systemd/system.conf.d") :
                 CONF_PATHS_NULSTR("systemd/user.conf.d");
 
-        config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
+        config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
 
         /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
          * like everywhere else. */
@@ -993,10 +999,8 @@ static int parse_argv(int argc, char *argv[]) {
 
                 case ARG_MACHINE_ID:
                         r = set_machine_id(optarg);
-                        if (r < 0) {
-                                log_error("MachineID '%s' is not valid.", optarg);
-                                return r;
-                        }
+                        if (r < 0)
+                                return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
                         break;
 
                 case 'h':
@@ -1119,7 +1123,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
          * later when transitioning from the initrd to the main
          * systemd or suchlike. */
         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
-                return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
+                return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
 
         /* Make sure forked processes get the default kernel setting */
         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
@@ -1136,7 +1140,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
         nl.rlim_cur = nl.rlim_max = 64*1024;
         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
         if (r < 0)
-                return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
+                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
 
         return 0;
 }
@@ -1186,6 +1190,9 @@ static int enforce_syscall_archs(Set *archs) {
         void *id;
         int r;
 
+        if (!is_seccomp_available())
+                return 0;
+
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return log_oom();
@@ -1505,7 +1512,8 @@ int main(int argc, char *argv[]) {
         if (getpid() == 1) {
                 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
                  * will process core dumps for system services by default. */
-                (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
+                if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+                        log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
 
                 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
                  * until the systemd-coredump tool is enabled via sysctl. */
@@ -1523,15 +1531,9 @@ int main(int argc, char *argv[]) {
                  * need to do that for user instances since they never log
                  * into the console. */
                 log_show_color(colors_enabled());
-                make_null_stdio();
-        }
-
-        /* Initialize default unit */
-        r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
-        if (r < 0) {
-                log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
-                error_message = "Failed to set default unit";
-                goto finish;
+                r = make_null_stdio();
+                if (r < 0)
+                        log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
         }
 
         r = initialize_join_controllers();
@@ -1559,7 +1561,7 @@ int main(int argc, char *argv[]) {
         (void) reset_all_signal_handlers();
         (void) ignore_signals(SIGNALS_IGNORE, -1);
 
-        arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */
+        arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
 
         if (parse_config_file() < 0) {
                 error_message = "Failed to parse config file";
@@ -1567,7 +1569,7 @@ int main(int argc, char *argv[]) {
         }
 
         if (arg_system) {
-                r = parse_proc_cmdline(parse_proc_cmdline_item);
+                r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
                 if (r < 0)
                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
         }
@@ -1581,6 +1583,16 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        /* Initialize default unit */
+        if (!arg_default_unit) {
+                arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
+                if (!arg_default_unit) {
+                        r = log_oom();
+                        error_message = "Failed to set default unit";
+                        goto finish;
+                }
+        }
+
         if (arg_action == ACTION_TEST &&
             geteuid() == 0) {
                 log_error("Don't run test mode as root.");
@@ -1601,11 +1613,10 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (arg_action == ACTION_TEST)
-                skip_setup = true;
-
-        if (arg_action == ACTION_TEST || arg_action == ACTION_HELP)
+        if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) {
                 pager_open(arg_no_pager, false);
+                skip_setup = true;
+        }
 
         if (arg_action == ACTION_HELP) {
                 retval = help();
@@ -1763,10 +1774,10 @@ int main(int argc, char *argv[]) {
                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
 
         if (arg_system) {
-                bump_rlimit_nofile(&saved_rlimit_nofile);
+                (void) bump_rlimit_nofile(&saved_rlimit_nofile);
 
                 if (empty_etc) {
-                        r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0);
+                        r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
                         if (r < 0)
                                 log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m");
                         else
@@ -1789,6 +1800,7 @@ int main(int argc, char *argv[]) {
         m->initrd_timestamp = initrd_timestamp;
         m->security_start_timestamp = security_start_timestamp;
         m->security_finish_timestamp = security_finish_timestamp;
+        m->cad_burst_action = arg_cad_burst_action;
 
         manager_set_defaults(m);
         manager_set_show_status(m, arg_show_status);
diff --git a/src/core/manager.c b/src/core/manager.c
index bb2000d860..ffccfdcd5e 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -45,6 +45,7 @@
 #include "bus-error.h"
 #include "bus-kernel.h"
 #include "bus-util.h"
+#include "clean-ipc.h"
 #include "dbus-job.h"
 #include "dbus-manager.h"
 #include "dbus-unit.h"
@@ -81,6 +82,7 @@
 #include "transaction.h"
 #include "umask-util.h"
 #include "unit-name.h"
+#include "user-util.h"
 #include "util.h"
 #include "virt.h"
 #include "watchdog.h"
@@ -98,6 +100,7 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui
 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
 static int manager_run_generators(Manager *m);
@@ -519,6 +522,7 @@ static void manager_clean_environment(Manager *m) {
                         "LISTEN_FDNAMES",
                         "WATCHDOG_PID",
                         "WATCHDOG_USEC",
+                        "INVOCATION_ID",
                         NULL);
 }
 
@@ -579,17 +583,25 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
         if (MANAGER_IS_SYSTEM(m)) {
                 m->unit_log_field = "UNIT=";
                 m->unit_log_format_string = "UNIT=%s";
+
+                m->invocation_log_field = "INVOCATION_ID=";
+                m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
         } else {
                 m->unit_log_field = "USER_UNIT=";
                 m->unit_log_format_string = "USER_UNIT=%s";
+
+                m->invocation_log_field = "USER_INVOCATION_ID=";
+                m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
         }
 
         m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
 
         m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
-                m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd =
+                m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
                 m->ask_password_inotify_fd = -1;
 
+        m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
+
         m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
 
         m->have_ask_password = -EINVAL; /* we don't know */
@@ -656,9 +668,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
                 goto fail;
         }
 
-        /* Note that we set up neither kdbus, nor the notify fd
-         * here. We do that after deserialization, since they might
-         * have gotten serialized across the reexec. */
+        /* Note that we do not set up the notify fd here. We do that after deserialization,
+         * since they might have gotten serialized across the reexec. */
 
         m->taint_usr = dir_is_empty("/usr") > 0;
 
@@ -766,7 +777,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
         if (!MANAGER_IS_SYSTEM(m))
                 return 0;
 
-        if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */
                 return 0;
 
         if (m->cgroups_agent_fd < 0) {
@@ -812,6 +823,59 @@ static int manager_setup_cgroups_agent(Manager *m) {
         return 0;
 }
 
+static int manager_setup_user_lookup_fd(Manager *m) {
+        int r;
+
+        assert(m);
+
+        /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
+         * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
+         * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
+         * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
+         * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
+         * hence we establish this communication channel so that forked off processes can pass their UID/GID
+         * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
+         * datagram, along with their unit name, so that we can share one communication socket pair among all units for
+         * this purpose.
+         *
+         * You might wonder why we need a communication channel for this that is independent of the usual notification
+         * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
+         * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
+         * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
+         *
+         * Note that this function is called under two circumstances: when we first initialize (in which case we
+         * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
+         * (in which case the socket pair already exists but we still need to allocate the event source for it). */
+
+        if (m->user_lookup_fds[0] < 0) {
+
+                /* Free all secondary fields */
+                safe_close_pair(m->user_lookup_fds);
+                m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+
+                if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
+                        return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
+
+                (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
+        }
+
+        if (!m->user_lookup_event_source) {
+                r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
+                if (r < 0)
+                        return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+
+                /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
+                 * resolutions */
+                r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+                if (r < 0)
+                        return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m");
+
+                (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
+        }
+
+        return 0;
+}
+
 static int manager_connect_bus(Manager *m, bool reexecuting) {
         bool try_bus_connect;
 
@@ -821,7 +885,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) {
                 return 0;
 
         try_bus_connect =
-                m->kdbus_fd >= 0 ||
                 reexecuting ||
                 (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"));
 
@@ -853,8 +916,7 @@ enum {
         _GC_OFFSET_MAX
 };
 
-static void unit_gc_mark_good(Unit *u, unsigned gc_marker)
-{
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
         Iterator i;
         Unit *other;
 
@@ -1007,6 +1069,7 @@ Manager* manager_free(Manager *m) {
         hashmap_free(m->dynamic_users);
 
         hashmap_free(m->units);
+        hashmap_free(m->units_by_invocation_id);
         hashmap_free(m->jobs);
         hashmap_free(m->watch_pids1);
         hashmap_free(m->watch_pids2);
@@ -1021,12 +1084,13 @@ Manager* manager_free(Manager *m) {
         sd_event_source_unref(m->time_change_event_source);
         sd_event_source_unref(m->jobs_in_progress_event_source);
         sd_event_source_unref(m->run_queue_event_source);
+        sd_event_source_unref(m->user_lookup_event_source);
 
         safe_close(m->signal_fd);
         safe_close(m->notify_fd);
         safe_close(m->cgroups_agent_fd);
         safe_close(m->time_change_fd);
-        safe_close(m->kdbus_fd);
+        safe_close_pair(m->user_lookup_fds);
 
         manager_close_ask_password(m);
 
@@ -1052,8 +1116,10 @@ Manager* manager_free(Manager *m) {
         assert(hashmap_isempty(m->units_requiring_mounts_for));
         hashmap_free(m->units_requiring_mounts_for);
 
-        free(m);
-        return NULL;
+        hashmap_free(m->uid_refs);
+        hashmap_free(m->gid_refs);
+
+        return mfree(m);
 }
 
 void manager_enumerate(Manager *m) {
@@ -1177,9 +1243,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
                 return r;
 
         /* Make sure the transient directory always exists, so that it remains in the search path */
-        r = mkdir_p_label(m->lookup_paths.transient, 0755);
-        if (r < 0)
-                return r;
+        if (!m->test_run) {
+                r = mkdir_p_label(m->lookup_paths.transient, 0755);
+                if (r < 0)
+                        return r;
+        }
 
         dual_timestamp_get(&m->generators_start_timestamp);
         r = manager_run_generators(m);
@@ -1221,10 +1289,15 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
         if (q < 0 && r == 0)
                 r = q;
 
-        /* We might have deserialized the kdbus control fd, but if we
-         * didn't, then let's create the bus now. */
-        manager_connect_bus(m, !!serialization);
-        bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed);
+        q = manager_setup_user_lookup_fd(m);
+        if (q < 0 && r == 0)
+                r = q;
+
+        /* Let's connect to the bus now. */
+        (void) manager_connect_bus(m, !!serialization);
+
+        (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+        m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
 
         /* Third, fire things up! */
         manager_coldplug(m);
@@ -1232,6 +1305,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
         /* Release any dynamic users no longer referenced */
         dynamic_user_vacuum(m, true);
 
+        /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+        manager_vacuum_uid_refs(m);
+        manager_vacuum_gid_refs(m);
+
         if (serialization) {
                 assert(m->n_reloading > 0);
                 m->n_reloading--;
@@ -1604,8 +1681,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const
 
         if (UNIT_VTABLE(u)->notify_message)
                 UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
-        else
-                log_unit_debug(u, "Got notification message for unit. Ignoring.");
+        else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
+                _cleanup_free_ char *x = NULL, *y = NULL;
+
+                x = cescape(buf);
+                if (x)
+                        y = ellipsize(x, 20, 90);
+                log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+        }
 }
 
 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
@@ -1631,7 +1714,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
 
         struct cmsghdr *cmsg;
         struct ucred *ucred = NULL;
-        bool found = false;
         Unit *u1, *u2, *u3;
         int r, *fd_array = NULL;
         unsigned n_fds = 0;
@@ -1645,16 +1727,15 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 return 0;
         }
 
-        n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+        n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
         if (n < 0) {
-                if (!IN_SET(errno, EAGAIN, EINTR))
-                        log_error("Failed to receive notification message: %m");
+                if (IN_SET(errno, EAGAIN, EINTR))
+                        return 0; /* Spurious wakeup, try again */
 
-                /* It's not an option to return an error here since it
-                 * would disable the notification handler entirely. Services
-                 * wouldn't be able to send the WATCHDOG message for
-                 * example... */
-                return 0;
+                /* If this is any other, real error, then let's stop processing this socket. This of course means we
+                 * won't take notification messages anymore, but that's still better than busy looping around this:
+                 * being woken up over and over again but being unable to actually read the message off the socket. */
+                return log_error_errno(errno, "Failed to receive notification message: %m");
         }
 
         CMSG_FOREACH(cmsg, &msghdr) {
@@ -1687,40 +1768,40 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 return 0;
         }
 
-        if ((size_t) n >= sizeof(buf)) {
+        if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
                 log_warning("Received notify message exceeded maximum size. Ignoring.");
                 return 0;
         }
 
-        /* The message should be a string. Here we make sure it's NUL-terminated,
-         * but only the part until first NUL will be used anyway. */
+        /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
+         * trailing NUL byte in the message, but don't expect it. */
+        if (n > 1 && memchr(buf, 0, n-1)) {
+                log_warning("Received notify message with embedded NUL bytes. Ignoring.");
+                return 0;
+        }
+
+        /* Make sure it's NUL-terminated. */
         buf[n] = 0;
 
         /* Notify every unit that might be interested, but try
          * to avoid notifying the same one multiple times. */
         u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
-        if (u1) {
+        if (u1)
                 manager_invoke_notify_message(m, u1, ucred->pid, buf, fds);
-                found = true;
-        }
 
         u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
-        if (u2 && u2 != u1) {
+        if (u2 && u2 != u1)
                 manager_invoke_notify_message(m, u2, ucred->pid, buf, fds);
-                found = true;
-        }
 
         u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
-        if (u3 && u3 != u2 && u3 != u1) {
+        if (u3 && u3 != u2 && u3 != u1)
                 manager_invoke_notify_message(m, u3, ucred->pid, buf, fds);
-                found = true;
-        }
 
-        if (!found)
+        if (!u1 && !u2 && !u3)
                 log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
 
         if (fdset_size(fds) > 0)
-                log_warning("Got auxiliary fds with notification message, closing all.");
+                log_warning("Got extra auxiliary fds with notification message, closing them.");
 
         return 0;
 }
@@ -1825,6 +1906,18 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) {
         return r;
 }
 
+static void manager_handle_ctrl_alt_del(Manager *m) {
+        /* If the user presses C-A-D more than
+         * 7 times within 2s, we reboot/shutdown immediately,
+         * unless it was disabled in system.conf */
+
+        if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
+                manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
+        else
+                emergency_action(m, m->cad_burst_action, NULL,
+                                "Ctrl-Alt-Del was pressed more than 7 times within 2s");
+}
+
 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
         Manager *m = userdata;
         ssize_t n;
@@ -1843,14 +1936,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
         for (;;) {
                 n = read(m->signal_fd, &sfsi, sizeof(sfsi));
                 if (n != sizeof(sfsi)) {
+                        if (n >= 0) {
+                                log_warning("Truncated read from signal fd (%zu bytes)!", n);
+                                return 0;
+                        }
 
-                        if (n >= 0)
-                                return -EIO;
-
-                        if (errno == EINTR || errno == EAGAIN)
+                        if (IN_SET(errno, EINTR, EAGAIN))
                                 break;
 
-                        return -errno;
+                        /* We return an error here, which will kill this handler,
+                         * to avoid a busy loop on read error. */
+                        return log_error_errno(errno, "Reading from signal fd failed: %m");
                 }
 
                 log_received_signal(sfsi.ssi_signo == SIGCHLD ||
@@ -1876,19 +1972,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
 
                 case SIGINT:
                         if (MANAGER_IS_SYSTEM(m)) {
-
-                                /* If the user presses C-A-D more than
-                                 * 7 times within 2s, we reboot
-                                 * immediately. */
-
-                                if (ratelimit_test(&m->ctrl_alt_del_ratelimit))
-                                        manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
-                                else {
-                                        log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
-                                        status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
-                                        m->exit_code = MANAGER_REBOOT;
-                                }
-
+                                manager_handle_ctrl_alt_del(m);
                                 break;
                         }
 
@@ -2174,6 +2258,7 @@ int manager_loop(Manager *m) {
 
 int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
         _cleanup_free_ char *n = NULL;
+        sd_id128_t invocation_id;
         Unit *u;
         int r;
 
@@ -2185,12 +2270,25 @@ int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e,
         if (r < 0)
                 return r;
 
+        /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
+         * as invocation ID. */
+        r = sd_id128_from_string(n, &invocation_id);
+        if (r >= 0) {
+                u = hashmap_get(m->units_by_invocation_id, &invocation_id);
+                if (u) {
+                        *_u = u;
+                        return 0;
+                }
+
+                return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id));
+        }
+
+        /* If this didn't work, we use the suffix as unit name. */
         r = manager_load_unit(m, n, NULL, e, &u);
         if (r < 0)
                 return r;
 
         *_u = u;
-
         return 0;
 }
 
@@ -2402,22 +2500,29 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
                 fprintf(f, "cgroups-agent-fd=%i\n", copy);
         }
 
-        if (m->kdbus_fd >= 0) {
-                int copy;
+        if (m->user_lookup_fds[0] >= 0) {
+                int copy0, copy1;
 
-                copy = fdset_put_dup(fds, m->kdbus_fd);
-                if (copy < 0)
-                        return copy;
+                copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
+                if (copy0 < 0)
+                        return copy0;
+
+                copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
+                if (copy1 < 0)
+                        return copy1;
 
-                fprintf(f, "kdbus-fd=%i\n", copy);
+                fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
         }
 
-        bus_track_serialize(m->subscribed, f);
+        bus_track_serialize(m->subscribed, f, "subscribed");
 
         r = dynamic_user_serialize(m, f, fds);
         if (r < 0)
                 return r;
 
+        manager_serialize_uid_refs(m, f);
+        manager_serialize_gid_refs(m, f);
+
         fputc('\n', f);
 
         HASHMAP_FOREACH_KEY(u, t, m->units, i) {
@@ -2584,27 +2689,31 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                                 m->cgroups_agent_fd = fdset_remove(fds, fd);
                         }
 
-                } else if (startswith(l, "kdbus-fd=")) {
-                        int fd;
+                } else if (startswith(l, "user-lookup=")) {
+                        int fd0, fd1;
 
-                        if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
-                                log_debug("Failed to parse kdbus fd: %s", l + 9);
+                        if (sscanf(l + 12, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
+                                log_debug("Failed to parse user lookup fd: %s", l + 12);
                         else {
-                                safe_close(m->kdbus_fd);
-                                m->kdbus_fd = fdset_remove(fds, fd);
+                                m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+                                safe_close_pair(m->user_lookup_fds);
+                                m->user_lookup_fds[0] = fdset_remove(fds, fd0);
+                                m->user_lookup_fds[1] = fdset_remove(fds, fd1);
                         }
 
                 } else if (startswith(l, "dynamic-user="))
                         dynamic_user_deserialize_one(m, l + 13, fds);
-                else {
-                        int k;
-
-                        k = bus_track_deserialize_item(&m->deserialized_subscribed, l);
-                        if (k < 0)
-                                log_debug_errno(k, "Failed to deserialize bus tracker object: %m");
-                        else if (k == 0)
-                                log_debug("Unknown serialization item '%s'", l);
-                }
+                else if (startswith(l, "destroy-ipc-uid="))
+                        manager_deserialize_uid_refs_one(m, l + 16);
+                else if (startswith(l, "destroy-ipc-gid="))
+                        manager_deserialize_gid_refs_one(m, l + 16);
+                else if (startswith(l, "subscribed=")) {
+
+                        if (strv_extend(&m->deserialized_subscribed, l+11) < 0)
+                                log_oom();
+
+                } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */
+                        log_debug("Unknown serialization item '%s'", l);
         }
 
         for (;;) {
@@ -2678,6 +2787,8 @@ int manager_reload(Manager *m) {
         lookup_paths_flush_generator(&m->lookup_paths);
         lookup_paths_free(&m->lookup_paths);
         dynamic_user_vacuum(m, false);
+        m->uid_refs = hashmap_free(m->uid_refs);
+        m->gid_refs = hashmap_free(m->gid_refs);
 
         q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
         if (q < 0 && r >= 0)
@@ -2711,12 +2822,20 @@ int manager_reload(Manager *m) {
         if (q < 0 && r >= 0)
                 r = q;
 
+        q = manager_setup_user_lookup_fd(m);
+        if (q < 0 && r >= 0)
+                r = q;
+
         /* Third, fire things up! */
         manager_coldplug(m);
 
         /* Release any dynamic users no longer referenced */
         dynamic_user_vacuum(m, true);
 
+        /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+        manager_vacuum_uid_refs(m);
+        manager_vacuum_gid_refs(m);
+
         /* Sync current state of bus names with our set of listening units */
         if (m->api_bus)
                 manager_sync_bus_names(m, m->api_bus);
@@ -2962,7 +3081,7 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
 
                 m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
                 if (!m->rlimit[i])
-                        return -ENOMEM;
+                        return log_oom();
         }
 
         return 0;
@@ -3150,6 +3269,300 @@ ManagerState manager_state(Manager *m) {
         return MANAGER_RUNNING;
 }
 
+#define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
+
+static void manager_unref_uid_internal(
+                Manager *m,
+                Hashmap **uid_refs,
+                uid_t uid,
+                bool destroy_now,
+                int (*_clean_ipc)(uid_t uid)) {
+
+        uint32_t c, n;
+
+        assert(m);
+        assert(uid_refs);
+        assert(uid_is_valid(uid));
+        assert(_clean_ipc);
+
+        /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
+         * that uid_t and gid_t are actually defined the same way, with the same validity rules.
+         *
+         * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
+         * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
+         * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
+         * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (uid == 0) /* We don't keep track of root, and will never destroy it */
+                return;
+
+        c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+        n = c & ~DESTROY_IPC_FLAG;
+        assert(n > 0);
+        n--;
+
+        if (destroy_now && n == 0) {
+                hashmap_remove(*uid_refs, UID_TO_PTR(uid));
+
+                if (c & DESTROY_IPC_FLAG) {
+                        log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
+                                  _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+                                  uid);
+                        (void) _clean_ipc(uid);
+                }
+        } else {
+                c = n | (c & DESTROY_IPC_FLAG);
+                assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
+        }
+}
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
+        manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
+}
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
+        manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
+}
+
+static int manager_ref_uid_internal(
+                Manager *m,
+                Hashmap **uid_refs,
+                uid_t uid,
+                bool clean_ipc) {
+
+        uint32_t c, n;
+        int r;
+
+        assert(m);
+        assert(uid_refs);
+        assert(uid_is_valid(uid));
+
+        /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
+         * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (uid == 0) /* We don't keep track of root, and will never destroy it */
+                return 0;
+
+        r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+        if (r < 0)
+                return r;
+
+        c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+        n = c & ~DESTROY_IPC_FLAG;
+        n++;
+
+        if (n & DESTROY_IPC_FLAG) /* check for overflow */
+                return -EOVERFLOW;
+
+        c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
+
+        return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+}
+
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
+        return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
+}
+
+int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
+        return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
+}
+
+static void manager_vacuum_uid_refs_internal(
+                Manager *m,
+                Hashmap **uid_refs,
+                int (*_clean_ipc)(uid_t uid)) {
+
+        Iterator i;
+        void *p, *k;
+
+        assert(m);
+        assert(uid_refs);
+        assert(_clean_ipc);
+
+        HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+                uint32_t c, n;
+                uid_t uid;
+
+                uid = PTR_TO_UID(k);
+                c = PTR_TO_UINT32(p);
+
+                n = c & ~DESTROY_IPC_FLAG;
+                if (n > 0)
+                        continue;
+
+                if (c & DESTROY_IPC_FLAG) {
+                        log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
+                                  _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+                                  uid);
+                        (void) _clean_ipc(uid);
+                }
+
+                assert_se(hashmap_remove(*uid_refs, k) == p);
+        }
+}
+
+void manager_vacuum_uid_refs(Manager *m) {
+        manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
+}
+
+void manager_vacuum_gid_refs(Manager *m) {
+        manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
+}
+
+static void manager_serialize_uid_refs_internal(
+                Manager *m,
+                FILE *f,
+                Hashmap **uid_refs,
+                const char *field_name) {
+
+        Iterator i;
+        void *p, *k;
+
+        assert(m);
+        assert(f);
+        assert(uid_refs);
+        assert(field_name);
+
+        /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter
+         * of it is better rebuild after a reload/reexec. */
+
+        HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+                uint32_t c;
+                uid_t uid;
+
+                uid = PTR_TO_UID(k);
+                c = PTR_TO_UINT32(p);
+
+                if (!(c & DESTROY_IPC_FLAG))
+                        continue;
+
+                fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
+        }
+}
+
+void manager_serialize_uid_refs(Manager *m, FILE *f) {
+        manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
+}
+
+void manager_serialize_gid_refs(Manager *m, FILE *f) {
+        manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
+}
+
+static void manager_deserialize_uid_refs_one_internal(
+                Manager *m,
+                Hashmap** uid_refs,
+                const char *value) {
+
+        uid_t uid;
+        uint32_t c;
+        int r;
+
+        assert(m);
+        assert(uid_refs);
+        assert(value);
+
+        r = parse_uid(value, &uid);
+        if (r < 0 || uid == 0) {
+                log_debug("Unable to parse UID reference serialization");
+                return;
+        }
+
+        r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+        if (r < 0) {
+                log_oom();
+                return;
+        }
+
+        c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+        if (c & DESTROY_IPC_FLAG)
+                return;
+
+        c |= DESTROY_IPC_FLAG;
+
+        r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+        if (r < 0) {
+                log_debug("Failed to add UID reference entry");
+                return;
+        }
+}
+
+void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
+        manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
+}
+
+void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
+        manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
+}
+
+int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        struct buffer {
+                uid_t uid;
+                gid_t gid;
+                char unit_name[UNIT_NAME_MAX+1];
+        } _packed_ buffer;
+
+        Manager *m = userdata;
+        ssize_t l;
+        size_t n;
+        Unit *u;
+
+        assert_se(source);
+        assert_se(m);
+
+        /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
+         * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
+         * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
+
+        l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
+        if (l < 0) {
+                if (errno == EINTR || errno == EAGAIN)
+                        return 0;
+
+                return log_error_errno(errno, "Failed to read from user lookup fd: %m");
+        }
+
+        if ((size_t) l <= offsetof(struct buffer, unit_name)) {
+                log_warning("Received too short user lookup message, ignoring.");
+                return 0;
+        }
+
+        if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
+                log_warning("Received too long user lookup message, ignoring.");
+                return 0;
+        }
+
+        if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
+                log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
+                return 0;
+        }
+
+        n = (size_t) l - offsetof(struct buffer, unit_name);
+        if (memchr(buffer.unit_name, 0, n)) {
+                log_warning("Received lookup message with embedded NUL character, ignoring.");
+                return 0;
+        }
+
+        buffer.unit_name[n] = 0;
+        u = manager_get_unit(m, buffer.unit_name);
+        if (!u) {
+                log_debug("Got user lookup message but unit doesn't exist, ignoring.");
+                return 0;
+        }
+
+        log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
+
+        unit_notify_user_lookup(u, buffer.uid, buffer.gid);
+        return 0;
+}
+
 static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
         [MANAGER_INITIALIZING] = "initializing",
         [MANAGER_STARTING] = "starting",
diff --git a/src/core/manager.h b/src/core/manager.h
index c681d5dc46..35172fdba9 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -81,6 +81,7 @@ struct Manager {
 
         /* Active jobs and units */
         Hashmap *units;  /* name string => Unit object n:1 */
+        Hashmap *units_by_invocation_id;
         Hashmap *jobs;   /* job id => Job object 1:1 */
 
         /* To make it easy to iterate through the units of a specific
@@ -143,6 +144,9 @@ struct Manager {
 
         sd_event_source *jobs_in_progress_event_source;
 
+        int user_lookup_fds[2];
+        sd_event_source *user_lookup_event_source;
+
         UnitFileScope unit_file_scope;
         LookupPaths lookup_paths;
         Set *unit_path_cache;
@@ -234,7 +238,6 @@ struct Manager {
         bool dispatching_dbus_queue:1;
 
         bool taint_usr:1;
-
         bool test_run:1;
 
         /* If non-zero, exit with the following value when the systemd
@@ -292,21 +295,26 @@ struct Manager {
          * value where Unit objects are contained. */
         Hashmap *units_requiring_mounts_for;
 
-        /* Reference to the kdbus bus control fd */
-        int kdbus_fd;
-
         /* Used for processing polkit authorization responses */
         Hashmap *polkit_registry;
 
         /* Dynamic users/groups, indexed by their name */
         Hashmap *dynamic_users;
 
-        /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */
+        /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
+        Hashmap *uid_refs;
+        Hashmap *gid_refs;
+
+        /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
         RateLimit ctrl_alt_del_ratelimit;
+        EmergencyAction cad_burst_action;
 
         const char *unit_log_field;
         const char *unit_log_format_string;
 
+        const char *invocation_log_field;
+        const char *invocation_log_format_string;
+
         int first_boot; /* tri-state */
 };
 
@@ -378,5 +386,20 @@ ManagerState manager_state(Manager *m);
 
 int manager_update_failed_units(Manager *m, Unit *u, bool failed);
 
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
+int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now);
+
+void manager_vacuum_uid_refs(Manager *m);
+void manager_vacuum_gid_refs(Manager *m);
+
+void manager_serialize_uid_refs(Manager *m, FILE *f);
+void manager_deserialize_uid_refs_one(Manager *m, const char *value);
+
+void manager_serialize_gid_refs(Manager *m, FILE *f);
+void manager_deserialize_gid_refs_one(Manager *m, const char *value);
+
 const char *manager_state_to_string(ManagerState m) _const_;
 ManagerState manager_state_from_string(const char *s) _pure_;
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 5d8ab0ec70..ca63a93e8b 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -99,10 +99,12 @@ static const MountPoint mount_table[] = {
           cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER },
         { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=755",                MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
           cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+        { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup2",    NULL,                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
+          cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER },
         { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_legacy_wanted, MNT_IN_CONTAINER           },
+          cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER           },
         { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",       MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+          cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER },
         { "pstore",      "/sys/fs/pstore",            "pstore",     NULL,                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
           NULL,          MNT_NONE                   },
 #ifdef ENABLE_EFI
diff --git a/src/core/mount.c b/src/core/mount.c
index f3ccf6d48a..d749e49df5 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -159,17 +159,6 @@ static void mount_init(Unit *u) {
         m->timeout_usec = u->manager->default_timeout_start_usec;
         m->directory_mode = 0755;
 
-        if (unit_has_name(u, "-.mount")) {
-                /* Don't allow start/stop for root directory */
-                u->refuse_manual_start = true;
-                u->refuse_manual_stop = true;
-        } else {
-                /* The stdio/kmsg bridge socket is on /, in order to avoid a
-                 * dep loop, don't use kmsg logging for -.mount */
-                m->exec_context.std_output = u->manager->default_std_output;
-                m->exec_context.std_error = u->manager->default_std_error;
-        }
-
         /* We need to make sure that /usr/bin/mount is always called
          * in the same process group as us, so that the autofs kernel
          * side doesn't send us another mount request while we are
@@ -577,6 +566,25 @@ static int mount_add_extras(Mount *m) {
         return 0;
 }
 
+static int mount_load_root_mount(Unit *u) {
+        assert(u);
+
+        if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+                return 0;
+
+        u->perpetual = true;
+        u->default_dependencies = false;
+
+        /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+        MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+        MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+        if (!u->description)
+                u->description = strdup("Root Mount");
+
+        return 1;
+}
+
 static int mount_load(Unit *u) {
         Mount *m = MOUNT(u);
         int r;
@@ -584,11 +592,14 @@ static int mount_load(Unit *u) {
         assert(u);
         assert(u->load_state == UNIT_STUB);
 
-        if (m->from_proc_self_mountinfo)
+        r = mount_load_root_mount(u);
+        if (r < 0)
+                return r;
+
+        if (m->from_proc_self_mountinfo || u->perpetual)
                 r = unit_load_fragment_and_dropin_optional(u);
         else
                 r = unit_load_fragment_and_dropin(u);
-
         if (r < 0)
                 return r;
 
@@ -677,7 +688,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sOptions: %s\n"
                 "%sFrom /proc/self/mountinfo: %s\n"
                 "%sFrom fragment: %s\n"
-                "%sDirectoryMode: %04o\n",
+                "%sDirectoryMode: %04o\n"
+                "%sSloppyOptions: %s\n"
+                "%sLazyUnmount: %s\n"
+                "%sForceUnmount: %s\n",
                 prefix, mount_state_to_string(m->state),
                 prefix, mount_result_to_string(m->result),
                 prefix, m->where,
@@ -686,7 +700,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, p ? strna(p->options) : "n/a",
                 prefix, yes_no(m->from_proc_self_mountinfo),
                 prefix, yes_no(m->from_fragment),
-                prefix, m->directory_mode);
+                prefix, m->directory_mode,
+                prefix, yes_no(m->sloppy_options),
+                prefix, yes_no(m->lazy_unmount),
+                prefix, yes_no(m->force_unmount));
 
         if (m->control_pid > 0)
                 fprintf(f,
@@ -769,6 +786,8 @@ static void mount_enter_dead(Mount *m, MountResult f) {
 
         exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager));
 
+        unit_unref_uid_gid(UNIT(m), true);
+
         dynamic_creds_destroy(&m->dynamic_creds);
 }
 
@@ -822,7 +841,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
 fail:
         log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
 
-        if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL)
+        if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL))
                 mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES);
         else
                 mount_enter_dead(m, MOUNT_FAILURE_RESOURCES);
@@ -844,6 +863,10 @@ static void mount_enter_unmounting(Mount *m) {
         m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT;
 
         r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL);
+        if (r >= 0 && m->lazy_unmount)
+                r = exec_command_append(m->control_command, "-l", NULL);
+        if (r >= 0 && m->force_unmount)
+                r = exec_command_append(m->control_command, "-f", NULL);
         if (r < 0)
                 goto fail;
 
@@ -974,18 +997,19 @@ static int mount_start(Unit *u) {
 
         /* We cannot fulfill this request right now, try again later
          * please! */
-        if (m->state == MOUNT_UNMOUNTING ||
-            m->state == MOUNT_UNMOUNTING_SIGTERM ||
-            m->state == MOUNT_UNMOUNTING_SIGKILL ||
-            m->state == MOUNT_MOUNTING_SIGTERM ||
-            m->state == MOUNT_MOUNTING_SIGKILL)
+        if (IN_SET(m->state,
+                   MOUNT_UNMOUNTING,
+                   MOUNT_UNMOUNTING_SIGTERM,
+                   MOUNT_UNMOUNTING_SIGKILL,
+                   MOUNT_MOUNTING_SIGTERM,
+                   MOUNT_MOUNTING_SIGKILL))
                 return -EAGAIN;
 
         /* Already on it! */
         if (m->state == MOUNT_MOUNTING)
                 return 0;
 
-        assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED);
+        assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
 
         r = unit_start_limit_test(u);
         if (r < 0) {
@@ -993,6 +1017,10 @@ static int mount_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         m->result = MOUNT_SUCCESS;
         m->reload_result = MOUNT_SUCCESS;
         m->reset_cpu_usage = true;
@@ -1007,19 +1035,21 @@ static int mount_stop(Unit *u) {
         assert(m);
 
         /* Already on it */
-        if (m->state == MOUNT_UNMOUNTING ||
-            m->state == MOUNT_UNMOUNTING_SIGKILL ||
-            m->state == MOUNT_UNMOUNTING_SIGTERM ||
-            m->state == MOUNT_MOUNTING_SIGTERM ||
-            m->state == MOUNT_MOUNTING_SIGKILL)
+        if (IN_SET(m->state,
+                   MOUNT_UNMOUNTING,
+                   MOUNT_UNMOUNTING_SIGKILL,
+                   MOUNT_UNMOUNTING_SIGTERM,
+                   MOUNT_MOUNTING_SIGTERM,
+                   MOUNT_MOUNTING_SIGKILL))
                 return 0;
 
-        assert(m->state == MOUNT_MOUNTING ||
-               m->state == MOUNT_MOUNTING_DONE ||
-               m->state == MOUNT_MOUNTED ||
-               m->state == MOUNT_REMOUNTING ||
-               m->state == MOUNT_REMOUNTING_SIGTERM ||
-               m->state == MOUNT_REMOUNTING_SIGKILL);
+        assert(IN_SET(m->state,
+                      MOUNT_MOUNTING,
+                      MOUNT_MOUNTING_DONE,
+                      MOUNT_MOUNTED,
+                      MOUNT_REMOUNTING,
+                      MOUNT_REMOUNTING_SIGTERM,
+                      MOUNT_REMOUNTING_SIGKILL));
 
         mount_enter_unmounting(m);
         return 1;
@@ -1147,7 +1177,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         m->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = MOUNT_SUCCESS;
         else if (code == CLD_EXITED)
                 f = MOUNT_FAILURE_EXIT_CODE;
@@ -1185,9 +1215,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         case MOUNT_MOUNTING_SIGKILL:
         case MOUNT_MOUNTING_SIGTERM:
 
-                if (f == MOUNT_SUCCESS)
-                        mount_enter_mounted(m, f);
-                else if (m->from_proc_self_mountinfo)
+                if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo)
+                        /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are
+                         * happy. If we see the first condition first, we should see the the second condition
+                         * immediately after – or /bin/mount lies to us and is broken. */
                         mount_enter_mounted(m, f);
                 else
                         mount_enter_dead(m, f);
@@ -1373,11 +1404,7 @@ static int mount_setup_unit(
         if (!u) {
                 delete = true;
 
-                u = unit_new(m, sizeof(Mount));
-                if (!u)
-                        return log_oom();
-
-                r = unit_add_name(u, e);
+                r = unit_new_for_name(m, sizeof(Mount), e, &u);
                 if (r < 0)
                         goto fail;
 
@@ -1464,17 +1491,9 @@ static int mount_setup_unit(
 
         MOUNT(u)->from_proc_self_mountinfo = true;
 
-        free(p->what);
-        p->what = w;
-        w = NULL;
-
-        free(p->options);
-        p->options = o;
-        o = NULL;
-
-        free(p->fstype);
-        p->fstype = f;
-        f = NULL;
+        free_and_replace(p->what, w);
+        free_and_replace(p->options, o);
+        free_and_replace(p->fstype, f);
 
         if (load_extras) {
                 r = mount_add_extras(MOUNT(u));
@@ -1580,11 +1599,46 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) {
         return 1;
 }
 
+static int synthesize_root_mount(Manager *m) {
+        Unit *u;
+        int r;
+
+        assert(m);
+
+        /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+         * unconditionally synthesize it here and mark it as perpetual. */
+
+        u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+        if (!u) {
+                r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
+        }
+
+        u->perpetual = true;
+        MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+        unit_add_to_load_queue(u);
+        unit_add_to_dbus_queue(u);
+
+        return 0;
+}
+
+static bool mount_is_mounted(Mount *m) {
+        assert(m);
+
+        return UNIT(m)->perpetual || m->is_mounted;
+}
+
 static void mount_enumerate(Manager *m) {
         int r;
 
         assert(m);
 
+        r = synthesize_root_mount(m);
+        if (r < 0)
+                goto fail;
+
         mnt_init_debug(0);
 
         if (!m->mount_monitor) {
@@ -1691,7 +1745,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
         LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
                 Mount *mount = MOUNT(u);
 
-                if (!mount->is_mounted) {
+                if (!mount_is_mounted(mount)) {
 
                         /* A mount point is not around right now. It
                          * might be gone, or might never have
@@ -1730,9 +1784,10 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
 
                         case MOUNT_DEAD:
                         case MOUNT_FAILED:
-                                /* This has just been mounted by
-                                 * somebody else, follow the state
-                                 * change. */
+
+                                /* This has just been mounted by somebody else, follow the state change, but let's
+                                 * generate a new invocation ID for this implicitly and automatically. */
+                                (void) unit_acquire_invocation_id(UNIT(mount));
                                 mount_enter_mounted(mount, MOUNT_SUCCESS);
                                 break;
 
@@ -1751,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
                         }
                 }
 
-                if (mount->is_mounted &&
+                if (mount_is_mounted(mount) &&
                     mount->from_proc_self_mountinfo &&
                     mount->parameters_proc_self_mountinfo.what) {
 
diff --git a/src/core/mount.h b/src/core/mount.h
index ac27b518cc..9f7326ba6a 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -71,6 +71,9 @@ struct Mount {
 
         bool sloppy_options;
 
+        bool lazy_unmount;
+        bool force_unmount;
+
         MountResult result;
         MountResult reload_result;
 
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 52a2505d94..1195e9a854 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -29,6 +29,7 @@
 #include "alloc-util.h"
 #include "dev-setup.h"
 #include "fd-util.h"
+#include "fs-util.h"
 #include "loopback-setup.h"
 #include "missing.h"
 #include "mkdir.h"
@@ -53,61 +54,245 @@ typedef enum MountMode {
         PRIVATE_TMP,
         PRIVATE_VAR_TMP,
         PRIVATE_DEV,
-        READWRITE
+        READWRITE,
 } MountMode;
 
 typedef struct BindMount {
-        const char *path;
+        const char *path; /* stack memory, doesn't need to be freed explicitly */
+        char *chased; /* malloc()ed memory, needs to be freed */
         MountMode mode;
-        bool done;
-        bool ignore;
+        bool ignore; /* Ignore if path does not exist */
 } BindMount;
 
+typedef struct TargetMount {
+        const char *path;
+        MountMode mode;
+        bool ignore; /* Ignore if path does not exist */
+} TargetMount;
+
+/*
+ * The following Protect tables are to protect paths and mark some of them
+ * READONLY, in case a path is covered by an option from another table, then
+ * it is marked READWRITE in the current one, and the more restrictive mode is
+ * applied from that other table. This way all options can be combined in a
+ * safe and comprehensible way for users.
+ */
+
+/* ProtectKernelTunables= option and the related filesystem APIs */
+static const TargetMount protect_kernel_tunables_table[] = {
+        { "/proc/sys",                  READONLY,       false },
+        { "/proc/sysrq-trigger",        READONLY,       true  },
+        { "/proc/latency_stats",        READONLY,       true  },
+        { "/proc/mtrr",                 READONLY,       true  },
+        { "/proc/apm",                  READONLY,       true  },
+        { "/proc/acpi",                 READONLY,       true  },
+        { "/proc/timer_stats",          READONLY,       true  },
+        { "/proc/asound",               READONLY,       true  },
+        { "/proc/bus",                  READONLY,       true  },
+        { "/proc/fs",                   READONLY,       true  },
+        { "/proc/irq",                  READONLY,       true  },
+        { "/sys",                       READONLY,       false },
+        { "/sys/kernel/debug",          READONLY,       true  },
+        { "/sys/kernel/tracing",        READONLY,       true  },
+        { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
+};
+
+/* ProtectKernelModules= option */
+static const TargetMount protect_kernel_modules_table[] = {
+#ifdef HAVE_SPLIT_USR
+        { "/lib/modules",               INACCESSIBLE,   true  },
+#endif
+        { "/usr/lib/modules",           INACCESSIBLE,   true  },
+};
+
+/*
+ * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
+ * system should be protected by ProtectSystem=
+ */
+static const TargetMount protect_home_read_only_table[] = {
+        { "/home",      READONLY,       true },
+        { "/run/user",  READONLY,       true },
+        { "/root",      READONLY,       true },
+};
+
+/* ProtectHome=yes table */
+static const TargetMount protect_home_yes_table[] = {
+        { "/home",      INACCESSIBLE,   true },
+        { "/run/user",  INACCESSIBLE,   true },
+        { "/root",      INACCESSIBLE,   true },
+};
+
+/* ProtectSystem=yes table */
+static const TargetMount protect_system_yes_table[] = {
+        { "/usr",       READONLY,       false },
+        { "/boot",      READONLY,       true  },
+        { "/efi",       READONLY,       true  },
+};
+
+/* ProtectSystem=full includes ProtectSystem=yes */
+static const TargetMount protect_system_full_table[] = {
+        { "/usr",       READONLY,       false },
+        { "/boot",      READONLY,       true  },
+        { "/efi",       READONLY,       true  },
+        { "/etc",       READONLY,       false },
+};
+
+/*
+ * ProtectSystem=strict table. In this strict mode, we mount everything
+ * read-only, except for /proc, /dev, /sys which are the kernel API VFS,
+ * which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal.
+ * (And of course /home and friends are also left writable, as ProtectHome=
+ * shall manage those, orthogonally).
+ */
+static const TargetMount protect_system_strict_table[] = {
+        { "/",          READONLY,       false },
+        { "/proc",      READWRITE,      false },      /* ProtectKernelTunables= */
+        { "/sys",       READWRITE,      false },      /* ProtectKernelTunables= */
+        { "/dev",       READWRITE,      false },      /* PrivateDevices= */
+        { "/home",      READWRITE,      true  },      /* ProtectHome= */
+        { "/run/user",  READWRITE,      true  },      /* ProtectHome= */
+        { "/root",      READWRITE,      true  },      /* ProtectHome= */
+};
+
+static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) {
+        (*p)->path = path;
+        (*p)->mode = mode;
+        (*p)->ignore = ignore;
+}
+
 static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         char **i;
 
         assert(p);
 
         STRV_FOREACH(i, strv) {
+                bool ignore = false;
 
-                (*p)->ignore = false;
-                (*p)->done = false;
-
-                if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
-                        (*p)->ignore = true;
+                if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) {
                         (*i)++;
+                        ignore = true;
                 }
 
                 if (!path_is_absolute(*i))
                         return -EINVAL;
 
-                (*p)->path = *i;
-                (*p)->mode = mode;
+                set_bind_mount(p, *i, mode, ignore);
                 (*p)++;
         }
 
         return 0;
 }
 
-static int mount_path_compare(const void *a, const void *b) {
-        const BindMount *p = a, *q = b;
-        int d;
+static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) {
+        unsigned i;
 
-        d = path_compare(p->path, q->path);
+        assert(p);
+        assert(mounts);
+
+        for (i = 0; i < size; i++) {
+                /*
+                 * Here we assume that the ignore field is set during
+                 * declaration we do not support "-" at the beginning.
+                 */
+                const TargetMount *m = &mounts[i];
+                const char *path = prefix_roota(root_directory, m->path);
+
+                if (!path_is_absolute(path))
+                        return -EINVAL;
+
+                set_bind_mount(p, path, m->mode, m->ignore);
+                (*p)++;
+        }
+
+        return 0;
+}
+
+static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) {
+        assert(p);
+
+        return append_target_mounts(p, root_directory, protect_kernel_tunables_table,
+                                    ELEMENTSOF(protect_kernel_tunables_table));
+}
+
+static int append_protect_kernel_modules(BindMount **p, const char *root_directory) {
+        assert(p);
+
+        return append_target_mounts(p, root_directory, protect_kernel_modules_table,
+                                    ELEMENTSOF(protect_kernel_modules_table));
+}
+
+static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
+        int r = 0;
+
+        assert(p);
+
+        if (protect_home == PROTECT_HOME_NO)
+                return 0;
+
+        switch (protect_home) {
+        case PROTECT_HOME_READ_ONLY:
+                r = append_target_mounts(p, root_directory, protect_home_read_only_table,
+                                         ELEMENTSOF(protect_home_read_only_table));
+                break;
+        case PROTECT_HOME_YES:
+                r = append_target_mounts(p, root_directory, protect_home_yes_table,
+                                         ELEMENTSOF(protect_home_yes_table));
+                break;
+        default:
+                r = -EINVAL;
+                break;
+        }
+
+        return r;
+}
 
-        if (d == 0) {
-                /* If the paths are equal, check the mode */
-                if (p->mode < q->mode)
-                        return -1;
+static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) {
+        int r = 0;
 
-                if (p->mode > q->mode)
-                        return 1;
+        assert(p);
 
+        if (protect_system == PROTECT_SYSTEM_NO)
                 return 0;
+
+        switch (protect_system) {
+        case PROTECT_SYSTEM_STRICT:
+                r = append_target_mounts(p, root_directory, protect_system_strict_table,
+                                         ELEMENTSOF(protect_system_strict_table));
+                break;
+        case PROTECT_SYSTEM_YES:
+                r = append_target_mounts(p, root_directory, protect_system_yes_table,
+                                         ELEMENTSOF(protect_system_yes_table));
+                break;
+        case PROTECT_SYSTEM_FULL:
+                r = append_target_mounts(p, root_directory, protect_system_full_table,
+                                         ELEMENTSOF(protect_system_full_table));
+                break;
+        default:
+                r = -EINVAL;
+                break;
         }
 
+        return r;
+}
+
+static int mount_path_compare(const void *a, const void *b) {
+        const BindMount *p = a, *q = b;
+        int d;
+
         /* If the paths are not equal, then order prefixes first */
-        return d;
+        d = path_compare(p->path, q->path);
+        if (d != 0)
+                return d;
+
+        /* If the paths are equal, check the mode */
+        if (p->mode < q->mode)
+                return -1;
+
+        if (p->mode > q->mode)
+                return 1;
+
+        return 0;
 }
 
 static void drop_duplicates(BindMount *m, unsigned *n) {
@@ -116,16 +301,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
         assert(m);
         assert(n);
 
+        /* Drops duplicate entries. Expects that the array is properly ordered already. */
+
         for (f = m, t = m, previous = NULL; f < m+*n; f++) {
 
-                /* The first one wins */
-                if (previous && path_equal(f->path, previous->path))
+                /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
+                 * above. */
+                if (previous && path_equal(f->path, previous->path)) {
+                        log_debug("%s is duplicate.", f->path);
                         continue;
+                }
 
                 *t = *f;
-
                 previous = t;
+                t++;
+        }
+
+        *n = t - m;
+}
+
+static void drop_inaccessible(BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+        const char *clear = NULL;
 
+        assert(m);
+        assert(n);
+
+        /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
+         * ordered already. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
+                 * it, as inaccessible paths really should drop the entire subtree. */
+                if (clear && path_startswith(f->path, clear)) {
+                        log_debug("%s is masked by %s.", f->path, clear);
+                        continue;
+                }
+
+                clear = f->mode == INACCESSIBLE ? f->path : NULL;
+
+                *t = *f;
+                t++;
+        }
+
+        *n = t - m;
+}
+
+static void drop_nop(BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+
+        assert(m);
+        assert(n);
+
+        /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
+         * list is ordered by prefixes. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                /* Only suppress such subtrees for READONLY and READWRITE entries */
+                if (IN_SET(f->mode, READONLY, READWRITE)) {
+                        BindMount *p;
+                        bool found = false;
+
+                        /* Now let's find the first parent of the entry we are looking at. */
+                        for (p = t-1; p >= m; p--) {
+                                if (path_startswith(f->path, p->path)) {
+                                        found = true;
+                                        break;
+                                }
+                        }
+
+                        /* We found it, let's see if it's the same mode, if so, we can drop this entry */
+                        if (found && p->mode == f->mode) {
+                                log_debug("%s is redundant by %s", f->path, p->path);
+                                continue;
+                        }
+                }
+
+                *t = *f;
+                t++;
+        }
+
+        *n = t - m;
+}
+
+static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+
+        assert(m);
+        assert(n);
+
+        if (!root_directory)
+                return;
+
+        /* Drops all mounts that are outside of the root directory. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                if (!path_startswith(f->path, root_directory)) {
+                        log_debug("%s is outside of root directory.", f->path);
+                        continue;
+                }
+
+                *t = *f;
                 t++;
         }
 
@@ -278,24 +557,23 @@ static int apply_mount(
 
         const char *what;
         int r;
-        struct stat target;
 
         assert(m);
 
+        log_debug("Applying namespace mount on %s", m->path);
+
         switch (m->mode) {
 
-        case INACCESSIBLE:
+        case INACCESSIBLE: {
+                struct stat target;
 
                 /* First, get rid of everything that is below if there
                  * is anything... Then, overmount it with an
                  * inaccessible path. */
-                umount_recursive(m->path, 0);
+                (void) umount_recursive(m->path, 0);
 
-                if (lstat(m->path, &target) < 0) {
-                        if (m->ignore && errno == ENOENT)
-                                return 0;
-                        return -errno;
-                }
+                if (lstat(m->path, &target) < 0)
+                        return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
 
                 what = mode_to_inaccessible_node(target.st_mode);
                 if (!what) {
@@ -303,11 +581,20 @@ static int apply_mount(
                         return -ELOOP;
                 }
                 break;
+        }
+
         case READONLY:
         case READWRITE:
-                /* Nothing to mount here, we just later toggle the
-                 * MS_RDONLY bit for the mount point */
-                return 0;
+
+                r = path_is_mount_point(m->path, 0);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
+                if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
+                        return 0;
+
+                /* This isn't a mount point yet, let's make it one. */
+                what = m->path;
+                break;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
@@ -326,68 +613,133 @@ static int apply_mount(
 
         assert(what);
 
-        r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
-        if (r >= 0) {
-                log_debug("Successfully mounted %s to %s", what, m->path);
-                return r;
-        } else {
-                if (m->ignore && errno == ENOENT)
-                        return 0;
+        if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0)
                 return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
-        }
+
+        log_debug("Successfully mounted %s to %s", what, m->path);
+        return 0;
 }
 
-static int make_read_only(BindMount *m) {
-        int r;
+static int make_read_only(BindMount *m, char **blacklist) {
+        int r = 0;
 
         assert(m);
 
         if (IN_SET(m->mode, INACCESSIBLE, READONLY))
-                r = bind_remount_recursive(m->path, true);
-        else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
-                r = bind_remount_recursive(m->path, false);
-                if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
-                        if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
-                                r = -errno;
+                r = bind_remount_recursive(m->path, true, blacklist);
+        else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
+                if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+                        r = -errno;
         } else
-                r = 0;
-
-        if (m->ignore && r == -ENOENT)
                 return 0;
 
+        /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
+         * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
+         * read-only mounts already applied. */
+
         return r;
 }
 
+static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+        int r;
+
+        assert(m);
+        assert(n);
+
+        /* Since mount() will always follow symlinks and we need to take the different root directory into account we
+         * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we
+         * can't resolve the path, and which have been marked for such removal. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                r = chase_symlinks(f->path, root_directory, &f->chased);
+                if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */
+                        continue;
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path);
+
+                if (path_equal(f->path, f->chased))
+                        f->chased = mfree(f->chased);
+                else {
+                        log_debug("Chased %s → %s", f->path, f->chased);
+                        f->path = f->chased;
+                }
+
+                *t = *f;
+                t++;
+        }
+
+        *n = t - m;
+        return 0;
+}
+
+static unsigned namespace_calculate_mounts(
+                const NameSpaceInfo *ns_info,
+                char** read_write_paths,
+                char** read_only_paths,
+                char** inaccessible_paths,
+                const char* tmp_dir,
+                const char* var_tmp_dir,
+                ProtectHome protect_home,
+                ProtectSystem protect_system) {
+
+        unsigned protect_home_cnt;
+        unsigned protect_system_cnt =
+                (protect_system == PROTECT_SYSTEM_STRICT ?
+                 ELEMENTSOF(protect_system_strict_table) :
+                 ((protect_system == PROTECT_SYSTEM_FULL) ?
+                  ELEMENTSOF(protect_system_full_table) :
+                  ((protect_system == PROTECT_SYSTEM_YES) ?
+                   ELEMENTSOF(protect_system_yes_table) : 0)));
+
+        protect_home_cnt =
+                (protect_home == PROTECT_HOME_YES ?
+                 ELEMENTSOF(protect_home_yes_table) :
+                 ((protect_home == PROTECT_HOME_READ_ONLY) ?
+                  ELEMENTSOF(protect_home_read_only_table) : 0));
+
+        return !!tmp_dir + !!var_tmp_dir +
+                strv_length(read_write_paths) +
+                strv_length(read_only_paths) +
+                strv_length(inaccessible_paths) +
+                ns_info->private_dev +
+                (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+                (ns_info->protect_control_groups ? 1 : 0) +
+                (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
+                protect_home_cnt + protect_system_cnt;
+}
+
 int setup_namespace(
                 const char* root_directory,
+                const NameSpaceInfo *ns_info,
                 char** read_write_paths,
                 char** read_only_paths,
                 char** inaccessible_paths,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
-                bool private_dev,
                 ProtectHome protect_home,
                 ProtectSystem protect_system,
                 unsigned long mount_flags) {
 
         BindMount *m, *mounts = NULL;
+        bool make_slave = false;
         unsigned n;
         int r = 0;
 
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
-        if (unshare(CLONE_NEWNS) < 0)
-                return -errno;
+        n = namespace_calculate_mounts(ns_info,
+                                       read_write_paths,
+                                       read_only_paths,
+                                       inaccessible_paths,
+                                       tmp_dir, var_tmp_dir,
+                                       protect_home, protect_system);
 
-        n = !!tmp_dir + !!var_tmp_dir +
-                strv_length(read_write_paths) +
-                strv_length(read_only_paths) +
-                strv_length(inaccessible_paths) +
-                private_dev +
-                (protect_home != PROTECT_HOME_NO ? 3 : 0) +
-                (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
-                (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
+        /* Set mount slave mode */
+        if (root_directory || n > 0)
+                make_slave = true;
 
         if (n > 0) {
                 m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
@@ -415,100 +767,127 @@ int setup_namespace(
                         m++;
                 }
 
-                if (private_dev) {
+                if (ns_info->private_dev) {
                         m->path = prefix_roota(root_directory, "/dev");
                         m->mode = PRIVATE_DEV;
                         m++;
                 }
 
-                if (protect_home != PROTECT_HOME_NO) {
-                        const char *home_dir, *run_user_dir, *root_dir;
-
-                        home_dir = prefix_roota(root_directory, "/home");
-                        home_dir = strjoina("-", home_dir);
-                        run_user_dir = prefix_roota(root_directory, "/run/user");
-                        run_user_dir = strjoina("-", run_user_dir);
-                        root_dir = prefix_roota(root_directory, "/root");
-                        root_dir = strjoina("-", root_dir);
-
-                        r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
-                                protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+                if (ns_info->protect_kernel_tunables) {
+                        r = append_protect_kernel_tunables(&m, root_directory);
                         if (r < 0)
                                 return r;
                 }
 
-                if (protect_system != PROTECT_SYSTEM_NO) {
-                        const char *usr_dir, *boot_dir, *etc_dir;
-
-                        usr_dir = prefix_roota(root_directory, "/usr");
-                        boot_dir = prefix_roota(root_directory, "/boot");
-                        boot_dir = strjoina("-", boot_dir);
-                        etc_dir = prefix_roota(root_directory, "/etc");
-
-                        r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
-                                ? STRV_MAKE(usr_dir, boot_dir, etc_dir)
-                                : STRV_MAKE(usr_dir, boot_dir), READONLY);
+                if (ns_info->protect_kernel_modules) {
+                        r = append_protect_kernel_modules(&m, root_directory);
                         if (r < 0)
                                 return r;
                 }
 
+                if (ns_info->protect_control_groups) {
+                        m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
+                        m->mode = READONLY;
+                        m++;
+                }
+
+                r = append_protect_home(&m, root_directory, protect_home);
+                if (r < 0)
+                        return r;
+
+                r = append_protect_system(&m, root_directory, protect_system);
+                if (r < 0)
+                        return r;
+
                 assert(mounts + n == m);
 
+                /* Resolve symlinks manually first, as mount() will always follow them relative to the host's
+                 * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit
+                 * racy. */
+                r = chase_all_symlinks(root_directory, mounts, &n);
+                if (r < 0)
+                        goto finish;
+
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
+
                 drop_duplicates(mounts, &n);
+                drop_outside_root(root_directory, mounts, &n);
+                drop_inaccessible(mounts, &n);
+                drop_nop(mounts, &n);
         }
 
-        if (n > 0 || root_directory) {
+        if (unshare(CLONE_NEWNS) < 0) {
+                r = -errno;
+                goto finish;
+        }
+
+        if (make_slave) {
                 /* Remount / as SLAVE so that nothing now mounted in the namespace
                    shows up in the parent */
-                if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
-                        return -errno;
+                if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
         }
 
         if (root_directory) {
-                /* Turn directory into bind mount */
-                if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0)
-                        return -errno;
+                /* Turn directory into bind mount, if it isn't one yet */
+                r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW);
+                if (r < 0)
+                        goto finish;
+                if (r == 0) {
+                        if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
+                }
         }
 
         if (n > 0) {
+                char **blacklist;
+                unsigned j;
+
+                /* First round, add in all special mounts we need */
                 for (m = mounts; m < mounts + n; ++m) {
                         r = apply_mount(m, tmp_dir, var_tmp_dir);
                         if (r < 0)
-                                goto fail;
+                                goto finish;
                 }
 
+                /* Create a blacklist we can pass to bind_mount_recursive() */
+                blacklist = newa(char*, n+1);
+                for (j = 0; j < n; j++)
+                        blacklist[j] = (char*) mounts[j].path;
+                blacklist[j] = NULL;
+
+                /* Second round, flip the ro bits if necessary. */
                 for (m = mounts; m < mounts + n; ++m) {
-                        r = make_read_only(m);
+                        r = make_read_only(m, blacklist);
                         if (r < 0)
-                                goto fail;
+                                goto finish;
                 }
         }
 
         if (root_directory) {
                 /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
                 r = mount_move_root(root_directory);
-
-                /* at this point, we cannot rollback */
                 if (r < 0)
-                        return r;
+                        goto finish;
         }
 
         /* Remount / as the desired mode. Not that this will not
          * reestablish propagation from our side to the host, since
          * what's disconnected is disconnected. */
-        if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
-                /* at this point, we cannot rollback */
-                return -errno;
+        if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+                r = -errno;
+                goto finish;
+        }
 
-        return 0;
+        r = 0;
 
-fail:
-        if (n > 0) {
-                for (m = mounts; m < mounts + n; ++m)
-                        if (m->done)
-                                (void) umount2(m->path, MNT_DETACH);
-        }
+finish:
+        for (m = mounts; m < mounts + n; m++)
+                free(m->chased);
 
         return r;
 }
@@ -658,6 +1037,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
         [PROTECT_SYSTEM_NO] = "no",
         [PROTECT_SYSTEM_YES] = "yes",
         [PROTECT_SYSTEM_FULL] = "full",
+        [PROTECT_SYSTEM_STRICT] = "strict",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 1aedf5f208..6310638e9a 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -4,6 +4,7 @@
   This file is part of systemd.
 
   Copyright 2010 Lennart Poettering
+  Copyright 2016 Djalal Harouni
 
   systemd is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +20,8 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+typedef struct NameSpaceInfo NameSpaceInfo;
+
 #include <stdbool.h>
 
 #include "macro.h"
@@ -35,17 +38,25 @@ typedef enum ProtectSystem {
         PROTECT_SYSTEM_NO,
         PROTECT_SYSTEM_YES,
         PROTECT_SYSTEM_FULL,
+        PROTECT_SYSTEM_STRICT,
         _PROTECT_SYSTEM_MAX,
         _PROTECT_SYSTEM_INVALID = -1
 } ProtectSystem;
 
+struct NameSpaceInfo {
+        bool private_dev:1;
+        bool protect_control_groups:1;
+        bool protect_kernel_tunables:1;
+        bool protect_kernel_modules:1;
+};
+
 int setup_namespace(const char *chroot,
+                    const NameSpaceInfo *ns_info,
                     char **read_write_paths,
                     char **read_only_paths,
                     char **inaccessible_paths,
                     const char *tmp_dir,
                     const char *var_tmp_dir,
-                    bool private_dev,
                     ProtectHome protect_home,
                     ProtectSystem protect_system,
                     unsigned long mount_flags);
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
index 14f6aec029..a61677e645 100644
--- a/src/core/org.freedesktop.systemd1.conf
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -54,6 +54,10 @@
 
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="GetUnitByInvocationID"/>
+
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
                        send_member="LoadUnit"/>
 
                 <allow send_destination="org.freedesktop.systemd1"
@@ -90,6 +94,10 @@
 
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="GetUnitFileLinks"/>
+
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
                        send_member="ListJobs"/>
 
                 <allow send_destination="org.freedesktop.systemd1"
@@ -184,6 +192,14 @@
 
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="RefUnit"/>
+
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="UnrefUnit"/>
+
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
                        send_member="EnableUnitFiles"/>
 
                 <allow send_destination="org.freedesktop.systemd1"
diff --git a/src/core/path.c b/src/core/path.c
index 10f9b06974..83f794be89 100644
--- a/src/core/path.c
+++ b/src/core/path.c
@@ -577,6 +577,10 @@ static int path_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         path_mkdir(p);
 
         p->result = PATH_SUCCESS;
diff --git a/src/core/scope.c b/src/core/scope.c
index b278aed3d6..d6e1f8e392 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -147,6 +147,32 @@ static int scope_verify(Scope *s) {
         return 0;
 }
 
+static int scope_load_init_scope(Unit *u) {
+        assert(u);
+
+        if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+                return 0;
+
+        u->transient = true;
+        u->perpetual = true;
+
+        /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+         * synthesize it here, instead of relying on the unit file on disk. */
+
+        u->default_dependencies = false;
+        u->ignore_on_isolate = true;
+
+        SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
+
+        /* Prettify things, if we can. */
+        if (!u->description)
+                u->description = strdup("System and Service Manager");
+        if (!u->documentation)
+                (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+        return 1;
+}
+
 static int scope_load(Unit *u) {
         Scope *s = SCOPE(u);
         int r;
@@ -158,6 +184,9 @@ static int scope_load(Unit *u) {
                 /* Refuse to load non-transient scope units, but allow them while reloading. */
                 return -ENOENT;
 
+        r = scope_load_init_scope(u);
+        if (r < 0)
+                return r;
         r = unit_load_fragment_and_dropin_optional(u);
         if (r < 0)
                 return r;
@@ -298,6 +327,10 @@ static int scope_start(Unit *u) {
         if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
                 return -ENOENT;
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         (void) unit_realize_cgroup(u);
         (void) unit_reset_cpu_usage(u);
 
@@ -441,7 +474,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* If the PID set is empty now, then let's finish this off
            (On unified we use proper notifications) */
-        if (cg_unified() <= 0 && set_isempty(u->pids))
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
                 scope_notify_cgroup_empty_event(u);
 }
 
@@ -530,34 +563,16 @@ static void scope_enumerate(Manager *m) {
 
         u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
         if (!u) {
-                u = unit_new(m, sizeof(Scope));
-                if (!u) {
-                        log_oom();
-                        return;
-                }
-
-                r = unit_add_name(u, SPECIAL_INIT_SCOPE);
+                r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
                 if (r < 0)  {
-                        unit_free(u);
-                        log_error_errno(r, "Failed to add init.scope name");
+                        log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
                         return;
                 }
         }
 
         u->transient = true;
-        u->default_dependencies = false;
-        u->no_gc = true;
-        u->ignore_on_isolate = true;
-        u->refuse_manual_start = true;
-        u->refuse_manual_stop = true;
+        u->perpetual = true;
         SCOPE(u)->deserialized_state = SCOPE_RUNNING;
-        SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
-
-        /* Prettify things, if we can. */
-        if (!u->description)
-                u->description = strdup("System and Service Manager");
-        if (!u->documentation)
-                (void) strv_extend(&u->documentation, "man:systemd(1)");
 
         unit_add_to_load_queue(u);
         unit_add_to_dbus_queue(u);
diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h
index 8f1f058a32..f46370d020 100644
--- a/src/core/selinux-access.h
+++ b/src/core/selinux-access.h
@@ -33,7 +33,7 @@ int mac_selinux_generic_access_check(sd_bus_message *message, const char *path,
 
 #define mac_selinux_unit_access_check(unit, message, permission, error) \
         ({                                                              \
-                Unit *_unit = (unit);                                   \
+                const Unit *_unit = (unit);                             \
                 mac_selinux_generic_access_check((message), _unit->source_path ?: _unit->fragment_path, (permission), (error)); \
         })
 
diff --git a/src/core/service.c b/src/core/service.c
index 4a37702f52..a7274a758f 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
         free(fs);
 }
 
-static void service_release_resources(Unit *u) {
+static void service_release_fd_store(Service *s) {
+        assert(s);
+
+        log_unit_debug(UNIT(s), "Releasing all stored fds");
+        while (s->fd_store)
+                service_fd_store_unlink(s->fd_store);
+
+        assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u, bool inactive) {
         Service *s = SERVICE(u);
 
         assert(s);
@@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) {
         if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
                 return;
 
-        log_unit_debug(u, "Releasing all resources.");
+        log_unit_debug(u, "Releasing resources.");
 
         s->stdin_fd = safe_close(s->stdin_fd);
         s->stdout_fd = safe_close(s->stdout_fd);
         s->stderr_fd = safe_close(s->stderr_fd);
 
-        while (s->fd_store)
-                service_fd_store_unlink(s->fd_store);
-
-        assert(s->n_fd_store == 0);
+        if (inactive)
+                service_release_fd_store(s);
 }
 
 static void service_done(Unit *u) {
@@ -350,7 +358,7 @@ static void service_done(Unit *u) {
 
         s->timer_event_source = sd_event_source_unref(s->timer_event_source);
 
-        service_release_resources(u);
+        service_release_resources(u, true);
 }
 
 static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
@@ -360,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us
         assert(fs);
 
         /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+        log_unit_debug(UNIT(fs->service),
+                       "Received %s on stored fd %d (%s), closing.",
+                       revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+                       fs->fd, strna(fs->fdname));
         service_fd_store_unlink(fs);
         return 0;
 }
@@ -368,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
         ServiceFDStore *fs;
         int r;
 
+        /* fd is always consumed if we return >= 0 */
+
         assert(s);
         assert(fd >= 0);
 
         if (s->n_fd_store >= s->n_fd_store_max)
-                return 0;
+                return -EXFULL; /* Our store is full.
+                                 * Use this errno rather than E[NM]FILE to distinguish from
+                                 * the case where systemd itself hits the file limit. */
 
         LIST_FOREACH(fd_store, fs, s->fd_store) {
                 r = same_fd(fs->fd, fd);
                 if (r < 0)
                         return r;
                 if (r > 0) {
-                        /* Already included */
                         safe_close(fd);
-                        return 1;
+                        return 0; /* fd already included */
                 }
         }
 
@@ -409,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
         LIST_PREPEND(fd_store, s->fd_store, fs);
         s->n_fd_store++;
 
-        return 1;
+        return 1; /* fd newly stored */
 }
 
 static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
@@ -417,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
 
         assert(s);
 
-        if (fdset_size(fds) <= 0)
-                return 0;
-
-        while (s->n_fd_store < s->n_fd_store_max) {
+        while (fdset_size(fds) > 0) {
                 _cleanup_close_ int fd = -1;
 
                 fd = fdset_steal_first(fds);
@@ -428,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
                         break;
 
                 r = service_add_fd_store(s, fd, name);
+                if (r == -EXFULL)
+                        return log_unit_warning_errno(UNIT(s), r,
+                                                      "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+                                                      s->n_fd_store_max);
                 if (r < 0)
-                        return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
-                if (r > 0) {
-                        log_unit_debug(UNIT(s), "Added fd to fd store.");
-                        fd = -1;
-                }
+                        return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+                if (r > 0)
+                        log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
+                fd = -1;
         }
 
-        if (fdset_size(fds) > 0)
-                log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max);
-
         return 0;
 }
 
@@ -1225,6 +1237,7 @@ static int service_spawn(
                         return r;
 
                 n_fds = r;
+                log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds);
         }
 
         r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
@@ -1256,10 +1269,16 @@ static int service_spawn(
                 socklen_t salen = sizeof(sa);
 
                 r = getpeername(s->socket_fd, &sa.sa, &salen);
-                if (r < 0)
-                        return -errno;
+                if (r < 0) {
+                        r = -errno;
 
-                if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
+                        /* ENOTCONN is legitimate if the endpoint disappeared on shutdown.
+                         * This connection is over, but the socket unit lives on. */
+                        if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST))
+                                return r;
+                }
+
+                if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
                         _cleanup_free_ char *addr = NULL;
                         char *t;
                         int port;
@@ -1449,7 +1468,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
 
         if (s->result != SERVICE_SUCCESS) {
                 log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result));
-                failure_action(UNIT(s)->manager, s->failure_action, UNIT(s)->reboot_arg);
+                emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed");
         }
 
         if (allow_restart && service_shall_restart(s)) {
@@ -1471,6 +1490,9 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         /* Also, remove the runtime directory */
         exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
 
+        /* Get rid of the IPC bits of the user */
+        unit_unref_uid_gid(UNIT(s), true);
+
         /* Release the user, and destroy it if we are the only remaining owner */
         dynamic_creds_destroy(&s->dynamic_creds);
 
@@ -1747,7 +1769,15 @@ static void service_enter_start(Service *s) {
         }
 
         if (!c) {
-                assert(s->type == SERVICE_ONESHOT);
+                if (s->type != SERVICE_ONESHOT) {
+                        /* There's no command line configured for the main command? Hmm, that is strange. This can only
+                         * happen if the configuration changes at runtime. In this case, let's enter a failure
+                         * state. */
+                        log_unit_error(UNIT(s), "There's no 'start' task anymore we could start: %m");
+                        r = -ENXIO;
+                        goto fail;
+                }
+
                 service_enter_start_post(s);
                 return;
         }
@@ -2024,6 +2054,10 @@ static int service_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         s->result = SERVICE_SUCCESS;
         s->reload_result = SERVICE_SUCCESS;
         s->main_pid_known = false;
@@ -2315,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                         r = service_add_fd_store(s, fd, t);
                         if (r < 0)
                                 log_unit_error_errno(u, r, "Failed to add fd to store: %m");
-                        else if (r > 0)
+                        else
                                 fdset_remove(fds, fd);
                 }
 
@@ -2591,8 +2625,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         assert(s);
         assert(pid >= 0);
 
-        if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) :
-                                     is_clean_exit_lsb(code, status, &s->success_status))
+        if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status))
                 f = SERVICE_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SERVICE_FAILURE_EXIT_CODE;
@@ -2634,7 +2667,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                 f = SERVICE_SUCCESS;
                 }
 
-                log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+                /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
+                 * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
+                 * that the service already logged the reason at a higher log level on its own. However, if the service
+                 * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise
+                 * our log level to WARNING then. */
+
+                log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG :
+                           (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING),
                            LOG_UNIT_ID(u),
                            LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s",
                                             sigchld_code_to_string(code), status,
@@ -2866,7 +2906,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* If the PID set is empty now, then let's finish this off
            (On unified we use proper notifications) */
-        if (cg_unified() <= 0 && set_isempty(u->pids))
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
                 service_notify_cgroup_empty_event(u);
 }
 
@@ -3076,9 +3116,7 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds)
 
                 if (!streq_ptr(s->status_text, t)) {
 
-                        free(s->status_text);
-                        s->status_text = t;
-                        t = NULL;
+                        free_and_replace(s->status_text, t);
 
                         notify_dbus = true;
                 }
diff --git a/src/core/service.h b/src/core/service.h
index 888007cc0b..2869144fcb 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -178,7 +178,7 @@ struct Service {
         char *status_text;
         int status_errno;
 
-        FailureAction failure_action;
+        EmergencyAction emergency_action;
 
         UnitRef accept_socket;
 
diff --git a/src/core/show-status.c b/src/core/show-status.c
index 59ebdc7219..65f9cb888a 100644
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@@ -61,6 +61,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
         if (vasprintf(&s, format, ap) < 0)
                 return log_oom();
 
+        /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This
+         * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+         * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier
+         * for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
         if (fd < 0)
                 return fd;
diff --git a/src/core/slice.c b/src/core/slice.c
index c7700b8857..ed5d3fd701 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -130,6 +130,28 @@ static int slice_verify(Slice *s) {
         return 0;
 }
 
+static int slice_load_root_slice(Unit *u) {
+        assert(u);
+
+        if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+                return 0;
+
+        u->perpetual = true;
+
+        /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+         * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+        u->default_dependencies = false;
+        u->ignore_on_isolate = true;
+
+        if (!u->description)
+                u->description = strdup("Root Slice");
+        if (!u->documentation)
+                u->documentation = strv_new("man:systemd.special(7)", NULL);
+
+        return 1;
+}
+
 static int slice_load(Unit *u) {
         Slice *s = SLICE(u);
         int r;
@@ -137,6 +159,9 @@ static int slice_load(Unit *u) {
         assert(s);
         assert(u->load_state == UNIT_STUB);
 
+        r = slice_load_root_slice(u);
+        if (r < 0)
+                return r;
         r = unit_load_fragment_and_dropin_optional(u);
         if (r < 0)
                 return r;
@@ -187,10 +212,15 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) {
 
 static int slice_start(Unit *u) {
         Slice *t = SLICE(u);
+        int r;
 
         assert(t);
         assert(t->state == SLICE_DEAD);
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         (void) unit_realize_cgroup(u);
         (void) unit_reset_cpu_usage(u);
 
@@ -269,32 +299,16 @@ static void slice_enumerate(Manager *m) {
 
         u = manager_get_unit(m, SPECIAL_ROOT_SLICE);
         if (!u) {
-                u = unit_new(m, sizeof(Slice));
-                if (!u)  {
-                        log_oom();
-                        return;
-                }
-
-                r = unit_add_name(u, SPECIAL_ROOT_SLICE);
+                r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u);
                 if (r < 0) {
-                        unit_free(u);
-                        log_error_errno(r, "Failed to add -.slice name");
+                        log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m");
                         return;
                 }
         }
 
-        u->default_dependencies = false;
-        u->no_gc = true;
-        u->ignore_on_isolate = true;
-        u->refuse_manual_start = true;
-        u->refuse_manual_stop = true;
+        u->perpetual = true;
         SLICE(u)->deserialized_state = SLICE_ACTIVE;
 
-        if (!u->description)
-                u->description = strdup("Root Slice");
-        if (!u->documentation)
-                (void) strv_extend(&u->documentation, "man:systemd.special(7)");
-
         unit_add_to_load_queue(u);
         unit_add_to_dbus_queue(u);
 }
diff --git a/src/core/socket.c b/src/core/socket.c
index 50872e8366..0b1c4acfec 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1334,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) {
 
 static int usbffs_dispatch_eps(SocketPort *p) {
         _cleanup_free_ struct dirent **ent = NULL;
-        _cleanup_free_ char *path = NULL;
         int r, i, n, k;
 
-        path = dirname_malloc(p->path);
-        if (!path)
-                return -ENOMEM;
-
-        r = scandir(path, &ent, usbffs_select_ep, alphasort);
+        r = scandir(p->path, &ent, usbffs_select_ep, alphasort);
         if (r < 0)
                 return -errno;
 
@@ -1356,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) {
         for (i = 0; i < n; ++i) {
                 _cleanup_free_ char *ep = NULL;
 
-                ep = path_make_absolute(ent[i]->d_name, path);
+                ep = path_make_absolute(ent[i]->d_name, p->path);
                 if (!ep)
                         return -ENOMEM;
 
@@ -1905,6 +1900,8 @@ static void socket_enter_dead(Socket *s, SocketResult f) {
 
         exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
 
+        unit_unref_uid_gid(UNIT(s), true);
+
         dynamic_creds_destroy(&s->dynamic_creds);
 }
 
@@ -2357,11 +2354,14 @@ static int socket_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         s->result = SOCKET_SUCCESS;
         s->reset_cpu_usage = true;
 
         socket_enter_start_pre(s);
-
         return 1;
 }
 
@@ -2746,7 +2746,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         s->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = SOCKET_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SOCKET_FAILURE_EXIT_CODE;
diff --git a/src/core/swap.c b/src/core/swap.c
index 2c802da3b5..2228a254bb 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -381,11 +381,7 @@ static int swap_setup_unit(
         if (!u) {
                 delete = true;
 
-                u = unit_new(m, sizeof(Swap));
-                if (!u)
-                        return log_oom();
-
-                r = unit_add_name(u, e);
+                r = unit_new_for_name(m, sizeof(Swap), e, &u);
                 if (r < 0)
                         goto fail;
 
@@ -683,6 +679,8 @@ static void swap_enter_dead(Swap *s, SwapResult f) {
 
         exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
 
+        unit_unref_uid_gid(UNIT(s), true);
+
         dynamic_creds_destroy(&s->dynamic_creds);
 }
 
@@ -859,6 +857,10 @@ static int swap_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         s->result = SWAP_SUCCESS;
         s->reset_cpu_usage = true;
 
@@ -986,7 +988,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         s->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = SWAP_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SWAP_FAILURE_EXIT_CODE;
@@ -1187,6 +1189,7 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
 
                         case SWAP_DEAD:
                         case SWAP_FAILED:
+                                (void) unit_acquire_invocation_id(UNIT(swap));
                                 swap_enter_active(swap, SWAP_SUCCESS);
                                 break;
 
diff --git a/src/core/system.conf b/src/core/system.conf
index c6bb050aac..746572b7ff 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -21,6 +21,7 @@
 #CrashChangeVT=no
 #CrashShell=no
 #CrashReboot=no
+#CtrlAltDelBurstAction=reboot-force
 #CPUAffinity=1 2
 #JoinControllers=cpu,cpuacct net_cls,net_prio
 #RuntimeWatchdogSec=0
diff --git a/src/core/target.c b/src/core/target.c
index 61a91aad07..765c1f3fa4 100644
--- a/src/core/target.c
+++ b/src/core/target.c
@@ -124,10 +124,15 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) {
 
 static int target_start(Unit *u) {
         Target *t = TARGET(u);
+        int r;
 
         assert(t);
         assert(t->state == TARGET_DEAD);
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         target_set_state(t, TARGET_ACTIVE);
         return 1;
 }
diff --git a/src/core/timer.c b/src/core/timer.c
index e2b43f02f8..2469a517ea 100644
--- a/src/core/timer.c
+++ b/src/core/timer.c
@@ -261,6 +261,8 @@ static void timer_set_state(Timer *t, TimerState state) {
         if (state != TIMER_WAITING) {
                 t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
                 t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+                t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+                t->next_elapse_realtime = USEC_INFINITY;
         }
 
         if (state != old_state)
@@ -616,6 +618,10 @@ static int timer_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         t->last_trigger = DUAL_TIMESTAMP_NULL;
 
         /* Reenable all timers that depend on unit activation time */
@@ -632,7 +638,7 @@ static int timer_start(Unit *u) {
                         /* The timer has never run before,
                          * make sure a stamp file exists.
                          */
-                        touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+                        (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
         }
 
         t->result = TIMER_SUCCESS;
diff --git a/src/core/transaction.c b/src/core/transaction.c
index 8370b864fb..e22e3b30c2 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -1085,10 +1085,8 @@ Transaction *transaction_new(bool irreversible) {
                 return NULL;
 
         tr->jobs = hashmap_new(NULL);
-        if (!tr->jobs) {
-                free(tr);
-                return NULL;
-        }
+        if (!tr->jobs)
+                return mfree(tr);
 
         tr->irreversible = irreversible;
 
diff --git a/src/core/umount.c b/src/core/umount.c
index c21a2be54e..1e5459ed80 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -375,7 +375,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
                 /* If we are in a container, don't attempt to
                    read-only mount anything as that brings no real
                    benefits, but might confuse the host, as we remount
-                   the superblock here, not the bind mound. */
+                   the superblock here, not the bind mount. */
                 if (detect_container() <= 0)  {
                         _cleanup_free_ char *options = NULL;
                         /* MS_REMOUNT requires that the data parameter
diff --git a/src/core/unit.c b/src/core/unit.c
index ff7c562fba..e664e23892 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -37,6 +37,7 @@
 #include "execute.h"
 #include "fileio-label.h"
 #include "formats-util.h"
+#include "id128-util.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
 #include "log.h"
@@ -87,10 +88,8 @@ Unit *unit_new(Manager *m, size_t size) {
                 return NULL;
 
         u->names = set_new(&string_hash_ops);
-        if (!u->names) {
-                free(u);
-                return NULL;
-        }
+        if (!u->names)
+                return mfree(u);
 
         u->manager = m;
         u->type = _UNIT_TYPE_INVALID;
@@ -100,7 +99,9 @@ Unit *unit_new(Manager *m, size_t size) {
         u->on_failure_job_mode = JOB_REPLACE;
         u->cgroup_inotify_wd = -1;
         u->job_timeout = USEC_INFINITY;
-        u->sigchldgen = 0;
+        u->ref_uid = UID_INVALID;
+        u->ref_gid = GID_INVALID;
+        u->cpu_usage_last = NSEC_INFINITY;
 
         RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
         RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -108,11 +109,29 @@ Unit *unit_new(Manager *m, size_t size) {
         return u;
 }
 
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+        Unit *u;
+        int r;
+
+        u = unit_new(m, size);
+        if (!u)
+                return -ENOMEM;
+
+        r = unit_add_name(u, name);
+        if (r < 0) {
+                unit_free(u);
+                return r;
+        }
+
+        *ret = u;
+        return r;
+}
+
 bool unit_has_name(Unit *u, const char *name) {
         assert(u);
         assert(name);
 
-        return !!set_get(u->names, (char*) name);
+        return set_contains(u->names, (char*) name);
 }
 
 static void unit_init(Unit *u) {
@@ -301,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) {
 
 bool unit_check_gc(Unit *u) {
         UnitActiveState state;
+        bool inactive;
         assert(u);
 
         if (u->job)
@@ -310,24 +330,28 @@ bool unit_check_gc(Unit *u) {
                 return true;
 
         state = unit_active_state(u);
+        inactive = state == UNIT_INACTIVE;
 
         /* If the unit is inactive and failed and no job is queued for
          * it, then release its runtime resources */
         if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
             UNIT_VTABLE(u)->release_resources)
-                UNIT_VTABLE(u)->release_resources(u);
+                UNIT_VTABLE(u)->release_resources(u, inactive);
 
         /* But we keep the unit object around for longer when it is
          * referenced or configured to not be gc'ed */
-        if (state != UNIT_INACTIVE)
+        if (!inactive)
                 return true;
 
-        if (u->no_gc)
+        if (u->perpetual)
                 return true;
 
         if (u->refs)
                 return true;
 
+        if (sd_bus_track_count(u->bus_track) > 0)
+                return true;
+
         if (UNIT_VTABLE(u)->check_gc)
                 if (UNIT_VTABLE(u)->check_gc(u))
                         return true;
@@ -508,11 +532,17 @@ void unit_free(Unit *u) {
 
         sd_bus_slot_unref(u->match_bus_slot);
 
+        sd_bus_track_unref(u->bus_track);
+        u->deserialized_refs = strv_free(u->deserialized_refs);
+
         unit_free_requires_mounts_for(u);
 
         SET_FOREACH(t, u->names, i)
                 hashmap_remove_value(u->manager->units, t, u);
 
+        if (!sd_id128_is_null(u->invocation_id))
+                hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
         if (u->job) {
                 Job *j = u->job;
                 job_uninstall(j);
@@ -550,6 +580,8 @@ void unit_free(Unit *u) {
 
         unit_release_cgroup(u);
 
+        unit_unref_uid_gid(u, false);
+
         (void) manager_update_failed_units(u->manager, u, false);
         set_remove(u->manager->startup_units, u);
 
@@ -846,18 +878,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
                         return r;
         }
 
-        if (c->std_output != EXEC_OUTPUT_KMSG &&
-            c->std_output != EXEC_OUTPUT_SYSLOG &&
-            c->std_output != EXEC_OUTPUT_JOURNAL &&
-            c->std_output != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
-            c->std_output != EXEC_OUTPUT_SYSLOG_AND_CONSOLE &&
-            c->std_output != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
-            c->std_error != EXEC_OUTPUT_KMSG &&
-            c->std_error != EXEC_OUTPUT_SYSLOG &&
-            c->std_error != EXEC_OUTPUT_JOURNAL &&
-            c->std_error != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
-            c->std_error != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
-            c->std_error != EXEC_OUTPUT_SYSLOG_AND_CONSOLE)
+        if (!IN_SET(c->std_output,
+                    EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+                    EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+                    EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) &&
+            !IN_SET(c->std_error,
+                    EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+                    EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+                    EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE))
                 return 0;
 
         /* If syslog or kernel logging is requested, make sure our own
@@ -894,6 +922,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         Unit *following;
         _cleanup_set_free_ Set *following_set = NULL;
         int r;
+        const char *n;
 
         assert(u);
         assert(u->type >= 0);
@@ -915,6 +944,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 "%s\tGC Check Good: %s\n"
                 "%s\tNeed Daemon Reload: %s\n"
                 "%s\tTransient: %s\n"
+                "%s\tPerpetual: %s\n"
                 "%s\tSlice: %s\n"
                 "%s\tCGroup: %s\n"
                 "%s\tCGroup realized: %s\n"
@@ -933,6 +963,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, yes_no(unit_check_gc(u)),
                 prefix, yes_no(unit_need_daemon_reload(u)),
                 prefix, yes_no(u->transient),
+                prefix, yes_no(u->perpetual),
                 prefix, strna(unit_slice_name(u)),
                 prefix, strna(u->cgroup_path),
                 prefix, yes_no(u->cgroup_realized),
@@ -942,6 +973,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         SET_FOREACH(t, u->names, i)
                 fprintf(f, "%s\tName: %s\n", prefix, t);
 
+        if (!sd_id128_is_null(u->invocation_id))
+                fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n",
+                        prefix, SD_ID128_FORMAT_VAL(u->invocation_id));
+
         STRV_FOREACH(j, u->documentation)
                 fprintf(f, "%s\tDocumentation: %s\n", prefix, *j);
 
@@ -969,8 +1004,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         if (u->job_timeout != USEC_INFINITY)
                 fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
 
-        if (u->job_timeout_action != FAILURE_ACTION_NONE)
-                fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action));
+        if (u->job_timeout_action != EMERGENCY_ACTION_NONE)
+                fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action));
 
         if (u->job_timeout_reboot_arg)
                 fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
@@ -1035,13 +1070,14 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         else if (u->load_state == UNIT_ERROR)
                 fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error));
 
+        for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track))
+                fprintf(f, "%s\tBus Ref: %s\n", prefix, n);
 
         if (u->job)
                 job_dump(u->job, f, prefix2);
 
         if (u->nop_job)
                 job_dump(u->nop_job, f, prefix2);
-
 }
 
 /* Common implementation for multiple backends */
@@ -1436,7 +1472,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
         format = unit_get_status_message_format(u, t);
 
         DISABLE_WARNING_FORMAT_NONLITERAL;
-        xsprintf(buf, format, unit_description(u));
+        snprintf(buf, sizeof buf, format, unit_description(u));
         REENABLE_WARNING;
 
         mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING :
@@ -1476,7 +1512,7 @@ int unit_start_limit_test(Unit *u) {
         log_unit_warning(u, "Start request repeated too quickly.");
         u->start_limit_hit = true;
 
-        return failure_action(u->manager, u->start_limit_action, u->reboot_arg);
+        return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed");
 }
 
 /* Errors:
@@ -1602,6 +1638,18 @@ int unit_stop(Unit *u) {
         return UNIT_VTABLE(u)->stop(u);
 }
 
+bool unit_can_stop(Unit *u) {
+        assert(u);
+
+        if (!unit_supported(u))
+                return false;
+
+        if (u->perpetual)
+                return false;
+
+        return !!UNIT_VTABLE(u)->stop;
+}
+
 /* Errors:
  *         -EBADR:    This unit type does not support reloading.
  *         -ENOEXEC:  Unit is not started.
@@ -2136,13 +2184,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) {
 
         case JOB_VERIFY_ACTIVE:
         case JOB_START:
-        case JOB_STOP:
         case JOB_NOP:
+                /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+                 * startable by us but may appear due to external events, and it thus makes sense to permit enqueing
+                 * jobs for it. */
                 return true;
 
+        case JOB_STOP:
+                /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to
+                 * external events), hence it makes no sense to permit enqueing such a request either. */
+                return !u->perpetual;
+
         case JOB_RESTART:
         case JOB_TRY_RESTART:
-                return unit_can_start(u);
+                return unit_can_stop(u) && unit_can_start(u);
 
         case JOB_RELOAD:
         case JOB_TRY_RELOAD:
@@ -2212,6 +2267,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen
                 return 0;
         }
 
+        if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) {
+                log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
+                return 0;
+        }
+
         r = set_ensure_allocated(&u->dependencies[d], NULL);
         if (r < 0)
                 return r;
@@ -2374,6 +2434,15 @@ char *unit_dbus_path(Unit *u) {
         return unit_dbus_path_from_name(u->id);
 }
 
+char *unit_dbus_path_invocation_id(Unit *u) {
+        assert(u);
+
+        if (sd_id128_is_null(u->invocation_id))
+                return NULL;
+
+        return unit_dbus_path_from_name(u->invocation_id_string);
+}
+
 int unit_set_slice(Unit *u, Unit *slice) {
         assert(u);
         assert(slice);
@@ -2608,21 +2677,34 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
                 unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
 
         unit_serialize_item(u, f, "transient", yes_no(u->transient));
-        unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base);
+
+        unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+        if (u->cpu_usage_last != NSEC_INFINITY)
+                unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
 
         if (u->cgroup_path)
                 unit_serialize_item(u, f, "cgroup", u->cgroup_path);
         unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
 
+        if (uid_is_valid(u->ref_uid))
+                unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
+        if (gid_is_valid(u->ref_gid))
+                unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
+
+        if (!sd_id128_is_null(u->invocation_id))
+                unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+        bus_track_serialize(u->bus_track, f, "ref");
+
         if (serialize_jobs) {
                 if (u->job) {
                         fprintf(f, "job\n");
-                        job_serialize(u->job, f, fds);
+                        job_serialize(u->job, f);
                 }
 
                 if (u->nop_job) {
                         fprintf(f, "job\n");
-                        job_serialize(u->nop_job, f, fds);
+                        job_serialize(u->nop_job, f);
                 }
         }
 
@@ -2752,7 +2834,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                 if (!j)
                                         return log_oom();
 
-                                r = job_deserialize(j, f, fds);
+                                r = job_deserialize(j, f);
                                 if (r < 0) {
                                         job_free(j);
                                         return r;
@@ -2824,11 +2906,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
 
                         continue;
 
-                } else if (streq(l, "cpuacct-usage-base")) {
+                } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
 
-                        r = safe_atou64(v, &u->cpuacct_usage_base);
+                        r = safe_atou64(v, &u->cpu_usage_base);
                         if (r < 0)
-                                log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
+                                log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v);
+
+                        continue;
+
+                } else if (streq(l, "cpu-usage-last")) {
+
+                        r = safe_atou64(v, &u->cpu_usage_last);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v);
 
                         continue;
 
@@ -2851,6 +2941,47 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                 u->cgroup_realized = b;
 
                         continue;
+
+                } else if (streq(l, "ref-uid")) {
+                        uid_t uid;
+
+                        r = parse_uid(v, &uid);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v);
+                        else
+                                unit_ref_uid_gid(u, uid, GID_INVALID);
+
+                        continue;
+
+                } else if (streq(l, "ref-gid")) {
+                        gid_t gid;
+
+                        r = parse_gid(v, &gid);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v);
+                        else
+                                unit_ref_uid_gid(u, UID_INVALID, gid);
+
+                } else if (streq(l, "ref")) {
+
+                        r = strv_extend(&u->deserialized_refs, v);
+                        if (r < 0)
+                                log_oom();
+
+                        continue;
+                } else if (streq(l, "invocation-id")) {
+                        sd_id128_t id;
+
+                        r = sd_id128_from_string(v, &id);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v);
+                        else {
+                                r = unit_set_invocation_id(u, id);
+                                if (r < 0)
+                                        log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m");
+                        }
+
+                        continue;
                 }
 
                 if (unit_can_serialize(u)) {
@@ -2925,7 +3056,8 @@ int unit_add_node_link(Unit *u, const char *what, bool wants, UnitDependency dep
 }
 
 int unit_coldplug(Unit *u) {
-        int r = 0, q = 0;
+        int r = 0, q;
+        char **i;
 
         assert(u);
 
@@ -2936,21 +3068,29 @@ int unit_coldplug(Unit *u) {
 
         u->coldplugged = true;
 
-        if (UNIT_VTABLE(u)->coldplug)
-                r = UNIT_VTABLE(u)->coldplug(u);
+        STRV_FOREACH(i, u->deserialized_refs) {
+                q = bus_unit_track_add_name(u, *i);
+                if (q < 0 && r >= 0)
+                        r = q;
+        }
+        u->deserialized_refs = strv_free(u->deserialized_refs);
+
+        if (UNIT_VTABLE(u)->coldplug) {
+                q = UNIT_VTABLE(u)->coldplug(u);
+                if (q < 0 && r >= 0)
+                        r = q;
+        }
 
-        if (u->job)
+        if (u->job) {
                 q = job_coldplug(u->job);
+                if (q < 0 && r >= 0)
+                        r = q;
+        }
 
-        if (r < 0)
-                return r;
-        if (q < 0)
-                return q;
-
-        return 0;
+        return r;
 }
 
-static bool fragment_mtime_newer(const char *path, usec_t mtime) {
+static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) {
         struct stat st;
 
         if (!path)
@@ -2960,12 +3100,12 @@ static bool fragment_mtime_newer(const char *path, usec_t mtime) {
                 /* What, cannot access this anymore? */
                 return true;
 
-        if (mtime > 0)
+        if (path_masked)
+                /* For masked files check if they are still so */
+                return !null_or_empty(&st);
+        else
                 /* For non-empty files check the mtime */
                 return timespec_load(&st.st_mtim) > mtime;
-        else if (!null_or_empty(&st))
-                /* For masked files check if they are still so */
-                return true;
 
         return false;
 }
@@ -2976,18 +3116,22 @@ bool unit_need_daemon_reload(Unit *u) {
 
         assert(u);
 
-        if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime))
+        /* For unit files, we allow masking… */
+        if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime,
+                                 u->load_state == UNIT_MASKED))
                 return true;
 
-        if (fragment_mtime_newer(u->source_path, u->source_mtime))
+        /* Source paths should not be masked… */
+        if (fragment_mtime_newer(u->source_path, u->source_mtime, false))
                 return true;
 
         (void) unit_find_dropin_paths(u, &t);
         if (!strv_equal(u->dropin_paths, t))
                 return true;
 
+        /* … any drop-ins that are masked are simply omitted from the list. */
         STRV_FOREACH(path, u->dropin_paths)
-                if (fragment_mtime_newer(*path, u->dropin_mtime))
+                if (fragment_mtime_newer(*path, u->dropin_mtime, false))
                         return true;
 
         return false;
@@ -3294,7 +3438,10 @@ int unit_patch_contexts(Unit *u) {
                         ec->no_new_privileges = true;
 
                 if (ec->private_devices)
-                        ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
+                        ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));
+
+                if (ec->protect_kernel_modules)
+                        ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
 
                 if (ec->dynamic_user) {
                         if (!ec->user) {
@@ -3309,7 +3456,14 @@ int unit_patch_contexts(Unit *u) {
                                         return -ENOMEM;
                         }
 
+                        /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID
+                         * around in the file system or on IPC objects. Hence enforce a strict sandbox. */
+
                         ec->private_tmp = true;
+                        ec->remove_ipc = true;
+                        ec->protect_system = PROTECT_SYSTEM_STRICT;
+                        if (ec->protect_home == PROTECT_HOME_NO)
+                                ec->protect_home = PROTECT_HOME_READ_ONLY;
                 }
         }
 
@@ -3695,7 +3849,7 @@ int unit_kill_context(
                          * there we get proper events. Hence rely on
                          * them.*/
 
-                        if  (cg_unified() > 0 ||
+                        if  (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
                              (detect_container() == 0 && !unit_cgroup_delegate(u)))
                                 wait_for_exit = true;
 
@@ -3932,3 +4086,198 @@ pid_t unit_main_pid(Unit *u) {
 
         return 0;
 }
+
+static void unit_unref_uid_internal(
+                Unit *u,
+                uid_t *ref_uid,
+                bool destroy_now,
+                void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) {
+
+        assert(u);
+        assert(ref_uid);
+        assert(_manager_unref_uid);
+
+        /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and
+         * gid_t are actually the same time, with the same validity rules.
+         *
+         * Drops a reference to UID/GID from a unit. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (!uid_is_valid(*ref_uid))
+                return;
+
+        _manager_unref_uid(u->manager, *ref_uid, destroy_now);
+        *ref_uid = UID_INVALID;
+}
+
+void unit_unref_uid(Unit *u, bool destroy_now) {
+        unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid);
+}
+
+void unit_unref_gid(Unit *u, bool destroy_now) {
+        unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid);
+}
+
+static int unit_ref_uid_internal(
+                Unit *u,
+                uid_t *ref_uid,
+                uid_t uid,
+                bool clean_ipc,
+                int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) {
+
+        int r;
+
+        assert(u);
+        assert(ref_uid);
+        assert(uid_is_valid(uid));
+        assert(_manager_ref_uid);
+
+        /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t
+         * are actually the same type, and have the same validity rules.
+         *
+         * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a
+         * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter
+         * drops to zero. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (*ref_uid == uid)
+                return 0;
+
+        if (uid_is_valid(*ref_uid)) /* Already set? */
+                return -EBUSY;
+
+        r = _manager_ref_uid(u->manager, uid, clean_ipc);
+        if (r < 0)
+                return r;
+
+        *ref_uid = uid;
+        return 1;
+}
+
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) {
+        return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid);
+}
+
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) {
+        return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid);
+}
+
+static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) {
+        int r = 0, q = 0;
+
+        assert(u);
+
+        /* Reference both a UID and a GID in one go. Either references both, or neither. */
+
+        if (uid_is_valid(uid)) {
+                r = unit_ref_uid(u, uid, clean_ipc);
+                if (r < 0)
+                        return r;
+        }
+
+        if (gid_is_valid(gid)) {
+                q = unit_ref_gid(u, gid, clean_ipc);
+                if (q < 0) {
+                        if (r > 0)
+                                unit_unref_uid(u, false);
+
+                        return q;
+                }
+        }
+
+        return r > 0 || q > 0;
+}
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) {
+        ExecContext *c;
+        int r;
+
+        assert(u);
+
+        c = unit_get_exec_context(u);
+
+        r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false);
+        if (r < 0)
+                return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m");
+
+        return r;
+}
+
+void unit_unref_uid_gid(Unit *u, bool destroy_now) {
+        assert(u);
+
+        unit_unref_uid(u, destroy_now);
+        unit_unref_gid(u, destroy_now);
+}
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
+        int r;
+
+        assert(u);
+
+        /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names
+         * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC
+         * objects when no service references the UID/GID anymore. */
+
+        r = unit_ref_uid_gid(u, uid, gid);
+        if (r > 0)
+                bus_unit_send_change_signal(u);
+}
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id) {
+        int r;
+
+        assert(u);
+
+        /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */
+
+        if (sd_id128_equal(u->invocation_id, id))
+                return 0;
+
+        if (!sd_id128_is_null(u->invocation_id))
+                (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+        if (sd_id128_is_null(id)) {
+                r = 0;
+                goto reset;
+        }
+
+        r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops);
+        if (r < 0)
+                goto reset;
+
+        u->invocation_id = id;
+        sd_id128_to_string(id, u->invocation_id_string);
+
+        r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u);
+        if (r < 0)
+                goto reset;
+
+        return 0;
+
+reset:
+        u->invocation_id = SD_ID128_NULL;
+        u->invocation_id_string[0] = 0;
+        return r;
+}
+
+int unit_acquire_invocation_id(Unit *u) {
+        sd_id128_t id;
+        int r;
+
+        assert(u);
+
+        r = sd_id128_randomize(&id);
+        if (r < 0)
+                return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m");
+
+        r = unit_set_invocation_id(u, id);
+        if (r < 0)
+                return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+
+        return 0;
+}
diff --git a/src/core/unit.h b/src/core/unit.h
index 47eb8d50a6..991543664b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -29,7 +29,7 @@ typedef struct UnitRef UnitRef;
 typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
 
 #include "condition.h"
-#include "failure-action.h"
+#include "emergency-action.h"
 #include "install.h"
 #include "list.h"
 #include "unit-name.h"
@@ -108,9 +108,13 @@ struct Unit {
         /* The slot used for watching NameOwnerChanged signals */
         sd_bus_slot *match_bus_slot;
 
+        /* References to this unit from clients */
+        sd_bus_track *bus_track;
+        char **deserialized_refs;
+
         /* Job timeout and action to take */
         usec_t job_timeout;
-        FailureAction job_timeout_action;
+        EmergencyAction job_timeout_action;
         char *job_timeout_reboot_arg;
 
         /* References to this */
@@ -174,18 +178,23 @@ struct Unit {
 
         /* Put a ratelimit on unit starting */
         RateLimit start_limit;
-        FailureAction start_limit_action;
+        EmergencyAction start_limit_action;
         char *reboot_arg;
 
         /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
         RateLimit auto_stop_ratelimit;
 
+        /* Reference to a specific UID/GID */
+        uid_t ref_uid;
+        gid_t ref_gid;
+
         /* Cached unit file state and preset */
         UnitFileState unit_file_state;
         int unit_file_preset;
 
-        /* Where the cpuacct.usage cgroup counter was at the time the unit was started */
-        nsec_t cpuacct_usage_base;
+        /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+        nsec_t cpu_usage_base;
+        nsec_t cpu_usage_last; /* the most recently read value */
 
         /* Counterparts in the cgroup filesystem */
         char *cgroup_path;
@@ -195,11 +204,13 @@ struct Unit {
         CGroupMask cgroup_members_mask;
         int cgroup_inotify_wd;
 
-        uint32_t cgroup_netclass_id;
-
         /* How to start OnFailure units */
         JobMode on_failure_job_mode;
 
+        /* The current invocation ID */
+        sd_id128_t invocation_id;
+        char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */
+
         /* Garbage collect us we nobody wants or requires us anymore */
         bool stop_when_unneeded;
 
@@ -225,6 +236,9 @@ struct Unit {
         /* Is this a transient unit? */
         bool transient;
 
+        /* Is this a unit that is always running and cannot be stopped? */
+        bool perpetual;
+
         bool in_load_queue:1;
         bool in_dbus_queue:1;
         bool in_cleanup_queue:1;
@@ -233,8 +247,6 @@ struct Unit {
 
         bool sent_dbus_new_signal:1;
 
-        bool no_gc:1;
-
         bool in_audit:1;
 
         bool cgroup_realized:1;
@@ -245,6 +257,9 @@ struct Unit {
 
         /* Did we already invoke unit_coldplug() for this unit? */
         bool coldplugged:1;
+
+        /* For transient units: whether to add a bus track reference after creating the unit */
+        bool bus_track_add:1;
 };
 
 struct UnitStatusMessageFormats {
@@ -358,7 +373,7 @@ struct UnitVTable {
 
         /* When the unit is not running and no job for it queued we
          * shall release its runtime resources */
-        void (*release_resources)(Unit *u);
+        void (*release_resources)(Unit *u, bool inactive);
 
         /* Invoked on every child that died */
         void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
@@ -373,8 +388,7 @@ struct UnitVTable {
         /* Called whenever a process of this unit sends us a message */
         void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds);
 
-        /* Called whenever a name this Unit registered for comes or
-         * goes away. */
+        /* Called whenever a name this Unit registered for comes or goes away. */
         void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner);
 
         /* Called for each property that is being set */
@@ -467,6 +481,7 @@ DEFINE_CAST(SCOPE, Scope);
 Unit *unit_new(Manager *m, size_t size);
 void unit_free(Unit *u);
 
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
 int unit_add_name(Unit *u, const char *name);
 
 int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference);
@@ -511,6 +526,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix);
 
 bool unit_can_reload(Unit *u) _pure_;
 bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
 bool unit_can_isolate(Unit *u) _pure_;
 
 int unit_start(Unit *u);
@@ -537,6 +553,7 @@ bool unit_job_is_applicable(Unit *u, JobType j);
 int set_unit_path(const char *p);
 
 char *unit_dbus_path(Unit *u);
+char *unit_dbus_path_invocation_id(Unit *u);
 
 int unit_load_related_unit(Unit *u, const char *type, Unit **_found);
 
@@ -623,12 +640,26 @@ int unit_fail_if_symlink(Unit *u, const char* where);
 
 int unit_start_limit_test(Unit *u);
 
+void unit_unref_uid(Unit *u, bool destroy_now);
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc);
+
+void unit_unref_gid(Unit *u, bool destroy_now);
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc);
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid);
+void unit_unref_uid_gid(Unit *u, bool destroy_now);
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id);
+int unit_acquire_invocation_id(Unit *u);
+
 /* Macros which append UNIT= or USER_UNIT= to the message */
 
 #define log_unit_full(unit, level, error, ...)                          \
         ({                                                              \
-                Unit *_u = (unit);                                      \
-                _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \
+                const Unit *_u = (unit);                                \
+                _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })