From 66ebf6c0a1005f90099e25ece5630551ad97a3c3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@fb.com>
Date: Sun, 7 Aug 2016 09:45:39 -0400
Subject: core: add cgroup CPU controller support on the unified hierarchy

Unfortunately, due to the disagreements in the kernel development community,
CPU controller cgroup v2 support has not been merged and enabling it requires
applying two small out-of-tree kernel patches.  The situation is explained in
the following documentation.

 https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu

While it isn't clear what will happen with CPU controller cgroup v2 support,
there are critical features which are possible only on cgroup v2 such as
buffered write control making cgroup v2 essential for a lot of workloads.  This
commit implements systemd CPU controller support on the unified hierarchy so
that users who choose to deploy CPU controller cgroup v2 support can easily
take advantage of it.

On the unified hierarchy, "cpu.weight" knob replaces "cpu.shares" and "cpu.max"
replaces "cpu.cfs_period_us" and "cpu.cfs_quota_us".  [Startup]CPUWeight config
options are added with the usual compat translation.  CPU quota settings remain
unchanged and apply to both legacy and unified hierarchies.

v2: - Error in man page corrected.
    - CPU config application in cgroup_context_apply() refactored.
    - CPU accounting now works on unified hierarchy.
---
 src/core/cgroup.c                     | 203 +++++++++++++++++++++++++++-------
 src/core/cgroup.h                     |   5 +-
 src/core/dbus-cgroup.c                |  46 ++++++++
 src/core/load-fragment-gperf.gperf.m4 |   2 +
 src/core/load-fragment.c              |  29 +++++
 src/core/load-fragment.h              |   1 +
 src/core/unit.c                       |   6 +-
 src/core/unit.h                       |   4 +-
 8 files changed, 252 insertions(+), 44 deletions(-)

(limited to 'src/core')

diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c19e43f571..910a64b4da 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -57,9 +57,12 @@ void cgroup_context_init(CGroupContext *c) {
         /* Initialize everything to the kernel defaults, assuming the
          * structure is preinitialized to 0 */
 
+        c->cpu_weight = CGROUP_WEIGHT_INVALID;
+        c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
+        c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
         c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
         c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
-        c->cpu_quota_per_sec_usec = USEC_INFINITY;
 
         c->memory_high = CGROUP_LIMIT_MAX;
         c->memory_max = CGROUP_LIMIT_MAX;
@@ -158,6 +161,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sBlockIOAccounting=%s\n"
                 "%sMemoryAccounting=%s\n"
                 "%sTasksAccounting=%s\n"
+                "%sCPUWeight=%" PRIu64 "\n"
+                "%sStartupCPUWeight=%" PRIu64 "\n"
                 "%sCPUShares=%" PRIu64 "\n"
                 "%sStartupCPUShares=%" PRIu64 "\n"
                 "%sCPUQuotaPerSecSec=%s\n"
@@ -177,6 +182,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->blockio_accounting),
                 prefix, yes_no(c->memory_accounting),
                 prefix, yes_no(c->tasks_accounting),
+                prefix, c->cpu_weight,
+                prefix, c->startup_cpu_weight,
                 prefix, c->cpu_shares,
                 prefix, c->startup_cpu_shares,
                 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -382,6 +389,95 @@ fail:
         return -errno;
 }
 
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+        return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+                c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+        return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+                c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+        if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+            c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+                return c->startup_cpu_weight;
+        else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+                return c->cpu_weight;
+        else
+                return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+        if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+            c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+                return c->startup_cpu_shares;
+        else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+                return c->cpu_shares;
+        else
+                return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+        char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
+        int r;
+
+        xsprintf(buf, "%" PRIu64 "\n", weight);
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.weight: %m");
+
+        if (quota != USEC_INFINITY)
+                xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+                         quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+        else
+                xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
+
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.max: %m");
+}
+
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+        char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+        int r;
+
+        xsprintf(buf, "%" PRIu64 "\n", shares);
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.shares: %m");
+
+        xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.cfs_period_us: %m");
+
+        if (quota != USEC_INFINITY) {
+                xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+                r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+        } else
+                r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
+        if (r < 0)
+                log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                              "Failed to set cpu.cfs_quota_us: %m");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+        return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+                     CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+        return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+                     CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
 static bool cgroup_context_has_io_config(CGroupContext *c) {
         return c->io_accounting ||
                 c->io_weight != CGROUP_WEIGHT_INVALID ||
@@ -566,30 +662,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
          * and missing cgroups, i.e. EROFS and ENOENT. */
 
         if ((mask & CGROUP_MASK_CPU) && !is_root) {
-                char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+                bool has_weight = cgroup_context_has_cpu_weight(c);
+                bool has_shares = cgroup_context_has_cpu_shares(c);
 
-                sprintf(buf, "%" PRIu64 "\n",
-                        IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
-                        c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
-                r = cg_set_attribute("cpu", path, "cpu.shares", buf);
-                if (r < 0)
-                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
-                                      "Failed to set cpu.shares: %m");
+                if (cg_unified() > 0) {
+                        uint64_t weight;
 
-                sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
-                r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
-                if (r < 0)
-                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
-                                      "Failed to set cpu.cfs_period_us: %m");
+                        if (has_weight)
+                                weight = cgroup_context_cpu_weight(c, state);
+                        else if (has_shares) {
+                                uint64_t shares = cgroup_context_cpu_shares(c, state);
 
-                if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
-                        sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
-                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
-                } else
-                        r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
-                if (r < 0)
-                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
-                                      "Failed to set cpu.cfs_quota_us: %m");
+                                weight = cgroup_cpu_shares_to_weight(shares);
+
+                                log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+                                                  shares, weight, path);
+                        } else
+                                weight = CGROUP_WEIGHT_DEFAULT;
+
+                        cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+                } else {
+                        uint64_t shares;
+
+                        if (has_shares)
+                                shares = cgroup_context_cpu_shares(c, state);
+                        else if (has_weight) {
+                                uint64_t weight = cgroup_context_cpu_weight(c, state);
+
+                                shares = cgroup_cpu_weight_to_shares(weight);
+
+                                log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+                                                  weight, shares, path);
+                        } else
+                                shares = CGROUP_CPU_SHARES_DEFAULT;
+
+                        cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+                }
         }
 
         if (mask & CGROUP_MASK_IO) {
@@ -864,8 +972,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
         /* Figure out which controllers we need */
 
         if (c->cpu_accounting ||
-            c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
-            c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+            cgroup_context_has_cpu_weight(c) ||
+            cgroup_context_has_cpu_shares(c) ||
             c->cpu_quota_per_sec_usec != USEC_INFINITY)
                 mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
 
@@ -1890,18 +1998,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
         if (!u->cgroup_path)
                 return -ENODATA;
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
-                return -ENODATA;
+        if (cg_unified() > 0) {
+                const char *keys[] = { "usage_usec", NULL };
+                _cleanup_free_ char *val = NULL;
+                uint64_t us;
 
-        r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
-        if (r == -ENOENT)
-                return -ENODATA;
-        if (r < 0)
-                return r;
+                if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
+                        return -ENODATA;
 
-        r = safe_atou64(v, &ns);
-        if (r < 0)
-                return r;
+                r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+                if (r < 0)
+                        return r;
+
+                r = safe_atou64(val, &us);
+                if (r < 0)
+                        return r;
+
+                ns = us * NSEC_PER_USEC;
+        } else {
+                if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
+                        return -ENODATA;
+
+                r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+                if (r == -ENOENT)
+                        return -ENODATA;
+                if (r < 0)
+                        return r;
+
+                r = safe_atou64(v, &ns);
+                if (r < 0)
+                        return r;
+        }
 
         *ret = ns;
         return 0;
@@ -1915,8 +2042,8 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
         if (r < 0)
                 return r;
 
-        if (ns > u->cpuacct_usage_base)
-                ns -= u->cpuacct_usage_base;
+        if (ns > u->cpu_usage_base)
+                ns -= u->cpu_usage_base;
         else
                 ns = 0;
 
@@ -1932,11 +2059,11 @@ int unit_reset_cpu_usage(Unit *u) {
 
         r = unit_get_cpu_usage_raw(u, &ns);
         if (r < 0) {
-                u->cpuacct_usage_base = 0;
+                u->cpu_usage_base = 0;
                 return r;
         }
 
-        u->cpuacct_usage_base = ns;
+        u->cpu_usage_base = ns;
         return 0;
 }
 
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index a57403e79f..2fe9cc4039 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -89,6 +89,10 @@ struct CGroupContext {
         bool tasks_accounting;
 
         /* For unified hierarchy */
+        uint64_t cpu_weight;
+        uint64_t startup_cpu_weight;
+        usec_t cpu_quota_per_sec_usec;
+
         uint64_t io_weight;
         uint64_t startup_io_weight;
         LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@@ -101,7 +105,6 @@ struct CGroupContext {
         /* For legacy hierarchies */
         uint64_t cpu_shares;
         uint64_t startup_cpu_shares;
-        usec_t cpu_quota_per_sec_usec;
 
         uint64_t blockio_weight;
         uint64_t startup_blockio_weight;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 85b0c86a2f..2ca80d2996 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
         SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+        SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+        SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
         SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
         SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
         SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
@@ -303,6 +305,50 @@ int bus_cgroup_set_property(
 
                 return 1;
 
+        } else if (streq(name, "CPUWeight")) {
+                uint64_t weight;
+
+                r = sd_bus_message_read(message, "t", &weight);
+                if (r < 0)
+                        return r;
+
+                if (!CGROUP_WEIGHT_IS_OK(weight))
+                        return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range");
+
+                if (mode != UNIT_CHECK) {
+                        c->cpu_weight = weight;
+                        unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+                        if (weight == CGROUP_WEIGHT_INVALID)
+                                unit_write_drop_in_private(u, mode, name, "CPUWeight=");
+                        else
+                                unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight);
+                }
+
+                return 1;
+
+        } else if (streq(name, "StartupCPUWeight")) {
+                uint64_t weight;
+
+                r = sd_bus_message_read(message, "t", &weight);
+                if (r < 0)
+                        return r;
+
+                if (!CGROUP_WEIGHT_IS_OK(weight))
+                        return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range");
+
+                if (mode != UNIT_CHECK) {
+                        c->startup_cpu_weight = weight;
+                        unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+                        if (weight == CGROUP_CPU_SHARES_INVALID)
+                                unit_write_drop_in_private(u, mode, name, "StartupCPUWeight=");
+                        else
+                                unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight);
+                }
+
+                return 1;
+
         } else if (streq(name, "CPUShares")) {
                 uint64_t shares;
 
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 396f847213..edcfa6fa1b 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -121,6 +121,8 @@ $1.KillSignal,                   config_parse_signal,                0,
 m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
 `$1.Slice,                       config_parse_unit_slice,            0,                             0
 $1.CPUAccounting,                config_parse_bool,                  0,                             offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight,                    config_parse_cpu_weight,            0,                             offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight,             config_parse_cpu_weight,            0,                             offsetof($1, cgroup_context.startup_cpu_weight)
 $1.CPUShares,                    config_parse_cpu_shares,            0,                             offsetof($1, cgroup_context.cpu_shares)
 $1.StartupCPUShares,             config_parse_cpu_shares,            0,                             offsetof($1, cgroup_context.startup_cpu_shares)
 $1.CPUQuota,                     config_parse_cpu_quota,             0,                             offsetof($1, cgroup_context)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 420f368689..4ad6c4d79b 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2851,6 +2851,34 @@ int config_parse_unit_slice(
 
 DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
 
+int config_parse_cpu_weight(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint64_t *weight = data;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        r = cg_weight_parse(rvalue, weight);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue);
+                return 0;
+        }
+
+        return 0;
+}
+
 int config_parse_cpu_shares(
                 const char *unit,
                 const char *filename,
@@ -4191,6 +4219,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_address_families,      "FAMILIES" },
 #endif
                 { config_parse_cpu_shares,            "SHARES" },
+                { config_parse_cpu_weight,            "WEIGHT" },
                 { config_parse_memory_limit,          "LIMIT" },
                 { config_parse_device_allow,          "DEVICE" },
                 { config_parse_device_policy,         "POLICY" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 213bce55a7..8b688740cf 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -81,6 +81,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned
 int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/unit.c b/src/core/unit.c
index ff7c562fba..952604e0db 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -2608,7 +2608,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
                 unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
 
         unit_serialize_item(u, f, "transient", yes_no(u->transient));
-        unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base);
+        unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
 
         if (u->cgroup_path)
                 unit_serialize_item(u, f, "cgroup", u->cgroup_path);
@@ -2824,9 +2824,9 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
 
                         continue;
 
-                } else if (streq(l, "cpuacct-usage-base")) {
+                } else if (streq(l, "cpu-usage-base") || streq(l, "cpuacct-usage-base")) {
 
-                        r = safe_atou64(v, &u->cpuacct_usage_base);
+                        r = safe_atou64(v, &u->cpu_usage_base);
                         if (r < 0)
                                 log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
 
diff --git a/src/core/unit.h b/src/core/unit.h
index 47eb8d50a6..a6c69938dd 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -184,8 +184,8 @@ struct Unit {
         UnitFileState unit_file_state;
         int unit_file_preset;
 
-        /* Where the cpuacct.usage cgroup counter was at the time the unit was started */
-        nsec_t cpuacct_usage_base;
+        /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+        nsec_t cpu_usage_base;
 
         /* Counterparts in the cgroup filesystem */
         char *cgroup_path;
-- 
cgit v1.2.3-54-g00ecf


From ca2f6384aa2076576b316c7253964be90b102d96 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@fb.com>
Date: Mon, 15 Aug 2016 18:13:36 -0400
Subject: core: rename cg_unified() to cg_all_unified()

A following patch will update cgroup handling so that the systemd controller
(/sys/fs/cgroup/systemd) can use the unified hierarchy even if the kernel
resource controllers are on the legacy hierarchies.  This would require
distinguishing whether all controllers are on cgroup v2 or only the systemd
controller is.  In preparation, this patch renames cg_unified() to
cg_all_unified().

This patch doesn't cause any functional changes.
---
 src/basic/cgroup-util.c                | 30 +++++++++++++++---------------
 src/basic/cgroup-util.h                |  2 +-
 src/cgls/cgls.c                        |  2 +-
 src/cgtop/cgtop.c                      | 10 +++++-----
 src/core/cgroup.c                      | 16 ++++++++--------
 src/core/manager.c                     |  2 +-
 src/core/scope.c                       |  2 +-
 src/core/service.c                     |  2 +-
 src/core/unit.c                        |  2 +-
 src/libsystemd/sd-bus/test-bus-creds.c |  2 +-
 src/nspawn/nspawn-cgroup.c             |  4 ++--
 src/nspawn/nspawn-mount.c              |  4 ++--
 src/nspawn/nspawn.c                    |  2 +-
 13 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 25ef8a5c76..5473fec0dd 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -623,7 +623,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
         if (!cg_controller_is_valid(controller))
                 return -EINVAL;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
 
@@ -651,7 +651,7 @@ static int controller_is_accessible(const char *controller) {
         if (!cg_controller_is_valid(controller))
                 return -EINVAL;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified > 0) {
@@ -869,7 +869,7 @@ int cg_set_task_access(
         if (r < 0)
                 return r;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified)
@@ -893,7 +893,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
         assert(path);
         assert(pid >= 0);
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified == 0) {
@@ -969,7 +969,7 @@ int cg_install_release_agent(const char *controller, const char *agent) {
 
         assert(agent);
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified) /* doesn't apply to unified hierarchy */
@@ -1020,7 +1020,7 @@ int cg_uninstall_release_agent(const char *controller) {
         _cleanup_free_ char *fs = NULL;
         int r, unified;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified) /* Doesn't apply to unified hierarchy */
@@ -1076,7 +1076,7 @@ int cg_is_empty_recursive(const char *controller, const char *path) {
         if (controller && (isempty(path) || path_equal(path, "/")))
                 return false;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
 
@@ -1962,7 +1962,7 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path
                 return r;
 
         /* If we are in the unified hierarchy, we are done now */
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified > 0)
@@ -1992,7 +1992,7 @@ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_m
         if (r < 0)
                 return r;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified > 0)
@@ -2044,7 +2044,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to
                         return r;
         }
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified > 0)
@@ -2077,7 +2077,7 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root)
         if (r < 0)
                 return r;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified > 0)
@@ -2103,7 +2103,7 @@ int cg_mask_supported(CGroupMask *ret) {
          * includes controllers we can make sense of and that are
          * actually accessible. */
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (unified > 0) {
@@ -2226,7 +2226,7 @@ int cg_kernel_controllers(Set *controllers) {
 
 static thread_local int unified_cache = -1;
 
-int cg_unified(void) {
+int cg_all_unified(void) {
         struct statfs fs;
 
         /* Checks if we support the unified hierarchy. Returns an
@@ -2264,7 +2264,7 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
         if (supported == 0)
                 return 0;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return unified;
         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
@@ -2303,7 +2303,7 @@ bool cg_is_unified_wanted(void) {
 
         /* If the hierarchy is already mounted, then follow whatever
          * was chosen for it. */
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified >= 0)
                 return unified;
 
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index a509a1775c..5fa856affc 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -225,7 +225,7 @@ int cg_kernel_controllers(Set *controllers);
 
 bool cg_ns_supported(void);
 
-int cg_unified(void);
+int cg_all_unified(void);
 void cg_unified_flush(void);
 
 bool cg_is_unified_wanted(void);
diff --git a/src/cgls/cgls.c b/src/cgls/cgls.c
index dcb5912b83..adf488e8e1 100644
--- a/src/cgls/cgls.c
+++ b/src/cgls/cgls.c
@@ -166,7 +166,7 @@ static int get_cgroup_root(char **ret) {
 
 static void show_cg_info(const char *controller, const char *path) {
 
-        if (cg_unified() <= 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+        if (cg_all_unified() <= 0 && controller && !streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                 printf("Controller %s; ", controller);
 
         printf("Control group %s:\n", isempty(path) ? "/" : path);
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index 6045ae0437..aba17c9829 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -213,7 +213,7 @@ static int process(
                 uint64_t new_usage;
                 nsec_t timestamp;
 
-                if (cg_unified() > 0) {
+                if (cg_all_unified() > 0) {
                         const char *keys[] = { "usage_usec", NULL };
                         _cleanup_free_ char *val = NULL;
 
@@ -273,7 +273,7 @@ static int process(
         } else if (streq(controller, "memory")) {
                 _cleanup_free_ char *p = NULL, *v = NULL;
 
-                if (cg_unified() <= 0)
+                if (cg_all_unified() <= 0)
                         r = cg_get_path(controller, path, "memory.usage_in_bytes", &p);
                 else
                         r = cg_get_path(controller, path, "memory.current", &p);
@@ -293,11 +293,11 @@ static int process(
                 if (g->memory > 0)
                         g->memory_valid = true;
 
-        } else if ((streq(controller, "io") && cg_unified() > 0) ||
-                   (streq(controller, "blkio") && cg_unified() <= 0)) {
+        } else if ((streq(controller, "io") && cg_all_unified() > 0) ||
+                   (streq(controller, "blkio") && cg_all_unified() <= 0)) {
                 _cleanup_fclose_ FILE *f = NULL;
                 _cleanup_free_ char *p = NULL;
-                bool unified = cg_unified() > 0;
+                bool unified = cg_all_unified() > 0;
                 uint64_t wr = 0, rd = 0;
                 nsec_t timestamp;
 
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 910a64b4da..7c6fc01c0e 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -665,7 +665,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 bool has_weight = cgroup_context_has_cpu_weight(c);
                 bool has_shares = cgroup_context_has_cpu_shares(c);
 
-                if (cg_unified() > 0) {
+                if (cg_all_unified() > 0) {
                         uint64_t weight;
 
                         if (has_weight)
@@ -846,7 +846,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
         }
 
         if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
-                if (cg_unified() > 0) {
+                if (cg_all_unified() > 0) {
                         uint64_t max = c->memory_max;
 
                         if (cgroup_context_has_unified_memory_config(c))
@@ -1019,7 +1019,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
                 e = unit_get_exec_context(u);
                 if (!e ||
                     exec_context_maintains_privileges(e) ||
-                    cg_unified() > 0)
+                    cg_all_unified() > 0)
                         return _CGROUP_MASK_ALL;
         }
 
@@ -1245,7 +1245,7 @@ int unit_watch_cgroup(Unit *u) {
                 return 0;
 
         /* Only applies to the unified hierarchy */
-        r = cg_unified();
+        r = cg_all_unified();
         if (r < 0)
                 return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
         if (r == 0)
@@ -1645,7 +1645,7 @@ int unit_watch_all_pids(Unit *u) {
         if (!u->cgroup_path)
                 return -ENOENT;
 
-        if (cg_unified() > 0) /* On unified we can use proper notifications */
+        if (cg_all_unified() > 0) /* On unified we can use proper notifications */
                 return 0;
 
         return unit_watch_pids_in_path(u, u->cgroup_path);
@@ -1755,7 +1755,7 @@ int manager_setup_cgroup(Manager *m) {
         if (r < 0)
                 return log_error_errno(r, "Cannot find cgroup mount point: %m");
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
         if (unified > 0)
@@ -1953,7 +1953,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
         if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
                 return -ENODATA;
 
-        if (cg_unified() <= 0)
+        if (cg_all_unified() <= 0)
                 r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
         else
                 r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
@@ -1998,7 +1998,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
         if (!u->cgroup_path)
                 return -ENODATA;
 
-        if (cg_unified() > 0) {
+        if (cg_all_unified() > 0) {
                 const char *keys[] = { "usage_usec", NULL };
                 _cleanup_free_ char *val = NULL;
                 uint64_t us;
diff --git a/src/core/manager.c b/src/core/manager.c
index c20e185d78..108591c464 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -766,7 +766,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
         if (!MANAGER_IS_SYSTEM(m))
                 return 0;
 
-        if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */
+        if (cg_all_unified() > 0) /* We don't need this anymore on the unified hierarchy */
                 return 0;
 
         if (m->cgroups_agent_fd < 0) {
diff --git a/src/core/scope.c b/src/core/scope.c
index b278aed3d6..7c72bb7091 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -441,7 +441,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* If the PID set is empty now, then let's finish this off
            (On unified we use proper notifications) */
-        if (cg_unified() <= 0 && set_isempty(u->pids))
+        if (cg_all_unified() <= 0 && set_isempty(u->pids))
                 scope_notify_cgroup_empty_event(u);
 }
 
diff --git a/src/core/service.c b/src/core/service.c
index 4a37702f52..29e53867bf 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2866,7 +2866,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* If the PID set is empty now, then let's finish this off
            (On unified we use proper notifications) */
-        if (cg_unified() <= 0 && set_isempty(u->pids))
+        if (cg_all_unified() <= 0 && set_isempty(u->pids))
                 service_notify_cgroup_empty_event(u);
 }
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 952604e0db..b24c32569c 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3695,7 +3695,7 @@ int unit_kill_context(
                          * there we get proper events. Hence rely on
                          * them.*/
 
-                        if  (cg_unified() > 0 ||
+                        if  (cg_all_unified() > 0 ||
                              (detect_container() == 0 && !unit_cgroup_delegate(u)))
                                 wait_for_exit = true;
 
diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c
index e9ef483bdd..82237af115 100644
--- a/src/libsystemd/sd-bus/test-bus-creds.c
+++ b/src/libsystemd/sd-bus/test-bus-creds.c
@@ -27,7 +27,7 @@ int main(int argc, char *argv[]) {
         _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
         int r;
 
-        if (cg_unified() == -ENOMEDIUM) {
+        if (cg_all_unified() == -ENOMEDIUM) {
                 puts("Skipping test: /sys/fs/cgroup/ not available");
                 return EXIT_TEST_SKIP;
         }
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
index b1580236c9..ea3cab513c 100644
--- a/src/nspawn/nspawn-cgroup.c
+++ b/src/nspawn/nspawn-cgroup.c
@@ -70,7 +70,7 @@ int sync_cgroup(pid_t pid, bool unified_requested) {
         const char *fn;
         int unified, r;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
 
@@ -132,7 +132,7 @@ int create_subcgroup(pid_t pid, bool unified_requested) {
         if (!unified_requested)
                 return 0;
 
-        unified = cg_unified();
+        unified = cg_all_unified();
         if (unified < 0)
                 return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
         if (unified == 0)
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 803caef3dd..b5d83d481a 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -721,7 +721,7 @@ static int mount_legacy_cgns_supported(
                         return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
         }
 
-        if (cg_unified() > 0)
+        if (cg_all_unified() > 0)
                 goto skip_controllers;
 
         controllers = set_new(&string_hash_ops);
@@ -813,7 +813,7 @@ static int mount_legacy_cgns_unsupported(
                         return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
         }
 
-        if (cg_unified() > 0)
+        if (cg_all_unified() > 0)
                 goto skip_controllers;
 
         controllers = set_new(&string_hash_ops);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index fcf14bba4c..429b6ddc4f 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -332,7 +332,7 @@ static int detect_unified_cgroup_hierarchy(void) {
         }
 
         /* Otherwise inherit the default from the host system */
-        r = cg_unified();
+        r = cg_all_unified();
         if (r < 0)
                 return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
 
-- 
cgit v1.2.3-54-g00ecf


From 5da38d0768bf53f611ccdd47d7d941e1c560e44e Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@fb.com>
Date: Mon, 15 Aug 2016 18:13:36 -0400
Subject: core: use the unified hierarchy for the systemd cgroup controller
 hierarchy

Currently, systemd uses either the legacy hierarchies or the unified hierarchy.
When the legacy hierarchies are used, systemd uses a named legacy hierarchy
mounted on /sys/fs/cgroup/systemd without any kernel controllers for process
management.  Due to the shortcomings in the legacy hierarchy, this involves a
lot of workarounds and complexities.

Because the unified hierarchy can be mounted and used in parallel to legacy
hierarchies, there's no reason for systemd to use a legacy hierarchy for
management even if the kernel resource controllers need to be mounted on legacy
hierarchies.  It can simply mount the unified hierarchy under
/sys/fs/cgroup/systemd and use it without affecting other legacy hierarchies.
This disables a significant amount of fragile workaround logics and would allow
using features which depend on the unified hierarchy membership such bpf cgroup
v2 membership test.  In time, this would also allow deleting the said
complexities.

This patch updates systemd so that it prefers the unified hierarchy for the
systemd cgroup controller hierarchy when legacy hierarchies are used for kernel
resource controllers.

* cg_unified(@controller) is introduced which tests whether the specific
  controller in on unified hierarchy and used to choose the unified hierarchy
  code path for process and service management when available.  Kernel
  controller specific operations remain gated by cg_all_unified().

* "systemd.legacy_systemd_cgroup_controller" kernel argument can be used to
  force the use of legacy hierarchy for systemd cgroup controller.

* nspawn: By default nspawn uses the same hierarchies as the host.  If
  UNIFIED_CGROUP_HIERARCHY is set to 1, unified hierarchy is used for all.  If
  0, legacy for all.

* nspawn: arg_unified_cgroup_hierarchy is made an enum and now encodes one of
  three options - legacy, only systemd controller on unified, and unified.  The
  value is passed into mount setup functions and controls cgroup configuration.

* nspawn: Interpretation of SYSTEMD_CGROUP_CONTROLLER to the actual mount
  option is moved to mount_legacy_cgroup_hierarchy() so that it can take an
  appropriate action depending on the configuration of the host.

v2: - CGroupUnified enum replaces open coded integer values to indicate the
      cgroup operation mode.
    - Various style updates.

v3: Fixed a bug in detect_unified_cgroup_hierarchy() introduced during v2.

v4: Restored legacy container on unified host support and fixed another bug in
    detect_unified_cgroup_hierarchy().
---
 src/basic/cgroup-util.c    | 113 ++++++++++++++++++++++++++++++++++++---------
 src/basic/cgroup-util.h    |  10 ++++
 src/core/cgroup.c          |  24 ++++++----
 src/core/manager.c         |   2 +-
 src/core/mount-setup.c     |   6 ++-
 src/core/scope.c           |   2 +-
 src/core/service.c         |   2 +-
 src/core/unit.c            |   2 +-
 src/nspawn/nspawn-cgroup.c |  13 +++---
 src/nspawn/nspawn-cgroup.h |   6 ++-
 src/nspawn/nspawn-mount.c  |  42 ++++++++++-------
 src/nspawn/nspawn-mount.h  |   6 ++-
 src/nspawn/nspawn.c        |  26 ++++++++---
 13 files changed, 181 insertions(+), 73 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 5473fec0dd..1ef1de0604 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -869,7 +869,7 @@ int cg_set_task_access(
         if (r < 0)
                 return r;
 
-        unified = cg_all_unified();
+        unified = cg_unified(controller);
         if (unified < 0)
                 return unified;
         if (unified)
@@ -893,18 +893,17 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
         assert(path);
         assert(pid >= 0);
 
-        unified = cg_all_unified();
+        if (controller) {
+                if (!cg_controller_is_valid(controller))
+                        return -EINVAL;
+        } else
+                controller = SYSTEMD_CGROUP_CONTROLLER;
+
+        unified = cg_unified(controller);
         if (unified < 0)
                 return unified;
-        if (unified == 0) {
-                if (controller) {
-                        if (!cg_controller_is_valid(controller))
-                                return -EINVAL;
-                } else
-                        controller = SYSTEMD_CGROUP_CONTROLLER;
-
+        if (unified == 0)
                 cs = strlen(controller);
-        }
 
         fs = procfs_file_alloca(pid, "cgroup");
         f = fopen(fs, "re");
@@ -969,7 +968,7 @@ int cg_install_release_agent(const char *controller, const char *agent) {
 
         assert(agent);
 
-        unified = cg_all_unified();
+        unified = cg_unified(controller);
         if (unified < 0)
                 return unified;
         if (unified) /* doesn't apply to unified hierarchy */
@@ -1020,7 +1019,7 @@ int cg_uninstall_release_agent(const char *controller) {
         _cleanup_free_ char *fs = NULL;
         int r, unified;
 
-        unified = cg_all_unified();
+        unified = cg_unified(controller);
         if (unified < 0)
                 return unified;
         if (unified) /* Doesn't apply to unified hierarchy */
@@ -1076,7 +1075,7 @@ int cg_is_empty_recursive(const char *controller, const char *path) {
         if (controller && (isempty(path) || path_equal(path, "/")))
                 return false;
 
-        unified = cg_all_unified();
+        unified = cg_unified(controller);
         if (unified < 0)
                 return unified;
 
@@ -2224,9 +2223,10 @@ int cg_kernel_controllers(Set *controllers) {
         return 0;
 }
 
-static thread_local int unified_cache = -1;
+static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
+
+static int cg_update_unified(void) {
 
-int cg_all_unified(void) {
         struct statfs fs;
 
         /* Checks if we support the unified hierarchy. Returns an
@@ -2234,24 +2234,47 @@ int cg_all_unified(void) {
          * have any other trouble determining if the unified hierarchy
          * is supported. */
 
-        if (unified_cache >= 0)
-                return unified_cache;
+        if (unified_cache >= CGROUP_UNIFIED_NONE)
+                return 0;
 
         if (statfs("/sys/fs/cgroup/", &fs) < 0)
                 return -errno;
 
         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
-                unified_cache = true;
-        else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
-                unified_cache = false;
-        else
+                unified_cache = CGROUP_UNIFIED_ALL;
+        else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
+                if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
+                        return -errno;
+
+                unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ?
+                        CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE;
+        } else
                 return -ENOMEDIUM;
 
-        return unified_cache;
+        return 0;
+}
+
+int cg_unified(const char *controller) {
+
+        int r;
+
+        r = cg_update_unified();
+        if (r < 0)
+                return r;
+
+        if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER))
+                return unified_cache >= CGROUP_UNIFIED_SYSTEMD;
+        else
+                return unified_cache >= CGROUP_UNIFIED_ALL;
+}
+
+int cg_all_unified(void) {
+
+        return cg_unified(NULL);
 }
 
 void cg_unified_flush(void) {
-        unified_cache = -1;
+        unified_cache = CGROUP_UNIFIED_UNKNOWN;
 }
 
 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
@@ -2333,6 +2356,50 @@ bool cg_is_legacy_wanted(void) {
         return !cg_is_unified_wanted();
 }
 
+bool cg_is_unified_systemd_controller_wanted(void) {
+        static thread_local int wanted = -1;
+        int r, unified;
+
+        /* If the unified hierarchy is requested in full, no need to
+         * bother with this. */
+        if (cg_is_unified_wanted())
+                return 0;
+
+        /* If the hierarchy is already mounted, then follow whatever
+         * was chosen for it. */
+        unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+        if (unified >= 0)
+                return unified;
+
+        /* Otherwise, let's see what the kernel command line has to
+         * say. Since checking that is expensive, let's cache the
+         * result. */
+        if (wanted >= 0)
+                return wanted;
+
+        r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller", NULL);
+        if (r > 0) {
+                wanted = false;
+        } else {
+                _cleanup_free_ char *value = NULL;
+
+                r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller=", &value);
+                if (r < 0)
+                        return true;
+
+                if (r == 0)
+                        wanted = true;
+                else
+                        wanted = parse_boolean(value) <= 0;
+        }
+
+        return wanted;
+}
+
+bool cg_is_legacy_systemd_controller_wanted(void) {
+        return cg_is_legacy_wanted() && !cg_is_unified_systemd_controller_wanted();
+}
+
 int cg_weight_parse(const char *s, uint64_t *ret) {
         uint64_t u;
         int r;
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 5fa856affc..5d9bee50f5 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -112,6 +112,13 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
             (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
 }
 
+typedef enum CGroupUnified {
+        CGROUP_UNIFIED_UNKNOWN = -1,
+        CGROUP_UNIFIED_NONE = 0,        /* Both systemd and controllers on legacy */
+        CGROUP_UNIFIED_SYSTEMD = 1,     /* Only systemd on unified */
+        CGROUP_UNIFIED_ALL = 2,         /* Both systemd and controllers on unified */
+} CGroupUnified;
+
 /*
  * General rules:
  *
@@ -226,10 +233,13 @@ int cg_kernel_controllers(Set *controllers);
 bool cg_ns_supported(void);
 
 int cg_all_unified(void);
+int cg_unified(const char *controller);
 void cg_unified_flush(void);
 
 bool cg_is_unified_wanted(void);
 bool cg_is_legacy_wanted(void);
+bool cg_is_unified_systemd_controller_wanted(void);
+bool cg_is_legacy_systemd_controller_wanted(void);
 
 const char* cgroup_controller_to_string(CGroupController c) _const_;
 CGroupController cgroup_controller_from_string(const char *s) _pure_;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7c6fc01c0e..4b73a9ac13 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1245,7 +1245,7 @@ int unit_watch_cgroup(Unit *u) {
                 return 0;
 
         /* Only applies to the unified hierarchy */
-        r = cg_all_unified();
+        r = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
         if (r < 0)
                 return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
         if (r == 0)
@@ -1645,7 +1645,7 @@ int unit_watch_all_pids(Unit *u) {
         if (!u->cgroup_path)
                 return -ENOENT;
 
-        if (cg_all_unified() > 0) /* On unified we can use proper notifications */
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */
                 return 0;
 
         return unit_watch_pids_in_path(u, u->cgroup_path);
@@ -1718,7 +1718,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
 int manager_setup_cgroup(Manager *m) {
         _cleanup_free_ char *path = NULL;
         CGroupController c;
-        int r, unified;
+        int r, all_unified, systemd_unified;
         char *e;
 
         assert(m);
@@ -1755,11 +1755,17 @@ int manager_setup_cgroup(Manager *m) {
         if (r < 0)
                 return log_error_errno(r, "Cannot find cgroup mount point: %m");
 
-        unified = cg_all_unified();
-        if (unified < 0)
-                return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
-        if (unified > 0)
+        all_unified = cg_all_unified();
+        systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+
+        if (all_unified < 0 || systemd_unified < 0)
+                return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
+                                       "Couldn't determine if we are running in the unified hierarchy: %m");
+
+        if (all_unified > 0)
                 log_debug("Unified cgroup hierarchy is located at %s.", path);
+        else if (systemd_unified > 0)
+                log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
         else
                 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
 
@@ -1767,7 +1773,7 @@ int manager_setup_cgroup(Manager *m) {
                 const char *scope_path;
 
                 /* 3. Install agent */
-                if (unified) {
+                if (systemd_unified) {
 
                         /* In the unified hierarchy we can get
                          * cgroup empty notifications via inotify. */
@@ -1827,7 +1833,7 @@ int manager_setup_cgroup(Manager *m) {
                         return log_error_errno(errno, "Failed to open pin file: %m");
 
                 /* 6.  Always enable hierarchical support if it exists... */
-                if (!unified)
+                if (!all_unified)
                         (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
         }
 
diff --git a/src/core/manager.c b/src/core/manager.c
index 108591c464..5c58e64a1b 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -766,7 +766,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
         if (!MANAGER_IS_SYSTEM(m))
                 return 0;
 
-        if (cg_all_unified() > 0) /* We don't need this anymore on the unified hierarchy */
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */
                 return 0;
 
         if (m->cgroups_agent_fd < 0) {
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 5d8ab0ec70..ca63a93e8b 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -99,10 +99,12 @@ static const MountPoint mount_table[] = {
           cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER },
         { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=755",                MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
           cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+        { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup2",    NULL,                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
+          cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER },
         { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_legacy_wanted, MNT_IN_CONTAINER           },
+          cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER           },
         { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",       MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+          cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER },
         { "pstore",      "/sys/fs/pstore",            "pstore",     NULL,                      MS_NOSUID|MS_NOEXEC|MS_NODEV,
           NULL,          MNT_NONE                   },
 #ifdef ENABLE_EFI
diff --git a/src/core/scope.c b/src/core/scope.c
index 7c72bb7091..65fa65493b 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -441,7 +441,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* If the PID set is empty now, then let's finish this off
            (On unified we use proper notifications) */
-        if (cg_all_unified() <= 0 && set_isempty(u->pids))
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
                 scope_notify_cgroup_empty_event(u);
 }
 
diff --git a/src/core/service.c b/src/core/service.c
index 29e53867bf..ef5f506fa1 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2866,7 +2866,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* If the PID set is empty now, then let's finish this off
            (On unified we use proper notifications) */
-        if (cg_all_unified() <= 0 && set_isempty(u->pids))
+        if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
                 service_notify_cgroup_empty_event(u);
 }
 
diff --git a/src/core/unit.c b/src/core/unit.c
index b24c32569c..d690cf89de 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3695,7 +3695,7 @@ int unit_kill_context(
                          * there we get proper events. Hence rely on
                          * them.*/
 
-                        if  (cg_all_unified() > 0 ||
+                        if  (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
                              (detect_container() == 0 && !unit_cgroup_delegate(u)))
                                 wait_for_exit = true;
 
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
index ea3cab513c..aa0da04955 100644
--- a/src/nspawn/nspawn-cgroup.c
+++ b/src/nspawn/nspawn-cgroup.c
@@ -20,7 +20,6 @@
 #include <sys/mount.h>
 
 #include "alloc-util.h"
-#include "cgroup-util.h"
 #include "fd-util.h"
 #include "fileio.h"
 #include "mkdir.h"
@@ -63,18 +62,18 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) {
         return 0;
 }
 
-int sync_cgroup(pid_t pid, bool unified_requested) {
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested) {
         _cleanup_free_ char *cgroup = NULL;
         char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
         bool undo_mount = false;
         const char *fn;
         int unified, r;
 
-        unified = cg_all_unified();
+        unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
         if (unified < 0)
                 return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
 
-        if ((unified > 0) == unified_requested)
+        if ((unified > 0) == (unified_requested >= CGROUP_UNIFIED_SYSTEMD))
                 return 0;
 
         /* When the host uses the legacy cgroup setup, but the
@@ -117,7 +116,7 @@ finish:
         return r;
 }
 
-int create_subcgroup(pid_t pid, bool unified_requested) {
+int create_subcgroup(pid_t pid, CGroupUnified unified_requested) {
         _cleanup_free_ char *cgroup = NULL;
         const char *child;
         int unified, r;
@@ -129,10 +128,10 @@ int create_subcgroup(pid_t pid, bool unified_requested) {
          * did not create a scope unit for the container move us and
          * the container into two separate subcgroups. */
 
-        if (!unified_requested)
+        if (unified_requested == CGROUP_UNIFIED_NONE)
                 return 0;
 
-        unified = cg_all_unified();
+        unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
         if (unified < 0)
                 return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
         if (unified == 0)
diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h
index 1ff35a299a..dc33da8abe 100644
--- a/src/nspawn/nspawn-cgroup.h
+++ b/src/nspawn/nspawn-cgroup.h
@@ -22,6 +22,8 @@
 #include <stdbool.h>
 #include <sys/types.h>
 
+#include "cgroup-util.h"
+
 int chown_cgroup(pid_t pid, uid_t uid_shift);
-int sync_cgroup(pid_t pid, bool unified_requested);
-int create_subcgroup(pid_t pid, bool unified_requested);
+int sync_cgroup(pid_t pid, CGroupUnified unified_requested);
+int create_subcgroup(pid_t pid, CGroupUnified unified_requested);
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index b5d83d481a..295b75341f 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -21,7 +21,6 @@
 #include <linux/magic.h>
 
 #include "alloc-util.h"
-#include "cgroup-util.h"
 #include "escape.h"
 #include "fd-util.h"
 #include "fileio.h"
@@ -661,7 +660,8 @@ static int get_controllers(Set *subsystems) {
         return 0;
 }
 
-static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
+static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy,
+                                         CGroupUnified unified_requested, bool read_only) {
         char *to;
         int r;
 
@@ -677,7 +677,15 @@ static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controlle
 
         /* The superblock mount options of the mount point need to be
          * identical to the hosts', and hence writable... */
-        if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
+        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+                if (unified_requested >= CGROUP_UNIFIED_SYSTEMD)
+                        r = mount("cgroup", to, "cgroup2", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+                else
+                        r = mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
+        } else
+                r = mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller);
+
+        if (r < 0)
                 return log_error_errno(errno, "Failed to mount to %s: %m", to);
 
         /* ... hence let's only make the bind mount read-only, not the
@@ -691,8 +699,8 @@ static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controlle
 
 /* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */
 static int mount_legacy_cgns_supported(
-                bool userns, uid_t uid_shift, uid_t uid_range,
-                const char *selinux_apifs_context) {
+                CGroupUnified unified_requested, bool userns, uid_t uid_shift,
+                uid_t uid_range, const char *selinux_apifs_context) {
         _cleanup_set_free_free_ Set *controllers = NULL;
         const char *cgroup_root = "/sys/fs/cgroup", *c;
         int r;
@@ -739,7 +747,7 @@ static int mount_legacy_cgns_supported(
                 if (!controller)
                         break;
 
-                r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
+                r = mount_legacy_cgroup_hierarchy("", controller, controller, unified_requested, !userns);
                 if (r < 0)
                         return r;
 
@@ -773,7 +781,7 @@ static int mount_legacy_cgns_supported(
         }
 
 skip_controllers:
-        r = mount_legacy_cgroup_hierarchy("", "none,name=systemd,xattr", "systemd", false);
+        r = mount_legacy_cgroup_hierarchy("", SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
         if (r < 0)
                 return r;
 
@@ -788,7 +796,7 @@ skip_controllers:
 /* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */
 static int mount_legacy_cgns_unsupported(
                 const char *dest,
-                bool userns, uid_t uid_shift, uid_t uid_range,
+                CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range,
                 const char *selinux_apifs_context) {
         _cleanup_set_free_free_ Set *controllers = NULL;
         const char *cgroup_root;
@@ -839,7 +847,7 @@ static int mount_legacy_cgns_unsupported(
                 if (r == -EINVAL) {
                         /* Not a symbolic link, but directly a single cgroup hierarchy */
 
-                        r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
+                        r = mount_legacy_cgroup_hierarchy(dest, controller, controller, unified_requested, true);
                         if (r < 0)
                                 return r;
 
@@ -859,7 +867,7 @@ static int mount_legacy_cgns_unsupported(
                                 continue;
                         }
 
-                        r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
+                        r = mount_legacy_cgroup_hierarchy(dest, combined, combined, unified_requested, true);
                         if (r < 0)
                                 return r;
 
@@ -872,7 +880,7 @@ static int mount_legacy_cgns_unsupported(
         }
 
 skip_controllers:
-        r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
+        r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER, "systemd", unified_requested, false);
         if (r < 0)
                 return r;
 
@@ -914,22 +922,22 @@ static int mount_unified_cgroups(const char *dest) {
 
 int mount_cgroups(
                 const char *dest,
-                bool unified_requested,
+                CGroupUnified unified_requested,
                 bool userns, uid_t uid_shift, uid_t uid_range,
                 const char *selinux_apifs_context,
                 bool use_cgns) {
 
-        if (unified_requested)
+        if (unified_requested >= CGROUP_UNIFIED_ALL)
                 return mount_unified_cgroups(dest);
         else if (use_cgns && cg_ns_supported())
-                return mount_legacy_cgns_supported(userns, uid_shift, uid_range, selinux_apifs_context);
+                return mount_legacy_cgns_supported(unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
 
-        return mount_legacy_cgns_unsupported(dest, userns, uid_shift, uid_range, selinux_apifs_context);
+        return mount_legacy_cgns_unsupported(dest, unified_requested, userns, uid_shift, uid_range, selinux_apifs_context);
 }
 
 int mount_systemd_cgroup_writable(
                 const char *dest,
-                bool unified_requested) {
+                CGroupUnified unified_requested) {
 
         _cleanup_free_ char *own_cgroup_path = NULL;
         const char *systemd_root, *systemd_own;
@@ -945,7 +953,7 @@ int mount_systemd_cgroup_writable(
         if (path_equal(own_cgroup_path, "/"))
                 return 0;
 
-        if (unified_requested) {
+        if (unified_requested >= CGROUP_UNIFIED_ALL) {
                 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
                 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
         } else {
diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h
index 0eff8e1006..7307a838a5 100644
--- a/src/nspawn/nspawn-mount.h
+++ b/src/nspawn/nspawn-mount.h
@@ -21,6 +21,8 @@
 
 #include <stdbool.h>
 
+#include "cgroup-util.h"
+
 typedef enum VolatileMode {
         VOLATILE_NO,
         VOLATILE_YES,
@@ -58,8 +60,8 @@ int custom_mount_compare(const void *a, const void *b);
 int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
 int mount_sysfs(const char *dest);
 
-int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool use_cgns);
-int mount_systemd_cgroup_writable(const char *dest, bool unified_requested);
+int mount_cgroups(const char *dest, CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool use_cgns);
+int mount_systemd_cgroup_writable(const char *dest, CGroupUnified unified_requested);
 
 int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
 
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 429b6ddc4f..24d243109a 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -188,7 +188,7 @@ static UserNamespaceMode arg_userns_mode = USER_NAMESPACE_NO;
 static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
 static bool arg_userns_chown = false;
 static int arg_kill_signal = 0;
-static bool arg_unified_cgroup_hierarchy = false;
+static CGroupUnified arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_UNKNOWN;
 static SettingsMask arg_settings_mask = 0;
 static int arg_settings_trusted = -1;
 static char **arg_parameters = NULL;
@@ -318,7 +318,14 @@ static int custom_mounts_prepare(void) {
 
 static int detect_unified_cgroup_hierarchy(void) {
         const char *e;
-        int r;
+        int r, all_unified, systemd_unified;
+
+        all_unified = cg_all_unified();
+        systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+
+        if (all_unified < 0 || systemd_unified < 0)
+                return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
+                                       "Failed to determine whether the unified cgroups hierarchy is used: %m");
 
         /* Allow the user to control whether the unified hierarchy is used */
         e = getenv("UNIFIED_CGROUP_HIERARCHY");
@@ -326,17 +333,22 @@ static int detect_unified_cgroup_hierarchy(void) {
                 r = parse_boolean(e);
                 if (r < 0)
                         return log_error_errno(r, "Failed to parse $UNIFIED_CGROUP_HIERARCHY.");
+                if (r > 0)
+                        arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+                else
+                        arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
 
-                arg_unified_cgroup_hierarchy = r;
                 return 0;
         }
 
         /* Otherwise inherit the default from the host system */
-        r = cg_all_unified();
-        if (r < 0)
-                return log_error_errno(r, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+        if (all_unified > 0)
+                arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_ALL;
+        else if (systemd_unified > 0)
+                arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_SYSTEMD;
+        else
+                arg_unified_cgroup_hierarchy = CGROUP_UNIFIED_NONE;
 
-        arg_unified_cgroup_hierarchy = r;
         return 0;
 }
 
-- 
cgit v1.2.3-54-g00ecf


From b4c990e91b33ec0130c428a1960a60fcaa6785bc Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 28 Jul 2016 09:50:28 +0200
Subject: unit: remove orphaned cgroup_netclass_id field

---
 src/core/unit.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/unit.h b/src/core/unit.h
index a6c69938dd..513ea1614c 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -195,8 +195,6 @@ struct Unit {
         CGroupMask cgroup_members_mask;
         int cgroup_inotify_wd;
 
-        uint32_t cgroup_netclass_id;
-
         /* How to start OnFailure units */
         JobMode on_failure_job_mode;
 
-- 
cgit v1.2.3-54-g00ecf


From 92b25bcabb75846928f5ea1417808dab981d5e25 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 29 Jul 2016 16:22:30 +0200
Subject: core: make use of uid_is_valid() when checking for UID validity

---
 src/core/execute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 6019df7ea6..18bb421cab 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1862,7 +1862,7 @@ static int exec_child(
                         return r;
                 }
 
-                if (uid == UID_INVALID || gid == GID_INVALID) {
+                if (!uid_is_valid(uid) || !gid_is_valid(gid)) {
                         *exit_status = EXIT_USER;
                         return -ESRCH;
                 }
-- 
cgit v1.2.3-54-g00ecf


From 51d73fd96a55810ca40324eec098e66c6657699b Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 1 Aug 2016 17:40:37 +0200
Subject: core: move obsolete properties to the end of vtables

This makes it easier to discern the relevant and obsolete parts of the vtables,
and in particular helps when comparing introspection data with the actual
vtable definitions.
---
 src/core/dbus-service.c | 11 ++++++-----
 src/core/dbus-unit.c    | 11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'src/core')

diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index fab3677a01..5eac876a6e 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -50,11 +50,6 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
-        /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
-        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -77,6 +72,12 @@ const sd_bus_vtable bus_service_vtable[] = {
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+
+        /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
+        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_VTABLE_END
 };
 
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index b55d2cf735..89e56a2e51 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -660,10 +660,6 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
         SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -705,7 +701,6 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* obsolete alias name */
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -721,6 +716,12 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED),
 
+        /* Obsolete properties or obsolete alias names */
+        SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_VTABLE_END
 };
 
-- 
cgit v1.2.3-54-g00ecf


From 00d9ef8560c252d8504be99cb38d1a54d35a9144 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 1 Aug 2016 19:24:40 +0200
Subject: core: add RemoveIPC= setting

This adds the boolean RemoveIPC= setting to service, socket, mount and swap
units (i.e.  all unit types that may invoke processes). if turned on, and the
unit's user/group is not root, all IPC objects of the user/group are removed
when the service is shut down. The life-cycle of the IPC objects is hence bound
to the unit life-cycle.

This is particularly relevant for units with dynamic users, as it is essential
that no objects owned by the dynamic users survive the service exiting. In
fact, this patch adds code to imply RemoveIPC= if DynamicUser= is set.

In order to communicate the UID/GID of an executed process back to PID 1 this
adds a new "user lookup" socket pair, that is inherited into the forked
processes, and closed before the exec(). This is needed since we cannot do NSS
from PID 1 due to deadlock risks, However need to know the used UID/GID in
order to clean up IPC owned by it if the unit shuts down.
---
 man/systemd.exec.xml       |  60 ++++---
 src/core/dbus-execute.c    |   5 +-
 src/core/dbus-mount.c      |   2 +
 src/core/dbus-service.c    |   3 +
 src/core/dbus-socket.c     |   2 +
 src/core/dbus-swap.c       |   2 +
 src/core/execute.c         |  46 ++++-
 src/core/execute.h         |   1 +
 src/core/manager.c         | 411 ++++++++++++++++++++++++++++++++++++++++++++-
 src/core/manager.h         |  23 ++-
 src/core/mount.c           |   2 +
 src/core/service.c         |   3 +
 src/core/socket.c          |   2 +
 src/core/swap.c            |   2 +
 src/core/unit.c            | 174 ++++++++++++++++++-
 src/core/unit.h            |  18 +-
 src/login/logind-user.c    |  11 +-
 src/shared/bus-unit-util.c |   2 +-
 src/shared/clean-ipc.c     |  66 +++++---
 src/shared/clean-ipc.h     |   4 +-
 src/test/test-ipcrm.c      |   2 +-
 21 files changed, 777 insertions(+), 64 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index bf82326096..bcedebd5bb 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -160,14 +160,14 @@
         use. However, UID/GIDs are recycled after a unit is terminated. Care should be taken that any processes running
         as part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by these
         users/groups around, as a different unit might get the same UID/GID assigned later on, and thus gain access to
-        these files or directories. If <varname>DynamicUser=</varname> is enabled, <varname>PrivateTmp=</varname> is
-        implied. This ensures that the lifetime of temporary files created by the executed processes is bound to the
-        runtime of the service, and hence the lifetime of the dynamic user/group. Since <filename>/tmp</filename> and
-        <filename>/var/tmp</filename> are usually the only world-writable directories on a system this ensures that a
-        unit making use of dynamic user/group allocation cannot leave files around after unit termination. Use
-        <varname>RuntimeDirectory=</varname> (see below) in order to assign a writable runtime directory to a service,
-        owned by the dynamic user/group and removed automatically when the unit is terminated. Defaults to
-        off.</para></listitem>
+        these files or directories. If <varname>DynamicUser=</varname> is enabled, <varname>RemoveIPC=</varname> and
+        <varname>PrivateTmp=</varname> are implied. This ensures that the lifetime of IPC objects and temporary files
+        created by the executed processes is bound to the runtime of the service, and hence the lifetime of the dynamic
+        user/group. Since <filename>/tmp</filename> and <filename>/var/tmp</filename> are usually the only
+        world-writable directories on a system this ensures that a unit making use of dynamic user/group allocation
+        cannot leave files around after unit termination. Use <varname>RuntimeDirectory=</varname> (see below) in order
+        to assign a writable runtime directory to a service, owned by the dynamic user/group and removed automatically
+        when the unit is terminated. Defaults to off.</para></listitem>
       </varlistentry>
 
       <varlistentry>
@@ -185,6 +185,18 @@
         user. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>RemoveIPC=</varname></term>
+
+        <listitem><para>Takes a boolean parameter. If set, all System V and POSIX IPC objects owned by the user and
+        group the processes of this unit are run as are removed when the unit is stopped. This setting only has an
+        effect if at least one of <varname>User=</varname>, <varname>Group=</varname> and
+        <varname>DynamicUser=</varname> are used. It has no effect on IPC objects owned by the root user. Specifically,
+        this removes System V semaphores, as well as System V and POSIX shared memory segments and message queues. If
+        multiple units use the same user or group the IPC objects are removed when the last of these units is
+        stopped. This setting is implied if <varname>DynamicUser=</varname> is set.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>Nice=</varname></term>
 
@@ -920,27 +932,19 @@
       <varlistentry>
         <term><varname>PrivateTmp=</varname></term>
 
-        <listitem><para>Takes a boolean argument. If true, sets up a
-        new file system namespace for the executed processes and
-        mounts private <filename>/tmp</filename> and
-        <filename>/var/tmp</filename> directories inside it that is
-        not shared by processes outside of the namespace. This is
-        useful to secure access to temporary files of the process, but
-        makes sharing between processes via <filename>/tmp</filename>
-        or <filename>/var/tmp</filename> impossible. If this is
-        enabled, all temporary files created by a service in these
-        directories will be removed after the service is stopped.
-        Defaults to false. It is possible to run two or more units
-        within the same private <filename>/tmp</filename> and
-        <filename>/var/tmp</filename> namespace by using the
+        <listitem><para>Takes a boolean argument. If true, sets up a new file system namespace for the executed
+        processes and mounts private <filename>/tmp</filename> and <filename>/var/tmp</filename> directories inside it
+        that is not shared by processes outside of the namespace. This is useful to secure access to temporary files of
+        the process, but makes sharing between processes via <filename>/tmp</filename> or <filename>/var/tmp</filename>
+        impossible. If this is enabled, all temporary files created by a service in these directories will be removed
+        after the service is stopped.  Defaults to false. It is possible to run two or more units within the same
+        private <filename>/tmp</filename> and <filename>/var/tmp</filename> namespace by using the
         <varname>JoinsNamespaceOf=</varname> directive, see
-        <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-        for details. Note that using this setting will disconnect
-        propagation of mounts from the service to the host
-        (propagation in the opposite direction continues to work).
-        This means that this setting may not be used for services
-        which shall be able to install mount points in the main mount
-        namespace.</para></listitem>
+        <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
+        details. Note that using this setting will disconnect propagation of mounts from the service to the host
+        (propagation in the opposite direction continues to work).  This means that this setting may not be used for
+        services which shall be able to install mount points in the main mount namespace. This setting is implied if
+        <varname>DynamicUser=</varname> is set.</para></listitem>
       </varlistentry>
 
       <varlistentry>
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index e35d3ccd2e..7e33a2d201 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -695,6 +695,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
@@ -1071,7 +1072,7 @@ int bus_exec_context_set_transient_property(
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
                               "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser")) {
+                              "RestrictRealtime", "DynamicUser", "RemoveIPC")) {
                 int b;
 
                 r = sd_bus_message_read(message, "b", &b);
@@ -1103,6 +1104,8 @@ int bus_exec_context_set_transient_property(
                                 c->restrict_realtime = b;
                         else if (streq(name, "DynamicUser"))
                                 c->dynamic_user = b;
+                        else if (streq(name, "RemoveIPC"))
+                                c->remove_ipc = b;
 
                         unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
                 }
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index b4bbee0648..3c6bda4073 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -117,6 +117,8 @@ const sd_bus_vtable bus_mount_vtable[] = {
         SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 5eac876a6e..3c55e0f7fe 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -65,6 +65,9 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
         BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index 9a071a1355..21adb64e15 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -152,6 +152,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
         SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c
index 292f8738c6..85a2c26b98 100644
--- a/src/core/dbus-swap.c
+++ b/src/core/dbus-swap.c
@@ -84,6 +84,8 @@ const sd_bus_vtable bus_swap_vtable[] = {
         SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
         SD_BUS_VTABLE_END
diff --git a/src/core/execute.c b/src/core/execute.c
index 18bb421cab..4c786a2e33 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1723,11 +1723,12 @@ static int close_remaining_fds(
                 const ExecParameters *params,
                 ExecRuntime *runtime,
                 DynamicCreds *dcreds,
+                int user_lookup_fd,
                 int socket_fd,
                 int *fds, unsigned n_fds) {
 
         unsigned n_dont_close = 0;
-        int dont_close[n_fds + 11];
+        int dont_close[n_fds + 12];
 
         assert(params);
 
@@ -1755,9 +1756,40 @@ static int close_remaining_fds(
                         append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
         }
 
+        if (user_lookup_fd >= 0)
+                dont_close[n_dont_close++] = user_lookup_fd;
+
         return close_all_fds(dont_close, n_dont_close);
 }
 
+static int send_user_lookup(
+                Unit *unit,
+                int user_lookup_fd,
+                uid_t uid,
+                gid_t gid) {
+
+        assert(unit);
+
+        /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
+         * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
+         * specified. */
+
+        if (user_lookup_fd < 0)
+                return 0;
+
+        if (!uid_is_valid(uid) && !gid_is_valid(gid))
+                return 0;
+
+        if (writev(user_lookup_fd,
+               (struct iovec[]) {
+                           { .iov_base = &uid, .iov_len = sizeof(uid) },
+                           { .iov_base = &gid, .iov_len = sizeof(gid) },
+                           { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+                return -errno;
+
+        return 0;
+}
+
 static int exec_child(
                 Unit *unit,
                 ExecCommand *command,
@@ -1769,6 +1801,7 @@ static int exec_child(
                 int socket_fd,
                 int *fds, unsigned n_fds,
                 char **files_env,
+                int user_lookup_fd,
                 int *exit_status) {
 
         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
@@ -1815,7 +1848,7 @@ static int exec_child(
 
         log_forget_fds();
 
-        r = close_remaining_fds(params, runtime, dcreds, socket_fd, fds, n_fds);
+        r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
                 return r;
@@ -1902,6 +1935,14 @@ static int exec_child(
                 }
         }
 
+        r = send_user_lookup(unit, user_lookup_fd, uid, gid);
+        if (r < 0) {
+                *exit_status = EXIT_USER;
+                return r;
+        }
+
+        user_lookup_fd = safe_close(user_lookup_fd);
+
         /* If a socket is connected to STDIN/STDOUT/STDERR, we
          * must sure to drop O_NONBLOCK */
         if (socket_fd >= 0)
@@ -2501,6 +2542,7 @@ int exec_spawn(Unit *unit,
                                socket_fd,
                                fds, n_fds,
                                files_env,
+                               unit->manager->user_lookup_fds[1],
                                &exit_status);
                 if (r < 0) {
                         log_open();
diff --git a/src/core/execute.h b/src/core/execute.h
index 106154f81a..6082c42aba 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -178,6 +178,7 @@ struct ExecContext {
         bool no_new_privileges;
 
         bool dynamic_user;
+        bool remove_ipc;
 
         /* This is not exposed to the user but available
          * internally. We need it to make sure that whenever we spawn
diff --git a/src/core/manager.c b/src/core/manager.c
index c20e185d78..6f2477eef4 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -45,6 +45,7 @@
 #include "bus-error.h"
 #include "bus-kernel.h"
 #include "bus-util.h"
+#include "clean-ipc.h"
 #include "dbus-job.h"
 #include "dbus-manager.h"
 #include "dbus-unit.h"
@@ -81,6 +82,7 @@
 #include "transaction.h"
 #include "umask-util.h"
 #include "unit-name.h"
+#include "user-util.h"
 #include "util.h"
 #include "virt.h"
 #include "watchdog.h"
@@ -98,6 +100,7 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui
 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
 static int manager_run_generators(Manager *m);
@@ -590,6 +593,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
                 m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd =
                 m->ask_password_inotify_fd = -1;
 
+        m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
+
         m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
 
         m->have_ask_password = -EINVAL; /* we don't know */
@@ -812,6 +817,59 @@ static int manager_setup_cgroups_agent(Manager *m) {
         return 0;
 }
 
+static int manager_setup_user_lookup_fd(Manager *m) {
+        int r;
+
+        assert(m);
+
+        /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
+         * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
+         * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
+         * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
+         * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
+         * hence we establish this communication channel so that forked off processes can pass their UID/GID
+         * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
+         * datagram, along with their unit name, so that we can share one communication socket pair among all units for
+         * this purpose.
+         *
+         * You might wonder why we need a communication channel for this that is independent of the usual notification
+         * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
+         * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
+         * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
+         *
+         * Note that this function is called under two circumstances: when we first initialize (in which case we
+         * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
+         * (in which case the socket pair already exists but we still need to allocate the event source for it). */
+
+        if (m->user_lookup_fds[0] < 0) {
+
+                /* Free all secondary fields */
+                safe_close_pair(m->user_lookup_fds);
+                m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+
+                if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
+                        return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
+
+                (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
+        }
+
+        if (!m->user_lookup_event_source) {
+                r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
+                if (r < 0)
+                        return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+
+                /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
+                 * resolutions */
+                r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+                if (r < 0)
+                        return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m");
+
+                (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
+        }
+
+        return 0;
+}
+
 static int manager_connect_bus(Manager *m, bool reexecuting) {
         bool try_bus_connect;
 
@@ -853,8 +911,7 @@ enum {
         _GC_OFFSET_MAX
 };
 
-static void unit_gc_mark_good(Unit *u, unsigned gc_marker)
-{
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
         Iterator i;
         Unit *other;
 
@@ -1021,12 +1078,14 @@ Manager* manager_free(Manager *m) {
         sd_event_source_unref(m->time_change_event_source);
         sd_event_source_unref(m->jobs_in_progress_event_source);
         sd_event_source_unref(m->run_queue_event_source);
+        sd_event_source_unref(m->user_lookup_event_source);
 
         safe_close(m->signal_fd);
         safe_close(m->notify_fd);
         safe_close(m->cgroups_agent_fd);
         safe_close(m->time_change_fd);
         safe_close(m->kdbus_fd);
+        safe_close_pair(m->user_lookup_fds);
 
         manager_close_ask_password(m);
 
@@ -1052,6 +1111,9 @@ Manager* manager_free(Manager *m) {
         assert(hashmap_isempty(m->units_requiring_mounts_for));
         hashmap_free(m->units_requiring_mounts_for);
 
+        hashmap_free(m->uid_refs);
+        hashmap_free(m->gid_refs);
+
         free(m);
         return NULL;
 }
@@ -1221,6 +1283,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
         if (q < 0 && r == 0)
                 r = q;
 
+        q = manager_setup_user_lookup_fd(m);
+        if (q < 0 && r == 0)
+                r = q;
+
         /* We might have deserialized the kdbus control fd, but if we
          * didn't, then let's create the bus now. */
         manager_connect_bus(m, !!serialization);
@@ -1232,6 +1298,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
         /* Release any dynamic users no longer referenced */
         dynamic_user_vacuum(m, true);
 
+        /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+        manager_vacuum_uid_refs(m);
+        manager_vacuum_gid_refs(m);
+
         if (serialization) {
                 assert(m->n_reloading > 0);
                 m->n_reloading--;
@@ -2396,6 +2466,20 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
                 fprintf(f, "cgroups-agent-fd=%i\n", copy);
         }
 
+        if (m->user_lookup_fds[0] >= 0) {
+                int copy0, copy1;
+
+                copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
+                if (copy0 < 0)
+                        return copy0;
+
+                copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
+                if (copy1 < 0)
+                        return copy1;
+
+                fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
+        }
+
         if (m->kdbus_fd >= 0) {
                 int copy;
 
@@ -2412,6 +2496,9 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
         if (r < 0)
                 return r;
 
+        manager_serialize_uid_refs(m, f);
+        manager_serialize_gid_refs(m, f);
+
         fputc('\n', f);
 
         HASHMAP_FOREACH_KEY(u, t, m->units, i) {
@@ -2578,6 +2665,18 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                                 m->cgroups_agent_fd = fdset_remove(fds, fd);
                         }
 
+                } else if (startswith(l, "user-lookup=")) {
+                        int fd0, fd1;
+
+                        if (sscanf(l + 12, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
+                                log_debug("Failed to parse user lookup fd: %s", l + 12);
+                        else {
+                                m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+                                safe_close_pair(m->user_lookup_fds);
+                                m->user_lookup_fds[0] = fdset_remove(fds, fd0);
+                                m->user_lookup_fds[1] = fdset_remove(fds, fd1);
+                        }
+
                 } else if (startswith(l, "kdbus-fd=")) {
                         int fd;
 
@@ -2590,6 +2689,10 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
 
                 } else if (startswith(l, "dynamic-user="))
                         dynamic_user_deserialize_one(m, l + 13, fds);
+                else if (startswith(l, "destroy-ipc-uid="))
+                        manager_deserialize_uid_refs_one(m, l + 16);
+                else if (startswith(l, "destroy-ipc-gid="))
+                        manager_deserialize_gid_refs_one(m, l + 16);
                 else {
                         int k;
 
@@ -2672,6 +2775,8 @@ int manager_reload(Manager *m) {
         lookup_paths_flush_generator(&m->lookup_paths);
         lookup_paths_free(&m->lookup_paths);
         dynamic_user_vacuum(m, false);
+        m->uid_refs = hashmap_free(m->uid_refs);
+        m->gid_refs = hashmap_free(m->gid_refs);
 
         q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
         if (q < 0 && r >= 0)
@@ -2705,12 +2810,20 @@ int manager_reload(Manager *m) {
         if (q < 0 && r >= 0)
                 r = q;
 
+        q = manager_setup_user_lookup_fd(m);
+        if (q < 0 && r >= 0)
+                r = q;
+
         /* Third, fire things up! */
         manager_coldplug(m);
 
         /* Release any dynamic users no longer referenced */
         dynamic_user_vacuum(m, true);
 
+        /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+        manager_vacuum_uid_refs(m);
+        manager_vacuum_gid_refs(m);
+
         /* Sync current state of bus names with our set of listening units */
         if (m->api_bus)
                 manager_sync_bus_names(m, m->api_bus);
@@ -3144,6 +3257,300 @@ ManagerState manager_state(Manager *m) {
         return MANAGER_RUNNING;
 }
 
+#define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
+
+static void manager_unref_uid_internal(
+                Manager *m,
+                Hashmap **uid_refs,
+                uid_t uid,
+                bool destroy_now,
+                int (*_clean_ipc)(uid_t uid)) {
+
+        uint32_t c, n;
+
+        assert(m);
+        assert(uid_refs);
+        assert(uid_is_valid(uid));
+        assert(_clean_ipc);
+
+        /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
+         * that uid_t and gid_t are actually defined the same way, with the same validity rules.
+         *
+         * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
+         * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
+         * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
+         * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (uid == 0) /* We don't keep track of root, and will never destroy it */
+                return;
+
+        c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+        n = c & ~DESTROY_IPC_FLAG;
+        assert(n > 0);
+        n--;
+
+        if (destroy_now && n == 0) {
+                hashmap_remove(*uid_refs, UID_TO_PTR(uid));
+
+                if (c & DESTROY_IPC_FLAG) {
+                        log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
+                                  _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+                                  uid);
+                        (void) _clean_ipc(uid);
+                }
+        } else {
+                c = n | (c & DESTROY_IPC_FLAG);
+                assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
+        }
+}
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
+        manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
+}
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
+        manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
+}
+
+static int manager_ref_uid_internal(
+                Manager *m,
+                Hashmap **uid_refs,
+                uid_t uid,
+                bool clean_ipc) {
+
+        uint32_t c, n;
+        int r;
+
+        assert(m);
+        assert(uid_refs);
+        assert(uid_is_valid(uid));
+
+        /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
+         * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (uid == 0) /* We don't keep track of root, and will never destroy it */
+                return 0;
+
+        r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+        if (r < 0)
+                return r;
+
+        c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+        n = c & ~DESTROY_IPC_FLAG;
+        n++;
+
+        if (n & DESTROY_IPC_FLAG) /* check for overflow */
+                return -EOVERFLOW;
+
+        c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
+
+        return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+}
+
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
+        return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
+}
+
+int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
+        return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
+}
+
+static void manager_vacuum_uid_refs_internal(
+                Manager *m,
+                Hashmap **uid_refs,
+                int (*_clean_ipc)(uid_t uid)) {
+
+        Iterator i;
+        void *p, *k;
+
+        assert(m);
+        assert(uid_refs);
+        assert(_clean_ipc);
+
+        HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+                uint32_t c, n;
+                uid_t uid;
+
+                uid = PTR_TO_UID(k);
+                c = PTR_TO_UINT32(p);
+
+                n = c & ~DESTROY_IPC_FLAG;
+                if (n > 0)
+                        continue;
+
+                if (c & DESTROY_IPC_FLAG) {
+                        log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
+                                  _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+                                  uid);
+                        (void) _clean_ipc(uid);
+                }
+
+                assert_se(hashmap_remove(*uid_refs, k) == p);
+        }
+}
+
+void manager_vacuum_uid_refs(Manager *m) {
+        manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
+}
+
+void manager_vacuum_gid_refs(Manager *m) {
+        manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
+}
+
+static void manager_serialize_uid_refs_internal(
+                Manager *m,
+                FILE *f,
+                Hashmap **uid_refs,
+                const char *field_name) {
+
+        Iterator i;
+        void *p, *k;
+
+        assert(m);
+        assert(f);
+        assert(uid_refs);
+        assert(field_name);
+
+        /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter
+         * of it is better rebuild after a reload/reexec. */
+
+        HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+                uint32_t c;
+                uid_t uid;
+
+                uid = PTR_TO_UID(k);
+                c = PTR_TO_UINT32(p);
+
+                if (!(c & DESTROY_IPC_FLAG))
+                        continue;
+
+                fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
+        }
+}
+
+void manager_serialize_uid_refs(Manager *m, FILE *f) {
+        manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
+}
+
+void manager_serialize_gid_refs(Manager *m, FILE *f) {
+        manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
+}
+
+static void manager_deserialize_uid_refs_one_internal(
+                Manager *m,
+                Hashmap** uid_refs,
+                const char *value) {
+
+        uid_t uid;
+        uint32_t c;
+        int r;
+
+        assert(m);
+        assert(uid_refs);
+        assert(value);
+
+        r = parse_uid(value, &uid);
+        if (r < 0 || uid == 0) {
+                log_debug("Unable to parse UID reference serialization");
+                return;
+        }
+
+        r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+        if (r < 0) {
+                log_oom();
+                return;
+        }
+
+        c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+        if (c & DESTROY_IPC_FLAG)
+                return;
+
+        c |= DESTROY_IPC_FLAG;
+
+        r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+        if (r < 0) {
+                log_debug("Failed to add UID reference entry");
+                return;
+        }
+}
+
+void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
+        manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
+}
+
+void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
+        manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
+}
+
+int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        struct buffer {
+                uid_t uid;
+                gid_t gid;
+                char unit_name[UNIT_NAME_MAX+1];
+        } _packed_ buffer;
+
+        Manager *m = userdata;
+        ssize_t l;
+        size_t n;
+        Unit *u;
+
+        assert_se(source);
+        assert_se(m);
+
+        /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
+         * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
+         * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
+
+        l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
+        if (l < 0) {
+                if (errno == EINTR || errno == EAGAIN)
+                        return 0;
+
+                return log_error_errno(errno, "Failed to read from user lookup fd: %m");
+        }
+
+        if ((size_t) l <= offsetof(struct buffer, unit_name)) {
+                log_warning("Received too short user lookup message, ignoring.");
+                return 0;
+        }
+
+        if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
+                log_warning("Received too long user lookup message, ignoring.");
+                return 0;
+        }
+
+        if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
+                log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
+                return 0;
+        }
+
+        n = (size_t) l - offsetof(struct buffer, unit_name);
+        if (memchr(buffer.unit_name, 0, n)) {
+                log_warning("Received lookup message with embedded NUL character, ignoring.");
+                return 0;
+        }
+
+        buffer.unit_name[n] = 0;
+        u = manager_get_unit(m, buffer.unit_name);
+        if (!u) {
+                log_debug("Got user lookup message but unit doesn't exist, ignoring.");
+                return 0;
+        }
+
+        log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
+
+        unit_notify_user_lookup(u, buffer.uid, buffer.gid);
+        return 0;
+}
+
 static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
         [MANAGER_INITIALIZING] = "initializing",
         [MANAGER_STARTING] = "starting",
diff --git a/src/core/manager.h b/src/core/manager.h
index c681d5dc46..b9f2e4b5a1 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -143,6 +143,9 @@ struct Manager {
 
         sd_event_source *jobs_in_progress_event_source;
 
+        int user_lookup_fds[2];
+        sd_event_source *user_lookup_event_source;
+
         UnitFileScope unit_file_scope;
         LookupPaths lookup_paths;
         Set *unit_path_cache;
@@ -234,7 +237,6 @@ struct Manager {
         bool dispatching_dbus_queue:1;
 
         bool taint_usr:1;
-
         bool test_run:1;
 
         /* If non-zero, exit with the following value when the systemd
@@ -301,6 +303,10 @@ struct Manager {
         /* Dynamic users/groups, indexed by their name */
         Hashmap *dynamic_users;
 
+        /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
+        Hashmap *uid_refs;
+        Hashmap *gid_refs;
+
         /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */
         RateLimit ctrl_alt_del_ratelimit;
 
@@ -378,5 +384,20 @@ ManagerState manager_state(Manager *m);
 
 int manager_update_failed_units(Manager *m, Unit *u, bool failed);
 
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
+int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now);
+
+void manager_vacuum_uid_refs(Manager *m);
+void manager_vacuum_gid_refs(Manager *m);
+
+void manager_serialize_uid_refs(Manager *m, FILE *f);
+void manager_deserialize_uid_refs_one(Manager *m, const char *value);
+
+void manager_serialize_gid_refs(Manager *m, FILE *f);
+void manager_deserialize_gid_refs_one(Manager *m, const char *value);
+
 const char *manager_state_to_string(ManagerState m) _const_;
 ManagerState manager_state_from_string(const char *s) _pure_;
diff --git a/src/core/mount.c b/src/core/mount.c
index f3ccf6d48a..f2ac8d171f 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -769,6 +769,8 @@ static void mount_enter_dead(Mount *m, MountResult f) {
 
         exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager));
 
+        unit_unref_uid_gid(UNIT(m), true);
+
         dynamic_creds_destroy(&m->dynamic_creds);
 }
 
diff --git a/src/core/service.c b/src/core/service.c
index 4a37702f52..1951ba9222 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1471,6 +1471,9 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         /* Also, remove the runtime directory */
         exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
 
+        /* Get rid of the IPC bits of the user */
+        unit_unref_uid_gid(UNIT(s), true);
+
         /* Release the user, and destroy it if we are the only remaining owner */
         dynamic_creds_destroy(&s->dynamic_creds);
 
diff --git a/src/core/socket.c b/src/core/socket.c
index 50872e8366..70d55dd9ed 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1905,6 +1905,8 @@ static void socket_enter_dead(Socket *s, SocketResult f) {
 
         exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
 
+        unit_unref_uid_gid(UNIT(s), true);
+
         dynamic_creds_destroy(&s->dynamic_creds);
 }
 
diff --git a/src/core/swap.c b/src/core/swap.c
index 2c802da3b5..fb222b6858 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -683,6 +683,8 @@ static void swap_enter_dead(Swap *s, SwapResult f) {
 
         exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
 
+        unit_unref_uid_gid(UNIT(s), true);
+
         dynamic_creds_destroy(&s->dynamic_creds);
 }
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 952604e0db..4b8d81c3f1 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -100,7 +100,8 @@ Unit *unit_new(Manager *m, size_t size) {
         u->on_failure_job_mode = JOB_REPLACE;
         u->cgroup_inotify_wd = -1;
         u->job_timeout = USEC_INFINITY;
-        u->sigchldgen = 0;
+        u->ref_uid = UID_INVALID;
+        u->ref_gid = GID_INVALID;
 
         RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
         RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -550,6 +551,8 @@ void unit_free(Unit *u) {
 
         unit_release_cgroup(u);
 
+        unit_unref_uid_gid(u, false);
+
         (void) manager_update_failed_units(u->manager, u, false);
         set_remove(u->manager->startup_units, u);
 
@@ -2614,6 +2617,11 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
                 unit_serialize_item(u, f, "cgroup", u->cgroup_path);
         unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
 
+        if (uid_is_valid(u->ref_uid))
+                unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
+        if (gid_is_valid(u->ref_gid))
+                unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
+
         if (serialize_jobs) {
                 if (u->job) {
                         fprintf(f, "job\n");
@@ -2851,6 +2859,28 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                 u->cgroup_realized = b;
 
                         continue;
+
+                } else if (streq(l, "ref-uid")) {
+                        uid_t uid;
+
+                        r = parse_uid(v, &uid);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v);
+                        else
+                                unit_ref_uid_gid(u, uid, GID_INVALID);
+
+                        continue;
+
+                } else if (streq(l, "ref-gid")) {
+                        gid_t gid;
+
+                        r = parse_gid(v, &gid);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v);
+                        else
+                                unit_ref_uid_gid(u, UID_INVALID, gid);
+
+                        continue;
                 }
 
                 if (unit_can_serialize(u)) {
@@ -3310,6 +3340,7 @@ int unit_patch_contexts(Unit *u) {
                         }
 
                         ec->private_tmp = true;
+                        ec->remove_ipc = true;
                 }
         }
 
@@ -3932,3 +3963,144 @@ pid_t unit_main_pid(Unit *u) {
 
         return 0;
 }
+
+static void unit_unref_uid_internal(
+                Unit *u,
+                uid_t *ref_uid,
+                bool destroy_now,
+                void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) {
+
+        assert(u);
+        assert(ref_uid);
+        assert(_manager_unref_uid);
+
+        /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and
+         * gid_t are actually the same time, with the same validity rules.
+         *
+         * Drops a reference to UID/GID from a unit. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (!uid_is_valid(*ref_uid))
+                return;
+
+        _manager_unref_uid(u->manager, *ref_uid, destroy_now);
+        *ref_uid = UID_INVALID;
+}
+
+void unit_unref_uid(Unit *u, bool destroy_now) {
+        unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid);
+}
+
+void unit_unref_gid(Unit *u, bool destroy_now) {
+        unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid);
+}
+
+static int unit_ref_uid_internal(
+                Unit *u,
+                uid_t *ref_uid,
+                uid_t uid,
+                bool clean_ipc,
+                int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) {
+
+        int r;
+
+        assert(u);
+        assert(ref_uid);
+        assert(uid_is_valid(uid));
+        assert(_manager_ref_uid);
+
+        /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t
+         * are actually the same type, and have the same validity rules.
+         *
+         * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a
+         * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter
+         * drops to zero. */
+
+        assert_cc(sizeof(uid_t) == sizeof(gid_t));
+        assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+        if (*ref_uid == uid)
+                return 0;
+
+        if (uid_is_valid(*ref_uid)) /* Already set? */
+                return -EBUSY;
+
+        r = _manager_ref_uid(u->manager, uid, clean_ipc);
+        if (r < 0)
+                return r;
+
+        *ref_uid = uid;
+        return 1;
+}
+
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) {
+        return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid);
+}
+
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) {
+        return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid);
+}
+
+static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) {
+        int r = 0, q = 0;
+
+        assert(u);
+
+        /* Reference both a UID and a GID in one go. Either references both, or neither. */
+
+        if (uid_is_valid(uid)) {
+                r = unit_ref_uid(u, uid, clean_ipc);
+                if (r < 0)
+                        return r;
+        }
+
+        if (gid_is_valid(gid)) {
+                q = unit_ref_gid(u, gid, clean_ipc);
+                if (q < 0) {
+                        if (r > 0)
+                                unit_unref_uid(u, false);
+
+                        return q;
+                }
+        }
+
+        return r > 0 || q > 0;
+}
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) {
+        ExecContext *c;
+        int r;
+
+        assert(u);
+
+        c = unit_get_exec_context(u);
+
+        r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false);
+        if (r < 0)
+                return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m");
+
+        return r;
+}
+
+void unit_unref_uid_gid(Unit *u, bool destroy_now) {
+        assert(u);
+
+        unit_unref_uid(u, destroy_now);
+        unit_unref_gid(u, destroy_now);
+}
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
+        int r;
+
+        assert(u);
+
+        /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names
+         * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC
+         * objects when no service references the UID/GID anymore. */
+
+        r = unit_ref_uid_gid(u, uid, gid);
+        if (r > 0)
+                bus_unit_send_change_signal(u);
+}
diff --git a/src/core/unit.h b/src/core/unit.h
index 513ea1614c..53875653d7 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -180,6 +180,10 @@ struct Unit {
         /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
         RateLimit auto_stop_ratelimit;
 
+        /* Reference to a specific UID/GID */
+        uid_t ref_uid;
+        gid_t ref_gid;
+
         /* Cached unit file state and preset */
         UnitFileState unit_file_state;
         int unit_file_preset;
@@ -371,8 +375,7 @@ struct UnitVTable {
         /* Called whenever a process of this unit sends us a message */
         void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds);
 
-        /* Called whenever a name this Unit registered for comes or
-         * goes away. */
+        /* Called whenever a name this Unit registered for comes or goes away. */
         void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner);
 
         /* Called for each property that is being set */
@@ -621,6 +624,17 @@ int unit_fail_if_symlink(Unit *u, const char* where);
 
 int unit_start_limit_test(Unit *u);
 
+void unit_unref_uid(Unit *u, bool destroy_now);
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc);
+
+void unit_unref_gid(Unit *u, bool destroy_now);
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc);
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid);
+void unit_unref_uid_gid(Unit *u, bool destroy_now);
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
+
 /* Macros which append UNIT= or USER_UNIT= to the message */
 
 #define log_unit_full(unit, level, error, ...)                          \
diff --git a/src/login/logind-user.c b/src/login/logind-user.c
index 63363035e7..11951aca5b 100644
--- a/src/login/logind-user.c
+++ b/src/login/logind-user.c
@@ -612,9 +612,14 @@ int user_finalize(User *u) {
         if (k < 0)
                 r = k;
 
-        /* Clean SysV + POSIX IPC objects */
-        if (u->manager->remove_ipc) {
-                k = clean_ipc(u->uid);
+        /* Clean SysV + POSIX IPC objects, but only if this is not a system user. Background: in many setups cronjobs
+         * are run in full PAM and thus logind sessions, even if the code run doesn't belong to actual users but to
+         * system components. Since enable RemoveIPC= globally for all users, we need to be a bit careful with such
+         * cases, as we shouldn't accidentally remove a system service's IPC objects while it is running, just because
+         * a cronjob running as the same user just finished. Hence: exclude system users generally from IPC clean-up,
+         * and do it only for normal users. */
+        if (u->manager->remove_ipc && u->uid > SYSTEM_UID_MAX) {
+                k = clean_ipc_by_uid(u->uid);
                 if (k < 0)
                         r = k;
         }
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index f9e12e0578..ab30afb527 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -204,7 +204,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
                               "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser")) {
+                              "RestrictRealtime", "DynamicUser", "RemoveIPC")) {
 
                 r = parse_boolean(eq);
                 if (r < 0)
diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c
index 95686348c1..64f9b94641 100644
--- a/src/shared/clean-ipc.c
+++ b/src/shared/clean-ipc.c
@@ -41,8 +41,20 @@
 #include "macro.h"
 #include "string-util.h"
 #include "strv.h"
+#include "user-util.h"
 
-static int clean_sysvipc_shm(uid_t delete_uid) {
+static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) {
+
+        if (uid_is_valid(delete_uid) && subject_uid == delete_uid)
+                return true;
+
+        if (gid_is_valid(delete_gid) && subject_gid == delete_gid)
+                return true;
+
+        return false;
+}
+
+static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid) {
         _cleanup_fclose_ FILE *f = NULL;
         char line[LINE_MAX];
         bool first = true;
@@ -77,7 +89,7 @@ static int clean_sysvipc_shm(uid_t delete_uid) {
                 if (n_attached > 0)
                         continue;
 
-                if (uid != delete_uid)
+                if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
                         continue;
 
                 if (shmctl(shmid, IPC_RMID, NULL) < 0) {
@@ -98,7 +110,7 @@ fail:
         return log_warning_errno(errno, "Failed to read /proc/sysvipc/shm: %m");
 }
 
-static int clean_sysvipc_sem(uid_t delete_uid) {
+static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid) {
         _cleanup_fclose_ FILE *f = NULL;
         char line[LINE_MAX];
         bool first = true;
@@ -128,7 +140,7 @@ static int clean_sysvipc_sem(uid_t delete_uid) {
                            &semid, &uid, &gid, &cuid, &cgid) != 5)
                         continue;
 
-                if (uid != delete_uid)
+                if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
                         continue;
 
                 if (semctl(semid, 0, IPC_RMID) < 0) {
@@ -149,7 +161,7 @@ fail:
         return log_warning_errno(errno, "Failed to read /proc/sysvipc/sem: %m");
 }
 
-static int clean_sysvipc_msg(uid_t delete_uid) {
+static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid) {
         _cleanup_fclose_ FILE *f = NULL;
         char line[LINE_MAX];
         bool first = true;
@@ -180,7 +192,7 @@ static int clean_sysvipc_msg(uid_t delete_uid) {
                            &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7)
                         continue;
 
-                if (uid != delete_uid)
+                if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
                         continue;
 
                 if (msgctl(msgid, IPC_RMID, NULL) < 0) {
@@ -201,7 +213,7 @@ fail:
         return log_warning_errno(errno, "Failed to read /proc/sysvipc/msg: %m");
 }
 
-static int clean_posix_shm_internal(DIR *dir, uid_t uid) {
+static int clean_posix_shm_internal(DIR *dir, uid_t uid, gid_t gid) {
         struct dirent *de;
         int ret = 0, r;
 
@@ -221,7 +233,7 @@ static int clean_posix_shm_internal(DIR *dir, uid_t uid) {
                         continue;
                 }
 
-                if (st.st_uid != uid)
+                if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
                         continue;
 
                 if (S_ISDIR(st.st_mode)) {
@@ -232,7 +244,7 @@ static int clean_posix_shm_internal(DIR *dir, uid_t uid) {
                                 if (errno != ENOENT)
                                         ret = log_warning_errno(errno, "Failed to enter shared memory directory %s: %m", de->d_name);
                         } else {
-                                r = clean_posix_shm_internal(kid, uid);
+                                r = clean_posix_shm_internal(kid, uid, gid);
                                 if (r < 0)
                                         ret = r;
                         }
@@ -262,7 +274,7 @@ fail:
         return log_warning_errno(errno, "Failed to read /dev/shm: %m");
 }
 
-static int clean_posix_shm(uid_t uid) {
+static int clean_posix_shm(uid_t uid, gid_t gid) {
         _cleanup_closedir_ DIR *dir = NULL;
 
         dir = opendir("/dev/shm");
@@ -273,10 +285,10 @@ static int clean_posix_shm(uid_t uid) {
                 return log_warning_errno(errno, "Failed to open /dev/shm: %m");
         }
 
-        return clean_posix_shm_internal(dir, uid);
+        return clean_posix_shm_internal(dir, uid, gid);
 }
 
-static int clean_posix_mq(uid_t uid) {
+static int clean_posix_mq(uid_t uid, gid_t gid) {
         _cleanup_closedir_ DIR *dir = NULL;
         struct dirent *de;
         int ret = 0;
@@ -306,7 +318,7 @@ static int clean_posix_mq(uid_t uid) {
                         continue;
                 }
 
-                if (st.st_uid != uid)
+                if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
                         continue;
 
                 fn[0] = '/';
@@ -328,32 +340,44 @@ fail:
         return log_warning_errno(errno, "Failed to read /dev/mqueue: %m");
 }
 
-int clean_ipc(uid_t uid) {
+int clean_ipc(uid_t uid, gid_t gid) {
         int ret = 0, r;
 
-        /* Refuse to clean IPC of the root and system users */
-        if (uid <= SYSTEM_UID_MAX)
+        /* Anything to do? */
+        if (!uid_is_valid(uid) && !gid_is_valid(gid))
                 return 0;
 
-        r = clean_sysvipc_shm(uid);
+        /* Refuse to clean IPC of the root user */
+        if (uid == 0 && gid == 0)
+                return 0;
+
+        r = clean_sysvipc_shm(uid, gid);
         if (r < 0)
                 ret = r;
 
-        r = clean_sysvipc_sem(uid);
+        r = clean_sysvipc_sem(uid, gid);
         if (r < 0)
                 ret = r;
 
-        r = clean_sysvipc_msg(uid);
+        r = clean_sysvipc_msg(uid, gid);
         if (r < 0)
                 ret = r;
 
-        r = clean_posix_shm(uid);
+        r = clean_posix_shm(uid, gid);
         if (r < 0)
                 ret = r;
 
-        r = clean_posix_mq(uid);
+        r = clean_posix_mq(uid, gid);
         if (r < 0)
                 ret = r;
 
         return ret;
 }
+
+int clean_ipc_by_uid(uid_t uid) {
+        return clean_ipc(uid, GID_INVALID);
+}
+
+int clean_ipc_by_gid(gid_t gid) {
+        return clean_ipc(UID_INVALID, gid);
+}
diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h
index 44a83afcf7..6ca57f44fd 100644
--- a/src/shared/clean-ipc.h
+++ b/src/shared/clean-ipc.h
@@ -21,4 +21,6 @@
 
 #include <sys/types.h>
 
-int clean_ipc(uid_t uid);
+int clean_ipc(uid_t uid, gid_t gid);
+int clean_ipc_by_uid(uid_t uid);
+int clean_ipc_by_gid(gid_t gid);
diff --git a/src/test/test-ipcrm.c b/src/test/test-ipcrm.c
index c5bcaf47bb..551eba7215 100644
--- a/src/test/test-ipcrm.c
+++ b/src/test/test-ipcrm.c
@@ -32,5 +32,5 @@ int main(int argc, char *argv[]) {
                 return EXIT_FAILURE;
         }
 
-        return clean_ipc(uid) < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+        return clean_ipc_by_uid(uid) < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
 }
-- 
cgit v1.2.3-54-g00ecf


From fd63e712b2025d235ce4bfbb512fada10e2690b5 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 2 Aug 2016 12:28:51 +0200
Subject: core: bypass dynamic user lookups from dbus-daemon

dbus-daemon does NSS name look-ups in order to enforce its bus policy. This
might dead-lock if an NSS module use wants to use D-Bus for the look-up itself,
like our nss-systemd does. Let's work around this by bypassing bus
communication in the NSS module if we run inside of dbus-daemon. To make this
work we keep a bit of extra state in /run/systemd/dynamic-uid/ so that we don't
have to consult the bus, but can still resolve the names.

Note that the normal codepath continues to be via the bus, so that resolving
works from all mount namespaces and is subject to authentication, as before.

This is a bit dirty, but not too dirty, as dbus daemon is kinda special anyway
for PID 1.
---
 src/core/dynamic-user.c       |  51 +++++++-
 src/core/execute.c            |  15 ++-
 src/nss-systemd/nss-systemd.c | 262 ++++++++++++++++++++++++++++--------------
 3 files changed, 235 insertions(+), 93 deletions(-)

(limited to 'src/core')

diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 8035bee231..185d0e5f00 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -150,6 +150,42 @@ int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
         return 1;
 }
 
+static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
+
+        char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
+        const char *path2;
+        int r = 0;
+
+        /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
+         * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
+         * would be its own client then). We hence keep these world-readable symlinks in place, so that the
+         * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go
+         * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks
+         * on them and as those may be taken by any user with read access we can't make them world-readable. */
+
+        xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+        if (unlink(path1) < 0) {
+                if (errno != ENOENT)
+                        r = -errno;
+        }
+        if (b) {
+                if (symlink(name, path1) < 0)
+                        r = -errno;
+        }
+
+        path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
+        if (unlink(path2) < 0) {
+                if (errno != ENOENT)
+                        r = -errno;
+        }
+        if (b) {
+                if (symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0)
+                        r = -errno;
+        }
+
+        return r;
+}
+
 static int pick_uid(const char *name, uid_t *ret_uid) {
 
         static const uint8_t hash_key[] = {
@@ -223,6 +259,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                 }
 
                 (void) ftruncate(lock_fd, l);
+                (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */
 
                 *ret_uid = candidate;
                 r = lock_fd;
@@ -324,14 +361,16 @@ static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
         return 0;
 }
 
-static void unlink_uid_lock(int lock_fd, uid_t uid) {
+static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
         char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
 
         if (lock_fd < 0)
                 return;
 
         xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
-        (void) unlink_noerrno(lock_path);
+        (void) unlink(lock_path);
+
+        (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */
 }
 
 int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
@@ -399,7 +438,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
 
                 /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
                 if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) {
-                        unlink_uid_lock(uid_lock_fd, uid);
+                        unlink_uid_lock(uid_lock_fd, uid, d->name);
                         return -errno;
                 }
 
@@ -407,7 +446,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
                 if (r < 0) {
                         if (r != -EAGAIN) {
                                 /* OK, something bad happened, let's get rid of the bits we acquired. */
-                                unlink_uid_lock(uid_lock_fd, uid);
+                                unlink_uid_lock(uid_lock_fd, uid, d->name);
                                 goto finish;
                         }
 
@@ -416,7 +455,7 @@ int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
                         /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
                          * acquired, and use what's stored now. */
 
-                        unlink_uid_lock(uid_lock_fd, uid);
+                        unlink_uid_lock(uid_lock_fd, uid, d->name);
                         safe_close(uid_lock_fd);
 
                         uid = new_uid;
@@ -513,7 +552,7 @@ static int dynamic_user_close(DynamicUser *d) {
                 goto finish;
 
         /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
-        unlink_uid_lock(lock_fd, uid);
+        unlink_uid_lock(lock_fd, uid, d->name);
         r = 1;
 
 finish:
diff --git a/src/core/execute.c b/src/core/execute.c
index 4c786a2e33..0af8eb5a02 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -91,6 +91,7 @@
 #include "selinux-util.h"
 #include "signal-util.h"
 #include "smack-util.h"
+#include "special.h"
 #include "string-table.h"
 #include "string-util.h"
 #include "strv.h"
@@ -1384,6 +1385,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
 }
 
 static int build_environment(
+                Unit *u,
                 const ExecContext *c,
                 const ExecParameters *p,
                 unsigned n_fds,
@@ -1401,7 +1403,7 @@ static int build_environment(
         assert(c);
         assert(ret);
 
-        our_env = new0(char*, 12);
+        our_env = new0(char*, 13);
         if (!our_env)
                 return -ENOMEM;
 
@@ -1436,6 +1438,16 @@ static int build_environment(
                 our_env[n_env++] = x;
         }
 
+        /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
+         * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
+         * check the database directly. */
+        if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
+                x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
+                if (!x)
+                        return -ENOMEM;
+                our_env[n_env++] = x;
+        }
+
         if (home) {
                 x = strappend("HOME=", home);
                 if (!x)
@@ -2100,6 +2112,7 @@ static int exec_child(
         }
 
         r = build_environment(
+                        unit,
                         context,
                         params,
                         n_fds,
diff --git a/src/nss-systemd/nss-systemd.c b/src/nss-systemd/nss-systemd.c
index 7078c0c50c..17d04e958d 100644
--- a/src/nss-systemd/nss-systemd.c
+++ b/src/nss-systemd/nss-systemd.c
@@ -21,11 +21,14 @@
 
 #include "sd-bus.h"
 
+#include "alloc-util.h"
 #include "bus-common-errors.h"
 #include "env-util.h"
+#include "fs-util.h"
 #include "macro.h"
 #include "nss-util.h"
 #include "signal-util.h"
+#include "stdio-util.h"
 #include "string-util.h"
 #include "user-util.h"
 #include "util.h"
@@ -75,15 +78,50 @@ static const struct group nobody_group = {
 NSS_GETPW_PROTOTYPES(systemd);
 NSS_GETGR_PROTOTYPES(systemd);
 
+static int direct_lookup_name(const char *name, uid_t *ret) {
+        _cleanup_free_ char *s = NULL;
+        const char *path;
+        int r;
+
+        assert(name);
+
+        /* Normally, we go via the bus to resolve names. That has the benefit that it is available from any mount
+         * namespace and subject to proper authentication. However, there's one problem: if our module is called from
+         * dbus-daemon itself we really can't use D-Bus to communicate. In this case, resort to a client-side hack,
+         * and look for the dynamic names directly. This is pretty ugly, but breaks the cyclic dependency. */
+
+        path = strjoina("/run/systemd/dynamic-uid/direct:", name);
+        r = readlink_malloc(path, &s);
+        if (r < 0)
+                return r;
+
+        return parse_uid(s, ret);
+}
+
+static int direct_lookup_uid(uid_t uid, char **ret) {
+        char path[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1], *s;
+        int r;
+
+        xsprintf(path, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+
+        r = readlink_malloc(path, &s);
+        if (r < 0)
+                return r;
+        if (!valid_user_group_name(s)) { /* extra safety check */
+                free(s);
+                return -EINVAL;
+        }
+
+        *ret = s;
+        return 0;
+}
+
 enum nss_status _nss_systemd_getpwnam_r(
                 const char *name,
                 struct passwd *pwd,
                 char *buffer, size_t buflen,
                 int *errnop) {
 
-        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
-        _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
-        _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
         uint32_t translated;
         size_t l;
         int r;
@@ -114,30 +152,45 @@ enum nss_status _nss_systemd_getpwnam_r(
         if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
                 goto not_found;
 
-        r = sd_bus_open_system(&bus);
-        if (r < 0)
-                goto fail;
+        if (getenv_bool("SYSTEMD_NSS_BYPASS_BUS") > 0) {
 
-        r = sd_bus_call_method(bus,
-                               "org.freedesktop.systemd1",
-                               "/org/freedesktop/systemd1",
-                               "org.freedesktop.systemd1.Manager",
-                               "LookupDynamicUserByName",
-                               &error,
-                               &reply,
-                               "s",
-                               name);
-        if (r < 0) {
-                if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                /* Access the dynamic UID allocation directly if we are called from dbus-daemon, see above. */
+                r = direct_lookup_name(name, (uid_t*) &translated);
+                if (r == -ENOENT)
                         goto not_found;
-
-                goto fail;
+                if (r < 0)
+                        goto fail;
+
+        } else {
+                _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+                _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+                _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+
+                r = sd_bus_open_system(&bus);
+                if (r < 0)
+                        goto fail;
+
+                r = sd_bus_call_method(bus,
+                                       "org.freedesktop.systemd1",
+                                       "/org/freedesktop/systemd1",
+                                       "org.freedesktop.systemd1.Manager",
+                                       "LookupDynamicUserByName",
+                                       &error,
+                                       &reply,
+                                       "s",
+                                       name);
+                if (r < 0) {
+                        if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                                goto not_found;
+
+                        goto fail;
+                }
+
+                r = sd_bus_message_read(reply, "u", &translated);
+                if (r < 0)
+                        goto fail;
         }
 
-        r = sd_bus_message_read(reply, "u", &translated);
-        if (r < 0)
-                goto fail;
-
         l = strlen(name);
         if (buflen < l+1) {
                 *errnop = ENOMEM;
@@ -175,6 +228,7 @@ enum nss_status _nss_systemd_getpwuid_r(
         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
         _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
         _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+        _cleanup_free_ char *direct = NULL;
         const char *translated;
         size_t l;
         int r;
@@ -204,30 +258,42 @@ enum nss_status _nss_systemd_getpwuid_r(
         if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
                 goto not_found;
 
-        r = sd_bus_open_system(&bus);
-        if (r < 0)
-                goto fail;
+        if (getenv_bool("SYSTEMD_NSS_BYPASS_BUS") > 0) {
 
-        r = sd_bus_call_method(bus,
-                               "org.freedesktop.systemd1",
-                               "/org/freedesktop/systemd1",
-                               "org.freedesktop.systemd1.Manager",
-                               "LookupDynamicUserByUID",
-                               &error,
-                               &reply,
-                               "u",
-                               (uint32_t) uid);
-        if (r < 0) {
-                if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                r = direct_lookup_uid(uid, &direct);
+                if (r == -ENOENT)
                         goto not_found;
-
-                goto fail;
+                if (r < 0)
+                        goto fail;
+
+                translated = direct;
+
+        } else {
+                r = sd_bus_open_system(&bus);
+                if (r < 0)
+                        goto fail;
+
+                r = sd_bus_call_method(bus,
+                                       "org.freedesktop.systemd1",
+                                       "/org/freedesktop/systemd1",
+                                       "org.freedesktop.systemd1.Manager",
+                                       "LookupDynamicUserByUID",
+                                       &error,
+                                       &reply,
+                                       "u",
+                                       (uint32_t) uid);
+                if (r < 0) {
+                        if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                                goto not_found;
+
+                        goto fail;
+                }
+
+                r = sd_bus_message_read(reply, "s", &translated);
+                if (r < 0)
+                        goto fail;
         }
 
-        r = sd_bus_message_read(reply, "s", &translated);
-        if (r < 0)
-                goto fail;
-
         l = strlen(translated) + 1;
         if (buflen < l) {
                 *errnop = ENOMEM;
@@ -262,9 +328,6 @@ enum nss_status _nss_systemd_getgrnam_r(
                 char *buffer, size_t buflen,
                 int *errnop) {
 
-        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
-        _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
-        _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
         uint32_t translated;
         size_t l;
         int r;
@@ -294,30 +357,45 @@ enum nss_status _nss_systemd_getgrnam_r(
         if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
                 goto not_found;
 
-        r = sd_bus_open_system(&bus);
-        if (r < 0)
-                goto fail;
+        if (getenv_bool("SYSTEMD_NSS_BYPASS_BUS") > 0) {
 
-        r = sd_bus_call_method(bus,
-                               "org.freedesktop.systemd1",
-                               "/org/freedesktop/systemd1",
-                               "org.freedesktop.systemd1.Manager",
-                               "LookupDynamicUserByName",
-                               &error,
-                               &reply,
-                               "s",
-                               name);
-        if (r < 0) {
-                if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                /* Access the dynamic GID allocation directly if we are called from dbus-daemon, see above. */
+                r = direct_lookup_name(name, (uid_t*) &translated);
+                if (r == -ENOENT)
                         goto not_found;
-
-                goto fail;
+                if (r < 0)
+                        goto fail;
+        } else {
+
+                _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+                _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
+                _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+
+                r = sd_bus_open_system(&bus);
+                if (r < 0)
+                        goto fail;
+
+                r = sd_bus_call_method(bus,
+                                       "org.freedesktop.systemd1",
+                                       "/org/freedesktop/systemd1",
+                                       "org.freedesktop.systemd1.Manager",
+                                       "LookupDynamicUserByName",
+                                       &error,
+                                       &reply,
+                                       "s",
+                                       name);
+                if (r < 0) {
+                        if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                                goto not_found;
+
+                        goto fail;
+                }
+
+                r = sd_bus_message_read(reply, "u", &translated);
+                if (r < 0)
+                        goto fail;
         }
 
-        r = sd_bus_message_read(reply, "u", &translated);
-        if (r < 0)
-                goto fail;
-
         l = sizeof(char*) + strlen(name) + 1;
         if (buflen < l) {
                 *errnop = ENOMEM;
@@ -353,6 +431,7 @@ enum nss_status _nss_systemd_getgrgid_r(
         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
         _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL;
         _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+        _cleanup_free_ char *direct = NULL;
         const char *translated;
         size_t l;
         int r;
@@ -382,30 +461,41 @@ enum nss_status _nss_systemd_getgrgid_r(
         if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0)
                 goto not_found;
 
-        r = sd_bus_open_system(&bus);
-        if (r < 0)
-                goto fail;
+        if (getenv_bool("SYSTEMD_NSS_BYPASS_BUS") > 0) {
 
-        r = sd_bus_call_method(bus,
-                               "org.freedesktop.systemd1",
-                               "/org/freedesktop/systemd1",
-                               "org.freedesktop.systemd1.Manager",
-                               "LookupDynamicUserByUID",
-                               &error,
-                               &reply,
-                               "u",
-                               (uint32_t) gid);
-        if (r < 0) {
-                if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                r = direct_lookup_uid(gid, &direct);
+                if (r == -ENOENT)
                         goto not_found;
-
-                goto fail;
+                if (r < 0)
+                        goto fail;
+
+                translated = direct;
+        } else {
+                r = sd_bus_open_system(&bus);
+                if (r < 0)
+                        goto fail;
+
+                r = sd_bus_call_method(bus,
+                                       "org.freedesktop.systemd1",
+                                       "/org/freedesktop/systemd1",
+                                       "org.freedesktop.systemd1.Manager",
+                                       "LookupDynamicUserByUID",
+                                       &error,
+                                       &reply,
+                                       "u",
+                                       (uint32_t) gid);
+                if (r < 0) {
+                        if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER))
+                                goto not_found;
+
+                        goto fail;
+                }
+
+                r = sd_bus_message_read(reply, "s", &translated);
+                if (r < 0)
+                        goto fail;
         }
 
-        r = sd_bus_message_read(reply, "s", &translated);
-        if (r < 0)
-                goto fail;
-
         l = sizeof(char*) + strlen(translated) + 1;
         if (buflen < l) {
                 *errnop = ENOMEM;
-- 
cgit v1.2.3-54-g00ecf


From 206fc4b284afe68b1a0b7d776212ee81c12abf64 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Wed, 17 Aug 2016 12:06:07 -0400
Subject: systemd: warn when setrlimit fails

This should make it easier to figure things out.
---
 src/core/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 02324d325e..3f4fa74fed 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1505,7 +1505,8 @@ int main(int argc, char *argv[]) {
         if (getpid() == 1) {
                 /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
                  * will process core dumps for system services by default. */
-                (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
+                if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+                        log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
 
                 /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
                  * until the systemd-coredump tool is enabled via sysctl. */
-- 
cgit v1.2.3-54-g00ecf


From bd64d82c1c0e3fe2a5f9b3dd9132d62834f50b2d Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Thu, 18 Aug 2016 21:39:39 -0400
Subject: Revert "pid1: reconnect to the console before being re-executed"

This reverts commit affd7ed1a923b0df8479cff1bd9eafb625fdaa66.

> So it looks like make_console_stdio() has bad side effect. More specifically it
> does a TIOCSCTTY ioctl (via acquire_terminal()) which sees to disturb the
> process which was using/owning the console.

Fixes #3842.
https://bugs.debian.org/834367
https://bugzilla.redhat.com/show_bug.cgi?id=1367766
---
 src/core/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 3f4fa74fed..fd72c900e7 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -2014,9 +2014,6 @@ finish:
                                 log_error_errno(r, "Failed to switch root, trying to continue: %m");
                 }
 
-                /* Reopen the console */
-                (void) make_console_stdio();
-
                 args_size = MAX(6, argc+1);
                 args = newa(const char*, args_size);
 
@@ -2064,6 +2061,9 @@ finish:
                 arg_serialization = safe_fclose(arg_serialization);
                 fds = fdset_free(fds);
 
+                /* Reopen the console */
+                (void) make_console_stdio();
+
                 for (j = 1, i = 1; j < (unsigned) argc; j++)
                         args[i++] = argv[j];
                 args[i++] = NULL;
-- 
cgit v1.2.3-54-g00ecf


From f50582649f8eee73f59aff95fadd9a963ed4ffea Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@fb.com>
Date: Thu, 18 Aug 2016 22:57:53 -0400
Subject: logind: update empty and "infinity" handling for [User]TasksMax
 (#3835)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parsing functions for [User]TasksMax were inconsistent.  Empty string and
"infinity" were interpreted as no limit for TasksMax but not accepted for
UserTasksMax.  Update them so that they're consistent with other knobs.

* Empty string indicates the default value.
* "infinity" indicates no limit.

While at it, replace opencoded (uint64_t) -1 with CGROUP_LIMIT_MAX in TasksMax
handling.

v2: Update empty string to indicate the default value as suggested by Zbigniew
    Jędrzejewski-Szmek.

v3: Fixed empty UserTasksMax handling.
---
 man/logind.conf.xml      |  5 +++--
 src/basic/cgroup-util.h  |  4 ++++
 src/core/cgroup.c        |  2 +-
 src/core/load-fragment.c | 20 +++++++++++++-------
 src/core/main.c          |  2 +-
 src/login/logind-user.c  | 13 ++++++++++++-
 src/login/logind.c       |  3 ++-
 7 files changed, 36 insertions(+), 13 deletions(-)

(limited to 'src/core')

diff --git a/man/logind.conf.xml b/man/logind.conf.xml
index adba5a4131..cbee83357b 100644
--- a/man/logind.conf.xml
+++ b/man/logind.conf.xml
@@ -318,8 +318,9 @@
         <listitem><para>Sets the maximum number of OS tasks each user may run concurrently. This controls the
         <varname>TasksMax=</varname> setting of the per-user slice unit, see
         <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-        for details. Defaults to 33%, which equals 10813 with the kernel's defaults on the host, but might be smaller
-        in OS containers.</para></listitem>
+        for details. If assigned the special value <literal>infinity</literal>, no tasks limit is applied.
+        Defaults to 33%, which equals 10813 with the kernel's defaults on the host, but might be smaller in
+        OS containers.</para></listitem>
       </varlistentry>
 
       <varlistentry>
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index a509a1775c..f1617a16be 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -112,6 +112,10 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
             (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
 }
 
+/* Default resource limits */
+#define DEFAULT_TASKS_MAX_PERCENTAGE            15U /* 15% of PIDs, 4915 on default settings */
+#define DEFAULT_USER_TASKS_MAX_PERCENTAGE       33U /* 33% of PIDs, 10813 on default settings */
+
 /*
  * General rules:
  *
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 910a64b4da..ca3c3366f3 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -952,7 +952,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
 
         if ((mask & CGROUP_MASK_PIDS) && !is_root) {
 
-                if (c->tasks_max != (uint64_t) -1) {
+                if (c->tasks_max != CGROUP_LIMIT_MAX) {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 2];
 
                         sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 4ad6c4d79b..d5185cf6a0 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2999,30 +2999,36 @@ int config_parse_tasks_max(
                 void *data,
                 void *userdata) {
 
-        uint64_t *tasks_max = data, u;
+        uint64_t *tasks_max = data, v;
+        Unit *u = userdata;
         int r;
 
-        if (isempty(rvalue) || streq(rvalue, "infinity")) {
-                *tasks_max = (uint64_t) -1;
+        if (isempty(rvalue)) {
+                *tasks_max = u->manager->default_tasks_max;
+                return 0;
+        }
+
+        if (streq(rvalue, "infinity")) {
+                *tasks_max = CGROUP_LIMIT_MAX;
                 return 0;
         }
 
         r = parse_percent(rvalue);
         if (r < 0) {
-                r = safe_atou64(rvalue, &u);
+                r = safe_atou64(rvalue, &v);
                 if (r < 0) {
                         log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
                         return 0;
                 }
         } else
-                u = system_tasks_max_scale(r, 100U);
+                v = system_tasks_max_scale(r, 100U);
 
-        if (u <= 0 || u >= UINT64_MAX) {
+        if (v <= 0 || v >= UINT64_MAX) {
                 log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
                 return 0;
         }
 
-        *tasks_max = u;
+        *tasks_max = v;
         return 0;
 }
 
diff --git a/src/core/main.c b/src/core/main.c
index 02324d325e..c7c6df3447 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1559,7 +1559,7 @@ int main(int argc, char *argv[]) {
         (void) reset_all_signal_handlers();
         (void) ignore_signals(SIGNALS_IGNORE, -1);
 
-        arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */
+        arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
 
         if (parse_config_file() < 0) {
                 error_message = "Failed to parse config file";
diff --git a/src/login/logind-user.c b/src/login/logind-user.c
index 11951aca5b..e0e73b034d 100644
--- a/src/login/logind-user.c
+++ b/src/login/logind-user.c
@@ -26,6 +26,7 @@
 #include "bus-common-errors.h"
 #include "bus-error.h"
 #include "bus-util.h"
+#include "cgroup-util.h"
 #include "clean-ipc.h"
 #include "conf-parser.h"
 #include "escape.h"
@@ -896,7 +897,17 @@ int config_parse_user_tasks_max(
         assert(rvalue);
         assert(data);
 
-        /* First, try to parse as percentage */
+        if (isempty(rvalue)) {
+                *m = system_tasks_max_scale(DEFAULT_USER_TASKS_MAX_PERCENTAGE, 100U);
+                return 0;
+        }
+
+        if (streq(rvalue, "infinity")) {
+                *m = CGROUP_LIMIT_MAX;
+                return 0;
+        }
+
+        /* Try to parse as percentage */
         r = parse_percent(rvalue);
         if (r >= 0)
                 k = system_tasks_max_scale(r, 100U);
diff --git a/src/login/logind.c b/src/login/logind.c
index 5ce36d28c7..bbbf4aef57 100644
--- a/src/login/logind.c
+++ b/src/login/logind.c
@@ -38,6 +38,7 @@
 #include "signal-util.h"
 #include "strv.h"
 #include "udev-util.h"
+#include "cgroup-util.h"
 
 static void manager_free(Manager *m);
 
@@ -62,7 +63,7 @@ static void manager_reset_config(Manager *m) {
         m->idle_action = HANDLE_IGNORE;
 
         m->runtime_dir_size = physical_memory_scale(10U, 100U); /* 10% */
-        m->user_tasks_max = system_tasks_max_scale(33U, 100U); /* 33% */
+        m->user_tasks_max = system_tasks_max_scale(DEFAULT_USER_TASKS_MAX_PERCENTAGE, 100U); /* 33% */
         m->sessions_max = 8192;
         m->inhibitors_max = 8192;
 
-- 
cgit v1.2.3-54-g00ecf


From 986a34a6834795f1a2d25bcffbe67debf9bbd6b2 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Thu, 18 Aug 2016 23:09:29 -0400
Subject: core/dynamic-users: warn when creation of symlinks for dynamic users
 fails

Also return the first error, since it's most likely to be interesting.
If unlink fails, symlink will usually return EEXIST.
---
 src/core/dynamic-user.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'src/core')

diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 185d0e5f00..53c1699b92 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -154,7 +154,7 @@ static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
 
         char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
         const char *path2;
-        int r = 0;
+        int r = 0, k;
 
         /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
          * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
@@ -164,23 +164,26 @@ static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
          * on them and as those may be taken by any user with read access we can't make them world-readable. */
 
         xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
-        if (unlink(path1) < 0) {
-                if (errno != ENOENT)
-                        r = -errno;
-        }
-        if (b) {
-                if (symlink(name, path1) < 0)
-                        r = -errno;
+        if (unlink(path1) < 0 && errno != ENOENT)
+                r = -errno;
+
+        if (b && symlink(name, path1) < 0) {
+                k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1);
+                if (r == 0)
+                        r = k;
         }
 
         path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
-        if (unlink(path2) < 0) {
-                if (errno != ENOENT)
-                        r = -errno;
+        if (unlink(path2) < 0 && errno != ENOENT) {
+                k = -errno;
+                if (r == 0)
+                        r = k;
         }
-        if (b) {
-                if (symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0)
-                        r = -errno;
+
+        if (b && symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0) {
+                k = log_warning_errno(errno,  "Failed to symlink \"%s\": %m", path2);
+                if (r == 0)
+                        r = k;
         }
 
         return r;
-- 
cgit v1.2.3-54-g00ecf


From 61755fdae08fffd72229feb52af74a85a9e0c8f7 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Thu, 18 Aug 2016 23:19:10 -0400
Subject: journald: do not create split journals for dynamic users

Dynamic users should be treated like system users, and their logs
should end up in the main system journal.
---
 src/basic/user-util.h         | 16 ++++++++++++++--
 src/core/dynamic-user.c       | 17 ++++-------------
 src/journal/journald-server.c |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/user-util.h b/src/basic/user-util.h
index 36f71fb004..f569363811 100644
--- a/src/basic/user-util.h
+++ b/src/basic/user-util.h
@@ -20,6 +20,7 @@
 ***/
 
 #include <stdbool.h>
+#include <stdint.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -57,8 +58,19 @@ int take_etc_passwd_lock(const char *root);
 #define UID_INVALID ((uid_t) -1)
 #define GID_INVALID ((gid_t) -1)
 
-/* The following macros add 1 when converting things, since UID 0 is a
- * valid UID, while the pointer NULL is special */
+/* Let's pick a UIDs within the 16bit range, so that we are compatible with containers using 16bit
+ * user namespacing. At least on Fedora normal users are allocated until UID 60000, hence do not
+ * allocate from below this. Also stay away from the upper end of the range as that is often used
+ * for overflow/nobody users. */
+#define DYNAMIC_UID_MIN ((uid_t) UINT32_C(0x0000EF00))
+#define DYNAMIC_UID_MAX ((uid_t) UINT32_C(0x0000FFEF))
+
+static inline bool uid_is_dynamic(uid_t uid) {
+        return DYNAMIC_UID_MIN <= uid && uid <= DYNAMIC_UID_MAX;
+}
+
+/* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer
+ * NULL is special */
 #define PTR_TO_UID(p) ((uid_t) (((uintptr_t) (p))-1))
 #define UID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
 
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 53c1699b92..310aaa94e1 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -31,14 +31,8 @@
 #include "user-util.h"
 #include "fileio.h"
 
-/* Let's pick a UIDs within the 16bit range, so that we are compatible with containers using 16bit user namespacing. At
- * least on Fedora normal users are allocated until UID 60000, hence do not allocate from below this. Also stay away
- * from the upper end of the range as that is often used for overflow/nobody users. */
-#define UID_PICK_MIN ((uid_t) UINT32_C(0x0000EF00))
-#define UID_PICK_MAX ((uid_t) UINT32_C(0x0000FFEF))
-
 /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
-#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (UID_PICK_MAX - UID_PICK_MIN + 1)) + UID_PICK_MIN)
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
 
 static DynamicUser* dynamic_user_free(DynamicUser *d) {
         if (!d)
@@ -214,7 +208,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                 if (--n_tries <= 0) /* Give up retrying eventually */
                         return -EBUSY;
 
-                if (candidate < UID_PICK_MIN || candidate > UID_PICK_MAX)
+                if (!uid_is_dynamic(candidate))
                         goto next;
 
                 xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
@@ -676,11 +670,8 @@ int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
         assert(m);
         assert(ret);
 
-        /* A friendly way to translate a dynamic user's UID into a his name. */
-
-        if (uid < UID_PICK_MIN)
-                return -ESRCH;
-        if (uid > UID_PICK_MAX)
+        /* A friendly way to translate a dynamic user's UID into a name. */
+        if (!uid_is_dynamic(uid))
                 return -ESRCH;
 
         xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 443b2a4cd7..2a043a95b1 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -370,7 +370,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) {
         if (s->runtime_journal)
                 return s->runtime_journal;
 
-        if (uid <= SYSTEM_UID_MAX)
+        if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
                 return s->system_journal;
 
         r = sd_id128_get_machine(&machine);
-- 
cgit v1.2.3-54-g00ecf


From 05a98afd3e0513de50c5949b7fa50ff0989d68bc Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 15 Aug 2016 18:12:01 +0200
Subject: core: add Ref()/Unref() bus calls for units

This adds two (privileged) bus calls Ref() and Unref() to the Unit interface.
The two calls may be used by clients to pin a unit into memory, so that various
runtime properties aren't flushed out by the automatic GC. This is necessary
to permit clients to race-freely acquire runtime results (such as process exit
status/code or accumulated CPU time) on successful service termination.

Ref() and Unref() are fully recursive, hence act like the usual reference
counting concept in C. Taking a reference is a privileged operation, as this
allows pinning units into memory which consumes resources.

Transient units may also gain a reference at the time of creation, via the new
AddRef property (that is only defined for transient units at the time of
creation).
---
 man/systemctl.xml                         |  12 +--
 src/core/dbus-manager.c                   |  57 +++++++++++
 src/core/dbus-unit.c                      | 155 +++++++++++++++++++++++++++++-
 src/core/dbus-unit.h                      |   8 +-
 src/core/dbus.c                           |  71 +++++++-------
 src/core/dbus.h                           |   5 +-
 src/core/job.c                            |  17 ++--
 src/core/job.h                            |   4 +-
 src/core/manager.c                        |  27 +++---
 src/core/org.freedesktop.systemd1.conf    |   8 ++
 src/core/unit.c                           |  52 +++++++---
 src/core/unit.h                           |   7 ++
 src/libsystemd/sd-bus/bus-common-errors.c |   1 +
 src/libsystemd/sd-bus/bus-common-errors.h |   1 +
 14 files changed, 341 insertions(+), 84 deletions(-)

(limited to 'src/core')

diff --git a/man/systemctl.xml b/man/systemctl.xml
index fde4f4f3bb..7e0ac9613a 100644
--- a/man/systemctl.xml
+++ b/man/systemctl.xml
@@ -642,13 +642,13 @@
           <term><command>list-units <optional><replaceable>PATTERN</replaceable>...</optional></command></term>
 
           <listitem>
-            <para>List units that <command>systemd</command> has loaded. This includes units that
-            are either referenced directly or through a dependency, or units that were active in the
-            past and have failed. By default only units which are active, have pending jobs, or have
+            <para>List units that <command>systemd</command> has loaded. This includes units that are either referenced
+            directly or through a dependency, units that are pinned by applications programmatically, or units that
+            were active in the past and have failed. By default only units which are active, have pending jobs, or have
             failed are shown; this can be changed with option <option>--all</option>. If one or more
-            <replaceable>PATTERN</replaceable>s are specified, only units matching one of them are
-            shown. The units that are shown are additionally filtered by <option>--type=</option>
-            and <option>--state=</option> if those options are specified.</para>
+            <replaceable>PATTERN</replaceable>s are specified, only units matching one of them are shown. The units
+            that are shown are additionally filtered by <option>--type=</option> and <option>--state=</option> if those
+            options are specified.</para>
 
             <para>This is the default command.</para>
           </listitem>
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index ef05a75a8b..ea7ced2fd0 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -643,6 +643,54 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s
         return bus_unit_method_set_properties(message, u, error);
 }
 
+static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Manager *m = userdata;
+        const char *name;
+        Unit *u;
+        int r;
+
+        assert(message);
+        assert(m);
+
+        r = sd_bus_message_read(message, "s", &name);
+        if (r < 0)
+                return r;
+
+        r = manager_load_unit(m, name, NULL, error, &u);
+        if (r < 0)
+                return r;
+
+        r = bus_unit_check_load_state(u, error);
+        if (r < 0)
+                return r;
+
+        return bus_unit_method_ref(message, u, error);
+}
+
+static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Manager *m = userdata;
+        const char *name;
+        Unit *u;
+        int r;
+
+        assert(message);
+        assert(m);
+
+        r = sd_bus_message_read(message, "s", &name);
+        if (r < 0)
+                return r;
+
+        r = manager_load_unit(m, name, NULL, error, &u);
+        if (r < 0)
+                return r;
+
+        r = bus_unit_check_load_state(u, error);
+        if (r < 0)
+                return r;
+
+        return bus_unit_method_unref(message, u, error);
+}
+
 static int reply_unit_info(sd_bus_message *reply, Unit *u) {
         _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
         Unit *following;
@@ -781,6 +829,13 @@ static int transient_unit_from_message(
         if (r < 0)
                 return r;
 
+        /* If the client asked for it, automatically add a reference to this unit. */
+        if (u->bus_track_add) {
+                r = bus_unit_track_add_sender(u, message);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to watch sender: %m");
+        }
+
         /* Now load the missing bits of the unit we just created */
         unit_add_to_load_queue(u);
         manager_dispatch_load_queue(m);
@@ -2211,6 +2266,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 89e56a2e51..1b86bdde43 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -418,6 +418,7 @@ static int bus_verify_manage_units_async_full(
                 const char *verb,
                 int capability,
                 const char *polkit_message,
+                bool interactive,
                 sd_bus_message *call,
                 sd_bus_error *error) {
 
@@ -433,7 +434,15 @@ static int bus_verify_manage_units_async_full(
                 details[7] = GETTEXT_PACKAGE;
         }
 
-        return bus_verify_polkit_async(call, capability, "org.freedesktop.systemd1.manage-units", details, false, UID_INVALID, &u->manager->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        capability,
+                        "org.freedesktop.systemd1.manage-units",
+                        details,
+                        interactive,
+                        UID_INVALID,
+                        &u->manager->polkit_registry,
+                        error);
 }
 
 int bus_unit_method_start_generic(
@@ -486,6 +495,7 @@ int bus_unit_method_start_generic(
                         verb,
                         CAP_SYS_ADMIN,
                         job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL,
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -558,6 +568,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *
                         "kill",
                         CAP_KILL,
                         N_("Authentication is required to kill '$(unit)'."),
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -588,6 +599,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus
                         "reset-failed",
                         CAP_SYS_ADMIN,
                         N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -620,6 +632,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
                         "set-property",
                         CAP_SYS_ADMIN,
                         N_("Authentication is required to set properties on '$(unit)'."),
+                        true,
                         message,
                         error);
         if (r < 0)
@@ -634,6 +647,53 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
         return sd_bus_reply_method_return(message, NULL);
 }
 
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Unit *u = userdata;
+        int r;
+
+        assert(message);
+        assert(u);
+
+        r = mac_selinux_unit_access_check(u, message, "start", error);
+        if (r < 0)
+                return r;
+
+        r = bus_verify_manage_units_async_full(
+                        u,
+                        "ref",
+                        CAP_SYS_ADMIN,
+                        NULL,
+                        false,
+                        message,
+                        error);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+        r = bus_unit_track_add_sender(u, message);
+        if (r < 0)
+                return r;
+
+        return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Unit *u = userdata;
+        int r;
+
+        assert(message);
+        assert(u);
+
+        r = bus_unit_track_remove_sender(u, message);
+        if (r == -EUNATCH)
+                return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet.");
+        if (r < 0)
+                return r;
+
+        return sd_bus_reply_method_return(message, NULL);
+}
+
 const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_VTABLE_START(0),
 
@@ -715,6 +775,8 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED),
 
         /* Obsolete properties or obsolete alias names */
         SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
@@ -1318,6 +1380,29 @@ static int bus_unit_set_transient_property(
                         return r;
 
                 return 1;
+
+        } else if (streq(name, "AddRef")) {
+
+                int b;
+
+                /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method
+                 * on the Unit interface, and it's probably not a good idea to expose a property and a method on the
+                 * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for
+                 * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference
+                 * dependency type, hence let's not confuse things with that.
+                 *
+                 * Note that we don't acually add the reference to the bus track. We do that only after the setup of
+                 * the transient unit is complete, so that setting this property multiple times in the same transient
+                 * unit creation call doesn't count as individual references. */
+
+                r = sd_bus_message_read(message, "b", &b);
+                if (r < 0)
+                        return r;
+
+                if (mode != UNIT_CHECK)
+                        u->bus_track_add = b;
+
+                return 1;
         }
 
         return 0;
@@ -1422,3 +1507,71 @@ int bus_unit_check_load_state(Unit *u, sd_bus_error *error) {
 
         return sd_bus_error_set_errnof(error, u->load_error, "Unit %s is not loaded properly: %m.", u->id);
 }
+
+static int bus_track_handler(sd_bus_track *t, void *userdata) {
+        Unit *u = userdata;
+
+        assert(t);
+        assert(u);
+
+        u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+
+        unit_add_to_gc_queue(u);
+        return 0;
+}
+
+static int allocate_bus_track(Unit *u) {
+        int r;
+
+        assert(u);
+
+        if (u->bus_track)
+                return 0;
+
+        r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_track_handler, u);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_track_set_recursive(u->bus_track, true);
+        if (r < 0) {
+                u->bus_track = sd_bus_track_unref(u->bus_track);
+                return r;
+        }
+
+        return 0;
+}
+
+int bus_unit_track_add_name(Unit *u, const char *name) {
+        int r;
+
+        assert(u);
+
+        r = allocate_bus_track(u);
+        if (r < 0)
+                return r;
+
+        return sd_bus_track_add_name(u->bus_track, name);
+}
+
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) {
+        int r;
+
+        assert(u);
+
+        r = allocate_bus_track(u);
+        if (r < 0)
+                return r;
+
+        return sd_bus_track_add_sender(u->bus_track, m);
+}
+
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) {
+        assert(u);
+
+        /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an
+         * error */
+        if (!u->bus_track)
+                return -EUNATCH;
+
+        return sd_bus_track_remove_sender(u->bus_track, m);
+}
diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h
index 4db88dbebc..b280de7a1d 100644
--- a/src/core/dbus-unit.h
+++ b/src/core/dbus-unit.h
@@ -33,9 +33,15 @@ int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_
 int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
 int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error);
 
-int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
 int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitSetPropertiesMode mode, bool commit, sd_bus_error *error);
 int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error);
 int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error);
 
+int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
 int bus_unit_check_load_state(Unit *u, sd_bus_error *error);
+
+int bus_unit_track_add_name(Unit *u, const char *name);
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m);
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m);
diff --git a/src/core/dbus.c b/src/core/dbus.c
index 3422a02d68..1e41a42aa6 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -1168,60 +1168,57 @@ int bus_foreach_bus(
         return ret;
 }
 
-void bus_track_serialize(sd_bus_track *t, FILE *f) {
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
         const char *n;
 
         assert(f);
+        assert(prefix);
 
-        for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t))
-                fprintf(f, "subscribed=%s\n", n);
-}
-
-int bus_track_deserialize_item(char ***l, const char *line) {
-        const char *e;
-        int r;
-
-        assert(l);
-        assert(line);
-
-        e = startswith(line, "subscribed=");
-        if (!e)
-                return 0;
+        for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) {
+                int c, j;
 
-        r = strv_extend(l, e);
-        if (r < 0)
-                return r;
+                c = sd_bus_track_count_name(t, n);
 
-        return 1;
+                for (j = 0; j < c; j++) {
+                        fputs(prefix, f);
+                        fputc('=', f);
+                        fputs(n, f);
+                        fputc('\n', f);
+                }
+        }
 }
 
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l) {
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
+        char **i;
         int r = 0;
 
         assert(m);
         assert(t);
-        assert(l);
-
-        if (!strv_isempty(*l) && m->api_bus) {
-                char **i;
 
-                if (!*t) {
-                        r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
-                        if (r < 0)
-                                return r;
-                }
+        if (strv_isempty(l))
+                return 0;
 
-                r = 0;
-                STRV_FOREACH(i, *l) {
-                        int k;
+        if (!m->api_bus)
+                return 0;
 
-                        k = sd_bus_track_add_name(*t, *i);
-                        if (k < 0)
-                                r = k;
-                }
+        if (!*t) {
+                r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
+                if (r < 0)
+                        return r;
         }
 
-        *l = strv_free(*l);
+        r = sd_bus_track_set_recursive(*t, recursive);
+        if (r < 0)
+                return r;
+
+        r = 0;
+        STRV_FOREACH(i, l) {
+                int k;
+
+                k = sd_bus_track_add_name(*t, *i);
+                if (k < 0)
+                        r = k;
+        }
 
         return r;
 }
diff --git a/src/core/dbus.h b/src/core/dbus.h
index 6baaffbd75..a092ed9d76 100644
--- a/src/core/dbus.h
+++ b/src/core/dbus.h
@@ -28,9 +28,8 @@ void bus_done(Manager *m);
 
 int bus_fdset_add_all(Manager *m, FDSet *fds);
 
-void bus_track_serialize(sd_bus_track *t, FILE *f);
-int bus_track_deserialize_item(char ***l, const char *line);
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l);
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix);
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l);
 
 int manager_sync_bus_names(Manager *m, sd_bus *bus);
 
diff --git a/src/core/job.c b/src/core/job.c
index 7557874d4d..7faf2ef686 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -997,7 +997,10 @@ char *job_dbus_path(Job *j) {
         return p;
 }
 
-int job_serialize(Job *j, FILE *f, FDSet *fds) {
+int job_serialize(Job *j, FILE *f) {
+        assert(j);
+        assert(f);
+
         fprintf(f, "job-id=%u\n", j->id);
         fprintf(f, "job-type=%s\n", job_type_to_string(j->type));
         fprintf(f, "job-state=%s\n", job_state_to_string(j->state));
@@ -1008,15 +1011,16 @@ int job_serialize(Job *j, FILE *f, FDSet *fds) {
         if (j->begin_usec > 0)
                 fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec);
 
-        bus_track_serialize(j->clients, f);
+        bus_track_serialize(j->clients, f, "subscribed");
 
         /* End marker */
         fputc('\n', f);
         return 0;
 }
 
-int job_deserialize(Job *j, FILE *f, FDSet *fds) {
+int job_deserialize(Job *j, FILE *f) {
         assert(j);
+        assert(f);
 
         for (;;) {
                 char line[LINE_MAX], *l, *v;
@@ -1106,7 +1110,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) {
                 } else if (streq(l, "subscribed")) {
 
                         if (strv_extend(&j->deserialized_clients, v) < 0)
-                                return log_oom();
+                                log_oom();
                 }
         }
 }
@@ -1118,9 +1122,8 @@ int job_coldplug(Job *j) {
 
         /* After deserialization is complete and the bus connection
          * set up again, let's start watching our subscribers again */
-        r = bus_track_coldplug(j->manager, &j->clients, &j->deserialized_clients);
-        if (r < 0)
-                return r;
+        (void) bus_track_coldplug(j->manager, &j->clients, false, j->deserialized_clients);
+        j->deserialized_clients = strv_free(j->deserialized_clients);
 
         if (j->state == JOB_WAITING)
                 job_add_to_run_queue(j);
diff --git a/src/core/job.h b/src/core/job.h
index d359e8bb3e..85368f0d30 100644
--- a/src/core/job.h
+++ b/src/core/job.h
@@ -177,8 +177,8 @@ Job* job_install(Job *j);
 int job_install_deserialized(Job *j);
 void job_uninstall(Job *j);
 void job_dump(Job *j, FILE*f, const char *prefix);
-int job_serialize(Job *j, FILE *f, FDSet *fds);
-int job_deserialize(Job *j, FILE *f, FDSet *fds);
+int job_serialize(Job *j, FILE *f);
+int job_deserialize(Job *j, FILE *f);
 int job_coldplug(Job *j);
 
 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts);
diff --git a/src/core/manager.c b/src/core/manager.c
index 7576d038a2..b58f68fa7a 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1287,10 +1287,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
         if (q < 0 && r == 0)
                 r = q;
 
-        /* We might have deserialized the kdbus control fd, but if we
-         * didn't, then let's create the bus now. */
-        manager_connect_bus(m, !!serialization);
-        bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed);
+        /* We might have deserialized the kdbus control fd, but if we didn't, then let's create the bus now. */
+        (void) manager_connect_bus(m, !!serialization);
+
+        (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+        m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
 
         /* Third, fire things up! */
         manager_coldplug(m);
@@ -2490,7 +2491,7 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
                 fprintf(f, "kdbus-fd=%i\n", copy);
         }
 
-        bus_track_serialize(m->subscribed, f);
+        bus_track_serialize(m->subscribed, f, "subscribed");
 
         r = dynamic_user_serialize(m, f, fds);
         if (r < 0)
@@ -2693,15 +2694,13 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         manager_deserialize_uid_refs_one(m, l + 16);
                 else if (startswith(l, "destroy-ipc-gid="))
                         manager_deserialize_gid_refs_one(m, l + 16);
-                else {
-                        int k;
-
-                        k = bus_track_deserialize_item(&m->deserialized_subscribed, l);
-                        if (k < 0)
-                                log_debug_errno(k, "Failed to deserialize bus tracker object: %m");
-                        else if (k == 0)
-                                log_debug("Unknown serialization item '%s'", l);
-                }
+                else if (startswith(l, "subscribed=")) {
+
+                        if (strv_extend(&m->deserialized_subscribed, l+11) < 0)
+                                log_oom();
+
+                } else
+                        log_debug("Unknown serialization item '%s'", l);
         }
 
         for (;;) {
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
index 14f6aec029..647e5f736c 100644
--- a/src/core/org.freedesktop.systemd1.conf
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -182,6 +182,14 @@
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="Reexecute"/>
 
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="RefUnit"/>
+
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="UnrefUnit"/>
+
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="EnableUnitFiles"/>
diff --git a/src/core/unit.c b/src/core/unit.c
index c58c501bc3..6d92eb0c30 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -329,6 +329,9 @@ bool unit_check_gc(Unit *u) {
         if (u->refs)
                 return true;
 
+        if (sd_bus_track_count(u->bus_track) > 0)
+                return true;
+
         if (UNIT_VTABLE(u)->check_gc)
                 if (UNIT_VTABLE(u)->check_gc(u))
                         return true;
@@ -509,6 +512,9 @@ void unit_free(Unit *u) {
 
         sd_bus_slot_unref(u->match_bus_slot);
 
+        sd_bus_track_unref(u->bus_track);
+        u->deserialized_refs = strv_free(u->deserialized_refs);
+
         unit_free_requires_mounts_for(u);
 
         SET_FOREACH(t, u->names, i)
@@ -897,6 +903,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         Unit *following;
         _cleanup_set_free_ Set *following_set = NULL;
         int r;
+        const char *n;
 
         assert(u);
         assert(u->type >= 0);
@@ -1038,6 +1045,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         else if (u->load_state == UNIT_ERROR)
                 fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error));
 
+        for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track))
+                fprintf(f, "%s\tBus Ref: %s\n", prefix, n);
 
         if (u->job)
                 job_dump(u->job, f, prefix2);
@@ -2622,15 +2631,17 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
         if (gid_is_valid(u->ref_gid))
                 unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
 
+        bus_track_serialize(u->bus_track, f, "ref");
+
         if (serialize_jobs) {
                 if (u->job) {
                         fprintf(f, "job\n");
-                        job_serialize(u->job, f, fds);
+                        job_serialize(u->job, f);
                 }
 
                 if (u->nop_job) {
                         fprintf(f, "job\n");
-                        job_serialize(u->nop_job, f, fds);
+                        job_serialize(u->nop_job, f);
                 }
         }
 
@@ -2760,7 +2771,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                 if (!j)
                                         return log_oom();
 
-                                r = job_deserialize(j, f, fds);
+                                r = job_deserialize(j, f);
                                 if (r < 0) {
                                         job_free(j);
                                         return r;
@@ -2880,6 +2891,12 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                         else
                                 unit_ref_uid_gid(u, UID_INVALID, gid);
 
+                } else if (streq(l, "ref")) {
+
+                        r = strv_extend(&u->deserialized_refs, v);
+                        if (r < 0)
+                                log_oom();
+
                         continue;
                 }
 
@@ -2955,7 +2972,8 @@ int unit_add_node_link(Unit *u, const char *what, bool wants, UnitDependency dep
 }
 
 int unit_coldplug(Unit *u) {
-        int r = 0, q = 0;
+        int r = 0, q;
+        char **i;
 
         assert(u);
 
@@ -2966,18 +2984,26 @@ int unit_coldplug(Unit *u) {
 
         u->coldplugged = true;
 
-        if (UNIT_VTABLE(u)->coldplug)
-                r = UNIT_VTABLE(u)->coldplug(u);
+        STRV_FOREACH(i, u->deserialized_refs) {
+                q = bus_unit_track_add_name(u, *i);
+                if (q < 0 && r >= 0)
+                        r = q;
+        }
+        u->deserialized_refs = strv_free(u->deserialized_refs);
 
-        if (u->job)
-                q = job_coldplug(u->job);
+        if (UNIT_VTABLE(u)->coldplug) {
+                q = UNIT_VTABLE(u)->coldplug(u);
+                if (q < 0 && r >= 0)
+                        r = q;
+        }
 
-        if (r < 0)
-                return r;
-        if (q < 0)
-                return q;
+        if (u->job) {
+                q = job_coldplug(u->job);
+                if (q < 0 && r >= 0)
+                        r = q;
+        }
 
-        return 0;
+        return r;
 }
 
 static bool fragment_mtime_newer(const char *path, usec_t mtime) {
diff --git a/src/core/unit.h b/src/core/unit.h
index 53875653d7..e5a2a77b7b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -108,6 +108,10 @@ struct Unit {
         /* The slot used for watching NameOwnerChanged signals */
         sd_bus_slot *match_bus_slot;
 
+        /* References to this unit from clients */
+        sd_bus_track *bus_track;
+        char **deserialized_refs;
+
         /* Job timeout and action to take */
         usec_t job_timeout;
         FailureAction job_timeout_action;
@@ -247,6 +251,9 @@ struct Unit {
 
         /* Did we already invoke unit_coldplug() for this unit? */
         bool coldplugged:1;
+
+        /* For transient units: whether to add a bus track reference after creating the unit */
+        bool bus_track_add:1;
 };
 
 struct UnitStatusMessageFormats {
diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c
index 32be3cdc38..a69193aa32 100644
--- a/src/libsystemd/sd-bus/bus-common-errors.c
+++ b/src/libsystemd/sd-bus/bus-common-errors.c
@@ -45,6 +45,7 @@ BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = {
         SD_BUS_ERROR_MAP(BUS_ERROR_SHUTTING_DOWN,                ECANCELED),
         SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING,            EHOSTDOWN),
         SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER,         ESRCH),
+        SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED,               EUNATCH),
 
         SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE,              ENXIO),
         SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE,                ENOENT),
diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h
index befb6fbfe0..5df21c8926 100644
--- a/src/libsystemd/sd-bus/bus-common-errors.h
+++ b/src/libsystemd/sd-bus/bus-common-errors.h
@@ -41,6 +41,7 @@
 #define BUS_ERROR_SHUTTING_DOWN "org.freedesktop.systemd1.ShuttingDown"
 #define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning"
 #define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser"
+#define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced"
 
 #define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine"
 #define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage"
-- 
cgit v1.2.3-54-g00ecf


From fe700f46ec5490efcb8da49e644bb4d781f896f2 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 18 Aug 2016 20:58:10 +0200
Subject: core: cache last CPU usage counter, before destorying a cgroup

It is useful for clients to be able to read the last CPU usage counter value of
a unit even if the unit is already terminated. Hence, before destroying a
cgroup's cgroup cache the last CPU usage counter and return it if the cgroup is
gone.
---
 src/core/cgroup.c | 23 ++++++++++++++++++++++-
 src/core/unit.c   | 16 ++++++++++++++--
 src/core/unit.h   |  1 +
 3 files changed, 37 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 086cd7a789..20f9f9fc7e 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1524,6 +1524,8 @@ void unit_prune_cgroup(Unit *u) {
         if (!u->cgroup_path)
                 return;
 
+        (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
@@ -2044,7 +2046,21 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
         nsec_t ns;
         int r;
 
+        assert(u);
+
+        /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
+         * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
+         * call this function with a NULL return value. */
+
         r = unit_get_cpu_usage_raw(u, &ns);
+        if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+                /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
+                 * cached value. */
+
+                if (ret)
+                        *ret = u->cpu_usage_last;
+                return 0;
+        }
         if (r < 0)
                 return r;
 
@@ -2053,7 +2069,10 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
         else
                 ns = 0;
 
-        *ret = ns;
+        u->cpu_usage_last = ns;
+        if (ret)
+                *ret = ns;
+
         return 0;
 }
 
@@ -2063,6 +2082,8 @@ int unit_reset_cpu_usage(Unit *u) {
 
         assert(u);
 
+        u->cpu_usage_last = NSEC_INFINITY;
+
         r = unit_get_cpu_usage_raw(u, &ns);
         if (r < 0) {
                 u->cpu_usage_base = 0;
diff --git a/src/core/unit.c b/src/core/unit.c
index 6d92eb0c30..03ee6424fa 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -102,6 +102,7 @@ Unit *unit_new(Manager *m, size_t size) {
         u->job_timeout = USEC_INFINITY;
         u->ref_uid = UID_INVALID;
         u->ref_gid = GID_INVALID;
+        u->cpu_usage_last = NSEC_INFINITY;
 
         RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
         RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -2620,7 +2621,10 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
                 unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
 
         unit_serialize_item(u, f, "transient", yes_no(u->transient));
+
         unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+        if (u->cpu_usage_last != NSEC_INFINITY)
+                unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
 
         if (u->cgroup_path)
                 unit_serialize_item(u, f, "cgroup", u->cgroup_path);
@@ -2843,11 +2847,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
 
                         continue;
 
-                } else if (streq(l, "cpu-usage-base") || streq(l, "cpuacct-usage-base")) {
+                } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
 
                         r = safe_atou64(v, &u->cpu_usage_base);
                         if (r < 0)
-                                log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
+                                log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v);
+
+                        continue;
+
+                } else if (streq(l, "cpu-usage-last")) {
+
+                        r = safe_atou64(v, &u->cpu_usage_last);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v);
 
                         continue;
 
diff --git a/src/core/unit.h b/src/core/unit.h
index e5a2a77b7b..31f0fef87b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -194,6 +194,7 @@ struct Unit {
 
         /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
         nsec_t cpu_usage_base;
+        nsec_t cpu_usage_last; /* the most recently read value */
 
         /* Counterparts in the cgroup filesystem */
         char *cgroup_path;
-- 
cgit v1.2.3-54-g00ecf


From 390bc2b149a09661c81df404692b191e20dac7a0 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 19 Aug 2016 19:30:06 +0200
Subject: core: let's use set_contains() where appropriate

---
 src/core/unit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/unit.c b/src/core/unit.c
index 03ee6424fa..de22f657c6 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -114,7 +114,7 @@ bool unit_has_name(Unit *u, const char *name) {
         assert(u);
         assert(name);
 
-        return !!set_get(u->names, (char*) name);
+        return set_contains(u->names, (char*) name);
 }
 
 static void unit_init(Unit *u) {
-- 
cgit v1.2.3-54-g00ecf


From 83f12b27d14853e7c89a326f7cd31a6c739d378e Mon Sep 17 00:00:00 2001
From: Felipe Sateler <fsateler@users.noreply.github.com>
Date: Mon, 22 Aug 2016 16:40:58 -0300
Subject: core: do not fail at step SECCOMP if there is no kernel support
 (#4004)

Fixes #3882
---
 src/core/execute.c        | 38 ++++++++++++++++++++++++++++++--------
 src/core/main.c           |  6 ++++++
 src/shared/seccomp-util.c | 10 ++++++++++
 src/shared/seccomp-util.h |  2 ++
 src/test/test-execute.c   | 11 ++++++++++-
 5 files changed, 58 insertions(+), 9 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 0af8eb5a02..55f15d7e49 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1074,7 +1074,17 @@ static void rename_process_from_path(const char *path) {
 
 #ifdef HAVE_SECCOMP
 
-static int apply_seccomp(const ExecContext *c) {
+static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
+        if (!is_seccomp_available()) {
+                log_open();
+                log_unit_debug(u, "SECCOMP not detected in the kernel, skipping %s", msg);
+                log_close();
+                return true;
+        }
+        return false;
+}
+
+static int apply_seccomp(const Unit* u, const ExecContext *c) {
         uint32_t negative_action, action;
         scmp_filter_ctx *seccomp;
         Iterator i;
@@ -1083,6 +1093,9 @@ static int apply_seccomp(const ExecContext *c) {
 
         assert(c);
 
+        if (skip_seccomp_unavailable(u, "syscall filtering"))
+                return 0;
+
         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
 
         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
@@ -1123,13 +1136,16 @@ finish:
         return r;
 }
 
-static int apply_address_families(const ExecContext *c) {
+static int apply_address_families(const Unit* u, const ExecContext *c) {
         scmp_filter_ctx *seccomp;
         Iterator i;
         int r;
 
         assert(c);
 
+        if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
+                return 0;
+
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return -ENOMEM;
@@ -1244,12 +1260,15 @@ finish:
         return r;
 }
 
-static int apply_memory_deny_write_execute(const ExecContext *c) {
+static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
         scmp_filter_ctx *seccomp;
         int r;
 
         assert(c);
 
+        if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
+                return 0;
+
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return -ENOMEM;
@@ -1283,7 +1302,7 @@ finish:
         return r;
 }
 
-static int apply_restrict_realtime(const ExecContext *c) {
+static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
         static const int permitted_policies[] = {
                 SCHED_OTHER,
                 SCHED_BATCH,
@@ -1296,6 +1315,9 @@ static int apply_restrict_realtime(const ExecContext *c) {
 
         assert(c);
 
+        if (skip_seccomp_unavailable(u, "RestrictRealtime="))
+                return 0;
+
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return -ENOMEM;
@@ -2403,7 +2425,7 @@ static int exec_child(
 
 #ifdef HAVE_SECCOMP
                 if (use_address_families) {
-                        r = apply_address_families(context);
+                        r = apply_address_families(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_ADDRESS_FAMILIES;
                                 return r;
@@ -2411,7 +2433,7 @@ static int exec_child(
                 }
 
                 if (context->memory_deny_write_execute) {
-                        r = apply_memory_deny_write_execute(context);
+                        r = apply_memory_deny_write_execute(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
                                 return r;
@@ -2419,7 +2441,7 @@ static int exec_child(
                 }
 
                 if (context->restrict_realtime) {
-                        r = apply_restrict_realtime(context);
+                        r = apply_restrict_realtime(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
                                 return r;
@@ -2427,7 +2449,7 @@ static int exec_child(
                 }
 
                 if (use_syscall_filter) {
-                        r = apply_seccomp(context);
+                        r = apply_seccomp(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
                                 return r;
diff --git a/src/core/main.c b/src/core/main.c
index 125cfb28f0..7d8322ebd8 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -72,6 +72,9 @@
 #include "process-util.h"
 #include "raw-clone.h"
 #include "rlimit-util.h"
+#ifdef HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
 #include "selinux-setup.h"
 #include "selinux-util.h"
 #include "signal-util.h"
@@ -1186,6 +1189,9 @@ static int enforce_syscall_archs(Set *archs) {
         void *id;
         int r;
 
+        if (!is_seccomp_available())
+                return 0;
+
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return log_oom();
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 8656d112b8..4667f508c7 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -21,6 +21,8 @@
 #include <seccomp.h>
 #include <stddef.h>
 
+#include "alloc-util.h"
+#include "fileio.h"
 #include "macro.h"
 #include "seccomp-util.h"
 #include "string-util.h"
@@ -89,6 +91,14 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
 
 }
 
+bool is_seccomp_available(void) {
+        _cleanup_free_ char* field = NULL;
+        static int cached_enabled = -1;
+        if (cached_enabled < 0)
+                cached_enabled = get_proc_field("/proc/self/status", "Seccomp", "\n", &field) == 0;
+        return cached_enabled;
+}
+
 const SystemCallFilterSet syscall_filter_sets[] = {
         {
                 /* Clock */
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index be33eecb85..cca7c17912 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -27,6 +27,8 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret);
 
 int seccomp_add_secondary_archs(scmp_filter_ctx *c);
 
+bool is_seccomp_available(void);
+
 typedef struct SystemCallFilterSet {
         const char *set_name;
         const char *value;
diff --git a/src/test/test-execute.c b/src/test/test-execute.c
index 1d24115b5c..05ec1d2eb1 100644
--- a/src/test/test-execute.c
+++ b/src/test/test-execute.c
@@ -30,6 +30,9 @@
 #include "mkdir.h"
 #include "path-util.h"
 #include "rm-rf.h"
+#ifdef HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
 #include "test-helper.h"
 #include "unit.h"
 #include "util.h"
@@ -132,21 +135,27 @@ static void test_exec_privatedevices(Manager *m) {
 
 static void test_exec_systemcallfilter(Manager *m) {
 #ifdef HAVE_SECCOMP
+        if (!is_seccomp_available())
+                return;
         test(m, "exec-systemcallfilter-not-failing.service", 0, CLD_EXITED);
         test(m, "exec-systemcallfilter-not-failing2.service", 0, CLD_EXITED);
         test(m, "exec-systemcallfilter-failing.service", SIGSYS, CLD_KILLED);
         test(m, "exec-systemcallfilter-failing2.service", SIGSYS, CLD_KILLED);
+
 #endif
 }
 
 static void test_exec_systemcallerrornumber(Manager *m) {
 #ifdef HAVE_SECCOMP
-        test(m, "exec-systemcallerrornumber.service", 1, CLD_EXITED);
+        if (is_seccomp_available())
+                test(m, "exec-systemcallerrornumber.service", 1, CLD_EXITED);
 #endif
 }
 
 static void test_exec_systemcall_system_mode_with_user(Manager *m) {
 #ifdef HAVE_SECCOMP
+        if (!is_seccomp_available())
+                return;
         if (getpwnam("nobody"))
                 test(m, "exec-systemcallfilter-system-user.service", 0, CLD_EXITED);
         else if (getpwnam("nfsnobody"))
-- 
cgit v1.2.3-54-g00ecf


From 8dec4a9d2d8301102e12f8cf1d9a646e685fb1ea Mon Sep 17 00:00:00 2001
From: Felipe Sateler <fsateler@users.noreply.github.com>
Date: Tue, 23 Aug 2016 06:29:30 -0300
Subject: core,network: Use const qualifiers for block-local variables in macro
 functions (#4019)

Prevents discard-qualifiers warnings when the passed variable was const
---
 src/core/selinux-access.h     | 2 +-
 src/core/unit.h               | 2 +-
 src/network/networkd-link.h   | 2 +-
 src/network/networkd-netdev.h | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h
index 8f1f058a32..f46370d020 100644
--- a/src/core/selinux-access.h
+++ b/src/core/selinux-access.h
@@ -33,7 +33,7 @@ int mac_selinux_generic_access_check(sd_bus_message *message, const char *path,
 
 #define mac_selinux_unit_access_check(unit, message, permission, error) \
         ({                                                              \
-                Unit *_unit = (unit);                                   \
+                const Unit *_unit = (unit);                             \
                 mac_selinux_generic_access_check((message), _unit->source_path ?: _unit->fragment_path, (permission), (error)); \
         })
 
diff --git a/src/core/unit.h b/src/core/unit.h
index 53875653d7..f7a65782f1 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -639,7 +639,7 @@ void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
 
 #define log_unit_full(unit, level, error, ...)                          \
         ({                                                              \
-                Unit *_u = (unit);                                      \
+                const Unit *_u = (unit);                                \
                 _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })
diff --git a/src/network/networkd-link.h b/src/network/networkd-link.h
index 2809b1fe0b..05b2a2b323 100644
--- a/src/network/networkd-link.h
+++ b/src/network/networkd-link.h
@@ -186,7 +186,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_unref);
 
 #define log_link_full(link, level, error, ...)                          \
         ({                                                              \
-                Link *_l = (link);                                      \
+                const Link *_l = (link);                                \
                 _l ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _l->ifname, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })                                                              \
diff --git a/src/network/networkd-netdev.h b/src/network/networkd-netdev.h
index b92a973b85..09863e72b4 100644
--- a/src/network/networkd-netdev.h
+++ b/src/network/networkd-netdev.h
@@ -180,7 +180,7 @@ const struct ConfigPerfItem* network_netdev_gperf_lookup(const char *key, unsign
 
 #define log_netdev_full(netdev, level, error, ...)                      \
         ({                                                              \
-                NetDev *_n = (netdev);                                  \
+                const NetDev *_n = (netdev);                            \
                 _n ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _n->ifname, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })
-- 
cgit v1.2.3-54-g00ecf


From e520950a03419957875034bc27795b0b81d8e793 Mon Sep 17 00:00:00 2001
From: brulon <barron@lexmark.com>
Date: Fri, 26 Aug 2016 11:57:22 -0400
Subject: mount: add new LazyUnmount= setting for mount units, mapping to
 umount(8)'s "-l" switch (#3827)

---
 man/systemd.mount.xml                 | 13 +++++++++++++
 src/core/dbus-mount.c                 |  1 +
 src/core/load-fragment-gperf.gperf.m4 |  1 +
 src/core/mount.c                      |  8 ++++++--
 src/core/mount.h                      |  2 ++
 5 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.mount.xml b/man/systemd.mount.xml
index a38165f9b9..fa17f22dc3 100644
--- a/man/systemd.mount.xml
+++ b/man/systemd.mount.xml
@@ -351,6 +351,19 @@
         off.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>LazyUnmount=</varname></term>
+
+        <listitem><para>Takes a boolean argument. If true, detach the
+        filesystem from the filesystem hierarchy at time of the unmount
+        operation, and clean up all references to the filesystem as
+        soon as they are not busy anymore.
+        This corresponds with
+        <citerefentry project='man-pages'><refentrytitle>umount</refentrytitle><manvolnum>8</manvolnum></citerefentry>'s
+        <parameter>-l</parameter> switch. Defaults to
+        off.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>DirectoryMode=</varname></term>
         <listitem><para>Directories of mount points (and any parent
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index 3c6bda4073..4421f26bc1 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -116,6 +116,7 @@ const sd_bus_vtable bus_mount_vtable[] = {
         SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 05fe0df7e3..420d32dbd7 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -355,6 +355,7 @@ Mount.Type,                      config_parse_string,                0,
 Mount.TimeoutSec,                config_parse_sec,                   0,                             offsetof(Mount, timeout_usec)
 Mount.DirectoryMode,             config_parse_mode,                  0,                             offsetof(Mount, directory_mode)
 Mount.SloppyOptions,             config_parse_bool,                  0,                             offsetof(Mount, sloppy_options)
+Mount.LazyUnmount,               config_parse_bool,                  0,                             offsetof(Mount, lazy_unmount)
 EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
 CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
 KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
diff --git a/src/core/mount.c b/src/core/mount.c
index f2ac8d171f..2c2a54edbb 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -677,7 +677,8 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sOptions: %s\n"
                 "%sFrom /proc/self/mountinfo: %s\n"
                 "%sFrom fragment: %s\n"
-                "%sDirectoryMode: %04o\n",
+                "%sDirectoryMode: %04o\n"
+                "%sLazyUnmount: %s\n",
                 prefix, mount_state_to_string(m->state),
                 prefix, mount_result_to_string(m->result),
                 prefix, m->where,
@@ -686,7 +687,8 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, p ? strna(p->options) : "n/a",
                 prefix, yes_no(m->from_proc_self_mountinfo),
                 prefix, yes_no(m->from_fragment),
-                prefix, m->directory_mode);
+                prefix, m->directory_mode,
+                prefix, yes_no(m->lazy_unmount));
 
         if (m->control_pid > 0)
                 fprintf(f,
@@ -846,6 +848,8 @@ static void mount_enter_unmounting(Mount *m) {
         m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT;
 
         r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL);
+        if (r >= 0 && m->lazy_unmount)
+                r = exec_command_append(m->control_command, "-l", NULL);
         if (r < 0)
                 goto fail;
 
diff --git a/src/core/mount.h b/src/core/mount.h
index ac27b518cc..c4a2bff100 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -71,6 +71,8 @@ struct Mount {
 
         bool sloppy_options;
 
+        bool lazy_unmount;
+
         MountResult result;
         MountResult reload_result;
 
-- 
cgit v1.2.3-54-g00ecf


From 2507992f6bec6ccb4d96abd1f55ba278a38b1723 Mon Sep 17 00:00:00 2001
From: Douglas Christman <DouglasChristman@gmail.com>
Date: Fri, 26 Aug 2016 12:13:16 -0400
Subject: load-fragment: Resolve specifiers in OnCalendar and On*Sec

Resolves #3534
---
 src/core/load-fragment.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index d5185cf6a0..d3fc3e36d9 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -1338,10 +1338,13 @@ int config_parse_timer(const char *unit,
                        void *userdata) {
 
         Timer *t = data;
-        usec_t u = 0;
+        usec_t usec = 0;
         TimerValue *v;
         TimerBase b;
         CalendarSpec *c = NULL;
+        Unit *u = userdata;
+        _cleanup_free_ char *k = NULL;
+        int r;
 
         assert(filename);
         assert(lvalue);
@@ -1360,14 +1363,20 @@ int config_parse_timer(const char *unit,
                 return 0;
         }
 
+        r = unit_full_printf(u, rvalue, &k);
+        if (r < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+                return 0;
+        }
+
         if (b == TIMER_CALENDAR) {
-                if (calendar_spec_from_string(rvalue, &c) < 0) {
-                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", rvalue);
+                if (calendar_spec_from_string(k, &c) < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k);
                         return 0;
                 }
         } else {
-                if (parse_sec(rvalue, &u) < 0) {
-                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", rvalue);
+                if (parse_sec(k, &usec) < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k);
                         return 0;
                 }
         }
@@ -1379,7 +1388,7 @@ int config_parse_timer(const char *unit,
         }
 
         v->base = b;
-        v->value = u;
+        v->value = usec;
         v->calendar_spec = c;
 
         LIST_PREPEND(value, t->values, v);
-- 
cgit v1.2.3-54-g00ecf


From 4f8d40a9dc3d2dc754a30bc9455817ee6a857c62 Mon Sep 17 00:00:00 2001
From: Barron Rulon <barron@lexmark.com>
Date: Sat, 27 Aug 2016 10:27:49 -0400
Subject: mount: add new ForceUnmount= setting for mount units, mapping to
 umount(8)'s "-f" switch

---
 man/systemd.mount.xml                 | 11 +++++++++++
 src/core/dbus-mount.c                 |  1 +
 src/core/load-fragment-gperf.gperf.m4 |  1 +
 src/core/mount.c                      |  8 ++++++--
 src/core/mount.h                      |  1 +
 5 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.mount.xml b/man/systemd.mount.xml
index fa17f22dc3..dd08feb5ad 100644
--- a/man/systemd.mount.xml
+++ b/man/systemd.mount.xml
@@ -364,6 +364,17 @@
         off.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>ForceUnmount=</varname></term>
+
+        <listitem><para>Takes a boolean argument. If true, force an
+        unmount (in case of an unreachable NFS system).
+        This corresponds with
+        <citerefentry project='man-pages'><refentrytitle>umount</refentrytitle><manvolnum>8</manvolnum></citerefentry>'s
+        <parameter>-f</parameter> switch. Defaults to
+        off.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>DirectoryMode=</varname></term>
         <listitem><para>Directories of mount points (and any parent
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index 4421f26bc1..76a7a7ce97 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -117,6 +117,7 @@ const sd_bus_vtable bus_mount_vtable[] = {
         SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 420d32dbd7..057b92bfa3 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -356,6 +356,7 @@ Mount.TimeoutSec,                config_parse_sec,                   0,
 Mount.DirectoryMode,             config_parse_mode,                  0,                             offsetof(Mount, directory_mode)
 Mount.SloppyOptions,             config_parse_bool,                  0,                             offsetof(Mount, sloppy_options)
 Mount.LazyUnmount,               config_parse_bool,                  0,                             offsetof(Mount, lazy_unmount)
+Mount.ForceUnmount,              config_parse_bool,                  0,                             offsetof(Mount, force_unmount)
 EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
 CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
 KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
diff --git a/src/core/mount.c b/src/core/mount.c
index 2c2a54edbb..6c794d4073 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -678,7 +678,8 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sFrom /proc/self/mountinfo: %s\n"
                 "%sFrom fragment: %s\n"
                 "%sDirectoryMode: %04o\n"
-                "%sLazyUnmount: %s\n",
+                "%sLazyUnmount: %s\n"
+                "%sForceUnmount: %s\n",
                 prefix, mount_state_to_string(m->state),
                 prefix, mount_result_to_string(m->result),
                 prefix, m->where,
@@ -688,7 +689,8 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, yes_no(m->from_proc_self_mountinfo),
                 prefix, yes_no(m->from_fragment),
                 prefix, m->directory_mode,
-                prefix, yes_no(m->lazy_unmount));
+                prefix, yes_no(m->lazy_unmount),
+                prefix, yes_no(m->force_unmount));
 
         if (m->control_pid > 0)
                 fprintf(f,
@@ -850,6 +852,8 @@ static void mount_enter_unmounting(Mount *m) {
         r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL);
         if (r >= 0 && m->lazy_unmount)
                 r = exec_command_append(m->control_command, "-l", NULL);
+        if (r >= 0 && m->force_unmount)
+                r = exec_command_append(m->control_command, "-f", NULL);
         if (r < 0)
                 goto fail;
 
diff --git a/src/core/mount.h b/src/core/mount.h
index c4a2bff100..9f7326ba6a 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -72,6 +72,7 @@ struct Mount {
         bool sloppy_options;
 
         bool lazy_unmount;
+        bool force_unmount;
 
         MountResult result;
         MountResult reload_result;
-- 
cgit v1.2.3-54-g00ecf


From 49915de2451a2ddbd2449a825c682b9731432895 Mon Sep 17 00:00:00 2001
From: Barron Rulon <barron@lexmark.com>
Date: Sat, 27 Aug 2016 10:42:05 -0400
Subject: mount: add SloppyOptions= to mount_dump()

---
 src/core/mount.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/core')

diff --git a/src/core/mount.c b/src/core/mount.c
index 6c794d4073..04025b83b9 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -678,6 +678,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sFrom /proc/self/mountinfo: %s\n"
                 "%sFrom fragment: %s\n"
                 "%sDirectoryMode: %04o\n"
+                "%sSloppyOptions: %s\n"
                 "%sLazyUnmount: %s\n"
                 "%sForceUnmount: %s\n",
                 prefix, mount_state_to_string(m->state),
@@ -689,6 +690,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, yes_no(m->from_proc_self_mountinfo),
                 prefix, yes_no(m->from_fragment),
                 prefix, m->directory_mode,
+                prefix, yes_no(m->sloppy_options),
                 prefix, yes_no(m->lazy_unmount),
                 prefix, yes_no(m->force_unmount));
 
-- 
cgit v1.2.3-54-g00ecf


From 96e131ea091f748780776b81b7163f8084ed8244 Mon Sep 17 00:00:00 2001
From: WaLyong Cho <walyong.cho@samsung.com>
Date: Mon, 4 Jul 2016 07:03:54 +0000
Subject: core: introduce MemorySwapMax=

Similar to MemoryMax=, MemorySwapMax= limits swap usage. This controls
controls "memory.swap.max" attribute in unified cgroup.
---
 man/systemd.resource-control.xml      | 18 ++++++++++++++++++
 src/core/cgroup.c                     | 12 +++++++++---
 src/core/cgroup.h                     |  1 +
 src/core/dbus-cgroup.c                |  5 ++++-
 src/core/load-fragment-gperf.gperf.m4 |  1 +
 src/core/load-fragment.c              |  6 +++++-
 src/systemctl/systemctl.c             | 11 ++++++++++-
 7 files changed, 48 insertions(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 84dbfa2ff3..c11f420fe5 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -325,6 +325,24 @@
         </listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>MemorySwapMax=<replaceable>bytes</replaceable></varname></term>
+
+        <listitem>
+          <para>Specify the absolute limit on swap usage of the executed processes in this unit.</para>
+
+          <para>Takes a swap size in bytes. If the value is suffixed with K, M, G or T, the specified swap size is
+          parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the
+          special value <literal>infinity</literal>, no swap limit is applied. This controls the
+          <literal>memory.swap.max</literal> control group attribute. For details about this control group attribute,
+          see <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
+
+          <para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+          <para>This setting is supported only if the unified control group hierarchy is used.</para>
+        </listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
 
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 20f9f9fc7e..7873f88785 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -66,6 +66,7 @@ void cgroup_context_init(CGroupContext *c) {
 
         c->memory_high = CGROUP_LIMIT_MAX;
         c->memory_max = CGROUP_LIMIT_MAX;
+        c->memory_swap_max = CGROUP_LIMIT_MAX;
 
         c->memory_limit = CGROUP_LIMIT_MAX;
 
@@ -173,6 +174,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sMemoryLow=%" PRIu64 "\n"
                 "%sMemoryHigh=%" PRIu64 "\n"
                 "%sMemoryMax=%" PRIu64 "\n"
+                "%sMemorySwapMax=%" PRIu64 "\n"
                 "%sMemoryLimit=%" PRIu64 "\n"
                 "%sTasksMax=%" PRIu64 "\n"
                 "%sDevicePolicy=%s\n"
@@ -194,6 +196,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, c->memory_low,
                 prefix, c->memory_high,
                 prefix, c->memory_max,
+                prefix, c->memory_swap_max,
                 prefix, c->memory_limit,
                 prefix, c->tasks_max,
                 prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -617,7 +620,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u
 }
 
 static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
-        return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+        return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
 }
 
 static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
@@ -848,10 +851,12 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
         if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
                 if (cg_all_unified() > 0) {
                         uint64_t max = c->memory_max;
+                        uint64_t swap_max = c->memory_swap_max;
 
-                        if (cgroup_context_has_unified_memory_config(c))
+                        if (cgroup_context_has_unified_memory_config(c)) {
                                 max = c->memory_max;
-                        else {
+                                swap_max = c->memory_swap_max;
+                        } else {
                                 max = c->memory_limit;
 
                                 if (max != CGROUP_LIMIT_MAX)
@@ -861,6 +866,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
                         cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
                         cgroup_apply_unified_memory_limit(u, "memory.max", max);
+                        cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
                 } else {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
                         uint64_t val = c->memory_limit;
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 2fe9cc4039..4cd168f63e 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -101,6 +101,7 @@ struct CGroupContext {
         uint64_t memory_low;
         uint64_t memory_high;
         uint64_t memory_max;
+        uint64_t memory_swap_max;
 
         /* For legacy hierarchies */
         uint64_t cpu_shares;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 2ca80d2996..c4067a95bf 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -233,6 +233,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
         SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
         SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
+        SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0),
         SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
         SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
         SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -875,7 +876,7 @@ int bus_cgroup_set_property(
 
                 return 1;
 
-        } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
+        } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax")) {
                 uint64_t v;
 
                 r = sd_bus_message_read(message, "t", &v);
@@ -889,6 +890,8 @@ int bus_cgroup_set_property(
                                 c->memory_low = v;
                         else if (streq(name, "MemoryHigh"))
                                 c->memory_high = v;
+                        else if (streq(name, "MemorySwapMax"))
+                                c->memory_swap_max = v;
                         else
                                 c->memory_max = v;
 
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 420d32dbd7..0741aca616 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -131,6 +131,7 @@ $1.MemoryAccounting,             config_parse_bool,                  0,
 $1.MemoryLow,                    config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryHigh,                   config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryMax,                    config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
+$1.MemorySwapMax,                config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryLimit,                  config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.DeviceAllow,                  config_parse_device_allow,          0,                             offsetof($1, cgroup_context)
 $1.DevicePolicy,                 config_parse_device_policy,         0,                             offsetof($1, cgroup_context.device_policy)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index d5185cf6a0..65ca9d8a33 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2981,8 +2981,12 @@ int config_parse_memory_limit(
                 c->memory_high = bytes;
         else if (streq(lvalue, "MemoryMax"))
                 c->memory_max = bytes;
-        else
+        else if (streq(lvalue, "MemorySwapMax"))
+                c->memory_swap_max = bytes;
+        else if (streq(lvalue, "MemoryLimit"))
                 c->memory_limit = bytes;
+        else
+                return -EINVAL;
 
         return 0;
 }
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 4b87bdceb2..682805045d 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -3571,6 +3571,7 @@ typedef struct UnitStatusInfo {
         uint64_t memory_low;
         uint64_t memory_high;
         uint64_t memory_max;
+        uint64_t memory_swap_max;
         uint64_t memory_limit;
         uint64_t cpu_usage_nsec;
         uint64_t tasks_current;
@@ -3883,7 +3884,8 @@ static void print_status_info(
 
                 printf("   Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current));
 
-                if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX ||
+                if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX ||
+                    i->memory_max != CGROUP_LIMIT_MAX || i->memory_swap_max != CGROUP_LIMIT_MAX ||
                     i->memory_limit != CGROUP_LIMIT_MAX) {
                         const char *prefix = "";
 
@@ -3900,6 +3902,10 @@ static void print_status_info(
                                 printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max));
                                 prefix = " ";
                         }
+                        if (i->memory_swap_max != CGROUP_LIMIT_MAX) {
+                                printf("%sswap max: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_swap_max));
+                                prefix = " ";
+                        }
                         if (i->memory_limit != CGROUP_LIMIT_MAX) {
                                 printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit));
                                 prefix = " ";
@@ -4140,6 +4146,8 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
                         i->memory_high = u;
                 else if (streq(name, "MemoryMax"))
                         i->memory_max = u;
+                else if (streq(name, "MemorySwapMax"))
+                        i->memory_swap_max = u;
                 else if (streq(name, "MemoryLimit"))
                         i->memory_limit = u;
                 else if (streq(name, "TasksCurrent"))
@@ -4655,6 +4663,7 @@ static int show_one(
                 .memory_current = (uint64_t) -1,
                 .memory_high = CGROUP_LIMIT_MAX,
                 .memory_max = CGROUP_LIMIT_MAX,
+                .memory_swap_max = CGROUP_LIMIT_MAX,
                 .memory_limit = (uint64_t) -1,
                 .cpu_usage_nsec = (uint64_t) -1,
                 .tasks_current = (uint64_t) -1,
-- 
cgit v1.2.3-54-g00ecf


From d347d9029c7ec6b30eaaab93649105d935061b55 Mon Sep 17 00:00:00 2001
From: Felipe Sateler <fsateler@debian.org>
Date: Wed, 31 Aug 2016 10:00:35 -0300
Subject: seccomp: also detect if seccomp filtering is enabled

In https://github.com/systemd/systemd/pull/4004 , a runtime detection
method for seccomp was added. However, it does not detect the case
where CONFIG_SECCOMP=y but CONFIG_SECCOMP_FILTER=n. This is possible
if the architecture does not support filtering yet.
Add a check for that case too.

While at it, change get_proc_field usage to use PR_GET_SECCOMP prctl,
as that should save a few system calls and (unnecessary) allocations.
Previously, reading of /proc/self/stat was done as recommended by
prctl(2) as safer. However, given that we need to do the prctl call
anyway, lets skip opening, reading and parsing the file.

Code for checking inspired by
https://outflux.net/teach-seccomp/autodetect.html
---
 src/core/execute.c        |  2 +-
 src/shared/seccomp-util.c | 19 +++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 55f15d7e49..2026137721 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1077,7 +1077,7 @@ static void rename_process_from_path(const char *path) {
 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
         if (!is_seccomp_available()) {
                 log_open();
-                log_unit_debug(u, "SECCOMP not detected in the kernel, skipping %s", msg);
+                log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
                 log_close();
                 return true;
         }
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 6c489284d1..2f42381fc1 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -20,9 +20,9 @@
 #include <errno.h>
 #include <seccomp.h>
 #include <stddef.h>
+#include <sys/prctl.h>
+#include <linux/seccomp.h>
 
-#include "alloc-util.h"
-#include "fileio.h"
 #include "macro.h"
 #include "seccomp-util.h"
 #include "string-util.h"
@@ -91,11 +91,22 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
 
 }
 
+static bool is_basic_seccomp_available(void) {
+        int r;
+        r = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+        return r >= 0;
+}
+
+static bool is_seccomp_filter_available(void) {
+        int r;
+        r = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0);
+        return r < 0 && errno == EFAULT;
+}
+
 bool is_seccomp_available(void) {
-        _cleanup_free_ char* field = NULL;
         static int cached_enabled = -1;
         if (cached_enabled < 0)
-                cached_enabled = get_proc_field("/proc/self/status", "Seccomp", "\n", &field) == 0;
+                cached_enabled = is_basic_seccomp_available() && is_seccomp_filter_available();
         return cached_enabled;
 }
 
-- 
cgit v1.2.3-54-g00ecf


From f2dbd059a6b325f058c1eff65f2441a0f9f90eb1 Mon Sep 17 00:00:00 2001
From: Kyle Russell <bkylerussell@gmail.com>
Date: Thu, 8 Sep 2016 22:34:43 -0400
Subject: service: Continue shutdown on socket activated unit on termination
 (#4108)

ENOTCONN may be a legitimate return code if the endpoint disappeared,
but the service should still attempt to shutdown cleanly.
---
 src/core/service.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index 969c62bd83..57f8d90ee5 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1256,10 +1256,17 @@ static int service_spawn(
                 socklen_t salen = sizeof(sa);
 
                 r = getpeername(s->socket_fd, &sa.sa, &salen);
-                if (r < 0)
-                        return -errno;
+                if (r < 0) {
+                        r = -errno;
+
+                        /* ENOTCONN is legitimate if the endpoint disappeared on shutdown.
+                         * This connection is over, but the socket unit lives on. */
+                        if (r != -ENOTCONN ||
+                            (c != s->exec_command[SERVICE_EXEC_STOP] && c != s->exec_command[SERVICE_EXEC_STOP_POST]))
+                                return r;
+                }
 
-                if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
+                if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
                         _cleanup_free_ char *addr = NULL;
                         char *t;
                         int port;
-- 
cgit v1.2.3-54-g00ecf


From 232f6754f60ae803c992ca156cbc25fa80a5b9db Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Fri, 9 Sep 2016 15:16:26 +0100
Subject: pid1: drop kdbus_fd and all associated logic

---
 src/core/dbus.c    |  4 ----
 src/core/manager.c | 33 +++++----------------------------
 src/core/manager.h |  3 ---
 3 files changed, 5 insertions(+), 35 deletions(-)

(limited to 'src/core')

diff --git a/src/core/dbus.c b/src/core/dbus.c
index 1e41a42aa6..070974fe66 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -964,10 +964,6 @@ static int bus_init_private(Manager *m) {
         if (m->private_listen_fd >= 0)
                 return 0;
 
-        /* We don't need the private socket if we have kdbus */
-        if (m->kdbus_fd >= 0)
-                return 0;
-
         if (MANAGER_IS_SYSTEM(m)) {
 
                 /* We want the private bus only when running as init */
diff --git a/src/core/manager.c b/src/core/manager.c
index b58f68fa7a..fa8deb9b1b 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -590,7 +590,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
         m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
 
         m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
-                m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd =
+                m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
                 m->ask_password_inotify_fd = -1;
 
         m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
@@ -661,9 +661,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
                 goto fail;
         }
 
-        /* Note that we set up neither kdbus, nor the notify fd
-         * here. We do that after deserialization, since they might
-         * have gotten serialized across the reexec. */
+        /* Note that we do not set up the notify fd here. We do that after deserialization,
+         * since they might have gotten serialized across the reexec. */
 
         m->taint_usr = dir_is_empty("/usr") > 0;
 
@@ -879,7 +878,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) {
                 return 0;
 
         try_bus_connect =
-                m->kdbus_fd >= 0 ||
                 reexecuting ||
                 (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"));
 
@@ -1084,7 +1082,6 @@ Manager* manager_free(Manager *m) {
         safe_close(m->notify_fd);
         safe_close(m->cgroups_agent_fd);
         safe_close(m->time_change_fd);
-        safe_close(m->kdbus_fd);
         safe_close_pair(m->user_lookup_fds);
 
         manager_close_ask_password(m);
@@ -1287,7 +1284,7 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
         if (q < 0 && r == 0)
                 r = q;
 
-        /* We might have deserialized the kdbus control fd, but if we didn't, then let's create the bus now. */
+        /* Let's connect to the bus now. */
         (void) manager_connect_bus(m, !!serialization);
 
         (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
@@ -2481,16 +2478,6 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
                 fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
         }
 
-        if (m->kdbus_fd >= 0) {
-                int copy;
-
-                copy = fdset_put_dup(fds, m->kdbus_fd);
-                if (copy < 0)
-                        return copy;
-
-                fprintf(f, "kdbus-fd=%i\n", copy);
-        }
-
         bus_track_serialize(m->subscribed, f, "subscribed");
 
         r = dynamic_user_serialize(m, f, fds);
@@ -2678,16 +2665,6 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                                 m->user_lookup_fds[1] = fdset_remove(fds, fd1);
                         }
 
-                } else if (startswith(l, "kdbus-fd=")) {
-                        int fd;
-
-                        if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
-                                log_debug("Failed to parse kdbus fd: %s", l + 9);
-                        else {
-                                safe_close(m->kdbus_fd);
-                                m->kdbus_fd = fdset_remove(fds, fd);
-                        }
-
                 } else if (startswith(l, "dynamic-user="))
                         dynamic_user_deserialize_one(m, l + 13, fds);
                 else if (startswith(l, "destroy-ipc-uid="))
@@ -2699,7 +2676,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         if (strv_extend(&m->deserialized_subscribed, l+11) < 0)
                                 log_oom();
 
-                } else
+                } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */
                         log_debug("Unknown serialization item '%s'", l);
         }
 
diff --git a/src/core/manager.h b/src/core/manager.h
index b9f2e4b5a1..a592f1cb94 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -294,9 +294,6 @@ struct Manager {
          * value where Unit objects are contained. */
         Hashmap *units_requiring_mounts_for;
 
-        /* Reference to the kdbus bus control fd */
-        int kdbus_fd;
-
         /* Used for processing polkit authorization responses */
         Hashmap *polkit_registry;
 
-- 
cgit v1.2.3-54-g00ecf


From 0dd99f86addd1f81e24e89807b6bc4aab57d5793 Mon Sep 17 00:00:00 2001
From: Michael Olbrich <m.olbrich@pengutronix.de>
Date: Fri, 9 Sep 2016 17:05:06 +0200
Subject: unit: sent change signal before removing the unit if necessary
 (#4106)

If the unit is in the dbus queue when it is removed then the last change
signal is never sent. Fix this by checking the dbus queue and explicitly
send the change signal before sending the remove signal.
---
 src/core/dbus-unit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 1b86bdde43..5020dfba4b 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -1167,7 +1167,7 @@ void bus_unit_send_removed_signal(Unit *u) {
         int r;
         assert(u);
 
-        if (!u->sent_dbus_new_signal)
+        if (!u->sent_dbus_new_signal || u->in_dbus_queue)
                 bus_unit_send_change_signal(u);
 
         if (!u->id)
-- 
cgit v1.2.3-54-g00ecf


From 7dd736abecd8048843d117de56c65bbf489a6f4f Mon Sep 17 00:00:00 2001
From: Kyle Russell <bkylerussell@gmail.com>
Date: Sat, 10 Sep 2016 01:55:36 -0400
Subject: service: fixup ExecStop for socket-activated shutdown (#4120)

Previous fix didn't consider handling multiple ExecStop commands.
---
 src/core/service.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index 57f8d90ee5..99a70395fc 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1261,8 +1261,7 @@ static int service_spawn(
 
                         /* ENOTCONN is legitimate if the endpoint disappeared on shutdown.
                          * This connection is over, but the socket unit lives on. */
-                        if (r != -ENOTCONN ||
-                            (c != s->exec_command[SERVICE_EXEC_STOP] && c != s->exec_command[SERVICE_EXEC_STOP_POST]))
+                        if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST))
                                 return r;
                 }
 
-- 
cgit v1.2.3-54-g00ecf


From 43688c49d1fdb585196d94e2e30bb29755fa591b Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 10 Sep 2016 11:02:40 +0100
Subject: tree-wide: rename config_parse_many to …_nulstr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for adding a version which takes a strv.
---
 src/basic/def.h                     |  2 +-
 src/core/main.c                     |  2 +-
 src/coredump/coredump.c             |  2 +-
 src/journal-remote/journal-remote.c |  2 +-
 src/journal-remote/journal-upload.c |  2 +-
 src/journal/journald-server.c       |  2 +-
 src/login/logind.c                  |  2 +-
 src/network/networkd-conf.c         |  2 +-
 src/network/networkd-network.c      |  2 +-
 src/resolve/resolved-conf.c         |  2 +-
 src/shared/conf-parser.c            | 16 +++++++++-------
 src/shared/conf-parser.h            | 38 +++++++++++++++++++------------------
 src/shared/sleep-config.c           |  2 +-
 src/timesync/timesyncd-conf.c       |  2 +-
 14 files changed, 41 insertions(+), 37 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/def.h b/src/basic/def.h
index 1a7a0f4928..2266eff650 100644
--- a/src/basic/def.h
+++ b/src/basic/def.h
@@ -79,7 +79,7 @@
 #endif
 
 /* Return a nulstr for a standard cascade of configuration paths,
- * suitable to pass to conf_files_list_nulstr() or config_parse_many()
+ * suitable to pass to conf_files_list_nulstr() or config_parse_many_nulstr()
  * to implement drop-in directories for extending configuration
  * files. */
 #define CONF_PATHS_NULSTR(n) \
diff --git a/src/core/main.c b/src/core/main.c
index 7d8322ebd8..803307c9d5 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -715,7 +715,7 @@ static int parse_config_file(void) {
                 CONF_PATHS_NULSTR("systemd/system.conf.d") :
                 CONF_PATHS_NULSTR("systemd/user.conf.d");
 
-        config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
+        config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
 
         /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
          * like everywhere else. */
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
index be724aed4e..9dea10b3e1 100644
--- a/src/coredump/coredump.c
+++ b/src/coredump/coredump.c
@@ -128,7 +128,7 @@ static int parse_config(void) {
                 {}
         };
 
-        return config_parse_many(PKGSYSCONFDIR "/coredump.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/coredump.conf",
                                  CONF_PATHS_NULSTR("systemd/coredump.conf.d"),
                                  "Coredump\0",
                                  config_item_table_lookup, items,
diff --git a/src/journal-remote/journal-remote.c b/src/journal-remote/journal-remote.c
index 80e2adb100..220c71754a 100644
--- a/src/journal-remote/journal-remote.c
+++ b/src/journal-remote/journal-remote.c
@@ -1198,7 +1198,7 @@ static int parse_config(void) {
                 { "Remote",  "TrustedCertificateFile", config_parse_path,             0, &arg_trust      },
                 {}};
 
-        return config_parse_many(PKGSYSCONFDIR "/journal-remote.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/journal-remote.conf",
                                  CONF_PATHS_NULSTR("systemd/journal-remote.conf.d"),
                                  "Remote\0", config_item_table_lookup, items,
                                  false, NULL);
diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c
index 4647cfdeb3..c0f967ab94 100644
--- a/src/journal-remote/journal-upload.c
+++ b/src/journal-remote/journal-upload.c
@@ -542,7 +542,7 @@ static int parse_config(void) {
                 { "Upload",  "TrustedCertificateFile", config_parse_path,   0, &arg_trust  },
                 {}};
 
-        return config_parse_many(PKGSYSCONFDIR "/journal-upload.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/journal-upload.conf",
                                  CONF_PATHS_NULSTR("systemd/journal-upload.conf.d"),
                                  "Upload\0", config_item_table_lookup, items,
                                  false, NULL);
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 3507910919..cc352dbae2 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -1474,7 +1474,7 @@ static int server_parse_proc_cmdline(Server *s) {
 static int server_parse_config_file(Server *s) {
         assert(s);
 
-        return config_parse_many(PKGSYSCONFDIR "/journald.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
                                  CONF_PATHS_NULSTR("systemd/journald.conf.d"),
                                  "Journal\0",
                                  config_item_perf_lookup, journald_gperf_lookup,
diff --git a/src/login/logind.c b/src/login/logind.c
index bbbf4aef57..a9a06f5e28 100644
--- a/src/login/logind.c
+++ b/src/login/logind.c
@@ -1002,7 +1002,7 @@ static int manager_dispatch_idle_action(sd_event_source *s, uint64_t t, void *us
 static int manager_parse_config_file(Manager *m) {
         assert(m);
 
-        return config_parse_many(PKGSYSCONFDIR "/logind.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/logind.conf",
                                  CONF_PATHS_NULSTR("systemd/logind.conf.d"),
                                  "Login\0",
                                  config_item_perf_lookup, logind_gperf_lookup,
diff --git a/src/network/networkd-conf.c b/src/network/networkd-conf.c
index c03e2b2ebf..49bb8c18f6 100644
--- a/src/network/networkd-conf.c
+++ b/src/network/networkd-conf.c
@@ -29,7 +29,7 @@
 int manager_parse_config_file(Manager *m) {
         assert(m);
 
-        return config_parse_many(PKGSYSCONFDIR "/networkd.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/networkd.conf",
                                  CONF_PATHS_NULSTR("systemd/networkd.conf.d"),
                                  "DHCP\0",
                                  config_item_perf_lookup, networkd_gperf_lookup,
diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
index 0b36f13be8..1ce23b1ddb 100644
--- a/src/network/networkd-network.c
+++ b/src/network/networkd-network.c
@@ -151,7 +151,7 @@ static int network_load_one(Manager *manager, const char *filename) {
         if (r < 0)
                 return r;
 
-        r = config_parse_many(filename, dropin_dirs_nulstr,
+        r = config_parse_many_nulstr(filename, dropin_dirs_nulstr,
                          "Match\0"
                          "Link\0"
                          "Network\0"
diff --git a/src/resolve/resolved-conf.c b/src/resolve/resolved-conf.c
index dd233e7c4a..83f7596ac8 100644
--- a/src/resolve/resolved-conf.c
+++ b/src/resolve/resolved-conf.c
@@ -221,7 +221,7 @@ int manager_parse_config_file(Manager *m) {
 
         assert(m);
 
-        r = config_parse_many(PKGSYSCONFDIR "/resolved.conf",
+        r = config_parse_many_nulstr(PKGSYSCONFDIR "/resolved.conf",
                               CONF_PATHS_NULSTR("systemd/resolved.conf.d"),
                               "Resolve\0",
                               config_item_perf_lookup, resolved_gperf_lookup,
diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
index f31d219418..f3351c6bb2 100644
--- a/src/shared/conf-parser.c
+++ b/src/shared/conf-parser.c
@@ -397,13 +397,15 @@ int config_parse(const char *unit,
 }
 
 /* Parse each config file in the specified directories. */
-int config_parse_many(const char *conf_file,
-                      const char *conf_file_dirs,
-                      const char *sections,
-                      ConfigItemLookup lookup,
-                      const void *table,
-                      bool relaxed,
-                      void *userdata) {
+int config_parse_many_nulstr(
+                const char *conf_file,
+                const char *conf_file_dirs,
+                const char *sections,
+                ConfigItemLookup lookup,
+                const void *table,
+                bool relaxed,
+                void *userdata) {
+
         _cleanup_strv_free_ char **files = NULL;
         char **fn;
         int r;
diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h
index 3298dc0cea..e0b8d83dab 100644
--- a/src/shared/conf-parser.h
+++ b/src/shared/conf-parser.h
@@ -84,24 +84,26 @@ int config_item_table_lookup(const void *table, const char *section, const char
  * ConfigPerfItem tables */
 int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
 
-int config_parse(const char *unit,
-                 const char *filename,
-                 FILE *f,
-                 const char *sections,  /* nulstr */
-                 ConfigItemLookup lookup,
-                 const void *table,
-                 bool relaxed,
-                 bool allow_include,
-                 bool warn,
-                 void *userdata);
-
-int config_parse_many(const char *conf_file,      /* possibly NULL */
-                      const char *conf_file_dirs, /* nulstr */
-                      const char *sections,       /* nulstr */
-                      ConfigItemLookup lookup,
-                      const void *table,
-                      bool relaxed,
-                      void *userdata);
+int config_parse(
+                const char *unit,
+                const char *filename,
+                FILE *f,
+                const char *sections,  /* nulstr */
+                ConfigItemLookup lookup,
+                const void *table,
+                bool relaxed,
+                bool allow_include,
+                bool warn,
+                void *userdata);
+
+int config_parse_many_nulstr(
+                const char *conf_file,      /* possibly NULL */
+                const char *conf_file_dirs, /* nulstr */
+                const char *sections,       /* nulstr */
+                ConfigItemLookup lookup,
+                const void *table,
+                bool relaxed,
+                void *userdata);
 
 /* Generic parsers */
 int config_parse_int(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c
index f00624d0f2..ed31a80c8d 100644
--- a/src/shared/sleep-config.c
+++ b/src/shared/sleep-config.c
@@ -58,7 +58,7 @@ int parse_sleep_config(const char *verb, char ***_modes, char ***_states) {
                 {}
         };
 
-        config_parse_many(PKGSYSCONFDIR "/sleep.conf",
+        config_parse_many_nulstr(PKGSYSCONFDIR "/sleep.conf",
                           CONF_PATHS_NULSTR("systemd/sleep.conf.d"),
                           "Sleep\0", config_item_table_lookup, items,
                           false, NULL);
diff --git a/src/timesync/timesyncd-conf.c b/src/timesync/timesyncd-conf.c
index 20c64a3354..bf25b112e1 100644
--- a/src/timesync/timesyncd-conf.c
+++ b/src/timesync/timesyncd-conf.c
@@ -98,7 +98,7 @@ int config_parse_servers(
 int manager_parse_config_file(Manager *m) {
         assert(m);
 
-        return config_parse_many(PKGSYSCONFDIR "/timesyncd.conf",
+        return config_parse_many_nulstr(PKGSYSCONFDIR "/timesyncd.conf",
                                  CONF_PATHS_NULSTR("systemd/timesyncd.conf.d"),
                                  "Time\0",
                                  config_item_perf_lookup, timesyncd_gperf_lookup,
-- 
cgit v1.2.3-54-g00ecf


From 72246c2a654ead7f7ee6e7799161e2e46dc0b84b Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 22 Aug 2016 19:01:14 +0200
Subject: core: enforce seccomp for secondary archs too, for all rules

Let's make sure that all our rules apply to all archs the local kernel
supports.
---
 src/core/execute.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 2026137721..ee734e8445 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1273,6 +1273,10 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
         if (!seccomp)
                 return -ENOMEM;
 
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
         r = seccomp_rule_add(
                         seccomp,
                         SCMP_ACT_ERRNO(EPERM),
@@ -1322,6 +1326,10 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
         if (!seccomp)
                 return -ENOMEM;
 
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
         /* Determine the highest policy constant we want to allow */
         for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
                 if (permitted_policies[i] > max_policy)
-- 
cgit v1.2.3-54-g00ecf


From 59eeb84ba65483c5543d1bc840c2ac75642ef638 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 22 Aug 2016 18:43:59 +0200
Subject: core: add two new service settings ProtectKernelTunables= and
 ProtectControlGroups=

If enabled, these will block write access to /sys, /proc/sys and
/proc/sys/fs/cgroup.
---
 man/systemd.exec.xml                  |  20 +++++++
 src/core/dbus-execute.c               |   9 ++-
 src/core/execute.c                    | 100 ++++++++++++++++++++++++++++++----
 src/core/execute.h                    |   2 +
 src/core/load-fragment-gperf.gperf.m4 |   2 +
 src/core/namespace.c                  |  36 ++++++++++--
 src/core/namespace.h                  |   2 +
 src/shared/bus-unit-util.c            |   2 +-
 src/test/test-ns.c                    |   2 +
 9 files changed, 159 insertions(+), 16 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index bcedebd5bb..07128b489e 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1059,6 +1059,26 @@
         Defaults to off.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>ProtectKernelTunables=</varname></term>
+
+        <listitem><para>Takes a boolean argument. If true, kernel variables accessible through
+        <filename>/proc/sys</filename> and <filename>/sys</filename> will be made read-only to all processes of the
+        unit. Usually, tunable kernel variables should only be written at boot-time, with the
+        <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry> mechanism. Almost
+        no services need to write to these at runtime; it is hence recommended to turn this on for most
+        services. Defaults to off.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>ProtectControlGroups=</varname></term>
+
+        <listitem><para>Takes a boolean argument. If true, the Linux Control Groups ("cgroups") hierarchies accessible
+        through <filename>/sys/fs/cgroup</filename> will be made read-only to all processes of the unit. Except for
+        container managers no services should require write access to the control groups hierarchies; it is hence
+        recommended to turn this on for most services. Defaults to off.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>MountFlags=</varname></term>
 
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 7e33a2d201..eec4500c8c 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -707,6 +707,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1072,7 +1074,8 @@ int bus_exec_context_set_transient_property(
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
                               "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser", "RemoveIPC")) {
+                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
+                              "ProtectControlGroups")) {
                 int b;
 
                 r = sd_bus_message_read(message, "b", &b);
@@ -1106,6 +1109,10 @@ int bus_exec_context_set_transient_property(
                                 c->dynamic_user = b;
                         else if (streq(name, "RemoveIPC"))
                                 c->remove_ipc = b;
+                        else if (streq(name, "ProtectKernelTunables"))
+                                c->protect_kernel_tunables = b;
+                        else if (streq(name, "ProtectControlGroups"))
+                                c->protect_control_groups = b;
 
                         unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
                 }
diff --git a/src/core/execute.c b/src/core/execute.c
index ee734e8445..609b69a859 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1383,6 +1383,45 @@ finish:
         return r;
 }
 
+static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
+        scmp_filter_ctx *seccomp;
+        int r;
+
+        assert(c);
+
+        /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
+         * let's protect even those systems where this is left on in the kernel. */
+
+        if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
+                return 0;
+
+        seccomp = seccomp_init(SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
+
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_rule_add(
+                        seccomp,
+                        SCMP_ACT_ERRNO(EPERM),
+                        SCMP_SYS(_sysctl),
+                        0);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
+        return r;
+}
+
 #endif
 
 static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -1589,7 +1628,9 @@ static bool exec_needs_mount_namespace(
 
         if (context->private_devices ||
             context->protect_system != PROTECT_SYSTEM_NO ||
-            context->protect_home != PROTECT_HOME_NO)
+            context->protect_home != PROTECT_HOME_NO ||
+            context->protect_kernel_tunables ||
+            context->protect_control_groups)
                 return true;
 
         return false;
@@ -1804,6 +1845,37 @@ static int close_remaining_fds(
         return close_all_fds(dont_close, n_dont_close);
 }
 
+static bool context_has_address_families(const ExecContext *c) {
+        assert(c);
+
+        return c->address_families_whitelist ||
+                !set_isempty(c->address_families);
+}
+
+static bool context_has_syscall_filters(const ExecContext *c) {
+        assert(c);
+
+        return c->syscall_whitelist ||
+                !set_isempty(c->syscall_filter) ||
+                !set_isempty(c->syscall_archs);
+}
+
+static bool context_has_no_new_privileges(const ExecContext *c) {
+        assert(c);
+
+        if (c->no_new_privileges)
+                return true;
+
+        if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+                return false;
+
+        return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
+                c->memory_deny_write_execute ||
+                c->restrict_realtime ||
+                c->protect_kernel_tunables ||
+                context_has_syscall_filters(c);
+}
+
 static int send_user_lookup(
                 Unit *unit,
                 int user_lookup_fd,
@@ -2255,6 +2327,8 @@ static int exec_child(
                                 tmp,
                                 var,
                                 context->private_devices,
+                                context->protect_kernel_tunables,
+                                context->protect_control_groups,
                                 context->protect_home,
                                 context->protect_system,
                                 context->mount_flags);
@@ -2343,11 +2417,6 @@ static int exec_child(
 
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
 
-                bool use_address_families = context->address_families_whitelist ||
-                        !set_isempty(context->address_families);
-                bool use_syscall_filter = context->syscall_whitelist ||
-                        !set_isempty(context->syscall_filter) ||
-                        !set_isempty(context->syscall_archs);
                 int secure_bits = context->secure_bits;
 
                 for (i = 0; i < _RLIMIT_MAX; i++) {
@@ -2424,15 +2493,14 @@ static int exec_child(
                                 return -errno;
                         }
 
-                if (context->no_new_privileges ||
-                    (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter)))
+                if (context_has_no_new_privileges(context))
                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
                                 return -errno;
                         }
 
 #ifdef HAVE_SECCOMP
-                if (use_address_families) {
+                if (context_has_address_families(context)) {
                         r = apply_address_families(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_ADDRESS_FAMILIES;
@@ -2456,7 +2524,15 @@ static int exec_child(
                         }
                 }
 
-                if (use_syscall_filter) {
+                if (context->protect_kernel_tunables) {
+                        r = apply_protect_sysctl(unit, context);
+                        if (r < 0) {
+                                *exit_status = EXIT_SECCOMP;
+                                return r;
+                        }
+                }
+
+                if (context_has_syscall_filters(context)) {
                         r = apply_seccomp(unit, context);
                         if (r < 0) {
                                 *exit_status = EXIT_SECCOMP;
@@ -2888,6 +2964,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 "%sNonBlocking: %s\n"
                 "%sPrivateTmp: %s\n"
                 "%sPrivateDevices: %s\n"
+                "%sProtectKernelTunables: %s\n"
+                "%sProtectControlGroups: %s\n"
                 "%sPrivateNetwork: %s\n"
                 "%sPrivateUsers: %s\n"
                 "%sProtectHome: %s\n"
@@ -2901,6 +2979,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->non_blocking),
                 prefix, yes_no(c->private_tmp),
                 prefix, yes_no(c->private_devices),
+                prefix, yes_no(c->protect_kernel_tunables),
+                prefix, yes_no(c->protect_control_groups),
                 prefix, yes_no(c->private_network),
                 prefix, yes_no(c->private_users),
                 prefix, protect_home_to_string(c->protect_home),
diff --git a/src/core/execute.h b/src/core/execute.h
index 6082c42aba..449180c903 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -174,6 +174,8 @@ struct ExecContext {
         bool private_users;
         ProtectSystem protect_system;
         ProtectHome protect_home;
+        bool protect_kernel_tunables;
+        bool protect_control_groups;
 
         bool no_new_privileges;
 
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 2e6c965aec..c49c1d6732 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -89,6 +89,8 @@ $1.ReadOnlyPaths,                config_parse_namespace_path_strv,   0,
 $1.InaccessiblePaths,            config_parse_namespace_path_strv,   0,                             offsetof($1, exec_context.inaccessible_paths)
 $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)
 $1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)
+$1.ProtectKernelTunables,        config_parse_bool,                  0,                             offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectControlGroups,         config_parse_bool,                  0,                             offsetof($1, exec_context.protect_control_groups)
 $1.PrivateNetwork,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_network)
 $1.PrivateUsers,                 config_parse_bool,                  0,                             offsetof($1, exec_context.private_users)
 $1.ProtectSystem,                config_parse_protect_system,        0,                             offsetof($1, exec_context)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 52a2505d94..f2768aeb28 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -53,7 +53,7 @@ typedef enum MountMode {
         PRIVATE_TMP,
         PRIVATE_VAR_TMP,
         PRIVATE_DEV,
-        READWRITE
+        READWRITE,
 } MountMode;
 
 typedef struct BindMount {
@@ -366,6 +366,8 @@ int setup_namespace(
                 const char* tmp_dir,
                 const char* var_tmp_dir,
                 bool private_dev,
+                bool protect_sysctl,
+                bool protect_cgroups,
                 ProtectHome protect_home,
                 ProtectSystem protect_system,
                 unsigned long mount_flags) {
@@ -385,6 +387,8 @@ int setup_namespace(
                 strv_length(read_only_paths) +
                 strv_length(inaccessible_paths) +
                 private_dev +
+                (protect_sysctl ? 3 : 0) +
+                (protect_cgroups != protect_sysctl) +
                 (protect_home != PROTECT_HOME_NO ? 3 : 0) +
                 (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
                 (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
@@ -421,6 +425,27 @@ int setup_namespace(
                         m++;
                 }
 
+                if (protect_sysctl) {
+                        m->path = prefix_roota(root_directory, "/proc/sys");
+                        m->mode = READONLY;
+                        m++;
+
+                        m->path = prefix_roota(root_directory, "/proc/sysrq-trigger");
+                        m->mode = READONLY;
+                        m->ignore = true; /* Not always compiled into the kernel */
+                        m++;
+
+                        m->path = prefix_roota(root_directory, "/sys");
+                        m->mode = READONLY;
+                        m++;
+                }
+
+                if (protect_cgroups != protect_sysctl) {
+                        m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
+                        m->mode = protect_cgroups ? READONLY : READWRITE;
+                        m++;
+                }
+
                 if (protect_home != PROTECT_HOME_NO) {
                         const char *home_dir, *run_user_dir, *root_dir;
 
@@ -505,9 +530,12 @@ int setup_namespace(
 
 fail:
         if (n > 0) {
-                for (m = mounts; m < mounts + n; ++m)
-                        if (m->done)
-                                (void) umount2(m->path, MNT_DETACH);
+                for (m = mounts; m < mounts + n; ++m) {
+                        if (!m->done)
+                                continue;
+
+                        (void) umount2(m->path, MNT_DETACH);
+                }
         }
 
         return r;
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 1aedf5f208..3845336287 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -46,6 +46,8 @@ int setup_namespace(const char *chroot,
                     const char *tmp_dir,
                     const char *var_tmp_dir,
                     bool private_dev,
+                    bool protect_sysctl,
+                    bool protect_cgroups,
                     ProtectHome protect_home,
                     ProtectSystem protect_system,
                     unsigned long mount_flags);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index feb4a06737..c6bd2f145c 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -204,7 +204,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
                               "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser", "RemoveIPC")) {
+                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectControlGroups")) {
 
                 r = parse_boolean(eq);
                 if (r < 0)
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 9248f2987c..05f243c75c 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -69,6 +69,8 @@ int main(int argc, char *argv[]) {
                             tmp_dir,
                             var_tmp_dir,
                             true,
+                            true,
+                            true,
                             PROTECT_HOME_NO,
                             PROTECT_SYSTEM_NO,
                             0);
-- 
cgit v1.2.3-54-g00ecf


From fe3c2583bee339b6744872dc1897e6486d5bd7e0 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 24 Aug 2016 23:17:42 +0200
Subject: namespace: make sure InaccessibleDirectories= masks all mounts
 further down

If a dir is marked to be inaccessible then everything below it should be masked
by it.
---
 src/core/namespace.c | 44 ++++++++++++++++++++++++++++++++++++++++----
 src/test/test-ns.c   |  4 +++-
 2 files changed, 43 insertions(+), 5 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index f2768aeb28..102fe576f3 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -116,16 +116,47 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
         assert(m);
         assert(n);
 
+        /* Drops duplicate entries. Expects that the array is properly ordered already. */
+
         for (f = m, t = m, previous = NULL; f < m+*n; f++) {
 
-                /* The first one wins */
-                if (previous && path_equal(f->path, previous->path))
+                /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
+                 * above. */
+                if (previous && path_equal(f->path, previous->path)) {
+                        log_debug("%s is duplicate.", f->path);
                         continue;
+                }
 
                 *t = *f;
-
                 previous = t;
+                t++;
+        }
+
+        *n = t - m;
+}
+
+static void drop_inaccessible(BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+        const char *clear = NULL;
+
+        assert(m);
+        assert(n);
+
+        /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
+         * ordered already. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
+                 * it, as inaccessible paths really should drop the entire subtree. */
+                if (clear && path_startswith(f->path, clear)) {
+                        log_debug("%s is masked by %s.", f->path, clear);
+                        continue;
+                }
 
+                clear = f->mode == INACCESSIBLE ? f->path : NULL;
+
+                *t = *f;
                 t++;
         }
 
@@ -282,6 +313,8 @@ static int apply_mount(
 
         assert(m);
 
+        log_debug("Applying namespace mount on %s", m->path);
+
         switch (m->mode) {
 
         case INACCESSIBLE:
@@ -289,7 +322,7 @@ static int apply_mount(
                 /* First, get rid of everything that is below if there
                  * is anything... Then, overmount it with an
                  * inaccessible path. */
-                umount_recursive(m->path, 0);
+                (void) umount_recursive(m->path, 0);
 
                 if (lstat(m->path, &target) < 0) {
                         if (m->ignore && errno == ENOENT)
@@ -303,6 +336,7 @@ static int apply_mount(
                         return -ELOOP;
                 }
                 break;
+
         case READONLY:
         case READWRITE:
                 /* Nothing to mount here, we just later toggle the
@@ -480,7 +514,9 @@ int setup_namespace(
                 assert(mounts + n == m);
 
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
+
                 drop_duplicates(mounts, &n);
+                drop_inaccessible(mounts, &n);
         }
 
         if (n > 0 || root_directory) {
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 05f243c75c..03a24620af 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -26,6 +26,7 @@
 int main(int argc, char *argv[]) {
         const char * const writable[] = {
                 "/home",
+                "/home/lennart/projects/foobar", /* this should be masked automatically */
                 NULL
         };
 
@@ -42,11 +43,12 @@ int main(int argc, char *argv[]) {
         };
         char *root_directory;
         char *projects_directory;
-
         int r;
         char tmp_dir[] = "/tmp/systemd-private-XXXXXX",
              var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX";
 
+        log_set_max_level(LOG_DEBUG);
+
         assert_se(mkdtemp(tmp_dir));
         assert_se(mkdtemp(var_tmp_dir));
 
-- 
cgit v1.2.3-54-g00ecf


From 07689d5d2c07ee434437de5e39bf0abaa772818b Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 10:12:57 +0200
Subject: execute: split out creation of runtime dirs into its own functions

---
 src/core/execute.c | 57 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 22 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 609b69a859..3877293b4f 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1789,6 +1789,37 @@ static int setup_private_users(uid_t uid, gid_t gid) {
         return 0;
 }
 
+static int setup_runtime_directory(
+                const ExecContext *context,
+                const ExecParameters *params,
+                uid_t uid,
+                gid_t gid) {
+
+        char **rt;
+        int r;
+
+        assert(context);
+        assert(params);
+
+        STRV_FOREACH(rt, context->runtime_directory) {
+                _cleanup_free_ char *p;
+
+                p = strjoin(params->runtime_prefix, "/", *rt, NULL);
+                if (!p)
+                        return -ENOMEM;
+
+                r = mkdir_p_label(p, context->runtime_directory_mode);
+                if (r < 0)
+                        return r;
+
+                r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
 static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
         assert(array);
         assert(n);
@@ -2188,28 +2219,10 @@ static int exec_child(
         }
 
         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
-                char **rt;
-
-                STRV_FOREACH(rt, context->runtime_directory) {
-                        _cleanup_free_ char *p;
-
-                        p = strjoin(params->runtime_prefix, "/", *rt, NULL);
-                        if (!p) {
-                                *exit_status = EXIT_RUNTIME_DIRECTORY;
-                                return -ENOMEM;
-                        }
-
-                        r = mkdir_p_label(p, context->runtime_directory_mode);
-                        if (r < 0) {
-                                *exit_status = EXIT_RUNTIME_DIRECTORY;
-                                return r;
-                        }
-
-                        r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
-                        if (r < 0) {
-                                *exit_status = EXIT_RUNTIME_DIRECTORY;
-                                return r;
-                        }
+                r = setup_runtime_directory(context, params, uid, gid);
+                if (r < 0) {
+                        *exit_status = EXIT_RUNTIME_DIRECTORY;
+                        return r;
                 }
         }
 
-- 
cgit v1.2.3-54-g00ecf


From be39ccf3a0d4d15324af1de4d8552a1d65f40808 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 10:24:10 +0200
Subject: execute: move suppression of HOME=/ and SHELL=/bin/nologin into
 user-util.c

This adds a new call get_user_creds_clean(), which is just like
get_user_creds() but returns NULL in the home/shell parameters if they contain
no useful information. This code previously lived in execute.c, but by
generalizing this we can reuse it in run.c.
---
 src/basic/user-util.c | 32 +++++++++++++++++++++++++++++++-
 src/basic/user-util.h |  1 +
 src/core/execute.c    | 14 +++-----------
 src/run/run.c         | 18 +++++++++++-------
 4 files changed, 46 insertions(+), 19 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/user-util.c b/src/basic/user-util.c
index 122d9a0c7c..0522bce1d1 100644
--- a/src/basic/user-util.c
+++ b/src/basic/user-util.c
@@ -31,14 +31,15 @@
 #include <unistd.h>
 #include <utmp.h>
 
-#include "missing.h"
 #include "alloc-util.h"
 #include "fd-util.h"
 #include "formats-util.h"
 #include "macro.h"
+#include "missing.h"
 #include "parse-util.h"
 #include "path-util.h"
 #include "string-util.h"
+#include "strv.h"
 #include "user-util.h"
 #include "utf8.h"
 
@@ -175,6 +176,35 @@ int get_user_creds(
         return 0;
 }
 
+int get_user_creds_clean(
+                const char **username,
+                uid_t *uid, gid_t *gid,
+                const char **home,
+                const char **shell) {
+
+        int r;
+
+        /* Like get_user_creds(), but resets home/shell to NULL if they don't contain anything relevant. */
+
+        r = get_user_creds(username, uid, gid, home, shell);
+        if (r < 0)
+                return r;
+
+        if (shell &&
+            (isempty(*shell) || PATH_IN_SET(*shell,
+                                            "/bin/nologin",
+                                            "/sbin/nologin",
+                                            "/usr/bin/nologin",
+                                            "/usr/sbin/nologin")))
+                *shell = NULL;
+
+        if (home &&
+            (isempty(*home) || path_equal(*home, "/")))
+                *home = NULL;
+
+        return 0;
+}
+
 int get_group_creds(const char **groupname, gid_t *gid) {
         struct group *g;
         gid_t id;
diff --git a/src/basic/user-util.h b/src/basic/user-util.h
index f569363811..6c61f63cae 100644
--- a/src/basic/user-util.h
+++ b/src/basic/user-util.h
@@ -40,6 +40,7 @@ char* getlogname_malloc(void);
 char* getusername_malloc(void);
 
 int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell);
+int get_user_creds_clean(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell);
 int get_group_creds(const char **groupname, gid_t *gid);
 
 char* uid_to_name(uid_t uid);
diff --git a/src/core/execute.c b/src/core/execute.c
index 3877293b4f..c7a3ea39e7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2051,22 +2051,14 @@ static int exec_child(
         } else {
                 if (context->user) {
                         username = context->user;
-                        r = get_user_creds(&username, &uid, &gid, &home, &shell);
+                        r = get_user_creds_clean(&username, &uid, &gid, &home, &shell);
                         if (r < 0) {
                                 *exit_status = EXIT_USER;
                                 return r;
                         }
 
-                        /* Don't set $HOME or $SHELL if they are are not particularly enlightening anyway. */
-                        if (isempty(home) || path_equal(home, "/"))
-                                home = NULL;
-
-                        if (isempty(shell) || PATH_IN_SET(shell,
-                                                          "/bin/nologin",
-                                                          "/sbin/nologin",
-                                                          "/usr/bin/nologin",
-                                                          "/usr/sbin/nologin"))
-                                shell = NULL;
+                        /* Note that we don't set $HOME or $SHELL if they are are not particularly enlightening anyway
+                         * (i.e. are "/" or "/bin/nologin"). */
                 }
 
                 if (context->group) {
diff --git a/src/run/run.c b/src/run/run.c
index 2dd229868c..81b53fdfab 100644
--- a/src/run/run.c
+++ b/src/run/run.c
@@ -1168,17 +1168,21 @@ static int start_transient_scope(
                 uid_t uid;
                 gid_t gid;
 
-                r = get_user_creds(&arg_exec_user, &uid, &gid, &home, &shell);
+                r = get_user_creds_clean(&arg_exec_user, &uid, &gid, &home, &shell);
                 if (r < 0)
                         return log_error_errno(r, "Failed to resolve user %s: %m", arg_exec_user);
 
-                r = strv_extendf(&user_env, "HOME=%s", home);
-                if (r < 0)
-                        return log_oom();
+                if (home) {
+                        r = strv_extendf(&user_env, "HOME=%s", home);
+                        if (r < 0)
+                                return log_oom();
+                }
 
-                r = strv_extendf(&user_env, "SHELL=%s", shell);
-                if (r < 0)
-                        return log_oom();
+                if (shell) {
+                        r = strv_extendf(&user_env, "SHELL=%s", shell);
+                        if (r < 0)
+                                return log_oom();
+                }
 
                 r = strv_extendf(&user_env, "USER=%s", arg_exec_user);
                 if (r < 0)
-- 
cgit v1.2.3-54-g00ecf


From 3fbe8dbe41ad662d7cae0525f6fd62a66d2c5ec5 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 10:42:38 +0200
Subject: execute: if RuntimeDirectory= is set, it should be writable

Implicitly make all dirs set with RuntimeDirectory= writable, as the concept
otherwise makes no sense.
---
 src/core/execute.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index c7a3ea39e7..20e74ec8a6 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1820,6 +1820,44 @@ static int setup_runtime_directory(
         return 0;
 }
 
+static int compile_read_write_paths(
+                const ExecContext *context,
+                const ExecParameters *params,
+                char ***ret) {
+
+        _cleanup_strv_free_ char **l = NULL;
+        char **rt;
+
+        /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all
+         * runtime directories. */
+
+        if (strv_isempty(context->read_write_paths) &&
+            strv_isempty(context->runtime_directory)) {
+                *ret = NULL; /* NOP if neither is set */
+                return 0;
+        }
+
+        l = strv_copy(context->read_write_paths);
+        if (!l)
+                return -ENOMEM;
+
+        STRV_FOREACH(rt, context->runtime_directory) {
+                char *s;
+
+                s = strjoin(params->runtime_prefix, "/", *rt, NULL);
+                if (!s)
+                        return -ENOMEM;
+
+                if (strv_consume(&l, s) < 0)
+                        return -ENOMEM;
+        }
+
+        *ret = l;
+        l = NULL;
+
+        return 0;
+}
+
 static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
         assert(array);
         assert(n);
@@ -2307,8 +2345,8 @@ static int exec_child(
         }
 
         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
-
         if (needs_mount_namespace) {
+                _cleanup_free_ char **rw = NULL;
                 char *tmp = NULL, *var = NULL;
 
                 /* The runtime struct only contains the parent
@@ -2324,9 +2362,15 @@ static int exec_child(
                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
                 }
 
+                r = compile_read_write_paths(context, params, &rw);
+                if (r < 0) {
+                        *exit_status = EXIT_NAMESPACE;
+                        return r;
+                }
+
                 r = setup_namespace(
                                 (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
-                                context->read_write_paths,
+                                rw,
                                 context->read_only_paths,
                                 context->inaccessible_paths,
                                 tmp,
-- 
cgit v1.2.3-54-g00ecf


From 6ee1a919cf9013a695da2a01ae67327b996a6ef6 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 10:44:09 +0200
Subject: namespace: simplify mount_path_compare() a bit

---
 src/core/namespace.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 102fe576f3..74201caa10 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -93,21 +93,19 @@ static int mount_path_compare(const void *a, const void *b) {
         const BindMount *p = a, *q = b;
         int d;
 
+        /* If the paths are not equal, then order prefixes first */
         d = path_compare(p->path, q->path);
+        if (d != 0)
+                return d;
 
-        if (d == 0) {
-                /* If the paths are equal, check the mode */
-                if (p->mode < q->mode)
-                        return -1;
-
-                if (p->mode > q->mode)
-                        return 1;
+        /* If the paths are equal, check the mode */
+        if (p->mode < q->mode)
+                return -1;
 
-                return 0;
-        }
+        if (p->mode > q->mode)
+                return 1;
 
-        /* If the paths are not equal, then order prefixes first */
-        return d;
+        return 0;
 }
 
 static void drop_duplicates(BindMount *m, unsigned *n) {
-- 
cgit v1.2.3-54-g00ecf


From 7648a565d14dfb5516d93bacf0d87de2de5b5d91 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 11:29:32 +0200
Subject: namespace: when enforcing fs namespace restrictions suppress
 redundant mounts

If /foo is marked to be read-only, and /foo/bar too, then the latter may be
suppressed as it has no effect.
---
 src/core/namespace.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 74201caa10..72f850b2f2 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -161,6 +161,44 @@ static void drop_inaccessible(BindMount *m, unsigned *n) {
         *n = t - m;
 }
 
+static void drop_nop(BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+
+        assert(m);
+        assert(n);
+
+        /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
+         * list is ordered by prefixes. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                /* Only suppress such subtrees for READONLY and READWRITE entries */
+                if (IN_SET(f->mode, READONLY, READWRITE)) {
+                        BindMount *p;
+                        bool found = false;
+
+                        /* Now let's find the first parent of the entry we are looking at. */
+                        for (p = t-1; p >= m; p--) {
+                                if (path_startswith(f->path, p->path)) {
+                                        found = true;
+                                        break;
+                                }
+                        }
+
+                        /* We found it, let's see if it's the same mode, if so, we can drop this entry */
+                        if (found && p->mode == f->mode) {
+                                log_debug("%s is redundant by %s", f->path, p->path);
+                                continue;
+                        }
+                }
+
+                *t = *f;
+                t++;
+        }
+
+        *n = t - m;
+}
+
 static int mount_dev(BindMount *m) {
         static const char devnodes[] =
                 "/dev/null\0"
@@ -515,6 +553,7 @@ int setup_namespace(
 
                 drop_duplicates(mounts, &n);
                 drop_inaccessible(mounts, &n);
+                drop_nop(mounts, &n);
         }
 
         if (n > 0 || root_directory) {
-- 
cgit v1.2.3-54-g00ecf


From 6b7c9f8bce4679c89f3b89cacfd4932c0aeadad4 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Sun, 25 Sep 2016 10:40:51 +0200
Subject: namespace: rework how ReadWritePaths= is applied

Previously, if ReadWritePaths= was nested inside a ReadOnlyPaths=
specification, then we'd first recursively apply the ReadOnlyPaths= paths, and
make everything below read-only, only in order to then flip the read-only bit
again for the subdirs listed in ReadWritePaths= below it.

This is not only ugly (as for the dirs in question we first turn on the RO bit,
only to turn it off again immediately after), but also problematic in
containers, where a container manager might have marked a set of dirs read-only
and this code will undo this is ReadWritePaths= is set for any.

With this patch behaviour in this regard is altered: ReadOnlyPaths= will not be
applied to the children listed in ReadWritePaths= in the first place, so that
we do not need to turn off the RO bit for those after all.

This means that ReadWritePaths=/ReadOnlyPaths= may only be used to turn on the
RO bit, but never to turn it off again. Or to say this differently: if some
dirs are marked read-only via some external tool, then ReadWritePaths= will not
undo it.

This is not only the safer option, but also more in-line with what the man page
currently claims:

        "Entries (files or directories) listed in ReadWritePaths= are
        accessible from within the namespace with the same access rights as
        from outside."

To implement this change bind_remount_recursive() gained a new "blacklist"
string list parameter, which when passed may contain subdirs that shall be
excluded from the read-only mounting.

A number of functions are updated to add more debug logging to make this more
digestable.
---
 src/basic/mount-util.c    | 71 ++++++++++++++++++++++++++++++++---------------
 src/basic/mount-util.h    |  2 +-
 src/core/namespace.c      | 66 ++++++++++++++++++++++++++++---------------
 src/nspawn/nspawn-mount.c |  6 ++--
 src/nspawn/nspawn.c       |  2 +-
 5 files changed, 96 insertions(+), 51 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c
index bfa04394fe..b9affb4e70 100644
--- a/src/basic/mount-util.c
+++ b/src/basic/mount-util.c
@@ -36,6 +36,7 @@
 #include "set.h"
 #include "stdio-util.h"
 #include "string-util.h"
+#include "strv.h"
 
 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
@@ -287,10 +288,12 @@ int umount_recursive(const char *prefix, int flags) {
                                 continue;
 
                         if (umount2(p, flags) < 0) {
-                                r = -errno;
+                                r = log_debug_errno(errno, "Failed to umount %s: %m", p);
                                 continue;
                         }
 
+                        log_debug("Successfully unmounted %s", p);
+
                         again = true;
                         n++;
 
@@ -311,24 +314,21 @@ static int get_mount_flags(const char *path, unsigned long *flags) {
         return 0;
 }
 
-int bind_remount_recursive(const char *prefix, bool ro) {
+int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
         _cleanup_set_free_free_ Set *done = NULL;
         _cleanup_free_ char *cleaned = NULL;
         int r;
 
-        /* Recursively remount a directory (and all its submounts)
-         * read-only or read-write. If the directory is already
-         * mounted, we reuse the mount and simply mark it
-         * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
-         * operation). If it isn't we first make it one. Afterwards we
-         * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
-         * submounts we can access, too. When mounts are stacked on
-         * the same mount point we only care for each individual
-         * "top-level" mount on each point, as we cannot
-         * influence/access the underlying mounts anyway. We do not
-         * have any effect on future submounts that might get
-         * propagated, they migt be writable. This includes future
-         * submounts that have been triggered via autofs. */
+        /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
+         * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+         * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
+         * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
+         * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
+         * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
+         * future submounts that have been triggered via autofs.
+         *
+         * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
+         * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 
         cleaned = strdup(prefix);
         if (!cleaned)
@@ -385,6 +385,33 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                         if (r < 0)
                                 return r;
 
+                        if (!path_startswith(p, cleaned))
+                                continue;
+
+                        /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
+                         * operate on. */
+                        if (!path_equal(cleaned, p)) {
+                                bool blacklisted = false;
+                                char **i;
+
+                                STRV_FOREACH(i, blacklist) {
+
+                                        if (path_equal(*i, cleaned))
+                                                continue;
+
+                                        if (!path_startswith(*i, cleaned))
+                                                continue;
+
+                                        if (path_startswith(p, *i)) {
+                                                blacklisted = true;
+                                                log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
+                                                break;
+                                        }
+                                }
+                                if (blacklisted)
+                                        continue;
+                        }
+
                         /* Let's ignore autofs mounts.  If they aren't
                          * triggered yet, we want to avoid triggering
                          * them, as we don't make any guarantees for
@@ -396,12 +423,9 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                                 continue;
                         }
 
-                        if (path_startswith(p, cleaned) &&
-                            !set_contains(done, p)) {
-
+                        if (!set_contains(done, p)) {
                                 r = set_consume(todo, p);
                                 p = NULL;
-
                                 if (r == -EEXIST)
                                         continue;
                                 if (r < 0)
@@ -418,8 +442,7 @@ int bind_remount_recursive(const char *prefix, bool ro) {
 
                 if (!set_contains(done, cleaned) &&
                     !set_contains(todo, cleaned)) {
-                        /* The prefix directory itself is not yet a
-                         * mount, make it one. */
+                        /* The prefix directory itself is not yet a mount, make it one. */
                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
                                 return -errno;
 
@@ -430,6 +453,8 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
                                 return -errno;
 
+                        log_debug("Made top-level directory %s a mount point.", prefix);
+
                         x = strdup(cleaned);
                         if (!x)
                                 return -ENOMEM;
@@ -447,8 +472,7 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                         if (r < 0)
                                 return r;
 
-                        /* Deal with mount points that are obstructed by a
-                         * later mount */
+                        /* Deal with mount points that are obstructed by a later mount */
                         r = path_is_mount_point(x, 0);
                         if (r == -ENOENT || r == 0)
                                 continue;
@@ -463,6 +487,7 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
                                 return -errno;
 
+                        log_debug("Remounted %s read-only.", x);
                 }
         }
 }
diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h
index f46989ebb3..74730de663 100644
--- a/src/basic/mount-util.h
+++ b/src/basic/mount-util.h
@@ -35,7 +35,7 @@ int path_is_mount_point(const char *path, int flags);
 int repeat_unmount(const char *path, int flags);
 
 int umount_recursive(const char *target, int flags);
-int bind_remount_recursive(const char *prefix, bool ro);
+int bind_remount_recursive(const char *prefix, bool ro, char **blacklist);
 
 int mount_move_root(const char *path);
 
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 72f850b2f2..b0dab9459e 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -375,9 +375,19 @@ static int apply_mount(
 
         case READONLY:
         case READWRITE:
-                /* Nothing to mount here, we just later toggle the
-                 * MS_RDONLY bit for the mount point */
-                return 0;
+
+                r = path_is_mount_point(m->path, 0);
+                if (r < 0) {
+                        if (m->ignore && errno == ENOENT)
+                                return 0;
+                        return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
+                }
+                if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
+                        return 0;
+
+                /* This isn't a mount point yet, let's make it one. */
+                what = m->path;
+                break;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
@@ -396,31 +406,33 @@ static int apply_mount(
 
         assert(what);
 
-        r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
-        if (r >= 0) {
-                log_debug("Successfully mounted %s to %s", what, m->path);
-                return r;
-        } else {
+        if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) {
                 if (m->ignore && errno == ENOENT)
                         return 0;
+
                 return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
         }
+
+        log_debug("Successfully mounted %s to %s", what, m->path);
+        return 0;
 }
 
-static int make_read_only(BindMount *m) {
-        int r;
+static int make_read_only(BindMount *m, char **blacklist) {
+        int r = 0;
 
         assert(m);
 
         if (IN_SET(m->mode, INACCESSIBLE, READONLY))
-                r = bind_remount_recursive(m->path, true);
-        else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
-                r = bind_remount_recursive(m->path, false);
-                if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
-                        if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
-                                r = -errno;
+                r = bind_remount_recursive(m->path, true, blacklist);
+        else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
+                if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+                        r = -errno;
         } else
-                r = 0;
+                return 0;
+
+        /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
+         * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
+         * read-only mounts already applied. */
 
         if (m->ignore && r == -ENOENT)
                 return 0;
@@ -570,14 +582,25 @@ int setup_namespace(
         }
 
         if (n > 0) {
+                char **blacklist;
+                unsigned j;
+
+                /* First round, add in all special mounts we need */
                 for (m = mounts; m < mounts + n; ++m) {
                         r = apply_mount(m, tmp_dir, var_tmp_dir);
                         if (r < 0)
                                 goto fail;
                 }
 
+                /* Create a blacklist we can pass to bind_mount_recursive() */
+                blacklist = newa(char*, n+1);
+                for (j = 0; j < n; j++)
+                        blacklist[j] = (char*) mounts[j].path;
+                blacklist[j] = NULL;
+
+                /* Second round, flip the ro bits if necessary. */
                 for (m = mounts; m < mounts + n; ++m) {
-                        r = make_read_only(m);
+                        r = make_read_only(m, blacklist);
                         if (r < 0)
                                 goto fail;
                 }
@@ -586,9 +609,7 @@ int setup_namespace(
         if (root_directory) {
                 /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
                 r = mount_move_root(root_directory);
-
-                /* at this point, we cannot rollback */
-                if (r < 0)
+                if (r < 0) /* at this point, we cannot rollback */
                         return r;
         }
 
@@ -596,8 +617,7 @@ int setup_namespace(
          * reestablish propagation from our side to the host, since
          * what's disconnected is disconnected. */
         if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
-                /* at this point, we cannot rollback */
-                return -errno;
+                return -errno; /* at this point, we cannot rollback */
 
         return 0;
 
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 295b75341f..8457357003 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -476,7 +476,7 @@ static int mount_bind(const char *dest, CustomMount *m) {
                 return log_error_errno(errno, "mount(%s) failed: %m", where);
 
         if (m->read_only) {
-                r = bind_remount_recursive(where, true);
+                r = bind_remount_recursive(where, true, NULL);
                 if (r < 0)
                         return log_error_errno(r, "Read-only bind mount failed: %m");
         }
@@ -990,7 +990,7 @@ int setup_volatile_state(
         /* --volatile=state means we simply overmount /var
            with a tmpfs, and the rest read-only. */
 
-        r = bind_remount_recursive(directory, true);
+        r = bind_remount_recursive(directory, true, NULL);
         if (r < 0)
                 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
 
@@ -1065,7 +1065,7 @@ int setup_volatile(
 
         bind_mounted = true;
 
-        r = bind_remount_recursive(t, true);
+        r = bind_remount_recursive(t, true, NULL);
         if (r < 0) {
                 log_error_errno(r, "Failed to remount %s read-only: %m", t);
                 goto fail;
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0d61d34ebf..1f3e1f2dac 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -3019,7 +3019,7 @@ static int outer_child(
                 return r;
 
         if (arg_read_only) {
-                r = bind_remount_recursive(directory, true);
+                r = bind_remount_recursive(directory, true, NULL);
                 if (r < 0)
                         return log_error_errno(r, "Failed to make tree read-only: %m");
         }
-- 
cgit v1.2.3-54-g00ecf


From 160cfdbed3eb23b6bc3c17613685b756f23be4a1 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 15:51:37 +0200
Subject: namespace: add some debug logging when enforcing InaccessiblePaths=

---
 src/core/namespace.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index b0dab9459e..e08d7459c5 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -345,7 +345,6 @@ static int apply_mount(
 
         const char *what;
         int r;
-        struct stat target;
 
         assert(m);
 
@@ -353,7 +352,8 @@ static int apply_mount(
 
         switch (m->mode) {
 
-        case INACCESSIBLE:
+        case INACCESSIBLE: {
+                struct stat target;
 
                 /* First, get rid of everything that is below if there
                  * is anything... Then, overmount it with an
@@ -363,7 +363,7 @@ static int apply_mount(
                 if (lstat(m->path, &target) < 0) {
                         if (m->ignore && errno == ENOENT)
                                 return 0;
-                        return -errno;
+                        return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
                 }
 
                 what = mode_to_inaccessible_node(target.st_mode);
@@ -372,6 +372,7 @@ static int apply_mount(
                         return -ELOOP;
                 }
                 break;
+        }
 
         case READONLY:
         case READWRITE:
-- 
cgit v1.2.3-54-g00ecf


From 3f815163ff8fdcdbd329680580df36f94e15325d Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 15:57:21 +0200
Subject: core: introduce ProtectSystem=strict

Let's tighten our sandbox a bit more: with this change ProtectSystem= gains a
new setting "strict". If set, the entire directory tree of the system is
mounted read-only, but the API file systems /proc, /dev, /sys are excluded
(they may be managed with PrivateDevices= and ProtectKernelTunables=). Also,
/home and /root are excluded as those are left for ProtectHome= to manage.

In this mode, all "real" file systems (i.e. non-API file systems) are mounted
read-only, and specific directories may only be excluded via
ReadWriteDirectories=, thus implementing an effective whitelist instead of
blacklist of writable directories.

While we are at, also add /efi to the list of paths always affected by
ProtectSystem=. This is a follow-up for
b52a109ad38cd37b660ccd5394ff5c171a5e5355 which added /efi as alternative for
/boot. Our namespacing logic should respect that too.
---
 man/systemd.exec.xml | 33 ++++++++++++++++---------------
 src/core/namespace.c | 56 +++++++++++++++++++++++++++++++++++++++++++---------
 src/core/namespace.h |  1 +
 3 files changed, 65 insertions(+), 25 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 07128b489e..1b672fe0c9 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1020,22 +1020,23 @@
       <varlistentry>
         <term><varname>ProtectSystem=</varname></term>
 
-        <listitem><para>Takes a boolean argument or
-        <literal>full</literal>. If true, mounts the
-        <filename>/usr</filename> and <filename>/boot</filename>
-        directories read-only for processes invoked by this unit. If
-        set to <literal>full</literal>, the <filename>/etc</filename>
-        directory is mounted read-only, too. This setting ensures that
-        any modification of the vendor-supplied operating system (and
-        optionally its configuration) is prohibited for the service.
-        It is recommended to enable this setting for all long-running
-        services, unless they are involved with system updates or need
-        to modify the operating system in other ways. Note however
-        that processes retaining the CAP_SYS_ADMIN capability can undo
-        the effect of this setting. This setting is hence particularly
-        useful for daemons which have this capability removed, for
-        example with <varname>CapabilityBoundingSet=</varname>.
-        Defaults to off.</para></listitem>
+        <listitem><para>Takes a boolean argument or the special values <literal>full</literal> or
+        <literal>strict</literal>. If true, mounts the <filename>/usr</filename> and <filename>/boot</filename>
+        directories read-only for processes invoked by this unit. If set to <literal>full</literal>, the
+        <filename>/etc</filename> directory is mounted read-only, too. If set to <literal>strict</literal> the entire
+        file system hierarchy is mounted read-only, except for the API file system subtrees <filename>/dev</filename>,
+        <filename>/proc</filename> and <filename>/sys</filename> (protect these directories using
+        <varname>PrivateDevices=</varname>, <varname>ProtectKernelTunables=</varname>,
+        <varname>ProtectControlGroups=</varname>). This setting ensures that any modification of the vendor-supplied
+        operating system (and optionally its configuration, and local mounts) is prohibited for the service.  It is
+        recommended to enable this setting for all long-running services, unless they are involved with system updates
+        or need to modify the operating system in other ways. If this option is used,
+        <varname>ReadWritePaths=</varname> may be used to exclude specific directories from being made read-only. Note
+        that processes retaining the <constant>CAP_SYS_ADMIN</constant> capability (and with no system call filter that
+        prohibits mount-related system calls applied) can undo the effect of this setting. This setting is hence
+        particularly useful for daemons which have this either the <literal>@mount</literal> set filtered using
+        <varname>SystemCallFilter=</varname>, or have the <constant>CAP_SYS_ADMIN</constant> capability removed, for
+        example with <varname>CapabilityBoundingSet=</varname>.  Defaults to off.</para></listitem>
       </varlistentry>
 
       <varlistentry>
diff --git a/src/core/namespace.c b/src/core/namespace.c
index e08d7459c5..498cd139bf 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -472,9 +472,11 @@ int setup_namespace(
                 private_dev +
                 (protect_sysctl ? 3 : 0) +
                 (protect_cgroups != protect_sysctl) +
-                (protect_home != PROTECT_HOME_NO ? 3 : 0) +
-                (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
-                (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
+                (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
+                (protect_system == PROTECT_SYSTEM_STRICT ?
+                 (2 + !private_dev + !protect_sysctl) :
+                 ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
+                  (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
 
         if (n > 0) {
                 m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
@@ -529,9 +531,13 @@ int setup_namespace(
                         m++;
                 }
 
-                if (protect_home != PROTECT_HOME_NO) {
+                if (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT) {
                         const char *home_dir, *run_user_dir, *root_dir;
 
+                        /* If protection of $HOME and $XDG_RUNTIME_DIR is requested, then go for it. If we are in
+                         * strict system protection mode, then also add entries for these directories, but mark them
+                         * writable. This is because we want ProtectHome= and ProtectSystem= to be fully orthogonal. */
+
                         home_dir = prefix_roota(root_directory, "/home");
                         home_dir = strjoina("-", home_dir);
                         run_user_dir = prefix_roota(root_directory, "/run/user");
@@ -540,22 +546,53 @@ int setup_namespace(
                         root_dir = strjoina("-", root_dir);
 
                         r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
-                                protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+                                protect_home == PROTECT_HOME_READ_ONLY ? READONLY :
+                                protect_home == PROTECT_HOME_YES ? INACCESSIBLE : READWRITE);
                         if (r < 0)
                                 return r;
                 }
 
-                if (protect_system != PROTECT_SYSTEM_NO) {
-                        const char *usr_dir, *boot_dir, *etc_dir;
+                if (protect_system == PROTECT_SYSTEM_STRICT) {
+                        /* In strict mode, we mount everything read-only, except for /proc, /dev, /sys which are the
+                         * kernel API VFS, which are left writable, but PrivateDevices= + ProtectKernelTunables=
+                         * protect those, and these options should be fully orthogonal. (And of course /home and
+                         * friends are also left writable, as ProtectHome= shall manage those, orthogonally, see
+                         * above). */
+
+                        m->path = prefix_roota(root_directory, "/");
+                        m->mode = READONLY;
+                        m++;
+
+                        m->path = prefix_roota(root_directory, "/proc");
+                        m->mode = READWRITE;
+                        m++;
+
+                        if (!private_dev) {
+                                m->path = prefix_roota(root_directory, "/dev");
+                                m->mode = READWRITE;
+                                m++;
+                        }
+                        if (!protect_sysctl) {
+                                m->path = prefix_roota(root_directory, "/sys");
+                                m->mode = READWRITE;
+                                m++;
+                        }
+
+                } else if (protect_system != PROTECT_SYSTEM_NO) {
+                        const char *usr_dir, *boot_dir, *efi_dir, *etc_dir;
+
+                        /* In any other mode we simply mark the relevant three directories ready-only. */
 
                         usr_dir = prefix_roota(root_directory, "/usr");
                         boot_dir = prefix_roota(root_directory, "/boot");
                         boot_dir = strjoina("-", boot_dir);
+                        efi_dir = prefix_roota(root_directory, "/efi");
+                        efi_dir = strjoina("-", efi_dir);
                         etc_dir = prefix_roota(root_directory, "/etc");
 
                         r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
-                                ? STRV_MAKE(usr_dir, boot_dir, etc_dir)
-                                : STRV_MAKE(usr_dir, boot_dir), READONLY);
+                                          ? STRV_MAKE(usr_dir, boot_dir, efi_dir, etc_dir)
+                                          : STRV_MAKE(usr_dir, boot_dir, efi_dir), READONLY);
                         if (r < 0)
                                 return r;
                 }
@@ -780,6 +817,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
         [PROTECT_SYSTEM_NO] = "no",
         [PROTECT_SYSTEM_YES] = "yes",
         [PROTECT_SYSTEM_FULL] = "full",
+        [PROTECT_SYSTEM_STRICT] = "strict",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 3845336287..6505bcc499 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -35,6 +35,7 @@ typedef enum ProtectSystem {
         PROTECT_SYSTEM_NO,
         PROTECT_SYSTEM_YES,
         PROTECT_SYSTEM_FULL,
+        PROTECT_SYSTEM_STRICT,
         _PROTECT_SYSTEM_MAX,
         _PROTECT_SYSTEM_INVALID = -1
 } ProtectSystem;
-- 
cgit v1.2.3-54-g00ecf


From 63bb64a056113d4be5fefb16604accf08c8c204a Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 16:12:46 +0200
Subject: core: imply ProtectHome=read-only and ProtectSystem=strict if
 DynamicUser=1

Let's make sure that services that use DynamicUser=1 cannot leave files in the
file system should the system accidentally have a world-writable directory
somewhere.

This effectively ensures that directories need to be whitelisted rather than
blacklisted for access when DynamicUser=1 is set.
---
 man/systemd.exec.xml | 12 ++++++++----
 src/core/unit.c      |  6 ++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 1b672fe0c9..e4d9c0ef1b 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -160,14 +160,18 @@
         use. However, UID/GIDs are recycled after a unit is terminated. Care should be taken that any processes running
         as part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by these
         users/groups around, as a different unit might get the same UID/GID assigned later on, and thus gain access to
-        these files or directories. If <varname>DynamicUser=</varname> is enabled, <varname>RemoveIPC=</varname> and
+        these files or directories. If <varname>DynamicUser=</varname> is enabled, <varname>RemoveIPC=</varname>,
         <varname>PrivateTmp=</varname> are implied. This ensures that the lifetime of IPC objects and temporary files
         created by the executed processes is bound to the runtime of the service, and hence the lifetime of the dynamic
         user/group. Since <filename>/tmp</filename> and <filename>/var/tmp</filename> are usually the only
         world-writable directories on a system this ensures that a unit making use of dynamic user/group allocation
-        cannot leave files around after unit termination. Use <varname>RuntimeDirectory=</varname> (see below) in order
-        to assign a writable runtime directory to a service, owned by the dynamic user/group and removed automatically
-        when the unit is terminated. Defaults to off.</para></listitem>
+        cannot leave files around after unit termination. Moreover <varname>ProtectSystem=strict</varname> and
+        <varname>ProtectHome=read-only</varname> are implied, thus prohibiting the service to write to arbitrary file
+        system locations. In order to allow the service to write to certain directories, they have to be whitelisted
+        using <varname>ReadWritePaths=</varname>, but care must be taken so that that UID/GID recycling doesn't
+        create security issues involving files created by the service. Use <varname>RuntimeDirectory=</varname> (see
+        below) in order to assign a writable runtime directory to a service, owned by the dynamic user/group and
+        removed automatically when the unit is terminated. Defaults to off.</para></listitem>
       </varlistentry>
 
       <varlistentry>
diff --git a/src/core/unit.c b/src/core/unit.c
index de22f657c6..5d284a359d 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3377,8 +3377,14 @@ int unit_patch_contexts(Unit *u) {
                                         return -ENOMEM;
                         }
 
+                        /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID
+                         * around in the file system or on IPC objects. Hence enforce a strict sandbox. */
+
                         ec->private_tmp = true;
                         ec->remove_ipc = true;
+                        ec->protect_system = PROTECT_SYSTEM_STRICT;
+                        if (ec->protect_home == PROTECT_HOME_NO)
+                                ec->protect_home = PROTECT_HOME_READ_ONLY;
                 }
         }
 
-- 
cgit v1.2.3-54-g00ecf


From 096424d1230e0a0339735c51b43949809e972430 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 17:29:12 +0200
Subject: execute: drop group priviliges only after setting up namespace

If PrivateDevices=yes is set, the namespace code creates device nodes in /dev
that should be owned by the host's root, hence let's make sure we set up the
namespace before dropping group privileges.
---
 src/core/execute.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 20e74ec8a6..ae251b2a4c 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2291,14 +2291,9 @@ static int exec_child(
         }
         accum_env = strv_env_clean(accum_env);
 
-        umask(context->umask);
+        (void) umask(context->umask);
 
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
-                r = enforce_groups(context, username, gid);
-                if (r < 0) {
-                        *exit_status = EXIT_GROUP;
-                        return r;
-                }
 #ifdef HAVE_SMACK
                 if (context->smack_process_label) {
                         r = mac_smack_apply_pid(0, context->smack_process_label);
@@ -2395,6 +2390,14 @@ static int exec_child(
                 }
         }
 
+        if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
+                r = enforce_groups(context, username, gid);
+                if (r < 0) {
+                        *exit_status = EXIT_GROUP;
+                        return r;
+                }
+        }
+
         if (context->working_directory_home)
                 wd = home;
         else if (context->working_directory)
-- 
cgit v1.2.3-54-g00ecf


From 1e4e94c8819e2fe3a7217690c0590dba8ab0be9e Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 25 Aug 2016 17:30:47 +0200
Subject: namespace: invoke unshare() only after checking all parameters

Let's create the new namespace only after we validated and processed all
parameters, right before we start with actually mounting things.

This way, the window where we can roll back is larger (not that it matters
IRL...)
---
 src/core/namespace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 498cd139bf..356d3c8121 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -462,9 +462,6 @@ int setup_namespace(
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
-        if (unshare(CLONE_NEWNS) < 0)
-                return -errno;
-
         n = !!tmp_dir + !!var_tmp_dir +
                 strv_length(read_write_paths) +
                 strv_length(read_only_paths) +
@@ -606,6 +603,9 @@ int setup_namespace(
                 drop_nop(mounts, &n);
         }
 
+        if (unshare(CLONE_NEWNS) < 0)
+                return -errno;
+
         if (n > 0 || root_directory) {
                 /* Remount / as SLAVE so that nothing now mounted in the namespace
                    shows up in the parent */
-- 
cgit v1.2.3-54-g00ecf


From d944dc9553009822deaddec76814f5642a6a8176 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Sat, 24 Sep 2016 12:41:30 +0200
Subject: namespace: chase symlinks for mounts to set up in userspace

This adds logic to chase symlinks for all mount points that shall be created in
a namespace environment in userspace, instead of leaving this to the kernel.
This has the advantage that we can correctly handle absolute symlinks that
shall be taken relative to a specific root directory. Moreover, we can properly
handle mounts created on symlinked files or directories as we can merge their
mounts as necessary.

(This also drops the "done" flag in the namespace logic, which was never
actually working, but was supposed to permit a partial rollback of the
namespace logic, which however is only mildly useful as it wasn't clear in
which case it would or would not be able to roll back.)

Fixes: #3867
---
 src/basic/fs-util.c     | 187 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/basic/fs-util.h     |   2 +
 src/core/namespace.c    | 118 +++++++++++++++++++-----------
 src/test/test-fs-util.c |  96 ++++++++++++++++++++++++-
 src/test/test-ns.c      |  10 ++-
 5 files changed, 367 insertions(+), 46 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
index ce87257bc1..86d9ad7e36 100644
--- a/src/basic/fs-util.c
+++ b/src/basic/fs-util.c
@@ -597,3 +597,190 @@ int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
 
         return r;
 }
+
+int chase_symlinks(const char *path, const char *_root, char **ret) {
+        _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
+        _cleanup_close_ int fd = -1;
+        unsigned max_follow = 32; /* how many symlinks to follow before giving up and returning ELOOP */
+        char *todo;
+        int r;
+
+        assert(path);
+
+        /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
+         * symlinks relative to a root directory, instead of the root of the host.
+         *
+         * Note that "root" matters only if we encounter an absolute symlink, it's unused otherwise. Most importantly
+         * this means the path parameter passed in is not prefixed by it.
+         *
+         * Algorithmically this operates on two path buffers: "done" are the components of the path we already
+         * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
+         * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
+         * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
+         * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
+         * at a minimum. */
+
+        r = path_make_absolute_cwd(path, &buffer);
+        if (r < 0)
+                return r;
+
+        if (_root) {
+                r = path_make_absolute_cwd(_root, &root);
+                if (r < 0)
+                        return r;
+        }
+
+        fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+        if (fd < 0)
+                return -errno;
+
+        todo = buffer;
+        for (;;) {
+                _cleanup_free_ char *first = NULL;
+                _cleanup_close_ int child = -1;
+                struct stat st;
+                size_t n, m;
+
+                /* Determine length of first component in the path */
+                n = strspn(todo, "/");                  /* The slashes */
+                m = n + strcspn(todo + n, "/");         /* The entire length of the component */
+
+                /* Extract the first component. */
+                first = strndup(todo, m);
+                if (!first)
+                        return -ENOMEM;
+
+                todo += m;
+
+                /* Just a single slash? Then we reached the end. */
+                if (isempty(first) || path_equal(first, "/"))
+                        break;
+
+                /* Just a dot? Then let's eat this up. */
+                if (path_equal(first, "/."))
+                        continue;
+
+                /* Two dots? Then chop off the last bit of what we already found out. */
+                if (path_equal(first, "/..")) {
+                        _cleanup_free_ char *parent = NULL;
+                        int fd_parent = -1;
+
+                        if (isempty(done) || path_equal(done, "/"))
+                                return -EINVAL;
+
+                        parent = dirname_malloc(done);
+                        if (!parent)
+                                return -ENOMEM;
+
+                        /* Don't allow this to leave the root dir */
+                        if (root &&
+                            path_startswith(done, root) &&
+                            !path_startswith(parent, root))
+                                return -EINVAL;
+
+                        free(done);
+                        done = parent;
+                        parent = NULL;
+
+                        fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+                        if (fd_parent < 0)
+                                return -errno;
+
+                        safe_close(fd);
+                        fd = fd_parent;
+
+                        continue;
+                }
+
+                /* Otherwise let's see what this is. */
+                child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+                if (child < 0)
+                        return -errno;
+
+                if (fstat(child, &st) < 0)
+                        return -errno;
+
+                if (S_ISLNK(st.st_mode)) {
+                        _cleanup_free_ char *destination = NULL;
+
+                        /* This is a symlink, in this case read the destination. But let's make sure we don't follow
+                         * symlinks without bounds. */
+                        if (--max_follow <= 0)
+                                return -ELOOP;
+
+                        r = readlinkat_malloc(fd, first + n, &destination);
+                        if (r < 0)
+                                return r;
+                        if (isempty(destination))
+                                return -EINVAL;
+
+                        if (path_is_absolute(destination)) {
+
+                                /* An absolute destination. Start the loop from the beginning, but use the root
+                                 * directory as base. */
+
+                                safe_close(fd);
+                                fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+                                if (fd < 0)
+                                        return -errno;
+
+                                free(buffer);
+                                buffer = destination;
+                                destination = NULL;
+
+                                todo = buffer;
+                                free(done);
+
+                                /* Note that we do not revalidate the root, we take it as is. */
+                                if (isempty(root))
+                                        done = NULL;
+                                else {
+                                        done = strdup(root);
+                                        if (!done)
+                                                return -ENOMEM;
+                                }
+
+                        } else {
+                                char *joined;
+
+                                /* A relative destination. If so, this is what we'll prefix what's left to do with what
+                                 * we just read, and start the loop again, but remain in the current directory. */
+
+                                joined = strjoin("/", destination, todo, NULL);
+                                if (!joined)
+                                        return -ENOMEM;
+
+                                free(buffer);
+                                todo = buffer = joined;
+                        }
+
+                        continue;
+                }
+
+                /* If this is not a symlink, then let's just add the name we read to what we already verified. */
+                if (!done) {
+                        done = first;
+                        first = NULL;
+                } else {
+                        if (!strextend(&done, first, NULL))
+                                return -ENOMEM;
+                }
+
+                /* And iterate again, but go one directory further down. */
+                safe_close(fd);
+                fd = child;
+                child = -1;
+        }
+
+        if (!done) {
+                /* Special case, turn the empty string into "/", to indicate the root directory. */
+                done = strdup("/");
+                if (!done)
+                        return -ENOMEM;
+        }
+
+        *ret = done;
+        done = NULL;
+
+        return 0;
+}
diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h
index 2c3b9a1c74..31df47cf1e 100644
--- a/src/basic/fs-util.h
+++ b/src/basic/fs-util.h
@@ -77,3 +77,5 @@ union inotify_event_buffer {
 };
 
 int inotify_add_watch_fd(int fd, int what, uint32_t mask);
+
+int chase_symlinks(const char *path, const char *_root, char **ret);
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 356d3c8121..d3ab2e8e3e 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -29,6 +29,7 @@
 #include "alloc-util.h"
 #include "dev-setup.h"
 #include "fd-util.h"
+#include "fs-util.h"
 #include "loopback-setup.h"
 #include "missing.h"
 #include "mkdir.h"
@@ -57,9 +58,9 @@ typedef enum MountMode {
 } MountMode;
 
 typedef struct BindMount {
-        const char *path;
+        const char *path; /* stack memory, doesn't need to be freed explicitly */
+        char *chased; /* malloc()ed memory, needs to be freed */
         MountMode mode;
-        bool done;
         bool ignore;
 } BindMount;
 
@@ -71,7 +72,6 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         STRV_FOREACH(i, strv) {
 
                 (*p)->ignore = false;
-                (*p)->done = false;
 
                 if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
                         (*p)->ignore = true;
@@ -360,11 +360,8 @@ static int apply_mount(
                  * inaccessible path. */
                 (void) umount_recursive(m->path, 0);
 
-                if (lstat(m->path, &target) < 0) {
-                        if (m->ignore && errno == ENOENT)
-                                return 0;
+                if (lstat(m->path, &target) < 0)
                         return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
-                }
 
                 what = mode_to_inaccessible_node(target.st_mode);
                 if (!what) {
@@ -378,11 +375,8 @@ static int apply_mount(
         case READWRITE:
 
                 r = path_is_mount_point(m->path, 0);
-                if (r < 0) {
-                        if (m->ignore && errno == ENOENT)
-                                return 0;
+                if (r < 0)
                         return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
-                }
                 if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
                         return 0;
 
@@ -407,12 +401,8 @@ static int apply_mount(
 
         assert(what);
 
-        if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) {
-                if (m->ignore && errno == ENOENT)
-                        return 0;
-
+        if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0)
                 return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
-        }
 
         log_debug("Successfully mounted %s to %s", what, m->path);
         return 0;
@@ -435,12 +425,43 @@ static int make_read_only(BindMount *m, char **blacklist) {
          * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
          * read-only mounts already applied. */
 
-        if (m->ignore && r == -ENOENT)
-                return 0;
-
         return r;
 }
 
+static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+        int r;
+
+        assert(m);
+        assert(n);
+
+        /* Since mount() will always follow symlinks and we need to take the different root directory into account we
+         * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we
+         * can't resolve the path, and which have been marked for such removal. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                r = chase_symlinks(f->path, root_directory, &f->chased);
+                if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */
+                        continue;
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path);
+
+                if (path_equal(f->path, f->chased))
+                        f->chased = mfree(f->chased);
+                else {
+                        log_debug("Chased %s → %s", f->path, f->chased);
+                        f->path = f->chased;
+                }
+
+                *t = *f;
+                t++;
+        }
+
+        *n = t - m;
+        return 0;
+}
+
 int setup_namespace(
                 const char* root_directory,
                 char** read_write_paths,
@@ -456,6 +477,7 @@ int setup_namespace(
                 unsigned long mount_flags) {
 
         BindMount *m, *mounts = NULL;
+        bool make_slave = false;
         unsigned n;
         int r = 0;
 
@@ -475,6 +497,9 @@ int setup_namespace(
                  ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
                   (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
 
+        if (root_directory || n > 0)
+                make_slave = true;
+
         if (n > 0) {
                 m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
                 r = append_mounts(&m, read_write_paths, READWRITE);
@@ -596,6 +621,13 @@ int setup_namespace(
 
                 assert(mounts + n == m);
 
+                /* Resolve symlinks manually first, as mount() will always follow them relative to the host's
+                 * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit
+                 * racy. */
+                r = chase_all_symlinks(root_directory, mounts, &n);
+                if (r < 0)
+                        goto finish;
+
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
 
                 drop_duplicates(mounts, &n);
@@ -603,20 +635,26 @@ int setup_namespace(
                 drop_nop(mounts, &n);
         }
 
-        if (unshare(CLONE_NEWNS) < 0)
-                return -errno;
+        if (unshare(CLONE_NEWNS) < 0) {
+                r = -errno;
+                goto finish;
+        }
 
-        if (n > 0 || root_directory) {
+        if (make_slave) {
                 /* Remount / as SLAVE so that nothing now mounted in the namespace
                    shows up in the parent */
-                if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
-                        return -errno;
+                if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
         }
 
         if (root_directory) {
                 /* Turn directory into bind mount */
-                if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0)
-                        return -errno;
+                if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
         }
 
         if (n > 0) {
@@ -627,7 +665,7 @@ int setup_namespace(
                 for (m = mounts; m < mounts + n; ++m) {
                         r = apply_mount(m, tmp_dir, var_tmp_dir);
                         if (r < 0)
-                                goto fail;
+                                goto finish;
                 }
 
                 /* Create a blacklist we can pass to bind_mount_recursive() */
@@ -640,34 +678,30 @@ int setup_namespace(
                 for (m = mounts; m < mounts + n; ++m) {
                         r = make_read_only(m, blacklist);
                         if (r < 0)
-                                goto fail;
+                                goto finish;
                 }
         }
 
         if (root_directory) {
                 /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
                 r = mount_move_root(root_directory);
-                if (r < 0) /* at this point, we cannot rollback */
-                        return r;
+                if (r < 0)
+                        goto finish;
         }
 
         /* Remount / as the desired mode. Not that this will not
          * reestablish propagation from our side to the host, since
          * what's disconnected is disconnected. */
-        if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
-                return -errno; /* at this point, we cannot rollback */
+        if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+                r = -errno;
+                goto finish;
+        }
 
-        return 0;
+        r = 0;
 
-fail:
-        if (n > 0) {
-                for (m = mounts; m < mounts + n; ++m) {
-                        if (!m->done)
-                                continue;
-
-                        (void) umount2(m->path, MNT_DETACH);
-                }
-        }
+finish:
+        for (m = mounts; m < mounts + n; m++)
+                free(m->chased);
 
         return r;
 }
diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c
index b35a2ea2c8..53a3cdc663 100644
--- a/src/test/test-fs-util.c
+++ b/src/test/test-fs-util.c
@@ -20,16 +20,109 @@
 #include <unistd.h>
 
 #include "alloc-util.h"
-#include "fileio.h"
 #include "fd-util.h"
+#include "fileio.h"
 #include "fs-util.h"
 #include "macro.h"
 #include "mkdir.h"
+#include "path-util.h"
 #include "rm-rf.h"
 #include "string-util.h"
 #include "strv.h"
 #include "util.h"
 
+static void test_chase_symlinks(void) {
+        _cleanup_free_ char *result = NULL;
+        char temp[] = "/tmp/test-chase.XXXXXX";
+        const char *top, *p, *q;
+        int r;
+
+        assert_se(mkdtemp(temp));
+
+        top = strjoina(temp, "/top");
+        assert_se(mkdir(top, 0700) >= 0);
+
+        p = strjoina(top, "/dot");
+        assert_se(symlink(".", p) >= 0);
+
+        p = strjoina(top, "/dotdot");
+        assert_se(symlink("..", p) >= 0);
+
+        p = strjoina(top, "/dotdota");
+        assert_se(symlink("../a", p) >= 0);
+
+        p = strjoina(temp, "/a");
+        assert_se(symlink("b", p) >= 0);
+
+        p = strjoina(temp, "/b");
+        assert_se(symlink("/usr", p) >= 0);
+
+        p = strjoina(temp, "/start");
+        assert_se(symlink("top/dot/dotdota", p) >= 0);
+
+        r = chase_symlinks(p, NULL, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, "/usr"));
+
+        result = mfree(result);
+        r = chase_symlinks(p, temp, &result);
+        assert_se(r == -ENOENT);
+
+        q = strjoina(temp, "/usr");
+        assert_se(mkdir(q, 0700) >= 0);
+
+        r = chase_symlinks(p, temp, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, q));
+
+        p = strjoina(temp, "/slash");
+        assert_se(symlink("/", p) >= 0);
+
+        result = mfree(result);
+        r = chase_symlinks(p, NULL, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, "/"));
+
+        result = mfree(result);
+        r = chase_symlinks(p, temp, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, temp));
+
+        p = strjoina(temp, "/slashslash");
+        assert_se(symlink("///usr///", p) >= 0);
+
+        result = mfree(result);
+        r = chase_symlinks(p, NULL, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, "/usr"));
+
+        result = mfree(result);
+        r = chase_symlinks(p, temp, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, q));
+
+        result = mfree(result);
+        r = chase_symlinks("/etc/./.././", NULL, &result);
+        assert_se(r >= 0);
+        assert_se(path_equal(result, "/"));
+
+        result = mfree(result);
+        r = chase_symlinks("/etc/./.././", "/etc", &result);
+        assert_se(r == -EINVAL);
+
+        result = mfree(result);
+        r = chase_symlinks("/etc/machine-id/foo", NULL, &result);
+        assert_se(r == -ENOTDIR);
+
+        result = mfree(result);
+        p = strjoina(temp, "/recursive-symlink");
+        assert_se(symlink("recursive-symlink", p) >= 0);
+        r = chase_symlinks(p, NULL, &result);
+        assert_se(r == -ELOOP);
+
+        assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
 static void test_unlink_noerrno(void) {
         char name[] = "/tmp/test-close_nointr.XXXXXX";
         int fd;
@@ -144,6 +237,7 @@ int main(int argc, char *argv[]) {
         test_readlink_and_make_absolute();
         test_get_files_in_directory();
         test_var_tmp();
+        test_chase_symlinks();
 
         return 0;
 }
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 03a24620af..c4d4da6d05 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -26,14 +26,18 @@
 int main(int argc, char *argv[]) {
         const char * const writable[] = {
                 "/home",
-                "/home/lennart/projects/foobar", /* this should be masked automatically */
+                "-/home/lennart/projects/foobar", /* this should be masked automatically */
                 NULL
         };
 
         const char * const readonly[] = {
-                "/",
-                "/usr",
+                /* "/", */
+                /* "/usr", */
                 "/boot",
+                "/lib",
+                "/usr/lib",
+                "-/lib64",
+                "-/usr/lib64",
                 NULL
         };
 
-- 
cgit v1.2.3-54-g00ecf


From 8f1ad200f010dc2106f7e3ff5879f0330ee36996 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 26 Aug 2016 11:27:38 +0200
Subject: namespace: don't make the root directory of a namespace a mount if it
 already is one

Let's not stack mounts needlessly.
---
 src/core/namespace.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index d3ab2e8e3e..a7451ffbdc 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -650,10 +650,15 @@ int setup_namespace(
         }
 
         if (root_directory) {
-                /* Turn directory into bind mount */
-                if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
-                        r = -errno;
+                /* Turn directory into bind mount, if it isn't one yet */
+                r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW);
+                if (r < 0)
                         goto finish;
+                if (r == 0) {
+                        if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
                 }
         }
 
-- 
cgit v1.2.3-54-g00ecf


From ba128bb809cc59ca60db65f0c09bd7f48876fa83 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 26 Aug 2016 16:39:04 +0200
Subject: execute: filter low-level I/O syscalls if PrivateDevices= is set

If device access is restricted via PrivateDevices=, let's also block the
various low-level I/O syscalls at the same time, so that we know that the
minimal set of devices in our virtualized /dev are really everything the unit
can access.
---
 src/core/execute.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index ae251b2a4c..a20e9ea829 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1422,12 +1422,67 @@ finish:
         return r;
 }
 
+static int apply_private_devices(Unit *u, const ExecContext *c) {
+
+        static const int device_syscalls[] = {
+                SCMP_SYS(ioperm),
+                SCMP_SYS(iopl),
+                SCMP_SYS(pciconfig_iobase),
+                SCMP_SYS(pciconfig_read),
+                SCMP_SYS(pciconfig_write),
+#ifdef __NR_s390_pci_mmio_read
+                SCMP_SYS(s390_pci_mmio_read),
+#endif
+#ifdef __NR_s390_pci_mmio_write
+                SCMP_SYS(s390_pci_mmio_write),
+#endif
+        };
+
+        scmp_filter_ctx *seccomp;
+        unsigned i;
+        int r;
+
+        assert(c);
+
+        /* If PrivateDevices= is set, also turn off iopl and friends. */
+
+        if (skip_seccomp_unavailable(u, "PrivateDevices="))
+                return 0;
+
+        seccomp = seccomp_init(SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
+
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
+        for (i = 0; i < ELEMENTSOF(device_syscalls); i++) {
+                r = seccomp_rule_add(
+                                seccomp,
+                                SCMP_ACT_ERRNO(EPERM),
+                                device_syscalls[i],
+                                0);
+                if (r < 0)
+                        goto finish;
+        }
+
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
+        return r;
+}
+
 #endif
 
 static void do_idle_pipe_dance(int idle_pipe[4]) {
         assert(idle_pipe);
 
-
         idle_pipe[1] = safe_close(idle_pipe[1]);
         idle_pipe[2] = safe_close(idle_pipe[2]);
 
@@ -2584,6 +2639,14 @@ static int exec_child(
                         }
                 }
 
+                if (context->private_devices) {
+                        r = apply_private_devices(unit, context);
+                        if (r < 0) {
+                                *exit_status = EXIT_SECCOMP;
+                                return r;
+                        }
+                }
+
                 if (context_has_syscall_filters(context)) {
                         r = apply_seccomp(unit, context);
                         if (r < 0) {
-- 
cgit v1.2.3-54-g00ecf


From 54500613a46023fe991f424e21ed15948b9a74f5 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 26 Aug 2016 17:25:08 +0200
Subject: main: minor simplification

---
 src/core/main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 803307c9d5..be0cb0b6d1 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -996,10 +996,8 @@ static int parse_argv(int argc, char *argv[]) {
 
                 case ARG_MACHINE_ID:
                         r = set_machine_id(optarg);
-                        if (r < 0) {
-                                log_error("MachineID '%s' is not valid.", optarg);
-                                return r;
-                        }
+                        if (r < 0)
+                                return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
                         break;
 
                 case 'h':
-- 
cgit v1.2.3-54-g00ecf


From cd2902c9546eabfffcf5d6de4d0bd4dfe6a4d427 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 26 Aug 2016 17:25:40 +0200
Subject: namespace: drop all mounts outside of the new root directory

There's no point in mounting these, if they are outside of the root directory
we'll move to.
---
 src/core/namespace.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index a7451ffbdc..c9b2154985 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -199,6 +199,31 @@ static void drop_nop(BindMount *m, unsigned *n) {
         *n = t - m;
 }
 
+static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) {
+        BindMount *f, *t;
+
+        assert(m);
+        assert(n);
+
+        if (!root_directory)
+                return;
+
+        /* Drops all mounts that are outside of the root directory. */
+
+        for (f = m, t = m; f < m+*n; f++) {
+
+                if (!path_startswith(f->path, root_directory)) {
+                        log_debug("%s is outside of root directory.", f->path);
+                        continue;
+                }
+
+                *t = *f;
+                t++;
+        }
+
+        *n = t - m;
+}
+
 static int mount_dev(BindMount *m) {
         static const char devnodes[] =
                 "/dev/null\0"
@@ -631,6 +656,7 @@ int setup_namespace(
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
 
                 drop_duplicates(mounts, &n);
+                drop_outside_root(root_directory, mounts, &n);
                 drop_inaccessible(mounts, &n);
                 drop_nop(mounts, &n);
         }
-- 
cgit v1.2.3-54-g00ecf


From cefc33aee299fa214f093d3d1b4c171ac3b30dde Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 26 Aug 2016 17:40:42 +0200
Subject: execute: move SMACK setup code into its own function

While we are at it, move PAM code #ifdeffery into setup_pam() to simplify the
main execution logic a bit.
---
 src/core/execute.c | 74 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 27 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index a20e9ea829..0488ba2ca9 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -837,6 +837,8 @@ static int null_conv(
         return PAM_CONV_ERR;
 }
 
+#endif
+
 static int setup_pam(
                 const char *name,
                 const char *user,
@@ -845,6 +847,8 @@ static int setup_pam(
                 char ***env,
                 int fds[], unsigned n_fds) {
 
+#ifdef HAVE_PAM
+
         static const struct pam_conv conv = {
                 .conv = null_conv,
                 .appdata_ptr = NULL
@@ -1038,8 +1042,10 @@ fail:
         closelog();
 
         return r;
-}
+#else
+        return 0;
 #endif
+}
 
 static void rename_process_from_path(const char *path) {
         char process_name[11];
@@ -1875,6 +1881,42 @@ static int setup_runtime_directory(
         return 0;
 }
 
+static int setup_smack(
+                const ExecContext *context,
+                const ExecCommand *command) {
+
+#ifdef HAVE_SMACK
+        int r;
+
+        assert(context);
+        assert(command);
+
+        if (!mac_smack_use())
+                return 0;
+
+        if (context->smack_process_label) {
+                r = mac_smack_apply_pid(0, context->smack_process_label);
+                if (r < 0)
+                        return r;
+        }
+#ifdef SMACK_DEFAULT_PROCESS_LABEL
+        else {
+                _cleanup_free_ char *exec_label = NULL;
+
+                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
+                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
+                        return r;
+
+                r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
+                if (r < 0)
+                        return r;
+        }
+#endif
+#endif
+
+        return 0;
+}
+
 static int compile_read_write_paths(
                 const ExecContext *context,
                 const ExecParameters *params,
@@ -2349,33 +2391,12 @@ static int exec_child(
         (void) umask(context->umask);
 
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
-#ifdef HAVE_SMACK
-                if (context->smack_process_label) {
-                        r = mac_smack_apply_pid(0, context->smack_process_label);
-                        if (r < 0) {
-                                *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                return r;
-                        }
+                r = setup_smack(context, command);
+                if (r < 0) {
+                        *exit_status = EXIT_SMACK_PROCESS_LABEL;
+                        return r;
                 }
-#ifdef SMACK_DEFAULT_PROCESS_LABEL
-                else {
-                        _cleanup_free_ char *exec_label = NULL;
 
-                        r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
-                        if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
-                                *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                return r;
-                        }
-
-                        r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
-                        if (r < 0) {
-                                *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                                return r;
-                        }
-                }
-#endif
-#endif
-#ifdef HAVE_PAM
                 if (context->pam_name && username) {
                         r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
                         if (r < 0) {
@@ -2383,7 +2404,6 @@ static int exec_child(
                                 return r;
                         }
                 }
-#endif
         }
 
         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
-- 
cgit v1.2.3-54-g00ecf


From 9c94d52e0919e4d7999e49b9ba2654a9e2ca4543 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 11:03:21 +0200
Subject: core:namespace: minor improvements to append_mounts()

---
 src/core/namespace.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index c9b2154985..8de774e6f6 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -70,12 +70,11 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         assert(p);
 
         STRV_FOREACH(i, strv) {
+                bool ignore = false;
 
-                (*p)->ignore = false;
-
-                if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
-                        (*p)->ignore = true;
+                if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) {
                         (*i)++;
+                        ignore = true;
                 }
 
                 if (!path_is_absolute(*i))
@@ -83,6 +82,7 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) {
 
                 (*p)->path = *i;
                 (*p)->mode = mode;
+                (*p)->ignore = ignore;
                 (*p)++;
         }
 
-- 
cgit v1.2.3-54-g00ecf


From 11a30cec2a9b6168b024c06720ad238dd1390794 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 11:16:44 +0200
Subject: core:namespace: put paths protected by ProtectKernelTunables= in

Instead of having all these paths everywhere, put the ones that are
protected by ProtectKernelTunables= into their own table. This way it
is easy to add paths and track which ones are protected.
---
 src/core/namespace.c | 54 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 19 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 8de774e6f6..13f6aeba51 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -61,9 +61,23 @@ typedef struct BindMount {
         const char *path; /* stack memory, doesn't need to be freed explicitly */
         char *chased; /* malloc()ed memory, needs to be freed */
         MountMode mode;
-        bool ignore;
+        bool ignore; /* Ignore if path does not exist */
 } BindMount;
 
+typedef struct TargetMount {
+        const char *path;
+        MountMode mode;
+        bool ignore; /* Ignore if path does not exist */
+} TargetMount;
+
+/* ProtectKernelTunables= option and the related filesystem APIs */
+static const TargetMount protect_kernel_tunables_table[] = {
+        { "/proc/sys",                  READONLY,       false },
+        { "/proc/sysrq-trigger",        READONLY,       true  },
+        { "/sys",                       READONLY,       false },
+        { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
+};
+
 static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         char **i;
 
@@ -89,6 +103,20 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         return 0;
 }
 
+static void append_protect_kernel_tunables(BindMount **p, const char *root_directory) {
+        unsigned int i;
+
+        assert(p);
+
+        for (i = 0; i < ELEMENTSOF(protect_kernel_tunables_table); i++) {
+                const TargetMount *t = &protect_kernel_tunables_table[i];
+                (*p)->path = prefix_roota(root_directory, t->path);
+                (*p)->mode = t->mode;
+                (*p)->ignore = t->ignore;
+                (*p)++;
+        }
+}
+
 static int mount_path_compare(const void *a, const void *b) {
         const BindMount *p = a, *q = b;
         int d;
@@ -514,8 +542,8 @@ int setup_namespace(
                 strv_length(read_only_paths) +
                 strv_length(inaccessible_paths) +
                 private_dev +
-                (protect_sysctl ? 3 : 0) +
-                (protect_cgroups != protect_sysctl) +
+                (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+                (protect_cgroups ? 1 : 0) +
                 (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
                 (protect_system == PROTECT_SYSTEM_STRICT ?
                  (2 + !private_dev + !protect_sysctl) :
@@ -557,24 +585,12 @@ int setup_namespace(
                         m++;
                 }
 
-                if (protect_sysctl) {
-                        m->path = prefix_roota(root_directory, "/proc/sys");
-                        m->mode = READONLY;
-                        m++;
-
-                        m->path = prefix_roota(root_directory, "/proc/sysrq-trigger");
-                        m->mode = READONLY;
-                        m->ignore = true; /* Not always compiled into the kernel */
-                        m++;
+                if (protect_sysctl)
+                        append_protect_kernel_tunables(&m, root_directory);
 
-                        m->path = prefix_roota(root_directory, "/sys");
-                        m->mode = READONLY;
-                        m++;
-                }
-
-                if (protect_cgroups != protect_sysctl) {
+                if (protect_cgroups) {
                         m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
-                        m->mode = protect_cgroups ? READONLY : READWRITE;
+                        m->mode = READONLY;
                         m++;
                 }
 
-- 
cgit v1.2.3-54-g00ecf


From 2652c6c10394623b2c3e2ed5d4616c85918d140c Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 11:25:00 +0200
Subject: core:namespace: simplify mount calculation

Move out mount calculation on its own function. Actually the logic is
smart enough to later drop nop and duplicates mounts, this change
improves code readability.
---
 src/core/namespace.c | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)
---
 src/core/namespace.c | 46 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 13f6aeba51..8aa8b83c88 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -515,6 +515,32 @@ static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned
         return 0;
 }
 
+static unsigned namespace_calculate_mounts(
+                char** read_write_paths,
+                char** read_only_paths,
+                char** inaccessible_paths,
+                const char* tmp_dir,
+                const char* var_tmp_dir,
+                bool private_dev,
+                bool protect_sysctl,
+                bool protect_cgroups,
+                ProtectHome protect_home,
+                ProtectSystem protect_system) {
+
+        return !!tmp_dir + !!var_tmp_dir +
+                strv_length(read_write_paths) +
+                strv_length(read_only_paths) +
+                strv_length(inaccessible_paths) +
+                private_dev +
+                (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+                (protect_cgroups ? 1 : 0) +
+                (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
+                (protect_system == PROTECT_SYSTEM_STRICT ?
+                 (2 + !private_dev + !protect_sysctl) :
+                 ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
+                  (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
+}
+
 int setup_namespace(
                 const char* root_directory,
                 char** read_write_paths,
@@ -537,19 +563,15 @@ int setup_namespace(
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
-        n = !!tmp_dir + !!var_tmp_dir +
-                strv_length(read_write_paths) +
-                strv_length(read_only_paths) +
-                strv_length(inaccessible_paths) +
-                private_dev +
-                (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
-                (protect_cgroups ? 1 : 0) +
-                (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
-                (protect_system == PROTECT_SYSTEM_STRICT ?
-                 (2 + !private_dev + !protect_sysctl) :
-                 ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
-                  (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
+        n = namespace_calculate_mounts(read_write_paths,
+                                       read_only_paths,
+                                       inaccessible_paths,
+                                       tmp_dir, var_tmp_dir,
+                                       private_dev, protect_sysctl,
+                                       protect_cgroups, protect_home,
+                                       protect_system);
 
+        /* Set mount slave mode */
         if (root_directory || n > 0)
                 make_slave = true;
 
-- 
cgit v1.2.3-54-g00ecf


From 49accde7bd915944d99c947dca0cf26ae0f24165 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 11:30:11 +0200
Subject: core:sandbox: add more /proc/* entries to ProtectKernelTunables=

Make ALSA entries, latency interface, mtrr, apm/acpi, suspend interface,
filesystems configuration and IRQ tuning readonly.

Most of these interfaces now days should be in /sys but they are still
available through /proc, so just protect them. This patch does not touch
/proc/net/...
---
 man/systemd.exec.xml |  6 ++++--
 src/core/namespace.c | 11 +++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index a3a431c82b..f19e7f6ee9 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1026,8 +1026,10 @@
         <term><varname>ProtectKernelTunables=</varname></term>
 
         <listitem><para>Takes a boolean argument. If true, kernel variables accessible through
-        <filename>/proc/sys</filename>, <filename>/sys</filename> and <filename>/proc/sysrq-trigger</filename> will be
-        made read-only to all processes of the unit. Usually, tunable kernel variables should only be written at
+        <filename>/proc/sys</filename>, <filename>/sys</filename>, <filename>/proc/sysrq-trigger</filename>,
+        <filename>/proc/latency_stats</filename>, <filename>/proc/acpi</filename>,
+        <filename>/proc/timer_stats</filename>, <filename>/proc/fs</filename> and <filename>/proc/irq</filename> will
+        be made read-only to all processes of the unit. Usually, tunable kernel variables should only be written at
         boot-time, with the <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
         mechanism. Almost no services need to write to these at runtime; it is hence recommended to turn this on for
         most services. For this setting the same restrictions regarding mount propagation and privileges apply as for
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 8aa8b83c88..3234fab4bc 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -74,7 +74,18 @@ typedef struct TargetMount {
 static const TargetMount protect_kernel_tunables_table[] = {
         { "/proc/sys",                  READONLY,       false },
         { "/proc/sysrq-trigger",        READONLY,       true  },
+        { "/proc/latency_stats",        READONLY,       true  },
+        { "/proc/mtrr",                 READONLY,       true  },
+        { "/proc/apm",                  READONLY,       true  },
+        { "/proc/acpi",                 READONLY,       true  },
+        { "/proc/timer_stats",          READONLY,       true  },
+        { "/proc/asound",               READONLY,       true  },
+        { "/proc/bus",                  READONLY,       true  },
+        { "/proc/fs",                   READONLY,       true  },
+        { "/proc/irq",                  READONLY,       true  },
         { "/sys",                       READONLY,       false },
+        { "/sys/kernel/debug",          READONLY,       true  },
+        { "/sys/kernel/tracing",        READONLY,       true  },
         { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
 };
 
-- 
cgit v1.2.3-54-g00ecf


From f471b2afa11c97e48a4b6756f7254f88cc436960 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 12:21:25 +0200
Subject: core: simplify ProtectSystem= implementation

ProtectSystem= with all its different modes and other options like
PrivateDevices= + ProtectKernelTunables= + ProtectHome= are orthogonal,
however currently it's a bit hard to parse that from the implementation
view. Simplify it by giving each mode its own table with all paths and
references to other Protect options.

With this change some entries are duplicated, but we do not care since
duplicate mounts are first sorted by the most restrictive mode then
cleaned.
---
 src/core/namespace.c | 171 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 113 insertions(+), 58 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 3234fab4bc..985e343096 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -70,6 +70,14 @@ typedef struct TargetMount {
         bool ignore; /* Ignore if path does not exist */
 } TargetMount;
 
+/*
+ * The following Protect tables are to protect paths and mark some of them
+ * READONLY, in case a path is covered by an option from another table, then
+ * it is marked READWRITE in the current one, and the more restrictive mode is
+ * applied from that other table. This way all options can be combined in a
+ * safe and comprehensible way for users.
+ */
+
 /* ProtectKernelTunables= option and the related filesystem APIs */
 static const TargetMount protect_kernel_tunables_table[] = {
         { "/proc/sys",                  READONLY,       false },
@@ -89,6 +97,45 @@ static const TargetMount protect_kernel_tunables_table[] = {
         { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
 };
 
+/* ProtectSystem=yes table */
+static const TargetMount protect_system_yes_table[] = {
+        { "/usr",       READONLY,       false },
+        { "/boot",      READONLY,       true  },
+        { "/efi",       READONLY,       true  },
+};
+
+/* ProtectSystem=full includes ProtectSystem=yes */
+static const TargetMount protect_system_full_table[] = {
+        { "/usr",       READONLY,       false },
+        { "/boot",      READONLY,       true  },
+        { "/efi",       READONLY,       true  },
+        { "/etc",       READONLY,       false },
+};
+
+/*
+ * ProtectSystem=strict table. In this strict mode, we mount everything
+ * read-only, except for /proc, /dev, /sys which are the kernel API VFS,
+ * which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal.
+ * (And of course /home and friends are also left writable, as ProtectHome=
+ * shall manage those, orthogonally).
+ */
+static const TargetMount protect_system_strict_table[] = {
+        { "/",          READONLY,       false },
+        { "/proc",      READWRITE,      false },      /* ProtectKernelTunables= */
+        { "/sys",       READWRITE,      false },      /* ProtectKernelTunables= */
+        { "/dev",       READWRITE,      false },      /* PrivateDevices= */
+        { "/home",      READWRITE,      true  },      /* ProtectHome= */
+        { "/run/user",  READWRITE,      true  },      /* ProtectHome= */
+        { "/root",      READWRITE,      true  },      /* ProtectHome= */
+};
+
+static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) {
+        (*p)->path = path;
+        (*p)->mode = mode;
+        (*p)->ignore = ignore;
+}
+
 static int append_mounts(BindMount **p, char **strv, MountMode mode) {
         char **i;
 
@@ -105,27 +152,71 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) {
                 if (!path_is_absolute(*i))
                         return -EINVAL;
 
-                (*p)->path = *i;
-                (*p)->mode = mode;
-                (*p)->ignore = ignore;
+                set_bind_mount(p, *i, mode, ignore);
                 (*p)++;
         }
 
         return 0;
 }
 
-static void append_protect_kernel_tunables(BindMount **p, const char *root_directory) {
-        unsigned int i;
+static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) {
+        unsigned i;
 
         assert(p);
+        assert(mounts);
 
-        for (i = 0; i < ELEMENTSOF(protect_kernel_tunables_table); i++) {
-                const TargetMount *t = &protect_kernel_tunables_table[i];
-                (*p)->path = prefix_roota(root_directory, t->path);
-                (*p)->mode = t->mode;
-                (*p)->ignore = t->ignore;
+        for (i = 0; i < size; i++) {
+                /*
+                 * Here we assume that the ignore field is set during
+                 * declaration we do not support "-" at the beginning.
+                 */
+                const TargetMount *m = &mounts[i];
+                const char *path = prefix_roota(root_directory, m->path);
+
+                if (!path_is_absolute(path))
+                        return -EINVAL;
+
+                set_bind_mount(p, path, m->mode, m->ignore);
                 (*p)++;
         }
+
+        return 0;
+}
+
+static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) {
+        assert(p);
+
+        return append_target_mounts(p, root_directory, protect_kernel_tunables_table,
+                                    ELEMENTSOF(protect_kernel_tunables_table));
+}
+
+static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) {
+        int r = 0;
+
+        assert(p);
+
+        if (protect_system == PROTECT_SYSTEM_NO)
+                return 0;
+
+        switch (protect_system) {
+        case PROTECT_SYSTEM_STRICT:
+                r = append_target_mounts(p, root_directory, protect_system_strict_table,
+                                         ELEMENTSOF(protect_system_strict_table));
+                break;
+        case PROTECT_SYSTEM_YES:
+                r = append_target_mounts(p, root_directory, protect_system_yes_table,
+                                         ELEMENTSOF(protect_system_yes_table));
+                break;
+        case PROTECT_SYSTEM_FULL:
+                r = append_target_mounts(p, root_directory, protect_system_full_table,
+                                         ELEMENTSOF(protect_system_full_table));
+                break;
+        default:
+                r = -EINVAL;
+                break;
+        }
+
+        return r;
 }
 
 static int mount_path_compare(const void *a, const void *b) {
@@ -538,6 +629,14 @@ static unsigned namespace_calculate_mounts(
                 ProtectHome protect_home,
                 ProtectSystem protect_system) {
 
+        unsigned protect_system_cnt =
+                (protect_system == PROTECT_SYSTEM_STRICT ?
+                 ELEMENTSOF(protect_system_strict_table) :
+                 ((protect_system == PROTECT_SYSTEM_FULL) ?
+                  ELEMENTSOF(protect_system_full_table) :
+                  ((protect_system == PROTECT_SYSTEM_YES) ?
+                   ELEMENTSOF(protect_system_yes_table) : 0)));
+
         return !!tmp_dir + !!var_tmp_dir +
                 strv_length(read_write_paths) +
                 strv_length(read_only_paths) +
@@ -546,10 +645,7 @@ static unsigned namespace_calculate_mounts(
                 (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
                 (protect_cgroups ? 1 : 0) +
                 (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
-                (protect_system == PROTECT_SYSTEM_STRICT ?
-                 (2 + !private_dev + !protect_sysctl) :
-                 ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
-                  (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
+                protect_system_cnt;
 }
 
 int setup_namespace(
@@ -648,50 +744,9 @@ int setup_namespace(
                                 return r;
                 }
 
-                if (protect_system == PROTECT_SYSTEM_STRICT) {
-                        /* In strict mode, we mount everything read-only, except for /proc, /dev, /sys which are the
-                         * kernel API VFS, which are left writable, but PrivateDevices= + ProtectKernelTunables=
-                         * protect those, and these options should be fully orthogonal. (And of course /home and
-                         * friends are also left writable, as ProtectHome= shall manage those, orthogonally, see
-                         * above). */
-
-                        m->path = prefix_roota(root_directory, "/");
-                        m->mode = READONLY;
-                        m++;
-
-                        m->path = prefix_roota(root_directory, "/proc");
-                        m->mode = READWRITE;
-                        m++;
-
-                        if (!private_dev) {
-                                m->path = prefix_roota(root_directory, "/dev");
-                                m->mode = READWRITE;
-                                m++;
-                        }
-                        if (!protect_sysctl) {
-                                m->path = prefix_roota(root_directory, "/sys");
-                                m->mode = READWRITE;
-                                m++;
-                        }
-
-                } else if (protect_system != PROTECT_SYSTEM_NO) {
-                        const char *usr_dir, *boot_dir, *efi_dir, *etc_dir;
-
-                        /* In any other mode we simply mark the relevant three directories ready-only. */
-
-                        usr_dir = prefix_roota(root_directory, "/usr");
-                        boot_dir = prefix_roota(root_directory, "/boot");
-                        boot_dir = strjoina("-", boot_dir);
-                        efi_dir = prefix_roota(root_directory, "/efi");
-                        efi_dir = strjoina("-", efi_dir);
-                        etc_dir = prefix_roota(root_directory, "/etc");
-
-                        r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
-                                          ? STRV_MAKE(usr_dir, boot_dir, efi_dir, etc_dir)
-                                          : STRV_MAKE(usr_dir, boot_dir, efi_dir), READONLY);
-                        if (r < 0)
-                                return r;
-                }
+                r = append_protect_system(&m, root_directory, protect_system);
+                if (r < 0)
+                        return r;
 
                 assert(mounts + n == m);
 
-- 
cgit v1.2.3-54-g00ecf


From b6c432ca7ed930c7e9078ac2266ae439aa242632 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 12:41:16 +0200
Subject: core:namespace: simplify ProtectHome= implementation

As with previous patch simplify ProtectHome and don't care about
duplicates, they will be sorted by most restrictive mode and cleaned.
---
 src/core/namespace.c | 75 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 22 deletions(-)

(limited to 'src/core')

diff --git a/src/core/namespace.c b/src/core/namespace.c
index 985e343096..43a2f4ba6e 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -97,6 +97,23 @@ static const TargetMount protect_kernel_tunables_table[] = {
         { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
 };
 
+/*
+ * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
+ * system should be protected by ProtectSystem=
+ */
+static const TargetMount protect_home_read_only_table[] = {
+        { "/home",      READONLY,       true },
+        { "/run/user",  READONLY,       true },
+        { "/root",      READONLY,       true },
+};
+
+/* ProtectHome=yes table */
+static const TargetMount protect_home_yes_table[] = {
+        { "/home",      INACCESSIBLE,   true },
+        { "/run/user",  INACCESSIBLE,   true },
+        { "/root",      INACCESSIBLE,   true },
+};
+
 /* ProtectSystem=yes table */
 static const TargetMount protect_system_yes_table[] = {
         { "/usr",       READONLY,       false },
@@ -190,6 +207,31 @@ static int append_protect_kernel_tunables(BindMount **p, const char *root_direct
                                     ELEMENTSOF(protect_kernel_tunables_table));
 }
 
+static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
+        int r = 0;
+
+        assert(p);
+
+        if (protect_home == PROTECT_HOME_NO)
+                return 0;
+
+        switch (protect_home) {
+        case PROTECT_HOME_READ_ONLY:
+                r = append_target_mounts(p, root_directory, protect_home_read_only_table,
+                                         ELEMENTSOF(protect_home_read_only_table));
+                break;
+        case PROTECT_HOME_YES:
+                r = append_target_mounts(p, root_directory, protect_home_yes_table,
+                                         ELEMENTSOF(protect_home_yes_table));
+                break;
+        default:
+                r = -EINVAL;
+                break;
+        }
+
+        return r;
+}
+
 static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) {
         int r = 0;
 
@@ -629,6 +671,7 @@ static unsigned namespace_calculate_mounts(
                 ProtectHome protect_home,
                 ProtectSystem protect_system) {
 
+        unsigned protect_home_cnt;
         unsigned protect_system_cnt =
                 (protect_system == PROTECT_SYSTEM_STRICT ?
                  ELEMENTSOF(protect_system_strict_table) :
@@ -637,6 +680,12 @@ static unsigned namespace_calculate_mounts(
                   ((protect_system == PROTECT_SYSTEM_YES) ?
                    ELEMENTSOF(protect_system_yes_table) : 0)));
 
+        protect_home_cnt =
+                (protect_home == PROTECT_HOME_YES ?
+                 ELEMENTSOF(protect_home_yes_table) :
+                 ((protect_home == PROTECT_HOME_READ_ONLY) ?
+                  ELEMENTSOF(protect_home_read_only_table) : 0));
+
         return !!tmp_dir + !!var_tmp_dir +
                 strv_length(read_write_paths) +
                 strv_length(read_only_paths) +
@@ -644,8 +693,7 @@ static unsigned namespace_calculate_mounts(
                 private_dev +
                 (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
                 (protect_cgroups ? 1 : 0) +
-                (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
-                protect_system_cnt;
+                protect_home_cnt + protect_system_cnt;
 }
 
 int setup_namespace(
@@ -723,26 +771,9 @@ int setup_namespace(
                         m++;
                 }
 
-                if (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT) {
-                        const char *home_dir, *run_user_dir, *root_dir;
-
-                        /* If protection of $HOME and $XDG_RUNTIME_DIR is requested, then go for it. If we are in
-                         * strict system protection mode, then also add entries for these directories, but mark them
-                         * writable. This is because we want ProtectHome= and ProtectSystem= to be fully orthogonal. */
-
-                        home_dir = prefix_roota(root_directory, "/home");
-                        home_dir = strjoina("-", home_dir);
-                        run_user_dir = prefix_roota(root_directory, "/run/user");
-                        run_user_dir = strjoina("-", run_user_dir);
-                        root_dir = prefix_roota(root_directory, "/root");
-                        root_dir = strjoina("-", root_dir);
-
-                        r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
-                                protect_home == PROTECT_HOME_READ_ONLY ? READONLY :
-                                protect_home == PROTECT_HOME_YES ? INACCESSIBLE : READWRITE);
-                        if (r < 0)
-                                return r;
-                }
+                r = append_protect_home(&m, root_directory, protect_home);
+                if (r < 0)
+                        return r;
 
                 r = append_protect_system(&m, root_directory, protect_system);
                 if (r < 0)
-- 
cgit v1.2.3-54-g00ecf


From 8f81a5f61bcf745bae3acad599d7a9da686643e3 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 25 Sep 2016 12:52:27 +0200
Subject: core: Use @raw-io syscall group to filter I/O syscalls when
 PrivateDevices= is set

Instead of having a local syscall list, use the @raw-io group which
contains the same set of syscalls to filter.
---
 man/systemd.exec.xml |  6 ++++--
 src/core/execute.c   | 55 +++++++++++++++++++++++++++++++++-------------------
 2 files changed, 39 insertions(+), 22 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index f19e7f6ee9..f70e5c36d4 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -933,8 +933,10 @@
         <filename>/dev/random</filename> (as well as the pseudo TTY subsystem) to it, but no physical devices such as
         <filename>/dev/sda</filename>, system memory <filename>/dev/mem</filename>, system ports
         <filename>/dev/port</filename> and others. This is useful to securely turn off physical device access by the
-        executed process. Defaults to false. Enabling this option will also remove <constant>CAP_MKNOD</constant> from
-        the capability bounding set for the unit (see above), and set <varname>DevicePolicy=closed</varname> (see
+        executed process. Defaults to false. Enabling this option will install a system call filter to block low-level
+        I/O system calls that are grouped in the <varname>@raw-io</varname> set, will also remove
+        <constant>CAP_MKNOD</constant> from the capability bounding set for the unit (see above), and set
+        <varname>DevicePolicy=closed</varname> (see
         <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
         for details). Note that using this setting will disconnect propagation of mounts from the service to the host
         (propagation in the opposite direction continues to work).  This means that this setting may not be used for
diff --git a/src/core/execute.c b/src/core/execute.c
index 0488ba2ca9..3da7ef3be6 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1429,28 +1429,15 @@ finish:
 }
 
 static int apply_private_devices(Unit *u, const ExecContext *c) {
-
-        static const int device_syscalls[] = {
-                SCMP_SYS(ioperm),
-                SCMP_SYS(iopl),
-                SCMP_SYS(pciconfig_iobase),
-                SCMP_SYS(pciconfig_read),
-                SCMP_SYS(pciconfig_write),
-#ifdef __NR_s390_pci_mmio_read
-                SCMP_SYS(s390_pci_mmio_read),
-#endif
-#ifdef __NR_s390_pci_mmio_write
-                SCMP_SYS(s390_pci_mmio_write),
-#endif
-        };
-
+        const SystemCallFilterSet *set;
         scmp_filter_ctx *seccomp;
-        unsigned i;
+        const char *sys;
+        bool syscalls_found = false;
         int r;
 
         assert(c);
 
-        /* If PrivateDevices= is set, also turn off iopl and friends. */
+        /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
 
         if (skip_seccomp_unavailable(u, "PrivateDevices="))
                 return 0;
@@ -1463,12 +1450,40 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        for (i = 0; i < ELEMENTSOF(device_syscalls); i++) {
+        for (set = syscall_filter_sets; set->set_name; set++)
+                if (streq(set->set_name, "@raw-io")) {
+                        syscalls_found = true;
+                        break;
+                }
+
+        /* We should never fail here */
+        if (!syscalls_found) {
+                r = -EOPNOTSUPP;
+                goto finish;
+        }
+
+        NULSTR_FOREACH(sys, set->value) {
+                int id;
+                bool add = true;
+
+#ifndef __NR_s390_pci_mmio_read
+                if (streq(sys, "s390_pci_mmio_read"))
+                        add = false;
+#endif
+#ifndef __NR_s390_pci_mmio_write
+                if (streq(sys, "s390_pci_mmio_write"))
+                        add = false;
+#endif
+
+                if (!add)
+                        continue;
+
+                id = seccomp_syscall_resolve_name(sys);
+
                 r = seccomp_rule_add(
                                 seccomp,
                                 SCMP_ACT_ERRNO(EPERM),
-                                device_syscalls[i],
-                                0);
+                                id, 0);
                 if (r < 0)
                         goto finish;
         }
-- 
cgit v1.2.3-54-g00ecf


From 00bb64ecfa87c3da13764578e0e65e9b4e72bbf8 Mon Sep 17 00:00:00 2001
From: Paweł Szewczyk <p.szewczyk@samsung.com>
Date: Mon, 26 Sep 2016 18:45:47 +0200
Subject: core: Fix USB functionfs activation and clarify its documentation
 (#4188)

There was no certainty about how the path in service file should look
like for usb functionfs activation. Because of this it was treated
differently in different places, which made this feature unusable.

This patch fixes the path to be the *mount directory* of functionfs, not
ep0 file path and clarifies in the documentation that ListenUSBFunction should be
the location of functionfs mount point, not ep0 file itself.
---
 man/systemd.socket.xml | 6 +++---
 src/core/socket.c      | 9 ++-------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml
index 26e5d3ce7b..5b6045f69b 100644
--- a/man/systemd.socket.xml
+++ b/man/systemd.socket.xml
@@ -294,10 +294,10 @@
         <term><varname>ListenUSBFunction=</varname></term>
         <listitem><para>Specifies a <ulink
         url="https://www.kernel.org/doc/Documentation/usb/functionfs.txt">USB
-        FunctionFS</ulink> endpoint location to listen on, for
+        FunctionFS</ulink> endpoints location to listen on, for
         implementation of USB gadget functions. This expects an
-        absolute file system path as the argument. Behavior otherwise
-        is very similar to the <varname>ListenFIFO=</varname>
+        absolute file system path of functionfs mount point as the argument.
+        Behavior otherwise is very similar to the <varname>ListenFIFO=</varname>
         directive above. Use this to open the FunctionFS endpoint
         <filename>ep0</filename>. When using this option, the
         activated service has to have the
diff --git a/src/core/socket.c b/src/core/socket.c
index 70d55dd9ed..b9032fa5c9 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1334,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) {
 
 static int usbffs_dispatch_eps(SocketPort *p) {
         _cleanup_free_ struct dirent **ent = NULL;
-        _cleanup_free_ char *path = NULL;
         int r, i, n, k;
 
-        path = dirname_malloc(p->path);
-        if (!path)
-                return -ENOMEM;
-
-        r = scandir(path, &ent, usbffs_select_ep, alphasort);
+        r = scandir(p->path, &ent, usbffs_select_ep, alphasort);
         if (r < 0)
                 return -errno;
 
@@ -1356,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) {
         for (i = 0; i < n; ++i) {
                 _cleanup_free_ char *ep = NULL;
 
-                ep = path_make_absolute(ent[i]->d_name, path);
+                ep = path_make_absolute(ent[i]->d_name, p->path);
                 if (!ep)
                         return -ENOMEM;
 
-- 
cgit v1.2.3-54-g00ecf


From 531ac2b2349da02acc9c382849758e07eb92b020 Mon Sep 17 00:00:00 2001
From: Jorge Niedbalski <jorge.niedbalski@canonical.com>
Date: Wed, 28 Sep 2016 18:25:50 -0300
Subject: If the notification message length is 0, ignore the message (#4237)

Fixes #4234.

Signed-off-by: Jorge Niedbalski <jnr@metaklass.org>
---
 src/core/manager.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index fa8deb9b1b..43e231c328 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1721,6 +1721,10 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
 
                 return -errno;
         }
+        if (n == 0) {
+                log_debug("Got zero-length notification message. Ignoring.");
+                return 0;
+        }
 
         CMSG_FOREACH(cmsg, &msghdr) {
                 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
-- 
cgit v1.2.3-54-g00ecf


From 9987750e7a4c62e0eb8473603150596ba7c3a015 Mon Sep 17 00:00:00 2001
From: Franck Bui <fbui@suse.com>
Date: Thu, 29 Sep 2016 19:44:34 +0200
Subject: pid1: don't return any error in manager_dispatch_notify_fd() (#4240)

If manager_dispatch_notify_fd() fails and returns an error then the handling of
service notifications will be disabled entirely leading to a compromised system.

For example pid1 won't be able to receive the WATCHDOG messages anymore and
will kill all services supposed to send such messages.
---
 src/core/manager.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 43e231c328..5704005a0c 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1716,10 +1716,14 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
 
         n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
         if (n < 0) {
-                if (errno == EAGAIN || errno == EINTR)
-                        return 0;
+                if (!IN_SET(errno, EAGAIN, EINTR))
+                        log_error("Failed to receive notification message: %m");
 
-                return -errno;
+                /* It's not an option to return an error here since it
+                 * would disable the notification handler entirely. Services
+                 * wouldn't be able to send the WATCHDOG message for
+                 * example... */
+                return 0;
         }
         if (n == 0) {
                 log_debug("Got zero-length notification message. Ignoring.");
@@ -1746,7 +1750,8 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 r = fdset_new_array(&fds, fd_array, n_fds);
                 if (r < 0) {
                         close_many(fd_array, n_fds);
-                        return log_oom();
+                        log_oom();
+                        return 0;
                 }
         }
 
-- 
cgit v1.2.3-54-g00ecf


From 8523bf7dd514a3a2c6114b7b8fb8f308b4f09fc4 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Thu, 29 Sep 2016 16:06:02 +0200
Subject: pid1: process zero-length notification messages again

This undoes 531ac2b234. I acked that patch without looking at the code
carefully enough. There are two problems:
- we want to process the fds anyway
- in principle empty notification messages are valid, and we should
  process them as usual, including logging using log_unit_debug().
---
 src/core/manager.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 5704005a0c..1ea6539ebc 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1657,13 +1657,12 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui
         return 0;
 }
 
-static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, size_t n, FDSet *fds) {
+static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, FDSet *fds) {
         _cleanup_strv_free_ char **tags = NULL;
 
         assert(m);
         assert(u);
         assert(buf);
-        assert(n > 0);
 
         tags = strv_split(buf, "\n\r");
         if (!tags) {
@@ -1725,10 +1724,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                  * example... */
                 return 0;
         }
-        if (n == 0) {
-                log_debug("Got zero-length notification message. Ignoring.");
-                return 0;
-        }
 
         CMSG_FOREACH(cmsg, &msghdr) {
                 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
@@ -1765,25 +1760,27 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 return 0;
         }
 
+        /* The message should be a string. Here we make sure it's NUL-terminated,
+         * but only the part until first NUL will be used anyway. */
         buf[n] = 0;
 
         /* Notify every unit that might be interested, but try
          * to avoid notifying the same one multiple times. */
         u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
         if (u1) {
-                manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds);
+                manager_invoke_notify_message(m, u1, ucred->pid, buf, fds);
                 found = true;
         }
 
         u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
         if (u2 && u2 != u1) {
-                manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds);
+                manager_invoke_notify_message(m, u2, ucred->pid, buf, fds);
                 found = true;
         }
 
         u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
         if (u3 && u3 != u2 && u3 != u1) {
-                manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds);
+                manager_invoke_notify_message(m, u3, ucred->pid, buf, fds);
                 found = true;
         }
 
-- 
cgit v1.2.3-54-g00ecf


From a86b76753d7868c2d05f046f601bc7dc89fc2203 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Thu, 29 Sep 2016 16:07:41 +0200
Subject: pid1: more informative error message for ignored notifications

It's probably easier to diagnose a bad notification message if the
contents are printed. But still, do anything only if debugging is on.
---
 src/core/manager.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 1ea6539ebc..0f95bf49fb 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1672,8 +1672,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const
 
         if (UNIT_VTABLE(u)->notify_message)
                 UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
-        else
-                log_unit_debug(u, "Got notification message for unit. Ignoring.");
+        else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
+                _cleanup_free_ char *x = NULL, *y = NULL;
+
+                x = cescape(buf);
+                if (x)
+                        y = ellipsize(x, 20, 90);
+                log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+        }
 }
 
 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
-- 
cgit v1.2.3-54-g00ecf


From c4bee3c40e13b27f1a33f67a9c5692d042b463d7 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Fri, 30 Sep 2016 13:34:10 +0200
Subject: core: get rid of unneeded state variable

No functional change.
---
 src/core/manager.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 0f95bf49fb..26a3152484 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1705,7 +1705,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
 
         struct cmsghdr *cmsg;
         struct ucred *ucred = NULL;
-        bool found = false;
         Unit *u1, *u2, *u3;
         int r, *fd_array = NULL;
         unsigned n_fds = 0;
@@ -1773,24 +1772,18 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
         /* Notify every unit that might be interested, but try
          * to avoid notifying the same one multiple times. */
         u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
-        if (u1) {
+        if (u1)
                 manager_invoke_notify_message(m, u1, ucred->pid, buf, fds);
-                found = true;
-        }
 
         u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
-        if (u2 && u2 != u1) {
+        if (u2 && u2 != u1)
                 manager_invoke_notify_message(m, u2, ucred->pid, buf, fds);
-                found = true;
-        }
 
         u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
-        if (u3 && u3 != u2 && u3 != u1) {
+        if (u3 && u3 != u2 && u3 != u1)
                 manager_invoke_notify_message(m, u3, ucred->pid, buf, fds);
-                found = true;
-        }
 
-        if (!found)
+        if (!u1 && !u2 && !u3)
                 log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
 
         if (fdset_size(fds) > 0)
-- 
cgit v1.2.3-54-g00ecf


From 5fd2c135f1fd6b5147de54531940f398c6213b0c Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Fri, 30 Sep 2016 13:35:07 +0200
Subject: core: update warning message

"closing all" might suggest that _all_ fds received with the notification message
will be closed. Reword the message to clarify that only the "unused" ones will be
closed.
---
 src/core/manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 26a3152484..e63b31bdf3 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1787,7 +1787,7 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
 
         if (fdset_size(fds) > 0)
-                log_warning("Got auxiliary fds with notification message, closing all.");
+                log_warning("Got extra auxiliary fds with notification message, closing them.");
 
         return 0;
 }
-- 
cgit v1.2.3-54-g00ecf


From dd5e7000cb54f94fdf4756a28144ab32d6e214c8 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sun, 25 Sep 2016 08:34:30 -0400
Subject: core: complain if Before= dep on .device is declared

[Unit]
Before=foobar.device

[Service]
ExecStart=/bin/true
Type=oneshot

$ systemd-analyze verify before-device.service
before-device.service: Dependency Before=foobar.device ignored (.device units cannot be delayed)
---
 TODO            | 2 --
 src/core/unit.c | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/TODO b/TODO
index 64d530b7d7..ff9e9be9fb 100644
--- a/TODO
+++ b/TODO
@@ -139,8 +139,6 @@ Features:
 
 * PID 1 should send out sd_notify("WATCHDOG=1") messages (for usage in the --user mode, and when run via nspawn)
 
-* consider throwing a warning if a service declares it wants to be "Before=" a .device unit.
-
 * there's probably something wrong with having user mounts below /sys,
   as we have for debugfs. for exmaple, src/core/mount.c handles mounts
   prefixed with /sys generally special.
diff --git a/src/core/unit.c b/src/core/unit.c
index 5d284a359d..693f75c928 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -2225,6 +2225,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen
                 return 0;
         }
 
+        if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) {
+                log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
+                return 0;
+        }
+
         r = set_ensure_allocated(&u->dependencies[d], NULL);
         if (r < 0)
                 return r;
-- 
cgit v1.2.3-54-g00ecf


From a63ee40751ee3cae053470f4e0fb0016fbc40f25 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sun, 25 Sep 2016 09:58:29 -0400
Subject: core: do not try to create /run/systemd/transient in test mode

This prevented systemd-analyze from unprivileged operation on older systemd
installations, which should be possible.
Also, we shouldn't touch the file system in test mode even if we can.
---
 src/core/manager.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index e63b31bdf3..dd0d1fa984 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1236,9 +1236,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
                 return r;
 
         /* Make sure the transient directory always exists, so that it remains in the search path */
-        r = mkdir_p_label(m->lookup_paths.transient, 0755);
-        if (r < 0)
-                return r;
+        if (!m->test_run) {
+                r = mkdir_p_label(m->lookup_paths.transient, 0755);
+                if (r < 0)
+                        return r;
+        }
 
         dual_timestamp_get(&m->generators_start_timestamp);
         r = manager_run_generators(m);
-- 
cgit v1.2.3-54-g00ecf


From c080fbce9ca4ac21d8dbb1c2d0e8c9205edfdbfb Mon Sep 17 00:00:00 2001
From: Michael Olbrich <m.olbrich@pengutronix.de>
Date: Tue, 4 Oct 2016 16:13:27 +0200
Subject: automount: make sure the expire event is restarted after a
 daemon-reload (#4265)

If the corresponding mount unit is deserialized after the automount unit
then the expire event is set up in automount_trigger_notify(). However, if
the mount unit is deserialized first then the automount unit is still in
state AUTOMOUNT_DEAD and automount_trigger_notify() aborts without setting
up the expire event.
Explicitly call automount_start_expire() during coldplug to make sure that
the expire event is set up as necessary.

Fixes #4249.
---
 src/core/automount.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src/core')

diff --git a/src/core/automount.c b/src/core/automount.c
index 00295cf769..bdc0e06965 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) {
                                 return r;
 
                         (void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+                        if (a->deserialized_state == AUTOMOUNT_RUNNING) {
+                                r = automount_start_expire(a);
+                                if (r < 0)
+                                        log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+                        }
                 }
 
                 automount_set_state(a, a->deserialized_state);
-- 
cgit v1.2.3-54-g00ecf


From 629ff674ac48653703440ab16998adb03391f986 Mon Sep 17 00:00:00 2001
From: Stefan Schweter <stefan@schweter.it>
Date: Sun, 2 Oct 2016 19:37:21 +0200
Subject: tree-wide: remove consecutive duplicate words in comments

---
 src/basic/escape.c         | 2 +-
 src/basic/util.c           | 2 +-
 src/core/dynamic-user.c    | 2 +-
 src/core/execute.c         | 2 +-
 src/hostname/hostnamectl.c | 2 +-
 src/login/logind-session.c | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/escape.c b/src/basic/escape.c
index 01daf11ce7..4a1ec4505e 100644
--- a/src/basic/escape.c
+++ b/src/basic/escape.c
@@ -333,7 +333,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
                 assert(remaining > 0);
 
                 if (*f != '\\') {
-                        /* A literal literal, copy verbatim */
+                        /* A literal, copy verbatim */
                         *(t++) = *f;
                         continue;
                 }
diff --git a/src/basic/util.c b/src/basic/util.c
index 9d66d28eb7..ec7939dc83 100644
--- a/src/basic/util.c
+++ b/src/basic/util.c
@@ -467,7 +467,7 @@ bool in_initrd(void) {
          * 2. the root file system must be a memory file system
          *
          * The second check is extra paranoia, since misdetecting an
-         * initrd can have bad bad consequences due the initrd
+         * initrd can have bad consequences due the initrd
          * emptying when transititioning to the main systemd.
          */
 
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 310aaa94e1..1043da3eb7 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -233,7 +233,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                         if (st.st_nlink > 0)
                                 break;
 
-                        /* Oh, bummer, we got got the lock, but the file was unlinked between the time we opened it and
+                        /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and
                          * got the lock. Close it, and try again. */
                         lock_fd = safe_close(lock_fd);
                 }
diff --git a/src/core/execute.c b/src/core/execute.c
index 3da7ef3be6..82d8c978c1 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2207,7 +2207,7 @@ static int exec_child(
                                 return r;
                         }
 
-                        /* Note that we don't set $HOME or $SHELL if they are are not particularly enlightening anyway
+                        /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
                          * (i.e. are "/" or "/bin/nologin"). */
                 }
 
diff --git a/src/hostname/hostnamectl.c b/src/hostname/hostnamectl.c
index 4795324667..07c57fb567 100644
--- a/src/hostname/hostnamectl.c
+++ b/src/hostname/hostnamectl.c
@@ -278,7 +278,7 @@ static int set_hostname(sd_bus *bus, char **args, unsigned n) {
                 /* Now that we set the pretty hostname, let's clean up the parameter and use that as static
                  * hostname. If the hostname was already valid as static hostname, this will only chop off the trailing
                  * dot if there is one. If it was not valid, then it will be made fully valid by truncating, dropping
-                 * multiple dots, and and dropping weird chars. Note that we clean the name up only if we also are
+                 * multiple dots, and dropping weird chars. Note that we clean the name up only if we also are
                  * supposed to set the pretty name. If the pretty name is not being set we assume the user knows what
                  * he does and pass the name as-is. */
                 h = strdup(hostname);
diff --git a/src/login/logind-session.c b/src/login/logind-session.c
index b6da237397..ba1bcc2630 100644
--- a/src/login/logind-session.c
+++ b/src/login/logind-session.c
@@ -611,7 +611,7 @@ static int session_stop_scope(Session *s, bool force) {
                 return 0;
 
         /* Let's always abandon the scope first. This tells systemd that we are not interested anymore, and everything
-         * that is left in in the scope is "left-over". Informing systemd about this has the benefit that it will log
+         * that is left in the scope is "left-over". Informing systemd about this has the benefit that it will log
          * when killing any processes left after this point. */
         r = manager_abandon_scope(s->manager, s->scope, &error);
         if (r < 0)
-- 
cgit v1.2.3-54-g00ecf


From 77531863ca50fb5c0dfb952dbda50250bbe3e5d1 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Tue, 4 Oct 2016 10:51:25 +0200
Subject: Fix typo

---
 src/core/umount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/umount.c b/src/core/umount.c
index c21a2be54e..1e5459ed80 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -375,7 +375,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
                 /* If we are in a container, don't attempt to
                    read-only mount anything as that brings no real
                    benefits, but might confuse the host, as we remount
-                   the superblock here, not the bind mound. */
+                   the superblock here, not the bind mount. */
                 if (detect_container() <= 0)  {
                         _cleanup_free_ char *options = NULL;
                         /* MS_REMOUNT requires that the data parameter
-- 
cgit v1.2.3-54-g00ecf


From 36d854780c01d589e5da1fc6e94f46aa41f7016f Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 28 Sep 2016 18:37:39 +0200
Subject: core: do not fail in a container if we can't use setgroups

It might be blocked through /proc/PID/setgroups
---
 src/basic/capability-util.c |  3 ++-
 src/basic/user-util.c       | 27 ++++++++++++++++++++++++++-
 src/basic/user-util.h       |  2 ++
 src/core/execute.c          |  2 +-
 4 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c
index d4c5bd6937..f8db6e0212 100644
--- a/src/basic/capability-util.c
+++ b/src/basic/capability-util.c
@@ -31,6 +31,7 @@
 #include "log.h"
 #include "macro.h"
 #include "parse-util.h"
+#include "user-util.h"
 #include "util.h"
 
 int have_effective_cap(int value) {
@@ -295,7 +296,7 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
         if (setresgid(gid, gid, gid) < 0)
                 return log_error_errno(errno, "Failed to change group ID: %m");
 
-        if (setgroups(0, NULL) < 0)
+        if (maybe_setgroups(0, NULL) < 0)
                 return log_error_errno(errno, "Failed to drop auxiliary groups list: %m");
 
         /* Ensure we keep the permitted caps across the setresuid() */
diff --git a/src/basic/user-util.c b/src/basic/user-util.c
index 0522bce1d1..16496fccfa 100644
--- a/src/basic/user-util.c
+++ b/src/basic/user-util.c
@@ -33,6 +33,7 @@
 
 #include "alloc-util.h"
 #include "fd-util.h"
+#include "fileio.h"
 #include "formats-util.h"
 #include "macro.h"
 #include "missing.h"
@@ -460,7 +461,7 @@ int get_shell(char **_s) {
 
 int reset_uid_gid(void) {
 
-        if (setgroups(0, NULL) < 0)
+        if (maybe_setgroups(0, NULL) < 0)
                 return -errno;
 
         if (setresgid(0, 0, 0) < 0)
@@ -602,3 +603,27 @@ bool valid_home(const char *p) {
 
         return true;
 }
+
+int maybe_setgroups(size_t size, const gid_t *list) {
+        static int cached_can_setgroups = -1;
+        /* check if setgroups is allowed before we try to drop all the auxiliary groups */
+        if (size == 0) {
+                if (cached_can_setgroups < 0) {
+                        _cleanup_free_ char *setgroups_content = NULL;
+                        int r = read_one_line_file("/proc/self/setgroups", &setgroups_content);
+                        if (r < 0 && errno != ENOENT)
+                                return r;
+                        if (r < 0) {
+                                /* old kernels don't have /proc/self/setgroups, so assume we can use setgroups */
+                                cached_can_setgroups = true;
+                        } else {
+                                cached_can_setgroups = streq(setgroups_content, "allow");
+                                if (!cached_can_setgroups)
+                                        log_debug("skip setgroups, /proc/self/setgroups is set to 'deny'");
+                        }
+                }
+                if (!cached_can_setgroups)
+                        return 0;
+        }
+        return setgroups(size, list);
+}
diff --git a/src/basic/user-util.h b/src/basic/user-util.h
index 6c61f63cae..dfea561bde 100644
--- a/src/basic/user-util.h
+++ b/src/basic/user-util.h
@@ -86,3 +86,5 @@ bool valid_user_group_name(const char *u);
 bool valid_user_group_name_or_id(const char *u);
 bool valid_gecos(const char *d);
 bool valid_home(const char *p);
+
+int maybe_setgroups(size_t size, const gid_t *list);
diff --git a/src/core/execute.c b/src/core/execute.c
index 82d8c978c1..019ff8490b 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -781,7 +781,7 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_
                         k++;
                 }
 
-                if (setgroups(k, gids) < 0) {
+                if (maybe_setgroups(k, gids) < 0) {
                         free(gids);
                         return -errno;
                 }
-- 
cgit v1.2.3-54-g00ecf


From 2d6fce8d7c397fe915230b728cb92aa749245e43 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 6 Oct 2016 16:03:01 +0200
Subject: core: leave PAM stub process around with GIDs updated

In the process execution code of PID 1, before
096424d1230e0a0339735c51b43949809e972430 the GID settings where changed before
invoking PAM, and the UID settings after. After the change both changes are
made after the PAM session hooks are run. When invoking PAM we fork once, and
leave a stub process around which will invoke the PAM session end hooks when
the session goes away. This code previously was dropping the remaining privs
(which were precisely the UID). Fix this code to do this correctly again, by
really dropping them else (i.e. the GID as well).

While we are at it, also fix error logging of this code.

Fixes: #4238
---
 src/core/execute.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 019ff8490b..e4a23ac169 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -843,6 +843,7 @@ static int setup_pam(
                 const char *name,
                 const char *user,
                 uid_t uid,
+                gid_t gid,
                 const char *tty,
                 char ***env,
                 int fds[], unsigned n_fds) {
@@ -948,8 +949,13 @@ static int setup_pam(
                  * and this will make PR_SET_PDEATHSIG work in most cases.
                  * If this fails, ignore the error - but expect sd-pam threads
                  * to fail to exit normally */
+
+                if (maybe_setgroups(0, NULL) < 0)
+                        log_warning_errno(errno, "Failed to setgroups() in sd-pam: %m");
+                if (setresgid(gid, gid, gid) < 0)
+                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                 if (setresuid(uid, uid, uid) < 0)
-                        log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
+                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
 
                 (void) ignore_signals(SIGPIPE, -1);
 
@@ -2413,7 +2419,7 @@ static int exec_child(
                 }
 
                 if (context->pam_name && username) {
-                        r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
+                        r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
                         if (r < 0) {
                                 *exit_status = EXIT_PAM;
                                 return r;
-- 
cgit v1.2.3-54-g00ecf


From 97f0e76f18d322d29bcfbc4ab6bb9cd67a1cdd54 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 6 Oct 2016 17:54:12 +0200
Subject: user-util: rework maybe_setgroups() a bit

Let's drop the caching of the setgroups /proc field for now. While there's a
strict regime in place when it changes states, let's better not cache it since
we cannot really be sure we follow that regime correctly.

More importantly however, this is not in performance sensitive code, and
there's no indication the cache is really beneficial, hence let's drop the
caching and make things a bit simpler.

Also, while we are at it, rework the error handling a bit, and always return
negative errno-style error codes, following our usual coding style. This has
the benefit that we can sensible hanld read_one_line_file() errors, without
having to updat errno explicitly.
---
 src/basic/capability-util.c |  5 +++--
 src/basic/user-util.c       | 49 ++++++++++++++++++++++++++-------------------
 src/core/execute.c          | 10 +++++----
 3 files changed, 37 insertions(+), 27 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c
index f8db6e0212..c3de20a0e8 100644
--- a/src/basic/capability-util.c
+++ b/src/basic/capability-util.c
@@ -296,8 +296,9 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
         if (setresgid(gid, gid, gid) < 0)
                 return log_error_errno(errno, "Failed to change group ID: %m");
 
-        if (maybe_setgroups(0, NULL) < 0)
-                return log_error_errno(errno, "Failed to drop auxiliary groups list: %m");
+        r = maybe_setgroups(0, NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
 
         /* Ensure we keep the permitted caps across the setresuid() */
         if (prctl(PR_SET_KEEPCAPS, 1) < 0)
diff --git a/src/basic/user-util.c b/src/basic/user-util.c
index 16496fccfa..de6c93056e 100644
--- a/src/basic/user-util.c
+++ b/src/basic/user-util.c
@@ -460,9 +460,11 @@ int get_shell(char **_s) {
 }
 
 int reset_uid_gid(void) {
+        int r;
 
-        if (maybe_setgroups(0, NULL) < 0)
-                return -errno;
+        r = maybe_setgroups(0, NULL);
+        if (r < 0)
+                return r;
 
         if (setresgid(0, 0, 0) < 0)
                 return -errno;
@@ -605,25 +607,30 @@ bool valid_home(const char *p) {
 }
 
 int maybe_setgroups(size_t size, const gid_t *list) {
-        static int cached_can_setgroups = -1;
-        /* check if setgroups is allowed before we try to drop all the auxiliary groups */
-        if (size == 0) {
-                if (cached_can_setgroups < 0) {
-                        _cleanup_free_ char *setgroups_content = NULL;
-                        int r = read_one_line_file("/proc/self/setgroups", &setgroups_content);
-                        if (r < 0 && errno != ENOENT)
-                                return r;
-                        if (r < 0) {
-                                /* old kernels don't have /proc/self/setgroups, so assume we can use setgroups */
-                                cached_can_setgroups = true;
-                        } else {
-                                cached_can_setgroups = streq(setgroups_content, "allow");
-                                if (!cached_can_setgroups)
-                                        log_debug("skip setgroups, /proc/self/setgroups is set to 'deny'");
-                        }
-                }
-                if (!cached_can_setgroups)
+        int r;
+
+        /* Check if setgroups is allowed before we try to drop all the auxiliary groups */
+        if (size == 0) { /* Dropping all aux groups? */
+                _cleanup_free_ char *setgroups_content = NULL;
+                bool can_setgroups;
+
+                r = read_one_line_file("/proc/self/setgroups", &setgroups_content);
+                if (r == -ENOENT)
+                        /* Old kernels don't have /proc/self/setgroups, so assume we can use setgroups */
+                        can_setgroups = true;
+                else if (r < 0)
+                        return r;
+                else
+                        can_setgroups = streq(setgroups_content, "allow");
+
+                if (!can_setgroups) {
+                        log_debug("Skipping setgroups(), /proc/self/setgroups is set to 'deny'");
                         return 0;
+                }
         }
-        return setgroups(size, list);
+
+        if (setgroups(size, list) < 0)
+                return -errno;
+
+        return 0;
 }
diff --git a/src/core/execute.c b/src/core/execute.c
index e4a23ac169..d5c4e60796 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -781,9 +781,10 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_
                         k++;
                 }
 
-                if (maybe_setgroups(k, gids) < 0) {
+                r = maybe_setgroups(k, gids);
+                if (r < 0) {
                         free(gids);
-                        return -errno;
+                        return r;
                 }
 
                 free(gids);
@@ -950,8 +951,9 @@ static int setup_pam(
                  * If this fails, ignore the error - but expect sd-pam threads
                  * to fail to exit normally */
 
-                if (maybe_setgroups(0, NULL) < 0)
-                        log_warning_errno(errno, "Failed to setgroups() in sd-pam: %m");
+                r = maybe_setgroups(0, NULL);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                 if (setresgid(gid, gid, gid) < 0)
                         log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                 if (setresuid(uid, uid, uid) < 0)
-- 
cgit v1.2.3-54-g00ecf


From 24dd31c19ede505143833346ff850af942694aa6 Mon Sep 17 00:00:00 2001
From: Lukáš Nykrýn <lnykryn@redhat.com>
Date: Fri, 7 Oct 2016 03:08:21 +0200
Subject: core: add possibility to set action for ctrl-alt-del burst (#4105)

For some certification, it should not be possible to reboot the machine through ctrl-alt-delete. Currently we suggest our customers to mask the ctrl-alt-delete target, but that is obviously not enough.

Patching the keymaps to disable that is really not a way to go for them, because the settings need to be easily checked by some SCAP tools.
---
 man/systemd-system.conf.xml | 11 ++++++++++
 src/core/main.c             |  5 +++++
 src/core/manager.c          | 51 +++++++++++++++++++++++++++++++++------------
 src/core/manager.h          | 14 ++++++++++++-
 src/core/system.conf        |  1 +
 5 files changed, 68 insertions(+), 14 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
index 1bb40fd234..a268397d09 100644
--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@@ -105,6 +105,17 @@
         arguments.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>CtrlAltDelBurstAction=</varname></term>
+
+        <listitem><para>Defines what action will be performed
+        if user presses Ctr-Alt-Delete more than 7 times in 2s.
+        Can be set to <literal>reboot-force</literal>, <literal>poweroff-force</literal>
+        or disabled with <literal>ignore</literal>. Defaults to
+        <literal>reboot-force</literal>.
+        </para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>CPUAffinity=</varname></term>
 
diff --git a/src/core/main.c b/src/core/main.c
index be0cb0b6d1..6fe440277e 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -131,6 +131,7 @@ static bool arg_default_memory_accounting = false;
 static bool arg_default_tasks_accounting = true;
 static uint64_t arg_default_tasks_max = UINT64_MAX;
 static sd_id128_t arg_machine_id = {};
+static CADBurstAction arg_cad_burst_action = CAD_BURST_ACTION_REBOOT;
 
 noreturn static void freeze_or_reboot(void) {
 
@@ -648,6 +649,8 @@ static int config_parse_join_controllers(const char *unit,
         return 0;
 }
 
+static DEFINE_CONFIG_PARSE_ENUM(config_parse_cad_burst_action, cad_burst_action, CADBurstAction, "Failed to parse service restart specifier");
+
 static int parse_config_file(void) {
 
         const ConfigTableItem items[] = {
@@ -702,6 +705,7 @@ static int parse_config_file(void) {
                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
                 { "Manager", "DefaultTasksAccounting",    config_parse_bool,             0, &arg_default_tasks_accounting          },
                 { "Manager", "DefaultTasksMax",           config_parse_tasks_max,        0, &arg_default_tasks_max                 },
+                { "Manager", "CtrlAltDelBurstAction",     config_parse_cad_burst_action, 0, &arg_cad_burst_action},
                 {}
         };
 
@@ -1794,6 +1798,7 @@ int main(int argc, char *argv[]) {
         m->initrd_timestamp = initrd_timestamp;
         m->security_start_timestamp = security_start_timestamp;
         m->security_finish_timestamp = security_finish_timestamp;
+        m->cad_burst_action = arg_cad_burst_action;
 
         manager_set_defaults(m);
         manager_set_show_status(m, arg_show_status);
diff --git a/src/core/manager.c b/src/core/manager.c
index dd0d1fa984..5253cb3712 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1894,6 +1894,35 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) {
         return r;
 }
 
+static void manager_handle_ctrl_alt_del(Manager *m) {
+        /* If the user presses C-A-D more than
+         * 7 times within 2s, we reboot/shutdown immediately,
+         * unless it was disabled in system.conf */
+
+        if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == CAD_BURST_ACTION_IGNORE)
+                manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
+        else {
+                switch (m->cad_burst_action) {
+
+                case CAD_BURST_ACTION_REBOOT:
+                        m->exit_code = MANAGER_REBOOT;
+                        break;
+
+                case CAD_BURST_ACTION_POWEROFF:
+                        m->exit_code = MANAGER_POWEROFF;
+                        break;
+
+                default:
+                        assert_not_reached("Unknown action.");
+                }
+
+                log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, performing immediate %s.",
+                                cad_burst_action_to_string(m->cad_burst_action));
+                status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, performing immediate %s.",
+                                cad_burst_action_to_string(m->cad_burst_action));
+        }
+}
+
 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
         Manager *m = userdata;
         ssize_t n;
@@ -1945,19 +1974,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
 
                 case SIGINT:
                         if (MANAGER_IS_SYSTEM(m)) {
-
-                                /* If the user presses C-A-D more than
-                                 * 7 times within 2s, we reboot
-                                 * immediately. */
-
-                                if (ratelimit_test(&m->ctrl_alt_del_ratelimit))
-                                        manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
-                                else {
-                                        log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
-                                        status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
-                                        m->exit_code = MANAGER_REBOOT;
-                                }
-
+                                manager_handle_ctrl_alt_del(m);
                                 break;
                         }
 
@@ -3544,3 +3561,11 @@ static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
 };
 
 DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
+
+static const char *const cad_burst_action_table[_CAD_BURST_ACTION_MAX] = {
+        [CAD_BURST_ACTION_IGNORE] = "ignore",
+        [CAD_BURST_ACTION_REBOOT] = "reboot-force",
+        [CAD_BURST_ACTION_POWEROFF] = "poweroff-force",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cad_burst_action, CADBurstAction);
diff --git a/src/core/manager.h b/src/core/manager.h
index a592f1cb94..495440b446 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -62,6 +62,14 @@ typedef enum ManagerExitCode {
         _MANAGER_EXIT_CODE_INVALID = -1
 } ManagerExitCode;
 
+typedef enum CADBurstAction {
+        CAD_BURST_ACTION_IGNORE,
+        CAD_BURST_ACTION_REBOOT,
+        CAD_BURST_ACTION_POWEROFF,
+        _CAD_BURST_ACTION_MAX,
+        _CAD_BURST_ACTION_INVALID = -1
+} CADBurstAction;
+
 typedef enum StatusType {
         STATUS_TYPE_EPHEMERAL,
         STATUS_TYPE_NORMAL,
@@ -304,8 +312,9 @@ struct Manager {
         Hashmap *uid_refs;
         Hashmap *gid_refs;
 
-        /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */
+        /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
         RateLimit ctrl_alt_del_ratelimit;
+        CADBurstAction cad_burst_action;
 
         const char *unit_log_field;
         const char *unit_log_format_string;
@@ -398,3 +407,6 @@ void manager_deserialize_gid_refs_one(Manager *m, const char *value);
 
 const char *manager_state_to_string(ManagerState m) _const_;
 ManagerState manager_state_from_string(const char *s) _pure_;
+
+const char *cad_burst_action_to_string(CADBurstAction a) _const_;
+CADBurstAction cad_burst_action_from_string(const char *s) _pure_;
diff --git a/src/core/system.conf b/src/core/system.conf
index c6bb050aac..746572b7ff 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -21,6 +21,7 @@
 #CrashChangeVT=no
 #CrashShell=no
 #CrashReboot=no
+#CtrlAltDelBurstAction=reboot-force
 #CPUAffinity=1 2
 #JoinControllers=cpu,cpuacct net_cls,net_prio
 #RuntimeWatchdogSec=0
-- 
cgit v1.2.3-54-g00ecf


From c55ae51e77e1fc56fde7bc3466dd2021ff3856cb Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 7 Oct 2016 12:08:51 +0200
Subject: manager: don't ever busy loop when we get a notification message we
 can't process

If the kernel doesn't permit us to dequeue/process an incoming notification
datagram message it's still better to stop processing the notification messages
altogether than to enter a busy loop where we keep getting notified but can't
do a thing about it.

With this change, manager_dispatch_notify_fd() behaviour is changed like this:

- if an error indicating a spurious wake-up is seen on recvmsg(), ignore it
  (EAGAIN/EINTR)

- if any other error is seen on recvmsg() propagate it, thus disabling
  processing of further wakeups

- if any error is seen on later code in the function, warn about it but do not
  propagate it, as in this cas we're not going to busy loop as the offending
  message is already dequeued.
---
 src/core/manager.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 5253cb3712..ab65d630a1 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1722,14 +1722,13 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
 
         n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
         if (n < 0) {
-                if (!IN_SET(errno, EAGAIN, EINTR))
-                        log_error("Failed to receive notification message: %m");
+                if (IN_SET(errno, EAGAIN, EINTR))
+                        return 0; /* Spurious wakeup, try again */
 
-                /* It's not an option to return an error here since it
-                 * would disable the notification handler entirely. Services
-                 * wouldn't be able to send the WATCHDOG message for
-                 * example... */
-                return 0;
+                /* If this is any other, real error, then let's stop processing this socket. This of course means we
+                 * won't take notification messages anymore, but that's still better than busy looping around this:
+                 * being woken up over and over again but being unable to actually read the message off the socket. */
+                return log_error_errno(errno, "Failed to receive notification message: %m");
         }
 
         CMSG_FOREACH(cmsg, &msghdr) {
-- 
cgit v1.2.3-54-g00ecf


From 045a3d5989f7565dc496013a9e96d95d86a12cc8 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 7 Oct 2016 12:12:10 +0200
Subject: manager: be stricter with incomining notifications, warn properly
 about too large ones

Let's make the kernel let us know the full, original datagram size of the
incoming message. If it's larger than the buffer space provided by us, drop the
whole message with a warning.

Before this change the kernel would truncate the message for us to the buffer
space provided, and we'd not complain about this, and simply process the
incomplete message as far as it made sense.
---
 src/core/manager.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index ab65d630a1..66b8904e4e 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1720,7 +1720,7 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 return 0;
         }
 
-        n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+        n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
         if (n < 0) {
                 if (IN_SET(errno, EAGAIN, EINTR))
                         return 0; /* Spurious wakeup, try again */
@@ -1761,7 +1761,7 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 return 0;
         }
 
-        if ((size_t) n >= sizeof(buf)) {
+        if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
                 log_warning("Received notify message exceeded maximum size. Ignoring.");
                 return 0;
         }
-- 
cgit v1.2.3-54-g00ecf


From 875ca88da576b4f7c412f6a5e1fc642ba3bd288a Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 7 Oct 2016 12:14:33 +0200
Subject: manager: tighten incoming notification message checks

Let's not accept datagrams with embedded NUL bytes. Previously we'd simply
ignore everything after the first NUL byte. But given that sending us that is
pretty ugly let's instead complain and refuse.

With this change we'll only accept messages that have exactly zero or one NUL
bytes at the very end of the datagram.
---
 src/core/manager.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 66b8904e4e..34db276a7d 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1766,8 +1766,14 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
                 return 0;
         }
 
-        /* The message should be a string. Here we make sure it's NUL-terminated,
-         * but only the part until first NUL will be used anyway. */
+        /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
+         * trailing NUL byte in the message, but don't expect it. */
+        if (n > 1 && memchr(buf, 0, n-1)) {
+                log_warning("Received notify message with embedded NUL bytes. Ignoring.");
+                return 0;
+        }
+
+        /* Make sure it's NUL-terminated. */
         buf[n] = 0;
 
         /* Notify every unit that might be interested, but try
-- 
cgit v1.2.3-54-g00ecf


From 8f4d6401351b1114117a1517283f00c7f349c9ff Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Fri, 7 Oct 2016 09:39:42 -0400
Subject: core: only warn on short reads on signal fd

---
 src/core/manager.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src/core')

diff --git a/src/core/manager.c b/src/core/manager.c
index 34db276a7d..c1dce62a18 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1946,14 +1946,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
         for (;;) {
                 n = read(m->signal_fd, &sfsi, sizeof(sfsi));
                 if (n != sizeof(sfsi)) {
+                        if (n >= 0) {
+                                log_warning("Truncated read from signal fd (%zu bytes)!", n);
+                                return 0;
+                        }
 
-                        if (n >= 0)
-                                return -EIO;
-
-                        if (errno == EINTR || errno == EAGAIN)
+                        if (IN_SET(errno, EINTR, EAGAIN))
                                 break;
 
-                        return -errno;
+                        /* We return an error here, which will kill this handler,
+                         * to avoid a busy loop on read error. */
+                        return log_error_errno(errno, "Reading from signal fd failed: %m");
                 }
 
                 log_received_signal(sfsi.ssi_signo == SIGCHLD ||
-- 
cgit v1.2.3-54-g00ecf


From 4b58153dd22172d817055d2a09a0cdf3f4bd9db3 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 30 Aug 2016 23:18:46 +0200
Subject: core: add "invocation ID" concept to service manager

This adds a new invocation ID concept to the service manager. The invocation ID
identifies each runtime cycle of a unit uniquely. A new randomized 128bit ID is
generated each time a unit moves from and inactive to an activating or active
state.

The primary usecase for this concept is to connect the runtime data PID 1
maintains about a service with the offline data the journal stores about it.
Previously we'd use the unit name plus start/stop times, which however is
highly racy since the journal will generally process log data after the service
already ended.

The "invocation ID" kinda matches the "boot ID" concept of the Linux kernel,
except that it applies to an individual unit instead of the whole system.

The invocation ID is passed to the activated processes as environment variable.
It is additionally stored as extended attribute on the cgroup of the unit. The
latter is used by journald to automatically retrieve it for each log logged
message and attach it to the log entry. The environment variable is very easily
accessible, even for unprivileged services. OTOH the extended attribute is only
accessible to privileged processes (this is because cgroupfs only supports the
"trusted." xattr namespace, not "user."). The environment variable may be
altered by services, the extended attribute may not be, hence is the better
choice for the journal.

Note that reading the invocation ID off the extended attribute from journald is
racy, similar to the way reading the unit name for a logging process is.

This patch adds APIs to read the invocation ID to sd-id128:
sd_id128_get_invocation() may be used in a similar fashion to
sd_id128_get_boot().

PID1's own logging is updated to always include the invocation ID when it logs
information about a unit.

A new bus call GetUnitByInvocationID() is added that allows retrieving a bus
path to a unit by its invocation ID. The bus path is built using the invocation
ID, thus providing a path for referring to a unit that is valid only for the
current runtime cycleof it.

Outlook for the future: should the kernel eventually allow passing of cgroup
information along AF_UNIX/SOCK_DGRAM messages via a unique cgroup id, then we
can alter the invocation ID to be generated as hash from that rather than
entirely randomly. This way we can derive the invocation race-freely from the
messages.
---
 Makefile-man.am                           |  5 ++
 man/sd_id128_get_machine.xml              | 34 +++++++-----
 man/systemd.exec.xml                      | 10 ++++
 src/basic/cgroup-util.c                   | 38 +++++++++++++
 src/basic/cgroup-util.h                   |  3 ++
 src/basic/log.c                           | 45 +++++++++-------
 src/basic/log.h                           | 10 ++--
 src/core/automount.c                      |  4 ++
 src/core/busname.c                        |  4 ++
 src/core/cgroup.c                         | 21 ++++++++
 src/core/dbus-manager.c                   | 59 +++++++++++++++++++++
 src/core/dbus-unit.c                      |  1 +
 src/core/device.c                         |  4 ++
 src/core/execute.c                        | 10 +++-
 src/core/manager.c                        | 24 ++++++++-
 src/core/manager.h                        |  4 ++
 src/core/mount.c                          | 11 ++--
 src/core/org.freedesktop.systemd1.conf    |  4 ++
 src/core/path.c                           |  4 ++
 src/core/scope.c                          |  4 ++
 src/core/service.c                        |  4 ++
 src/core/slice.c                          |  5 ++
 src/core/socket.c                         |  5 +-
 src/core/swap.c                           |  5 ++
 src/core/target.c                         |  5 ++
 src/core/timer.c                          |  6 ++-
 src/core/unit.c                           | 88 ++++++++++++++++++++++++++++++-
 src/core/unit.h                           | 10 +++-
 src/journal/journald-server.c             | 60 ++++++++++++++++++---
 src/journal/journald-server.h             |  2 +-
 src/libsystemd/libsystemd.sym             |  1 +
 src/libsystemd/sd-bus/bus-common-errors.c |  1 +
 src/libsystemd/sd-bus/bus-common-errors.h |  1 +
 src/libsystemd/sd-id128/id128-util.c      | 13 +++++
 src/libsystemd/sd-id128/id128-util.h      |  6 +++
 src/libsystemd/sd-id128/sd-id128.c        | 22 ++++++++
 src/network/networkd-link.h               |  2 +-
 src/network/networkd-netdev.h             |  2 +-
 src/shared/bus-util.c                     |  2 +-
 src/systemd/sd-id128.h                    |  2 +-
 40 files changed, 486 insertions(+), 55 deletions(-)

(limited to 'src/core')

diff --git a/Makefile-man.am b/Makefile-man.am
index ef5077cc5a..5760878fe3 100644
--- a/Makefile-man.am
+++ b/Makefile-man.am
@@ -395,6 +395,7 @@ MANPAGES_ALIAS += \
 	man/sd_id128_equal.3 \
 	man/sd_id128_from_string.3 \
 	man/sd_id128_get_boot.3 \
+	man/sd_id128_get_invocation.3 \
 	man/sd_id128_t.3 \
 	man/sd_is_mq.3 \
 	man/sd_is_socket.3 \
@@ -745,6 +746,7 @@ man/sd_event_unrefp.3: man/sd_event_new.3
 man/sd_id128_equal.3: man/sd-id128.3
 man/sd_id128_from_string.3: man/sd_id128_to_string.3
 man/sd_id128_get_boot.3: man/sd_id128_get_machine.3
+man/sd_id128_get_invocation.3: man/sd_id128_get_machine.3
 man/sd_id128_t.3: man/sd-id128.3
 man/sd_is_mq.3: man/sd_is_fifo.3
 man/sd_is_socket.3: man/sd_is_fifo.3
@@ -1519,6 +1521,9 @@ man/sd_id128_from_string.html: man/sd_id128_to_string.html
 man/sd_id128_get_boot.html: man/sd_id128_get_machine.html
 	$(html-alias)
 
+man/sd_id128_get_invocation.html: man/sd_id128_get_machine.html
+	$(html-alias)
+
 man/sd_id128_t.html: man/sd-id128.html
 	$(html-alias)
 
diff --git a/man/sd_id128_get_machine.xml b/man/sd_id128_get_machine.xml
index 2ad1f8f728..9a86c24aed 100644
--- a/man/sd_id128_get_machine.xml
+++ b/man/sd_id128_get_machine.xml
@@ -45,6 +45,7 @@
   <refnamediv>
     <refname>sd_id128_get_machine</refname>
     <refname>sd_id128_get_boot</refname>
+    <refname>sd_id128_get_invocation</refname>
     <refpurpose>Retrieve 128-bit IDs</refpurpose>
   </refnamediv>
 
@@ -62,6 +63,11 @@
         <paramdef>sd_id128_t *<parameter>ret</parameter></paramdef>
       </funcprototype>
 
+      <funcprototype>
+        <funcdef>int <function>sd_id128_get_invocation</function></funcdef>
+        <paramdef>sd_id128_t *<parameter>ret</parameter></paramdef>
+      </funcprototype>
+
     </funcsynopsis>
   </refsynopsisdiv>
 
@@ -83,11 +89,15 @@
     for more information. This function also internally caches the
     returned ID to make this call a cheap operation.</para>
 
-    <para>Note that <function>sd_id128_get_boot()</function> always
-    returns a UUID v4 compatible ID.
-    <function>sd_id128_get_machine()</function> will also return a
-    UUID v4-compatible ID on new installations but might not on older.
-    It is possible to convert the machine ID into a UUID v4-compatible
+    <para><function>sd_id128_get_invocation()</function> returns the invocation ID of the currently executed
+    service. In its current implementation, this reads and parses the <varname>$INVOCATION_ID</varname> environment
+    variable that the service manager sets when activating a service, see
+    <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for details. The
+    ID is cached internally. In future a different mechanism to determine the invocation ID may be added.</para>
+
+    <para>Note that <function>sd_id128_get_boot()</function> and <function>sd_id128_get_invocation()</function> always
+    return UUID v4 compatible IDs.  <function>sd_id128_get_machine()</function> will also return a UUID v4-compatible
+    ID on new installations but might not on older.  It is possible to convert the machine ID into a UUID v4-compatible
     one. For more information, see
     <citerefentry><refentrytitle>machine-id</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
 
@@ -107,11 +117,10 @@
   <refsect1>
     <title>Notes</title>
 
-    <para>The <function>sd_id128_get_machine()</function> and
-    <function>sd_id128_get_boot()</function> interfaces are available
-    as a shared library, which can be compiled and linked to with the
-    <literal>libsystemd</literal> <citerefentry project='die-net'><refentrytitle>pkg-config</refentrytitle><manvolnum>1</manvolnum></citerefentry>
-    file.</para>
+    <para>The <function>sd_id128_get_machine()</function>, <function>sd_id128_get_boot()</function> and
+    <function>sd_id128_get_invocation()</function> interfaces are available as a shared library, which can be compiled
+    and linked to with the <literal>libsystemd</literal> <citerefentry
+    project='die-net'><refentrytitle>pkg-config</refentrytitle><manvolnum>1</manvolnum></citerefentry> file.</para>
   </refsect1>
 
   <refsect1>
@@ -121,8 +130,9 @@
       <citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
       <citerefentry><refentrytitle>sd-id128</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
       <citerefentry><refentrytitle>machine-id</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
-      <citerefentry><refentrytitle>random</refentrytitle><manvolnum>4</manvolnum></citerefentry>,
-      <citerefentry><refentrytitle>sd_id128_randomize</refentrytitle><manvolnum>3</manvolnum></citerefentry>
+      <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
+      <citerefentry><refentrytitle>sd_id128_randomize</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
+      <citerefentry project='man-pages'><refentrytitle>random</refentrytitle><manvolnum>4</manvolnum></citerefentry>
     </para>
   </refsect1>
 
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 5e6787338d..c73ccaa493 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1513,6 +1513,16 @@
         </para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>$INVOCATION_ID</varname></term>
+
+        <listitem><para>Contains a randomized, unique 128bit ID identifying each runtime cycle of the unit, formatted
+        as 32 character hexadecimal string. A new ID is assigned each time the unit changes from an inactive state into
+        an activating or active state, and may be used to identify this specific runtime cycle, in particular in data
+        stored offline, such as the journal. The same ID is passed to all processes run as part of the
+        unit.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>$XDG_RUNTIME_DIR</varname></term>
 
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 7675ab0299..37e6928a46 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -28,6 +28,7 @@
 #include <sys/stat.h>
 #include <sys/statfs.h>
 #include <sys/types.h>
+#include <sys/xattr.h>
 #include <unistd.h>
 
 #include "alloc-util.h"
@@ -883,6 +884,43 @@ int cg_set_task_access(
         return 0;
 }
 
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
+        _cleanup_free_ char *fs = NULL;
+        int r;
+
+        assert(path);
+        assert(name);
+        assert(value || size <= 0);
+
+        r = cg_get_path(controller, path, NULL, &fs);
+        if (r < 0)
+                return r;
+
+        if (setxattr(fs, name, value, size, flags) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
+        _cleanup_free_ char *fs = NULL;
+        ssize_t n;
+        int r;
+
+        assert(path);
+        assert(name);
+
+        r = cg_get_path(controller, path, NULL, &fs);
+        if (r < 0)
+                return r;
+
+        n = getxattr(fs, name, value, size);
+        if (n < 0)
+                return -errno;
+
+        return (int) n;
+}
+
 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
         _cleanup_fclose_ FILE *f = NULL;
         char line[LINE_MAX];
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 1a61c7ad22..7529c9719e 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -185,6 +185,9 @@ int cg_get_keyed_attribute(const char *controller, const char *path, const char
 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid);
 
+int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags);
+int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size);
+
 int cg_install_release_agent(const char *controller, const char *agent);
 int cg_uninstall_release_agent(const char *controller);
 
diff --git a/src/basic/log.c b/src/basic/log.c
index 6a8dad311d..bd6c96c4f8 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -330,8 +330,6 @@ static int write_to_console(
                 const char *file,
                 int line,
                 const char *func,
-                const char *object_field,
-                const char *object,
                 const char *buffer) {
 
         char location[256], prefix[1 + DECIMAL_STR_MAX(int) + 2];
@@ -390,8 +388,6 @@ static int write_to_syslog(
                 const char *file,
                 int line,
                 const char *func,
-                const char *object_field,
-                const char *object,
                 const char *buffer) {
 
         char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
@@ -453,8 +449,6 @@ static int write_to_kmsg(
                 const char *file,
                 int line,
                 const char *func,
-                const char *object_field,
-                const char *object,
                 const char *buffer) {
 
         char header_priority[2 + DECIMAL_STR_MAX(int) + 1],
@@ -485,7 +479,8 @@ static int log_do_header(
                 int level,
                 int error,
                 const char *file, int line, const char *func,
-                const char *object_field, const char *object) {
+                const char *object_field, const char *object,
+                const char *extra_field, const char *extra) {
 
         snprintf(header, size,
                  "PRIORITY=%i\n"
@@ -495,6 +490,7 @@ static int log_do_header(
                  "%s%s%s"
                  "%s%.*i%s"
                  "%s%s%s"
+                 "%s%s%s"
                  "SYSLOG_IDENTIFIER=%s\n",
                  LOG_PRI(level),
                  LOG_FAC(level),
@@ -513,6 +509,9 @@ static int log_do_header(
                  isempty(object) ? "" : object_field,
                  isempty(object) ? "" : object,
                  isempty(object) ? "" : "\n",
+                 isempty(extra) ? "" : extra_field,
+                 isempty(extra) ? "" : extra,
+                 isempty(extra) ? "" : "\n",
                  program_invocation_short_name);
 
         return 0;
@@ -526,6 +525,8 @@ static int write_to_journal(
                 const char *func,
                 const char *object_field,
                 const char *object,
+                const char *extra_field,
+                const char *extra,
                 const char *buffer) {
 
         char header[LINE_MAX];
@@ -535,7 +536,7 @@ static int write_to_journal(
         if (journal_fd < 0)
                 return 0;
 
-        log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object);
+        log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
 
         IOVEC_SET_STRING(iovec[0], header);
         IOVEC_SET_STRING(iovec[1], "MESSAGE=");
@@ -559,6 +560,8 @@ static int log_dispatch(
                 const char *func,
                 const char *object_field,
                 const char *object,
+                const char *extra,
+                const char *extra_field,
                 char *buffer) {
 
         assert(buffer);
@@ -589,7 +592,7 @@ static int log_dispatch(
                     log_target == LOG_TARGET_JOURNAL_OR_KMSG ||
                     log_target == LOG_TARGET_JOURNAL) {
 
-                        k = write_to_journal(level, error, file, line, func, object_field, object, buffer);
+                        k = write_to_journal(level, error, file, line, func, object_field, object, extra_field, extra, buffer);
                         if (k < 0) {
                                 if (k != -EAGAIN)
                                         log_close_journal();
@@ -600,7 +603,7 @@ static int log_dispatch(
                 if (log_target == LOG_TARGET_SYSLOG_OR_KMSG ||
                     log_target == LOG_TARGET_SYSLOG) {
 
-                        k = write_to_syslog(level, error, file, line, func, object_field, object, buffer);
+                        k = write_to_syslog(level, error, file, line, func, buffer);
                         if (k < 0) {
                                 if (k != -EAGAIN)
                                         log_close_syslog();
@@ -615,7 +618,7 @@ static int log_dispatch(
                      log_target == LOG_TARGET_JOURNAL_OR_KMSG ||
                      log_target == LOG_TARGET_KMSG)) {
 
-                        k = write_to_kmsg(level, error, file, line, func, object_field, object, buffer);
+                        k = write_to_kmsg(level, error, file, line, func, buffer);
                         if (k < 0) {
                                 log_close_kmsg();
                                 log_open_console();
@@ -623,7 +626,7 @@ static int log_dispatch(
                 }
 
                 if (k <= 0)
-                        (void) write_to_console(level, error, file, line, func, object_field, object, buffer);
+                        (void) write_to_console(level, error, file, line, func, buffer);
 
                 buffer = e;
         } while (buffer);
@@ -649,7 +652,7 @@ int log_dump_internal(
         if (_likely_(LOG_PRI(level) > log_max_level))
                 return -error;
 
-        return log_dispatch(level, error, file, line, func, NULL, NULL, buffer);
+        return log_dispatch(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
 }
 
 int log_internalv(
@@ -676,7 +679,7 @@ int log_internalv(
 
         vsnprintf(buffer, sizeof(buffer), format, ap);
 
-        return log_dispatch(level, error, file, line, func, NULL, NULL, buffer);
+        return log_dispatch(level, error, file, line, func, NULL, NULL, NULL, NULL, buffer);
 }
 
 int log_internal(
@@ -705,6 +708,8 @@ int log_object_internalv(
                 const char *func,
                 const char *object_field,
                 const char *object,
+                const char *extra_field,
+                const char *extra,
                 const char *format,
                 va_list ap) {
 
@@ -738,7 +743,7 @@ int log_object_internalv(
 
         vsnprintf(b, l, format, ap);
 
-        return log_dispatch(level, error, file, line, func, object_field, object, buffer);
+        return log_dispatch(level, error, file, line, func, object_field, object, extra_field, extra, buffer);
 }
 
 int log_object_internal(
@@ -749,13 +754,15 @@ int log_object_internal(
                 const char *func,
                 const char *object_field,
                 const char *object,
+                const char *extra_field,
+                const char *extra,
                 const char *format, ...) {
 
         va_list ap;
         int r;
 
         va_start(ap, format);
-        r = log_object_internalv(level, error, file, line, func, object_field, object, format, ap);
+        r = log_object_internalv(level, error, file, line, func, object_field, object, extra_field, extra, format, ap);
         va_end(ap);
 
         return r;
@@ -780,7 +787,7 @@ static void log_assert(
 
         log_abort_msg = buffer;
 
-        log_dispatch(level, 0, file, line, func, NULL, NULL, buffer);
+        log_dispatch(level, 0, file, line, func, NULL, NULL, NULL, NULL, buffer);
 }
 
 noreturn void log_assert_failed(const char *text, const char *file, int line, const char *func) {
@@ -888,7 +895,7 @@ int log_struct_internal(
                 bool fallback = false;
 
                 /* If the journal is available do structured logging */
-                log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL);
+                log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
                 IOVEC_SET_STRING(iovec[n++], header);
 
                 va_start(ap, format);
@@ -935,7 +942,7 @@ int log_struct_internal(
         if (!found)
                 return -error;
 
-        return log_dispatch(level, error, file, line, func, NULL, NULL, buf + 8);
+        return log_dispatch(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
 }
 
 int log_set_target_from_string(const char *e) {
diff --git a/src/basic/log.h b/src/basic/log.h
index b6356228d9..2afee20bb5 100644
--- a/src/basic/log.h
+++ b/src/basic/log.h
@@ -100,18 +100,22 @@ int log_object_internal(
                 const char *func,
                 const char *object_field,
                 const char *object,
-                const char *format, ...) _printf_(8,9);
+                const char *extra_field,
+                const char *extra,
+                const char *format, ...) _printf_(10,11);
 
 int log_object_internalv(
                 int level,
                 int error,
-                const char*file,
+                const char *file,
                 int line,
                 const char *func,
                 const char *object_field,
                 const char *object,
+                const char *extra_field,
+                const char *extra,
                 const char *format,
-                va_list ap) _printf_(8,0);
+                va_list ap) _printf_(9,0);
 
 int log_struct_internal(
                 int level,
diff --git a/src/core/automount.c b/src/core/automount.c
index bdc0e06965..7d7a0a6e46 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -800,6 +800,10 @@ static int automount_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         a->result = AUTOMOUNT_SUCCESS;
         automount_enter_waiting(a);
         return 1;
diff --git a/src/core/busname.c b/src/core/busname.c
index 7952cd31aa..63c7dde0bd 100644
--- a/src/core/busname.c
+++ b/src/core/busname.c
@@ -639,6 +639,10 @@ static int busname_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         n->result = BUSNAME_SUCCESS;
         busname_enter_making(n);
 
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7873f88785..20bdbc39d0 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1361,6 +1361,26 @@ int unit_attach_pids_to_cgroup(Unit *u) {
         return 0;
 }
 
+static void cgroup_xattr_apply(Unit *u) {
+        char ids[SD_ID128_STRING_MAX];
+        int r;
+
+        assert(u);
+
+        if (!MANAGER_IS_SYSTEM(u->manager))
+                return;
+
+        if (sd_id128_is_null(u->invocation_id))
+                return;
+
+        r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+                         "trusted.invocation_id",
+                         sd_id128_to_string(u->invocation_id, ids), 32,
+                         0);
+        if (r < 0)
+                log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
 static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
         assert(u);
 
@@ -1404,6 +1424,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
 
         /* Finally, apply the necessary attributes. */
         cgroup_context_apply(u, target_mask, state);
+        cgroup_xattr_apply(u);
 
         return 0;
 }
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index ea7ced2fd0..12eb55cb7f 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -464,6 +464,64 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu
         return sd_bus_reply_method_return(message, "o", path);
 }
 
+static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        _cleanup_free_ char *path = NULL;
+        Manager *m = userdata;
+        sd_id128_t id;
+        const void *a;
+        Unit *u;
+        size_t sz;
+        int r;
+
+        assert(message);
+        assert(m);
+
+        /* Anyone can call this method */
+
+        r = sd_bus_message_read_array(message, 'y', &a, &sz);
+        if (r < 0)
+                return r;
+        if (sz == 0)
+                id = SD_ID128_NULL;
+        else if (sz == 16)
+                memcpy(&id, a, sz);
+        else
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
+
+        if (sd_id128_is_null(id)) {
+                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+                pid_t pid;
+
+                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_creds_get_pid(creds, &pid);
+                if (r < 0)
+                        return r;
+
+                u = manager_get_unit_by_pid(m, pid);
+                if (!u)
+                        return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid);
+        } else {
+                u = hashmap_get(m->units_by_invocation_id, &id);
+                if (!u)
+                        return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id));
+        }
+
+        r = mac_selinux_unit_access_check(u, message, "status", error);
+        if (r < 0)
+                return r;
+
+        /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead
+         * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */
+        path = unit_dbus_path_invocation_id(u);
+        if (!path)
+                return -ENOMEM;
+
+        return sd_bus_reply_method_return(message, "o", path);
+}
+
 static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
         _cleanup_free_ char *path = NULL;
         Manager *m = userdata;
@@ -2254,6 +2312,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
 
         SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED),
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 5020dfba4b..245912fc0f 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -764,6 +764,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0),
 
         SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED),
diff --git a/src/core/device.c b/src/core/device.c
index 16e56efcc3..8a3e888e5e 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -464,6 +464,10 @@ static void device_update_found_one(Device *d, bool add, DeviceFound found, bool
         if (!now)
                 return;
 
+        /* Didn't exist before, but does now? if so, generate a new invocation ID for it */
+        if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND)
+                (void) unit_acquire_invocation_id(UNIT(d));
+
         if (d->found & DEVICE_FOUND_UDEV)
                 /* When the device is known to udev we consider it
                  * plugged. */
diff --git a/src/core/execute.c b/src/core/execute.c
index d5c4e60796..7079aeed6e 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1553,10 +1553,11 @@ static int build_environment(
         unsigned n_env = 0;
         char *x;
 
+        assert(u);
         assert(c);
         assert(ret);
 
-        our_env = new0(char*, 13);
+        our_env = new0(char*, 14);
         if (!our_env)
                 return -ENOMEM;
 
@@ -1627,6 +1628,13 @@ static int build_environment(
                 our_env[n_env++] = x;
         }
 
+        if (!sd_id128_is_null(u->invocation_id)) {
+                if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
+                        return -ENOMEM;
+
+                our_env[n_env++] = x;
+        }
+
         if (exec_context_needs_term(c)) {
                 const char *tty_path, *term = NULL;
 
diff --git a/src/core/manager.c b/src/core/manager.c
index c1dce62a18..3569249788 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -522,6 +522,7 @@ static void manager_clean_environment(Manager *m) {
                         "LISTEN_FDNAMES",
                         "WATCHDOG_PID",
                         "WATCHDOG_USEC",
+                        "INVOCATION_ID",
                         NULL);
 }
 
@@ -582,9 +583,15 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
         if (MANAGER_IS_SYSTEM(m)) {
                 m->unit_log_field = "UNIT=";
                 m->unit_log_format_string = "UNIT=%s";
+
+                m->invocation_log_field = "INVOCATION_ID=";
+                m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
         } else {
                 m->unit_log_field = "USER_UNIT=";
                 m->unit_log_format_string = "USER_UNIT=%s";
+
+                m->invocation_log_field = "USER_INVOCATION_ID=";
+                m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
         }
 
         m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
@@ -1062,6 +1069,7 @@ Manager* manager_free(Manager *m) {
         hashmap_free(m->dynamic_users);
 
         hashmap_free(m->units);
+        hashmap_free(m->units_by_invocation_id);
         hashmap_free(m->jobs);
         hashmap_free(m->watch_pids1);
         hashmap_free(m->watch_pids2);
@@ -2268,6 +2276,7 @@ int manager_loop(Manager *m) {
 
 int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
         _cleanup_free_ char *n = NULL;
+        sd_id128_t invocation_id;
         Unit *u;
         int r;
 
@@ -2279,12 +2288,25 @@ int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e,
         if (r < 0)
                 return r;
 
+        /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
+         * as invocation ID. */
+        r = sd_id128_from_string(n, &invocation_id);
+        if (r >= 0) {
+                u = hashmap_get(m->units_by_invocation_id, &invocation_id);
+                if (u) {
+                        *_u = u;
+                        return 0;
+                }
+
+                return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id));
+        }
+
+        /* If this didn't work, we use the suffix as unit name. */
         r = manager_load_unit(m, n, NULL, e, &u);
         if (r < 0)
                 return r;
 
         *_u = u;
-
         return 0;
 }
 
diff --git a/src/core/manager.h b/src/core/manager.h
index 495440b446..29fe14e10b 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -89,6 +89,7 @@ struct Manager {
 
         /* Active jobs and units */
         Hashmap *units;  /* name string => Unit object n:1 */
+        Hashmap *units_by_invocation_id;
         Hashmap *jobs;   /* job id => Job object 1:1 */
 
         /* To make it easy to iterate through the units of a specific
@@ -319,6 +320,9 @@ struct Manager {
         const char *unit_log_field;
         const char *unit_log_format_string;
 
+        const char *invocation_log_field;
+        const char *invocation_log_format_string;
+
         int first_boot; /* tri-state */
 };
 
diff --git a/src/core/mount.c b/src/core/mount.c
index 04025b83b9..436c0e1029 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -1005,6 +1005,10 @@ static int mount_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         m->result = MOUNT_SUCCESS;
         m->reload_result = MOUNT_SUCCESS;
         m->reset_cpu_usage = true;
@@ -1742,9 +1746,10 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
 
                         case MOUNT_DEAD:
                         case MOUNT_FAILED:
-                                /* This has just been mounted by
-                                 * somebody else, follow the state
-                                 * change. */
+
+                                /* This has just been mounted by somebody else, follow the state change, but let's
+                                 * generate a new invocation ID for this implicitly and automatically. */
+                                (void) unit_acquire_invocation_id(UNIT(mount));
                                 mount_enter_mounted(mount, MOUNT_SUCCESS);
                                 break;
 
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
index 647e5f736c..6caa15b0b8 100644
--- a/src/core/org.freedesktop.systemd1.conf
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -52,6 +52,10 @@
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="GetUnitByPID"/>
 
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="GetUnitByInvocationID"/>
+
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="LoadUnit"/>
diff --git a/src/core/path.c b/src/core/path.c
index 10f9b06974..83f794be89 100644
--- a/src/core/path.c
+++ b/src/core/path.c
@@ -577,6 +577,10 @@ static int path_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         path_mkdir(p);
 
         p->result = PATH_SUCCESS;
diff --git a/src/core/scope.c b/src/core/scope.c
index 65fa65493b..e7583f6d89 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -298,6 +298,10 @@ static int scope_start(Unit *u) {
         if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
                 return -ENOENT;
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         (void) unit_realize_cgroup(u);
         (void) unit_reset_cpu_usage(u);
 
diff --git a/src/core/service.c b/src/core/service.c
index 99a70395fc..8ce25c494c 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2033,6 +2033,10 @@ static int service_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         s->result = SERVICE_SUCCESS;
         s->reload_result = SERVICE_SUCCESS;
         s->main_pid_known = false;
diff --git a/src/core/slice.c b/src/core/slice.c
index c7700b8857..03fe797f27 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -187,10 +187,15 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) {
 
 static int slice_start(Unit *u) {
         Slice *t = SLICE(u);
+        int r;
 
         assert(t);
         assert(t->state == SLICE_DEAD);
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         (void) unit_realize_cgroup(u);
         (void) unit_reset_cpu_usage(u);
 
diff --git a/src/core/socket.c b/src/core/socket.c
index b9032fa5c9..ae8a1f751f 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -2354,11 +2354,14 @@ static int socket_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         s->result = SOCKET_SUCCESS;
         s->reset_cpu_usage = true;
 
         socket_enter_start_pre(s);
-
         return 1;
 }
 
diff --git a/src/core/swap.c b/src/core/swap.c
index fb222b6858..0333eaefb8 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -861,6 +861,10 @@ static int swap_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         s->result = SWAP_SUCCESS;
         s->reset_cpu_usage = true;
 
@@ -1189,6 +1193,7 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
 
                         case SWAP_DEAD:
                         case SWAP_FAILED:
+                                (void) unit_acquire_invocation_id(UNIT(swap));
                                 swap_enter_active(swap, SWAP_SUCCESS);
                                 break;
 
diff --git a/src/core/target.c b/src/core/target.c
index 61a91aad07..765c1f3fa4 100644
--- a/src/core/target.c
+++ b/src/core/target.c
@@ -124,10 +124,15 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) {
 
 static int target_start(Unit *u) {
         Target *t = TARGET(u);
+        int r;
 
         assert(t);
         assert(t->state == TARGET_DEAD);
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         target_set_state(t, TARGET_ACTIVE);
         return 1;
 }
diff --git a/src/core/timer.c b/src/core/timer.c
index e2b43f02f8..9538059c13 100644
--- a/src/core/timer.c
+++ b/src/core/timer.c
@@ -616,6 +616,10 @@ static int timer_start(Unit *u) {
                 return r;
         }
 
+        r = unit_acquire_invocation_id(u);
+        if (r < 0)
+                return r;
+
         t->last_trigger = DUAL_TIMESTAMP_NULL;
 
         /* Reenable all timers that depend on unit activation time */
@@ -632,7 +636,7 @@ static int timer_start(Unit *u) {
                         /* The timer has never run before,
                          * make sure a stamp file exists.
                          */
-                        touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+                        (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
         }
 
         t->result = TIMER_SUCCESS;
diff --git a/src/core/unit.c b/src/core/unit.c
index 693f75c928..690f7f7dd9 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -37,6 +37,7 @@
 #include "execute.h"
 #include "fileio-label.h"
 #include "formats-util.h"
+#include "id128-util.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
 #include "log.h"
@@ -521,6 +522,9 @@ void unit_free(Unit *u) {
         SET_FOREACH(t, u->names, i)
                 hashmap_remove_value(u->manager->units, t, u);
 
+        if (!sd_id128_is_null(u->invocation_id))
+                hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
         if (u->job) {
                 Job *j = u->job;
                 job_uninstall(j);
@@ -953,6 +957,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         SET_FOREACH(t, u->names, i)
                 fprintf(f, "%s\tName: %s\n", prefix, t);
 
+        if (!sd_id128_is_null(u->invocation_id))
+                fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n",
+                        prefix, SD_ID128_FORMAT_VAL(u->invocation_id));
+
         STRV_FOREACH(j, u->documentation)
                 fprintf(f, "%s\tDocumentation: %s\n", prefix, *j);
 
@@ -1054,7 +1062,6 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
 
         if (u->nop_job)
                 job_dump(u->nop_job, f, prefix2);
-
 }
 
 /* Common implementation for multiple backends */
@@ -2392,6 +2399,15 @@ char *unit_dbus_path(Unit *u) {
         return unit_dbus_path_from_name(u->id);
 }
 
+char *unit_dbus_path_invocation_id(Unit *u) {
+        assert(u);
+
+        if (sd_id128_is_null(u->invocation_id))
+                return NULL;
+
+        return unit_dbus_path_from_name(u->invocation_id_string);
+}
+
 int unit_set_slice(Unit *u, Unit *slice) {
         assert(u);
         assert(slice);
@@ -2640,6 +2656,9 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
         if (gid_is_valid(u->ref_gid))
                 unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
 
+        if (!sd_id128_is_null(u->invocation_id))
+                unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+
         bus_track_serialize(u->bus_track, f, "ref");
 
         if (serialize_jobs) {
@@ -2914,6 +2933,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                         if (r < 0)
                                 log_oom();
 
+                        continue;
+                } else if (streq(l, "invocation-id")) {
+                        sd_id128_t id;
+
+                        r = sd_id128_from_string(v, &id);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v);
+                        else {
+                                r = unit_set_invocation_id(u, id);
+                                if (r < 0)
+                                        log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m");
+                        }
+
                         continue;
                 }
 
@@ -4153,3 +4185,57 @@ void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
         if (r > 0)
                 bus_unit_send_change_signal(u);
 }
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id) {
+        int r;
+
+        assert(u);
+
+        /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */
+
+        if (sd_id128_equal(u->invocation_id, id))
+                return 0;
+
+        if (!sd_id128_is_null(u->invocation_id))
+                (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+        if (sd_id128_is_null(id)) {
+                r = 0;
+                goto reset;
+        }
+
+        r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops);
+        if (r < 0)
+                goto reset;
+
+        u->invocation_id = id;
+        sd_id128_to_string(id, u->invocation_id_string);
+
+        r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u);
+        if (r < 0)
+                goto reset;
+
+        return 0;
+
+reset:
+        u->invocation_id = SD_ID128_NULL;
+        u->invocation_id_string[0] = 0;
+        return r;
+}
+
+int unit_acquire_invocation_id(Unit *u) {
+        sd_id128_t id;
+        int r;
+
+        assert(u);
+
+        r = sd_id128_randomize(&id);
+        if (r < 0)
+                return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m");
+
+        r = unit_set_invocation_id(u, id);
+        if (r < 0)
+                return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+
+        return 0;
+}
diff --git a/src/core/unit.h b/src/core/unit.h
index 3584c16d8c..a8dd3e602c 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -207,6 +207,10 @@ struct Unit {
         /* How to start OnFailure units */
         JobMode on_failure_job_mode;
 
+        /* The current invocation ID */
+        sd_id128_t invocation_id;
+        char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */
+
         /* Garbage collect us we nobody wants or requires us anymore */
         bool stop_when_unneeded;
 
@@ -546,6 +550,7 @@ bool unit_job_is_applicable(Unit *u, JobType j);
 int set_unit_path(const char *p);
 
 char *unit_dbus_path(Unit *u);
+char *unit_dbus_path_invocation_id(Unit *u);
 
 int unit_load_related_unit(Unit *u, const char *type, Unit **_found);
 
@@ -643,12 +648,15 @@ void unit_unref_uid_gid(Unit *u, bool destroy_now);
 
 void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
 
+int unit_set_invocation_id(Unit *u, sd_id128_t id);
+int unit_acquire_invocation_id(Unit *u);
+
 /* Macros which append UNIT= or USER_UNIT= to the message */
 
 #define log_unit_full(unit, level, error, ...)                          \
         ({                                                              \
                 const Unit *_u = (unit);                                \
-                _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \
+                _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })
 
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 015b74b203..f01cf1d937 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -44,6 +44,7 @@
 #include "fs-util.h"
 #include "hashmap.h"
 #include "hostname-util.h"
+#include "id128-util.h"
 #include "io-util.h"
 #include "journal-authenticate.h"
 #include "journal-file.h"
@@ -56,6 +57,7 @@
 #include "journald-server.h"
 #include "journald-stream.h"
 #include "journald-syslog.h"
+#include "log.h"
 #include "missing.h"
 #include "mkdir.h"
 #include "parse-util.h"
@@ -69,7 +71,6 @@
 #include "string-table.h"
 #include "string-util.h"
 #include "user-util.h"
-#include "log.h"
 
 #define USER_JOURNALS_MAX 1024
 
@@ -675,6 +676,44 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                 server_schedule_sync(s, priority);
 }
 
+static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
+        _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
+        char *copy, ids[SD_ID128_STRING_MAX];
+        int r;
+
+        /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
+         * on the cgroup path. */
+
+        r = cg_slice_to_path(slice, &slice_path);
+        if (r < 0)
+                return r;
+
+        escaped = cg_escape(unit);
+        if (!escaped)
+                return -ENOMEM;
+
+        p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
+        if (!p)
+                return -ENOMEM;
+
+        r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
+        if (r < 0)
+                return r;
+        if (r != 32)
+                return -EINVAL;
+        ids[32] = 0;
+
+        if (!id128_is_valid(ids))
+                return -EINVAL;
+
+        copy = strdup(ids);
+        if (!copy)
+                return -ENOMEM;
+
+        *ret = copy;
+        return 0;
+}
+
 static void dispatch_message_real(
                 Server *s,
                 struct iovec *iovec, unsigned n, unsigned m,
@@ -771,6 +810,7 @@ static void dispatch_message_real(
 
                 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
                 if (r >= 0) {
+                        _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
                         char *session = NULL;
 
                         x = strjoina("_SYSTEMD_CGROUP=", c);
@@ -790,9 +830,8 @@ static void dispatch_message_real(
                                 IOVEC_SET_STRING(iovec[n++], owner_uid);
                         }
 
-                        if (cg_path_get_unit(c, &t) >= 0) {
-                                x = strjoina("_SYSTEMD_UNIT=", t);
-                                free(t);
+                        if (cg_path_get_unit(c, &raw_unit) >= 0) {
+                                x = strjoina("_SYSTEMD_UNIT=", raw_unit);
                                 IOVEC_SET_STRING(iovec[n++], x);
                         } else if (unit_id && !session) {
                                 x = strjoina("_SYSTEMD_UNIT=", unit_id);
@@ -808,9 +847,8 @@ static void dispatch_message_real(
                                 IOVEC_SET_STRING(iovec[n++], x);
                         }
 
-                        if (cg_path_get_slice(c, &t) >= 0) {
-                                x = strjoina("_SYSTEMD_SLICE=", t);
-                                free(t);
+                        if (cg_path_get_slice(c, &raw_slice) >= 0) {
+                                x = strjoina("_SYSTEMD_SLICE=", raw_slice);
                                 IOVEC_SET_STRING(iovec[n++], x);
                         }
 
@@ -820,6 +858,14 @@ static void dispatch_message_real(
                                 IOVEC_SET_STRING(iovec[n++], x);
                         }
 
+                        if (raw_slice && raw_unit) {
+                                if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
+                                        x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
+                                        free(t);
+                                        IOVEC_SET_STRING(iovec[n++], x);
+                                }
+                        }
+
                         free(c);
                 } else if (unit_id) {
                         x = strjoina("_SYSTEMD_UNIT=", unit_id);
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index 784d24833d..dfb5724794 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -153,7 +153,7 @@ struct Server {
 
 #define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID="))
 
-#define N_IOVEC_META_FIELDS 21
+#define N_IOVEC_META_FIELDS 22
 #define N_IOVEC_KERNEL_FIELDS 64
 #define N_IOVEC_UDEV_FIELDS 32
 #define N_IOVEC_OBJECT_FIELDS 14
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym
index 70ea347361..d48ef6bbe2 100644
--- a/src/libsystemd/libsystemd.sym
+++ b/src/libsystemd/libsystemd.sym
@@ -509,4 +509,5 @@ global:
         sd_bus_track_count_sender;
         sd_bus_set_exit_on_disconnect;
         sd_bus_get_exit_on_disconnect;
+        sd_id128_get_invocation;
 } LIBSYSTEMD_231;
diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c
index 9cc28ed564..d2a826bf6e 100644
--- a/src/libsystemd/sd-bus/bus-common-errors.c
+++ b/src/libsystemd/sd-bus/bus-common-errors.c
@@ -27,6 +27,7 @@
 BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = {
         SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_UNIT,                 ENOENT),
         SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_PID,              ESRCH),
+        SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID,    ENOENT),
         SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_EXISTS,                  EEXIST),
         SD_BUS_ERROR_MAP(BUS_ERROR_LOAD_FAILED,                  EIO),
         SD_BUS_ERROR_MAP(BUS_ERROR_JOB_FAILED,                   EREMOTEIO),
diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h
index 5df21c8926..525b79fa77 100644
--- a/src/libsystemd/sd-bus/bus-common-errors.h
+++ b/src/libsystemd/sd-bus/bus-common-errors.h
@@ -23,6 +23,7 @@
 
 #define BUS_ERROR_NO_SUCH_UNIT "org.freedesktop.systemd1.NoSuchUnit"
 #define BUS_ERROR_NO_UNIT_FOR_PID "org.freedesktop.systemd1.NoUnitForPID"
+#define BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID "org.freedesktop.systemd1.NoUnitForInvocationID"
 #define BUS_ERROR_UNIT_EXISTS "org.freedesktop.systemd1.UnitExists"
 #define BUS_ERROR_LOAD_FAILED "org.freedesktop.systemd1.LoadFailed"
 #define BUS_ERROR_JOB_FAILED "org.freedesktop.systemd1.JobFailed"
diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c
index c3f527d657..337eae24b4 100644
--- a/src/libsystemd/sd-id128/id128-util.c
+++ b/src/libsystemd/sd-id128/id128-util.c
@@ -192,3 +192,16 @@ int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync) {
 
         return id128_write_fd(fd, f, id, do_sync);
 }
+
+void id128_hash_func(const void *p, struct siphash *state) {
+        siphash24_compress(p, 16, state);
+}
+
+int id128_compare_func(const void *a, const void *b) {
+        return memcmp(a, b, 16);
+}
+
+const struct hash_ops id128_hash_ops = {
+        .hash = id128_hash_func,
+        .compare = id128_compare_func,
+};
diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h
index 3ba59acbca..6b3855acbb 100644
--- a/src/libsystemd/sd-id128/id128-util.h
+++ b/src/libsystemd/sd-id128/id128-util.h
@@ -22,6 +22,8 @@
 #include <stdbool.h>
 
 #include "sd-id128.h"
+
+#include "hash-funcs.h"
 #include "macro.h"
 
 char *id128_to_uuid_string(sd_id128_t id, char s[37]);
@@ -43,3 +45,7 @@ int id128_read(const char *p, Id128Format f, sd_id128_t *ret);
 
 int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync);
 int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync);
+
+void id128_hash_func(const void *p, struct siphash *state);
+int id128_compare_func(const void *a, const void *b) _pure_;
+extern const struct hash_ops id128_hash_ops;
diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c
index 9f47d04e61..d4450c70a0 100644
--- a/src/libsystemd/sd-id128/sd-id128.c
+++ b/src/libsystemd/sd-id128/sd-id128.c
@@ -129,6 +129,28 @@ _public_ int sd_id128_get_boot(sd_id128_t *ret) {
         return 0;
 }
 
+_public_ int sd_id128_get_invocation(sd_id128_t *ret) {
+        static thread_local sd_id128_t saved_invocation_id = {};
+        int r;
+
+        assert_return(ret, -EINVAL);
+
+        if (sd_id128_is_null(saved_invocation_id)) {
+                const char *e;
+
+                e = secure_getenv("INVOCATION_ID");
+                if (!e)
+                        return -ENXIO;
+
+                r = sd_id128_from_string(e, &saved_invocation_id);
+                if (r < 0)
+                        return r;
+        }
+
+        *ret = saved_invocation_id;
+        return 0;
+}
+
 static sd_id128_t make_v4_uuid(sd_id128_t id) {
         /* Stolen from generate_random_uuid() of drivers/char/random.c
          * in the kernel sources */
diff --git a/src/network/networkd-link.h b/src/network/networkd-link.h
index 05b2a2b323..77f72d070e 100644
--- a/src/network/networkd-link.h
+++ b/src/network/networkd-link.h
@@ -187,7 +187,7 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(Link*, link_unref);
 #define log_link_full(link, level, error, ...)                          \
         ({                                                              \
                 const Link *_l = (link);                                \
-                _l ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _l->ifname, ##__VA_ARGS__) : \
+                _l ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _l->ifname, NULL, NULL, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })                                                              \
 
diff --git a/src/network/networkd-netdev.h b/src/network/networkd-netdev.h
index 31b55e2791..70ff947b99 100644
--- a/src/network/networkd-netdev.h
+++ b/src/network/networkd-netdev.h
@@ -182,7 +182,7 @@ const struct ConfigPerfItem* network_netdev_gperf_lookup(const char *key, unsign
 #define log_netdev_full(netdev, level, error, ...)                      \
         ({                                                              \
                 const NetDev *_n = (netdev);                            \
-                _n ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _n->ifname, ##__VA_ARGS__) : \
+                _n ? log_object_internal(level, error, __FILE__, __LINE__, __func__, "INTERFACE=", _n->ifname, NULL, NULL, ##__VA_ARGS__) : \
                         log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
         })
 
diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c
index 64fcf9295f..bb90c89cc2 100644
--- a/src/shared/bus-util.c
+++ b/src/shared/bus-util.c
@@ -1338,7 +1338,7 @@ int bus_property_get_id128(
         if (sd_id128_is_null(*id)) /* Add an empty array if the ID is zero */
                 return sd_bus_message_append(reply, "ay", 0);
         else
-                return sd_bus_message_append_array(reply, 'b', id->bytes, 16);
+                return sd_bus_message_append_array(reply, 'y', id->bytes, 16);
 }
 
 #if __SIZEOF_SIZE_T__ != 8
diff --git a/src/systemd/sd-id128.h b/src/systemd/sd-id128.h
index 4dff0b9b81..ee011b1861 100644
--- a/src/systemd/sd-id128.h
+++ b/src/systemd/sd-id128.h
@@ -45,8 +45,8 @@ int sd_id128_from_string(const char *s, sd_id128_t *ret);
 int sd_id128_randomize(sd_id128_t *ret);
 
 int sd_id128_get_machine(sd_id128_t *ret);
-
 int sd_id128_get_boot(sd_id128_t *ret);
+int sd_id128_get_invocation(sd_id128_t *ret);
 
 #define SD_ID128_MAKE(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \
         ((const sd_id128_t) { .bytes = { 0x##v0, 0x##v1, 0x##v2, 0x##v3, 0x##v4, 0x##v5, 0x##v6, 0x##v7, \
-- 
cgit v1.2.3-54-g00ecf


From 9fc932bff1e2058885fc458412b3c94d586cb586 Mon Sep 17 00:00:00 2001
From: 0xAX <0xAX@users.noreply.github.com>
Date: Mon, 10 Oct 2016 05:55:24 +0300
Subject: tree-wide: print warning in a failure case of make_null_stdio()
 (#4320)

The make_null_stdio() may fail. Let's check its result and print
warning message instead of keeping silence.
---
 src/core/main.c  | 3 ++-
 src/udev/udevd.c | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 6fe440277e..e88362b7fe 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1532,7 +1532,8 @@ int main(int argc, char *argv[]) {
                  * need to do that for user instances since they never log
                  * into the console. */
                 log_show_color(colors_enabled());
-                make_null_stdio();
+                if (make_null_stdio() < 0)
+                        log_warning_errno(errno, "Failed to redirect standard streams to /dev/null: %m");
         }
 
         /* Initialize default unit */
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index 19f1c29198..6000d9c7ec 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -1739,7 +1739,9 @@ int main(int argc, char *argv[]) {
 
                 /* connect /dev/null to stdin, stdout, stderr */
                 if (log_get_max_level() < LOG_DEBUG)
-                        (void) make_null_stdio();
+                        if (make_null_stdio() < 0)
+                                log_warning_errno(errno, "Failed to redirect standard streams to /dev/null: %m");
+
 
                 pid = fork();
                 switch (pid) {
-- 
cgit v1.2.3-54-g00ecf


From 10c961b9c9625d4e0408b5f21e2ba815148ccf63 Mon Sep 17 00:00:00 2001
From: 0xAX <0xAX@users.noreply.github.com>
Date: Mon, 10 Oct 2016 05:57:03 +0300
Subject: main: initialize default unit little later (#4321)

systemd fills arg_default_unit during startup with default.target
value. But arg_default_unit may be overwritten in parse_argv() or
parse_proc_cmdline_item().

Let's check value of arg_default_unit after calls of parse_argv()
and parse_proc_cmdline_item() and fill it with default.target if
it wasn't filled before. In this way we will not spend unnecessary
time to for filling arg_default_unit with default.target.
---
 src/core/main.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index e88362b7fe..0fad393323 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1536,14 +1536,6 @@ int main(int argc, char *argv[]) {
                         log_warning_errno(errno, "Failed to redirect standard streams to /dev/null: %m");
         }
 
-        /* Initialize default unit */
-        r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
-        if (r < 0) {
-                log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
-                error_message = "Failed to set default unit";
-                goto finish;
-        }
-
         r = initialize_join_controllers();
         if (r < 0) {
                 error_message = "Failed to initialize cgroup controllers";
@@ -1591,6 +1583,16 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        /* Initialize default unit */
+        if (!arg_default_unit) {
+                r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
+                if (r < 0) {
+                        log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
+                        error_message = "Failed to set default unit";
+                        goto finish;
+                }
+        }
+
         if (arg_action == ACTION_TEST &&
             geteuid() == 0) {
                 log_error("Don't run test mode as root.");
-- 
cgit v1.2.3-54-g00ecf


From c76cf844d6e59ca5ac5eb3e6c60b3e501815c5b8 Mon Sep 17 00:00:00 2001
From: 0xAX <0xAX@users.noreply.github.com>
Date: Mon, 10 Oct 2016 20:51:33 +0300
Subject: tree-wide: pass return value of make_null_stdio() to warning instead
 of errno (#4328)

as @poettering suggested in the #4320
---
 src/core/main.c  | 5 +++--
 src/udev/udevd.c | 9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 0fad393323..30d9c43fbb 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1532,8 +1532,9 @@ int main(int argc, char *argv[]) {
                  * need to do that for user instances since they never log
                  * into the console. */
                 log_show_color(colors_enabled());
-                if (make_null_stdio() < 0)
-                        log_warning_errno(errno, "Failed to redirect standard streams to /dev/null: %m");
+                r = make_null_stdio();
+                if (r < 0)
+                        log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
         }
 
         r = initialize_join_controllers();
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index 6000d9c7ec..535d317c27 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -1738,9 +1738,12 @@ int main(int argc, char *argv[]) {
                 log_info("starting version " VERSION);
 
                 /* connect /dev/null to stdin, stdout, stderr */
-                if (log_get_max_level() < LOG_DEBUG)
-                        if (make_null_stdio() < 0)
-                                log_warning_errno(errno, "Failed to redirect standard streams to /dev/null: %m");
+                if (log_get_max_level() < LOG_DEBUG) {
+                        r = make_null_stdio();
+                        if (r < 0)
+                                log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
+                }
+
 
 
                 pid = fork();
-- 
cgit v1.2.3-54-g00ecf


From 41e2036eb83204df95a1c3e829bcfd78ee17aaa3 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 10 Oct 2016 21:48:08 +0200
Subject: exit-status: kill is_clean_exit_lsb(), move logic to sysv-generator

Let's get rid of is_clean_exit_lsb(), let's move the logic for the special
handling of the two LSB exit codes into the sysv-generator by writing out
appropriate SuccessExitStatus= lines if the LSB header exists. This is not only
semantically more correct, bug also fixes a bug as the code in service.c that
chose between is_clean_exit_lsb() and is_clean_exit() based this check on
whether a native unit files was available for the unit. However, that check was
bogus since a long time, since the SysV generator was introduced and native
SysV script support was removed from PID 1, as in that case a unit file always
existed.
---
 src/basic/exit-status.c             | 10 ----------
 src/basic/exit-status.h             |  1 -
 src/core/service.c                  |  3 +--
 src/systemctl/systemctl.c           |  2 +-
 src/sysv-generator/sysv-generator.c |  8 ++++++++
 5 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/exit-status.c b/src/basic/exit-status.c
index 81c2c6675c..96d4619da6 100644
--- a/src/basic/exit-status.c
+++ b/src/basic/exit-status.c
@@ -194,16 +194,6 @@ bool is_clean_exit(int code, int status, ExitStatusSet *success_status) {
         return false;
 }
 
-bool is_clean_exit_lsb(int code, int status, ExitStatusSet *success_status) {
-
-        if (is_clean_exit(code, status, success_status))
-                return true;
-
-        return
-                code == CLD_EXITED &&
-                IN_SET(status, EXIT_NOTINSTALLED, EXIT_NOTCONFIGURED);
-}
-
 void exit_status_set_free(ExitStatusSet *x) {
         assert(x);
 
diff --git a/src/basic/exit-status.h b/src/basic/exit-status.h
index 46cc905865..b3baa50cf4 100644
--- a/src/basic/exit-status.h
+++ b/src/basic/exit-status.h
@@ -99,7 +99,6 @@ typedef struct ExitStatusSet {
 const char* exit_status_to_string(int status, ExitStatusLevel level) _const_;
 
 bool is_clean_exit(int code, int status, ExitStatusSet *success_status);
-bool is_clean_exit_lsb(int code, int status, ExitStatusSet *success_status);
 
 void exit_status_set_free(ExitStatusSet *x);
 bool exit_status_set_is_empty(ExitStatusSet *x);
diff --git a/src/core/service.c b/src/core/service.c
index 99a70395fc..fc1d353356 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2600,8 +2600,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         assert(s);
         assert(pid >= 0);
 
-        if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) :
-                                     is_clean_exit_lsb(code, status, &s->success_status))
+        if (is_clean_exit(code, status, &s->success_status))
                 f = SERVICE_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SERVICE_FAILURE_EXIT_CODE;
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index bb6002e8ef..9c6a475a39 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -3936,7 +3936,7 @@ static void print_status_info(
                 argv = strv_join(p->argv, " ");
                 printf("  Process: "PID_FMT" %s=%s ", p->pid, p->name, strna(argv));
 
-                good = is_clean_exit_lsb(p->code, p->status, NULL);
+                good = is_clean_exit(p->code, p->status, NULL);
                 if (!good) {
                         on = ansi_highlight_red();
                         off = ansi_normal();
diff --git a/src/sysv-generator/sysv-generator.c b/src/sysv-generator/sysv-generator.c
index 39821687b9..c2c80175a2 100644
--- a/src/sysv-generator/sysv-generator.c
+++ b/src/sysv-generator/sysv-generator.c
@@ -25,6 +25,7 @@
 
 #include "alloc-util.h"
 #include "dirent-util.h"
+#include "exit-status.h"
 #include "fd-util.h"
 #include "fileio.h"
 #include "hashmap.h"
@@ -199,6 +200,13 @@ static int generate_unit_file(SysvStub *s) {
         if (s->pid_file)
                 fprintf(f, "PIDFile=%s\n", s->pid_file);
 
+        /* Consider two special LSB exit codes a clean exit */
+        if (s->has_lsb)
+                fprintf(f,
+                        "SuccessExitStatus=%i %i\n",
+                        EXIT_NOTINSTALLED,
+                        EXIT_NOTCONFIGURED);
+
         fprintf(f,
                 "ExecStart=%s start\n"
                 "ExecStop=%s stop\n",
-- 
cgit v1.2.3-54-g00ecf


From f6dd106c73abb33af5eb0aaeffcddd11724515e6 Mon Sep 17 00:00:00 2001
From: 0xAX <0xAX@users.noreply.github.com>
Date: Mon, 10 Oct 2016 23:11:36 +0300
Subject: main: use strdup instead of free_and_strdup to initialize default
 unit (#4335)

Previously we've used free_and_strdup() to fill arg_default_unit with unit
name, If we didn't pass default unit name through a kernel command line or
command line arguments. But we can use just strdup() instead of
free_and_strdup() for this, because we will start fill arg_default_unit
only if it wasn't set before.
---
 src/core/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 30d9c43fbb..9985510161 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1586,9 +1586,9 @@ int main(int argc, char *argv[]) {
 
         /* Initialize default unit */
         if (!arg_default_unit) {
-                r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
-                if (r < 0) {
-                        log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
+                arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
+                if (!arg_default_unit) {
+                        r = log_oom();
                         error_message = "Failed to set default unit";
                         goto finish;
                 }
-- 
cgit v1.2.3-54-g00ecf


From 38107f5a4ad5ab148054910040d5d548a4c7a805 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 10 Oct 2016 21:56:36 +0200
Subject: core: lower exit status "level" at one place

When we print information about PID 1's crashdump subprocess failing. In this
case we *know* that we do not generate LSB exit codes, as it's basically PID 1
itself that exited there.
---
 src/core/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 30d9c43fbb..0908993c4a 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -203,7 +203,7 @@ noreturn static void crash(int sig) {
                                               pid, sigchld_code_to_string(status.si_code),
                                               status.si_status,
                                               strna(status.si_code == CLD_EXITED
-                                                    ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
+                                                    ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
                                                     : signal_to_string(status.si_status)));
                         else
                                 log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
-- 
cgit v1.2.3-54-g00ecf


From 1f0958f640b87175cd547c1e69084cfe54a22e9d Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 10 Oct 2016 22:07:30 +0200
Subject: core: when determining whether a process exit status is clean,
 consider whether it is a command or a daemon

SIGTERM should be considered a clean exit code for daemons (i.e. long-running
processes, as a daemon without SIGTERM handler may be shut down without issues
via SIGTERM still) while it should not be considered a clean exit code for
commands (i.e. short-running processes).

Let's add two different clean checking modes for this, and use the right one at
the appropriate places.

Fixes: #4275
---
 src/basic/exit-status.c                             | 8 ++++----
 src/basic/exit-status.h                             | 7 ++++++-
 src/core/busname.c                                  | 2 +-
 src/core/mount.c                                    | 2 +-
 src/core/service.c                                  | 2 +-
 src/core/socket.c                                   | 2 +-
 src/core/swap.c                                     | 2 +-
 src/remount-fs/remount-fs.c                         | 2 +-
 src/systemctl/systemctl.c                           | 2 +-
 src/tty-ask-password-agent/tty-ask-password-agent.c | 2 +-
 10 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/exit-status.c b/src/basic/exit-status.c
index 96d4619da6..59557f8afe 100644
--- a/src/basic/exit-status.c
+++ b/src/basic/exit-status.c
@@ -177,17 +177,17 @@ const char* exit_status_to_string(int status, ExitStatusLevel level) {
         return NULL;
 }
 
-bool is_clean_exit(int code, int status, ExitStatusSet *success_status) {
+bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status) {
 
         if (code == CLD_EXITED)
                 return status == 0 ||
                        (success_status &&
                        set_contains(success_status->status, INT_TO_PTR(status)));
 
-        /* If a daemon does not implement handlers for some of the
-         * signals that's not considered an unclean shutdown */
+        /* If a daemon does not implement handlers for some of the signals that's not considered an unclean shutdown */
         if (code == CLD_KILLED)
-                return IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE) ||
+                return
+                        (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE)) ||
                         (success_status &&
                          set_contains(success_status->signal, INT_TO_PTR(status)));
 
diff --git a/src/basic/exit-status.h b/src/basic/exit-status.h
index b3baa50cf4..0cfdfd7891 100644
--- a/src/basic/exit-status.h
+++ b/src/basic/exit-status.h
@@ -98,7 +98,12 @@ typedef struct ExitStatusSet {
 
 const char* exit_status_to_string(int status, ExitStatusLevel level) _const_;
 
-bool is_clean_exit(int code, int status, ExitStatusSet *success_status);
+typedef enum ExitClean {
+        EXIT_CLEAN_DAEMON,
+        EXIT_CLEAN_COMMAND,
+} ExitClean;
+
+bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status);
 
 void exit_status_set_free(ExitStatusSet *x);
 bool exit_status_set_is_empty(ExitStatusSet *x);
diff --git a/src/core/busname.c b/src/core/busname.c
index 7952cd31aa..a69e3831f6 100644
--- a/src/core/busname.c
+++ b/src/core/busname.c
@@ -868,7 +868,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         n->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = BUSNAME_SUCCESS;
         else if (code == CLD_EXITED)
                 f = BUSNAME_FAILURE_EXIT_CODE;
diff --git a/src/core/mount.c b/src/core/mount.c
index 04025b83b9..ed542776ac 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -1159,7 +1159,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         m->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = MOUNT_SUCCESS;
         else if (code == CLD_EXITED)
                 f = MOUNT_FAILURE_EXIT_CODE;
diff --git a/src/core/service.c b/src/core/service.c
index fc1d353356..98edc437a2 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2600,7 +2600,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         assert(s);
         assert(pid >= 0);
 
-        if (is_clean_exit(code, status, &s->success_status))
+        if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status))
                 f = SERVICE_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SERVICE_FAILURE_EXIT_CODE;
diff --git a/src/core/socket.c b/src/core/socket.c
index b9032fa5c9..1b4a1b3dc3 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -2743,7 +2743,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         s->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = SOCKET_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SOCKET_FAILURE_EXIT_CODE;
diff --git a/src/core/swap.c b/src/core/swap.c
index fb222b6858..fee9e7b0e6 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -988,7 +988,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         s->control_pid = 0;
 
-        if (is_clean_exit(code, status, NULL))
+        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
                 f = SWAP_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SWAP_FAILURE_EXIT_CODE;
diff --git a/src/remount-fs/remount-fs.c b/src/remount-fs/remount-fs.c
index 6468d1eecd..c3bdcaf1da 100644
--- a/src/remount-fs/remount-fs.c
+++ b/src/remount-fs/remount-fs.c
@@ -137,7 +137,7 @@ int main(int argc, char *argv[]) {
 
                 s = hashmap_remove(pids, PID_TO_PTR(si.si_pid));
                 if (s) {
-                        if (!is_clean_exit(si.si_code, si.si_status, NULL)) {
+                        if (!is_clean_exit(si.si_code, si.si_status, EXIT_CLEAN_COMMAND, NULL)) {
                                 if (si.si_code == CLD_EXITED)
                                         log_error(MOUNT_PATH " for %s exited with exit status %i.", s, si.si_status);
                                 else
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 9c6a475a39..18a8a4f248 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -3936,7 +3936,7 @@ static void print_status_info(
                 argv = strv_join(p->argv, " ");
                 printf("  Process: "PID_FMT" %s=%s ", p->pid, p->name, strna(argv));
 
-                good = is_clean_exit(p->code, p->status, NULL);
+                good = is_clean_exit(p->code, p->status, EXIT_CLEAN_DAEMON, NULL);
                 if (!good) {
                         on = ansi_highlight_red();
                         off = ansi_normal();
diff --git a/src/tty-ask-password-agent/tty-ask-password-agent.c b/src/tty-ask-password-agent/tty-ask-password-agent.c
index 8851af449d..b45490be1a 100644
--- a/src/tty-ask-password-agent/tty-ask-password-agent.c
+++ b/src/tty-ask-password-agent/tty-ask-password-agent.c
@@ -827,7 +827,7 @@ static int ask_on_consoles(int argc, char *argv[]) {
                 break;
         }
 
-        if (!is_clean_exit(status.si_code, status.si_status, NULL))
+        if (!is_clean_exit(status.si_code, status.si_status, EXIT_CLEAN_DAEMON, NULL))
                 log_error("Password agent failed with: %d", status.si_status);
 
         terminate_agents(pids);
-- 
cgit v1.2.3-54-g00ecf


From f2aed3070d2c3bc6f380b711b1bdcebbda1bd1e8 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 10 Oct 2016 22:28:38 +0200
Subject: core: make use of IN_SET() in various places in mount.c

---
 src/core/mount.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'src/core')

diff --git a/src/core/mount.c b/src/core/mount.c
index ed542776ac..26c33b94ea 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -830,7 +830,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
 fail:
         log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
 
-        if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL)
+        if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL))
                 mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES);
         else
                 mount_enter_dead(m, MOUNT_FAILURE_RESOURCES);
@@ -986,18 +986,19 @@ static int mount_start(Unit *u) {
 
         /* We cannot fulfill this request right now, try again later
          * please! */
-        if (m->state == MOUNT_UNMOUNTING ||
-            m->state == MOUNT_UNMOUNTING_SIGTERM ||
-            m->state == MOUNT_UNMOUNTING_SIGKILL ||
-            m->state == MOUNT_MOUNTING_SIGTERM ||
-            m->state == MOUNT_MOUNTING_SIGKILL)
+        if (IN_SET(m->state,
+                   MOUNT_UNMOUNTING,
+                   MOUNT_UNMOUNTING_SIGTERM,
+                   MOUNT_UNMOUNTING_SIGKILL,
+                   MOUNT_MOUNTING_SIGTERM,
+                   MOUNT_MOUNTING_SIGKILL))
                 return -EAGAIN;
 
         /* Already on it! */
         if (m->state == MOUNT_MOUNTING)
                 return 0;
 
-        assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED);
+        assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
 
         r = unit_start_limit_test(u);
         if (r < 0) {
@@ -1019,19 +1020,21 @@ static int mount_stop(Unit *u) {
         assert(m);
 
         /* Already on it */
-        if (m->state == MOUNT_UNMOUNTING ||
-            m->state == MOUNT_UNMOUNTING_SIGKILL ||
-            m->state == MOUNT_UNMOUNTING_SIGTERM ||
-            m->state == MOUNT_MOUNTING_SIGTERM ||
-            m->state == MOUNT_MOUNTING_SIGKILL)
+        if (IN_SET(m->state,
+                   MOUNT_UNMOUNTING,
+                   MOUNT_UNMOUNTING_SIGKILL,
+                   MOUNT_UNMOUNTING_SIGTERM,
+                   MOUNT_MOUNTING_SIGTERM,
+                   MOUNT_MOUNTING_SIGKILL))
                 return 0;
 
-        assert(m->state == MOUNT_MOUNTING ||
-               m->state == MOUNT_MOUNTING_DONE ||
-               m->state == MOUNT_MOUNTED ||
-               m->state == MOUNT_REMOUNTING ||
-               m->state == MOUNT_REMOUNTING_SIGTERM ||
-               m->state == MOUNT_REMOUNTING_SIGKILL);
+        assert(IN_SET(m->state,
+                      MOUNT_MOUNTING,
+                      MOUNT_MOUNTING_DONE,
+                      MOUNT_MOUNTED,
+                      MOUNT_REMOUNTING,
+                      MOUNT_REMOUNTING_SIGTERM,
+                      MOUNT_REMOUNTING_SIGKILL));
 
         mount_enter_unmounting(m);
         return 1;
-- 
cgit v1.2.3-54-g00ecf


From 052364d41f432c5a2357122a41d5f60cabb63a5e Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 10 Oct 2016 22:28:55 +0200
Subject: core: simplify if branches a bit

We do the same thing in two branches, let's merge them. Let's also add an
explanatory comment, while we are at it.
---
 src/core/mount.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/mount.c b/src/core/mount.c
index 26c33b94ea..f5e67b1d78 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -1200,9 +1200,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         case MOUNT_MOUNTING_SIGKILL:
         case MOUNT_MOUNTING_SIGTERM:
 
-                if (f == MOUNT_SUCCESS)
-                        mount_enter_mounted(m, f);
-                else if (m->from_proc_self_mountinfo)
+                if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo)
+                        /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are
+                         * happy. If we see the first condition first, we should see the the second condition
+                         * immediately after – or /bin/mount lies to us and is broken. */
                         mount_enter_mounted(m, f);
                 else
                         mount_enter_dead(m, f);
-- 
cgit v1.2.3-54-g00ecf


From e0d2adfde677d91b57dd63f6a3f00f4b86be9a64 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 11 Oct 2016 20:07:22 +0200
Subject: core: chown() any TTY used for stdin, not just when StandardInput=tty
 is used (#4347)

If stdin is supplied as an fd for transient units (using the
StandardInputFileDescriptor pseudo-property for transient units), then we
should also fix up the TTY ownership, not just when we opened the TTY
ourselves.

This simply drops the explicit is_terminal_input()-based check. Note that
chown_terminal() internally does a much more appropriate isatty()-based check
anyway, hence we can drop this without replacement.

Fixes: #4260
---
 src/core/execute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 7079aeed6e..0c983f4953 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2350,7 +2350,7 @@ static int exec_child(
                                       USER_PROCESS,
                                       username ? "root" : context->user);
 
-        if (context->user && is_terminal_input(context->std_input)) {
+        if (context->user) {
                 r = chown_terminal(STDIN_FILENO, uid);
                 if (r < 0) {
                         *exit_status = EXIT_STDIN;
-- 
cgit v1.2.3-54-g00ecf


From 74e7579c17e08ee548a81dfca3d98251d3d3948c Mon Sep 17 00:00:00 2001
From: 0xAX <0xAX@users.noreply.github.com>
Date: Wed, 12 Oct 2016 00:30:04 +0300
Subject: core/main: get rid from excess check of ACTION_TEST (#4350)

If `--test` command line option was passed, the systemd set skip_setup
to true during bootup. But after this we check again that arg_action is
test or help and opens pager depends on result.

We should skip setup in a case when `--test` is passed, but it is also
safe to set skip_setup in a case of `--help`. So let's remove first
check and move skip_setup = true to the second check.
---
 src/core/main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 4b82a57b3c..61f3828a36 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1614,11 +1614,10 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (arg_action == ACTION_TEST)
-                skip_setup = true;
-
-        if (arg_action == ACTION_TEST || arg_action == ACTION_HELP)
+        if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) {
                 pager_open(arg_no_pager, false);
+                skip_setup = true;
+        }
 
         if (arg_action == ACTION_HELP) {
                 retval = help();
-- 
cgit v1.2.3-54-g00ecf


From 3ccb886283a1a98b549f44b6d33edeecc3768f1f Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Wed, 12 Oct 2016 05:12:11 -0400
Subject: Allow block and char classes in DeviceAllow bus properties (#4353)

Allowed paths are unified betwen the configuration file parses and the bus
property checker. The biggest change is that the bus code now allows "block-"
and "char-" classes. In addition, path_startswith("/dev") was used in the bus
code, and startswith("/dev") was used in the config file code. It seems
reasonable to use path_startswith() which allows a slightly broader class of
strings.

Fixes #3935.
---
 src/basic/path-util.c      | 11 ++++++++---
 src/basic/path-util.h      |  1 +
 src/core/load-fragment.c   |  4 +---
 src/shared/bus-unit-util.c |  2 +-
 4 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/path-util.c b/src/basic/path-util.c
index c32e961af4..a76963aa9f 100644
--- a/src/basic/path-util.c
+++ b/src/basic/path-util.c
@@ -812,9 +812,14 @@ bool is_device_path(const char *path) {
         /* Returns true on paths that refer to a device, either in
          * sysfs or in /dev */
 
-        return
-                path_startswith(path, "/dev/") ||
-                path_startswith(path, "/sys/");
+        return path_startswith(path, "/dev/") ||
+               path_startswith(path, "/sys/");
+}
+
+bool is_deviceallow_pattern(const char *path) {
+        return path_startswith(path, "/dev/") ||
+               startswith(path, "block-") ||
+               startswith(path, "char-");
 }
 
 int systemd_installation_has_version(const char *root, unsigned minimal_version) {
diff --git a/src/basic/path-util.h b/src/basic/path-util.h
index 78472f0961..66545f52d9 100644
--- a/src/basic/path-util.h
+++ b/src/basic/path-util.h
@@ -125,5 +125,6 @@ char *file_in_same_dir(const char *path, const char *filename);
 bool hidden_or_backup_file(const char *filename) _pure_;
 
 bool is_device_path(const char *path);
+bool is_deviceallow_pattern(const char *path);
 
 int systemd_installation_has_version(const char *root, unsigned minimal_version);
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 8f067b5586..06c156a623 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -3084,9 +3084,7 @@ int config_parse_device_allow(
         if (!path)
                 return log_oom();
 
-        if (!startswith(path, "/dev/") &&
-            !startswith(path, "block-") &&
-            !startswith(path, "char-")) {
+        if (!is_deviceallow_pattern(path)) {
                 log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path);
                 return 0;
         }
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index c6bd2f145c..a550a370b5 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -303,7 +303,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                                 rwm = "";
                         }
 
-                        if (!path_startswith(path, "/dev")) {
+                        if (!is_deviceallow_pattern(path)) {
                                 log_error("%s is not a device file in /dev.", path);
                                 return -EINVAL;
                         }
-- 
cgit v1.2.3-54-g00ecf


From 502d704e5ed2d288069471f4e3611115cde107d6 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Wed, 12 Oct 2016 13:31:21 +0200
Subject: core:sandbox: Add ProtectKernelModules= option

This is useful to turn off explicit module load and unload operations on modular
kernels. This option removes CAP_SYS_MODULE from the capability bounding set for
the unit, and installs a system call filter to block module system calls.

This option will not prevent the kernel from loading modules using the module
auto-load feature which is a system wide operation.
---
 man/systemd.exec.xml                  | 17 ++++++++++++
 src/core/dbus-execute.c               |  5 +++-
 src/core/execute.c                    | 52 +++++++++++++++++++++++++++++++++++
 src/core/execute.h                    |  1 +
 src/core/load-fragment-gperf.gperf.m4 |  1 +
 src/core/unit.c                       |  3 ++
 src/shared/bus-unit-util.c            |  3 +-
 7 files changed, 80 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 986985ad35..3bea4976b3 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1404,6 +1404,23 @@
         logging. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>ProtectKernelModules=</varname></term>
+
+        <listitem><para>Takes a boolean argument. If true, explicit module loading will
+        be denied. This allows to turn off module load and unload operations on modular
+        kernels. It is recomended to turn this on for most services that do not need special
+        file systems or extra kernel modules to work. Default to off. Enabling this option
+        removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
+        the unit, and installs a system call filter to block module system calls.
+        Note that limited automatic module loading due to user configuration or kernel
+        mapping tables might still happen as side effect of requested user operations,
+        both privileged and unprivileged. To disable module auto-load feature please see
+        <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+        <constant>kernel.modules_disabled</constant> mechanism and
+        <filename>/proc/sys/kernel/modules_disabled</filename> documentation.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>Personality=</varname></term>
 
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index eec4500c8c..b8720d7d3d 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -708,6 +708,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1075,7 +1076,7 @@ int bus_exec_context_set_transient_property(
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
                               "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
                               "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
-                              "ProtectControlGroups")) {
+                              "ProtectKernelModules", "ProtectControlGroups")) {
                 int b;
 
                 r = sd_bus_message_read(message, "b", &b);
@@ -1111,6 +1112,8 @@ int bus_exec_context_set_transient_property(
                                 c->remove_ipc = b;
                         else if (streq(name, "ProtectKernelTunables"))
                                 c->protect_kernel_tunables = b;
+                        else if (streq(name, "ProtectKernelModules"))
+                                c->protect_kernel_modules = b;
                         else if (streq(name, "ProtectControlGroups"))
                                 c->protect_control_groups = b;
 
diff --git a/src/core/execute.c b/src/core/execute.c
index 0c983f4953..7a278b7d31 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1436,6 +1436,50 @@ finish:
         return r;
 }
 
+static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
+        static const int module_syscalls[] = {
+                SCMP_SYS(delete_module),
+                SCMP_SYS(finit_module),
+                SCMP_SYS(init_module),
+        };
+
+        scmp_filter_ctx *seccomp;
+        unsigned i;
+        int r;
+
+        assert(c);
+
+        /* Turn of module syscalls on ProtectKernelModules=yes */
+
+        if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+                return 0;
+
+        seccomp = seccomp_init(SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
+
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
+        for (i = 0; i < ELEMENTSOF(module_syscalls); i++) {
+                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM),
+                                     module_syscalls[i], 0);
+                if (r < 0)
+                        goto finish;
+        }
+
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
+        return r;
+}
+
 static int apply_private_devices(Unit *u, const ExecContext *c) {
         const SystemCallFilterSet *set;
         scmp_filter_ctx *seccomp;
@@ -2690,6 +2734,14 @@ static int exec_child(
                         }
                 }
 
+                if (context->protect_kernel_modules) {
+                        r = apply_protect_kernel_modules(unit, context);
+                        if (r < 0) {
+                                *exit_status = EXIT_SECCOMP;
+                                return r;
+                        }
+                }
+
                 if (context->private_devices) {
                         r = apply_private_devices(unit, context);
                         if (r < 0) {
diff --git a/src/core/execute.h b/src/core/execute.h
index 449180c903..1de439c3ad 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -175,6 +175,7 @@ struct ExecContext {
         ProtectSystem protect_system;
         ProtectHome protect_home;
         bool protect_kernel_tunables;
+        bool protect_kernel_modules;
         bool protect_control_groups;
 
         bool no_new_privileges;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index c49c1d6732..a700d853cc 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -90,6 +90,7 @@ $1.InaccessiblePaths,            config_parse_namespace_path_strv,   0,
 $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)
 $1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)
 $1.ProtectKernelTunables,        config_parse_bool,                  0,                             offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules,         config_parse_bool,                  0,                             offsetof($1, exec_context.protect_kernel_modules)
 $1.ProtectControlGroups,         config_parse_bool,                  0,                             offsetof($1, exec_context.protect_control_groups)
 $1.PrivateNetwork,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_network)
 $1.PrivateUsers,                 config_parse_bool,                  0,                             offsetof($1, exec_context.private_users)
diff --git a/src/core/unit.c b/src/core/unit.c
index 690f7f7dd9..71f95c0b96 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3401,6 +3401,9 @@ int unit_patch_contexts(Unit *u) {
                 if (ec->private_devices)
                         ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
 
+                if (ec->protect_kernel_modules)
+                        ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
                 if (ec->dynamic_user) {
                         if (!ec->user) {
                                 r = user_from_unit_name(u, &ec->user);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index a550a370b5..f639e0e832 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -204,7 +204,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
                               "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectControlGroups")) {
+                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
+                              "ProtectKernelModules", "ProtectControlGroups")) {
 
                 r = parse_boolean(eq);
                 if (r < 0)
-- 
cgit v1.2.3-54-g00ecf


From 2cd0a735470894bd2d25147442285744764633a1 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Fri, 7 Oct 2016 20:38:05 +0200
Subject: core:sandbox: remove CAP_SYS_RAWIO on PrivateDevices=yes

The rawio system calls were filtered, but CAP_SYS_RAWIO allows to access raw
data through /proc, ioctl and some other exotic system calls...
---
 man/systemd.exec.xml | 4 ++--
 src/core/unit.c      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 3bea4976b3..c46c0f6dd8 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -946,8 +946,8 @@
         <filename>/dev/port</filename> and others. This is useful to securely turn off physical device access by the
         executed process. Defaults to false. Enabling this option will install a system call filter to block low-level
         I/O system calls that are grouped in the <varname>@raw-io</varname> set, will also remove
-        <constant>CAP_MKNOD</constant> from the capability bounding set for the unit (see above), and set
-        <varname>DevicePolicy=closed</varname> (see
+        <constant>CAP_MKNOD</constant> and <constant>CAP_SYS_RAWIO</constant> from the capability bounding set for
+        the unit (see above), and set <varname>DevicePolicy=closed</varname> (see
         <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
         for details). Note that using this setting will disconnect propagation of mounts from the service to the host
         (propagation in the opposite direction continues to work).  This means that this setting may not be used for
diff --git a/src/core/unit.c b/src/core/unit.c
index 71f95c0b96..67668bdc48 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3399,7 +3399,7 @@ int unit_patch_contexts(Unit *u) {
                         ec->no_new_privileges = true;
 
                 if (ec->private_devices)
-                        ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
+                        ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));
 
                 if (ec->protect_kernel_modules)
                         ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
-- 
cgit v1.2.3-54-g00ecf


From c575770b75b6cd15684fbacd249147bf5fd6ead7 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Wed, 12 Oct 2016 14:11:16 +0200
Subject: core:sandbox: lets make /lib/modules/ inaccessible on
 ProtectKernelModules=

Lets go further and make /lib/modules/ inaccessible for services that do
not have business with modules, this is a minor improvment but it may
help on setups with custom modules and they are limited... in regard of
kernel auto-load feature.

This change introduce NameSpaceInfo struct which we may embed later
inside ExecContext but for now lets just reduce the argument number to
setup_namespace() and merge ProtectKernelModules feature.
---
 man/systemd.exec.xml |  5 ++++-
 src/core/execute.c   | 11 ++++++++---
 src/core/namespace.c | 54 +++++++++++++++++++++++++++++++++++-----------------
 src/core/namespace.h | 14 +++++++++++---
 src/test/test-ns.c   | 12 +++++++++---
 5 files changed, 69 insertions(+), 27 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 4a68695348..249fcb0363 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1415,7 +1415,10 @@
         kernels. It is recomended to turn this on for most services that do not need special
         file systems or extra kernel modules to work. Default to off. Enabling this option
         removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
-        the unit, and installs a system call filter to block module system calls.
+        the unit, and installs a system call filter to block module system calls,
+        also <filename>/usr/lib/modules</filename> is made inaccessible. For this
+        setting the same restrictions regarding mount propagation and privileges
+        apply as for <varname>ReadOnlyPaths=</varname> and related calls, see above.
         Note that limited automatic module loading due to user configuration or kernel
         mapping tables might still happen as side effect of requested user operations,
         both privileged and unprivileged. To disable module auto-load feature please see
diff --git a/src/core/execute.c b/src/core/execute.c
index 7a278b7d31..dc078d96f0 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1766,6 +1766,7 @@ static bool exec_needs_mount_namespace(
             context->protect_system != PROTECT_SYSTEM_NO ||
             context->protect_home != PROTECT_HOME_NO ||
             context->protect_kernel_tunables ||
+            context->protect_kernel_modules ||
             context->protect_control_groups)
                 return true;
 
@@ -2493,6 +2494,12 @@ static int exec_child(
         if (needs_mount_namespace) {
                 _cleanup_free_ char **rw = NULL;
                 char *tmp = NULL, *var = NULL;
+                NameSpaceInfo ns_info = {
+                        .private_dev = context->private_devices,
+                        .protect_control_groups = context->protect_control_groups,
+                        .protect_kernel_tunables = context->protect_kernel_tunables,
+                        .protect_kernel_modules = context->protect_kernel_modules,
+                };
 
                 /* The runtime struct only contains the parent
                  * of the private /tmp, which is
@@ -2515,14 +2522,12 @@ static int exec_child(
 
                 r = setup_namespace(
                                 (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
+                                &ns_info,
                                 rw,
                                 context->read_only_paths,
                                 context->inaccessible_paths,
                                 tmp,
                                 var,
-                                context->private_devices,
-                                context->protect_kernel_tunables,
-                                context->protect_control_groups,
                                 context->protect_home,
                                 context->protect_system,
                                 context->mount_flags);
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 43a2f4ba6e..1195e9a854 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -97,6 +97,14 @@ static const TargetMount protect_kernel_tunables_table[] = {
         { "/sys/fs/cgroup",             READWRITE,      false }, /* READONLY is set by ProtectControlGroups= option */
 };
 
+/* ProtectKernelModules= option */
+static const TargetMount protect_kernel_modules_table[] = {
+#ifdef HAVE_SPLIT_USR
+        { "/lib/modules",               INACCESSIBLE,   true  },
+#endif
+        { "/usr/lib/modules",           INACCESSIBLE,   true  },
+};
+
 /*
  * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
  * system should be protected by ProtectSystem=
@@ -207,6 +215,13 @@ static int append_protect_kernel_tunables(BindMount **p, const char *root_direct
                                     ELEMENTSOF(protect_kernel_tunables_table));
 }
 
+static int append_protect_kernel_modules(BindMount **p, const char *root_directory) {
+        assert(p);
+
+        return append_target_mounts(p, root_directory, protect_kernel_modules_table,
+                                    ELEMENTSOF(protect_kernel_modules_table));
+}
+
 static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
         int r = 0;
 
@@ -660,14 +675,12 @@ static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned
 }
 
 static unsigned namespace_calculate_mounts(
+                const NameSpaceInfo *ns_info,
                 char** read_write_paths,
                 char** read_only_paths,
                 char** inaccessible_paths,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
-                bool private_dev,
-                bool protect_sysctl,
-                bool protect_cgroups,
                 ProtectHome protect_home,
                 ProtectSystem protect_system) {
 
@@ -690,22 +703,21 @@ static unsigned namespace_calculate_mounts(
                 strv_length(read_write_paths) +
                 strv_length(read_only_paths) +
                 strv_length(inaccessible_paths) +
-                private_dev +
-                (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
-                (protect_cgroups ? 1 : 0) +
+                ns_info->private_dev +
+                (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+                (ns_info->protect_control_groups ? 1 : 0) +
+                (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
                 protect_home_cnt + protect_system_cnt;
 }
 
 int setup_namespace(
                 const char* root_directory,
+                const NameSpaceInfo *ns_info,
                 char** read_write_paths,
                 char** read_only_paths,
                 char** inaccessible_paths,
                 const char* tmp_dir,
                 const char* var_tmp_dir,
-                bool private_dev,
-                bool protect_sysctl,
-                bool protect_cgroups,
                 ProtectHome protect_home,
                 ProtectSystem protect_system,
                 unsigned long mount_flags) {
@@ -718,13 +730,12 @@ int setup_namespace(
         if (mount_flags == 0)
                 mount_flags = MS_SHARED;
 
-        n = namespace_calculate_mounts(read_write_paths,
+        n = namespace_calculate_mounts(ns_info,
+                                       read_write_paths,
                                        read_only_paths,
                                        inaccessible_paths,
                                        tmp_dir, var_tmp_dir,
-                                       private_dev, protect_sysctl,
-                                       protect_cgroups, protect_home,
-                                       protect_system);
+                                       protect_home, protect_system);
 
         /* Set mount slave mode */
         if (root_directory || n > 0)
@@ -756,16 +767,25 @@ int setup_namespace(
                         m++;
                 }
 
-                if (private_dev) {
+                if (ns_info->private_dev) {
                         m->path = prefix_roota(root_directory, "/dev");
                         m->mode = PRIVATE_DEV;
                         m++;
                 }
 
-                if (protect_sysctl)
-                        append_protect_kernel_tunables(&m, root_directory);
+                if (ns_info->protect_kernel_tunables) {
+                        r = append_protect_kernel_tunables(&m, root_directory);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (ns_info->protect_kernel_modules) {
+                        r = append_protect_kernel_modules(&m, root_directory);
+                        if (r < 0)
+                                return r;
+                }
 
-                if (protect_cgroups) {
+                if (ns_info->protect_control_groups) {
                         m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
                         m->mode = READONLY;
                         m++;
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 6505bcc499..6310638e9a 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -4,6 +4,7 @@
   This file is part of systemd.
 
   Copyright 2010 Lennart Poettering
+  Copyright 2016 Djalal Harouni
 
   systemd is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +20,8 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+typedef struct NameSpaceInfo NameSpaceInfo;
+
 #include <stdbool.h>
 
 #include "macro.h"
@@ -40,15 +43,20 @@ typedef enum ProtectSystem {
         _PROTECT_SYSTEM_INVALID = -1
 } ProtectSystem;
 
+struct NameSpaceInfo {
+        bool private_dev:1;
+        bool protect_control_groups:1;
+        bool protect_kernel_tunables:1;
+        bool protect_kernel_modules:1;
+};
+
 int setup_namespace(const char *chroot,
+                    const NameSpaceInfo *ns_info,
                     char **read_write_paths,
                     char **read_only_paths,
                     char **inaccessible_paths,
                     const char *tmp_dir,
                     const char *var_tmp_dir,
-                    bool private_dev,
-                    bool protect_sysctl,
-                    bool protect_cgroups,
                     ProtectHome protect_home,
                     ProtectSystem protect_system,
                     unsigned long mount_flags);
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index c4d4da6d05..da7a8b0565 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -45,6 +45,14 @@ int main(int argc, char *argv[]) {
                 "/home/lennart/projects",
                 NULL
         };
+
+        static const NameSpaceInfo ns_info = {
+                .private_dev = true,
+                .protect_control_groups = true,
+                .protect_kernel_tunables = true,
+                .protect_kernel_modules = true,
+        };
+
         char *root_directory;
         char *projects_directory;
         int r;
@@ -69,14 +77,12 @@ int main(int argc, char *argv[]) {
                 log_info("Not chrooted");
 
         r = setup_namespace(root_directory,
+                            &ns_info,
                             (char **) writable,
                             (char **) readonly,
                             (char **) inaccessible,
                             tmp_dir,
                             var_tmp_dir,
-                            true,
-                            true,
-                            true,
                             PROTECT_HOME_NO,
                             PROTECT_SYSTEM_NO,
                             0);
-- 
cgit v1.2.3-54-g00ecf


From 4084e8fc8947566092fd4ee5a07405570fdbf84d Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 9 Oct 2016 12:28:25 +0200
Subject: core: check protect_kernel_modules and private_devices in order to
 setup NNP

---
 src/core/execute.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index dc078d96f0..71439bc3c2 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2115,6 +2115,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
                 c->memory_deny_write_execute ||
                 c->restrict_realtime ||
                 c->protect_kernel_tunables ||
+                c->protect_kernel_modules ||
+                c->private_devices ||
                 context_has_syscall_filters(c);
 }
 
-- 
cgit v1.2.3-54-g00ecf


From e66a2f658b182b1fe8e4bc46b384b9967abd2bf2 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 9 Oct 2016 12:31:51 +0200
Subject: core: make sure to dump ProtectKernelModules= value

---
 src/core/execute.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 71439bc3c2..869522704a 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -3190,6 +3190,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 "%sPrivateTmp: %s\n"
                 "%sPrivateDevices: %s\n"
                 "%sProtectKernelTunables: %s\n"
+                "%sProtectKernelModules: %s\n"
                 "%sProtectControlGroups: %s\n"
                 "%sPrivateNetwork: %s\n"
                 "%sPrivateUsers: %s\n"
@@ -3205,6 +3206,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->private_tmp),
                 prefix, yes_no(c->private_devices),
                 prefix, yes_no(c->protect_kernel_tunables),
+                prefix, yes_no(c->protect_kernel_modules),
                 prefix, yes_no(c->protect_control_groups),
                 prefix, yes_no(c->private_network),
                 prefix, yes_no(c->private_users),
-- 
cgit v1.2.3-54-g00ecf


From 7d862ab8c25d5c20eae6ed28c10c00c525c64a1f Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@fb.com>
Date: Fri, 14 Oct 2016 21:07:16 -0400
Subject: core: make settings for unified cgroup hierarchy supersede the ones
 for legacy hierarchy (#4269)

There are overlapping control group resource settings for the unified and
legacy hierarchies.  To help transition, the settings are translated back and
forth.  When both versions of a given setting are present, the one matching the
cgroup hierarchy type in use is used.  Unfortunately, this is more confusing to
use and document than necessary because there is no clear static precedence.

Update the translation logic so that the settings for the unified hierarchy are
always preferred.  systemd.resource-control man page is updated to reflect the
change and reorganized so that the deprecated settings are at the end in its
own section.
---
 man/systemd.resource-control.xml | 325 ++++++++++++++++++++-------------------
 src/core/cgroup.c                |  60 ++++----
 2 files changed, 195 insertions(+), 190 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index c11f420fe5..10aefbe0c5 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -140,10 +140,10 @@
       </variablelist>
     </para>
 
-    <para>To ease the transition, there is best-effort translation between the two versions of settings. If all
-    settings of a unit for a given resource type are for the other hierarchy type, the settings are translated and
-    applied. If there are any valid settings for the hierarchy in use, all translations are disabled for the resource
-    type. Mixing the two types of settings on a unit can lead to confusing results.</para>
+    <para>To ease the transition, there is best-effort translation between the two versions of settings. For each
+    controller, if any of the settings for the unified hierarchy are present, all settings for the legacy hierarchy are
+    ignored. If the resulting settings are for the other type of hierarchy, the configurations are translated before
+    application.</para>
 
     <para>Legacy control group hierarchy (see <ulink
     url="https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt">cgroups.txt</ulink>), also called cgroup-v1,
@@ -196,30 +196,7 @@
 
           <para>Implies <literal>CPUAccounting=true</literal>.</para>
 
-          <para>These settings are supported only if the unified control group hierarchy is used.</para>
-        </listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
-        <term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term>
-
-        <listitem>
-          <para>Assign the specified CPU time share weight to the processes executed. These options take an integer
-          value and control the <literal>cpu.shares</literal> control group attribute. The allowed range is 2 to
-          262144. Defaults to 1024. For details about this control group attribute, see <ulink
-          url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
-          The available CPU time is split up among all units within one slice relative to their CPU time share
-          weight.</para>
-
-          <para>While <varname>StartupCPUShares=</varname> only applies to the startup phase of the system,
-          <varname>CPUShares=</varname> applies to normal runtime of the system, and if the former is not set also to
-          the startup phase. Using <varname>StartupCPUShares=</varname> allows prioritizing specific services at
-          boot-up differently than during normal runtime.</para>
-
-          <para>Implies <literal>CPUAccounting=true</literal>.</para>
-
-          <para>These settings are supported only if the legacy control group hierarchy is used.</para>
+          <para>These settings replace <varname>CPUShares=</varname> and <varname>StartupCPUShares=</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -239,8 +216,6 @@
           20% CPU time on one CPU.</para>
 
           <para>Implies <literal>CPUAccounting=true</literal>.</para>
-
-          <para>This setting is supported on both unified and legacy control group hierarchies.</para>
         </listitem>
       </varlistentry>
 
@@ -276,7 +251,8 @@
 
           <para>Implies <literal>MemoryAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used.</para>
+          <para>This setting is supported only if the unified control group hierarchy is used and disables
+          <varname>MemoryLimit=</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -298,7 +274,8 @@
 
           <para>Implies <literal>MemoryAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used.</para>
+          <para>This setting is supported only if the unified control group hierarchy is used and disables
+          <varname>MemoryLimit=</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -320,8 +297,7 @@
 
           <para>Implies <literal>MemoryAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used. Use
-          <varname>MemoryLimit=</varname> on systems using the legacy control group hierarchy.</para>
+          <para>This setting replaces <varname>MemoryLimit=</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -339,28 +315,8 @@
 
           <para>Implies <literal>MemoryAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used.</para>
-        </listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
-
-        <listitem>
-          <para>Specify the limit on maximum memory usage of the executed processes. The limit specifies how much
-          process and kernel memory can be used by tasks in this unit. Takes a memory size in bytes. If the value is
-          suffixed with K, M, G or T, the specified memory size is parsed as Kilobytes, Megabytes, Gigabytes, or
-          Terabytes (with the base 1024), respectively. Alternatively, a percentage value may be specified, which is
-          taken relative to the installed physical memory on the system. If assigned the special value
-          <literal>infinity</literal>, no memory limit is applied. This controls the
-          <literal>memory.limit_in_bytes</literal> control group attribute. For details about this control group
-          attribute, see <ulink
-          url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para>
-
-          <para>Implies <literal>MemoryAccounting=true</literal>.</para>
-
-          <para>This setting is supported only if the legacy control group hierarchy is used. Use
-          <varname>MemoryMax=</varname> on systems using the unified control group hierarchy.</para>
+          <para>This setting is supported only if the unified control group hierarchy is used and disables
+          <varname>MemoryLimit=</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -412,8 +368,8 @@
           in
           <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used. Use
-          <varname>BlockIOAccounting=</varname> on systems using the legacy control group hierarchy.</para>
+          <para>This setting replaces <varname>BlockIOAccounting=</varname> and disables settings prefixed with
+          <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -438,9 +394,8 @@
 
           <para>Implies <literal>IOAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used. Use
-          <varname>BlockIOWeight=</varname> and <varname>StartupBlockIOWeight=</varname> on systems using the legacy
-          control group hierarchy.</para>
+          <para>These settings replace <varname>BlockIOWeight=</varname> and <varname>StartupBlockIOWeight=</varname>
+          and disable settings prefixed with <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -459,8 +414,8 @@
 
           <para>Implies <literal>IOAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used. Use
-          <varname>BlockIODeviceWeight=</varname> on systems using the legacy control group hierarchy.</para>
+          <para>This setting replaces <varname>BlockIODeviceWeight=</varname> and disables settings prefixed with
+          <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -484,8 +439,9 @@
 
           <para>Implies <literal>IOAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used. Use
-          <varname>BlockIOAccounting=</varname> on systems using the legacy control group hierarchy.</para>
+          <para>These settings replace <varname>BlockIOReadBandwidth=</varname> and
+          <varname>BlockIOWriteBandwidth=</varname> and disable settings prefixed with <varname>BlockIO</varname> or
+          <varname>StartupBlockIO</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -509,100 +465,8 @@
 
           <para>Implies <literal>IOAccounting=true</literal>.</para>
 
-          <para>This setting is supported only if the unified control group hierarchy is used.</para>
-        </listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term><varname>BlockIOAccounting=</varname></term>
-
-        <listitem>
-          <para>Turn on Block I/O accounting for this unit, if the legacy control group hierarchy is used on the
-          system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly
-          turn it on for all units contained in the same slice and all for its parent slices and the units contained
-          therein. The system default for this setting may be controlled with
-          <varname>DefaultBlockIOAccounting=</varname> in
-          <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
-
-          <para>This setting is supported only if the legacy control group hierarchy is used. Use
-          <varname>IOAccounting=</varname> on systems using the unified control group hierarchy.</para>
-        </listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term><varname>BlockIOWeight=<replaceable>weight</replaceable></varname></term>
-        <term><varname>StartupBlockIOWeight=<replaceable>weight</replaceable></varname></term>
-
-        <listitem><para>Set the default overall block I/O weight for the executed processes, if the legacy control
-        group hierarchy is used on the system. Takes a single weight value (between 10 and 1000) to set the default
-        block I/O weight. This controls the <literal>blkio.weight</literal> control group attribute, which defaults to
-        500. For details about this control group attribute, see <ulink
-        url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
-        The available I/O bandwidth is split up among all units within one slice relative to their block I/O
-        weight.</para>
-
-        <para>While <varname>StartupBlockIOWeight=</varname> only
-        applies to the startup phase of the system,
-        <varname>BlockIOWeight=</varname> applies to the later runtime
-        of the system, and if the former is not set also to the
-        startup phase. This allows prioritizing specific services at
-        boot-up differently than during runtime.</para>
-
-        <para>Implies
-        <literal>BlockIOAccounting=true</literal>.</para>
-
-        <para>This setting is supported only if the legacy control group hierarchy is used. Use
-        <varname>IOWeight=</varname> and <varname>StartupIOWeight=</varname> on systems using the unified control group
-        hierarchy.</para>
-
-      </listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term><varname>BlockIODeviceWeight=<replaceable>device</replaceable> <replaceable>weight</replaceable></varname></term>
-
-        <listitem>
-          <para>Set the per-device overall block I/O weight for the executed processes, if the legacy control group
-          hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
-          the device specific weight value, between 10 and 1000. (Example: "/dev/sda 500"). The file path may be
-          specified as path to a block device node or as any other file, in which case the backing block device of the
-          file system of the file is determined. This controls the <literal>blkio.weight_device</literal> control group
-          attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For
-          details about this control group attribute, see <ulink
-          url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.</para>
-
-          <para>Implies
-          <literal>BlockIOAccounting=true</literal>.</para>
-
-          <para>This setting is supported only if the legacy control group hierarchy is used. Use
-          <varname>IODeviceWeight=</varname> on systems using the unified control group hierarchy.</para>
-        </listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term><varname>BlockIOReadBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
-        <term><varname>BlockIOWriteBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
-
-        <listitem>
-          <para>Set the per-device overall block I/O bandwidth limit for the executed processes, if the legacy control
-          group hierarchy is used on the system. Takes a space-separated pair of a file path and a bandwidth value (in
-          bytes per second) to specify the device specific bandwidth. The file path may be a path to a block device
-          node, or as any other file in which case the backing block device of the file system of the file is used. If
-          the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is parsed as Kilobytes, Megabytes,
-          Gigabytes, or Terabytes, respectively, to the base of 1000. (Example:
-          "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the
-          <literal>blkio.throttle.read_bps_device</literal> and <literal>blkio.throttle.write_bps_device</literal>
-          control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For
-          details about these control group attributes, see <ulink
-          url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
-          </para>
-
-          <para>Implies
-          <literal>BlockIOAccounting=true</literal>.</para>
-
-          <para>This setting is supported only if the legacy control group hierarchy is used. Use
-          <varname>IOReadBandwidthMax=</varname> and <varname>IOWriteBandwidthMax=</varname> on systems using the
-          unified control group hierarchy.</para>
+          <para>These settings are supported only if the unified control group hierarchy is used and disable settings
+          prefixed with <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
         </listitem>
       </varlistentry>
 
@@ -733,6 +597,149 @@
     </variablelist>
   </refsect1>
 
+  <refsect1>
+    <title>Deprecated Options</title>
+
+    <para>The following options are deprecated. Use the indicated superseding options instead:</para>
+
+    <variablelist class='unit-directives'>
+
+      <varlistentry>
+        <term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
+        <term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term>
+
+        <listitem>
+          <para>Assign the specified CPU time share weight to the processes executed. These options take an integer
+          value and control the <literal>cpu.shares</literal> control group attribute. The allowed range is 2 to
+          262144. Defaults to 1024. For details about this control group attribute, see <ulink
+          url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
+          The available CPU time is split up among all units within one slice relative to their CPU time share
+          weight.</para>
+
+          <para>While <varname>StartupCPUShares=</varname> only applies to the startup phase of the system,
+          <varname>CPUShares=</varname> applies to normal runtime of the system, and if the former is not set also to
+          the startup phase. Using <varname>StartupCPUShares=</varname> allows prioritizing specific services at
+          boot-up differently than during normal runtime.</para>
+
+          <para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+          <para>These settings are deprecated. Use <varname>CPUWeight=</varname> and
+          <varname>StartupCPUWeight=</varname> instead.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
+
+        <listitem>
+          <para>Specify the limit on maximum memory usage of the executed processes. The limit specifies how much
+          process and kernel memory can be used by tasks in this unit. Takes a memory size in bytes. If the value is
+          suffixed with K, M, G or T, the specified memory size is parsed as Kilobytes, Megabytes, Gigabytes, or
+          Terabytes (with the base 1024), respectively. Alternatively, a percentage value may be specified, which is
+          taken relative to the installed physical memory on the system. If assigned the special value
+          <literal>infinity</literal>, no memory limit is applied. This controls the
+          <literal>memory.limit_in_bytes</literal> control group attribute. For details about this control group
+          attribute, see <ulink
+          url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para>
+
+          <para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+          <para>This setting is deprecated. Use <varname>MemoryMax=</varname> instead.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>BlockIOAccounting=</varname></term>
+
+        <listitem>
+          <para>Turn on Block I/O accounting for this unit, if the legacy control group hierarchy is used on the
+          system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly
+          turn it on for all units contained in the same slice and all for its parent slices and the units contained
+          therein. The system default for this setting may be controlled with
+          <varname>DefaultBlockIOAccounting=</varname> in
+          <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+          <para>This setting is deprecated. Use <varname>IOAccounting=</varname> instead.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>BlockIOWeight=<replaceable>weight</replaceable></varname></term>
+        <term><varname>StartupBlockIOWeight=<replaceable>weight</replaceable></varname></term>
+
+        <listitem><para>Set the default overall block I/O weight for the executed processes, if the legacy control
+        group hierarchy is used on the system. Takes a single weight value (between 10 and 1000) to set the default
+        block I/O weight. This controls the <literal>blkio.weight</literal> control group attribute, which defaults to
+        500. For details about this control group attribute, see <ulink
+        url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
+        The available I/O bandwidth is split up among all units within one slice relative to their block I/O
+        weight.</para>
+
+        <para>While <varname>StartupBlockIOWeight=</varname> only
+        applies to the startup phase of the system,
+        <varname>BlockIOWeight=</varname> applies to the later runtime
+        of the system, and if the former is not set also to the
+        startup phase. This allows prioritizing specific services at
+        boot-up differently than during runtime.</para>
+
+        <para>Implies
+        <literal>BlockIOAccounting=true</literal>.</para>
+
+        <para>These settings are deprecated. Use <varname>IOWeight=</varname> and <varname>StartupIOWeight=</varname>
+        instead.</para>
+
+      </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>BlockIODeviceWeight=<replaceable>device</replaceable> <replaceable>weight</replaceable></varname></term>
+
+        <listitem>
+          <para>Set the per-device overall block I/O weight for the executed processes, if the legacy control group
+          hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
+          the device specific weight value, between 10 and 1000. (Example: "/dev/sda 500"). The file path may be
+          specified as path to a block device node or as any other file, in which case the backing block device of the
+          file system of the file is determined. This controls the <literal>blkio.weight_device</literal> control group
+          attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For
+          details about this control group attribute, see <ulink
+          url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.</para>
+
+          <para>Implies
+          <literal>BlockIOAccounting=true</literal>.</para>
+
+          <para>This setting is deprecated. Use <varname>IODeviceWeight=</varname> instead.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>BlockIOReadBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
+        <term><varname>BlockIOWriteBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
+
+        <listitem>
+          <para>Set the per-device overall block I/O bandwidth limit for the executed processes, if the legacy control
+          group hierarchy is used on the system. Takes a space-separated pair of a file path and a bandwidth value (in
+          bytes per second) to specify the device specific bandwidth. The file path may be a path to a block device
+          node, or as any other file in which case the backing block device of the file system of the file is used. If
+          the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is parsed as Kilobytes, Megabytes,
+          Gigabytes, or Terabytes, respectively, to the base of 1000. (Example:
+          "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the
+          <literal>blkio.throttle.read_bps_device</literal> and <literal>blkio.throttle.write_bps_device</literal>
+          control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For
+          details about these control group attributes, see <ulink
+          url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
+          </para>
+
+          <para>Implies
+          <literal>BlockIOAccounting=true</literal>.</para>
+
+          <para>These settings are deprecated. Use <varname>IOReadBandwidthMax=</varname> and
+          <varname>IOWriteBandwidthMax=</varname> instead.</para>
+        </listitem>
+      </varlistentry>
+
+    </variablelist>
+  </refsect1>
+
   <refsect1>
     <title>See Also</title>
     <para>
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 20bdbc39d0..23a92f9651 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -687,16 +687,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                 } else {
                         uint64_t shares;
 
-                        if (has_shares)
-                                shares = cgroup_context_cpu_shares(c, state);
-                        else if (has_weight) {
+                        if (has_weight) {
                                 uint64_t weight = cgroup_context_cpu_weight(c, state);
 
                                 shares = cgroup_cpu_weight_to_shares(weight);
 
                                 log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
                                                   weight, shares, path);
-                        } else
+                        } else if (has_shares)
+                                shares = cgroup_context_cpu_shares(c, state);
+                        else
                                 shares = CGROUP_CPU_SHARES_DEFAULT;
 
                         cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
@@ -788,16 +788,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         char buf[DECIMAL_STR_MAX(uint64_t)+1];
                         uint64_t weight;
 
-                        if (has_blockio)
-                                weight = cgroup_context_blkio_weight(c, state);
-                        else if (has_io) {
+                        if (has_io) {
                                 uint64_t io_weight = cgroup_context_io_weight(c, state);
 
                                 weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
 
                                 log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
                                                   io_weight, weight);
-                        } else
+                        } else if (has_blockio)
+                                weight = cgroup_context_blkio_weight(c, state);
+                        else
                                 weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
 
                         xsprintf(buf, "%" PRIu64 "\n", weight);
@@ -806,13 +806,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                               "Failed to set blkio.weight: %m");
 
-                        if (has_blockio) {
-                                CGroupBlockIODeviceWeight *w;
-
-                                /* FIXME: no way to reset this list */
-                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
-                                        cgroup_apply_blkio_device_weight(u, w->path, w->weight);
-                        } else if (has_io) {
+                        if (has_io) {
                                 CGroupIODeviceWeight *w;
 
                                 /* FIXME: no way to reset this list */
@@ -824,18 +818,17 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
 
                                         cgroup_apply_blkio_device_weight(u, w->path, weight);
                                 }
+                        } else if (has_blockio) {
+                                CGroupBlockIODeviceWeight *w;
+
+                                /* FIXME: no way to reset this list */
+                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+                                        cgroup_apply_blkio_device_weight(u, w->path, w->weight);
                         }
                 }
 
                 /* Apply limits and free ones without config. */
-                if (has_blockio) {
-                        CGroupBlockIODeviceBandwidth *b, *next;
-
-                        LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
-                                if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
-                                        cgroup_context_free_blockio_device_bandwidth(c, b);
-                        }
-                } else if (has_io) {
+                if (has_io) {
                         CGroupIODeviceLimit *l, *next;
 
                         LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
@@ -845,13 +838,19 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
                                         cgroup_context_free_io_device_limit(c, l);
                         }
+                } else if (has_blockio) {
+                        CGroupBlockIODeviceBandwidth *b, *next;
+
+                        LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths)
+                                if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
+                                        cgroup_context_free_blockio_device_bandwidth(c, b);
                 }
         }
 
         if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
                 if (cg_all_unified() > 0) {
-                        uint64_t max = c->memory_max;
-                        uint64_t swap_max = c->memory_swap_max;
+                        uint64_t max;
+                        uint64_t swap_max = CGROUP_LIMIT_MAX;
 
                         if (cgroup_context_has_unified_memory_config(c)) {
                                 max = c->memory_max;
@@ -869,14 +868,13 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
                 } else {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
-                        uint64_t val = c->memory_limit;
+                        uint64_t val;
 
-                        if (val == CGROUP_LIMIT_MAX) {
+                        if (cgroup_context_has_unified_memory_config(c)) {
                                 val = c->memory_max;
-
-                                if (val != CGROUP_LIMIT_MAX)
-                                        log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
-                        }
+                                log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val);
+                        } else
+                                val = c->memory_limit;
 
                         if (val == CGROUP_LIMIT_MAX)
                                 strncpy(buf, "-1\n", sizeof(buf));
-- 
cgit v1.2.3-54-g00ecf


From 6b430fdb7c0c2c52ea69a7d56f23d739218b13d0 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sun, 16 Oct 2016 18:28:30 -0400
Subject: tree-wide: use mfree more

---
 coccinelle/mfree_return.cocci             |  6 ++++++
 src/basic/bitmap.c                        |  6 ++----
 src/basic/env-util.c                      |  3 +--
 src/basic/prioq.c                         |  4 +---
 src/basic/replace-var.c                   |  3 +--
 src/basic/strbuf.c                        |  3 +--
 src/basic/string-util.c                   |  6 ++----
 src/basic/strv.c                          |  9 +++------
 src/core/dynamic-user.c                   |  4 +---
 src/core/execute.c                        |  4 +---
 src/core/manager.c                        |  3 +--
 src/core/transaction.c                    |  6 ++----
 src/core/unit.c                           |  6 ++----
 src/cryptsetup/cryptsetup-generator.c     |  9 +++------
 src/import/curl-util.c                    |  4 +---
 src/import/export-raw.c                   |  4 +---
 src/import/export-tar.c                   |  4 +---
 src/import/import-raw.c                   |  4 +---
 src/import/import-tar.c                   |  4 +---
 src/import/importd.c                      |  6 ++----
 src/import/pull-job.c                     |  4 +---
 src/import/pull-raw.c                     |  4 +---
 src/import/pull-tar.c                     |  4 +---
 src/journal-remote/journal-remote-write.c | 10 +++-------
 src/journal/journal-file.c                |  3 +--
 src/journal/mmap-cache.c                  |  6 ++----
 src/journal/sd-journal.c                  |  9 +++------
 src/libsystemd-network/ndisc-router.c     |  3 +--
 src/libsystemd-network/sd-dhcp-client.c   |  4 +---
 src/libsystemd-network/sd-dhcp-lease.c    |  4 +---
 src/libsystemd-network/sd-dhcp-server.c   |  4 +---
 src/libsystemd-network/sd-dhcp6-client.c  |  4 +---
 src/libsystemd-network/sd-dhcp6-lease.c   |  4 +---
 src/libsystemd-network/sd-ipv4acd.c       |  4 +---
 src/libsystemd-network/sd-ipv4ll.c        |  4 +---
 src/libsystemd-network/sd-lldp.c          |  4 +---
 src/libsystemd-network/sd-ndisc.c         |  4 +---
 src/libsystemd/sd-bus/bus-slot.c          |  4 +---
 src/libsystemd/sd-bus/bus-track.c         |  8 ++------
 src/libudev/libudev-list.c                | 14 ++++++--------
 src/libudev/libudev-monitor.c             |  3 +--
 src/login/logind-button.c                 |  9 +++------
 src/login/logind-device.c                 |  9 +++------
 src/login/logind-inhibit.c                |  9 +++------
 src/login/logind-seat.c                   |  9 +++------
 src/login/logind-session.c                | 12 ++++--------
 src/machine/machine.c                     |  4 +---
 src/machine/operation.c                   |  3 +--
 src/network/networkd-wait-online-link.c   |  3 +--
 src/nspawn/nspawn-settings.c              |  4 +---
 src/resolve/resolved-dns-query.c          |  8 ++------
 src/resolve/resolved-dns-rr.c             | 10 +++-------
 src/resolve/resolved-dns-scope.c          |  4 +---
 src/resolve/resolved-dns-search-domain.c  |  4 +---
 src/resolve/resolved-dns-server.c         |  3 +--
 src/resolve/resolved-dns-stream.c         |  4 +---
 src/resolve/resolved-dns-transaction.c    |  3 +--
 src/resolve/resolved-link.c               |  6 ++----
 src/resolve/resolved-manager.c            |  4 +---
 src/shared/machine-image.c                |  3 +--
 src/shared/ptyfwd.c                       |  3 +--
 src/timesync/timesyncd-server.c           |  7 ++-----
 src/udev/udev-ctrl.c                      |  3 +--
 src/udev/udev-rules.c                     |  3 +--
 64 files changed, 105 insertions(+), 228 deletions(-)
 create mode 100644 coccinelle/mfree_return.cocci

(limited to 'src/core')

diff --git a/coccinelle/mfree_return.cocci b/coccinelle/mfree_return.cocci
new file mode 100644
index 0000000000..8119fe07f2
--- /dev/null
+++ b/coccinelle/mfree_return.cocci
@@ -0,0 +1,6 @@
+@@
+expression p;
+@@
+- free(p);
+- return NULL;
++ return mfree(p);
diff --git a/src/basic/bitmap.c b/src/basic/bitmap.c
index f4b12fc261..f6212e6151 100644
--- a/src/basic/bitmap.c
+++ b/src/basic/bitmap.c
@@ -58,10 +58,8 @@ Bitmap *bitmap_copy(Bitmap *b) {
                 return NULL;
 
         ret->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps);
-        if (!ret->bitmaps) {
-                free(ret);
-                return NULL;
-        }
+        if (!ret->bitmaps)
+                return mfree(ret);
 
         ret->n_bitmaps = ret->bitmaps_allocated = b->n_bitmaps;
         return ret;
diff --git a/src/basic/env-util.c b/src/basic/env-util.c
index 7f5fddb700..b74290d6fd 100644
--- a/src/basic/env-util.c
+++ b/src/basic/env-util.c
@@ -544,8 +544,7 @@ char *replace_env(const char *format, char **env) {
         return k;
 
 fail:
-        free(r);
-        return NULL;
+        return mfree(r);
 }
 
 char **replace_env_argv(char **argv, char **env) {
diff --git a/src/basic/prioq.c b/src/basic/prioq.c
index d2ec516d29..4570b8e4ba 100644
--- a/src/basic/prioq.c
+++ b/src/basic/prioq.c
@@ -62,9 +62,7 @@ Prioq* prioq_free(Prioq *q) {
                 return NULL;
 
         free(q->items);
-        free(q);
-
-        return NULL;
+        return mfree(q);
 }
 
 int prioq_ensure_allocated(Prioq **q, compare_func_t compare_func) {
diff --git a/src/basic/replace-var.c b/src/basic/replace-var.c
index 6a204b9ec3..0d21423a9c 100644
--- a/src/basic/replace-var.c
+++ b/src/basic/replace-var.c
@@ -107,6 +107,5 @@ char *replace_var(const char *text, char *(*lookup)(const char *variable, void*u
         return r;
 
 oom:
-        free(r);
-        return NULL;
+        return mfree(r);
 }
diff --git a/src/basic/strbuf.c b/src/basic/strbuf.c
index 4bef87d3c2..00aaf9e621 100644
--- a/src/basic/strbuf.c
+++ b/src/basic/strbuf.c
@@ -62,8 +62,7 @@ struct strbuf *strbuf_new(void) {
 err:
         free(str->buf);
         free(str->root);
-        free(str);
-        return NULL;
+        return mfree(str);
 }
 
 static void strbuf_node_cleanup(struct strbuf_node *node) {
diff --git a/src/basic/string-util.c b/src/basic/string-util.c
index dc7de5dab8..6b06e643c9 100644
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -610,8 +610,7 @@ char *strreplace(const char *text, const char *old_string, const char *new_strin
         return r;
 
 oom:
-        free(r);
-        return NULL;
+        return mfree(r);
 }
 
 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
@@ -682,8 +681,7 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz) {
 
         if (ferror(f)) {
                 fclose(f);
-                free(obuf);
-                return NULL;
+                return mfree(obuf);
         }
 
         fclose(f);
diff --git a/src/basic/strv.c b/src/basic/strv.c
index 34e464d253..0eec868eed 100644
--- a/src/basic/strv.c
+++ b/src/basic/strv.c
@@ -87,8 +87,7 @@ void strv_clear(char **l) {
 
 char **strv_free(char **l) {
         strv_clear(l);
-        free(l);
-        return NULL;
+        return mfree(l);
 }
 
 char **strv_free_erase(char **l) {
@@ -426,8 +425,7 @@ char *strv_join_quoted(char **l) {
         return buf;
 
  oom:
-        free(buf);
-        return NULL;
+        return mfree(buf);
 }
 
 int strv_push(char ***l, char *value) {
@@ -869,8 +867,7 @@ char ***strv_free_free(char ***l) {
         for (i = l; *i; i++)
                 strv_free(*i);
 
-        free(l);
-        return NULL;
+        return mfree(l);
 }
 
 char **strv_skip(char **l, size_t n) {
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 1043da3eb7..e1846e1adb 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -42,9 +42,7 @@ static DynamicUser* dynamic_user_free(DynamicUser *d) {
                 (void) hashmap_remove(d->manager->dynamic_users, d->name);
 
         safe_close_pair(d->storage_socket);
-        free(d);
-
-        return NULL;
+        return mfree(d);
 }
 
 static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) {
diff --git a/src/core/execute.c b/src/core/execute.c
index 869522704a..59c2bd0dd2 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -3740,9 +3740,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
         free(r->tmp_dir);
         free(r->var_tmp_dir);
         safe_close_pair(r->netns_storage_socket);
-        free(r);
-
-        return NULL;
+        return mfree(r);
 }
 
 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
diff --git a/src/core/manager.c b/src/core/manager.c
index 3569249788..50aae0d1ba 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1119,8 +1119,7 @@ Manager* manager_free(Manager *m) {
         hashmap_free(m->uid_refs);
         hashmap_free(m->gid_refs);
 
-        free(m);
-        return NULL;
+        return mfree(m);
 }
 
 void manager_enumerate(Manager *m) {
diff --git a/src/core/transaction.c b/src/core/transaction.c
index 8370b864fb..e22e3b30c2 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -1085,10 +1085,8 @@ Transaction *transaction_new(bool irreversible) {
                 return NULL;
 
         tr->jobs = hashmap_new(NULL);
-        if (!tr->jobs) {
-                free(tr);
-                return NULL;
-        }
+        if (!tr->jobs)
+                return mfree(tr);
 
         tr->irreversible = irreversible;
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 67668bdc48..a6b8ecdcb7 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -88,10 +88,8 @@ Unit *unit_new(Manager *m, size_t size) {
                 return NULL;
 
         u->names = set_new(&string_hash_ops);
-        if (!u->names) {
-                free(u);
-                return NULL;
-        }
+        if (!u->names)
+                return mfree(u);
 
         u->manager = m;
         u->type = _UNIT_TYPE_INVALID;
diff --git a/src/cryptsetup/cryptsetup-generator.c b/src/cryptsetup/cryptsetup-generator.c
index 8ac5ab730a..de0a3b6f9c 100644
--- a/src/cryptsetup/cryptsetup-generator.c
+++ b/src/cryptsetup/cryptsetup-generator.c
@@ -264,16 +264,13 @@ static crypto_device *get_crypto_device(const char *uuid) {
                 d->keyfile = d->options = d->name = NULL;
 
                 d->uuid = strdup(uuid);
-                if (!d->uuid) {
-                        free(d);
-                        return NULL;
-                }
+                if (!d->uuid)
+                        return mfree(d);
 
                 r = hashmap_put(arg_disks, d->uuid, d);
                 if (r < 0) {
                         free(d->uuid);
-                        free(d);
-                        return NULL;
+                        return mfree(d);
                 }
         }
 
diff --git a/src/import/curl-util.c b/src/import/curl-util.c
index 6990c47f48..734e1560e6 100644
--- a/src/import/curl-util.c
+++ b/src/import/curl-util.c
@@ -235,9 +235,7 @@ CurlGlue *curl_glue_unref(CurlGlue *g) {
 
         sd_event_source_unref(g->timer);
         sd_event_unref(g->event);
-        free(g);
-
-        return NULL;
+        return mfree(g);
 }
 
 int curl_glue_new(CurlGlue **glue, sd_event *event) {
diff --git a/src/import/export-raw.c b/src/import/export-raw.c
index 6136b677dd..a3dbce1954 100644
--- a/src/import/export-raw.c
+++ b/src/import/export-raw.c
@@ -87,9 +87,7 @@ RawExport *raw_export_unref(RawExport *e) {
 
         free(e->buffer);
         free(e->path);
-        free(e);
-
-        return NULL;
+        return mfree(e);
 }
 
 int raw_export_new(
diff --git a/src/import/export-tar.c b/src/import/export-tar.c
index d79c27f2d0..3bb6027431 100644
--- a/src/import/export-tar.c
+++ b/src/import/export-tar.c
@@ -91,9 +91,7 @@ TarExport *tar_export_unref(TarExport *e) {
 
         free(e->buffer);
         free(e->path);
-        free(e);
-
-        return NULL;
+        return mfree(e);
 }
 
 int tar_export_new(
diff --git a/src/import/import-raw.c b/src/import/import-raw.c
index fd6b9f7703..29f3f896e5 100644
--- a/src/import/import-raw.c
+++ b/src/import/import-raw.c
@@ -100,9 +100,7 @@ RawImport* raw_import_unref(RawImport *i) {
         free(i->final_path);
         free(i->image_root);
         free(i->local);
-        free(i);
-
-        return NULL;
+        return mfree(i);
 }
 
 int raw_import_new(
diff --git a/src/import/import-tar.c b/src/import/import-tar.c
index 8b81324fde..22f9b8c5ea 100644
--- a/src/import/import-tar.c
+++ b/src/import/import-tar.c
@@ -107,9 +107,7 @@ TarImport* tar_import_unref(TarImport *i) {
         free(i->final_path);
         free(i->image_root);
         free(i->local);
-        free(i);
-
-        return NULL;
+        return mfree(i);
 }
 
 int tar_import_new(
diff --git a/src/import/importd.c b/src/import/importd.c
index 28b4302cb3..9d31a956a5 100644
--- a/src/import/importd.c
+++ b/src/import/importd.c
@@ -141,8 +141,7 @@ static Transfer *transfer_unref(Transfer *t) {
         safe_close(t->stdin_fd);
         safe_close(t->stdout_fd);
 
-        free(t);
-        return NULL;
+        return mfree(t);
 }
 
 DEFINE_TRIVIAL_CLEANUP_FUNC(Transfer*, transfer_unref);
@@ -548,8 +547,7 @@ static Manager *manager_unref(Manager *m) {
         m->bus = sd_bus_flush_close_unref(m->bus);
         sd_event_unref(m->event);
 
-        free(m);
-        return NULL;
+        return mfree(m);
 }
 
 DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_unref);
diff --git a/src/import/pull-job.c b/src/import/pull-job.c
index 6bcf35ef4e..e550df2c57 100644
--- a/src/import/pull-job.c
+++ b/src/import/pull-job.c
@@ -50,9 +50,7 @@ PullJob* pull_job_unref(PullJob *j) {
         free(j->payload);
         free(j->checksum);
 
-        free(j);
-
-        return NULL;
+        return mfree(j);
 }
 
 static void pull_job_finish(PullJob *j, int ret) {
diff --git a/src/import/pull-raw.c b/src/import/pull-raw.c
index 8993402821..0cf410a5d9 100644
--- a/src/import/pull-raw.c
+++ b/src/import/pull-raw.c
@@ -110,9 +110,7 @@ RawPull* raw_pull_unref(RawPull *i) {
         free(i->settings_path);
         free(i->image_root);
         free(i->local);
-        free(i);
-
-        return NULL;
+        return mfree(i);
 }
 
 int raw_pull_new(
diff --git a/src/import/pull-tar.c b/src/import/pull-tar.c
index 8c61c46f73..68e2397b02 100644
--- a/src/import/pull-tar.c
+++ b/src/import/pull-tar.c
@@ -114,9 +114,7 @@ TarPull* tar_pull_unref(TarPull *i) {
         free(i->settings_path);
         free(i->image_root);
         free(i->local);
-        free(i);
-
-        return NULL;
+        return mfree(i);
 }
 
 int tar_pull_new(
diff --git a/src/journal-remote/journal-remote-write.c b/src/journal-remote/journal-remote-write.c
index 7bba52566e..8729372aa3 100644
--- a/src/journal-remote/journal-remote-write.c
+++ b/src/journal-remote/journal-remote-write.c
@@ -75,10 +75,8 @@ Writer* writer_new(RemoteServer *server) {
         memset(&w->metrics, 0xFF, sizeof(w->metrics));
 
         w->mmap = mmap_cache_new();
-        if (!w->mmap) {
-                free(w);
-                return NULL;
-        }
+        if (!w->mmap)
+                return mfree(w);
 
         w->n_ref = 1;
         w->server = server;
@@ -103,9 +101,7 @@ Writer* writer_free(Writer *w) {
         if (w->mmap)
                 mmap_cache_unref(w->mmap);
 
-        free(w);
-
-        return NULL;
+        return mfree(w);
 }
 
 Writer* writer_unref(Writer *w) {
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 49199b269f..d3e0214731 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -394,8 +394,7 @@ JournalFile* journal_file_close(JournalFile *f) {
                 gcry_md_close(f->hmac);
 #endif
 
-        free(f);
-        return NULL;
+        return mfree(f);
 }
 
 void journal_file_close_set(Set *s) {
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
index 293d27053a..d91247b524 100644
--- a/src/journal/mmap-cache.c
+++ b/src/journal/mmap-cache.c
@@ -325,10 +325,8 @@ static FileDescriptor* fd_add(MMapCache *m, int fd) {
         f->fd = fd;
 
         r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
-        if (r < 0) {
-                free(f);
-                return NULL;
-        }
+        if (r < 0)
+                return mfree(f);
 
         return f;
 }
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
index 98c8a47afe..f2f8546086 100644
--- a/src/journal/sd-journal.c
+++ b/src/journal/sd-journal.c
@@ -387,7 +387,7 @@ _public_ int sd_journal_add_disjunction(sd_journal *j) {
 }
 
 static char *match_make_string(Match *m) {
-        char *p, *r;
+        char *p = NULL, *r;
         Match *i;
         bool enclose = false;
 
@@ -397,15 +397,12 @@ static char *match_make_string(Match *m) {
         if (m->type == MATCH_DISCRETE)
                 return strndup(m->data, m->size);
 
-        p = NULL;
         LIST_FOREACH(matches, i, m->matches) {
                 char *t, *k;
 
                 t = match_make_string(i);
-                if (!t) {
-                        free(p);
-                        return NULL;
-                }
+                if (!t)
+                        return mfree(p);
 
                 if (p) {
                         k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL);
diff --git a/src/libsystemd-network/ndisc-router.c b/src/libsystemd-network/ndisc-router.c
index d9950b638c..41ff2b353a 100644
--- a/src/libsystemd-network/ndisc-router.c
+++ b/src/libsystemd-network/ndisc-router.c
@@ -49,8 +49,7 @@ _public_ sd_ndisc_router* sd_ndisc_router_unref(sd_ndisc_router *rt) {
         if (rt->n_ref > 0)
                 return NULL;
 
-        free(rt);
-        return NULL;
+        return mfree(rt);
 }
 
 sd_ndisc_router *ndisc_router_new(size_t raw_size) {
diff --git a/src/libsystemd-network/sd-dhcp-client.c b/src/libsystemd-network/sd-dhcp-client.c
index 179e5950bd..5ccb23922c 100644
--- a/src/libsystemd-network/sd-dhcp-client.c
+++ b/src/libsystemd-network/sd-dhcp-client.c
@@ -1873,9 +1873,7 @@ sd_dhcp_client *sd_dhcp_client_unref(sd_dhcp_client *client) {
         free(client->req_opts);
         free(client->hostname);
         free(client->vendor_class_identifier);
-        free(client);
-
-        return NULL;
+        return mfree(client);
 }
 
 int sd_dhcp_client_new(sd_dhcp_client **ret) {
diff --git a/src/libsystemd-network/sd-dhcp-lease.c b/src/libsystemd-network/sd-dhcp-lease.c
index ef50ed17a1..8387b185c0 100644
--- a/src/libsystemd-network/sd-dhcp-lease.c
+++ b/src/libsystemd-network/sd-dhcp-lease.c
@@ -282,9 +282,7 @@ sd_dhcp_lease *sd_dhcp_lease_unref(sd_dhcp_lease *lease) {
         free(lease->static_route);
         free(lease->client_id);
         free(lease->vendor_specific);
-        free(lease);
-
-        return NULL;
+        return mfree(lease);
 }
 
 static int lease_parse_u32(const uint8_t *option, size_t len, uint32_t *ret, uint32_t min) {
diff --git a/src/libsystemd-network/sd-dhcp-server.c b/src/libsystemd-network/sd-dhcp-server.c
index 11ee2e252e..f16314a37f 100644
--- a/src/libsystemd-network/sd-dhcp-server.c
+++ b/src/libsystemd-network/sd-dhcp-server.c
@@ -178,9 +178,7 @@ sd_dhcp_server *sd_dhcp_server_unref(sd_dhcp_server *server) {
         hashmap_free(server->leases_by_client_id);
 
         free(server->bound_leases);
-        free(server);
-
-        return NULL;
+        return mfree(server);
 }
 
 int sd_dhcp_server_new(sd_dhcp_server **ret, int ifindex) {
diff --git a/src/libsystemd-network/sd-dhcp6-client.c b/src/libsystemd-network/sd-dhcp6-client.c
index 463fde401c..e81215f7d7 100644
--- a/src/libsystemd-network/sd-dhcp6-client.c
+++ b/src/libsystemd-network/sd-dhcp6-client.c
@@ -1300,9 +1300,7 @@ sd_dhcp6_client *sd_dhcp6_client_unref(sd_dhcp6_client *client) {
         sd_dhcp6_client_detach_event(client);
 
         free(client->req_opts);
-        free(client);
-
-        return NULL;
+        return mfree(client);
 }
 
 int sd_dhcp6_client_new(sd_dhcp6_client **ret) {
diff --git a/src/libsystemd-network/sd-dhcp6-lease.c b/src/libsystemd-network/sd-dhcp6-lease.c
index 5c10a6326a..ab59977a3f 100644
--- a/src/libsystemd-network/sd-dhcp6-lease.c
+++ b/src/libsystemd-network/sd-dhcp6-lease.c
@@ -389,9 +389,7 @@ sd_dhcp6_lease *sd_dhcp6_lease_unref(sd_dhcp6_lease *lease) {
         free(lease->ntp);
 
         lease->ntp_fqdn = strv_free(lease->ntp_fqdn);
-        free(lease);
-
-        return NULL;
+        return mfree(lease);
 }
 
 int dhcp6_lease_new(sd_dhcp6_lease **ret) {
diff --git a/src/libsystemd-network/sd-ipv4acd.c b/src/libsystemd-network/sd-ipv4acd.c
index 662885840f..4dd343c101 100644
--- a/src/libsystemd-network/sd-ipv4acd.c
+++ b/src/libsystemd-network/sd-ipv4acd.c
@@ -135,9 +135,7 @@ sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *acd) {
         ipv4acd_reset(acd);
         sd_ipv4acd_detach_event(acd);
 
-        free(acd);
-
-        return NULL;
+        return mfree(acd);
 }
 
 int sd_ipv4acd_new(sd_ipv4acd **ret) {
diff --git a/src/libsystemd-network/sd-ipv4ll.c b/src/libsystemd-network/sd-ipv4ll.c
index 5603a533a5..13209261f9 100644
--- a/src/libsystemd-network/sd-ipv4ll.c
+++ b/src/libsystemd-network/sd-ipv4ll.c
@@ -90,9 +90,7 @@ sd_ipv4ll *sd_ipv4ll_unref(sd_ipv4ll *ll) {
                 return NULL;
 
         sd_ipv4acd_unref(ll->acd);
-        free(ll);
-
-        return NULL;
+        return mfree(ll);
 }
 
 int sd_ipv4ll_new(sd_ipv4ll **ret) {
diff --git a/src/libsystemd-network/sd-lldp.c b/src/libsystemd-network/sd-lldp.c
index 0bd1e66aa0..0702241506 100644
--- a/src/libsystemd-network/sd-lldp.c
+++ b/src/libsystemd-network/sd-lldp.c
@@ -374,9 +374,7 @@ _public_ sd_lldp* sd_lldp_unref(sd_lldp *lldp) {
 
         hashmap_free(lldp->neighbor_by_id);
         prioq_free(lldp->neighbor_by_expiry);
-        free(lldp);
-
-        return NULL;
+        return mfree(lldp);
 }
 
 _public_ int sd_lldp_new(sd_lldp **ret) {
diff --git a/src/libsystemd-network/sd-ndisc.c b/src/libsystemd-network/sd-ndisc.c
index 07b0d7f704..1d3be9b862 100644
--- a/src/libsystemd-network/sd-ndisc.c
+++ b/src/libsystemd-network/sd-ndisc.c
@@ -148,9 +148,7 @@ _public_ sd_ndisc *sd_ndisc_unref(sd_ndisc *nd) {
 
         ndisc_reset(nd);
         sd_ndisc_detach_event(nd);
-        free(nd);
-
-        return NULL;
+        return mfree(nd);
 }
 
 _public_ int sd_ndisc_new(sd_ndisc **ret) {
diff --git a/src/libsystemd/sd-bus/bus-slot.c b/src/libsystemd/sd-bus/bus-slot.c
index 8e9074c7df..33590c31ac 100644
--- a/src/libsystemd/sd-bus/bus-slot.c
+++ b/src/libsystemd/sd-bus/bus-slot.c
@@ -212,9 +212,7 @@ _public_ sd_bus_slot* sd_bus_slot_unref(sd_bus_slot *slot) {
 
         bus_slot_disconnect(slot);
         free(slot->description);
-        free(slot);
-
-        return NULL;
+        return mfree(slot);
 }
 
 _public_ sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot) {
diff --git a/src/libsystemd/sd-bus/bus-track.c b/src/libsystemd/sd-bus/bus-track.c
index 00e93a215f..4acaf24793 100644
--- a/src/libsystemd/sd-bus/bus-track.c
+++ b/src/libsystemd/sd-bus/bus-track.c
@@ -74,9 +74,7 @@ static struct track_item* track_item_free(struct track_item *i) {
 
         sd_bus_slot_unref(i->slot);
         free(i->name);
-        free(i);
-
-        return NULL;
+        return mfree(i);
 }
 
 DEFINE_TRIVIAL_CLEANUP_FUNC(struct track_item*, track_item_free);
@@ -206,9 +204,7 @@ _public_ sd_bus_track* sd_bus_track_unref(sd_bus_track *track) {
         bus_track_remove_from_queue(track);
         hashmap_free(track->names);
         sd_bus_unref(track->bus);
-        free(track);
-
-        return NULL;
+        return mfree(track);
 }
 
 static int on_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
diff --git a/src/libudev/libudev-list.c b/src/libudev/libudev-list.c
index da496ed456..0d51322a15 100644
--- a/src/libudev/libudev-list.c
+++ b/src/libudev/libudev-list.c
@@ -166,17 +166,16 @@ struct udev_list_entry *udev_list_entry_add(struct udev_list *list, const char *
         entry = new0(struct udev_list_entry, 1);
         if (entry == NULL)
                 return NULL;
+
         entry->name = strdup(name);
-        if (entry->name == NULL) {
-                free(entry);
-                return NULL;
-        }
+        if (entry->name == NULL)
+                return mfree(entry);
+
         if (value != NULL) {
                 entry->value = strdup(value);
                 if (entry->value == NULL) {
                         free(entry->name);
-                        free(entry);
-                        return NULL;
+                        return mfree(entry);
                 }
         }
 
@@ -193,8 +192,7 @@ struct udev_list_entry *udev_list_entry_add(struct udev_list *list, const char *
                         if (entries == NULL) {
                                 free(entry->name);
                                 free(entry->value);
-                                free(entry);
-                                return NULL;
+                                return mfree(entry);
                         }
                         list->entries = entries;
                         list->entries_max += add;
diff --git a/src/libudev/libudev-monitor.c b/src/libudev/libudev-monitor.c
index 1f9d16c450..a1f2b33ad5 100644
--- a/src/libudev/libudev-monitor.c
+++ b/src/libudev/libudev-monitor.c
@@ -207,8 +207,7 @@ struct udev_monitor *udev_monitor_new_from_netlink_fd(struct udev *udev, const c
                 udev_monitor->sock = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT);
                 if (udev_monitor->sock < 0) {
                         log_debug_errno(errno, "error getting socket: %m");
-                        free(udev_monitor);
-                        return NULL;
+                        return mfree(udev_monitor);
                 }
         } else {
                 udev_monitor->bound = true;
diff --git a/src/login/logind-button.c b/src/login/logind-button.c
index baa6b7113c..90fb93bbaf 100644
--- a/src/login/logind-button.c
+++ b/src/login/logind-button.c
@@ -43,15 +43,12 @@ Button* button_new(Manager *m, const char *name) {
                 return NULL;
 
         b->name = strdup(name);
-        if (!b->name) {
-                free(b);
-                return NULL;
-        }
+        if (!b->name)
+                return mfree(b);
 
         if (hashmap_put(m->buttons, b->name, b) < 0) {
                 free(b->name);
-                free(b);
-                return NULL;
+                return mfree(b);
         }
 
         b->manager = m;
diff --git a/src/login/logind-device.c b/src/login/logind-device.c
index eb5edd1cd5..6537fa04bf 100644
--- a/src/login/logind-device.c
+++ b/src/login/logind-device.c
@@ -34,15 +34,12 @@ Device* device_new(Manager *m, const char *sysfs, bool master) {
                 return NULL;
 
         d->sysfs = strdup(sysfs);
-        if (!d->sysfs) {
-                free(d);
-                return NULL;
-        }
+        if (!d->sysfs)
+                return mfree(d);
 
         if (hashmap_put(m->devices, d->sysfs, d) < 0) {
                 free(d->sysfs);
-                free(d);
-                return NULL;
+                return mfree(d);
         }
 
         d->manager = m;
diff --git a/src/login/logind-inhibit.c b/src/login/logind-inhibit.c
index 6c78e0dddc..c93b24009b 100644
--- a/src/login/logind-inhibit.c
+++ b/src/login/logind-inhibit.c
@@ -45,17 +45,14 @@ Inhibitor* inhibitor_new(Manager *m, const char* id) {
                 return NULL;
 
         i->state_file = strappend("/run/systemd/inhibit/", id);
-        if (!i->state_file) {
-                free(i);
-                return NULL;
-        }
+        if (!i->state_file)
+                return mfree(i);
 
         i->id = basename(i->state_file);
 
         if (hashmap_put(m->inhibitors, i->id, i) < 0) {
                 free(i->state_file);
-                free(i);
-                return NULL;
+                return mfree(i);
         }
 
         i->manager = m;
diff --git a/src/login/logind-seat.c b/src/login/logind-seat.c
index b5192320e4..ecc7bd2e5b 100644
--- a/src/login/logind-seat.c
+++ b/src/login/logind-seat.c
@@ -48,18 +48,15 @@ Seat *seat_new(Manager *m, const char *id) {
                 return NULL;
 
         s->state_file = strappend("/run/systemd/seats/", id);
-        if (!s->state_file) {
-                free(s);
-                return NULL;
-        }
+        if (!s->state_file)
+                return mfree(s);
 
         s->id = basename(s->state_file);
         s->manager = m;
 
         if (hashmap_put(m->seats, s->id, s) < 0) {
                 free(s->state_file);
-                free(s);
-                return NULL;
+                return mfree(s);
         }
 
         return s;
diff --git a/src/login/logind-session.c b/src/login/logind-session.c
index ba1bcc2630..cbf035f706 100644
--- a/src/login/logind-session.c
+++ b/src/login/logind-session.c
@@ -62,16 +62,13 @@ Session* session_new(Manager *m, const char *id) {
                 return NULL;
 
         s->state_file = strappend("/run/systemd/sessions/", id);
-        if (!s->state_file) {
-                free(s);
-                return NULL;
-        }
+        if (!s->state_file)
+                return mfree(s);
 
         s->devices = hashmap_new(&devt_hash_ops);
         if (!s->devices) {
                 free(s->state_file);
-                free(s);
-                return NULL;
+                return mfree(s);
         }
 
         s->id = basename(s->state_file);
@@ -79,8 +76,7 @@ Session* session_new(Manager *m, const char *id) {
         if (hashmap_put(m->sessions, s->id, s) < 0) {
                 hashmap_free(s->devices);
                 free(s->state_file);
-                free(s);
-                return NULL;
+                return mfree(s);
         }
 
         s->manager = m;
diff --git a/src/machine/machine.c b/src/machine/machine.c
index dd046d6563..a02b9d7575 100644
--- a/src/machine/machine.c
+++ b/src/machine/machine.c
@@ -80,9 +80,7 @@ Machine* machine_new(Manager *manager, MachineClass class, const char *name) {
 fail:
         free(m->state_file);
         free(m->name);
-        free(m);
-
-        return NULL;
+        return mfree(m);
 }
 
 void machine_free(Machine *m) {
diff --git a/src/machine/operation.c b/src/machine/operation.c
index 2bf93cb493..c966d0d21c 100644
--- a/src/machine/operation.c
+++ b/src/machine/operation.c
@@ -147,6 +147,5 @@ Operation *operation_free(Operation *o) {
         if (o->machine)
                 LIST_REMOVE(operations_by_machine, o->machine->operations, o);
 
-        free(o);
-        return NULL;
+        return mfree(o);
 }
diff --git a/src/network/networkd-wait-online-link.c b/src/network/networkd-wait-online-link.c
index 5727422e3d..e63ba07e90 100644
--- a/src/network/networkd-wait-online-link.c
+++ b/src/network/networkd-wait-online-link.c
@@ -77,8 +77,7 @@ Link *link_free(Link *l) {
         }
 
         free(l->ifname);
-        free(l);
-        return NULL;
+        return mfree(l);
  }
 
 int link_update_rtnl(Link *l, sd_netlink_message *m) {
diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
index 5f1522cfb6..09c8f070ba 100644
--- a/src/nspawn/nspawn-settings.c
+++ b/src/nspawn/nspawn-settings.c
@@ -101,9 +101,7 @@ Settings* settings_free(Settings *s) {
         expose_port_free_all(s->expose_ports);
 
         custom_mount_free_all(s->custom_mounts, s->n_custom_mounts);
-        free(s);
-
-        return NULL;
+        return mfree(s);
 }
 
 bool settings_private_network(Settings *s) {
diff --git a/src/resolve/resolved-dns-query.c b/src/resolve/resolved-dns-query.c
index 53be18efc6..e03db4d003 100644
--- a/src/resolve/resolved-dns-query.c
+++ b/src/resolve/resolved-dns-query.c
@@ -83,9 +83,7 @@ DnsQueryCandidate* dns_query_candidate_free(DnsQueryCandidate *c) {
         if (c->scope)
                 LIST_REMOVE(candidates_by_scope, c->scope->query_candidates, c);
 
-        free(c);
-
-        return NULL;
+        return mfree(c);
 }
 
 static int dns_query_candidate_next_search_domain(DnsQueryCandidate *c) {
@@ -421,9 +419,7 @@ DnsQuery *dns_query_free(DnsQuery *q) {
                 q->manager->n_dns_queries--;
         }
 
-        free(q);
-
-        return NULL;
+        return mfree(q);
 }
 
 int dns_query_new(
diff --git a/src/resolve/resolved-dns-rr.c b/src/resolve/resolved-dns-rr.c
index 5687588a7d..87e4abec6e 100644
--- a/src/resolve/resolved-dns-rr.c
+++ b/src/resolve/resolved-dns-rr.c
@@ -73,10 +73,8 @@ DnsResourceKey* dns_resource_key_new_redirect(const DnsResourceKey *key, const D
                         return dns_resource_key_ref((DnsResourceKey*) key);
 
                 k = dns_resource_key_new_consume(key->class, key->type, destination);
-                if (!k) {
-                        free(destination);
-                        return NULL;
-                }
+                if (!k)
+                        return mfree(destination);
 
                 return k;
         }
@@ -513,9 +511,7 @@ DnsResourceRecord* dns_resource_record_unref(DnsResourceRecord *rr) {
         }
 
         free(rr->to_string);
-        free(rr);
-
-        return NULL;
+        return mfree(rr);
 }
 
 int dns_resource_record_new_reverse(DnsResourceRecord **ret, int family, const union in_addr_union *address, const char *hostname) {
diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c
index 03811ac8e7..8dbc7f623b 100644
--- a/src/resolve/resolved-dns-scope.c
+++ b/src/resolve/resolved-dns-scope.c
@@ -128,9 +128,7 @@ DnsScope* dns_scope_free(DnsScope *s) {
         dns_zone_flush(&s->zone);
 
         LIST_REMOVE(scopes, s->manager->dns_scopes, s);
-        free(s);
-
-        return NULL;
+        return mfree(s);
 }
 
 DnsServer *dns_scope_get_dns_server(DnsScope *s) {
diff --git a/src/resolve/resolved-dns-search-domain.c b/src/resolve/resolved-dns-search-domain.c
index 732471027b..1386e6a17b 100644
--- a/src/resolve/resolved-dns-search-domain.c
+++ b/src/resolve/resolved-dns-search-domain.c
@@ -104,9 +104,7 @@ DnsSearchDomain* dns_search_domain_unref(DnsSearchDomain *d) {
                 return NULL;
 
         free(d->name);
-        free(d);
-
-        return NULL;
+        return mfree(d);
 }
 
 void dns_search_domain_unlink(DnsSearchDomain *d) {
diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c
index 97cc8c0e09..7282848e35 100644
--- a/src/resolve/resolved-dns-server.c
+++ b/src/resolve/resolved-dns-server.c
@@ -139,8 +139,7 @@ DnsServer* dns_server_unref(DnsServer *s)  {
                 return NULL;
 
         free(s->server_string);
-        free(s);
-        return NULL;
+        return mfree(s);
 }
 
 void dns_server_unlink(DnsServer *s) {
diff --git a/src/resolve/resolved-dns-stream.c b/src/resolve/resolved-dns-stream.c
index dd0e0b90e3..878bae47dc 100644
--- a/src/resolve/resolved-dns-stream.c
+++ b/src/resolve/resolved-dns-stream.c
@@ -343,9 +343,7 @@ DnsStream *dns_stream_unref(DnsStream *s) {
         dns_packet_unref(s->write_packet);
         dns_packet_unref(s->read_packet);
 
-        free(s);
-
-        return NULL;
+        return mfree(s);
 }
 
 DEFINE_TRIVIAL_CLEANUP_FUNC(DnsStream*, dns_stream_unref);
diff --git a/src/resolve/resolved-dns-transaction.c b/src/resolve/resolved-dns-transaction.c
index d455b6b1fa..2fce44ec8b 100644
--- a/src/resolve/resolved-dns-transaction.c
+++ b/src/resolve/resolved-dns-transaction.c
@@ -134,8 +134,7 @@ DnsTransaction* dns_transaction_free(DnsTransaction *t) {
         dns_answer_unref(t->validated_keys);
         dns_resource_key_unref(t->key);
 
-        free(t);
-        return NULL;
+        return mfree(t);
 }
 
 DEFINE_TRIVIAL_CLEANUP_FUNC(DnsTransaction*, dns_transaction_free);
diff --git a/src/resolve/resolved-link.c b/src/resolve/resolved-link.c
index ea4a007139..13e1f91192 100644
--- a/src/resolve/resolved-link.c
+++ b/src/resolve/resolved-link.c
@@ -101,8 +101,7 @@ Link *link_free(Link *l) {
 
         free(l->state_file);
 
-        free(l);
-        return NULL;
+        return mfree(l);
 }
 
 void link_allocate_scopes(Link *l) {
@@ -698,8 +697,7 @@ LinkAddress *link_address_free(LinkAddress *a) {
         dns_resource_record_unref(a->llmnr_address_rr);
         dns_resource_record_unref(a->llmnr_ptr_rr);
 
-        free(a);
-        return NULL;
+        return mfree(a);
 }
 
 void link_address_add_rrs(LinkAddress *a, bool force_remove) {
diff --git a/src/resolve/resolved-manager.c b/src/resolve/resolved-manager.c
index 40f08e8044..0954641c20 100644
--- a/src/resolve/resolved-manager.c
+++ b/src/resolve/resolved-manager.c
@@ -630,9 +630,7 @@ Manager *manager_free(Manager *m) {
         dns_trust_anchor_flush(&m->trust_anchor);
         manager_etc_hosts_flush(m);
 
-        free(m);
-
-        return NULL;
+        return mfree(m);
 }
 
 int manager_recv(Manager *m, int fd, DnsProtocol protocol, DnsPacket **ret) {
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
index 529d89ee2a..060f8d50c7 100644
--- a/src/shared/machine-image.c
+++ b/src/shared/machine-image.c
@@ -62,8 +62,7 @@ Image *image_unref(Image *i) {
 
         free(i->name);
         free(i->path);
-        free(i);
-        return NULL;
+        return mfree(i);
 }
 
 static char **image_settings_path(Image *image) {
diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c
index 24055e772b..293c6673fc 100644
--- a/src/shared/ptyfwd.c
+++ b/src/shared/ptyfwd.c
@@ -463,8 +463,7 @@ int pty_forward_new(
 
 PTYForward *pty_forward_free(PTYForward *f) {
         pty_forward_disconnect(f);
-        free(f);
-        return NULL;
+        return mfree(f);
 }
 
 int pty_forward_get_last_char(PTYForward *f, char *ch) {
diff --git a/src/timesync/timesyncd-server.c b/src/timesync/timesyncd-server.c
index 6bda86fe6e..57a7bf2c25 100644
--- a/src/timesync/timesyncd-server.c
+++ b/src/timesync/timesyncd-server.c
@@ -61,8 +61,7 @@ ServerAddress* server_address_free(ServerAddress *a) {
                         manager_set_server_address(a->name->manager, NULL);
         }
 
-        free(a);
-        return NULL;
+        return mfree(a);
 }
 
 int server_name_new(
@@ -137,9 +136,7 @@ ServerName *server_name_free(ServerName *n) {
         log_debug("Removed server %s.", n->string);
 
         free(n->string);
-        free(n);
-
-        return NULL;
+        return mfree(n);
 }
 
 void server_name_flush_addresses(ServerName *n) {
diff --git a/src/udev/udev-ctrl.c b/src/udev/udev-ctrl.c
index f68a09d7a8..7717ac7924 100644
--- a/src/udev/udev-ctrl.c
+++ b/src/udev/udev-ctrl.c
@@ -211,8 +211,7 @@ struct udev_ctrl_connection *udev_ctrl_get_connection(struct udev_ctrl *uctrl) {
 err:
         if (conn->sock >= 0)
                 close(conn->sock);
-        free(conn);
-        return NULL;
+        return mfree(conn);
 }
 
 struct udev_ctrl_connection *udev_ctrl_connection_ref(struct udev_ctrl_connection *conn) {
diff --git a/src/udev/udev-rules.c b/src/udev/udev-rules.c
index 26fa52cf6c..7619c8371b 100644
--- a/src/udev/udev-rules.c
+++ b/src/udev/udev-rules.c
@@ -1583,8 +1583,7 @@ struct udev_rules *udev_rules_unref(struct udev_rules *rules) {
         strbuf_cleanup(rules->strbuf);
         free(rules->uids);
         free(rules->gids);
-        free(rules);
-        return NULL;
+        return mfree(rules);
 }
 
 bool udev_rules_check_timestamp(struct udev_rules *rules) {
-- 
cgit v1.2.3-54-g00ecf


From 3b319885c4febb5f7ea9b5ab31c3395548ed6886 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sun, 16 Oct 2016 19:23:35 -0400
Subject: tree-wide: introduce free_and_replace helper

It's a common pattern, so add a helper for it. A macro is necessary
because a function that takes a pointer to a pointer would be type specific,
similarly to cleanup functions. Seems better to use a macro.
---
 coccinelle/free_and_replace.cocci   | 15 +++++++++++++++
 src/basic/alloc-util.h              |  8 ++++++++
 src/basic/fs-util.c                 |  8 ++------
 src/basic/path-util.c               |  4 +---
 src/core/load-fragment.c            | 10 ++--------
 src/core/mount.c                    | 14 +++-----------
 src/core/service.c                  |  4 +---
 src/journal-remote/journal-upload.c |  4 +---
 src/shared/install.c                | 10 ++--------
 9 files changed, 35 insertions(+), 42 deletions(-)
 create mode 100644 coccinelle/free_and_replace.cocci

(limited to 'src/core')

diff --git a/coccinelle/free_and_replace.cocci b/coccinelle/free_and_replace.cocci
new file mode 100644
index 0000000000..9dcdbf4d42
--- /dev/null
+++ b/coccinelle/free_and_replace.cocci
@@ -0,0 +1,15 @@
+@@
+expression p, q;
+@@
+- free(p);
+- p = q;
+- q = NULL;
+- return 0;
++ return free_and_replace(p, q);
+@@
+expression p, q;
+@@
+- free(p);
+- p = q;
+- q = NULL;
++ free_and_replace(p, q);
diff --git a/src/basic/alloc-util.h b/src/basic/alloc-util.h
index ceeee519b7..a44dd473c1 100644
--- a/src/basic/alloc-util.h
+++ b/src/basic/alloc-util.h
@@ -43,6 +43,14 @@ static inline void *mfree(void *memory) {
         return NULL;
 }
 
+#define free_and_replace(a, b)                  \
+        ({                                      \
+                free(a);                        \
+                (a) = (b);                      \
+                (b) = NULL;                     \
+                0;                              \
+        })
+
 void* memdup(const void *p, size_t l) _alloc_(2);
 
 static inline void freep(void *p) {
diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
index 86d9ad7e36..48952a1c26 100644
--- a/src/basic/fs-util.c
+++ b/src/basic/fs-util.c
@@ -678,9 +678,7 @@ int chase_symlinks(const char *path, const char *_root, char **ret) {
                             !path_startswith(parent, root))
                                 return -EINVAL;
 
-                        free(done);
-                        done = parent;
-                        parent = NULL;
+                        free_and_replace(done, parent);
 
                         fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
                         if (fd_parent < 0)
@@ -724,9 +722,7 @@ int chase_symlinks(const char *path, const char *_root, char **ret) {
                                 if (fd < 0)
                                         return -errno;
 
-                                free(buffer);
-                                buffer = destination;
-                                destination = NULL;
+                                free_and_replace(buffer, destination);
 
                                 todo = buffer;
                                 free(done);
diff --git a/src/basic/path-util.c b/src/basic/path-util.c
index a76963aa9f..0f5b20cf05 100644
--- a/src/basic/path-util.c
+++ b/src/basic/path-util.c
@@ -288,9 +288,7 @@ char **path_strv_resolve(char **l, const char *prefix) {
                         } else {
                                 /* canonicalized path goes outside of
                                  * prefix, keep the original path instead */
-                                free(u);
-                                u = orig;
-                                orig = NULL;
+                                free_and_replace(u, orig);
                         }
                 } else
                         free(t);
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 06c156a623..8cc7a8e765 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -1591,11 +1591,7 @@ int config_parse_fdname(
                 return 0;
         }
 
-        free(s->fdname);
-        s->fdname = p;
-        p = NULL;
-
-        return 0;
+        return free_and_replace(s->fdname, p);
 }
 
 int config_parse_service_sockets(
@@ -2052,9 +2048,7 @@ int config_parse_working_directory(
                         return 0;
                 }
 
-                free(c->working_directory);
-                c->working_directory = k;
-                k = NULL;
+                free_and_replace(c->working_directory, k);
 
                 c->working_directory_home = false;
         }
diff --git a/src/core/mount.c b/src/core/mount.c
index 15619dffe3..da480001e1 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -1484,17 +1484,9 @@ static int mount_setup_unit(
 
         MOUNT(u)->from_proc_self_mountinfo = true;
 
-        free(p->what);
-        p->what = w;
-        w = NULL;
-
-        free(p->options);
-        p->options = o;
-        o = NULL;
-
-        free(p->fstype);
-        p->fstype = f;
-        f = NULL;
+        free_and_replace(p->what, w);
+        free_and_replace(p->options, o);
+        free_and_replace(p->fstype, f);
 
         if (load_extras) {
                 r = mount_add_extras(MOUNT(u));
diff --git a/src/core/service.c b/src/core/service.c
index 63045ede55..c949de9cbe 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -3088,9 +3088,7 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds)
 
                 if (!streq_ptr(s->status_text, t)) {
 
-                        free(s->status_text);
-                        s->status_text = t;
-                        t = NULL;
+                        free_and_replace(s->status_text, t);
 
                         notify_dbus = true;
                 }
diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c
index c0f967ab94..61190ff83c 100644
--- a/src/journal-remote/journal-upload.c
+++ b/src/journal-remote/journal-upload.c
@@ -527,9 +527,7 @@ static int perform_upload(Uploader *u) {
                 log_debug("Upload finished successfully with code %ld: %s",
                           status, strna(u->answer));
 
-        free(u->last_cursor);
-        u->last_cursor = u->current_cursor;
-        u->current_cursor = NULL;
+        free_and_replace(u->last_cursor, u->current_cursor);
 
         return update_cursor_state(u);
 }
diff --git a/src/shared/install.c b/src/shared/install.c
index c1ef62b337..f70b3e3dd5 100644
--- a/src/shared/install.c
+++ b/src/shared/install.c
@@ -1107,11 +1107,7 @@ static int config_parse_default_instance(
         if (!unit_instance_is_valid(printed))
                 return -EINVAL;
 
-        free(i->default_instance);
-        i->default_instance = printed;
-        printed = NULL;
-
-        return 0;
+        return free_and_replace(i->default_instance, printed);
 }
 
 static int unit_file_load(
@@ -1357,9 +1353,7 @@ static int install_info_follow(
         if (!streq(basename(i->symlink_target), i->name))
                 return -EXDEV;
 
-        free(i->path);
-        i->path = i->symlink_target;
-        i->symlink_target = NULL;
+        free_and_replace(i->path, i->symlink_target);
         i->type = _UNIT_FILE_TYPE_INVALID;
 
         return unit_file_load_or_readlink(c, i, i->path, root_dir, flags);
-- 
cgit v1.2.3-54-g00ecf


From ba25d39e449347952522162c3fa110b04308e28c Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Mon, 17 Oct 2016 01:15:03 -0400
Subject: pid1: do not use mtime==0 as sign of masking (#4388)

It is allowed for unit files to have an mtime==0, so instead of assuming that
any file that had mtime==0 was masked, use the load_state to filter masked
units.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1384150.
---
 src/core/unit.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'src/core')

diff --git a/src/core/unit.c b/src/core/unit.c
index a6b8ecdcb7..b24ca5aed8 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3053,7 +3053,7 @@ int unit_coldplug(Unit *u) {
         return r;
 }
 
-static bool fragment_mtime_newer(const char *path, usec_t mtime) {
+static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) {
         struct stat st;
 
         if (!path)
@@ -3063,12 +3063,12 @@ static bool fragment_mtime_newer(const char *path, usec_t mtime) {
                 /* What, cannot access this anymore? */
                 return true;
 
-        if (mtime > 0)
+        if (path_masked)
+                /* For masked files check if they are still so */
+                return !null_or_empty(&st);
+        else
                 /* For non-empty files check the mtime */
                 return timespec_load(&st.st_mtim) > mtime;
-        else if (!null_or_empty(&st))
-                /* For masked files check if they are still so */
-                return true;
 
         return false;
 }
@@ -3079,18 +3079,22 @@ bool unit_need_daemon_reload(Unit *u) {
 
         assert(u);
 
-        if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime))
+        /* For unit files, we allow masking… */
+        if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime,
+                                 u->load_state == UNIT_MASKED))
                 return true;
 
-        if (fragment_mtime_newer(u->source_path, u->source_mtime))
+        /* Source paths should not be masked… */
+        if (fragment_mtime_newer(u->source_path, u->source_mtime, false))
                 return true;
 
         (void) unit_find_dropin_paths(u, &t);
         if (!strv_equal(u->dropin_paths, t))
                 return true;
 
+        /* … any drop-ins that are masked are simply omitted from the list. */
         STRV_FOREACH(path, u->dropin_paths)
-                if (fragment_mtime_newer(*path, u->dropin_mtime))
+                if (fragment_mtime_newer(*path, u->dropin_mtime, false))
                         return true;
 
         return false;
-- 
cgit v1.2.3-54-g00ecf


From 6e2c9ce1b6940c93d1bfdb26d75dec5a2885664b Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Mon, 17 Oct 2016 02:05:30 -0400
Subject: core/timer: reset next_elapse_*time when timer is not waiting

When the unit that is triggered by a timer is started and running,
we transition to "running" state, and the timer will not elapse again
until the unit has finished running. In this state "systemctl list-timers"
would display the previously calculated next elapse time, which would
now of course be in the past, leading to nonsensical values.

Simply set the next elapse to infinity, which causes list-timers to
show n/a. We cannot specify when the next elapse will happen, possibly
never.

Fixes #4031.
---
 src/core/timer.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/core')

diff --git a/src/core/timer.c b/src/core/timer.c
index 9538059c13..2469a517ea 100644
--- a/src/core/timer.c
+++ b/src/core/timer.c
@@ -261,6 +261,8 @@ static void timer_set_state(Timer *t, TimerState state) {
         if (state != TIMER_WAITING) {
                 t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
                 t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+                t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+                t->next_elapse_realtime = USEC_INFINITY;
         }
 
         if (state != old_state)
-- 
cgit v1.2.3-54-g00ecf


From 52c239d770d3ef955220c5ae72b852360da67c8b Mon Sep 17 00:00:00 2001
From: Luca Bruno <luca.bruno@coreos.com>
Date: Tue, 18 Oct 2016 00:05:49 +0000
Subject: core/exec: add a named-descriptor option ("fd") for streams (#4179)

This commit adds a `fd` option to `StandardInput=`,
`StandardOutput=` and `StandardError=` properties in order to
connect standard streams to externally named descriptors provided
by some socket units.

This option looks for a file descriptor named as the corresponding
stream. Custom names can be specified, separated by a colon.
If multiple name-matches exist, the first matching fd will be used.
---
 man/systemd.exec.xml                  |  46 +++++++++++++--
 src/core/dbus-execute.c               |  86 +++++++++++++++++++++++++++-
 src/core/execute.c                    |  86 +++++++++++++++++++++++++---
 src/core/execute.h                    |   5 ++
 src/core/load-fragment-gperf.gperf.m4 |   6 +-
 src/core/load-fragment.c              | 104 ++++++++++++++++++++++++++++++++--
 src/core/load-fragment.h              |   2 +
 src/core/unit.c                       |  20 +++----
 8 files changed, 322 insertions(+), 33 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index a5a453031f..7453aa7bee 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -409,8 +409,9 @@
         <option>null</option>,
         <option>tty</option>,
         <option>tty-force</option>,
-        <option>tty-fail</option> or
-        <option>socket</option>.</para>
+        <option>tty-fail</option>,
+        <option>socket</option> or
+        <option>fd</option>.</para>
 
         <para>If <option>null</option> is selected, standard input
         will be connected to <filename>/dev/null</filename>, i.e. all
@@ -448,6 +449,20 @@
         <citerefentry project='freebsd'><refentrytitle>inetd</refentrytitle><manvolnum>8</manvolnum></citerefentry>
         daemon.</para>
 
+        <para>The <option>fd</option> option connects
+        the input stream to a single file descriptor provided by a socket unit.
+        A custom named file descriptor can be specified as part of this option,
+        after a <literal>:</literal> (e.g. <literal>fd:<replaceable>foobar</replaceable></literal>).
+        If no name is specified, <literal>stdin</literal> is assumed
+        (i.e. <literal>fd</literal> is equivalent to <literal>fd:stdin</literal>).
+        At least one socket unit defining such name must be explicitly provided via the
+        <varname>Sockets=</varname> option, and file descriptor name may differ
+        from the name of its containing socket unit.
+        If multiple matches are found, the first one will be used.
+        See <varname>FileDescriptorName=</varname> in
+        <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+        for more details about named descriptors and ordering.</para>
+
         <para>This setting defaults to
         <option>null</option>.</para></listitem>
       </varlistentry>
@@ -464,8 +479,9 @@
         <option>kmsg</option>,
         <option>journal+console</option>,
         <option>syslog+console</option>,
-        <option>kmsg+console</option> or
-        <option>socket</option>.</para>
+        <option>kmsg+console</option>,
+        <option>socket</option> or
+        <option>fd</option>.</para>
 
         <para><option>inherit</option> duplicates the file descriptor
         of standard input for standard output.</para>
@@ -514,6 +530,20 @@
         similar to the same option of
         <varname>StandardInput=</varname>.</para>
 
+        <para>The <option>fd</option> option connects
+        the output stream to a single file descriptor provided by a socket unit.
+        A custom named file descriptor can be specified as part of this option,
+        after a <literal>:</literal> (e.g. <literal>fd:<replaceable>foobar</replaceable></literal>).
+        If no name is specified, <literal>stdout</literal> is assumed
+        (i.e. <literal>fd</literal> is equivalent to <literal>fd:stdout</literal>).
+        At least one socket unit defining such name must be explicitly provided via the
+        <varname>Sockets=</varname> option, and file descriptor name may differ
+        from the name of its containing socket unit.
+        If multiple matches are found, the first one will be used.
+        See <varname>FileDescriptorName=</varname> in
+        <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+        for more details about named descriptors and ordering.</para>
+
         <para>If the standard output (or error output, see below) of a unit is connected to the journal, syslog or the
         kernel log buffer, the unit will implicitly gain a dependency of type <varname>After=</varname> on
         <filename>systemd-journald.socket</filename> (also see the automatic dependencies section above).</para>
@@ -531,9 +561,13 @@
         <listitem><para>Controls where file descriptor 2 (STDERR) of
         the executed processes is connected to. The available options
         are identical to those of <varname>StandardOutput=</varname>,
-        with one exception: if set to <option>inherit</option> the
+        with some exceptions: if set to <option>inherit</option> the
         file descriptor used for standard output is duplicated for
-        standard error. This setting defaults to the value set with
+        standard error, while <option>fd</option> operates on the error
+        stream and will look by default for a descriptor named
+        <literal>stderr</literal>.</para>
+
+        <para>This setting defaults to the value set with
         <option>DefaultStandardError=</option> in
         <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
         which defaults to <option>inherit</option>. Note that setting
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index b8720d7d3d..1a7f770db1 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -627,6 +627,53 @@ static int property_get_syslog_facility(
         return sd_bus_message_append(reply, "i", LOG_FAC(c->syslog_priority));
 }
 
+static int property_get_input_fdname(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        const char *name;
+
+        assert(bus);
+        assert(c);
+        assert(property);
+        assert(reply);
+
+        name = exec_context_fdname(c, STDIN_FILENO);
+
+        return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_output_fdname(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        const char *name = NULL;
+
+        assert(bus);
+        assert(c);
+        assert(property);
+        assert(reply);
+
+        if (c->std_output == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardOutputFileDescriptorName"))
+                name = exec_context_fdname(c, STDOUT_FILENO);
+        else if (c->std_error == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardErrorFileDescriptorName"))
+                name = exec_context_fdname(c, STDERR_FILENO);
+
+        return sd_bus_message_append(reply, "s", name);
+}
+
 const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -677,8 +724,11 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_input_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1030,7 +1080,6 @@ int bus_exec_context_set_transient_property(
 
                 return 1;
 
-
         } else if (streq(name, "StandardOutput")) {
                 const char *s;
                 ExecOutput p;
@@ -1071,6 +1120,41 @@ int bus_exec_context_set_transient_property(
 
                 return 1;
 
+        } else if (STR_IN_SET(name,
+                              "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) {
+                const char *s;
+
+                r = sd_bus_message_read(message, "s", &s);
+                if (r < 0)
+                        return r;
+
+                if (!fdname_is_valid(s))
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name");
+
+                if (mode != UNIT_CHECK) {
+                        if (streq(name, "StandardInputFileDescriptorName")) {
+                                c->std_input = EXEC_INPUT_NAMED_FD;
+                                r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], s);
+                                if (r < 0)
+                                        return r;
+                                unit_write_drop_in_private_format(u, mode, name, "StandardInput=fd:%s", s);
+                        } else if (streq(name, "StandardOutputFileDescriptorName")) {
+                                c->std_output = EXEC_OUTPUT_NAMED_FD;
+                                r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], s);
+                                if (r < 0)
+                                        return r;
+                                unit_write_drop_in_private_format(u, mode, name, "StandardOutput=fd:%s", s);
+                        } else if (streq(name, "StandardErrorFileDescriptorName")) {
+                                c->std_error = EXEC_OUTPUT_NAMED_FD;
+                                r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], s);
+                                if (r < 0)
+                                        return r;
+                                unit_write_drop_in_private_format(u, mode, name, "StandardError=fd:%s", s);
+                        }
+                }
+
+                return 1;
+
         } else if (STR_IN_SET(name,
                               "IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
                               "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
diff --git a/src/core/execute.c b/src/core/execute.c
index 59c2bd0dd2..1b7b4a928d 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -411,7 +411,8 @@ static int fixup_output(ExecOutput std_output, int socket_fd) {
 static int setup_input(
                 const ExecContext *context,
                 const ExecParameters *params,
-                int socket_fd) {
+                int socket_fd,
+                int named_iofds[3]) {
 
         ExecInput i;
 
@@ -461,6 +462,10 @@ static int setup_input(
         case EXEC_INPUT_SOCKET:
                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 
+        case EXEC_INPUT_NAMED_FD:
+                (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
+                return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
         default:
                 assert_not_reached("Unknown input type");
         }
@@ -472,6 +477,7 @@ static int setup_output(
                 const ExecParameters *params,
                 int fileno,
                 int socket_fd,
+                int named_iofds[3],
                 const char *ident,
                 uid_t uid,
                 gid_t gid,
@@ -523,7 +529,7 @@ static int setup_output(
                         return fileno;
 
                 /* Duplicate from stdout if possible */
-                if (e == o || e == EXEC_OUTPUT_INHERIT)
+                if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
 
                 o = e;
@@ -585,6 +591,10 @@ static int setup_output(
                 assert(socket_fd >= 0);
                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
 
+        case EXEC_OUTPUT_NAMED_FD:
+                (void) fd_nonblock(named_iofds[fileno], false);
+                return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
+
         default:
                 assert_not_reached("Unknown error type");
         }
@@ -2157,6 +2167,7 @@ static int exec_child(
                 DynamicCreds *dcreds,
                 char **argv,
                 int socket_fd,
+                int named_iofds[3],
                 int *fds, unsigned n_fds,
                 char **files_env,
                 int user_lookup_fd,
@@ -2298,19 +2309,19 @@ static int exec_child(
         if (socket_fd >= 0)
                 (void) fd_nonblock(socket_fd, false);
 
-        r = setup_input(context, params, socket_fd);
+        r = setup_input(context, params, socket_fd, named_iofds);
         if (r < 0) {
                 *exit_status = EXIT_STDIN;
                 return r;
         }
 
-        r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+        r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDOUT;
                 return r;
         }
 
-        r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+        r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDERR;
                 return r;
@@ -2829,6 +2840,7 @@ int exec_spawn(Unit *unit,
         int *fds = NULL; unsigned n_fds = 0;
         _cleanup_free_ char *line = NULL;
         int socket_fd, r;
+        int named_iofds[3] = { -1, -1, -1 };
         char **argv;
         pid_t pid;
 
@@ -2855,6 +2867,10 @@ int exec_spawn(Unit *unit,
                 n_fds = params->n_fds;
         }
 
+        r = exec_context_named_iofds(unit, context, params, named_iofds);
+        if (r < 0)
+                return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
+
         r = exec_context_load_environment(unit, context, &files_env);
         if (r < 0)
                 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
@@ -2884,6 +2900,7 @@ int exec_spawn(Unit *unit,
                                dcreds,
                                argv,
                                socket_fd,
+                               named_iofds,
                                fds, n_fds,
                                files_env,
                                unit->manager->user_lookup_fds[1],
@@ -2946,6 +2963,9 @@ void exec_context_done(ExecContext *c) {
         for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
                 c->rlimit[l] = mfree(c->rlimit[l]);
 
+        for (l = 0; l < 3; l++)
+                c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
+
         c->working_directory = mfree(c->working_directory);
         c->root_directory = mfree(c->root_directory);
         c->tty_path = mfree(c->tty_path);
@@ -3044,6 +3064,56 @@ static void invalid_env(const char *p, void *userdata) {
         log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
 }
 
+const char* exec_context_fdname(const ExecContext *c, int fd_index) {
+        assert(c);
+
+        switch (fd_index) {
+        case STDIN_FILENO:
+                if (c->std_input != EXEC_INPUT_NAMED_FD)
+                        return NULL;
+                return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
+        case STDOUT_FILENO:
+                if (c->std_output != EXEC_OUTPUT_NAMED_FD)
+                        return NULL;
+                return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
+        case STDERR_FILENO:
+                if (c->std_error != EXEC_OUTPUT_NAMED_FD)
+                        return NULL;
+                return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
+        default:
+                return NULL;
+        }
+}
+
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
+        unsigned i, targets;
+        const char *stdio_fdname[3];
+
+        assert(c);
+        assert(p);
+
+        targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
+                  (c->std_output == EXEC_OUTPUT_NAMED_FD) +
+                  (c->std_error == EXEC_OUTPUT_NAMED_FD);
+
+        for (i = 0; i < 3; i++)
+                stdio_fdname[i] = exec_context_fdname(c, i);
+
+        for (i = 0; i < p->n_fds && targets > 0; i++)
+                if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
+                        named_iofds[STDIN_FILENO] = p->fds[i];
+                        targets--;
+                } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
+                        named_iofds[STDOUT_FILENO] = p->fds[i];
+                        targets--;
+                } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
+                        named_iofds[STDERR_FILENO] = p->fds[i];
+                        targets--;
+                }
+
+        return (targets == 0 ? 0 : -ENOENT);
+}
+
 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
         char **i, **r = NULL;
 
@@ -3896,7 +3966,8 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
         [EXEC_INPUT_TTY] = "tty",
         [EXEC_INPUT_TTY_FORCE] = "tty-force",
         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
-        [EXEC_INPUT_SOCKET] = "socket"
+        [EXEC_INPUT_SOCKET] = "socket",
+        [EXEC_INPUT_NAMED_FD] = "fd",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
@@ -3911,7 +3982,8 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
         [EXEC_OUTPUT_JOURNAL] = "journal",
         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
-        [EXEC_OUTPUT_SOCKET] = "socket"
+        [EXEC_OUTPUT_SOCKET] = "socket",
+        [EXEC_OUTPUT_NAMED_FD] = "fd",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
diff --git a/src/core/execute.h b/src/core/execute.h
index 1de439c3ad..c7d0f7761e 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -50,6 +50,7 @@ typedef enum ExecInput {
         EXEC_INPUT_TTY_FORCE,
         EXEC_INPUT_TTY_FAIL,
         EXEC_INPUT_SOCKET,
+        EXEC_INPUT_NAMED_FD,
         _EXEC_INPUT_MAX,
         _EXEC_INPUT_INVALID = -1
 } ExecInput;
@@ -65,6 +66,7 @@ typedef enum ExecOutput {
         EXEC_OUTPUT_JOURNAL,
         EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
         EXEC_OUTPUT_SOCKET,
+        EXEC_OUTPUT_NAMED_FD,
         _EXEC_OUTPUT_MAX,
         _EXEC_OUTPUT_INVALID = -1
 } ExecOutput;
@@ -120,6 +122,7 @@ struct ExecContext {
         ExecInput std_input;
         ExecOutput std_output;
         ExecOutput std_error;
+        char *stdio_fdname[3];
 
         nsec_t timer_slack_nsec;
 
@@ -284,6 +287,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix);
 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_root);
 
 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l);
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
+const char* exec_context_fdname(const ExecContext *c, int fd_index);
 
 bool exec_context_may_touch_console(ExecContext *c);
 bool exec_context_maintains_privileges(ExecContext *c);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index a700d853cc..08c88b6b53 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -35,9 +35,9 @@ $1.Environment,                  config_parse_environ,               0,
 $1.EnvironmentFile,              config_parse_unit_env_file,         0,                             offsetof($1, exec_context.environment_files)
 $1.PassEnvironment,              config_parse_pass_environ,          0,                             offsetof($1, exec_context.pass_environment)
 $1.DynamicUser,                  config_parse_bool,                  0,                             offsetof($1, exec_context.dynamic_user)
-$1.StandardInput,                config_parse_input,                 0,                             offsetof($1, exec_context.std_input)
-$1.StandardOutput,               config_parse_output,                0,                             offsetof($1, exec_context.std_output)
-$1.StandardError,                config_parse_output,                0,                             offsetof($1, exec_context.std_error)
+$1.StandardInput,                config_parse_exec_input,            0,                             offsetof($1, exec_context)
+$1.StandardOutput,               config_parse_exec_output,           0,                             offsetof($1, exec_context)
+$1.StandardError,                config_parse_exec_output,           0,                             offsetof($1, exec_context)
 $1.TTYPath,                      config_parse_unit_path_printf,      0,                             offsetof($1, exec_context.tty_path)
 $1.TTYReset,                     config_parse_bool,                  0,                             offsetof($1, exec_context.tty_reset)
 $1.TTYVHangup,                   config_parse_bool,                  0,                             offsetof($1, exec_context.tty_vhangup)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 8cc7a8e765..a69f60097d 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -776,8 +776,104 @@ int config_parse_socket_bindtodevice(
         return 0;
 }
 
-DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input literal specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output literal specifier");
+
+int config_parse_exec_input(const char *unit,
+                            const char *filename,
+                            unsigned line,
+                            const char *section,
+                            unsigned section_line,
+                            const char *lvalue,
+                            int ltype,
+                            const char *rvalue,
+                            void *data,
+                            void *userdata) {
+        ExecContext *c = data;
+        const char *name;
+        int r;
+
+        assert(data);
+        assert(filename);
+        assert(line);
+        assert(rvalue);
+
+        name = startswith(rvalue, "fd:");
+        if (name) {
+                /* Strip prefix and validate fd name */
+                if (!fdname_is_valid(name)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+                        return 0;
+                }
+                c->std_input = EXEC_INPUT_NAMED_FD;
+                r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], name);
+                if (r < 0)
+                        log_oom();
+                return r;
+        } else {
+                ExecInput ei = exec_input_from_string(rvalue);
+                if (ei == _EXEC_INPUT_INVALID)
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue);
+                else
+                        c->std_input = ei;
+                return 0;
+        }
+}
+
+int config_parse_exec_output(const char *unit,
+                             const char *filename,
+                             unsigned line,
+                             const char *section,
+                             unsigned section_line,
+                             const char *lvalue,
+                             int ltype,
+                             const char *rvalue,
+                             void *data,
+                             void *userdata) {
+        ExecContext *c = data;
+        ExecOutput eo;
+        const char *name;
+        int r;
+
+        assert(data);
+        assert(filename);
+        assert(line);
+        assert(lvalue);
+        assert(rvalue);
+
+        name = startswith(rvalue, "fd:");
+        if (name) {
+                /* Strip prefix and validate fd name */
+                if (!fdname_is_valid(name)) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+                        return 0;
+                }
+                eo = EXEC_OUTPUT_NAMED_FD;
+        } else {
+                eo = exec_output_from_string(rvalue);
+                if (eo == _EXEC_OUTPUT_INVALID) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue);
+                        return 0;
+                }
+        }
+
+        if (streq(lvalue, "StandardOutput")) {
+                c->std_output = eo;
+                r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], name);
+                if (r < 0)
+                        log_oom();
+                return r;
+        } else if (streq(lvalue, "StandardError")) {
+                c->std_error = eo;
+                r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], name);
+                if (r < 0)
+                        log_oom();
+                return r;
+        } else {
+                log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output property, ignoring: %s", lvalue);
+                return 0;
+        }
+}
 
 int config_parse_exec_io_class(const char *unit,
                                const char *filename,
@@ -4183,8 +4279,8 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_exec_cpu_affinity,     "CPUAFFINITY" },
                 { config_parse_mode,                  "MODE" },
                 { config_parse_unit_env_file,         "FILE" },
-                { config_parse_output,                "OUTPUT" },
-                { config_parse_input,                 "INPUT" },
+                { config_parse_exec_output,           "OUTPUT" },
+                { config_parse_exec_input,            "INPUT" },
                 { config_parse_log_facility,          "FACILITY" },
                 { config_parse_log_level,             "LEVEL" },
                 { config_parse_exec_secure_bits,      "SECUREBITS" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 8b688740cf..6d1fe55bcd 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -45,7 +45,9 @@ int config_parse_service_timeout(const char *unit, const char *filename, unsigne
 int config_parse_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_service_restart(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_socket_bindtodevice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_io_class(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_io_priority(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/unit.c b/src/core/unit.c
index b24ca5aed8..2fa397bd41 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -858,18 +858,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
                         return r;
         }
 
-        if (c->std_output != EXEC_OUTPUT_KMSG &&
-            c->std_output != EXEC_OUTPUT_SYSLOG &&
-            c->std_output != EXEC_OUTPUT_JOURNAL &&
-            c->std_output != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
-            c->std_output != EXEC_OUTPUT_SYSLOG_AND_CONSOLE &&
-            c->std_output != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
-            c->std_error != EXEC_OUTPUT_KMSG &&
-            c->std_error != EXEC_OUTPUT_SYSLOG &&
-            c->std_error != EXEC_OUTPUT_JOURNAL &&
-            c->std_error != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
-            c->std_error != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
-            c->std_error != EXEC_OUTPUT_SYSLOG_AND_CONSOLE)
+        if (!IN_SET(c->std_output,
+                    EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+                    EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+                    EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) &&
+            !IN_SET(c->std_error,
+                    EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+                    EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+                    EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE))
                 return 0;
 
         /* If syslog or kernel logging is requested, make sure our own
-- 
cgit v1.2.3-54-g00ecf


From 5368222db6093195dbbd5fc7418508b154b1b769 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Thu, 20 Oct 2016 01:48:35 +0200
Subject: core: let's upgrade the log level for service processes dying of
 signal (#4415)

As suggested in
https://github.com/systemd/systemd/pull/4367#issuecomment-253670328
---
 src/core/service.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index c949de9cbe..f9127d7509 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -2646,7 +2646,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                 f = SERVICE_SUCCESS;
                 }
 
-                log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+                /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
+                 * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
+                 * that the service already logged the reason at a higher log level on its own. However, if the service
+                 * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise
+                 * our log level to WARNING then. */
+
+                log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG :
+                           (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING),
                            LOG_UNIT_ID(u),
                            LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s",
                                             sigchld_code_to_string(code), status,
-- 
cgit v1.2.3-54-g00ecf


From 3ce40911bdfee404ff816471c77a6afd7d8b2271 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Tue, 18 Oct 2016 13:38:41 -0400
Subject: pid1: downgrade some rlimit warnings

Since we ignore the result anyway, downgrade errors to warning.

log_oom() will still emit an error, but that's mostly theoretical, so it
is not worth complicating the code to avoid the small inconsistency
---
 src/core/main.c    | 6 +++---
 src/core/manager.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/src/core/main.c b/src/core/main.c
index 61f3828a36..cf3c640a73 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1124,7 +1124,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
          * later when transitioning from the initrd to the main
          * systemd or suchlike. */
         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
-                return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
+                return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
 
         /* Make sure forked processes get the default kernel setting */
         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
@@ -1141,7 +1141,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
         nl.rlim_cur = nl.rlim_max = 64*1024;
         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
         if (r < 0)
-                return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
+                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
 
         return 0;
 }
@@ -1775,7 +1775,7 @@ int main(int argc, char *argv[]) {
                         log_warning_errno(errno, "Failed to make us a subreaper: %m");
 
         if (arg_system) {
-                bump_rlimit_nofile(&saved_rlimit_nofile);
+                (void) bump_rlimit_nofile(&saved_rlimit_nofile);
 
                 if (empty_etc) {
                         r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0);
diff --git a/src/core/manager.c b/src/core/manager.c
index 50aae0d1ba..65f163de31 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -3098,7 +3098,7 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
 
                 m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
                 if (!m->rlimit[i])
-                        return -ENOMEM;
+                        return log_oom();
         }
 
         return 0;
-- 
cgit v1.2.3-54-g00ecf


From 8ae2c6300feffc598f1b13b9268e7312c647d19e Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 19 Oct 2016 23:30:50 +0200
Subject: journald,core: add short comments we we keep reopening /dev/console
 all the time

Just to make sure the next one reading this isn't surprised that the fd isn't
kept open. SAK and stuff...

Fix suggested:

https://github.com/systemd/systemd/pull/4366#issuecomment-253659162
---
 src/core/show-status.c         | 5 +++++
 src/journal/journald-console.c | 5 +++++
 2 files changed, 10 insertions(+)

(limited to 'src/core')

diff --git a/src/core/show-status.c b/src/core/show-status.c
index 59ebdc7219..65f9cb888a 100644
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@@ -61,6 +61,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
         if (vasprintf(&s, format, ap) < 0)
                 return log_oom();
 
+        /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This
+         * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+         * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier
+         * for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
         if (fd < 0)
                 return fd;
diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c
index fcc9f25814..3a9fba42a3 100644
--- a/src/journal/journald-console.c
+++ b/src/journal/journald-console.c
@@ -102,6 +102,11 @@ void server_forward_console(
 
         tty = s->tty_path ? s->tty_path : "/dev/console";
 
+        /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
+         * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+         * but minimizes the time window the kernel might end up killing journald due to SAK). It also makes things
+         * easier for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
         fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
         if (fd < 0) {
                 log_debug_errno(fd, "Failed to open %s for logging: %m", tty);
-- 
cgit v1.2.3-54-g00ecf


From 47fffb3530af3e3ad4048570611685635fde062e Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 12:27:46 +0200
Subject: core: if the start command vanishes during runtime don't hit an
 assert

This can happen when the configuration is changed and reloaded while we are
executing a service. Let's not hit an assert in this case.

Fixes: #4444
---
 src/core/service.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index f9127d7509..53c26984ca 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1756,7 +1756,15 @@ static void service_enter_start(Service *s) {
         }
 
         if (!c) {
-                assert(s->type == SERVICE_ONESHOT);
+                if (s->type != SERVICE_ONESHOT) {
+                        /* There's no command line configured for the main command? Hmm, that is strange. This can only
+                         * happen if the configuration changes at runtime. In this case, let's enter a failure
+                         * state. */
+                        log_unit_error(UNIT(s), "There's no 'start' task anymore we could start: %m");
+                        r = -ENXIO;
+                        goto fail;
+                }
+
                 service_enter_start_post(s);
                 return;
         }
-- 
cgit v1.2.3-54-g00ecf


From 87a47f99bc8e576a63581ad2593c62eb10a53814 Mon Sep 17 00:00:00 2001
From: Lukas Nykryn <lnykryn@redhat.com>
Date: Thu, 20 Oct 2016 15:27:37 +0200
Subject: failure-action: generalize failure action to emergency action

---
 Makefile.am                           |   4 +-
 src/core/dbus-service.c               |   6 +-
 src/core/dbus-unit.c                  |   6 +-
 src/core/emergency-action.c           | 128 ++++++++++++++++++++++++++++++++++
 src/core/emergency-action.h           |  41 +++++++++++
 src/core/failure-action.c             | 127 ---------------------------------
 src/core/failure-action.h             |  41 -----------
 src/core/job.c                        |   2 +-
 src/core/load-fragment-gperf.gperf.m4 |   8 +--
 src/core/load-fragment.c              |   4 +-
 src/core/load-fragment.h              |   2 +-
 src/core/service.c                    |   2 +-
 src/core/service.h                    |   2 +-
 src/core/unit.c                       |   6 +-
 src/core/unit.h                       |   6 +-
 src/test/test-tables.c                |   2 +-
 16 files changed, 194 insertions(+), 193 deletions(-)
 create mode 100644 src/core/emergency-action.c
 create mode 100644 src/core/emergency-action.h
 delete mode 100644 src/core/failure-action.c
 delete mode 100644 src/core/failure-action.h

(limited to 'src/core')

diff --git a/Makefile.am b/Makefile.am
index 00124a29f8..07acce347e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1275,8 +1275,8 @@ libcore_la_SOURCES = \
 	src/core/audit-fd.h \
 	src/core/show-status.c \
 	src/core/show-status.h \
-	src/core/failure-action.c \
-	src/core/failure-action.h
+	src/core/emergency-action.c \
+	src/core/emergency-action.h
 
 nodist_libcore_la_SOURCES = \
 	src/core/load-fragment-gperf.c \
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 3c55e0f7fe..61b83d2d62 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -36,7 +36,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
 
 const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_VTABLE_START(0),
@@ -50,7 +50,7 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
-        SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Service, emergency_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -79,7 +79,7 @@ const sd_bus_vtable bus_service_vtable[] = {
         /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
         SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
-        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
         SD_BUS_VTABLE_END
 };
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 245912fc0f..8f34fa1a52 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -37,7 +37,7 @@
 
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
 
 static int property_get_names(
                 sd_bus *bus,
@@ -750,7 +750,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
@@ -762,7 +762,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0),
 
diff --git a/src/core/emergency-action.c b/src/core/emergency-action.c
new file mode 100644
index 0000000000..90232bc57a
--- /dev/null
+++ b/src/core/emergency-action.c
@@ -0,0 +1,128 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+  Copyright 2012 Michael Olbrich
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/reboot.h>
+#include <linux/reboot.h>
+
+#include "bus-error.h"
+#include "bus-util.h"
+#include "emergency-action.h"
+#include "special.h"
+#include "string-table.h"
+#include "terminal-util.h"
+
+static void log_and_status(Manager *m, const char *message, const char *reason) {
+        log_warning("%s: %s", message, reason);
+        manager_status_printf(m, STATUS_TYPE_EMERGENCY,
+                              ANSI_HIGHLIGHT_RED " !!  " ANSI_NORMAL,
+                              "%s: %s", message, reason);
+}
+
+int emergency_action(
+                Manager *m,
+                EmergencyAction action,
+                const char *reboot_arg,
+                const char *reason) {
+
+        assert(m);
+        assert(action >= 0);
+        assert(action < _EMERGENCY_ACTION_MAX);
+
+        if (action == EMERGENCY_ACTION_NONE)
+                return -ECANCELED;
+
+        if (!MANAGER_IS_SYSTEM(m)) {
+                /* Downgrade all options to simply exiting if we run
+                 * in user mode */
+
+                log_warning("Exiting: %s", reason);
+                m->exit_code = MANAGER_EXIT;
+                return -ECANCELED;
+        }
+
+        switch (action) {
+
+        case EMERGENCY_ACTION_REBOOT:
+                log_and_status(m, "Rebooting", reason);
+
+                (void) update_reboot_parameter_and_warn(reboot_arg);
+                (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
+
+                break;
+
+        case EMERGENCY_ACTION_REBOOT_FORCE:
+                log_and_status(m, "Forcibly rebooting", reason);
+
+                (void) update_reboot_parameter_and_warn(reboot_arg);
+                m->exit_code = MANAGER_REBOOT;
+
+                break;
+
+        case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
+                log_and_status(m, "Rebooting immediately", reason);
+
+                sync();
+
+                if (!isempty(reboot_arg)) {
+                        log_info("Rebooting with argument '%s'.", reboot_arg);
+                        syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, reboot_arg);
+                        log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
+                }
+
+                log_info("Rebooting.");
+                reboot(RB_AUTOBOOT);
+                break;
+
+        case EMERGENCY_ACTION_POWEROFF:
+                log_and_status(m, "Powering off", reason);
+                (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
+                break;
+
+        case EMERGENCY_ACTION_POWEROFF_FORCE:
+                log_and_status(m, "Forcibly powering off", reason);
+                m->exit_code = MANAGER_POWEROFF;
+                break;
+
+        case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
+                log_and_status(m, "Powering off immediately", reason);
+
+                sync();
+
+                log_info("Powering off.");
+                reboot(RB_POWER_OFF);
+                break;
+
+        default:
+                assert_not_reached("Unknown emergency action");
+        }
+
+        return -ECANCELED;
+}
+
+static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
+        [EMERGENCY_ACTION_NONE] = "none",
+        [EMERGENCY_ACTION_REBOOT] = "reboot",
+        [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force",
+        [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
+        [EMERGENCY_ACTION_POWEROFF] = "poweroff",
+        [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
+        [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
+};
+DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
diff --git a/src/core/emergency-action.h b/src/core/emergency-action.h
new file mode 100644
index 0000000000..8804b59752
--- /dev/null
+++ b/src/core/emergency-action.h
@@ -0,0 +1,41 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+  Copyright 2012 Michael Olbrich
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+typedef enum EmergencyAction {
+        EMERGENCY_ACTION_NONE,
+        EMERGENCY_ACTION_REBOOT,
+        EMERGENCY_ACTION_REBOOT_FORCE,
+        EMERGENCY_ACTION_REBOOT_IMMEDIATE,
+        EMERGENCY_ACTION_POWEROFF,
+        EMERGENCY_ACTION_POWEROFF_FORCE,
+        EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+        _EMERGENCY_ACTION_MAX,
+        _EMERGENCY_ACTION_INVALID = -1
+} EmergencyAction;
+
+#include "macro.h"
+#include "manager.h"
+
+int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason);
+
+const char* emergency_action_to_string(EmergencyAction i) _const_;
+EmergencyAction emergency_action_from_string(const char *s) _pure_;
diff --git a/src/core/failure-action.c b/src/core/failure-action.c
deleted file mode 100644
index ddae46190f..0000000000
--- a/src/core/failure-action.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/***
-  This file is part of systemd.
-
-  Copyright 2014 Lennart Poettering
-  Copyright 2012 Michael Olbrich
-
-  systemd is free software; you can redistribute it and/or modify it
-  under the terms of the GNU Lesser General Public License as published by
-  the Free Software Foundation; either version 2.1 of the License, or
-  (at your option) any later version.
-
-  systemd is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public License
-  along with systemd; If not, see <http://www.gnu.org/licenses/>.
-***/
-
-#include <sys/reboot.h>
-#include <linux/reboot.h>
-
-#include "bus-error.h"
-#include "bus-util.h"
-#include "failure-action.h"
-#include "special.h"
-#include "string-table.h"
-#include "terminal-util.h"
-
-static void log_and_status(Manager *m, const char *message) {
-        log_warning("%s", message);
-        manager_status_printf(m, STATUS_TYPE_EMERGENCY,
-                              ANSI_HIGHLIGHT_RED " !!  " ANSI_NORMAL,
-                              "%s", message);
-}
-
-int failure_action(
-                Manager *m,
-                FailureAction action,
-                const char *reboot_arg) {
-
-        assert(m);
-        assert(action >= 0);
-        assert(action < _FAILURE_ACTION_MAX);
-
-        if (action == FAILURE_ACTION_NONE)
-                return -ECANCELED;
-
-        if (!MANAGER_IS_SYSTEM(m)) {
-                /* Downgrade all options to simply exiting if we run
-                 * in user mode */
-
-                log_warning("Exiting as result of failure.");
-                m->exit_code = MANAGER_EXIT;
-                return -ECANCELED;
-        }
-
-        switch (action) {
-
-        case FAILURE_ACTION_REBOOT:
-                log_and_status(m, "Rebooting as result of failure.");
-
-                (void) update_reboot_parameter_and_warn(reboot_arg);
-                (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
-
-                break;
-
-        case FAILURE_ACTION_REBOOT_FORCE:
-                log_and_status(m, "Forcibly rebooting as result of failure.");
-
-                (void) update_reboot_parameter_and_warn(reboot_arg);
-                m->exit_code = MANAGER_REBOOT;
-
-                break;
-
-        case FAILURE_ACTION_REBOOT_IMMEDIATE:
-                log_and_status(m, "Rebooting immediately as result of failure.");
-
-                sync();
-
-                if (!isempty(reboot_arg)) {
-                        log_info("Rebooting with argument '%s'.", reboot_arg);
-                        syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, reboot_arg);
-                        log_warning_errno(errno, "Failed to reboot with parameter, retrying without: %m");
-                }
-
-                log_info("Rebooting.");
-                reboot(RB_AUTOBOOT);
-                break;
-
-        case FAILURE_ACTION_POWEROFF:
-                log_and_status(m, "Powering off as result of failure.");
-                (void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
-                break;
-
-        case FAILURE_ACTION_POWEROFF_FORCE:
-                log_and_status(m, "Forcibly powering off as result of failure.");
-                m->exit_code = MANAGER_POWEROFF;
-                break;
-
-        case FAILURE_ACTION_POWEROFF_IMMEDIATE:
-                log_and_status(m, "Powering off immediately as result of failure.");
-
-                sync();
-
-                log_info("Powering off.");
-                reboot(RB_POWER_OFF);
-                break;
-
-        default:
-                assert_not_reached("Unknown failure action");
-        }
-
-        return -ECANCELED;
-}
-
-static const char* const failure_action_table[_FAILURE_ACTION_MAX] = {
-        [FAILURE_ACTION_NONE] = "none",
-        [FAILURE_ACTION_REBOOT] = "reboot",
-        [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force",
-        [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
-        [FAILURE_ACTION_POWEROFF] = "poweroff",
-        [FAILURE_ACTION_POWEROFF_FORCE] = "poweroff-force",
-        [FAILURE_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
-};
-DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction);
diff --git a/src/core/failure-action.h b/src/core/failure-action.h
deleted file mode 100644
index 1adac4ad5c..0000000000
--- a/src/core/failure-action.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#pragma once
-
-/***
-  This file is part of systemd.
-
-  Copyright 2014 Lennart Poettering
-  Copyright 2012 Michael Olbrich
-
-  systemd is free software; you can redistribute it and/or modify it
-  under the terms of the GNU Lesser General Public License as published by
-  the Free Software Foundation; either version 2.1 of the License, or
-  (at your option) any later version.
-
-  systemd is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public License
-  along with systemd; If not, see <http://www.gnu.org/licenses/>.
-***/
-
-typedef enum FailureAction {
-        FAILURE_ACTION_NONE,
-        FAILURE_ACTION_REBOOT,
-        FAILURE_ACTION_REBOOT_FORCE,
-        FAILURE_ACTION_REBOOT_IMMEDIATE,
-        FAILURE_ACTION_POWEROFF,
-        FAILURE_ACTION_POWEROFF_FORCE,
-        FAILURE_ACTION_POWEROFF_IMMEDIATE,
-        _FAILURE_ACTION_MAX,
-        _FAILURE_ACTION_INVALID = -1
-} FailureAction;
-
-#include "macro.h"
-#include "manager.h"
-
-int failure_action(Manager *m, FailureAction action, const char *reboot_arg);
-
-const char* failure_action_to_string(FailureAction i) _const_;
-FailureAction failure_action_from_string(const char *s) _pure_;
diff --git a/src/core/job.c b/src/core/job.c
index 7faf2ef686..3ecc8a1a73 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -927,7 +927,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
         u = j->unit;
         job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
 
-        failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
+        emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out");
 
         return 0;
 }
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 08c88b6b53..af2f9d960b 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -188,13 +188,13 @@ Unit.OnFailureIsolate,           config_parse_job_mode_isolate,      0,
 Unit.IgnoreOnIsolate,            config_parse_bool,                  0,                             offsetof(Unit, ignore_on_isolate)
 Unit.IgnoreOnSnapshot,           config_parse_warn_compat,           DISABLED_LEGACY,               0
 Unit.JobTimeoutSec,              config_parse_sec_fix_0,             0,                             offsetof(Unit, job_timeout)
-Unit.JobTimeoutAction,           config_parse_failure_action,        0,                             offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutAction,           config_parse_emergency_action,      0,                             offsetof(Unit, job_timeout_action)
 Unit.JobTimeoutRebootArgument,   config_parse_string,                0,                             offsetof(Unit, job_timeout_reboot_arg)
 Unit.StartLimitIntervalSec,      config_parse_sec,                   0,                             offsetof(Unit, start_limit.interval)
 m4_dnl The following is a legacy alias name for compatibility
 Unit.StartLimitInterval,         config_parse_sec,                   0,                             offsetof(Unit, start_limit.interval)
 Unit.StartLimitBurst,            config_parse_unsigned,              0,                             offsetof(Unit, start_limit.burst)
-Unit.StartLimitAction,           config_parse_failure_action,        0,                             offsetof(Unit, start_limit_action)
+Unit.StartLimitAction,           config_parse_emergency_action,      0,                             offsetof(Unit, start_limit_action)
 Unit.RebootArgument,             config_parse_string,                0,                             offsetof(Unit, reboot_arg)
 Unit.ConditionPathExists,        config_parse_unit_condition_path,   CONDITION_PATH_EXISTS,         offsetof(Unit, conditions)
 Unit.ConditionPathExistsGlob,    config_parse_unit_condition_path,   CONDITION_PATH_EXISTS_GLOB,    offsetof(Unit, conditions)
@@ -251,9 +251,9 @@ Service.WatchdogSec,             config_parse_sec,                   0,
 m4_dnl The following three only exist for compatibility, they moved into Unit, see above
 Service.StartLimitInterval,      config_parse_sec,                   0,                             offsetof(Unit, start_limit.interval)
 Service.StartLimitBurst,         config_parse_unsigned,              0,                             offsetof(Unit, start_limit.burst)
-Service.StartLimitAction,        config_parse_failure_action,        0,                             offsetof(Unit, start_limit_action)
+Service.StartLimitAction,        config_parse_emergency_action,      0,                             offsetof(Unit, start_limit_action)
 Service.RebootArgument,          config_parse_string,                0,                             offsetof(Unit, reboot_arg)
-Service.FailureAction,           config_parse_failure_action,        0,                             offsetof(Service, failure_action)
+Service.FailureAction,           config_parse_emergency_action,      0,                             offsetof(Service, emergency_action)
 Service.Type,                    config_parse_service_type,          0,                             offsetof(Service, type)
 Service.Restart,                 config_parse_service_restart,       0,                             offsetof(Service, restart)
 Service.PermissionsStartOnly,    config_parse_bool,                  0,                             offsetof(Service, permissions_start_only)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index a69f60097d..6f68e23340 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2523,7 +2523,7 @@ int config_parse_unit_condition_null(
 }
 
 DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_failure_action, failure_action, FailureAction, "Failed to parse failure action specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier");
 
 int config_parse_unit_requires_mounts_for(
                 const char *unit,
@@ -4315,7 +4315,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_unit_slice,            "SLICE" },
                 { config_parse_documentation,         "URL" },
                 { config_parse_service_timeout,       "SECONDS" },
-                { config_parse_failure_action,        "ACTION" },
+                { config_parse_emergency_action,      "ACTION" },
                 { config_parse_set_status,            "STATUS" },
                 { config_parse_service_sockets,       "SOCKETS" },
                 { config_parse_environ,               "ENVIRON" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 6d1fe55bcd..c05f205c37 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -75,7 +75,7 @@ int config_parse_unit_condition_string(const char *unit, const char *filename, u
 int config_parse_unit_condition_null(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_failure_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_emergency_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_syscall_archs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/service.c b/src/core/service.c
index f9127d7509..5b92c901e3 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1455,7 +1455,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
 
         if (s->result != SERVICE_SUCCESS) {
                 log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result));
-                failure_action(UNIT(s)->manager, s->failure_action, UNIT(s)->reboot_arg);
+                emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed");
         }
 
         if (allow_restart && service_shall_restart(s)) {
diff --git a/src/core/service.h b/src/core/service.h
index 888007cc0b..2869144fcb 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -178,7 +178,7 @@ struct Service {
         char *status_text;
         int status_errno;
 
-        FailureAction failure_action;
+        EmergencyAction emergency_action;
 
         UnitRef accept_socket;
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 2fa397bd41..cabb1050a8 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -982,8 +982,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         if (u->job_timeout != USEC_INFINITY)
                 fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
 
-        if (u->job_timeout_action != FAILURE_ACTION_NONE)
-                fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action));
+        if (u->job_timeout_action != EMERGENCY_ACTION_NONE)
+                fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action));
 
         if (u->job_timeout_reboot_arg)
                 fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
@@ -1490,7 +1490,7 @@ int unit_start_limit_test(Unit *u) {
         log_unit_warning(u, "Start request repeated too quickly.");
         u->start_limit_hit = true;
 
-        return failure_action(u->manager, u->start_limit_action, u->reboot_arg);
+        return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed");
 }
 
 /* Errors:
diff --git a/src/core/unit.h b/src/core/unit.h
index a8dd3e602c..adcdee6db6 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -29,7 +29,7 @@ typedef struct UnitRef UnitRef;
 typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
 
 #include "condition.h"
-#include "failure-action.h"
+#include "emergency-action.h"
 #include "install.h"
 #include "list.h"
 #include "unit-name.h"
@@ -114,7 +114,7 @@ struct Unit {
 
         /* Job timeout and action to take */
         usec_t job_timeout;
-        FailureAction job_timeout_action;
+        EmergencyAction job_timeout_action;
         char *job_timeout_reboot_arg;
 
         /* References to this */
@@ -178,7 +178,7 @@ struct Unit {
 
         /* Put a ratelimit on unit starting */
         RateLimit start_limit;
-        FailureAction start_limit_action;
+        EmergencyAction start_limit_action;
         char *reboot_arg;
 
         /* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
diff --git a/src/test/test-tables.c b/src/test/test-tables.c
index 0be74921fc..8d4622694e 100644
--- a/src/test/test-tables.c
+++ b/src/test/test-tables.c
@@ -63,7 +63,7 @@ int main(int argc, char **argv) {
         test_table(device_state, DEVICE_STATE);
         test_table(exec_input, EXEC_INPUT);
         test_table(exec_output, EXEC_OUTPUT);
-        test_table(failure_action, FAILURE_ACTION);
+        test_table(emergency_action, EMERGENCY_ACTION);
         test_table(job_mode, JOB_MODE);
         test_table(job_result, JOB_RESULT);
         test_table(job_state, JOB_STATE);
-- 
cgit v1.2.3-54-g00ecf


From ae8c7939df962cbf660b2b9517fe46be272f58b9 Mon Sep 17 00:00:00 2001
From: Lukas Nykryn <lnykryn@redhat.com>
Date: Tue, 18 Oct 2016 12:16:32 +0200
Subject: core: use emergency_action for ctr+alt+del burst

Fixes #4306
---
 man/systemd-system.conf.xml |  5 +++--
 src/core/main.c             |  7 +++----
 src/core/manager.c          | 33 ++++-----------------------------
 src/core/manager.h          | 13 +------------
 4 files changed, 11 insertions(+), 47 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
index 1d995f143e..80cad7f09c 100644
--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@@ -110,8 +110,9 @@
 
         <listitem><para>Defines what action will be performed
         if user presses Ctrl-Alt-Delete more than 7 times in 2s.
-        Can be set to <literal>reboot-force</literal>, <literal>poweroff-force</literal>
-        or disabled with <literal>ignore</literal>. Defaults to
+        Can be set to <literal>reboot-force</literal>, <literal>poweroff-force</literal>,
+        <literal>reboot-immediate</literal>, <literal>poweroff-immediate</literal>
+        or disabled with <literal>none</literal>. Defaults to
         <literal>reboot-force</literal>.
         </para></listitem>
       </varlistentry>
diff --git a/src/core/main.c b/src/core/main.c
index cf3c640a73..b635a633a7 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -89,6 +89,7 @@
 #include "user-util.h"
 #include "virt.h"
 #include "watchdog.h"
+#include "emergency-action.h"
 
 static enum {
         ACTION_RUN,
@@ -131,7 +132,7 @@ static bool arg_default_memory_accounting = false;
 static bool arg_default_tasks_accounting = true;
 static uint64_t arg_default_tasks_max = UINT64_MAX;
 static sd_id128_t arg_machine_id = {};
-static CADBurstAction arg_cad_burst_action = CAD_BURST_ACTION_REBOOT;
+static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
 
 noreturn static void freeze_or_reboot(void) {
 
@@ -649,8 +650,6 @@ static int config_parse_join_controllers(const char *unit,
         return 0;
 }
 
-static DEFINE_CONFIG_PARSE_ENUM(config_parse_cad_burst_action, cad_burst_action, CADBurstAction, "Failed to parse service restart specifier");
-
 static int parse_config_file(void) {
 
         const ConfigTableItem items[] = {
@@ -705,7 +704,7 @@ static int parse_config_file(void) {
                 { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
                 { "Manager", "DefaultTasksAccounting",    config_parse_bool,             0, &arg_default_tasks_accounting          },
                 { "Manager", "DefaultTasksMax",           config_parse_tasks_max,        0, &arg_default_tasks_max                 },
-                { "Manager", "CtrlAltDelBurstAction",     config_parse_cad_burst_action, 0, &arg_cad_burst_action},
+                { "Manager", "CtrlAltDelBurstAction",     config_parse_emergency_action, 0, &arg_cad_burst_action                  },
                 {}
         };
 
diff --git a/src/core/manager.c b/src/core/manager.c
index 65f163de31..ffccfdcd5e 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1911,28 +1911,11 @@ static void manager_handle_ctrl_alt_del(Manager *m) {
          * 7 times within 2s, we reboot/shutdown immediately,
          * unless it was disabled in system.conf */
 
-        if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == CAD_BURST_ACTION_IGNORE)
+        if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
                 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
-        else {
-                switch (m->cad_burst_action) {
-
-                case CAD_BURST_ACTION_REBOOT:
-                        m->exit_code = MANAGER_REBOOT;
-                        break;
-
-                case CAD_BURST_ACTION_POWEROFF:
-                        m->exit_code = MANAGER_POWEROFF;
-                        break;
-
-                default:
-                        assert_not_reached("Unknown action.");
-                }
-
-                log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, performing immediate %s.",
-                                cad_burst_action_to_string(m->cad_burst_action));
-                status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, performing immediate %s.",
-                                cad_burst_action_to_string(m->cad_burst_action));
-        }
+        else
+                emergency_action(m, m->cad_burst_action, NULL,
+                                "Ctrl-Alt-Del was pressed more than 7 times within 2s");
 }
 
 static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
@@ -3590,11 +3573,3 @@ static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
 };
 
 DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
-
-static const char *const cad_burst_action_table[_CAD_BURST_ACTION_MAX] = {
-        [CAD_BURST_ACTION_IGNORE] = "ignore",
-        [CAD_BURST_ACTION_REBOOT] = "reboot-force",
-        [CAD_BURST_ACTION_POWEROFF] = "poweroff-force",
-};
-
-DEFINE_STRING_TABLE_LOOKUP(cad_burst_action, CADBurstAction);
diff --git a/src/core/manager.h b/src/core/manager.h
index 29fe14e10b..35172fdba9 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -62,14 +62,6 @@ typedef enum ManagerExitCode {
         _MANAGER_EXIT_CODE_INVALID = -1
 } ManagerExitCode;
 
-typedef enum CADBurstAction {
-        CAD_BURST_ACTION_IGNORE,
-        CAD_BURST_ACTION_REBOOT,
-        CAD_BURST_ACTION_POWEROFF,
-        _CAD_BURST_ACTION_MAX,
-        _CAD_BURST_ACTION_INVALID = -1
-} CADBurstAction;
-
 typedef enum StatusType {
         STATUS_TYPE_EPHEMERAL,
         STATUS_TYPE_NORMAL,
@@ -315,7 +307,7 @@ struct Manager {
 
         /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
         RateLimit ctrl_alt_del_ratelimit;
-        CADBurstAction cad_burst_action;
+        EmergencyAction cad_burst_action;
 
         const char *unit_log_field;
         const char *unit_log_format_string;
@@ -411,6 +403,3 @@ void manager_deserialize_gid_refs_one(Manager *m, const char *value);
 
 const char *manager_state_to_string(ManagerState m) _const_;
 ManagerState manager_state_from_string(const char *s) _pure_;
-
-const char *cad_burst_action_to_string(CADBurstAction a) _const_;
-CADBurstAction cad_burst_action_from_string(const char *s) _pure_;
-- 
cgit v1.2.3-54-g00ecf


From 96287a491676763849ab73bda6c4486d7f775323 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 22 Oct 2016 14:24:52 -0400
Subject: tree-wide: allow state to be passed through to
 parse_proc_cmdline_item

No functional change.
---
 src/basic/log.c                                   | 4 ++--
 src/basic/proc-cmdline.c                          | 4 ++--
 src/basic/proc-cmdline.h                          | 2 +-
 src/core/main.c                                   | 4 ++--
 src/cryptsetup/cryptsetup-generator.c             | 4 ++--
 src/debug-generator/debug-generator.c             | 4 ++--
 src/fsck/fsck.c                                   | 4 ++--
 src/fstab-generator/fstab-generator.c             | 4 ++--
 src/gpt-auto-generator/gpt-auto-generator.c       | 4 ++--
 src/hibernate-resume/hibernate-resume-generator.c | 4 ++--
 src/modules-load/modules-load.c                   | 4 ++--
 src/quotacheck/quotacheck.c                       | 4 ++--
 src/test/test-proc-cmdline.c                      | 7 +++++--
 src/udev/udevd.c                                  | 4 ++--
 14 files changed, 30 insertions(+), 27 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/log.c b/src/basic/log.c
index bd6c96c4f8..40f342ca72 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -967,7 +967,7 @@ int log_set_max_level_from_string(const char *e) {
         return 0;
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 
         /*
          * The systemd.log_xyz= settings are parsed by all tools, and
@@ -1012,7 +1012,7 @@ void log_parse_environment(void) {
                 /* Only try to read the command line in daemons.
                    We assume that anything that has a controlling
                    tty is user stuff. */
-                (void) parse_proc_cmdline(parse_proc_cmdline_item);
+                (void) parse_proc_cmdline(parse_proc_cmdline_item, NULL);
 
         e = secure_getenv("SYSTEMD_LOG_TARGET");
         if (e && log_set_target_from_string(e) < 0)
diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c
index 0430beadaa..951db34d1a 100644
--- a/src/basic/proc-cmdline.c
+++ b/src/basic/proc-cmdline.c
@@ -42,7 +42,7 @@ int proc_cmdline(char **ret) {
                 return read_one_line_file("/proc/cmdline", ret);
 }
 
-int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value)) {
+int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value, void *data), void *data) {
         _cleanup_free_ char *line = NULL;
         const char *p;
         int r;
@@ -73,7 +73,7 @@ int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value)) {
                 if (value)
                         *(value++) = 0;
 
-                r = parse_item(word, value);
+                r = parse_item(word, value, data);
                 if (r < 0)
                         return r;
         }
diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h
index 452642a2f5..a8832d8419 100644
--- a/src/basic/proc-cmdline.h
+++ b/src/basic/proc-cmdline.h
@@ -20,7 +20,7 @@
 ***/
 
 int proc_cmdline(char **ret);
-int parse_proc_cmdline(int (*parse_word)(const char *key, const char *value));
+int parse_proc_cmdline(int (*parse_word)(const char *key, const char *value, void *data), void *data);
 int get_proc_cmdline_key(const char *parameter, char **value);
 
 int shall_restore_state(void);
diff --git a/src/core/main.c b/src/core/main.c
index cf3c640a73..bf9bba28d6 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -307,7 +307,7 @@ static int set_machine_id(const char *m) {
         return 0;
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 
         int r;
 
@@ -1570,7 +1570,7 @@ int main(int argc, char *argv[]) {
         }
 
         if (arg_system) {
-                r = parse_proc_cmdline(parse_proc_cmdline_item);
+                r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
                 if (r < 0)
                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
         }
diff --git a/src/cryptsetup/cryptsetup-generator.c b/src/cryptsetup/cryptsetup-generator.c
index de0a3b6f9c..7193d93070 100644
--- a/src/cryptsetup/cryptsetup-generator.c
+++ b/src/cryptsetup/cryptsetup-generator.c
@@ -277,7 +277,7 @@ static crypto_device *get_crypto_device(const char *uuid) {
         return d;
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
         crypto_device *d;
         _cleanup_free_ char *uuid = NULL, *uuid_value = NULL;
@@ -478,7 +478,7 @@ int main(int argc, char *argv[]) {
         if (!arg_disks)
                 goto cleanup;
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0) {
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
                 r = EXIT_FAILURE;
diff --git a/src/debug-generator/debug-generator.c b/src/debug-generator/debug-generator.c
index 7e80af78e7..0b0de1b461 100644
--- a/src/debug-generator/debug-generator.c
+++ b/src/debug-generator/debug-generator.c
@@ -33,7 +33,7 @@ static char **arg_mask = NULL;
 static char **arg_wants = NULL;
 static bool arg_debug_shell = false;
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
 
         assert(key);
@@ -178,7 +178,7 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/fsck/fsck.c b/src/fsck/fsck.c
index d32e1d923e..957e0ccef8 100644
--- a/src/fsck/fsck.c
+++ b/src/fsck/fsck.c
@@ -94,7 +94,7 @@ static void start_target(const char *target, const char *mode) {
                 log_error("Failed to start unit: %s", bus_error_message(&error, r));
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
 
         assert(key);
@@ -293,7 +293,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c
index 33af553d0d..b608eff630 100644
--- a/src/fstab-generator/fstab-generator.c
+++ b/src/fstab-generator/fstab-generator.c
@@ -590,7 +590,7 @@ static int add_sysroot_usr_mount(void) {
                          "/proc/cmdline");
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
 
         /* root=, usr=, usrfstype= and roofstype= may occur more than once, the last
@@ -674,7 +674,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
index 6cc1aad705..a25b3413ae 100644
--- a/src/gpt-auto-generator/gpt-auto-generator.c
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -907,7 +907,7 @@ fallback:
         return 1;
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
 
         assert(key);
@@ -1018,7 +1018,7 @@ int main(int argc, char *argv[]) {
                 return EXIT_SUCCESS;
         }
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/hibernate-resume/hibernate-resume-generator.c b/src/hibernate-resume/hibernate-resume-generator.c
index d7ee80d58f..8be461aafa 100644
--- a/src/hibernate-resume/hibernate-resume-generator.c
+++ b/src/hibernate-resume/hibernate-resume-generator.c
@@ -33,7 +33,7 @@
 static const char *arg_dest = "/tmp";
 static char *arg_resume_dev = NULL;
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 
         if (streq(key, "resume") && value) {
                 free(arg_resume_dev);
@@ -88,7 +88,7 @@ int main(int argc, char *argv[]) {
         if (!in_initrd())
                 return EXIT_SUCCESS;
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/modules-load/modules-load.c b/src/modules-load/modules-load.c
index f75015d8c3..bd07bad446 100644
--- a/src/modules-load/modules-load.c
+++ b/src/modules-load/modules-load.c
@@ -59,7 +59,7 @@ static int add_modules(const char *p) {
         return 0;
 }
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
 
         if (STR_IN_SET(key, "modules-load", "rd.modules-load") && value) {
@@ -226,7 +226,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/quotacheck/quotacheck.c b/src/quotacheck/quotacheck.c
index 6d8c05f046..a87b0866cd 100644
--- a/src/quotacheck/quotacheck.c
+++ b/src/quotacheck/quotacheck.c
@@ -32,7 +32,7 @@
 static bool arg_skip = false;
 static bool arg_force = false;
 
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
 
         if (streq(key, "quotacheck.mode") && value) {
 
@@ -88,7 +88,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/test/test-proc-cmdline.c b/src/test/test-proc-cmdline.c
index 80ad5ed98b..e4207935c4 100644
--- a/src/test/test-proc-cmdline.c
+++ b/src/test/test-proc-cmdline.c
@@ -25,15 +25,18 @@
 #include "string-util.h"
 #include "util.h"
 
-static int parse_item(const char *key, const char *value) {
+static int obj;
+
+static int parse_item(const char *key, const char *value, void *data) {
         assert_se(key);
+        assert_se(data == &obj);
 
         log_info("kernel cmdline option <%s> = <%s>", key, strna(value));
         return 0;
 }
 
 static void test_parse_proc_cmdline(void) {
-        assert_se(parse_proc_cmdline(parse_item) >= 0);
+        assert_se(parse_proc_cmdline(parse_item, &obj) >= 0);
 }
 
 static void test_runlevel_to_target(void) {
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index 535d317c27..601ab69f1b 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -1362,7 +1362,7 @@ static int listen_fds(int *rctrl, int *rnetlink) {
  *   udev.exec-delay=<number of seconds>       delay execution of every executed program
  *   udev.event-timeout=<number of seconds>    seconds to wait before terminating an event
  */
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         const char *full_key = key;
         int r;
 
@@ -1665,7 +1665,7 @@ int main(int argc, char *argv[]) {
         if (r <= 0)
                 goto exit;
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
         if (r < 0)
                 log_warning_errno(r, "failed to parse kernel command line, ignoring: %m");
 
-- 
cgit v1.2.3-54-g00ecf


From d7f69e16f1a5b84e9acf1771a9b53da3787ae79d Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 22 Oct 2016 15:31:14 -0400
Subject: tree-wide: make parse_proc_cmdline() strip "rd." prefix automatically

This stripping is contolled by a new boolean parameter. When the parameter
is true, it means that the caller does not care about the distinction between
initrd and real root, and wants to act on both rd-dot-prefixed and unprefixed
parameters in the initramfs, and only on the unprefixed parameters in real
root. If the parameter is false, behaviour is the same as before.

Changes by caller:
log.c (systemd.log_*):      changed to accept rd-dot-prefix params
pid1:                       no change, custom logic
cryptsetup-generator:       no change, still accepts rd-dot-prefix params
debug-generator:            no change, does not accept rd-dot-prefix params
fsck:                       changed to accept rd-dot-prefix params
fstab-generator:            no change, custom logic
gpt-auto-generator:         no change, custom logic
hibernate-resume-generator: no change, does not accept rd-dot-prefix params
journald:                   changed to accept rd-dot-prefix params
modules-load:               no change, still accepts rd-dot-prefix params
quote-check:                no change, does not accept rd-dot-prefix params
udevd:                      no change, still accepts rd-dot-prefix params

I added support for "rd." params in the three cases where I think it's
useful: logging, fsck options, journald forwarding options.
---
 src/basic/log.c                                   |  2 +-
 src/basic/proc-cmdline.c                          | 11 +++++++----
 src/basic/proc-cmdline.h                          |  4 +++-
 src/core/main.c                                   |  2 +-
 src/cryptsetup/cryptsetup-generator.c             | 14 +++++++-------
 src/debug-generator/debug-generator.c             |  2 +-
 src/fsck/fsck.c                                   |  2 +-
 src/fstab-generator/fstab-generator.c             |  2 +-
 src/gpt-auto-generator/gpt-auto-generator.c       |  2 +-
 src/hibernate-resume/hibernate-resume-generator.c |  2 +-
 src/journal/journald-server.c                     |  2 +-
 src/modules-load/modules-load.c                   |  4 ++--
 src/quotacheck/quotacheck.c                       |  2 +-
 src/test/test-proc-cmdline.c                      |  2 +-
 src/udev/udevd.c                                  |  5 +----
 15 files changed, 30 insertions(+), 28 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/log.c b/src/basic/log.c
index 40f342ca72..2ff70be255 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -1012,7 +1012,7 @@ void log_parse_environment(void) {
                 /* Only try to read the command line in daemons.
                    We assume that anything that has a controlling
                    tty is user stuff. */
-                (void) parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+                (void) parse_proc_cmdline(parse_proc_cmdline_item, NULL, true);
 
         e = secure_getenv("SYSTEMD_LOG_TARGET");
         if (e && log_set_target_from_string(e) < 0)
diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c
index 951db34d1a..8297a222b7 100644
--- a/src/basic/proc-cmdline.c
+++ b/src/basic/proc-cmdline.c
@@ -42,7 +42,9 @@ int proc_cmdline(char **ret) {
                 return read_one_line_file("/proc/cmdline", ret);
 }
 
-int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value, void *data), void *data) {
+int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value, void *data),
+                       void *data,
+                       bool strip_prefix) {
         _cleanup_free_ char *line = NULL;
         const char *p;
         int r;
@@ -56,7 +58,7 @@ int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value, voi
         p = line;
         for (;;) {
                 _cleanup_free_ char *word = NULL;
-                char *value = NULL;
+                char *value = NULL, *unprefixed;
 
                 r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
                 if (r < 0)
@@ -66,14 +68,15 @@ int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value, voi
 
                 /* Filter out arguments that are intended only for the
                  * initrd */
-                if (!in_initrd() && startswith(word, "rd."))
+                unprefixed = startswith(word, "rd.");
+                if (unprefixed && !in_initrd())
                         continue;
 
                 value = strchr(word, '=');
                 if (value)
                         *(value++) = 0;
 
-                r = parse_item(word, value, data);
+                r = parse_item(strip_prefix && unprefixed ? unprefixed : word, value, data);
                 if (r < 0)
                         return r;
         }
diff --git a/src/basic/proc-cmdline.h b/src/basic/proc-cmdline.h
index a8832d8419..6d6ee95c11 100644
--- a/src/basic/proc-cmdline.h
+++ b/src/basic/proc-cmdline.h
@@ -20,7 +20,9 @@
 ***/
 
 int proc_cmdline(char **ret);
-int parse_proc_cmdline(int (*parse_word)(const char *key, const char *value, void *data), void *data);
+int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value, void *data),
+                       void *data,
+                       bool strip_prefix);
 int get_proc_cmdline_key(const char *parameter, char **value);
 
 int shall_restore_state(void);
diff --git a/src/core/main.c b/src/core/main.c
index bf9bba28d6..ffc7725f16 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1570,7 +1570,7 @@ int main(int argc, char *argv[]) {
         }
 
         if (arg_system) {
-                r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+                r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
                 if (r < 0)
                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
         }
diff --git a/src/cryptsetup/cryptsetup-generator.c b/src/cryptsetup/cryptsetup-generator.c
index 7193d93070..e2dc4327fe 100644
--- a/src/cryptsetup/cryptsetup-generator.c
+++ b/src/cryptsetup/cryptsetup-generator.c
@@ -282,7 +282,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
         crypto_device *d;
         _cleanup_free_ char *uuid = NULL, *uuid_value = NULL;
 
-        if (STR_IN_SET(key, "luks", "rd.luks") && value) {
+        if (streq(key, "luks") && value) {
 
                 r = parse_boolean(value);
                 if (r < 0)
@@ -290,7 +290,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
                 else
                         arg_enabled = r;
 
-        } else if (STR_IN_SET(key, "luks.crypttab", "rd.luks.crypttab") && value) {
+        } else if (streq(key, "luks.crypttab") && value) {
 
                 r = parse_boolean(value);
                 if (r < 0)
@@ -298,7 +298,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
                 else
                         arg_read_crypttab = r;
 
-        } else if (STR_IN_SET(key, "luks.uuid", "rd.luks.uuid") && value) {
+        } else if (streq(key, "luks.uuid") && value) {
 
                 d = get_crypto_device(startswith(value, "luks-") ? value+5 : value);
                 if (!d)
@@ -306,7 +306,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
 
                 d->create = arg_whitelist = true;
 
-        } else if (STR_IN_SET(key, "luks.options", "rd.luks.options") && value) {
+        } else if (streq(key, "luks.options") && value) {
 
                 r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
                 if (r == 2) {
@@ -320,7 +320,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
                 } else if (free_and_strdup(&arg_default_options, value) < 0)
                         return log_oom();
 
-        } else if (STR_IN_SET(key, "luks.key", "rd.luks.key") && value) {
+        } else if (streq(key, "luks.key") && value) {
 
                 r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
                 if (r == 2) {
@@ -334,7 +334,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
                 } else if (free_and_strdup(&arg_default_keyfile, value) < 0)
                         return log_oom();
 
-        } else if (STR_IN_SET(key, "luks.name", "rd.luks.name") && value) {
+        } else if (streq(key, "luks.name") && value) {
 
                 r = sscanf(value, "%m[0-9a-fA-F-]=%ms", &uuid, &uuid_value);
                 if (r == 2) {
@@ -478,7 +478,7 @@ int main(int argc, char *argv[]) {
         if (!arg_disks)
                 goto cleanup;
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, true);
         if (r < 0) {
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
                 r = EXIT_FAILURE;
diff --git a/src/debug-generator/debug-generator.c b/src/debug-generator/debug-generator.c
index 0b0de1b461..7f11ec724d 100644
--- a/src/debug-generator/debug-generator.c
+++ b/src/debug-generator/debug-generator.c
@@ -178,7 +178,7 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/fsck/fsck.c b/src/fsck/fsck.c
index 957e0ccef8..be25c6a2b2 100644
--- a/src/fsck/fsck.c
+++ b/src/fsck/fsck.c
@@ -293,7 +293,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, true);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c
index b608eff630..e77bd71a52 100644
--- a/src/fstab-generator/fstab-generator.c
+++ b/src/fstab-generator/fstab-generator.c
@@ -674,7 +674,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
index a25b3413ae..a098b27a8e 100644
--- a/src/gpt-auto-generator/gpt-auto-generator.c
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -1018,7 +1018,7 @@ int main(int argc, char *argv[]) {
                 return EXIT_SUCCESS;
         }
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/hibernate-resume/hibernate-resume-generator.c b/src/hibernate-resume/hibernate-resume-generator.c
index 8be461aafa..17e670604e 100644
--- a/src/hibernate-resume/hibernate-resume-generator.c
+++ b/src/hibernate-resume/hibernate-resume-generator.c
@@ -88,7 +88,7 @@ int main(int argc, char *argv[]) {
         if (!in_initrd())
                 return EXIT_SUCCESS;
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index e7dcbba04f..76c9baf6db 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -1900,7 +1900,7 @@ int server_init(Server *s) {
         journal_reset_metrics(&s->runtime_storage.metrics);
 
         server_parse_config_file(s);
-        parse_proc_cmdline(parse_proc_cmdline_item, s);
+        parse_proc_cmdline(parse_proc_cmdline_item, s, true);
 
         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
diff --git a/src/modules-load/modules-load.c b/src/modules-load/modules-load.c
index bd07bad446..0901fea8dc 100644
--- a/src/modules-load/modules-load.c
+++ b/src/modules-load/modules-load.c
@@ -62,7 +62,7 @@ static int add_modules(const char *p) {
 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
         int r;
 
-        if (STR_IN_SET(key, "modules-load", "rd.modules-load") && value) {
+        if (streq(key, "modules-load") && value) {
                 r = add_modules(value);
                 if (r < 0)
                         return r;
@@ -226,7 +226,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, true);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/quotacheck/quotacheck.c b/src/quotacheck/quotacheck.c
index a87b0866cd..2714cde5c7 100644
--- a/src/quotacheck/quotacheck.c
+++ b/src/quotacheck/quotacheck.c
@@ -88,7 +88,7 @@ int main(int argc, char *argv[]) {
 
         umask(0022);
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
         if (r < 0)
                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
 
diff --git a/src/test/test-proc-cmdline.c b/src/test/test-proc-cmdline.c
index e4207935c4..4101678f19 100644
--- a/src/test/test-proc-cmdline.c
+++ b/src/test/test-proc-cmdline.c
@@ -36,7 +36,7 @@ static int parse_item(const char *key, const char *value, void *data) {
 }
 
 static void test_parse_proc_cmdline(void) {
-        assert_se(parse_proc_cmdline(parse_item, &obj) >= 0);
+        assert_se(parse_proc_cmdline(parse_item, &obj, true) >= 0);
 }
 
 static void test_runlevel_to_target(void) {
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index 05cd5079c1..badbab6205 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -1370,9 +1370,6 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
         if (!value)
                 return 0;
 
-        if (startswith(key, "rd."))
-                key += strlen("rd.");
-
         if (streq(key, "udev.log-priority") && value) {
                 r = util_log_priority(value);
                 if (r >= 0)
@@ -1652,7 +1649,7 @@ int main(int argc, char *argv[]) {
         if (r <= 0)
                 goto exit;
 
-        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL);
+        r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, true);
         if (r < 0)
                 log_warning_errno(r, "failed to parse kernel command line, ignoring: %m");
 
-- 
cgit v1.2.3-54-g00ecf


From 9b232d3241fcfbf60affab69fa51213e36133db5 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 22 Oct 2016 23:28:46 -0400
Subject: core: do not set no_new_privileges flag in
 config_parse_syscall_filter

If SyscallFilter was set, and subsequently cleared, the no_new_privileges flag
was not reset properly. We don't need to set this flag here, it will be
set automatically in unit_patch_contexts() if syscall_filter is set.
---
 src/core/load-fragment.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index a69f60097d..9881baf192 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2736,11 +2736,6 @@ int config_parse_syscall_filter(
         if (!isempty(state))
                 log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
 
-        /* Turn on NNP, but only if it wasn't configured explicitly
-         * before, and only if we are in user mode. */
-        if (!c->no_new_privileges_set && MANAGER_IS_USER(u->manager))
-                c->no_new_privileges = true;
-
         return 0;
 }
 
@@ -3829,7 +3824,7 @@ int config_parse_no_new_privileges(
                 return 0;
         }
 
-        c->no_new_privileges = !!k;
+        c->no_new_privileges = k;
         c->no_new_privileges_set = true;
 
         return 0;
-- 
cgit v1.2.3-54-g00ecf


From 4d885bd326d958827d926512ecc5e0d21f6ad76b Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Sun, 23 Oct 2016 23:24:14 +0200
Subject: core: first lookup and cache creds then apply them after namespace
 setup

This fixes: https://github.com/systemd/systemd/issues/4357

Let's lookup and cache creds then apply them. We also switch from
getgroups() to getgrouplist().
---
 src/core/execute.c | 213 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 144 insertions(+), 69 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 1b7b4a928d..874f035b2e 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -730,74 +730,146 @@ static int ask_for_confirmation(char *response, char **argv) {
         return r;
 }
 
-static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
-        bool keep_groups = false;
+static int get_fixed_user(const ExecContext *c, const char **user,
+                          uid_t *uid, gid_t *gid,
+                          const char **home, const char **shell) {
         int r;
+        const char *name;
 
-        assert(context);
+        assert(c);
 
-        /* Lookup and set GID and supplementary group list. Here too
-         * we avoid NSS lookups for gid=0. */
+        if (!c->user)
+                return 0;
 
-        if (context->group || username) {
-                /* First step, initialize groups from /etc/groups */
-                if (username && gid != 0) {
-                        if (initgroups(username, gid) < 0)
-                                return -errno;
+        /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
+         * (i.e. are "/" or "/bin/nologin"). */
 
-                        keep_groups = true;
-                }
+        name = c->user;
+        r = get_user_creds_clean(&name, uid, gid, home, shell);
+        if (r < 0)
+                return r;
 
-                /* Second step, set our gids */
-                if (setresgid(gid, gid, gid) < 0)
+        *user = name;
+        return 0;
+}
+
+static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
+        int r;
+        const char *name;
+
+        assert(c);
+
+        if (!c->group)
+                return 0;
+
+        name = c->group;
+        r = get_group_creds(&name, gid);
+        if (r < 0)
+                return r;
+
+        *group = name;
+        return 0;
+}
+
+static int get_fixed_supplementary_groups(const ExecContext *c,
+                                          const char *user,
+                                          const char *group,
+                                          gid_t gid,
+                                          gid_t **supplementary_gids, int *ngids) {
+        char **i;
+        int r, k = 0;
+        int ngroups_max;
+        bool keep_groups = false;
+        gid_t *groups = NULL;
+        _cleanup_free_ gid_t *l_gids = NULL;
+
+        assert(c);
+
+        if (!c->supplementary_groups)
+                return 0;
+
+        /*
+         * If user is given, then lookup GID and supplementary group list.
+         * We avoid NSS lookups for gid=0.
+         */
+        if (user && gid_is_valid(gid) && gid != 0) {
+                /* First step, initialize groups from /etc/groups */
+                if (initgroups(user, gid) < 0)
                         return -errno;
+
+                keep_groups = true;
         }
 
-        if (context->supplementary_groups) {
-                int ngroups_max, k;
-                gid_t *gids;
-                char **i;
+        assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
 
-                /* Final step, initialize any manually set supplementary groups */
-                assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
+        l_gids = new(gid_t, ngroups_max);
+        if (!l_gids)
+                return -ENOMEM;
 
-                if (!(gids = new(gid_t, ngroups_max)))
-                        return -ENOMEM;
+        if (keep_groups) {
+                /*
+                 * Lookup the list of groups that the user belongs to, we
+                 * avoid NSS lookups here too for gid=0.
+                 */
+                k = ngroups_max;
+                if (getgrouplist(user, gid, l_gids, &k) < 0)
+                        return -EINVAL;
+        } else
+                k = 0;
 
-                if (keep_groups) {
-                        k = getgroups(ngroups_max, gids);
-                        if (k < 0) {
-                                free(gids);
-                                return -errno;
-                        }
-                } else
-                        k = 0;
+        STRV_FOREACH(i, c->supplementary_groups) {
+                const char *g;
 
-                STRV_FOREACH(i, context->supplementary_groups) {
-                        const char *g;
+                if (k >= ngroups_max)
+                        return -E2BIG;
 
-                        if (k >= ngroups_max) {
-                                free(gids);
-                                return -E2BIG;
-                        }
+                g = *i;
+                r = get_group_creds(&g, l_gids+k);
+                if (r < 0)
+                        return r;
 
-                        g = *i;
-                        r = get_group_creds(&g, gids+k);
-                        if (r < 0) {
-                                free(gids);
-                                return r;
-                        }
+                k++;
+        }
 
-                        k++;
-                }
+        /*
+         * Sets ngids to zero to drop all supplementary groups, happens
+         * when we are under root and SupplementaryGroups= is empty.
+         */
+        if (k == 0) {
+                *ngids = 0;
+                return 0;
+        }
 
-                r = maybe_setgroups(k, gids);
-                if (r < 0) {
-                        free(gids);
+        /* Otherwise get the final list of supplementary groups */
+        groups = memdup(l_gids, sizeof(gid_t) * k);
+        if (!groups)
+                return -ENOMEM;
+
+        *supplementary_gids = groups;
+        *ngids = k;
+
+        groups = NULL;
+
+        return 0;
+}
+
+static int enforce_groups(const ExecContext *context, gid_t gid,
+                          gid_t *supplementary_gids, int ngids) {
+        int r;
+
+        assert(context);
+
+        /* Handle SupplementaryGroups= even if it is empty */
+        if (context->supplementary_groups) {
+                r = maybe_setgroups(ngids, supplementary_gids);
+                if (r < 0)
                         return r;
-                }
+        }
 
-                free(gids);
+        if (gid_is_valid(gid)) {
+                /* Then set our gids */
+                if (setresgid(gid, gid, gid) < 0)
+                        return -errno;
         }
 
         return 0;
@@ -806,6 +878,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_
 static int enforce_user(const ExecContext *context, uid_t uid) {
         assert(context);
 
+        if (!uid_is_valid(uid))
+                return 0;
+
         /* Sets (but doesn't look up) the uid and make sure we keep the
          * capabilities while doing so. */
 
@@ -2175,13 +2250,15 @@ static int exec_child(
 
         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
         _cleanup_free_ char *mac_selinux_context_net = NULL;
-        const char *username = NULL, *home = NULL, *shell = NULL, *wd;
+        _cleanup_free_ gid_t *supplementary_gids = NULL;
+        const char *username = NULL, *groupname = NULL;
+        const char *home = NULL, *shell = NULL, *wd;
         dev_t journal_stream_dev = 0;
         ino_t journal_stream_ino = 0;
         bool needs_mount_namespace;
         uid_t uid = UID_INVALID;
         gid_t gid = GID_INVALID;
-        int i, r;
+        int i, r, ngids = 0;
 
         assert(unit);
         assert(command);
@@ -2273,26 +2350,23 @@ static int exec_child(
                         username = dcreds->user->name;
 
         } else {
-                if (context->user) {
-                        username = context->user;
-                        r = get_user_creds_clean(&username, &uid, &gid, &home, &shell);
-                        if (r < 0) {
-                                *exit_status = EXIT_USER;
-                                return r;
-                        }
-
-                        /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
-                         * (i.e. are "/" or "/bin/nologin"). */
+                r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
+                if (r < 0) {
+                        *exit_status = EXIT_USER;
+                        return r;
                 }
 
-                if (context->group) {
-                        const char *g = context->group;
+                r = get_fixed_group(context, &groupname, &gid);
+                if (r < 0) {
+                        *exit_status = EXIT_GROUP;
+                        return r;
+                }
 
-                        r = get_group_creds(&g, &gid);
-                        if (r < 0) {
-                                *exit_status = EXIT_GROUP;
-                                return r;
-                        }
+                r = get_fixed_supplementary_groups(context, username, groupname,
+                                                   gid, &supplementary_gids, &ngids);
+                if (r < 0) {
+                        *exit_status = EXIT_GROUP;
+                        return r;
                 }
         }
 
@@ -2558,8 +2632,9 @@ static int exec_child(
                 }
         }
 
+        /* Drop group as early as possbile */
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
-                r = enforce_groups(context, username, gid);
+                r = enforce_groups(context, gid, supplementary_gids, ngids);
                 if (r < 0) {
                         *exit_status = EXIT_GROUP;
                         return r;
-- 
cgit v1.2.3-54-g00ecf


From 8b6903ad4d0dc94cd0098f453a4ea8ab24a4a3f7 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Fri, 21 Oct 2016 22:22:56 +0200
Subject: core: lets move the setup of working directory before group enforce

This is minor but lets try to split and move bit by bit cgroups and
portable environment setup before applying the security context.
---
 src/core/execute.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 874f035b2e..a9b2b8f299 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2632,6 +2632,13 @@ static int exec_child(
                 }
         }
 
+        if (context->working_directory_home)
+                wd = home;
+        else if (context->working_directory)
+                wd = context->working_directory;
+        else
+                wd = "/";
+
         /* Drop group as early as possbile */
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
                 r = enforce_groups(context, gid, supplementary_gids, ngids);
@@ -2641,13 +2648,6 @@ static int exec_child(
                 }
         }
 
-        if (context->working_directory_home)
-                wd = home;
-        else if (context->working_directory)
-                wd = context->working_directory;
-        else
-                wd = "/";
-
         if (params->flags & EXEC_APPLY_CHROOT) {
                 if (!needs_mount_namespace && context->root_directory)
                         if (chroot(context->root_directory) < 0) {
-- 
cgit v1.2.3-54-g00ecf


From b3796dd8349af4235143889e44522a730c1635c0 Mon Sep 17 00:00:00 2001
From: Jan Synacek <jsynacek@redhat.com>
Date: Thu, 20 Oct 2016 14:48:33 +0200
Subject: install: introduce UnitFileFlags

Introduce a new enum to get rid of some boolean arguments of unit_file_*
functions. It unifies the code, makes it a bit cleaner and extensible.
---
 src/core/dbus-manager.c      | 37 +++++++++++++++++++-------
 src/core/main.c              |  2 +-
 src/shared/install.c         | 61 +++++++++++++++++++------------------------
 src/shared/install.h         | 33 ++++++++++++-----------
 src/systemctl/systemctl.c    | 28 +++++++++++++-------
 src/test/test-install-root.c | 62 ++++++++++++++++++++++----------------------
 src/test/test-install.c      | 38 +++++++++++++--------------
 7 files changed, 139 insertions(+), 122 deletions(-)

(limited to 'src/core')

diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index 12eb55cb7f..ecad71a5aa 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -47,6 +47,11 @@
 #include "virt.h"
 #include "watchdog.h"
 
+static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
+        return (runtime ? UNIT_FILE_RUNTIME : 0) |
+               (force   ? UNIT_FILE_FORCE   : 0);
+}
+
 static int property_get_version(
                 sd_bus *bus,
                 const char *path,
@@ -1948,13 +1953,14 @@ static int install_error(
 static int method_enable_unit_files_generic(
                 sd_bus_message *message,
                 Manager *m,
-                int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], bool force, UnitFileChange **changes, unsigned *n_changes),
+                int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
                 bool carries_install_info,
                 sd_bus_error *error) {
 
         _cleanup_strv_free_ char **l = NULL;
         UnitFileChange *changes = NULL;
         unsigned n_changes = 0;
+        UnitFileFlags flags;
         int runtime, force, r;
 
         assert(message);
@@ -1968,13 +1974,15 @@ static int method_enable_unit_files_generic(
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         r = bus_verify_manage_unit_files_async(m, message, error);
         if (r < 0)
                 return r;
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = call(m->unit_file_scope, runtime, NULL, l, force, &changes, &n_changes);
+        r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -1993,8 +2001,8 @@ static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bu
         return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error);
 }
 
-static int unit_file_preset_without_mode(UnitFileScope scope, bool runtime, const char *root_dir, char **files, bool force, UnitFileChange **changes, unsigned *n_changes) {
-        return unit_file_preset(scope, runtime, root_dir, files, UNIT_FILE_PRESET_FULL, force, changes, n_changes);
+static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, unsigned *n_changes) {
+        return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes);
 }
 
 static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -2013,6 +2021,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
         Manager *m = userdata;
         UnitFilePresetMode mm;
         int runtime, force, r;
+        UnitFileFlags flags;
         const char *mode;
 
         assert(message);
@@ -2026,6 +2035,8 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         if (isempty(mode))
                 mm = UNIT_FILE_PRESET_FULL;
         else {
@@ -2040,7 +2051,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = unit_file_preset(m->unit_file_scope, runtime, NULL, l, mm, force, &changes, &n_changes);
+        r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2050,7 +2061,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
 static int method_disable_unit_files_generic(
                 sd_bus_message *message,
                 Manager *m,
-                int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
+                int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
                 sd_bus_error *error) {
 
         _cleanup_strv_free_ char **l = NULL;
@@ -2075,7 +2086,7 @@ static int method_disable_unit_files_generic(
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = call(m->unit_file_scope, runtime, NULL, l, &changes, &n_changes);
+        r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2141,7 +2152,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = unit_file_set_default(m->unit_file_scope, NULL, name, force, &changes, &n_changes);
+        r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2154,6 +2165,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
         Manager *m = userdata;
         UnitFilePresetMode mm;
         const char *mode;
+        UnitFileFlags flags;
         int force, runtime, r;
 
         assert(message);
@@ -2167,6 +2179,8 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         if (isempty(mode))
                 mm = UNIT_FILE_PRESET_FULL;
         else {
@@ -2181,7 +2195,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = unit_file_preset_all(m->unit_file_scope, runtime, NULL, mm, force, &changes, &n_changes);
+        r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2196,6 +2210,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
         int runtime, force, r;
         char *target, *type;
         UnitDependency dep;
+        UnitFileFlags flags;
 
         assert(message);
         assert(m);
@@ -2214,11 +2229,13 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
         if (r < 0)
                 return r;
 
+        flags = unit_file_bools_to_flags(runtime, force);
+
         dep = unit_dependency_from_string(type);
         if (dep < 0)
                 return -EINVAL;
 
-        r = unit_file_add_dependency(m->unit_file_scope, runtime, NULL, l, target, dep, force, &changes, &n_changes);
+        r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
diff --git a/src/core/main.c b/src/core/main.c
index b635a633a7..498e289a83 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1777,7 +1777,7 @@ int main(int argc, char *argv[]) {
                 (void) bump_rlimit_nofile(&saved_rlimit_nofile);
 
                 if (empty_etc) {
-                        r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0);
+                        r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
                         if (r < 0)
                                 log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m");
                         else
diff --git a/src/shared/install.c b/src/shared/install.c
index d33a658d0a..caca23435a 100644
--- a/src/shared/install.c
+++ b/src/shared/install.c
@@ -1805,10 +1805,9 @@ static int install_context_mark_for_removal(
 
 int unit_file_mask(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -1824,7 +1823,7 @@ int unit_file_mask(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         STRV_FOREACH(i, files) {
                 _cleanup_free_ char *path = NULL;
@@ -1840,7 +1839,7 @@ int unit_file_mask(
                 if (!path)
                         return -ENOMEM;
 
-                q = create_symlink(&paths, "/dev/null", path, force, changes, n_changes);
+                q = create_symlink(&paths, "/dev/null", path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
                 if (q < 0 && r >= 0)
                         r = q;
         }
@@ -1850,7 +1849,7 @@ int unit_file_mask(
 
 int unit_file_unmask(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 UnitFileChange **changes,
@@ -1871,7 +1870,7 @@ int unit_file_unmask(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         STRV_FOREACH(i, files) {
                 _cleanup_free_ char *path = NULL;
@@ -1935,10 +1934,9 @@ int unit_file_unmask(
 
 int unit_file_link(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -1956,7 +1954,7 @@ int unit_file_link(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         STRV_FOREACH(i, files) {
                 _cleanup_free_ char *full = NULL;
@@ -2005,7 +2003,7 @@ int unit_file_link(
                 if (!new_path)
                         return -ENOMEM;
 
-                q = create_symlink(&paths, *i, new_path, force, changes, n_changes);
+                q = create_symlink(&paths, *i, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
                 if (q < 0 && r >= 0)
                         r = q;
         }
@@ -2190,12 +2188,11 @@ int unit_file_revert(
 
 int unit_file_add_dependency(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 const char *target,
                 UnitDependency dep,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -2220,7 +2217,7 @@ int unit_file_add_dependency(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         r = install_info_discover(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS,
                                   &target_info, changes, n_changes);
@@ -2260,15 +2257,14 @@ int unit_file_add_dependency(
                         return -ENOMEM;
         }
 
-        return install_context_apply(scope, &c, &paths, config_path, force, SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes);
+        return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes);
 }
 
 int unit_file_enable(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -2286,7 +2282,7 @@ int unit_file_enable(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         STRV_FOREACH(f, files) {
                 r = install_info_discover(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
@@ -2305,12 +2301,12 @@ int unit_file_enable(
            is useful to determine whether the passed files had any
            installation data at all. */
 
-        return install_context_apply(scope, &c, &paths, config_path, force, SEARCH_LOAD, changes, n_changes);
+        return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes);
 }
 
 int unit_file_disable(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 UnitFileChange **changes,
@@ -2330,7 +2326,7 @@ int unit_file_disable(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         STRV_FOREACH(i, files) {
                 if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
@@ -2350,10 +2346,9 @@ int unit_file_disable(
 
 int unit_file_reenable(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -2368,19 +2363,19 @@ int unit_file_reenable(
                 n[i] = basename(files[i]);
         n[i] = NULL;
 
-        r = unit_file_disable(scope, runtime, root_dir, n, changes, n_changes);
+        r = unit_file_disable(scope, flags, root_dir, n, changes, n_changes);
         if (r < 0)
                 return r;
 
         /* But the enable command with the full name */
-        return unit_file_enable(scope, runtime, root_dir, files, force, changes, n_changes);
+        return unit_file_enable(scope, flags, root_dir, files, changes, n_changes);
 }
 
 int unit_file_set_default(
                 UnitFileScope scope,
+                UnitFileFlags flags,
                 const char *root_dir,
                 const char *name,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -2411,7 +2406,7 @@ int unit_file_set_default(
                 return r;
 
         new_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET);
-        return create_symlink(&paths, i->path, new_path, force, changes, n_changes);
+        return create_symlink(&paths, i->path, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
 }
 
 int unit_file_get_default(
@@ -2803,11 +2798,10 @@ static int preset_prepare_one(
 
 int unit_file_preset(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 UnitFilePresetMode mode,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -2826,7 +2820,7 @@ int unit_file_preset(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         r = read_presets(scope, root_dir, &presets);
         if (r < 0)
@@ -2838,15 +2832,14 @@ int unit_file_preset(
                         return r;
         }
 
-        return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, force, changes, n_changes);
+        return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
 }
 
 int unit_file_preset_all(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 UnitFilePresetMode mode,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -2865,7 +2858,7 @@ int unit_file_preset_all(
         if (r < 0)
                 return r;
 
-        config_path = runtime ? paths.runtime_config : paths.persistent_config;
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
 
         r = read_presets(scope, root_dir, &presets);
         if (r < 0)
@@ -2906,7 +2899,7 @@ int unit_file_preset_all(
                 }
         }
 
-        return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, force, changes, n_changes);
+        return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
 }
 
 static void unit_file_list_free_one(UnitFileList *f) {
diff --git a/src/shared/install.h b/src/shared/install.h
index b1f220693b..561b521bbc 100644
--- a/src/shared/install.h
+++ b/src/shared/install.h
@@ -23,6 +23,7 @@ typedef enum UnitFileScope UnitFileScope;
 typedef enum UnitFileState UnitFileState;
 typedef enum UnitFilePresetMode UnitFilePresetMode;
 typedef enum UnitFileChangeType UnitFileChangeType;
+typedef enum UnitFileFlags UnitFileFlags;
 typedef enum UnitFileType UnitFileType;
 typedef struct UnitFileChange UnitFileChange;
 typedef struct UnitFileList UnitFileList;
@@ -78,6 +79,11 @@ enum UnitFileChangeType {
         _UNIT_FILE_CHANGE_INVALID = INT_MIN
 };
 
+enum UnitFileFlags {
+        UNIT_FILE_RUNTIME = 1,
+        UNIT_FILE_FORCE = 1 << 1,
+};
+
 /* type can either one of the UnitFileChangeTypes listed above, or a negative error.
  * If source is specified, it should be the contents of the path symlink.
  * In case of an error, source should be the existing symlink contents or NULL
@@ -144,65 +150,59 @@ bool unit_type_may_template(UnitType type) _const_;
 
 int unit_file_enable(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_disable(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_reenable(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_preset(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 UnitFilePresetMode mode,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_preset_all(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 UnitFilePresetMode mode,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_mask(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_unmask(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_link(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_revert(
@@ -213,9 +213,9 @@ int unit_file_revert(
                 unsigned *n_changes);
 int unit_file_set_default(
                 UnitFileScope scope,
+                UnitFileFlags flags,
                 const char *root_dir,
                 const char *file,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 int unit_file_get_default(
@@ -224,12 +224,11 @@ int unit_file_get_default(
                 char **name);
 int unit_file_add_dependency(
                 UnitFileScope scope,
-                bool runtime,
+                UnitFileFlags flags,
                 const char *root_dir,
                 char **files,
                 const char *target,
                 UnitDependency dep,
-                bool force,
                 UnitFileChange **changes,
                 unsigned *n_changes);
 
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 129706d15f..c9ca3db3f1 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -189,6 +189,11 @@ typedef enum BusFocus {
 
 static sd_bus *busses[_BUS_FOCUS_MAX] = {};
 
+static UnitFileFlags args_to_flags(void) {
+        return (arg_runtime ? UNIT_FILE_RUNTIME : 0) |
+               (arg_force   ? UNIT_FILE_FORCE   : 0);
+}
+
 static int acquire_bus(BusFocus focus, sd_bus **ret) {
         int r;
 
@@ -2137,7 +2142,7 @@ static int set_default(int argc, char *argv[], void *userdata) {
                 return log_error_errno(r, "Failed to mangle unit name: %m");
 
         if (install_client_side()) {
-                r = unit_file_set_default(arg_scope, arg_root, unit, true, &changes, &n_changes);
+                r = unit_file_set_default(arg_scope, UNIT_FILE_FORCE, arg_root, unit, &changes, &n_changes);
                 unit_file_dump_changes(r, "set default", changes, n_changes, arg_quiet);
 
                 if (r > 0)
@@ -5955,22 +5960,25 @@ static int enable_unit(int argc, char *argv[], void *userdata) {
         }
 
         if (install_client_side()) {
+                UnitFileFlags flags;
+
+                flags = args_to_flags();
                 if (streq(verb, "enable")) {
-                        r = unit_file_enable(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+                        r = unit_file_enable(arg_scope, flags, arg_root, names, &changes, &n_changes);
                         carries_install_info = r;
                 } else if (streq(verb, "disable"))
-                        r = unit_file_disable(arg_scope, arg_runtime, arg_root, names, &changes, &n_changes);
+                        r = unit_file_disable(arg_scope, flags, arg_root, names, &changes, &n_changes);
                 else if (streq(verb, "reenable")) {
-                        r = unit_file_reenable(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+                        r = unit_file_reenable(arg_scope, flags, arg_root, names, &changes, &n_changes);
                         carries_install_info = r;
                 } else if (streq(verb, "link"))
-                        r = unit_file_link(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+                        r = unit_file_link(arg_scope, flags, arg_root, names, &changes, &n_changes);
                 else if (streq(verb, "preset")) {
-                        r = unit_file_preset(arg_scope, arg_runtime, arg_root, names, arg_preset_mode, arg_force, &changes, &n_changes);
+                        r = unit_file_preset(arg_scope, flags, arg_root, names, arg_preset_mode, &changes, &n_changes);
                 } else if (streq(verb, "mask"))
-                        r = unit_file_mask(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+                        r = unit_file_mask(arg_scope, flags, arg_root, names, &changes, &n_changes);
                 else if (streq(verb, "unmask"))
-                        r = unit_file_unmask(arg_scope, arg_runtime, arg_root, names, &changes, &n_changes);
+                        r = unit_file_unmask(arg_scope, flags, arg_root, names, &changes, &n_changes);
                 else if (streq(verb, "revert"))
                         r = unit_file_revert(arg_scope, arg_root, names, &changes, &n_changes);
                 else
@@ -6152,7 +6160,7 @@ static int add_dependency(int argc, char *argv[], void *userdata) {
                 assert_not_reached("Unknown verb");
 
         if (install_client_side()) {
-                r = unit_file_add_dependency(arg_scope, arg_runtime, arg_root, names, target, dep, arg_force, &changes, &n_changes);
+                r = unit_file_add_dependency(arg_scope, args_to_flags(), arg_root, names, target, dep, &changes, &n_changes);
                 unit_file_dump_changes(r, "add dependency on", changes, n_changes, arg_quiet);
 
                 if (r > 0)
@@ -6214,7 +6222,7 @@ static int preset_all(int argc, char *argv[], void *userdata) {
         int r;
 
         if (install_client_side()) {
-                r = unit_file_preset_all(arg_scope, arg_runtime, arg_root, arg_preset_mode, arg_force, &changes, &n_changes);
+                r = unit_file_preset_all(arg_scope, args_to_flags(), arg_root, arg_preset_mode, &changes, &n_changes);
                 unit_file_dump_changes(r, "preset", changes, n_changes, arg_quiet);
 
                 if (r > 0)
diff --git a/src/test/test-install-root.c b/src/test/test-install-root.c
index 1686054d2a..a98de76b43 100644
--- a/src/test/test-install-root.c
+++ b/src/test/test-install-root.c
@@ -64,7 +64,7 @@ static void test_basic_mask_and_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", NULL) >= 0);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_mask(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_mask(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/dev/null"));
@@ -80,11 +80,11 @@ static void test_basic_mask_and_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_MASKED);
 
         /* Enabling a masked unit should fail! */
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), false, &changes, &n_changes) == -ERFKILL);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) == -ERFKILL);
         unit_file_changes_free(changes, n_changes);
         changes = NULL; n_changes = 0;
 
-        assert_se(unit_file_unmask(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_unmask(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/a.service");
@@ -92,7 +92,7 @@ static void test_basic_mask_and_enable(const char *root) {
         unit_file_changes_free(changes, n_changes);
         changes = NULL; n_changes = 0;
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), false, &changes, &n_changes) == 1);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) == 1);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/a.service"));
@@ -107,12 +107,12 @@ static void test_basic_mask_and_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
 
         /* Enabling it again should succeed but be a NOP */
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 0);
         unit_file_changes_free(changes, n_changes);
         changes = NULL; n_changes = 0;
 
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/a.service");
@@ -126,13 +126,13 @@ static void test_basic_mask_and_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "d.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
         /* Disabling a disabled unit must suceed but be a NOP */
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("a.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 0);
         unit_file_changes_free(changes, n_changes);
         changes = NULL; n_changes = 0;
 
         /* Let's enable this indirectly via a symlink */
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("d.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("d.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/a.service"));
@@ -148,7 +148,7 @@ static void test_basic_mask_and_enable(const char *root) {
 
         /* Let's try to reenable */
 
-        assert_se(unit_file_reenable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("b.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_reenable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("b.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 2);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/a.service");
@@ -217,7 +217,7 @@ static void test_linked_units(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked3.service", &state) >= 0 && state == UNIT_FILE_LINKED);
 
         /* First, let's link the unit into the search path */
-        assert_se(unit_file_link(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("/opt/linked.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_link(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("/opt/linked.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/opt/linked.service"));
@@ -229,7 +229,7 @@ static void test_linked_units(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) >= 0 && state == UNIT_FILE_LINKED);
 
         /* Let's unlink it from the search path again */
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
@@ -240,7 +240,7 @@ static void test_linked_units(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
 
         /* Now, let's not just link it, but also enable it */
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("/opt/linked.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("/opt/linked.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 2);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/linked.service");
         q = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
@@ -262,7 +262,7 @@ static void test_linked_units(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
 
         /* And let's unlink it again */
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 2);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/linked.service");
         q = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked.service");
@@ -282,7 +282,7 @@ static void test_linked_units(const char *root) {
 
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "linked.service", NULL) == -ENOENT);
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("linked2.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked2.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 2);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/linked2.service");
         q = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/linked2.service");
@@ -301,7 +301,7 @@ static void test_linked_units(const char *root) {
         unit_file_changes_free(changes, n_changes);
         changes = NULL; n_changes = 0;
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("linked3.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("linked3.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(startswith(changes[0].path, root));
@@ -325,7 +325,7 @@ static void test_default(const char *root) {
 
         assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) == -ENOENT);
 
-        assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, root, "idontexist.target", false, &changes, &n_changes) == -ENOENT);
+        assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, 0, root, "idontexist.target", &changes, &n_changes) == -ENOENT);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == -ENOENT);
         assert_se(streq_ptr(changes[0].path, "idontexist.target"));
@@ -334,7 +334,7 @@ static void test_default(const char *root) {
 
         assert_se(unit_file_get_default(UNIT_FILE_SYSTEM, root, &def) == -ENOENT);
 
-        assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, root, "test-default.target", false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_set_default(UNIT_FILE_SYSTEM, 0, root, "test-default.target", &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/test-default-real.target"));
@@ -364,7 +364,7 @@ static void test_add_dependency(const char *root) {
         p = strjoina(root, "/usr/lib/systemd/system/add-dependency-test-service.service");
         assert_se(symlink("real-add-dependency-test-service.service", p) >= 0);
 
-        assert_se(unit_file_add_dependency(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("add-dependency-test-service.service"), "add-dependency-test-target.target", UNIT_WANTS, false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_add_dependency(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("add-dependency-test-service.service"), "add-dependency-test-target.target", UNIT_WANTS, &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/real-add-dependency-test-service.service"));
@@ -401,7 +401,7 @@ static void test_template_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("template@.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
@@ -417,7 +417,7 @@ static void test_template_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         assert_se(streq(changes[0].path, p));
@@ -431,7 +431,7 @@ static void test_template_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("template@foo.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/template@foo.service");
@@ -446,7 +446,7 @@ static void test_template_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@def.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
 
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template@foo.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         assert_se(streq(changes[0].path, p));
@@ -462,7 +462,7 @@ static void test_template_enable(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "template-symlink@quux.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("template-symlink@quux.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("template-symlink@quux.service"), &changes, &n_changes) >= 0);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/template@.service"));
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/template@quux.service");
@@ -507,7 +507,7 @@ static void test_indirect(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
 
-        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("indirectc.service"), false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_enable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/indirectb.service"));
@@ -520,7 +520,7 @@ static void test_indirect(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectb.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "indirectc.service", &state) >= 0 && state == UNIT_FILE_INDIRECT);
 
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("indirectc.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/indirectb.service");
@@ -560,7 +560,7 @@ static void test_preset_and_list(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("preset-yes.service"), UNIT_FILE_PRESET_FULL, false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-yes.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/preset-yes.service"));
@@ -572,7 +572,7 @@ static void test_preset_and_list(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("preset-yes.service"), &changes, &n_changes) >= 0);
+        assert_se(unit_file_disable(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-yes.service"), &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_UNLINK);
         p = strjoina(root, SYSTEM_CONFIG_UNIT_PATH"/multi-user.target.wants/preset-yes.service");
@@ -583,7 +583,7 @@ static void test_preset_and_list(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("preset-no.service"), UNIT_FILE_PRESET_FULL, false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("preset-no.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
         assert_se(n_changes == 0);
         unit_file_changes_free(changes, n_changes);
         changes = NULL; n_changes = 0;
@@ -591,7 +591,7 @@ static void test_preset_and_list(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-yes.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "preset-no.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_preset_all(UNIT_FILE_SYSTEM, false, root, UNIT_FILE_PRESET_FULL, false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_preset_all(UNIT_FILE_SYSTEM, 0, root, UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
 
         assert_se(n_changes > 0);
 
@@ -716,7 +716,7 @@ static void test_preset_order(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("prefix-1.service"), UNIT_FILE_PRESET_FULL, false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("prefix-1.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
         assert_se(n_changes == 1);
         assert_se(changes[0].type == UNIT_FILE_SYMLINK);
         assert_se(streq(changes[0].source, "/usr/lib/systemd/system/prefix-1.service"));
@@ -728,7 +728,7 @@ static void test_preset_order(const char *root) {
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED);
 
-        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("prefix-2.service"), UNIT_FILE_PRESET_FULL, false, &changes, &n_changes) >= 0);
+        assert_se(unit_file_preset(UNIT_FILE_SYSTEM, 0, root, STRV_MAKE("prefix-2.service"), UNIT_FILE_PRESET_FULL, &changes, &n_changes) >= 0);
         assert_se(n_changes == 0);
 
         assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-1.service", &state) >= 0 && state == UNIT_FILE_ENABLED);
diff --git a/src/test/test-install.c b/src/test/test-install.c
index 0ac85f040a..fb36aa83ca 100644
--- a/src/test/test-install.c
+++ b/src/test/test-install.c
@@ -70,12 +70,12 @@ int main(int argc, char* argv[]) {
 
         log_info("/*** enable **/");
 
-        r = unit_file_enable(UNIT_FILE_SYSTEM, false, NULL, (char**) files, false, &changes, &n_changes);
+        r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         log_info("/*** enable2 **/");
 
-        r = unit_file_enable(UNIT_FILE_SYSTEM, false, NULL, (char**) files, false, &changes, &n_changes);
+        r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -89,7 +89,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_disable(UNIT_FILE_SYSTEM, false, NULL, (char**) files, &changes, &n_changes);
+        r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -103,10 +103,10 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_mask(UNIT_FILE_SYSTEM, false, NULL, (char**) files, false, &changes, &n_changes);
+        r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
         log_info("/*** mask2 ***/");
-        r = unit_file_mask(UNIT_FILE_SYSTEM, false, NULL, (char**) files, false, &changes, &n_changes);
+        r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -120,10 +120,10 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_unmask(UNIT_FILE_SYSTEM, false, NULL, (char**) files, &changes, &n_changes);
+        r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
         log_info("/*** unmask2 ***/");
-        r = unit_file_unmask(UNIT_FILE_SYSTEM, false, NULL, (char**) files, &changes, &n_changes);
+        r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -137,7 +137,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_mask(UNIT_FILE_SYSTEM, false, NULL, (char**) files, false, &changes, &n_changes);
+        r = unit_file_mask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -151,10 +151,10 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_disable(UNIT_FILE_SYSTEM, false, NULL, (char**) files, &changes, &n_changes);
+        r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
         log_info("/*** disable2 ***/");
-        r = unit_file_disable(UNIT_FILE_SYSTEM, false, NULL, (char**) files, &changes, &n_changes);
+        r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -168,7 +168,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_unmask(UNIT_FILE_SYSTEM, false, NULL, (char**) files, &changes, &n_changes);
+        r = unit_file_unmask(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -182,7 +182,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_enable(UNIT_FILE_SYSTEM, false, NULL, (char**) files2, false, &changes, &n_changes);
+        r = unit_file_enable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -196,7 +196,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_disable(UNIT_FILE_SYSTEM, false, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+        r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -209,7 +209,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_link(UNIT_FILE_SYSTEM, false, NULL, (char**) files2, false, &changes, &n_changes);
+        r = unit_file_link(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -223,7 +223,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_disable(UNIT_FILE_SYSTEM, false, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+        r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -236,7 +236,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_link(UNIT_FILE_SYSTEM, false, NULL, (char**) files2, false, &changes, &n_changes);
+        r = unit_file_link(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -250,7 +250,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_reenable(UNIT_FILE_SYSTEM, false, NULL, (char**) files2, false, &changes, &n_changes);
+        r = unit_file_reenable(UNIT_FILE_SYSTEM, 0, NULL, (char**) files2, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -264,7 +264,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_disable(UNIT_FILE_SYSTEM, false, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
+        r = unit_file_disable(UNIT_FILE_SYSTEM, 0, NULL, STRV_MAKE(basename(files2[0])), &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
@@ -276,7 +276,7 @@ int main(int argc, char* argv[]) {
         changes = NULL;
         n_changes = 0;
 
-        r = unit_file_preset(UNIT_FILE_SYSTEM, false, NULL, (char**) files, UNIT_FILE_PRESET_FULL, false, &changes, &n_changes);
+        r = unit_file_preset(UNIT_FILE_SYSTEM, 0, NULL, (char**) files, UNIT_FILE_PRESET_FULL, &changes, &n_changes);
         assert_se(r >= 0);
 
         dump_changes(changes, n_changes);
-- 
cgit v1.2.3-54-g00ecf


From 3b3557c410c7910fae0990599dcb82711cf5fbb7 Mon Sep 17 00:00:00 2001
From: Jan Synacek <jsynacek@redhat.com>
Date: Thu, 20 Oct 2016 15:20:11 +0200
Subject: shared, systemctl: teach is-enabled to show installation targets

It may be desired by users to know what targets a particular service is
installed into. Improve user friendliness by teaching the is-enabled
command to show such information when used with --full.

This patch makes use of the newly added UnitFileFlags and adds
UNIT_FILE_DRY_RUN flag into it. Since the API had already been modified,
it's now easy to add the new dry-run feature for other commands as
well. As a next step, --dry-run could be added to systemctl, which in
turn might pave the way for a long requested dry-run feature when
running systemctl start.
---
 man/systemctl.xml                      |  3 ++
 src/core/dbus-manager.c                | 44 ++++++++++++++++++++
 src/core/org.freedesktop.systemd1.conf |  4 ++
 src/shared/install.c                   | 38 +++++++++--------
 src/shared/install.h                   |  1 +
 src/systemctl/systemctl.c              | 74 ++++++++++++++++++++++++++++++++--
 6 files changed, 145 insertions(+), 19 deletions(-)

(limited to 'src/core')

diff --git a/man/systemctl.xml b/man/systemctl.xml
index 75885bcf02..a4d4c53725 100644
--- a/man/systemctl.xml
+++ b/man/systemctl.xml
@@ -233,6 +233,8 @@
           of <command>status</command>, <command>list-units</command>,
           <command>list-jobs</command>, and
           <command>list-timers</command>.</para>
+          <para>Also, show installation targets in the output of
+          <command>is-enabled</command>.</para>
         </listitem>
       </varlistentry>
 
@@ -1136,6 +1138,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
             exit code of 0 if at least one is enabled, non-zero
             otherwise. Prints the current enable status (see table).
             To suppress this output, use <option>--quiet</option>.
+            To show installation targets, use <option>--full</option>.
             </para>
 
             <table>
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index ecad71a5aa..d7d3d3c8ce 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -2242,6 +2242,49 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
         return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes);
 }
 
+static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+        UnitFileChange *changes = NULL;
+        unsigned n_changes = 0, i;
+        UnitFileFlags flags;
+        const char *name;
+        char **p;
+        int runtime, r;
+
+        r = sd_bus_message_read(message, "sb", &name, &runtime);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_new_method_return(message, &reply);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s");
+        if (r < 0)
+                return r;
+
+        p = STRV_MAKE(name);
+        flags = UNIT_FILE_DRY_RUN |
+                (runtime ? UNIT_FILE_RUNTIME : 0);
+
+        r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+        for (i = 0; i < n_changes; i++)
+                if (changes[i].type == UNIT_FILE_UNLINK) {
+                        r = sd_bus_message_append(reply, "s", changes[i].path);
+                        if (r < 0)
+                                return r;
+                }
+
+        r = sd_bus_message_close_container(reply);
+        if (r < 0)
+                return r;
+
+        return sd_bus_send(NULL, reply, NULL);
+}
+
 const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_VTABLE_START(0),
 
@@ -2387,6 +2430,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
         SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED),
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
index 6caa15b0b8..a61677e645 100644
--- a/src/core/org.freedesktop.systemd1.conf
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -92,6 +92,10 @@
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="GetUnitProcesses"/>
 
+                <allow send_destination="org.freedesktop.systemd1"
+                       send_interface="org.freedesktop.systemd1.Manager"
+                       send_member="GetUnitFileLinks"/>
+
                 <allow send_destination="org.freedesktop.systemd1"
                        send_interface="org.freedesktop.systemd1.Manager"
                        send_member="ListJobs"/>
diff --git a/src/shared/install.c b/src/shared/install.c
index caca23435a..96fba6e25b 100644
--- a/src/shared/install.c
+++ b/src/shared/install.c
@@ -518,6 +518,7 @@ static int remove_marked_symlinks_fd(
                 const char *path,
                 const char *config_path,
                 const LookupPaths *lp,
+                bool dry_run,
                 bool *restart,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
@@ -566,7 +567,7 @@ static int remove_marked_symlinks_fd(
                         }
 
                         /* This will close nfd, regardless whether it succeeds or not */
-                        q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, restart, changes, n_changes);
+                        q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes);
                         if (q < 0 && r == 0)
                                 r = q;
 
@@ -603,14 +604,16 @@ static int remove_marked_symlinks_fd(
                         if (!found)
                                 continue;
 
-                        if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) {
-                                if (r == 0)
-                                        r = -errno;
-                                unit_file_changes_add(changes, n_changes, -errno, p, NULL);
-                                continue;
-                        }
+                        if (!dry_run) {
+                                if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) {
+                                        if (r == 0)
+                                                r = -errno;
+                                        unit_file_changes_add(changes, n_changes, -errno, p, NULL);
+                                        continue;
+                                }
 
-                        (void) rmdir_parents(p, config_path);
+                                (void) rmdir_parents(p, config_path);
+                        }
 
                         unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL);
 
@@ -621,7 +624,7 @@ static int remove_marked_symlinks_fd(
                         q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p);
                         if (q < 0)
                                 return q;
-                        if (q > 0)
+                        if (q > 0 && !dry_run)
                                 *restart = true;
                 }
         }
@@ -633,6 +636,7 @@ static int remove_marked_symlinks(
                 Set *remove_symlinks_to,
                 const char *config_path,
                 const LookupPaths *lp,
+                bool dry_run,
                 UnitFileChange **changes,
                 unsigned *n_changes) {
 
@@ -659,7 +663,7 @@ static int remove_marked_symlinks(
                         return -errno;
 
                 /* This takes possession of cfd and closes it */
-                q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, &restart, changes, n_changes);
+                q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, dry_run, &restart, changes, n_changes);
                 if (r == 0)
                         r = q;
         } while (restart);
@@ -1861,6 +1865,7 @@ int unit_file_unmask(
         size_t n_todo = 0, n_allocated = 0;
         const char *config_path;
         char **i;
+        bool dry_run;
         int r, q;
 
         assert(scope >= 0);
@@ -1871,6 +1876,7 @@ int unit_file_unmask(
                 return r;
 
         config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+        dry_run = !!(flags & UNIT_FILE_DRY_RUN);
 
         STRV_FOREACH(i, files) {
                 _cleanup_free_ char *path = NULL;
@@ -1907,7 +1913,7 @@ int unit_file_unmask(
                 if (!path)
                         return -ENOMEM;
 
-                if (unlink(path) < 0) {
+                if (!dry_run && unlink(path) < 0) {
                         if (errno != ENOENT) {
                                 if (r >= 0)
                                         r = -errno;
@@ -1925,7 +1931,7 @@ int unit_file_unmask(
                         return q;
         }
 
-        q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, changes, n_changes);
+        q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes);
         if (r >= 0)
                 r = q;
 
@@ -2175,11 +2181,11 @@ int unit_file_revert(
                         return q;
         }
 
-        q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, changes, n_changes);
+        q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes);
         if (r >= 0)
                 r = q;
 
-        q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, changes, n_changes);
+        q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes);
         if (r >= 0)
                 r = q;
 
@@ -2341,7 +2347,7 @@ int unit_file_disable(
         if (r < 0)
                 return r;
 
-        return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, changes, n_changes);
+        return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, !!(flags & UNIT_FILE_DRY_RUN), changes, n_changes);
 }
 
 int unit_file_reenable(
@@ -2730,7 +2736,7 @@ static int execute_preset(
                 if (r < 0)
                         return r;
 
-                r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, changes, n_changes);
+                r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes);
         } else
                 r = 0;
 
diff --git a/src/shared/install.h b/src/shared/install.h
index 561b521bbc..7a5859e729 100644
--- a/src/shared/install.h
+++ b/src/shared/install.h
@@ -82,6 +82,7 @@ enum UnitFileChangeType {
 enum UnitFileFlags {
         UNIT_FILE_RUNTIME = 1,
         UNIT_FILE_FORCE = 1 << 1,
+        UNIT_FILE_DRY_RUN = 1 << 2,
 };
 
 /* type can either one of the UnitFileChangeTypes listed above, or a negative error.
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index c9ca3db3f1..2e3b59af3e 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -6271,6 +6271,63 @@ finish:
         return r;
 }
 
+static int show_installation_targets_client_side(const char *name) {
+        UnitFileChange *changes = NULL;
+        unsigned n_changes = 0, i;
+        UnitFileFlags flags;
+        char **p;
+        int r;
+
+        p = STRV_MAKE(name);
+        flags = UNIT_FILE_DRY_RUN |
+                (arg_runtime ? UNIT_FILE_RUNTIME : 0);
+
+        r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+        for (i = 0; i < n_changes; i++)
+                if (changes[i].type == UNIT_FILE_UNLINK)
+                        printf("  %s\n", changes[i].path);
+
+        return 0;
+}
+
+static int show_installation_targets(sd_bus *bus, const char *name) {
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        const char *link;
+        int r;
+
+        r = sd_bus_call_method(
+                        bus,
+                        "org.freedesktop.systemd1",
+                        "/org/freedesktop/systemd1",
+                        "org.freedesktop.systemd1.Manager",
+                        "GetUnitFileLinks",
+                        &error,
+                        &reply,
+                        "sb", name, arg_runtime);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get unit file links for %s: %s", name, bus_error_message(&error, r));
+
+        r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s");
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        while ((r = sd_bus_message_read(reply, "s", &link)) > 0)
+                printf("  %s\n", link);
+
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        r = sd_bus_message_exit_container(reply);
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        return 0;
+}
+
 static int unit_is_enabled(int argc, char *argv[], void *userdata) {
 
         _cleanup_strv_free_ char **names = NULL;
@@ -6289,7 +6346,6 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) {
         enabled = r > 0;
 
         if (install_client_side()) {
-
                 STRV_FOREACH(name, names) {
                         UnitFileState state;
 
@@ -6305,8 +6361,14 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) {
                                    UNIT_FILE_GENERATED))
                                 enabled = true;
 
-                        if (!arg_quiet)
+                        if (!arg_quiet) {
                                 puts(unit_file_state_to_string(state));
+                                if (arg_full) {
+                                        r = show_installation_targets_client_side(*name);
+                                        if (r < 0)
+                                                return r;
+                                }
+                        }
                 }
 
                 r = 0;
@@ -6341,8 +6403,14 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) {
                         if (STR_IN_SET(s, "enabled", "enabled-runtime", "static", "indirect", "generated"))
                                 enabled = true;
 
-                        if (!arg_quiet)
+                        if (!arg_quiet) {
                                 puts(s);
+                                if (arg_full) {
+                                        r = show_installation_targets(bus, *name);
+                                        if (r < 0)
+                                                return r;
+                                }
+                        }
                 }
         }
 
-- 
cgit v1.2.3-54-g00ecf


From 366ddd252ed25397ead209228b48c5eef93ced2e Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Mon, 24 Oct 2016 13:13:06 +0200
Subject: core: do not assert when sysconf(_SC_NGROUPS_MAX) fails (#4466)

Remove the assert and check the return code of sysconf(_SC_NGROUPS_MAX).

_SC_NGROUPS_MAX maps to NGROUPS_MAX which is defined in <limits.h> to
65536 these days. The value is a sysctl read-only
/proc/sys/kernel/ngroups_max and the kernel assumes that it is always
positive otherwise things may break. Follow this and support only
positive values for all other case return either -errno or -EOPNOTSUPP.

Now if there are systems that want to re-write NGROUPS_MAX then they
should not pass SupplementaryGroups= in units even if it is empty, in
this case nothing fails and we just ignore supplementary groups. However
if SupplementaryGroups= is passed even if it is empty we have to assume
that there will be groups manipulation from our side or the kernel and
since the kernel always assumes that NGROUPS_MAX is positive, then
follow that and support only positive values.
---
 src/core/execute.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index a9b2b8f299..53356c3c06 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -788,6 +788,19 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
         if (!c->supplementary_groups)
                 return 0;
 
+        /*
+         * If SupplementaryGroups= was passed then NGROUPS_MAX has to
+         * be positive, otherwise fail.
+         */
+        errno = 0;
+        ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
+        if (ngroups_max <= 0) {
+                if (errno > 0)
+                        return -errno;
+                else
+                        return -EOPNOTSUPP; /* For all other values */
+        }
+
         /*
          * If user is given, then lookup GID and supplementary group list.
          * We avoid NSS lookups for gid=0.
@@ -800,8 +813,6 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
                 keep_groups = true;
         }
 
-        assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
-
         l_gids = new(gid_t, ngroups_max);
         if (!l_gids)
                 return -ENOMEM;
-- 
cgit v1.2.3-54-g00ecf


From f673b62df67dfa67ddfa4f5a72d78ea3c002278f Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 20:03:51 +0200
Subject: core: simplify skip_seccomp_unavailable() a bit

Let's prefer early-exit over deep-indented if blocks. Not behavioural change.
---
 src/core/execute.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 53356c3c06..b69297241b 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1185,13 +1185,14 @@ static void rename_process_from_path(const char *path) {
 #ifdef HAVE_SECCOMP
 
 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
-        if (!is_seccomp_available()) {
-                log_open();
-                log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
-                log_close();
-                return true;
-        }
-        return false;
+
+        if (is_seccomp_available())
+                return false;
+
+        log_open();
+        log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+        log_close();
+        return true;
 }
 
 static int apply_seccomp(const Unit* u, const ExecContext *c) {
-- 
cgit v1.2.3-54-g00ecf


From e0f3720e399573134657458f4c8bd20c68fc092a Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 20:05:49 +0200
Subject: core: move misplaced comment to the right place

---
 src/core/execute.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index b69297241b..e63a12f934 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1891,9 +1891,9 @@ static int setup_private_users(uid_t uid, gid_t gid) {
                 asprintf(&uid_map,
                          "0 0 1\n"                      /* Map root → root */
                          UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
-                         uid, uid);                     /* The case where the above is the same */
+                         uid, uid);
         else
-                uid_map = strdup("0 0 1\n");
+                uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
         if (!uid_map)
                 return -ENOMEM;
 
-- 
cgit v1.2.3-54-g00ecf


From 8130926d32d76193e98ba783ba932816f276bfad Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 21:50:05 +0200
Subject: core: rework syscall filter set handling

A variety of fixes:

- rename the SystemCallFilterSet structure to SyscallFilterSet. So far the main
  instance of it (the syscall_filter_sets[] array) used to abbreviate
  "SystemCall" as "Syscall". Let's stick to one of the two syntaxes, and not
  mix and match too wildly. Let's pick the shorter name in this case, as it is
  sufficiently well established to not confuse hackers reading this.

- Export explicit indexes into the syscall_filter_sets[] array via an enum.
  This way, code that wants to make use of a specific filter set, can index it
  directly via the enum, instead of having to search for it. This makes
  apply_private_devices() in particular a lot simpler.

- Provide two new helper calls in seccomp-util.c: syscall_filter_set_find() to
  find a set by its name, seccomp_add_syscall_filter_set() to add a set to a
  seccomp object.

- Update SystemCallFilter= parser to use extract_first_word().  Let's work on
  deprecating FOREACH_WORD_QUOTED().

- Simplify apply_private_devices() using this functionality
---
 src/core/execute.c        |  41 +--------------
 src/core/load-fragment.c  |  54 ++++++++++---------
 src/shared/seccomp-util.c | 128 ++++++++++++++++++++++++++++++++++------------
 src/shared/seccomp-util.h |  32 ++++++++++--
 4 files changed, 155 insertions(+), 100 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index e63a12f934..18bb67cda9 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1578,10 +1578,7 @@ finish:
 }
 
 static int apply_private_devices(Unit *u, const ExecContext *c) {
-        const SystemCallFilterSet *set;
         scmp_filter_ctx *seccomp;
-        const char *sys;
-        bool syscalls_found = false;
         int r;
 
         assert(c);
@@ -1599,43 +1596,9 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        for (set = syscall_filter_sets; set->set_name; set++)
-                if (streq(set->set_name, "@raw-io")) {
-                        syscalls_found = true;
-                        break;
-                }
-
-        /* We should never fail here */
-        if (!syscalls_found) {
-                r = -EOPNOTSUPP;
+        r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
+        if (r < 0)
                 goto finish;
-        }
-
-        NULSTR_FOREACH(sys, set->value) {
-                int id;
-                bool add = true;
-
-#ifndef __NR_s390_pci_mmio_read
-                if (streq(sys, "s390_pci_mmio_read"))
-                        add = false;
-#endif
-#ifndef __NR_s390_pci_mmio_write
-                if (streq(sys, "s390_pci_mmio_write"))
-                        add = false;
-#endif
-
-                if (!add)
-                        continue;
-
-                id = seccomp_syscall_resolve_name(sys);
-
-                r = seccomp_rule_add(
-                                seccomp,
-                                SCMP_ACT_ERRNO(EPERM),
-                                id, 0);
-                if (r < 0)
-                        goto finish;
-        }
 
         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
         if (r < 0)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 6f68e23340..118b39c1cf 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2618,6 +2618,7 @@ int config_parse_documentation(const char *unit,
 }
 
 #ifdef HAVE_SECCOMP
+
 static int syscall_filter_parse_one(
                 const char *unit,
                 const char *filename,
@@ -2628,27 +2629,29 @@ static int syscall_filter_parse_one(
                 bool warn) {
         int r;
 
-        if (*t == '@') {
-                const SystemCallFilterSet *set;
+        if (t[0] == '@') {
+                const SyscallFilterSet *set;
+                const char *i;
 
-                for (set = syscall_filter_sets; set->set_name; set++)
-                        if (streq(set->set_name, t)) {
-                                const char *sys;
+                set = syscall_filter_set_find(t);
+                if (!set) {
+                        if (warn)
+                                log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
+                        return 0;
+                }
 
-                                NULSTR_FOREACH(sys, set->value) {
-                                        r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
-                                        if (r < 0)
-                                                return r;
-                                }
-                                break;
-                        }
+                NULSTR_FOREACH(i, set->value) {
+                        r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
+                        if (r < 0)
+                                return r;
+                }
         } else {
                 int id;
 
                 id = seccomp_syscall_resolve_name(t);
                 if (id == __NR_SCMP_ERROR)  {
                         if (warn)
-                                log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+                                log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
                         return 0;
                 }
 
@@ -2662,8 +2665,9 @@ static int syscall_filter_parse_one(
                         if (r < 0)
                                 return log_oom();
                 } else
-                        set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+                        (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
         }
+
         return 0;
 }
 
@@ -2682,8 +2686,7 @@ int config_parse_syscall_filter(
         ExecContext *c = data;
         Unit *u = userdata;
         bool invert = false;
-        const char *word, *state;
-        size_t l;
+        const char *p;
         int r;
 
         assert(filename);
@@ -2722,19 +2725,24 @@ int config_parse_syscall_filter(
                 }
         }
 
-        FOREACH_WORD_QUOTED(word, l, rvalue, state) {
-                _cleanup_free_ char *t = NULL;
+        p = rvalue;
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
 
-                t = strndup(word, l);
-                if (!t)
+                r = extract_first_word(&p, &word, NULL, 0);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
                         return log_oom();
+                if (r < 0) {
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+                        break;
+                }
 
-                r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+                r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
                 if (r < 0)
                         return r;
         }
-        if (!isempty(state))
-                log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
 
         /* Turn on NNP, but only if it wasn't configured explicitly
          * before, and only if we are in user mode. */
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 8116c7671f..1d51f3fd1f 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -26,6 +26,7 @@
 #include "macro.h"
 #include "seccomp-util.h"
 #include "string-util.h"
+#include "util.h"
 
 const char* seccomp_arch_to_string(uint32_t c) {
 
@@ -132,28 +133,30 @@ bool is_seccomp_available(void) {
         return cached_enabled;
 }
 
-const SystemCallFilterSet syscall_filter_sets[] = {
-        {
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+        [SYSCALL_FILTER_SET_CLOCK] = {
                 /* Clock */
-                .set_name = "@clock",
+                .name = "@clock",
                 .value =
                 "adjtimex\0"
                 "clock_adjtime\0"
                 "clock_settime\0"
                 "settimeofday\0"
                 "stime\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_CPU_EMULATION] = {
                 /* CPU emulation calls */
-                .set_name = "@cpu-emulation",
+                .name = "@cpu-emulation",
                 .value =
                 "modify_ldt\0"
                 "subpage_prot\0"
                 "switch_endian\0"
                 "vm86\0"
                 "vm86old\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_DEBUG] = {
                 /* Debugging/Performance Monitoring/Tracing */
-                .set_name = "@debug",
+                .name = "@debug",
                 .value =
                 "lookup_dcookie\0"
                 "perf_event_open\0"
@@ -161,11 +164,14 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "process_vm_writev\0"
                 "ptrace\0"
                 "rtas\0"
+#ifdef __NR_s390_runtime_instr
                 "s390_runtime_instr\0"
+#endif
                 "sys_debug_setcontext\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_DEFAULT] = {
                 /* Default list */
-                .set_name = "@default",
+                .name = "@default",
                 .value =
                 "execve\0"
                 "exit\0"
@@ -173,9 +179,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "getrlimit\0"      /* make sure processes can query stack size and such */
                 "rt_sigreturn\0"
                 "sigreturn\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_IO_EVENT] = {
                 /* Event loop use */
-                .set_name = "@io-event",
+                .name = "@io-event",
                 .value =
                 "_newselect\0"
                 "epoll_create1\0"
@@ -191,9 +198,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "ppoll\0"
                 "pselect6\0"
                 "select\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_IPC] = {
                 /* Message queues, SYSV IPC or other IPC: unusual */
-                .set_name = "@ipc",
+                .name = "@ipc",
                 .value = "ipc\0"
                 "mq_getsetattr\0"
                 "mq_notify\0"
@@ -215,23 +223,26 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "shmctl\0"
                 "shmdt\0"
                 "shmget\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_KEYRING] = {
                 /* Keyring */
-                .set_name = "@keyring",
+                .name = "@keyring",
                 .value =
                 "add_key\0"
                 "keyctl\0"
                 "request_key\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_MODULE] = {
                 /* Kernel module control */
-                .set_name = "@module",
+                .name = "@module",
                 .value =
                 "delete_module\0"
                 "finit_module\0"
                 "init_module\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_MOUNT] = {
                 /* Mounting */
-                .set_name = "@mount",
+                .name = "@mount",
                 .value =
                 "chroot\0"
                 "mount\0"
@@ -239,9 +250,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "pivot_root\0"
                 "umount2\0"
                 "umount\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_NETWORK_IO] = {
                 /* Network or Unix socket IO, should not be needed if not network facing */
-                .set_name = "@network-io",
+                .name = "@network-io",
                 .value =
                 "accept4\0"
                 "accept\0"
@@ -264,9 +276,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "socket\0"
                 "socketcall\0"
                 "socketpair\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_OBSOLETE] = {
                 /* Unusual, obsolete or unimplemented, some unknown even to libseccomp */
-                .set_name = "@obsolete",
+                .name = "@obsolete",
                 .value =
                 "_sysctl\0"
                 "afs_syscall\0"
@@ -292,9 +305,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "uselib\0"
                 "ustat\0"
                 "vserver\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_PRIVILEGED] = {
                 /* Nice grab-bag of all system calls which need superuser capabilities */
-                .set_name = "@privileged",
+                .name = "@privileged",
                 .value =
                 "@clock\0"
                 "@module\0"
@@ -333,9 +347,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "swapon\0"
                 "sysctl\0"
                 "vhangup\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_PROCESS] = {
                 /* Process control, execution, namespaces */
-                .set_name = "@process",
+                .name = "@process",
                 .value =
                 "arch_prctl\0"
                 "clone\0"
@@ -349,19 +364,66 @@ const SystemCallFilterSet syscall_filter_sets[] = {
                 "tkill\0"
                 "unshare\0"
                 "vfork\0"
-        }, {
+        },
+        [SYSCALL_FILTER_SET_RAW_IO] = {
                 /* Raw I/O ports */
-                .set_name = "@raw-io",
+                .name = "@raw-io",
                 .value =
                 "ioperm\0"
                 "iopl\0"
                 "pciconfig_iobase\0"
                 "pciconfig_read\0"
                 "pciconfig_write\0"
+#ifdef __NR_s390_pci_mmio_read
                 "s390_pci_mmio_read\0"
+#endif
+#ifdef __NR_s390_pci_mmio_write
                 "s390_pci_mmio_write\0"
-        }, {
-                .set_name = NULL,
-                .value = NULL
-        }
+#endif
+        },
 };
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+        unsigned i;
+
+        if (isempty(name) || name[0] != '@')
+                return NULL;
+
+        for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
+                if (streq(syscall_filter_sets[i].name, name))
+                        return syscall_filter_sets + i;
+
+        return NULL;
+}
+
+int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) {
+        const char *sys;
+        int r;
+
+        assert(seccomp);
+        assert(set);
+
+        NULSTR_FOREACH(sys, set->value) {
+                int id;
+
+                if (sys[0] == '@') {
+                        const SyscallFilterSet *other;
+
+                        other = syscall_filter_set_find(sys);
+                        if (!other)
+                                return -EINVAL;
+
+                        r = seccomp_add_syscall_filter_set(seccomp, other, action);
+                } else {
+                        id = seccomp_syscall_resolve_name(sys);
+                        if (id == __NR_SCMP_ERROR)
+                                return -EINVAL;
+
+                        r = seccomp_rule_add(seccomp, action, id, 0);
+                }
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index cca7c17912..34fd49c122 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -29,9 +29,31 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c);
 
 bool is_seccomp_available(void);
 
-typedef struct SystemCallFilterSet {
-        const char *set_name;
+typedef struct SyscallFilterSet {
+        const char *name;
         const char *value;
-} SystemCallFilterSet;
-
-extern const SystemCallFilterSet syscall_filter_sets[];
+} SyscallFilterSet;
+
+enum {
+        SYSCALL_FILTER_SET_CLOCK,
+        SYSCALL_FILTER_SET_CPU_EMULATION,
+        SYSCALL_FILTER_SET_DEBUG,
+        SYSCALL_FILTER_SET_DEFAULT,
+        SYSCALL_FILTER_SET_IO_EVENT,
+        SYSCALL_FILTER_SET_IPC,
+        SYSCALL_FILTER_SET_KEYRING,
+        SYSCALL_FILTER_SET_MODULE,
+        SYSCALL_FILTER_SET_MOUNT,
+        SYSCALL_FILTER_SET_NETWORK_IO,
+        SYSCALL_FILTER_SET_OBSOLETE,
+        SYSCALL_FILTER_SET_PRIVILEGED,
+        SYSCALL_FILTER_SET_PROCESS,
+        SYSCALL_FILTER_SET_RAW_IO,
+        _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action);
-- 
cgit v1.2.3-54-g00ecf


From 25a8d8a0cb297f75b6b9fd3cc15747ba7f56031e Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 20:12:33 +0200
Subject: core: rework apply_protect_kernel_modules() to use
 seccomp_add_syscall_filter_set()

Let's simplify this call, by making use of the new infrastructure.

This is actually more in line with Djalal's original patch but instead of
search the filter set in the array by its name we can now use the set index and
jump directly to it.
---
 src/core/execute.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 18bb67cda9..f435a079c7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1534,19 +1534,14 @@ finish:
 }
 
 static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
-        static const int module_syscalls[] = {
-                SCMP_SYS(delete_module),
-                SCMP_SYS(finit_module),
-                SCMP_SYS(init_module),
-        };
 
         scmp_filter_ctx *seccomp;
-        unsigned i;
+        const char *sys;
         int r;
 
         assert(c);
 
-        /* Turn of module syscalls on ProtectKernelModules=yes */
+        /* Turn off module syscalls on ProtectKernelModules=yes */
 
         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
                 return 0;
@@ -1559,12 +1554,9 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        for (i = 0; i < ELEMENTSOF(module_syscalls); i++) {
-                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM),
-                                     module_syscalls[i], 0);
-                if (r < 0)
-                        goto finish;
-        }
+        r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
+        if (r < 0)
+                goto finish;
 
         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
         if (r < 0)
-- 
cgit v1.2.3-54-g00ecf


From 8d7b0c8fd780e88ab5a6d1d79e09e27247245bee Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 20:28:05 +0200
Subject: seccomp: add new seccomp_init_conservative() helper

This adds a new seccomp_init_conservative() helper call that is mostly just a
wrapper around seccomp_init(), but turns off NNP and adds in all secondary
archs, for best compatibility with everything else.

Pretty much all of our code used the very same constructs for these three
steps, hence unifying this in one small function makes things a lot shorter.

This also changes incorrect usage of the "scmp_filter_ctx" type at various
places. libseccomp defines it as typedef to "void*", i.e. it is a pointer type
(pretty poor choice already!) that casts implicitly to and from all other
pointer types (even poorer choice: you defined a confusing type now, and don't
even gain any bit of type safety through it...). A lot of the code assumed the
type would refer to a structure, and hence aded additional "*" here and there.
Remove that.
---
 src/core/execute.c          | 88 ++++++++++-----------------------------------
 src/nspawn/nspawn-seccomp.c | 18 ++--------
 src/shared/seccomp-util.c   | 30 ++++++++++++++--
 src/shared/seccomp-util.h   |  4 ++-
 4 files changed, 53 insertions(+), 87 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index f435a079c7..668504c5cf 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1197,7 +1197,7 @@ static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
 
 static int apply_seccomp(const Unit* u, const ExecContext *c) {
         uint32_t negative_action, action;
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         Iterator i;
         void *id;
         int r;
@@ -1248,7 +1248,7 @@ finish:
 }
 
 static int apply_address_families(const Unit* u, const ExecContext *c) {
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         Iterator i;
         int r;
 
@@ -1257,13 +1257,9 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
                 return 0;
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
-
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         if (c->address_families_whitelist) {
                 int af, first = 0, last = 0;
@@ -1360,10 +1356,6 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
                 }
         }
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
@@ -1372,7 +1364,7 @@ finish:
 }
 
 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         int r;
 
         assert(c);
@@ -1380,13 +1372,9 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
         if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
                 return 0;
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
-
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = seccomp_rule_add(
                         seccomp,
@@ -1406,10 +1394,6 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
@@ -1424,7 +1408,7 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
                 SCHED_IDLE,
         };
 
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         unsigned i;
         int r, p, max_policy = 0;
 
@@ -1433,13 +1417,9 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "RestrictRealtime="))
                 return 0;
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
-
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         /* Determine the highest policy constant we want to allow */
         for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
@@ -1483,10 +1463,6 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
@@ -1495,7 +1471,7 @@ finish:
 }
 
 static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         int r;
 
         assert(c);
@@ -1506,13 +1482,9 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
                 return 0;
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
-
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = seccomp_rule_add(
                         seccomp,
@@ -1522,10 +1494,6 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
@@ -1534,9 +1502,7 @@ finish:
 }
 
 static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
-
-        scmp_filter_ctx *seccomp;
-        const char *sys;
+        scmp_filter_ctx seccomp;
         int r;
 
         assert(c);
@@ -1546,22 +1512,14 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
                 return 0;
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
-
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
@@ -1570,7 +1528,7 @@ finish:
 }
 
 static int apply_private_devices(Unit *u, const ExecContext *c) {
-        scmp_filter_ctx *seccomp;
+        scmp_filter_ctx seccomp;
         int r;
 
         assert(c);
@@ -1580,22 +1538,14 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "PrivateDevices="))
                 return 0;
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return -ENOMEM;
-
-        r = seccomp_add_secondary_archs(seccomp);
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
         if (r < 0)
                 goto finish;
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0)
-                goto finish;
-
         r = seccomp_load(seccomp);
 
 finish:
diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c
index 44a0b397ab..03a397d30c 100644
--- a/src/nspawn/nspawn-seccomp.c
+++ b/src/nspawn/nspawn-seccomp.c
@@ -135,15 +135,9 @@ int setup_seccomp(uint64_t cap_list_retain) {
                 return 0;
         }
 
-        seccomp = seccomp_init(SCMP_ACT_ALLOW);
-        if (!seccomp)
-                return log_oom();
-
-        r = seccomp_add_secondary_archs(seccomp);
-        if (r < 0) {
-                log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m");
-                goto finish;
-        }
+        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+        if (r < 0)
+                return log_error_errno(r, "Failed to allocate seccomp object: %m");
 
         r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain);
         if (r < 0)
@@ -171,12 +165,6 @@ int setup_seccomp(uint64_t cap_list_retain) {
                 goto finish;
         }
 
-        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
-        if (r < 0) {
-                log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m");
-                goto finish;
-        }
-
         r = seccomp_load(seccomp);
         if (r < 0) {
                 log_error_errno(r, "Failed to install seccomp audit filter: %m");
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 1d51f3fd1f..0b9fa47c44 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -74,7 +74,34 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) {
         return 0;
 }
 
-int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
+int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) {
+        scmp_filter_ctx seccomp;
+        int r;
+
+        /* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are
+         * added by default, and NNP is turned off. */
+
+        seccomp = seccomp_init(default_action);
+        if (!seccomp)
+                return -ENOMEM;
+
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
+        *ret = seccomp;
+        return 0;
+
+finish:
+        seccomp_release(seccomp);
+        return r;
+}
+
+int seccomp_add_secondary_archs(scmp_filter_ctx c) {
 
 #if defined(__i386__) || defined(__x86_64__)
         int r;
@@ -111,7 +138,6 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
 #endif
 
         return 0;
-
 }
 
 static bool is_basic_seccomp_available(void) {
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 34fd49c122..2de429a772 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -25,7 +25,9 @@
 const char* seccomp_arch_to_string(uint32_t c);
 int seccomp_arch_from_string(const char *n, uint32_t *ret);
 
-int seccomp_add_secondary_archs(scmp_filter_ctx *c);
+int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action);
+
+int seccomp_add_secondary_archs(scmp_filter_ctx c);
 
 bool is_seccomp_available(void);
 
-- 
cgit v1.2.3-54-g00ecf


From a3be2849b2570482757f83181b999febbfc7bbef Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 21 Oct 2016 21:18:46 +0200
Subject: seccomp: add new helper call seccomp_load_filter_set()

This allows us to unify most of the code in apply_protect_kernel_modules() and
apply_private_devices().
---
 src/core/execute.c        | 34 ++--------------------------------
 src/shared/seccomp-util.c | 24 ++++++++++++++++++++++++
 src/shared/seccomp-util.h |  2 ++
 3 files changed, 28 insertions(+), 32 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 668504c5cf..5e7d7c25d7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1502,9 +1502,6 @@ finish:
 }
 
 static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
-        scmp_filter_ctx seccomp;
-        int r;
-
         assert(c);
 
         /* Turn off module syscalls on ProtectKernelModules=yes */
@@ -1512,25 +1509,10 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
                 return 0;
 
-        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
-        if (r < 0)
-                return r;
-
-        r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
-        if (r < 0)
-                goto finish;
-
-        r = seccomp_load(seccomp);
-
-finish:
-        seccomp_release(seccomp);
-        return r;
+        return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
 }
 
 static int apply_private_devices(Unit *u, const ExecContext *c) {
-        scmp_filter_ctx seccomp;
-        int r;
-
         assert(c);
 
         /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
@@ -1538,19 +1520,7 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
         if (skip_seccomp_unavailable(u, "PrivateDevices="))
                 return 0;
 
-        r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
-        if (r < 0)
-                return r;
-
-        r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
-        if (r < 0)
-                goto finish;
-
-        r = seccomp_load(seccomp);
-
-finish:
-        seccomp_release(seccomp);
-        return r;
+        return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
 }
 
 #endif
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index f1e9de05b2..6252cd16a6 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -452,3 +452,27 @@ int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterS
 
         return 0;
 }
+
+int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) {
+        scmp_filter_ctx seccomp;
+        int r;
+
+        assert(set);
+
+        /* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */
+
+        r = seccomp_init_conservative(&seccomp, default_action);
+        if (r < 0)
+                return r;
+
+        r = seccomp_add_syscall_filter_set(seccomp, set, action);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
+        return r;
+
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 2de429a772..667687b14f 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -59,3 +59,5 @@ extern const SyscallFilterSet syscall_filter_sets[];
 const SyscallFilterSet *syscall_filter_set_find(const char *name);
 
 int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action);
+
+int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action);
-- 
cgit v1.2.3-54-g00ecf


From 8e4e851f1ddc98daf69b68998afc5a096ea17893 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 24 Oct 2016 20:37:54 +0200
Subject: core: move initialization of -.slice and init.scope into the
 unit_load() callbacks

Previously, we'd synthesize the root slice unit and the init scope unit in the
enumerator callbacks for the unit type. This is problematic if either of them
is already referenced from a unit that is loaded as result of another unit
type's enumerator logic.

Let's clean this up and simply create the two objects from the enumerator
callbacks, if they are not around yet. Do the actual filling in of the settings
from the unit_load() callbacks, to match how other units are loaded.

Fixes: #4322
---
 src/core/scope.c | 44 ++++++++++++++++++++++++++++++++------------
 src/core/slice.c | 38 ++++++++++++++++++++++++++++----------
 2 files changed, 60 insertions(+), 22 deletions(-)

(limited to 'src/core')

diff --git a/src/core/scope.c b/src/core/scope.c
index e7583f6d89..af0c43c7da 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -147,6 +147,34 @@ static int scope_verify(Scope *s) {
         return 0;
 }
 
+static int scope_load_init_scope(Unit *u) {
+        assert(u);
+
+        if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+                return 0;
+
+        u->transient = true;
+        u->no_gc = true;
+
+        /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+         * synthesize it here, instead of relying on the unit file on disk. */
+
+        u->default_dependencies = false;
+        u->ignore_on_isolate = true;
+        u->refuse_manual_start = true;
+        u->refuse_manual_stop = true;
+
+        SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
+
+        /* Prettify things, if we can. */
+        if (!u->description)
+                u->description = strdup("System and Service Manager");
+        if (!u->documentation)
+                (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+        return 1;
+}
+
 static int scope_load(Unit *u) {
         Scope *s = SCOPE(u);
         int r;
@@ -158,6 +186,9 @@ static int scope_load(Unit *u) {
                 /* Refuse to load non-transient scope units, but allow them while reloading. */
                 return -ENOENT;
 
+        r = scope_load_init_scope(u);
+        if (r < 0)
+                return r;
         r = unit_load_fragment_and_dropin_optional(u);
         if (r < 0)
                 return r;
@@ -543,25 +574,14 @@ static void scope_enumerate(Manager *m) {
                 r = unit_add_name(u, SPECIAL_INIT_SCOPE);
                 if (r < 0)  {
                         unit_free(u);
-                        log_error_errno(r, "Failed to add init.scope name");
+                        log_error_errno(r, "Failed to add the " SPECIAL_INIT_SCOPE " name: %m");
                         return;
                 }
         }
 
         u->transient = true;
-        u->default_dependencies = false;
         u->no_gc = true;
-        u->ignore_on_isolate = true;
-        u->refuse_manual_start = true;
-        u->refuse_manual_stop = true;
         SCOPE(u)->deserialized_state = SCOPE_RUNNING;
-        SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
-
-        /* Prettify things, if we can. */
-        if (!u->description)
-                u->description = strdup("System and Service Manager");
-        if (!u->documentation)
-                (void) strv_extend(&u->documentation, "man:systemd(1)");
 
         unit_add_to_load_queue(u);
         unit_add_to_dbus_queue(u);
diff --git a/src/core/slice.c b/src/core/slice.c
index 03fe797f27..0fef29661f 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -130,6 +130,30 @@ static int slice_verify(Slice *s) {
         return 0;
 }
 
+static int slice_load_root_slice(Unit *u) {
+        assert(u);
+
+        if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+                return 0;
+
+        u->no_gc = true;
+
+        /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+         * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+        u->default_dependencies = false;
+        u->ignore_on_isolate = true;
+        u->refuse_manual_start = true;
+        u->refuse_manual_stop = true;
+
+        if (!u->description)
+                u->description = strdup("Root Slice");
+        if (!u->documentation)
+                u->documentation = strv_new("man:systemd.special(7)", NULL);
+
+        return 1;
+}
+
 static int slice_load(Unit *u) {
         Slice *s = SLICE(u);
         int r;
@@ -137,6 +161,9 @@ static int slice_load(Unit *u) {
         assert(s);
         assert(u->load_state == UNIT_STUB);
 
+        r = slice_load_root_slice(u);
+        if (r < 0)
+                return r;
         r = unit_load_fragment_and_dropin_optional(u);
         if (r < 0)
                 return r;
@@ -283,23 +310,14 @@ static void slice_enumerate(Manager *m) {
                 r = unit_add_name(u, SPECIAL_ROOT_SLICE);
                 if (r < 0) {
                         unit_free(u);
-                        log_error_errno(r, "Failed to add -.slice name");
+                        log_error_errno(r, "Failed to add the "SPECIAL_ROOT_SLICE " name: %m");
                         return;
                 }
         }
 
-        u->default_dependencies = false;
         u->no_gc = true;
-        u->ignore_on_isolate = true;
-        u->refuse_manual_start = true;
-        u->refuse_manual_stop = true;
         SLICE(u)->deserialized_state = SLICE_ACTIVE;
 
-        if (!u->description)
-                u->description = strdup("Root Slice");
-        if (!u->documentation)
-                (void) strv_extend(&u->documentation, "man:systemd.special(7)");
-
         unit_add_to_load_queue(u);
         unit_add_to_dbus_queue(u);
 }
-- 
cgit v1.2.3-54-g00ecf


From d2ffa389b8112282be1633bb4638f6f47e159299 Mon Sep 17 00:00:00 2001
From: Topi Miettinen <toiwoton@gmail.com>
Date: Wed, 26 Oct 2016 18:52:53 +0300
Subject: seccomp: also block shmat(..., SHM_EXEC) for MemoryDenyWriteExecute

shmat(..., SHM_EXEC) can be used to create writable and executable
memory, so let's block it when MemoryDenyWriteExecute is set.
---
 man/systemd.exec.xml | 11 +++++++----
 src/core/execute.c   | 11 +++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'src/core')

diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index dbe4594730..f9a15d8db0 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1516,12 +1516,15 @@
         <term><varname>MemoryDenyWriteExecute=</varname></term>
 
         <listitem><para>Takes a boolean argument. If set, attempts to create memory mappings that are writable and
-        executable at the same time, or to change existing memory mappings to become executable are prohibited.
+        executable at the same time, or to change existing memory mappings to become executable, or mapping shared memory
+        segments as executable are prohibited.
         Specifically, a system call filter is added that rejects
         <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
-        system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set
-        and <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry>
-        system calls with <constant>PROT_EXEC</constant> set. Note that this option is incompatible with programs
+        system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set,
+        <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+        system calls with <constant>PROT_EXEC</constant> set and
+        <citerefentry><refentrytitle>shmat</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+        system calls with <constant>SHM_EXEC</constant> set. Note that this option is incompatible with programs
         that generate program code dynamically at runtime, such as JIT execution engines, or programs compiled making
         use of the code "trampoline" feature of various C compilers. This option improves service security, as it makes
         harder for software exploits to change running code dynamically.
diff --git a/src/core/execute.c b/src/core/execute.c
index 5e7d7c25d7..7b42ac7bdc 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -29,8 +29,10 @@
 #include <sys/mman.h>
 #include <sys/personality.h>
 #include <sys/prctl.h>
+#include <sys/shm.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 #include <sys/un.h>
 #include <unistd.h>
 #include <utmpx.h>
@@ -1394,6 +1396,15 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
         if (r < 0)
                 goto finish;
 
+        r = seccomp_rule_add(
+                        seccomp,
+                        SCMP_ACT_ERRNO(EPERM),
+                        SCMP_SYS(shmat),
+                        1,
+                        SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+        if (r < 0)
+                goto finish;
+
         r = seccomp_load(seccomp);
 
 finish:
-- 
cgit v1.2.3-54-g00ecf


From 93c6bb51b6a3c80577fd823d0108c1dfcf01a0ce Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Thu, 27 Oct 2016 09:20:18 +0200
Subject: core: move the code that setups namespaces on its own function

---
 src/core/execute.c | 101 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 47 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 5e7d7c25d7..2908ba8fa3 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2003,6 +2003,59 @@ static int compile_read_write_paths(
         return 0;
 }
 
+static int apply_mount_namespace(Unit *u, const ExecContext *context,
+                                 const ExecParameters *params,
+                                 ExecRuntime *runtime) {
+        int r;
+        _cleanup_free_ char **rw = NULL;
+        char *tmp = NULL, *var = NULL;
+        const char *root_dir = NULL;
+        NameSpaceInfo ns_info = {
+                .private_dev = context->private_devices,
+                .protect_control_groups = context->protect_control_groups,
+                .protect_kernel_tunables = context->protect_kernel_tunables,
+                .protect_kernel_modules = context->protect_kernel_modules,
+        };
+
+        /* The runtime struct only contains the parent of the private /tmp,
+         * which is non-accessible to world users. Inside of it there's a /tmp
+         * that is sticky, and that's the one we want to use here. */
+
+        if (context->private_tmp && runtime) {
+                if (runtime->tmp_dir)
+                        tmp = strjoina(runtime->tmp_dir, "/tmp");
+                if (runtime->var_tmp_dir)
+                        var = strjoina(runtime->var_tmp_dir, "/tmp");
+        }
+
+        r = compile_read_write_paths(context, params, &rw);
+        if (r < 0)
+                return r;
+
+        if (params->flags & EXEC_APPLY_CHROOT)
+                root_dir = context->root_directory;
+
+        r = setup_namespace(root_dir, &ns_info, rw,
+                            context->read_only_paths,
+                            context->inaccessible_paths,
+                            tmp,
+                            var,
+                            context->protect_home,
+                            context->protect_system,
+                            context->mount_flags);
+
+        /* If we couldn't set up the namespace this is probably due to a
+         * missing capability. In this case, silently proceeed. */
+        if (IN_SET(r, -EPERM, -EACCES)) {
+                log_open();
+                log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
+                log_close();
+                r = 0;
+        }
+
+        return r;
+}
+
 static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
         assert(array);
         assert(n);
@@ -2466,57 +2519,11 @@ static int exec_child(
 
         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
         if (needs_mount_namespace) {
-                _cleanup_free_ char **rw = NULL;
-                char *tmp = NULL, *var = NULL;
-                NameSpaceInfo ns_info = {
-                        .private_dev = context->private_devices,
-                        .protect_control_groups = context->protect_control_groups,
-                        .protect_kernel_tunables = context->protect_kernel_tunables,
-                        .protect_kernel_modules = context->protect_kernel_modules,
-                };
-
-                /* The runtime struct only contains the parent
-                 * of the private /tmp, which is
-                 * non-accessible to world users. Inside of it
-                 * there's a /tmp that is sticky, and that's
-                 * the one we want to use here. */
-
-                if (context->private_tmp && runtime) {
-                        if (runtime->tmp_dir)
-                                tmp = strjoina(runtime->tmp_dir, "/tmp");
-                        if (runtime->var_tmp_dir)
-                                var = strjoina(runtime->var_tmp_dir, "/tmp");
-                }
-
-                r = compile_read_write_paths(context, params, &rw);
+                r = apply_mount_namespace(unit, context, params, runtime);
                 if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
                         return r;
                 }
-
-                r = setup_namespace(
-                                (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL,
-                                &ns_info,
-                                rw,
-                                context->read_only_paths,
-                                context->inaccessible_paths,
-                                tmp,
-                                var,
-                                context->protect_home,
-                                context->protect_system,
-                                context->mount_flags);
-
-                /* If we couldn't set up the namespace this is
-                 * probably due to a missing capability. In this case,
-                 * silently proceeed. */
-                if (r == -EPERM || r == -EACCES) {
-                        log_open();
-                        log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
-                        log_close();
-                } else if (r < 0) {
-                        *exit_status = EXIT_NAMESPACE;
-                        return r;
-                }
         }
 
         if (context->working_directory_home)
-- 
cgit v1.2.3-54-g00ecf


From e7f1e7c6e2f91f3cad5eadfcc6ab9673caedb838 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Thu, 27 Oct 2016 09:21:44 +0200
Subject: core: move apply working directory code into its own
 apply_working_directory()

---
 src/core/execute.c | 52 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 21 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 2908ba8fa3..642add0360 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2056,6 +2056,33 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
         return r;
 }
 
+static int apply_working_directory(const ExecContext *context,
+                                   const ExecParameters *params,
+                                   const char *working_directory,
+                                   const bool needs_mount_ns) {
+
+        if (params->flags & EXEC_APPLY_CHROOT) {
+                if (!needs_mount_ns && context->root_directory)
+                        if (chroot(context->root_directory) < 0)
+                                return -errno;
+
+                if (chdir(working_directory) < 0 &&
+                    !context->working_directory_missing_ok)
+                        return -errno;
+
+        } else {
+                const char *d;
+
+                d = strjoina(strempty(context->root_directory), "/",
+                             strempty(working_directory));
+                if (chdir(d) < 0 &&
+                    !context->working_directory_missing_ok)
+                        return -errno;
+        }
+
+        return 0;
+}
+
 static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
         assert(array);
         assert(n);
@@ -2542,27 +2569,10 @@ static int exec_child(
                 }
         }
 
-        if (params->flags & EXEC_APPLY_CHROOT) {
-                if (!needs_mount_namespace && context->root_directory)
-                        if (chroot(context->root_directory) < 0) {
-                                *exit_status = EXIT_CHROOT;
-                                return -errno;
-                        }
-
-                if (chdir(wd) < 0 &&
-                    !context->working_directory_missing_ok) {
-                        *exit_status = EXIT_CHDIR;
-                        return -errno;
-                }
-        } else {
-                const char *d;
-
-                d = strjoina(strempty(context->root_directory), "/", strempty(wd));
-                if (chdir(d) < 0 &&
-                    !context->working_directory_missing_ok) {
-                        *exit_status = EXIT_CHDIR;
-                        return -errno;
-                }
+        r = apply_working_directory(context, params, wd, needs_mount_namespace);
+        if (r < 0) {
+                *exit_status = EXIT_CHROOT;
+                return r;
         }
 
 #ifdef HAVE_SELINUX
-- 
cgit v1.2.3-54-g00ecf


From 2b3c1b9e9d7a09b1f974f8d702da8ebaeff036f6 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Thu, 27 Oct 2016 09:28:54 +0200
Subject: core: get the working directory value inside
 apply_working_directory()

Improve apply_working_directory() and lets get the current working directory
inside of it.
---
 src/core/execute.c | 43 +++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 22 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 642add0360..0b6fcc9ac7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2017,6 +2017,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
                 .protect_kernel_modules = context->protect_kernel_modules,
         };
 
+        assert(context);
+
         /* The runtime struct only contains the parent of the private /tmp,
          * which is non-accessible to world users. Inside of it there's a /tmp
          * that is sticky, and that's the one we want to use here. */
@@ -2058,27 +2060,31 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
 
 static int apply_working_directory(const ExecContext *context,
                                    const ExecParameters *params,
-                                   const char *working_directory,
+                                   const char *home,
                                    const bool needs_mount_ns) {
+        const char *d;
+        const char *wd;
+
+        assert(context);
+
+        if (context->working_directory_home)
+                wd = home;
+        else if (context->working_directory)
+                wd = context->working_directory;
+        else
+                wd = "/";
 
         if (params->flags & EXEC_APPLY_CHROOT) {
                 if (!needs_mount_ns && context->root_directory)
                         if (chroot(context->root_directory) < 0)
                                 return -errno;
 
-                if (chdir(working_directory) < 0 &&
-                    !context->working_directory_missing_ok)
-                        return -errno;
-
-        } else {
-                const char *d;
+                d = wd;
+        } else
+                d = strjoina(strempty(context->root_directory), "/", strempty(wd));
 
-                d = strjoina(strempty(context->root_directory), "/",
-                             strempty(working_directory));
-                if (chdir(d) < 0 &&
-                    !context->working_directory_missing_ok)
-                        return -errno;
-        }
+        if (chdir(d) < 0 && !context->working_directory_missing_ok)
+                return -errno;
 
         return 0;
 }
@@ -2219,7 +2225,7 @@ static int exec_child(
         _cleanup_free_ char *mac_selinux_context_net = NULL;
         _cleanup_free_ gid_t *supplementary_gids = NULL;
         const char *username = NULL, *groupname = NULL;
-        const char *home = NULL, *shell = NULL, *wd;
+        const char *home = NULL, *shell = NULL;
         dev_t journal_stream_dev = 0;
         ino_t journal_stream_ino = 0;
         bool needs_mount_namespace;
@@ -2553,13 +2559,6 @@ static int exec_child(
                 }
         }
 
-        if (context->working_directory_home)
-                wd = home;
-        else if (context->working_directory)
-                wd = context->working_directory;
-        else
-                wd = "/";
-
         /* Drop group as early as possbile */
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
                 r = enforce_groups(context, gid, supplementary_gids, ngids);
@@ -2569,7 +2568,7 @@ static int exec_child(
                 }
         }
 
-        r = apply_working_directory(context, params, wd, needs_mount_namespace);
+        r = apply_working_directory(context, params, home, needs_mount_namespace);
         if (r < 0) {
                 *exit_status = EXIT_CHROOT;
                 return r;
-- 
cgit v1.2.3-54-g00ecf


From 50b3dfb9d64872025450aa63765206720be471d6 Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Tue, 25 Oct 2016 16:24:35 +0200
Subject: core: lets apply working directory just after mount namespaces

This makes applying groups after applying the working directory, this
may allow some flexibility but at same it is not a big deal since we
don't execute or do anything between applying working directory and
droping groups.
---
 src/core/execute.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 0b6fcc9ac7..a9e39f6fd7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2559,6 +2559,13 @@ static int exec_child(
                 }
         }
 
+        /* Apply just after mount namespace setup */
+        r = apply_working_directory(context, params, home, needs_mount_namespace);
+        if (r < 0) {
+                *exit_status = EXIT_CHROOT;
+                return r;
+        }
+
         /* Drop group as early as possbile */
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
                 r = enforce_groups(context, gid, supplementary_gids, ngids);
@@ -2568,12 +2575,6 @@ static int exec_child(
                 }
         }
 
-        r = apply_working_directory(context, params, home, needs_mount_namespace);
-        if (r < 0) {
-                *exit_status = EXIT_CHROOT;
-                return r;
-        }
-
 #ifdef HAVE_SELINUX
         if ((params->flags & EXEC_APPLY_PERMISSIONS) &&
             mac_selinux_use() &&
-- 
cgit v1.2.3-54-g00ecf


From 59e856c7d33e8260d0ac3a8e21318d6f0768d75b Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Thu, 27 Oct 2016 09:39:20 +0200
Subject: core: make unit argument const for apply seccomp functions

---
 src/core/execute.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index a9e39f6fd7..7f343c4902 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1470,7 +1470,7 @@ finish:
         return r;
 }
 
-static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
+static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
         scmp_filter_ctx seccomp;
         int r;
 
@@ -1501,7 +1501,7 @@ finish:
         return r;
 }
 
-static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
+static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
         assert(c);
 
         /* Turn off module syscalls on ProtectKernelModules=yes */
@@ -1512,7 +1512,7 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
         return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
 }
 
-static int apply_private_devices(Unit *u, const ExecContext *c) {
+static int apply_private_devices(const Unit *u, const ExecContext *c) {
         assert(c);
 
         /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
-- 
cgit v1.2.3-54-g00ecf


From 9021ff17e2a6e08e0675bc69c8b18e2ddb63de4a Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 22 Oct 2016 18:53:24 -0400
Subject: pid1: only log about added fd if it was really added

If it was a duplicate, log nothing.
---
 src/core/service.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index ee4f4983fc..c699a31941 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -368,6 +368,8 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
         ServiceFDStore *fs;
         int r;
 
+        /* fd is always consumed if we return >= 0 */
+
         assert(s);
         assert(fd >= 0);
 
@@ -379,9 +381,8 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
                 if (r < 0)
                         return r;
                 if (r > 0) {
-                        /* Already included */
                         safe_close(fd);
-                        return 1;
+                        return 0; /* fd already included */
                 }
         }
 
@@ -409,7 +410,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
         LIST_PREPEND(fd_store, s->fd_store, fs);
         s->n_fd_store++;
 
-        return 1;
+        return 1; /* fd newly stored */
 }
 
 static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
@@ -430,10 +431,9 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
                 r = service_add_fd_store(s, fd, name);
                 if (r < 0)
                         return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
-                if (r > 0) {
+                if (r > 0)
                         log_unit_debug(UNIT(s), "Added fd to fd store.");
-                        fd = -1;
-                }
+                fd = -1;
         }
 
         if (fdset_size(fds) > 0)
@@ -2336,7 +2336,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                         r = service_add_fd_store(s, fd, t);
                         if (r < 0)
                                 log_unit_error_errno(u, r, "Failed to add fd to store: %m");
-                        else if (r > 0)
+                        else
                                 fdset_remove(fds, fd);
                 }
 
-- 
cgit v1.2.3-54-g00ecf


From 16f70d6362536c7432f0fd0af0155a5cd95c4a76 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 22 Oct 2016 20:01:37 -0400
Subject: pid1: nicely log when doing operation on stored fds

Should help with debugging #4408.
---
 src/core/service.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index c699a31941..0775bd6a9f 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -360,6 +360,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us
         assert(fs);
 
         /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+        log_unit_debug(UNIT(fs->service),
+                       "Received %s on stored fd %d (%s), closing.",
+                       revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+                       fs->fd, strna(fs->fdname));
         service_fd_store_unlink(fs);
         return 0;
 }
@@ -432,7 +436,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
                 if (r < 0)
                         return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
                 if (r > 0)
-                        log_unit_debug(UNIT(s), "Added fd to fd store.");
+                        log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
                 fd = -1;
         }
 
@@ -1225,6 +1229,7 @@ static int service_spawn(
                         return r;
 
                 n_fds = r;
+                log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds);
         }
 
         r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
-- 
cgit v1.2.3-54-g00ecf


From f0bfbfac43b7faa68ef1bb2ad659c191b9ec85d2 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Sat, 22 Oct 2016 22:16:02 -0400
Subject: core: when restarting services, don't close fds

We would close all the stored fds in service_release_resources(), which of
course broke the whole concept of storing fds over service restart.

Fixes #4408.
---
 src/core/service.c | 22 +++++++++++++++-------
 src/core/unit.c    |  6 ++++--
 src/core/unit.h    |  2 +-
 3 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index 0775bd6a9f..00ce500fd3 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
         free(fs);
 }
 
-static void service_release_resources(Unit *u) {
+static void service_release_fd_store(Service *s) {
+        assert(s);
+
+        log_unit_debug(UNIT(s), "Releasing all stored fds");
+        while (s->fd_store)
+                service_fd_store_unlink(s->fd_store);
+
+        assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u, bool inactive) {
         Service *s = SERVICE(u);
 
         assert(s);
@@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) {
         if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
                 return;
 
-        log_unit_debug(u, "Releasing all resources.");
+        log_unit_debug(u, "Releasing resources.");
 
         s->stdin_fd = safe_close(s->stdin_fd);
         s->stdout_fd = safe_close(s->stdout_fd);
         s->stderr_fd = safe_close(s->stderr_fd);
 
-        while (s->fd_store)
-                service_fd_store_unlink(s->fd_store);
-
-        assert(s->n_fd_store == 0);
+        if (inactive)
+                service_release_fd_store(s);
 }
 
 static void service_done(Unit *u) {
@@ -350,7 +358,7 @@ static void service_done(Unit *u) {
 
         s->timer_event_source = sd_event_source_unref(s->timer_event_source);
 
-        service_release_resources(u);
+        service_release_resources(u, true);
 }
 
 static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
diff --git a/src/core/unit.c b/src/core/unit.c
index cabb1050a8..8e6cef103b 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -302,6 +302,7 @@ int unit_set_description(Unit *u, const char *description) {
 
 bool unit_check_gc(Unit *u) {
         UnitActiveState state;
+        bool inactive;
         assert(u);
 
         if (u->job)
@@ -311,16 +312,17 @@ bool unit_check_gc(Unit *u) {
                 return true;
 
         state = unit_active_state(u);
+        inactive = state == UNIT_INACTIVE;
 
         /* If the unit is inactive and failed and no job is queued for
          * it, then release its runtime resources */
         if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
             UNIT_VTABLE(u)->release_resources)
-                UNIT_VTABLE(u)->release_resources(u);
+                UNIT_VTABLE(u)->release_resources(u, inactive);
 
         /* But we keep the unit object around for longer when it is
          * referenced or configured to not be gc'ed */
-        if (state != UNIT_INACTIVE)
+        if (!inactive)
                 return true;
 
         if (u->no_gc)
diff --git a/src/core/unit.h b/src/core/unit.h
index adcdee6db6..e01ec7a775 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -372,7 +372,7 @@ struct UnitVTable {
 
         /* When the unit is not running and no job for it queued we
          * shall release its runtime resources */
-        void (*release_resources)(Unit *u);
+        void (*release_resources)(Unit *u, bool inactive);
 
         /* Invoked on every child that died */
         void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
-- 
cgit v1.2.3-54-g00ecf


From 5cd9cd3537d1afca85877103615e61e6c03e7079 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 25 Oct 2016 15:52:54 +0200
Subject: execute: apply seccomp filters after changing selinux/aa/smack
 contexts

Seccomp is generally an unprivileged operation, changing security contexts is
most likely associated with some form of policy. Moreover, while seccomp may
influence our own flow of code quite a bit (much more than the security context
change) make sure to apply the seccomp filters immediately before executing the
binary to invoke.

This also moves enforcement of NNP after the security context change, so that
NNP cannot affect it anymore. (However, the security policy now has to permit
the NNP change).

This change has a good chance of breaking current SELinux/AA/SMACK setups, because
the policy might not expect this change of behaviour. However, it's technically
the better choice I think and should hence be applied.

Fixes: #3993
---
 src/core/execute.c | 70 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 31 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index ae9df41b99..c73e2e7220 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2538,12 +2538,6 @@ static int exec_child(
         (void) umask(context->umask);
 
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
-                r = setup_smack(context, command);
-                if (r < 0) {
-                        *exit_status = EXIT_SMACK_PROCESS_LABEL;
-                        return r;
-                }
-
                 if (context->pam_name && username) {
                         r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
                         if (r < 0) {
@@ -2693,6 +2687,41 @@ static int exec_child(
                         }
                 }
 
+                /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+                 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+                 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+                 * are restricted. */
+
+#ifdef HAVE_SELINUX
+                if (mac_selinux_use()) {
+                        char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+                        if (exec_context) {
+                                r = setexeccon(exec_context);
+                                if (r < 0) {
+                                        *exit_status = EXIT_SELINUX_CONTEXT;
+                                        return r;
+                                }
+                        }
+                }
+#endif
+
+                r = setup_smack(context, command);
+                if (r < 0) {
+                        *exit_status = EXIT_SMACK_PROCESS_LABEL;
+                        return r;
+                }
+
+#ifdef HAVE_APPARMOR
+                if (context->apparmor_profile && mac_apparmor_use()) {
+                        r = aa_change_onexec(context->apparmor_profile);
+                        if (r < 0 && !context->apparmor_profile_ignore) {
+                                *exit_status = EXIT_APPARMOR_PROFILE;
+                                return -errno;
+                        }
+                }
+#endif
+
                 /* PR_GET_SECUREBITS is not privileged, while
                  * PR_SET_SECUREBITS is. So to suppress
                  * potential EPERMs we'll try not to call
@@ -2758,6 +2787,8 @@ static int exec_child(
                         }
                 }
 
+                /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+                 * by the filter as little as possible. */
                 if (context_has_syscall_filters(context)) {
                         r = apply_seccomp(unit, context);
                         if (r < 0) {
@@ -2766,30 +2797,6 @@ static int exec_child(
                         }
                 }
 #endif
-
-#ifdef HAVE_SELINUX
-                if (mac_selinux_use()) {
-                        char *exec_context = mac_selinux_context_net ?: context->selinux_context;
-
-                        if (exec_context) {
-                                r = setexeccon(exec_context);
-                                if (r < 0) {
-                                        *exit_status = EXIT_SELINUX_CONTEXT;
-                                        return r;
-                                }
-                        }
-                }
-#endif
-
-#ifdef HAVE_APPARMOR
-                if (context->apparmor_profile && mac_apparmor_use()) {
-                        r = aa_change_onexec(context->apparmor_profile);
-                        if (r < 0 && !context->apparmor_profile_ignore) {
-                                *exit_status = EXIT_APPARMOR_PROFILE;
-                                return -errno;
-                        }
-                }
-#endif
         }
 
         final_argv = replace_env_argv(argv, accum_env);
@@ -3611,7 +3618,8 @@ char *exec_command_line(char **argv) {
         STRV_FOREACH(a, argv)
                 k += strlen(*a)+3;
 
-        if (!(n = new(char, k)))
+        n = new(char, k);
+        if (!n)
                 return NULL;
 
         p = n;
-- 
cgit v1.2.3-54-g00ecf


From bbeea271172a4664ce9a4a41a7fa3b1ca18dbedd Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Wed, 2 Nov 2016 17:51:35 +0100
Subject: core: initialize groups list before checking SupplementaryGroups= of
 a unit (#4533)

Always initialize the supplementary groups of caller before checking the
unit SupplementaryGroups= option.

Fixes https://github.com/systemd/systemd/issues/4531
---
 src/core/execute.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index ae9df41b99..a1bd0c1238 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -787,6 +787,20 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
 
         assert(c);
 
+        /*
+         * If user is given, then lookup GID and supplementary groups list.
+         * We avoid NSS lookups for gid=0. Also we have to initialize groups
+         * as early as possible so we keep the list of supplementary groups
+         * of the caller.
+         */
+        if (user && gid_is_valid(gid) && gid != 0) {
+                /* First step, initialize groups from /etc/groups */
+                if (initgroups(user, gid) < 0)
+                        return -errno;
+
+                keep_groups = true;
+        }
+
         if (!c->supplementary_groups)
                 return 0;
 
@@ -803,18 +817,6 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
                         return -EOPNOTSUPP; /* For all other values */
         }
 
-        /*
-         * If user is given, then lookup GID and supplementary group list.
-         * We avoid NSS lookups for gid=0.
-         */
-        if (user && gid_is_valid(gid) && gid != 0) {
-                /* First step, initialize groups from /etc/groups */
-                if (initgroups(user, gid) < 0)
-                        return -errno;
-
-                keep_groups = true;
-        }
-
         l_gids = new(gid_t, ngroups_max);
         if (!l_gids)
                 return -ENOMEM;
@@ -2577,7 +2579,7 @@ static int exec_child(
                 return r;
         }
 
-        /* Drop group as early as possbile */
+        /* Drop groups as early as possbile */
         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
                 r = enforce_groups(context, gid, supplementary_gids, ngids);
                 if (r < 0) {
-- 
cgit v1.2.3-54-g00ecf


From f5869324e303a136d7ca802769e8966e8eb26d56 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 24 Oct 2016 21:41:54 +0200
Subject: core: rework the "no_gc" unit flag to become a more generic
 "perpetual" flag

So far "no_gc" was set on -.slice and init.scope, to units that are always
running, cannot be stopped and never exist in an "inactive" state. Since these
units are the only users of this flag, let's remodel it and rename it
"perpetual" and let's derive more funcitonality off it. Specifically, refuse
enqueing stop jobs for these units, and report that they are "unstoppable" in
the CanStop bus property.
---
 src/core/dbus-unit.c |  6 ++----
 src/core/scope.c     |  6 ++----
 src/core/slice.c     | 10 ++++------
 src/core/unit.c      | 27 ++++++++++++++++++++++++---
 src/core/unit.h      |  6 ++++--
 5 files changed, 36 insertions(+), 19 deletions(-)

(limited to 'src/core')

diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 8f34fa1a52..69e249c844 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -263,10 +263,7 @@ static int property_get_can_stop(
         assert(reply);
         assert(u);
 
-        /* On the lower levels we assume that every unit we can start
-         * we can also stop */
-
-        return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop);
+        return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop);
 }
 
 static int property_get_can_reload(
@@ -760,6 +757,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0),
         SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/scope.c b/src/core/scope.c
index af0c43c7da..77f3fdc2aa 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -154,15 +154,13 @@ static int scope_load_init_scope(Unit *u) {
                 return 0;
 
         u->transient = true;
-        u->no_gc = true;
+        u->perpetual = true;
 
         /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
          * synthesize it here, instead of relying on the unit file on disk. */
 
         u->default_dependencies = false;
         u->ignore_on_isolate = true;
-        u->refuse_manual_start = true;
-        u->refuse_manual_stop = true;
 
         SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
 
@@ -580,7 +578,7 @@ static void scope_enumerate(Manager *m) {
         }
 
         u->transient = true;
-        u->no_gc = true;
+        u->perpetual = true;
         SCOPE(u)->deserialized_state = SCOPE_RUNNING;
 
         unit_add_to_load_queue(u);
diff --git a/src/core/slice.c b/src/core/slice.c
index 0fef29661f..c505fa1916 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -136,15 +136,13 @@ static int slice_load_root_slice(Unit *u) {
         if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
                 return 0;
 
-        u->no_gc = true;
+        u->perpetual = true;
 
         /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
          * special semantics we synthesize it here, instead of relying on the unit file on disk. */
 
         u->default_dependencies = false;
         u->ignore_on_isolate = true;
-        u->refuse_manual_start = true;
-        u->refuse_manual_stop = true;
 
         if (!u->description)
                 u->description = strdup("Root Slice");
@@ -302,7 +300,7 @@ static void slice_enumerate(Manager *m) {
         u = manager_get_unit(m, SPECIAL_ROOT_SLICE);
         if (!u) {
                 u = unit_new(m, sizeof(Slice));
-                if (!u)  {
+                if (!u) {
                         log_oom();
                         return;
                 }
@@ -310,12 +308,12 @@ static void slice_enumerate(Manager *m) {
                 r = unit_add_name(u, SPECIAL_ROOT_SLICE);
                 if (r < 0) {
                         unit_free(u);
-                        log_error_errno(r, "Failed to add the "SPECIAL_ROOT_SLICE " name: %m");
+                        log_error_errno(r, "Failed to add the " SPECIAL_ROOT_SLICE " name: %m");
                         return;
                 }
         }
 
-        u->no_gc = true;
+        u->perpetual = true;
         SLICE(u)->deserialized_state = SLICE_ACTIVE;
 
         unit_add_to_load_queue(u);
diff --git a/src/core/unit.c b/src/core/unit.c
index cabb1050a8..8d74b44f73 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -323,7 +323,7 @@ bool unit_check_gc(Unit *u) {
         if (state != UNIT_INACTIVE)
                 return true;
 
-        if (u->no_gc)
+        if (u->perpetual)
                 return true;
 
         if (u->refs)
@@ -924,6 +924,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 "%s\tGC Check Good: %s\n"
                 "%s\tNeed Daemon Reload: %s\n"
                 "%s\tTransient: %s\n"
+                "%s\tPerpetual: %s\n"
                 "%s\tSlice: %s\n"
                 "%s\tCGroup: %s\n"
                 "%s\tCGroup realized: %s\n"
@@ -942,6 +943,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, yes_no(unit_check_gc(u)),
                 prefix, yes_no(unit_need_daemon_reload(u)),
                 prefix, yes_no(u->transient),
+                prefix, yes_no(u->perpetual),
                 prefix, strna(unit_slice_name(u)),
                 prefix, strna(u->cgroup_path),
                 prefix, yes_no(u->cgroup_realized),
@@ -1616,6 +1618,18 @@ int unit_stop(Unit *u) {
         return UNIT_VTABLE(u)->stop(u);
 }
 
+bool unit_can_stop(Unit *u) {
+        assert(u);
+
+        if (!unit_supported(u))
+                return false;
+
+        if (u->perpetual)
+                return false;
+
+        return !!UNIT_VTABLE(u)->stop;
+}
+
 /* Errors:
  *         -EBADR:    This unit type does not support reloading.
  *         -ENOEXEC:  Unit is not started.
@@ -2150,13 +2164,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) {
 
         case JOB_VERIFY_ACTIVE:
         case JOB_START:
-        case JOB_STOP:
         case JOB_NOP:
+                /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+                 * startable by us but may appear due to external events, and it thus makes sense to permit enqueing
+                 * jobs for it. */
                 return true;
 
+        case JOB_STOP:
+                /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to
+                 * external events), hence it makes no sense to permit enqueing such a request either. */
+                return !u->perpetual;
+
         case JOB_RESTART:
         case JOB_TRY_RESTART:
-                return unit_can_start(u);
+                return unit_can_stop(u) && unit_can_start(u);
 
         case JOB_RELOAD:
         case JOB_TRY_RELOAD:
diff --git a/src/core/unit.h b/src/core/unit.h
index adcdee6db6..e524553ed3 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -236,6 +236,9 @@ struct Unit {
         /* Is this a transient unit? */
         bool transient;
 
+        /* Is this a unit that is always running and cannot be stopped? */
+        bool perpetual;
+
         bool in_load_queue:1;
         bool in_dbus_queue:1;
         bool in_cleanup_queue:1;
@@ -244,8 +247,6 @@ struct Unit {
 
         bool sent_dbus_new_signal:1;
 
-        bool no_gc:1;
-
         bool in_audit:1;
 
         bool cgroup_realized:1;
@@ -524,6 +525,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix);
 
 bool unit_can_reload(Unit *u) _pure_;
 bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
 bool unit_can_isolate(Unit *u) _pure_;
 
 int unit_start(Unit *u);
-- 
cgit v1.2.3-54-g00ecf


From 11222d0fe0b5abb0cef65359b979e0c7f50129f3 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 25 Oct 2016 00:04:55 +0200
Subject: core: make the root mount perpetual too

Now that have a proper concept of "perpetual" units, let's make the root mount
one too, since it also cannot go away.
---
 src/basic/special.h |  3 ++
 src/core/mount.c    | 81 +++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 69 insertions(+), 15 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/special.h b/src/basic/special.h
index 084d3dfa23..5276bcf598 100644
--- a/src/basic/special.h
+++ b/src/basic/special.h
@@ -117,3 +117,6 @@
 
 /* The scope unit systemd itself lives in. */
 #define SPECIAL_INIT_SCOPE "init.scope"
+
+/* The root directory. */
+#define SPECIAL_ROOT_MOUNT "-.mount"
diff --git a/src/core/mount.c b/src/core/mount.c
index da480001e1..03e5ea1376 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -159,17 +159,6 @@ static void mount_init(Unit *u) {
         m->timeout_usec = u->manager->default_timeout_start_usec;
         m->directory_mode = 0755;
 
-        if (unit_has_name(u, "-.mount")) {
-                /* Don't allow start/stop for root directory */
-                u->refuse_manual_start = true;
-                u->refuse_manual_stop = true;
-        } else {
-                /* The stdio/kmsg bridge socket is on /, in order to avoid a
-                 * dep loop, don't use kmsg logging for -.mount */
-                m->exec_context.std_output = u->manager->default_std_output;
-                m->exec_context.std_error = u->manager->default_std_error;
-        }
-
         /* We need to make sure that /usr/bin/mount is always called
          * in the same process group as us, so that the autofs kernel
          * side doesn't send us another mount request while we are
@@ -577,6 +566,25 @@ static int mount_add_extras(Mount *m) {
         return 0;
 }
 
+static int mount_load_root_mount(Unit *u) {
+        assert(u);
+
+        if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+                return 0;
+
+        u->perpetual = true;
+        u->default_dependencies = false;
+
+        /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+        MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+        MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+        if (!u->description)
+                u->description = strdup("Root Mount");
+
+        return 1;
+}
+
 static int mount_load(Unit *u) {
         Mount *m = MOUNT(u);
         int r;
@@ -584,11 +592,14 @@ static int mount_load(Unit *u) {
         assert(u);
         assert(u->load_state == UNIT_STUB);
 
-        if (m->from_proc_self_mountinfo)
+        r = mount_load_root_mount(u);
+        if (r < 0)
+                return r;
+
+        if (m->from_proc_self_mountinfo || u->perpetual)
                 r = unit_load_fragment_and_dropin_optional(u);
         else
                 r = unit_load_fragment_and_dropin(u);
-
         if (r < 0)
                 return r;
 
@@ -1592,11 +1603,51 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) {
         return 1;
 }
 
+static void synthesize_root_mount(Manager *m) {
+        Unit *u;
+        int r;
+
+        assert(m);
+
+        /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+         * unconditionally synthesize it here and mark it as perpetual. */
+
+        u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+        if (!u) {
+                u = unit_new(m, sizeof(Mount));
+                if (!u) {
+                        log_oom();
+                        return;
+                }
+
+                r = unit_add_name(u, SPECIAL_ROOT_MOUNT);
+                if (r < 0) {
+                        unit_free(u);
+                        log_error_errno(r, "Failed to add the " SPECIAL_ROOT_MOUNT " name: %m");
+                        return;
+                }
+        }
+
+        u->perpetual = true;
+        MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+        unit_add_to_load_queue(u);
+        unit_add_to_dbus_queue(u);
+}
+
+static bool mount_is_mounted(Mount *m) {
+        assert(m);
+
+        return UNIT(m)->perpetual || m->is_mounted;
+}
+
 static void mount_enumerate(Manager *m) {
         int r;
 
         assert(m);
 
+        synthesize_root_mount(m);
+
         mnt_init_debug(0);
 
         if (!m->mount_monitor) {
@@ -1703,7 +1754,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
         LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
                 Mount *mount = MOUNT(u);
 
-                if (!mount->is_mounted) {
+                if (!mount_is_mounted(mount)) {
 
                         /* A mount point is not around right now. It
                          * might be gone, or might never have
@@ -1764,7 +1815,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
                         }
                 }
 
-                if (mount->is_mounted &&
+                if (mount_is_mounted(mount) &&
                     mount->from_proc_self_mountinfo &&
                     mount->parameters_proc_self_mountinfo.what) {
 
-- 
cgit v1.2.3-54-g00ecf


From a581e45ae8f9bb5c6693c23c78bc070aa15d0c8a Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 25 Oct 2016 00:29:05 +0200
Subject: unit: unify some code with new unit_new_for_name() call

---
 src/core/device.c |  6 +-----
 src/core/mount.c  | 17 +++--------------
 src/core/scope.c  | 11 ++---------
 src/core/slice.c  | 11 ++---------
 src/core/swap.c   |  6 +-----
 src/core/unit.c   | 18 ++++++++++++++++++
 src/core/unit.h   |  1 +
 7 files changed, 28 insertions(+), 42 deletions(-)

(limited to 'src/core')

diff --git a/src/core/device.c b/src/core/device.c
index 8a3e888e5e..bd87a447cd 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -331,11 +331,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
         if (!u) {
                 delete = true;
 
-                u = unit_new(m, sizeof(Device));
-                if (!u)
-                        return log_oom();
-
-                r = unit_add_name(u, e);
+                r = unit_new_for_name(m, sizeof(Device), e, &u);
                 if (r < 0)
                         goto fail;
 
diff --git a/src/core/mount.c b/src/core/mount.c
index 03e5ea1376..0641621d8f 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -1404,11 +1404,7 @@ static int mount_setup_unit(
         if (!u) {
                 delete = true;
 
-                u = unit_new(m, sizeof(Mount));
-                if (!u)
-                        return log_oom();
-
-                r = unit_add_name(u, e);
+                r = unit_new_for_name(m, sizeof(Mount), e, &u);
                 if (r < 0)
                         goto fail;
 
@@ -1614,16 +1610,9 @@ static void synthesize_root_mount(Manager *m) {
 
         u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
         if (!u) {
-                u = unit_new(m, sizeof(Mount));
-                if (!u) {
-                        log_oom();
-                        return;
-                }
-
-                r = unit_add_name(u, SPECIAL_ROOT_MOUNT);
+                r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
                 if (r < 0) {
-                        unit_free(u);
-                        log_error_errno(r, "Failed to add the " SPECIAL_ROOT_MOUNT " name: %m");
+                        log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
                         return;
                 }
         }
diff --git a/src/core/scope.c b/src/core/scope.c
index 77f3fdc2aa..d6e1f8e392 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -563,16 +563,9 @@ static void scope_enumerate(Manager *m) {
 
         u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
         if (!u) {
-                u = unit_new(m, sizeof(Scope));
-                if (!u) {
-                        log_oom();
-                        return;
-                }
-
-                r = unit_add_name(u, SPECIAL_INIT_SCOPE);
+                r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
                 if (r < 0)  {
-                        unit_free(u);
-                        log_error_errno(r, "Failed to add the " SPECIAL_INIT_SCOPE " name: %m");
+                        log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
                         return;
                 }
         }
diff --git a/src/core/slice.c b/src/core/slice.c
index c505fa1916..ed5d3fd701 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -299,16 +299,9 @@ static void slice_enumerate(Manager *m) {
 
         u = manager_get_unit(m, SPECIAL_ROOT_SLICE);
         if (!u) {
-                u = unit_new(m, sizeof(Slice));
-                if (!u) {
-                        log_oom();
-                        return;
-                }
-
-                r = unit_add_name(u, SPECIAL_ROOT_SLICE);
+                r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u);
                 if (r < 0) {
-                        unit_free(u);
-                        log_error_errno(r, "Failed to add the " SPECIAL_ROOT_SLICE " name: %m");
+                        log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m");
                         return;
                 }
         }
diff --git a/src/core/swap.c b/src/core/swap.c
index b592abb9fb..2228a254bb 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -381,11 +381,7 @@ static int swap_setup_unit(
         if (!u) {
                 delete = true;
 
-                u = unit_new(m, sizeof(Swap));
-                if (!u)
-                        return log_oom();
-
-                r = unit_add_name(u, e);
+                r = unit_new_for_name(m, sizeof(Swap), e, &u);
                 if (r < 0)
                         goto fail;
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 8d74b44f73..068d319206 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -109,6 +109,24 @@ Unit *unit_new(Manager *m, size_t size) {
         return u;
 }
 
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+        Unit *u;
+        int r;
+
+        u = unit_new(m, size);
+        if (!u)
+                return -ENOMEM;
+
+        r = unit_add_name(u, name);
+        if (r < 0) {
+                unit_free(u);
+                return r;
+        }
+
+        *ret = u;
+        return r;
+}
+
 bool unit_has_name(Unit *u, const char *name) {
         assert(u);
         assert(name);
diff --git a/src/core/unit.h b/src/core/unit.h
index e524553ed3..899cd62c92 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -481,6 +481,7 @@ DEFINE_CAST(SCOPE, Scope);
 Unit *unit_new(Manager *m, size_t size);
 void unit_free(Unit *u);
 
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
 int unit_add_name(Unit *u, const char *name);
 
 int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference);
-- 
cgit v1.2.3-54-g00ecf


From 1201cae704c6674fde2c23fdd7feab8493a20159 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 2 Nov 2016 11:38:12 -0600
Subject: core: change mount_synthesize_root() return to int

Let's propagate the error here, instead of eating it up early.

In a later change we should probably also change mount_enumerate() to propagate
errors up, but that would mean we'd have to change the unit vtable, and thus
change all unit types, hence is quite an invasive change.
---
 src/core/mount.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/core')

diff --git a/src/core/mount.c b/src/core/mount.c
index 0641621d8f..d749e49df5 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -1599,7 +1599,7 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) {
         return 1;
 }
 
-static void synthesize_root_mount(Manager *m) {
+static int synthesize_root_mount(Manager *m) {
         Unit *u;
         int r;
 
@@ -1611,10 +1611,8 @@ static void synthesize_root_mount(Manager *m) {
         u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
         if (!u) {
                 r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
-                if (r < 0) {
-                        log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
-                        return;
-                }
+                if (r < 0)
+                        return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
         }
 
         u->perpetual = true;
@@ -1622,6 +1620,8 @@ static void synthesize_root_mount(Manager *m) {
 
         unit_add_to_load_queue(u);
         unit_add_to_dbus_queue(u);
+
+        return 0;
 }
 
 static bool mount_is_mounted(Mount *m) {
@@ -1635,7 +1635,9 @@ static void mount_enumerate(Manager *m) {
 
         assert(m);
 
-        synthesize_root_mount(m);
+        r = synthesize_root_mount(m);
+        if (r < 0)
+                goto fail;
 
         mnt_init_debug(0);
 
-- 
cgit v1.2.3-54-g00ecf


From b09246352f751c95ab06f598b639f6f6ca0e67b1 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Wed, 2 Nov 2016 15:00:54 -0400
Subject: pid1: fix fd memleak when we hit FileDescriptorStoreMax limit

Since service_add_fd_store() already does the check, remove the redundant check
from service_add_fd_store_set().

Also, print a warning when repopulating FDStore after daemon-reexec and we hit
the limit. This is a user visible issue, so we should not discard fds silently.
(Note that service_deserialize_item is impacted by the return value from
service_add_fd_store(), but we rely on the general error message, so the caller
does not need to be modified, and does not show up in the diff.)
---
 src/core/service.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/core')

diff --git a/src/core/service.c b/src/core/service.c
index 00ce500fd3..a7274a758f 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -386,7 +386,9 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
         assert(fd >= 0);
 
         if (s->n_fd_store >= s->n_fd_store_max)
-                return 0;
+                return -EXFULL; /* Our store is full.
+                                 * Use this errno rather than E[NM]FILE to distinguish from
+                                 * the case where systemd itself hits the file limit. */
 
         LIST_FOREACH(fd_store, fs, s->fd_store) {
                 r = same_fd(fs->fd, fd);
@@ -430,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
 
         assert(s);
 
-        if (fdset_size(fds) <= 0)
-                return 0;
-
-        while (s->n_fd_store < s->n_fd_store_max) {
+        while (fdset_size(fds) > 0) {
                 _cleanup_close_ int fd = -1;
 
                 fd = fdset_steal_first(fds);
@@ -441,16 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
                         break;
 
                 r = service_add_fd_store(s, fd, name);
+                if (r == -EXFULL)
+                        return log_unit_warning_errno(UNIT(s), r,
+                                                      "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+                                                      s->n_fd_store_max);
                 if (r < 0)
-                        return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
+                        return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
                 if (r > 0)
                         log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
                 fd = -1;
         }
 
-        if (fdset_size(fds) > 0)
-                log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max);
-
         return 0;
 }
 
-- 
cgit v1.2.3-54-g00ecf


From 07ecca0dc9d2d8f3b3abd73ab32f254f339fd903 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 2 Nov 2016 12:02:53 -0600
Subject: core: don't hit an assert when printing status messages about units
 with overly long description strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This essentially reverts one part of d054f0a4d451120c26494263fc4dc175bfd405b1.

(We might also choose to use proper ellipsation here, but I wasn't sure the
memory allocation this requires wouöld be a good idea here...)

Fixes: #4534
---
 src/core/job.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/job.c b/src/core/job.c
index 3ecc8a1a73..d8b170b983 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
         if (!format)
                 return;
 
+        /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */
         DISABLE_WARNING_FORMAT_NONLITERAL;
-        xsprintf(buf, format, unit_description(u));
+        snprintf(buf, sizeof(buf), format, unit_description(u));
         REENABLE_WARNING;
 
         switch (t) {
-- 
cgit v1.2.3-54-g00ecf


From ac334b2cfbf5e57b5b6510e2f57c5f550bc57cca Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Wed, 2 Nov 2016 12:05:22 -0600
Subject: core: make a constant table actually constant

---
 src/core/job.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/core')

diff --git a/src/core/job.c b/src/core/job.c
index d8b170b983..ac6910a906 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
 }
 
 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
-        static struct {
+        static const struct {
                 const char *color, *word;
         } const statuses[_JOB_RESULT_MAX] = {
-                [JOB_DONE]        = {ANSI_GREEN,            "  OK  "},
-                [JOB_TIMEOUT]     = {ANSI_HIGHLIGHT_RED,    " TIME "},
-                [JOB_FAILED]      = {ANSI_HIGHLIGHT_RED,    "FAILED"},
-                [JOB_DEPENDENCY]  = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"},
-                [JOB_SKIPPED]     = {ANSI_HIGHLIGHT,        " INFO "},
-                [JOB_ASSERT]      = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"},
-                [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"},
+                [JOB_DONE]        = { ANSI_GREEN,            "  OK  " },
+                [JOB_TIMEOUT]     = { ANSI_HIGHLIGHT_RED,    " TIME " },
+                [JOB_FAILED]      = { ANSI_HIGHLIGHT_RED,    "FAILED" },
+                [JOB_DEPENDENCY]  = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+                [JOB_SKIPPED]     = { ANSI_HIGHLIGHT,        " INFO " },
+                [JOB_ASSERT]      = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+                [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
         };
 
         const char *format;
-- 
cgit v1.2.3-54-g00ecf


From e68eedbbdc98fa13449756b7fee3bed689d76493 Mon Sep 17 00:00:00 2001
From: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Date: Wed, 2 Nov 2016 22:02:46 -0400
Subject: Revert some uses of xsprintf

This reverts some changes introduced in d054f0a4d4.
xsprintf should be used in cases where we calculated the right buffer
size by hand (using DECIMAL_STRING_MAX and such), and never in cases where
we are printing externally specified strings of arbitrary length.

Fixes #4534.
---
 src/basic/log.c                |  2 +-
 src/core/unit.c                |  2 +-
 src/udev/collect/collect.c     |  6 +++---
 src/udev/udev-builtin-net_id.c | 11 ++++++-----
 4 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'src/core')

diff --git a/src/basic/log.c b/src/basic/log.c
index 2ff70be255..4919d175da 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -782,7 +782,7 @@ static void log_assert(
                 return;
 
         DISABLE_WARNING_FORMAT_NONLITERAL;
-        xsprintf(buffer, format, text, file, line, func);
+        snprintf(buffer, sizeof buffer, format, text, file, line, func);
         REENABLE_WARNING;
 
         log_abort_msg = buffer;
diff --git a/src/core/unit.c b/src/core/unit.c
index 463e6d6a62..e664e23892 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -1472,7 +1472,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
         format = unit_get_status_message_format(u, t);
 
         DISABLE_WARNING_FORMAT_NONLITERAL;
-        xsprintf(buf, format, unit_description(u));
+        snprintf(buf, sizeof buf, format, unit_description(u));
         REENABLE_WARNING;
 
         mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING :
diff --git a/src/udev/collect/collect.c b/src/udev/collect/collect.c
index 349585b634..0e973cd521 100644
--- a/src/udev/collect/collect.c
+++ b/src/udev/collect/collect.c
@@ -85,16 +85,16 @@ static void usage(void)
  */
 static int prepare(char *dir, char *filename)
 {
-        char buf[512];
+        char buf[PATH_MAX];
         int r, fd;
 
         r = mkdir(dir, 0700);
         if (r < 0 && errno != EEXIST)
                 return -errno;
 
-        xsprintf(buf, "%s/%s", dir, filename);
+        snprintf(buf, sizeof buf, "%s/%s", dir, filename);
 
-        fd = open(buf,O_RDWR|O_CREAT|O_CLOEXEC, S_IRUSR|S_IWUSR);
+        fd = open(buf, O_RDWR|O_CREAT|O_CLOEXEC, S_IRUSR|S_IWUSR);
         if (fd < 0)
                 fprintf(stderr, "Cannot open %s: %m\n", buf);
 
diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c
index 0eb2500dd2..fe9d6f4482 100644
--- a/src/udev/udev-builtin-net_id.c
+++ b/src/udev/udev-builtin-net_id.c
@@ -211,7 +211,7 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
         char *s;
         const char *attr, *port_name;
         struct udev_device *pci = NULL;
-        char slots[256], str[256];
+        char slots[PATH_MAX];
         _cleanup_closedir_ DIR *dir = NULL;
         struct dirent *dent;
         int hotplug_slot = 0, err = 0;
@@ -248,7 +248,8 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
                 err = -ENOENT;
                 goto out;
         }
-        xsprintf(slots, "%s/slots", udev_device_get_syspath(pci));
+
+        snprintf(slots, sizeof slots, "%s/slots", udev_device_get_syspath(pci));
         dir = opendir(slots);
         if (!dir) {
                 err = -errno;
@@ -257,8 +258,7 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
 
         for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
                 int i;
-                char *rest;
-                char *address;
+                char *rest, *address, str[PATH_MAX];
 
                 if (dent->d_name[0] == '.')
                         continue;
@@ -267,7 +267,8 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
                         continue;
                 if (i < 1)
                         continue;
-                xsprintf(str, "%s/%s/address", slots, dent->d_name);
+
+                snprintf(str, sizeof str, "%s/%s/address", slots, dent->d_name);
                 if (read_one_line_file(str, &address) >= 0) {
                         /* match slot address with device by stripping the function */
                         if (strneq(address, udev_device_get_sysname(names->pcidev), strlen(address)))
-- 
cgit v1.2.3-54-g00ecf


From cdc5d5c55e58ff9eeb6b2258c9fc3a416ee8b53f Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@opendz.org>
Date: Wed, 2 Nov 2016 22:42:40 +0100
Subject: core: intialize user aux groups and SupplementaryGroups= when
 DynamicUser= is set

Make sure that when DynamicUser= is set that we intialize the user
supplementary groups and that we also support SupplementaryGroups=

Fixes: https://github.com/systemd/systemd/issues/4539

Thanks Evgeny Vereshchagin (@evverx)
---
 src/core/execute.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index 3f053602b5..f13ca30395 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -773,11 +773,9 @@ static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid)
         return 0;
 }
 
-static int get_fixed_supplementary_groups(const ExecContext *c,
-                                          const char *user,
-                                          const char *group,
-                                          gid_t gid,
-                                          gid_t **supplementary_gids, int *ngids) {
+static int get_supplementary_groups(const ExecContext *c, const char *user,
+                                    const char *group, gid_t gid,
+                                    gid_t **supplementary_gids, int *ngids) {
         char **i;
         int r, k = 0;
         int ngroups_max;
@@ -790,8 +788,8 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
         /*
          * If user is given, then lookup GID and supplementary groups list.
          * We avoid NSS lookups for gid=0. Also we have to initialize groups
-         * as early as possible so we keep the list of supplementary groups
-         * of the caller.
+         * here and as early as possible so we keep the list of supplementary
+         * groups of the caller.
          */
         if (user && gid_is_valid(gid) && gid != 0) {
                 /* First step, initialize groups from /etc/groups */
@@ -2347,13 +2345,14 @@ static int exec_child(
                         *exit_status = EXIT_GROUP;
                         return r;
                 }
+        }
 
-                r = get_fixed_supplementary_groups(context, username, groupname,
-                                                   gid, &supplementary_gids, &ngids);
-                if (r < 0) {
-                        *exit_status = EXIT_GROUP;
-                        return r;
-                }
+        /* Initialize user supplementary groups and get SupplementaryGroups= ones */
+        r = get_supplementary_groups(context, username, groupname, gid,
+                                     &supplementary_gids, &ngids);
+        if (r < 0) {
+                *exit_status = EXIT_GROUP;
+                return r;
         }
 
         r = send_user_lookup(unit, user_lookup_fd, uid, gid);
-- 
cgit v1.2.3-54-g00ecf


From 49e5f27c53a1a1716a8e660fb27cfbfe8ea67de5 Mon Sep 17 00:00:00 2001
From: Dave Reisner <d@falconindy.com>
Date: Sun, 27 Nov 2016 17:05:39 -0500
Subject: device: Avoid calling unit_free(NULL) in device setup logic (#4748)

Since a581e45ae8f9bb5c, there's a few function calls to
unit_new_for_name which will unit_free on failure. Prior to this commit,
a failure would result in calling unit_free with a NULL unit, and hit an
assertion failure, seen at least via device_setup_unit:

Assertion 'u' failed at src/core/unit.c:519, function unit_free().  Aborting.

Fixes #4747
https://bugs.archlinux.org/task/51950
---
 src/core/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core')

diff --git a/src/core/device.c b/src/core/device.c
index bd87a447cd..4b9e84aeb6 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -365,7 +365,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
 fail:
         log_unit_warning_errno(u, r, "Failed to set up device unit: %m");
 
-        if (delete)
+        if (delete && u)
                 unit_free(u);
 
         return r;
-- 
cgit v1.2.3-54-g00ecf


From d9454c44bc1854a291f6c37dfce378c9b6067d0b Mon Sep 17 00:00:00 2001
From: Dave Reisner <dreisner@archlinux.org>
Date: Wed, 9 Nov 2016 08:00:26 -0500
Subject: disable RestrictAddressFamilies on i686

Shit's broke, yo.

https://github.com/systemd/systemd/issues/4575
---
 src/core/execute.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/core')

diff --git a/src/core/execute.c b/src/core/execute.c
index f13ca30395..85ee82c3e1 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1254,6 +1254,10 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
         Iterator i;
         int r;
 
+#if defined(__i386__)
+        return 0;
+#endif
+
         assert(c);
 
         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
-- 
cgit v1.2.3-54-g00ecf


From 9146dc0afb1118a988ce79a249d5e5235b7b52c8 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@sbcglobal.net>
Date: Wed, 25 May 2016 12:23:40 -0400
Subject: FSDG: os-release: Default to PRETTY_NAME "GNU/Linux" instead of
 "Linux".

---
 man/kernel-install.xml                    | 2 +-
 man/os-release.xml                        | 2 +-
 src/analyze/analyze.c                     | 2 +-
 src/core/main.c                           | 4 ++--
 src/firstboot/firstboot.c                 | 2 +-
 src/kernel-install/90-loaderentry.install | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/core')

diff --git a/man/kernel-install.xml b/man/kernel-install.xml
index 4a8a46cef4..32e6169f63 100644
--- a/man/kernel-install.xml
+++ b/man/kernel-install.xml
@@ -109,7 +109,7 @@
           <replaceable>PRETTY_NAME</replaceable> parameter specified
           in <filename>/etc/os-release</filename> or
           <filename>/usr/lib/os-release</filename> (if the former is
-          missing), or "Linux
+          missing), or "GNU/Linux
           <replaceable>KERNEL-VERSION</replaceable>", if unset.  If
           the file <filename>initrd</filename> is found next to the
           <filename>linux</filename> file, the initrd will be added to
diff --git a/man/os-release.xml b/man/os-release.xml
index 99bbb61004..27d18749dc 100644
--- a/man/os-release.xml
+++ b/man/os-release.xml
@@ -210,7 +210,7 @@
         suitable for presentation to the user. May or may not contain
         a release code name or OS version of some kind, as suitable.
         If not set, defaults to
-        <literal>PRETTY_NAME="Linux"</literal>. Example:
+        <literal>PRETTY_NAME="GNU/Linux"</literal>. Example:
         <literal>PRETTY_NAME="Fedora 17 (Beefy
         Miracle)"</literal>.</para></listitem>
       </varlistentry>
diff --git a/src/analyze/analyze.c b/src/analyze/analyze.c
index cbf9354a7a..66830695f3 100644
--- a/src/analyze/analyze.c
+++ b/src/analyze/analyze.c
@@ -653,7 +653,7 @@ static int analyze_plot(sd_bus *bus) {
         svg("<rect class=\"background\" width=\"100%%\" height=\"100%%\" />\n");
         svg("<text x=\"20\" y=\"50\">%s</text>", pretty_times);
         svg("<text x=\"20\" y=\"30\">%s %s (%s %s %s) %s %s</text>",
-            isempty(host->os_pretty_name) ? "Linux" : host->os_pretty_name,
+            isempty(host->os_pretty_name) ? "GNU/Linux" : host->os_pretty_name,
             strempty(host->hostname),
             strempty(host->kernel_name),
             strempty(host->kernel_release),
diff --git a/src/core/main.c b/src/core/main.c
index 94602611a7..f07ed71b31 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -1246,11 +1246,11 @@ static int status_welcome(void) {
                 return status_printf(NULL, false, false,
                                      "\nWelcome to \x1B[%sm%s\x1B[0m!\n",
                                      isempty(ansi_color) ? "1" : ansi_color,
-                                     isempty(pretty_name) ? "Linux" : pretty_name);
+                                     isempty(pretty_name) ? "GNU/Linux" : pretty_name);
         else
                 return status_printf(NULL, false, false,
                                      "\nWelcome to %s!\n",
-                                     isempty(pretty_name) ? "Linux" : pretty_name);
+                                     isempty(pretty_name) ? "GNU/Linux" : pretty_name);
 }
 
 static int write_container_id(void) {
diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c
index c9e8e54ee3..83a21eaf0e 100644
--- a/src/firstboot/firstboot.c
+++ b/src/firstboot/firstboot.c
@@ -96,7 +96,7 @@ static void print_welcome(void) {
                 log_warning_errno(r, "Failed to read os-release file: %m");
 
         printf("\nWelcome to your new installation of %s!\nPlease configure a few basic system settings:\n\n",
-               isempty(pretty_name) ? "Linux" : pretty_name);
+               isempty(pretty_name) ? "GNU/Linux" : pretty_name);
 
         press_any_key();
 
diff --git a/src/kernel-install/90-loaderentry.install b/src/kernel-install/90-loaderentry.install
index a0bca05c9a..af9f0f9ccd 100644
--- a/src/kernel-install/90-loaderentry.install
+++ b/src/kernel-install/90-loaderentry.install
@@ -38,7 +38,7 @@ elif [[ -f /usr/lib/os-release ]]; then
 fi
 
 if ! [[ $PRETTY_NAME ]]; then
-    PRETTY_NAME="Linux $KERNEL_VERSION"
+    PRETTY_NAME="GNU/Linux $KERNEL_VERSION"
 fi
 
 declare -a BOOT_OPTIONS
-- 
cgit v1.2.3-54-g00ecf