summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r--src/core/automount.c14
-rw-r--r--src/core/busname.c12
-rw-r--r--src/core/cgroup.c341
-rw-r--r--src/core/cgroup.h6
-rw-r--r--src/core/dbus-automount.c54
-rw-r--r--src/core/dbus-automount.h2
-rw-r--r--src/core/dbus-cgroup.c51
-rw-r--r--src/core/dbus-execute.c121
-rw-r--r--src/core/dbus-manager.c257
-rw-r--r--src/core/dbus-mount.c7
-rw-r--r--src/core/dbus-service.c18
-rw-r--r--src/core/dbus-socket.c3
-rw-r--r--src/core/dbus-swap.c2
-rw-r--r--src/core/dbus-unit.c181
-rw-r--r--src/core/dbus-unit.h8
-rw-r--r--src/core/dbus.c75
-rw-r--r--src/core/dbus.h5
-rw-r--r--src/core/device.c12
-rw-r--r--src/core/dynamic-user.c794
-rw-r--r--src/core/dynamic-user.h66
-rw-r--r--src/core/emergency-action.c (renamed from src/core/failure-action.c)65
-rw-r--r--src/core/emergency-action.h (renamed from src/core/failure-action.h)28
-rw-r--r--src/core/execute.c1282
-rw-r--r--src/core/execute.h35
-rw-r--r--src/core/job.c38
-rw-r--r--src/core/job.h4
-rw-r--r--src/core/load-fragment-gperf.gperf.m437
-rw-r--r--src/core/load-fragment.c411
-rw-r--r--src/core/load-fragment.h7
-rw-r--r--src/core/main.c81
-rw-r--r--src/core/manager.c598
-rw-r--r--src/core/manager.h36
-rw-r--r--src/core/mount-setup.c6
-rw-r--r--src/core/mount.c240
-rw-r--r--src/core/mount.h6
-rw-r--r--src/core/namespace.c604
-rw-r--r--src/core/namespace.h13
-rw-r--r--src/core/org.freedesktop.systemd1.conf24
-rw-r--r--src/core/path.c6
-rw-r--r--src/core/scope.c63
-rw-r--r--src/core/selinux-access.h2
-rw-r--r--src/core/service.c284
-rw-r--r--src/core/service.h4
-rw-r--r--src/core/show-status.c5
-rw-r--r--src/core/slice.c52
-rw-r--r--src/core/socket.c296
-rw-r--r--src/core/socket.h12
-rw-r--r--src/core/swap.c50
-rw-r--r--src/core/swap.h1
-rw-r--r--src/core/system.conf1
-rw-r--r--src/core/target.c5
-rw-r--r--src/core/timer.c10
-rw-r--r--src/core/transaction.c6
-rw-r--r--src/core/umount.c2
-rw-r--r--src/core/unit.c522
-rw-r--r--src/core/unit.h64
56 files changed, 5659 insertions, 1270 deletions
diff --git a/src/core/automount.c b/src/core/automount.c
index 4e9891569c..7d7a0a6e46 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) {
return r;
(void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+ if (a->deserialized_state == AUTOMOUNT_RUNNING) {
+ r = automount_start_expire(a);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+ }
}
automount_set_state(a, a->deserialized_state);
@@ -301,7 +306,7 @@ static void automount_dump(Unit *u, FILE *f, const char *prefix) {
static void automount_enter_dead(Automount *a, AutomountResult f) {
assert(a);
- if (f != AUTOMOUNT_SUCCESS)
+ if (a->result == AUTOMOUNT_SUCCESS)
a->result = f;
automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD);
@@ -795,6 +800,10 @@ static int automount_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
a->result = AUTOMOUNT_SUCCESS;
automount_enter_waiting(a);
return 1;
@@ -1105,6 +1114,9 @@ const UnitVTable automount_vtable = {
.reset_failed = automount_reset_failed,
.bus_vtable = bus_automount_vtable,
+ .bus_set_property = bus_automount_set_property,
+
+ .can_transient = true,
.shutdown = automount_shutdown,
.supported = automount_supported,
diff --git a/src/core/busname.c b/src/core/busname.c
index 730be2ee14..b96ec09e67 100644
--- a/src/core/busname.c
+++ b/src/core/busname.c
@@ -442,7 +442,7 @@ fail:
static void busname_enter_dead(BusName *n, BusNameResult f) {
assert(n);
- if (f != BUSNAME_SUCCESS)
+ if (n->result == BUSNAME_SUCCESS)
n->result = f;
busname_set_state(n, n->result != BUSNAME_SUCCESS ? BUSNAME_FAILED : BUSNAME_DEAD);
@@ -454,7 +454,7 @@ static void busname_enter_signal(BusName *n, BusNameState state, BusNameResult f
assert(n);
- if (f != BUSNAME_SUCCESS)
+ if (n->result == BUSNAME_SUCCESS)
n->result = f;
kill_context_init(&kill_context);
@@ -639,6 +639,10 @@ static int busname_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
n->result = BUSNAME_SUCCESS;
busname_enter_making(n);
@@ -868,7 +872,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) {
n->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = BUSNAME_SUCCESS;
else if (code == CLD_EXITED)
f = BUSNAME_FAILURE_EXIT_CODE;
@@ -882,7 +886,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) {
log_unit_full(u, f == BUSNAME_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0,
"Control process exited, code=%s status=%i", sigchld_code_to_string(code), status);
- if (f != BUSNAME_SUCCESS)
+ if (n->result == BUSNAME_SUCCESS)
n->result = f;
switch (n->state) {
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index c19e43f571..23a92f9651 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -57,12 +57,16 @@ void cgroup_context_init(CGroupContext *c) {
/* Initialize everything to the kernel defaults, assuming the
* structure is preinitialized to 0 */
+ c->cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
- c->cpu_quota_per_sec_usec = USEC_INFINITY;
c->memory_high = CGROUP_LIMIT_MAX;
c->memory_max = CGROUP_LIMIT_MAX;
+ c->memory_swap_max = CGROUP_LIMIT_MAX;
c->memory_limit = CGROUP_LIMIT_MAX;
@@ -158,6 +162,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
+ "%sCPUWeight=%" PRIu64 "\n"
+ "%sStartupCPUWeight=%" PRIu64 "\n"
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
@@ -168,6 +174,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sMemoryLow=%" PRIu64 "\n"
"%sMemoryHigh=%" PRIu64 "\n"
"%sMemoryMax=%" PRIu64 "\n"
+ "%sMemorySwapMax=%" PRIu64 "\n"
"%sMemoryLimit=%" PRIu64 "\n"
"%sTasksMax=%" PRIu64 "\n"
"%sDevicePolicy=%s\n"
@@ -177,6 +184,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
+ prefix, c->cpu_weight,
+ prefix, c->startup_cpu_weight,
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -187,6 +196,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->memory_low,
prefix, c->memory_high,
prefix, c->memory_max,
+ prefix, c->memory_swap_max,
prefix, c->memory_limit,
prefix, c->tasks_max,
prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -382,6 +392,95 @@ fail:
return -errno;
}
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+ return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+ return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_cpu_weight;
+ else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->cpu_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->startup_cpu_shares;
+ else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->cpu_shares;
+ else
+ return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.weight: %m");
+
+ if (quota != USEC_INFINITY)
+ xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+ quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+ else
+ xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
+
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.max: %m");
+}
+
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", shares);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.shares: %m");
+
+ xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_period_us: %m");
+
+ if (quota != USEC_INFINITY) {
+ xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+ } else
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_quota_us: %m");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+ return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+ return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
static bool cgroup_context_has_io_config(CGroupContext *c) {
return c->io_accounting ||
c->io_weight != CGROUP_WEIGHT_INVALID ||
@@ -521,7 +620,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u
}
static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
- return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+ return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
}
static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
@@ -566,30 +665,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* and missing cgroups, i.e. EROFS and ENOENT. */
if ((mask & CGROUP_MASK_CPU) && !is_root) {
- char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ bool has_weight = cgroup_context_has_cpu_weight(c);
+ bool has_shares = cgroup_context_has_cpu_shares(c);
- sprintf(buf, "%" PRIu64 "\n",
- IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
- r = cg_set_attribute("cpu", path, "cpu.shares", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.shares: %m");
+ if (cg_all_unified() > 0) {
+ uint64_t weight;
- sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_period_us: %m");
+ if (has_weight)
+ weight = cgroup_context_cpu_weight(c, state);
+ else if (has_shares) {
+ uint64_t shares = cgroup_context_cpu_shares(c, state);
- if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
- sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
- } else
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_quota_us: %m");
+ weight = cgroup_cpu_shares_to_weight(shares);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+ shares, weight, path);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+ } else {
+ uint64_t shares;
+
+ if (has_weight) {
+ uint64_t weight = cgroup_context_cpu_weight(c, state);
+
+ shares = cgroup_cpu_weight_to_shares(weight);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+ weight, shares, path);
+ } else if (has_shares)
+ shares = cgroup_context_cpu_shares(c, state);
+ else
+ shares = CGROUP_CPU_SHARES_DEFAULT;
+
+ cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+ }
}
if (mask & CGROUP_MASK_IO) {
@@ -677,16 +788,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
char buf[DECIMAL_STR_MAX(uint64_t)+1];
uint64_t weight;
- if (has_blockio)
- weight = cgroup_context_blkio_weight(c, state);
- else if (has_io) {
+ if (has_io) {
uint64_t io_weight = cgroup_context_io_weight(c, state);
weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
io_weight, weight);
- } else
+ } else if (has_blockio)
+ weight = cgroup_context_blkio_weight(c, state);
+ else
weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
xsprintf(buf, "%" PRIu64 "\n", weight);
@@ -695,13 +806,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set blkio.weight: %m");
- if (has_blockio) {
- CGroupBlockIODeviceWeight *w;
-
- /* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->blockio_device_weights)
- cgroup_apply_blkio_device_weight(u, w->path, w->weight);
- } else if (has_io) {
+ if (has_io) {
CGroupIODeviceWeight *w;
/* FIXME: no way to reset this list */
@@ -713,18 +818,17 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
cgroup_apply_blkio_device_weight(u, w->path, weight);
}
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ cgroup_apply_blkio_device_weight(u, w->path, w->weight);
}
}
/* Apply limits and free ones without config. */
- if (has_blockio) {
- CGroupBlockIODeviceBandwidth *b, *next;
-
- LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
- if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
- cgroup_context_free_blockio_device_bandwidth(c, b);
- }
- } else if (has_io) {
+ if (has_io) {
CGroupIODeviceLimit *l, *next;
LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
@@ -734,16 +838,24 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
cgroup_context_free_io_device_limit(c, l);
}
+ } else if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b, *next;
+
+ LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths)
+ if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
+ cgroup_context_free_blockio_device_bandwidth(c, b);
}
}
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
- if (cg_unified() > 0) {
- uint64_t max = c->memory_max;
+ if (cg_all_unified() > 0) {
+ uint64_t max;
+ uint64_t swap_max = CGROUP_LIMIT_MAX;
- if (cgroup_context_has_unified_memory_config(c))
+ if (cgroup_context_has_unified_memory_config(c)) {
max = c->memory_max;
- else {
+ swap_max = c->memory_swap_max;
+ } else {
max = c->memory_limit;
if (max != CGROUP_LIMIT_MAX)
@@ -753,16 +865,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
cgroup_apply_unified_memory_limit(u, "memory.max", max);
+ cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
} else {
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
- uint64_t val = c->memory_limit;
+ uint64_t val;
- if (val == CGROUP_LIMIT_MAX) {
+ if (cgroup_context_has_unified_memory_config(c)) {
val = c->memory_max;
-
- if (val != CGROUP_LIMIT_MAX)
- log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
- }
+ log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val);
+ } else
+ val = c->memory_limit;
if (val == CGROUP_LIMIT_MAX)
strncpy(buf, "-1\n", sizeof(buf));
@@ -844,7 +956,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
if ((mask & CGROUP_MASK_PIDS) && !is_root) {
- if (c->tasks_max != (uint64_t) -1) {
+ if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
@@ -864,8 +976,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need */
if (c->cpu_accounting ||
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
- c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ cgroup_context_has_cpu_weight(c) ||
+ cgroup_context_has_cpu_shares(c) ||
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
@@ -911,7 +1023,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
e = unit_get_exec_context(u);
if (!e ||
exec_context_maintains_privileges(e) ||
- cg_unified() > 0)
+ cg_all_unified() > 0)
return _CGROUP_MASK_ALL;
}
@@ -1137,7 +1249,7 @@ int unit_watch_cgroup(Unit *u) {
return 0;
/* Only applies to the unified hierarchy */
- r = cg_unified();
+ r = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
if (r < 0)
return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
if (r == 0)
@@ -1247,6 +1359,26 @@ int unit_attach_pids_to_cgroup(Unit *u) {
return 0;
}
+static void cgroup_xattr_apply(Unit *u) {
+ char ids[SD_ID128_STRING_MAX];
+ int r;
+
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return;
+
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.invocation_id",
+ sd_id128_to_string(u->invocation_id, ids), 32,
+ 0);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
assert(u);
@@ -1290,6 +1422,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
/* Finally, apply the necessary attributes. */
cgroup_context_apply(u, target_mask, state);
+ cgroup_xattr_apply(u);
return 0;
}
@@ -1416,6 +1549,8 @@ void unit_prune_cgroup(Unit *u) {
if (!u->cgroup_path)
return;
+ (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+
is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
@@ -1537,7 +1672,7 @@ int unit_watch_all_pids(Unit *u) {
if (!u->cgroup_path)
return -ENOENT;
- if (cg_unified() > 0) /* On unified we can use proper notifications */
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */
return 0;
return unit_watch_pids_in_path(u, u->cgroup_path);
@@ -1610,7 +1745,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
CGroupController c;
- int r, unified;
+ int r, all_unified, systemd_unified;
char *e;
assert(m);
@@ -1647,11 +1782,17 @@ int manager_setup_cgroup(Manager *m) {
if (r < 0)
return log_error_errno(r, "Cannot find cgroup mount point: %m");
- unified = cg_unified();
- if (unified < 0)
- return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
- if (unified > 0)
+ all_unified = cg_all_unified();
+ systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+
+ if (all_unified < 0 || systemd_unified < 0)
+ return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
+ "Couldn't determine if we are running in the unified hierarchy: %m");
+
+ if (all_unified > 0)
log_debug("Unified cgroup hierarchy is located at %s.", path);
+ else if (systemd_unified > 0)
+ log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
else
log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
@@ -1659,7 +1800,7 @@ int manager_setup_cgroup(Manager *m) {
const char *scope_path;
/* 3. Install agent */
- if (unified) {
+ if (systemd_unified) {
/* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
@@ -1719,7 +1860,7 @@ int manager_setup_cgroup(Manager *m) {
return log_error_errno(errno, "Failed to open pin file: %m");
/* 6. Always enable hierarchical support if it exists... */
- if (!unified)
+ if (!all_unified)
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
}
@@ -1845,7 +1986,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
return -ENODATA;
- if (cg_unified() <= 0)
+ if (cg_all_unified() <= 0)
r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
else
r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
@@ -1890,18 +2031,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
- if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
- return -ENODATA;
+ if (cg_all_unified() > 0) {
+ const char *keys[] = { "usage_usec", NULL };
+ _cleanup_free_ char *val = NULL;
+ uint64_t us;
- r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
- if (r == -ENOENT)
- return -ENODATA;
- if (r < 0)
- return r;
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
+ return -ENODATA;
- r = safe_atou64(v, &ns);
- if (r < 0)
- return r;
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &us);
+ if (r < 0)
+ return r;
+
+ ns = us * NSEC_PER_USEC;
+ } else {
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
+ return -ENODATA;
+
+ r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &ns);
+ if (r < 0)
+ return r;
+ }
*ret = ns;
return 0;
@@ -1911,16 +2071,33 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
nsec_t ns;
int r;
+ assert(u);
+
+ /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
+ * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
+ * call this function with a NULL return value. */
+
r = unit_get_cpu_usage_raw(u, &ns);
+ if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+ /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
+ * cached value. */
+
+ if (ret)
+ *ret = u->cpu_usage_last;
+ return 0;
+ }
if (r < 0)
return r;
- if (ns > u->cpuacct_usage_base)
- ns -= u->cpuacct_usage_base;
+ if (ns > u->cpu_usage_base)
+ ns -= u->cpu_usage_base;
else
ns = 0;
- *ret = ns;
+ u->cpu_usage_last = ns;
+ if (ret)
+ *ret = ns;
+
return 0;
}
@@ -1930,13 +2107,15 @@ int unit_reset_cpu_usage(Unit *u) {
assert(u);
+ u->cpu_usage_last = NSEC_INFINITY;
+
r = unit_get_cpu_usage_raw(u, &ns);
if (r < 0) {
- u->cpuacct_usage_base = 0;
+ u->cpu_usage_base = 0;
return r;
}
- u->cpuacct_usage_base = ns;
+ u->cpu_usage_base = ns;
return 0;
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index a57403e79f..4cd168f63e 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -89,6 +89,10 @@ struct CGroupContext {
bool tasks_accounting;
/* For unified hierarchy */
+ uint64_t cpu_weight;
+ uint64_t startup_cpu_weight;
+ usec_t cpu_quota_per_sec_usec;
+
uint64_t io_weight;
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@@ -97,11 +101,11 @@ struct CGroupContext {
uint64_t memory_low;
uint64_t memory_high;
uint64_t memory_max;
+ uint64_t memory_swap_max;
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
- usec_t cpu_quota_per_sec_usec;
uint64_t blockio_weight;
uint64_t startup_blockio_weight;
diff --git a/src/core/dbus-automount.c b/src/core/dbus-automount.c
index b2806ad86f..26212b3a95 100644
--- a/src/core/dbus-automount.c
+++ b/src/core/dbus-automount.c
@@ -32,3 +32,57 @@ const sd_bus_vtable bus_automount_vtable[] = {
SD_BUS_PROPERTY("TimeoutIdleUSec", "t", bus_property_get_usec, offsetof(Automount, timeout_idle_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
+
+static int bus_automount_set_transient_property(
+ Automount *a,
+ const char *name,
+ sd_bus_message *message,
+ UnitSetPropertiesMode mode,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ if (streq(name, "TimeoutIdleUSec")) {
+ usec_t timeout_idle_usec;
+ r = sd_bus_message_read(message, "t", &timeout_idle_usec);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ char time[FORMAT_TIMESPAN_MAX];
+
+ a->timeout_idle_usec = timeout_idle_usec;
+ unit_write_drop_in_format(UNIT(a), mode, name, "[Automount]\nTimeoutIdleSec=%s\n",
+ format_timespan(time, sizeof(time), timeout_idle_usec, USEC_PER_MSEC));
+ }
+ } else
+ return 0;
+
+ return 1;
+}
+
+int bus_automount_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitSetPropertiesMode mode,
+ sd_bus_error *error) {
+
+ Automount *a = AUTOMOUNT(u);
+ int r = 0;
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_automount_set_transient_property(a, name, message, mode, error);
+
+ return r;
+}
diff --git a/src/core/dbus-automount.h b/src/core/dbus-automount.h
index 7b51eb973a..f41adda2a6 100644
--- a/src/core/dbus-automount.h
+++ b/src/core/dbus-automount.h
@@ -21,3 +21,5 @@
extern const sd_bus_vtable bus_automount_vtable[];
+
+int bus_automount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitSetPropertiesMode mode, sd_bus_error *error);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 85b0c86a2f..c4067a95bf 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -210,6 +210,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+ SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+ SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
@@ -231,6 +233,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
+ SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0),
SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -303,6 +306,50 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "CPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_WEIGHT_INVALID)
+ unit_write_drop_in_private(u, mode, name, "CPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
+ } else if (streq(name, "StartupCPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->startup_cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_CPU_SHARES_INVALID)
+ unit_write_drop_in_private(u, mode, name, "StartupCPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
} else if (streq(name, "CPUShares")) {
uint64_t shares;
@@ -829,7 +876,7 @@ int bus_cgroup_set_property(
return 1;
- } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
+ } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax")) {
uint64_t v;
r = sd_bus_message_read(message, "t", &v);
@@ -843,6 +890,8 @@ int bus_cgroup_set_property(
c->memory_low = v;
else if (streq(name, "MemoryHigh"))
c->memory_high = v;
+ else if (streq(name, "MemorySwapMax"))
+ c->memory_swap_max = v;
else
c->memory_max = v;
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 307c3d8e7a..1a7f770db1 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -44,6 +44,7 @@
#endif
#include "strv.h"
#include "syslog-util.h"
+#include "user-util.h"
#include "utf8.h"
BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput);
@@ -626,6 +627,53 @@ static int property_get_syslog_facility(
return sd_bus_message_append(reply, "i", LOG_FAC(c->syslog_priority));
}
+static int property_get_input_fdname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ const char *name;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ name = exec_context_fdname(c, STDIN_FILENO);
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_output_fdname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ const char *name = NULL;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ if (c->std_output == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardOutputFileDescriptorName"))
+ name = exec_context_fdname(c, STDOUT_FILENO);
+ else if (c->std_error == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardErrorFileDescriptorName"))
+ name = exec_context_fdname(c, STDERR_FILENO);
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -676,8 +724,11 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_input_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -693,6 +744,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("AmbientCapabilities", "t", property_get_ambient_capabilities, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
@@ -703,8 +756,12 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", bus_property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -840,6 +897,9 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
+ if (!isempty(uu) && !valid_user_group_name_or_id(uu))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user name: %s", uu);
+
if (mode != UNIT_CHECK) {
if (isempty(uu))
@@ -859,6 +919,9 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
+ if (!isempty(gg) && !valid_user_group_name_or_id(gg))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group name: %s", gg);
+
if (mode != UNIT_CHECK) {
if (isempty(gg))
@@ -927,7 +990,7 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
- if (n < PRIO_MIN || n >= PRIO_MAX)
+ if (!nice_is_valid(n))
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Nice value out of range");
if (mode != UNIT_CHECK) {
@@ -1017,7 +1080,6 @@ int bus_exec_context_set_transient_property(
return 1;
-
} else if (streq(name, "StandardOutput")) {
const char *s;
ExecOutput p;
@@ -1059,9 +1121,46 @@ int bus_exec_context_set_transient_property(
return 1;
} else if (STR_IN_SET(name,
+ "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) {
+ const char *s;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!fdname_is_valid(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name");
+
+ if (mode != UNIT_CHECK) {
+ if (streq(name, "StandardInputFileDescriptorName")) {
+ c->std_input = EXEC_INPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], s);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private_format(u, mode, name, "StandardInput=fd:%s", s);
+ } else if (streq(name, "StandardOutputFileDescriptorName")) {
+ c->std_output = EXEC_OUTPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], s);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private_format(u, mode, name, "StandardOutput=fd:%s", s);
+ } else if (streq(name, "StandardErrorFileDescriptorName")) {
+ c->std_error = EXEC_OUTPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], s);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private_format(u, mode, name, "StandardError=fd:%s", s);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
- "PrivateTmp", "PrivateDevices", "PrivateNetwork",
- "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime")) {
+ "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+ "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
+ "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
+ "ProtectKernelModules", "ProtectControlGroups")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@@ -1081,6 +1180,8 @@ int bus_exec_context_set_transient_property(
c->private_devices = b;
else if (streq(name, "PrivateNetwork"))
c->private_network = b;
+ else if (streq(name, "PrivateUsers"))
+ c->private_users = b;
else if (streq(name, "NoNewPrivileges"))
c->no_new_privileges = b;
else if (streq(name, "SyslogLevelPrefix"))
@@ -1089,6 +1190,16 @@ int bus_exec_context_set_transient_property(
c->memory_deny_write_execute = b;
else if (streq(name, "RestrictRealtime"))
c->restrict_realtime = b;
+ else if (streq(name, "DynamicUser"))
+ c->dynamic_user = b;
+ else if (streq(name, "RemoveIPC"))
+ c->remove_ipc = b;
+ else if (streq(name, "ProtectKernelTunables"))
+ c->protect_kernel_tunables = b;
+ else if (streq(name, "ProtectKernelModules"))
+ c->protect_kernel_modules = b;
+ else if (streq(name, "ProtectControlGroups"))
+ c->protect_control_groups = b;
unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
}
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index d05968bd65..d7d3d3c8ce 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -43,9 +43,15 @@
#include "string-util.h"
#include "strv.h"
#include "syslog-util.h"
+#include "user-util.h"
#include "virt.h"
#include "watchdog.h"
+static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
+ return (runtime ? UNIT_FILE_RUNTIME : 0) |
+ (force ? UNIT_FILE_FORCE : 0);
+}
+
static int property_get_version(
sd_bus *bus,
const char *path,
@@ -463,6 +469,64 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu
return sd_bus_reply_method_return(message, "o", path);
}
+static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ sd_id128_t id;
+ const void *a;
+ Unit *u;
+ size_t sz;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read_array(message, 'y', &a, &sz);
+ if (r < 0)
+ return r;
+ if (sz == 0)
+ id = SD_ID128_NULL;
+ else if (sz == 16)
+ memcpy(&id, a, sz);
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
+
+ if (sd_id128_is_null(id)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid);
+ } else {
+ u = hashmap_get(m->units_by_invocation_id, &id);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id));
+ }
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead
+ * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */
+ path = unit_dbus_path_invocation_id(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
_cleanup_free_ char *path = NULL;
Manager *m = userdata;
@@ -642,6 +706,54 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s
return bus_unit_method_set_properties(message, u, error);
}
+static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_check_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_ref(message, u, error);
+}
+
+static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_check_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_unref(message, u, error);
+}
+
static int reply_unit_info(sd_bus_message *reply, Unit *u) {
_cleanup_free_ char *unit_path = NULL, *job_path = NULL;
Unit *following;
@@ -780,6 +892,13 @@ static int transient_unit_from_message(
if (r < 0)
return r;
+ /* If the client asked for it, automatically add a reference to this unit. */
+ if (u->bus_track_add) {
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch sender: %m");
+ }
+
/* Now load the missing bits of the unit we just created */
unit_add_to_load_queue(u);
manager_dispatch_load_queue(m);
@@ -1511,8 +1630,8 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd
}
static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) {
- uint8_t code;
Manager *m = userdata;
+ uint8_t code;
int r;
assert(message);
@@ -1534,6 +1653,61 @@ static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_
return sd_bus_reply_method_return(message, NULL);
}
+static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_basic(message, 's', &name);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!valid_user_group_name(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name);
+
+ r = dynamic_user_lookup_name(m, name, &uid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) uid);
+}
+
+static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid) == sizeof(uint32_t));
+ r = sd_bus_message_read_basic(message, 'u', &uid);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid);
+
+ r = dynamic_user_lookup_uid(m, uid, &name);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", name);
+}
+
static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
Manager *m = userdata;
@@ -1779,13 +1953,14 @@ static int install_error(
static int method_enable_unit_files_generic(
sd_bus_message *message,
Manager *m,
- int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], bool force, UnitFileChange **changes, unsigned *n_changes),
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
bool carries_install_info,
sd_bus_error *error) {
_cleanup_strv_free_ char **l = NULL;
UnitFileChange *changes = NULL;
unsigned n_changes = 0;
+ UnitFileFlags flags;
int runtime, force, r;
assert(message);
@@ -1799,13 +1974,15 @@ static int method_enable_unit_files_generic(
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
r = bus_verify_manage_unit_files_async(m, message, error);
if (r < 0)
return r;
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = call(m->unit_file_scope, runtime, NULL, l, force, &changes, &n_changes);
+ r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1824,8 +2001,8 @@ static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bu
return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error);
}
-static int unit_file_preset_without_mode(UnitFileScope scope, bool runtime, const char *root_dir, char **files, bool force, UnitFileChange **changes, unsigned *n_changes) {
- return unit_file_preset(scope, runtime, root_dir, files, UNIT_FILE_PRESET_FULL, force, changes, n_changes);
+static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, unsigned *n_changes) {
+ return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes);
}
static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -1844,6 +2021,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
Manager *m = userdata;
UnitFilePresetMode mm;
int runtime, force, r;
+ UnitFileFlags flags;
const char *mode;
assert(message);
@@ -1857,6 +2035,8 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
if (isempty(mode))
mm = UNIT_FILE_PRESET_FULL;
else {
@@ -1871,7 +2051,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = unit_file_preset(m->unit_file_scope, runtime, NULL, l, mm, force, &changes, &n_changes);
+ r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1881,7 +2061,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
static int method_disable_unit_files_generic(
sd_bus_message *message,
Manager *m,
- int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
sd_bus_error *error) {
_cleanup_strv_free_ char **l = NULL;
@@ -1906,7 +2086,7 @@ static int method_disable_unit_files_generic(
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = call(m->unit_file_scope, runtime, NULL, l, &changes, &n_changes);
+ r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1972,7 +2152,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = unit_file_set_default(m->unit_file_scope, NULL, name, force, &changes, &n_changes);
+ r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1985,6 +2165,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
Manager *m = userdata;
UnitFilePresetMode mm;
const char *mode;
+ UnitFileFlags flags;
int force, runtime, r;
assert(message);
@@ -1998,6 +2179,8 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
if (isempty(mode))
mm = UNIT_FILE_PRESET_FULL;
else {
@@ -2012,7 +2195,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = unit_file_preset_all(m->unit_file_scope, runtime, NULL, mm, force, &changes, &n_changes);
+ r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -2027,6 +2210,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
int runtime, force, r;
char *target, *type;
UnitDependency dep;
+ UnitFileFlags flags;
assert(message);
assert(m);
@@ -2045,17 +2229,62 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
dep = unit_dependency_from_string(type);
if (dep < 0)
return -EINVAL;
- r = unit_file_add_dependency(m->unit_file_scope, runtime, NULL, l, target, dep, force, &changes, &n_changes);
+ r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes);
}
+static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ UnitFileChange *changes = NULL;
+ unsigned n_changes = 0, i;
+ UnitFileFlags flags;
+ const char *name;
+ char **p;
+ int runtime, r;
+
+ r = sd_bus_message_read(message, "sb", &name, &runtime);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK) {
+ r = sd_bus_message_append(reply, "s", changes[i].path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -2143,6 +2372,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -2155,6 +2385,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -2198,7 +2430,10 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_SIGNAL("UnitNew", "so", 0),
SD_BUS_SIGNAL("UnitRemoved", "so", 0),
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index 935db7c48b..76a7a7ce97 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -116,7 +116,11 @@ const sd_bus_vtable bus_mount_vtable[] = {
SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -157,6 +161,9 @@ static int bus_mount_set_transient_property(
if (!p)
return -ENOMEM;
+ unit_write_drop_in_format(UNIT(m), mode, name, "[Mount]\n%s=%s\n",
+ name, new_property);
+
free(*property);
*property = p;
}
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index fab3677a01..61b83d2d62 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -36,7 +36,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -50,12 +50,7 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
- /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
- SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Service, emergency_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -70,6 +65,9 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -77,6 +75,12 @@ const sd_bus_vtable bus_service_vtable[] = {
BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+
+ /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
SD_BUS_VTABLE_END
};
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index 961340608d..21adb64e15 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -137,6 +137,7 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("Symlinks", "as", NULL, offsetof(Socket, symlinks), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Mark", "i", bus_property_get_int, offsetof(Socket, mark), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MaxConnections", "u", bus_property_get_unsigned, offsetof(Socket, max_connections), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnectionsPerSource", "u", bus_property_get_unsigned, offsetof(Socket, max_connections_per_source), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MessageQueueMaxMessages", "x", bus_property_get_long, offsetof(Socket, mq_maxmsg), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MessageQueueMessageSize", "x", bus_property_get_long, offsetof(Socket, mq_msgsize), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReusePort", "b", bus_property_get_bool, offsetof(Socket, reuse_port), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -151,6 +152,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c
index 292f8738c6..85a2c26b98 100644
--- a/src/core/dbus-swap.c
+++ b/src/core/dbus-swap.c
@@ -84,6 +84,8 @@ const sd_bus_vtable bus_swap_vtable[] = {
SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
SD_BUS_VTABLE_END
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index b55d2cf735..69e249c844 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -37,7 +37,7 @@
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
static int property_get_names(
sd_bus *bus,
@@ -263,10 +263,7 @@ static int property_get_can_stop(
assert(reply);
assert(u);
- /* On the lower levels we assume that every unit we can start
- * we can also stop */
-
- return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop);
+ return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop);
}
static int property_get_can_reload(
@@ -418,6 +415,7 @@ static int bus_verify_manage_units_async_full(
const char *verb,
int capability,
const char *polkit_message,
+ bool interactive,
sd_bus_message *call,
sd_bus_error *error) {
@@ -433,7 +431,15 @@ static int bus_verify_manage_units_async_full(
details[7] = GETTEXT_PACKAGE;
}
- return bus_verify_polkit_async(call, capability, "org.freedesktop.systemd1.manage-units", details, false, UID_INVALID, &u->manager->polkit_registry, error);
+ return bus_verify_polkit_async(
+ call,
+ capability,
+ "org.freedesktop.systemd1.manage-units",
+ details,
+ interactive,
+ UID_INVALID,
+ &u->manager->polkit_registry,
+ error);
}
int bus_unit_method_start_generic(
@@ -486,6 +492,7 @@ int bus_unit_method_start_generic(
verb,
CAP_SYS_ADMIN,
job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL,
+ true,
message,
error);
if (r < 0)
@@ -558,6 +565,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *
"kill",
CAP_KILL,
N_("Authentication is required to kill '$(unit)'."),
+ true,
message,
error);
if (r < 0)
@@ -588,6 +596,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus
"reset-failed",
CAP_SYS_ADMIN,
N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
+ true,
message,
error);
if (r < 0)
@@ -620,6 +629,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
"set-property",
CAP_SYS_ADMIN,
N_("Authentication is required to set properties on '$(unit)'."),
+ true,
message,
error);
if (r < 0)
@@ -634,6 +644,53 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
return sd_bus_reply_method_return(message, NULL);
}
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "ref",
+ CAP_SYS_ADMIN,
+ NULL,
+ false,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_unit_track_remove_sender(u, message);
+ if (r == -EUNATCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet.");
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -660,10 +717,6 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -694,7 +747,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
@@ -704,11 +757,12 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0),
SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* obsolete alias name */
SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0),
SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -720,7 +774,15 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED),
+ /* Obsolete properties or obsolete alias names */
+ SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
SD_BUS_VTABLE_END
};
@@ -1104,7 +1166,7 @@ void bus_unit_send_removed_signal(Unit *u) {
int r;
assert(u);
- if (!u->sent_dbus_new_signal)
+ if (!u->sent_dbus_new_signal || u->in_dbus_queue)
bus_unit_send_change_signal(u);
if (!u->id)
@@ -1317,6 +1379,29 @@ static int bus_unit_set_transient_property(
return r;
return 1;
+
+ } else if (streq(name, "AddRef")) {
+
+ int b;
+
+ /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method
+ * on the Unit interface, and it's probably not a good idea to expose a property and a method on the
+ * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for
+ * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference
+ * dependency type, hence let's not confuse things with that.
+ *
+ * Note that we don't acually add the reference to the bus track. We do that only after the setup of
+ * the transient unit is complete, so that setting this property multiple times in the same transient
+ * unit creation call doesn't count as individual references. */
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK)
+ u->bus_track_add = b;
+
+ return 1;
}
return 0;
@@ -1421,3 +1506,71 @@ int bus_unit_check_load_state(Unit *u, sd_bus_error *error) {
return sd_bus_error_set_errnof(error, u->load_error, "Unit %s is not loaded properly: %m.", u->id);
}
+
+static int bus_track_handler(sd_bus_track *t, void *userdata) {
+ Unit *u = userdata;
+
+ assert(t);
+ assert(u);
+
+ u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+
+ unit_add_to_gc_queue(u);
+ return 0;
+}
+
+static int allocate_bus_track(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->bus_track)
+ return 0;
+
+ r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_track_handler, u);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_set_recursive(u->bus_track, true);
+ if (r < 0) {
+ u->bus_track = sd_bus_track_unref(u->bus_track);
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_unit_track_add_name(Unit *u, const char *name) {
+ int r;
+
+ assert(u);
+
+ r = allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_name(u->bus_track, name);
+}
+
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) {
+ int r;
+
+ assert(u);
+
+ r = allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_sender(u->bus_track, m);
+}
+
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) {
+ assert(u);
+
+ /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an
+ * error */
+ if (!u->bus_track)
+ return -EUNATCH;
+
+ return sd_bus_track_remove_sender(u->bus_track, m);
+}
diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h
index 4db88dbebc..b280de7a1d 100644
--- a/src/core/dbus-unit.h
+++ b/src/core/dbus-unit.h
@@ -33,9 +33,15 @@ int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_
int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error);
-int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitSetPropertiesMode mode, bool commit, sd_bus_error *error);
int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error);
int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
int bus_unit_check_load_state(Unit *u, sd_bus_error *error);
+
+int bus_unit_track_add_name(Unit *u, const char *name);
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m);
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m);
diff --git a/src/core/dbus.c b/src/core/dbus.c
index 3422a02d68..070974fe66 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -964,10 +964,6 @@ static int bus_init_private(Manager *m) {
if (m->private_listen_fd >= 0)
return 0;
- /* We don't need the private socket if we have kdbus */
- if (m->kdbus_fd >= 0)
- return 0;
-
if (MANAGER_IS_SYSTEM(m)) {
/* We want the private bus only when running as init */
@@ -1168,60 +1164,57 @@ int bus_foreach_bus(
return ret;
}
-void bus_track_serialize(sd_bus_track *t, FILE *f) {
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
const char *n;
assert(f);
+ assert(prefix);
- for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t))
- fprintf(f, "subscribed=%s\n", n);
-}
+ for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) {
+ int c, j;
-int bus_track_deserialize_item(char ***l, const char *line) {
- const char *e;
- int r;
-
- assert(l);
- assert(line);
-
- e = startswith(line, "subscribed=");
- if (!e)
- return 0;
+ c = sd_bus_track_count_name(t, n);
- r = strv_extend(l, e);
- if (r < 0)
- return r;
-
- return 1;
+ for (j = 0; j < c; j++) {
+ fputs(prefix, f);
+ fputc('=', f);
+ fputs(n, f);
+ fputc('\n', f);
+ }
+ }
}
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l) {
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
+ char **i;
int r = 0;
assert(m);
assert(t);
- assert(l);
- if (!strv_isempty(*l) && m->api_bus) {
- char **i;
-
- if (!*t) {
- r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
- if (r < 0)
- return r;
- }
+ if (strv_isempty(l))
+ return 0;
- r = 0;
- STRV_FOREACH(i, *l) {
- int k;
+ if (!m->api_bus)
+ return 0;
- k = sd_bus_track_add_name(*t, *i);
- if (k < 0)
- r = k;
- }
+ if (!*t) {
+ r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
+ if (r < 0)
+ return r;
}
- *l = strv_free(*l);
+ r = sd_bus_track_set_recursive(*t, recursive);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ STRV_FOREACH(i, l) {
+ int k;
+
+ k = sd_bus_track_add_name(*t, *i);
+ if (k < 0)
+ r = k;
+ }
return r;
}
diff --git a/src/core/dbus.h b/src/core/dbus.h
index 6baaffbd75..a092ed9d76 100644
--- a/src/core/dbus.h
+++ b/src/core/dbus.h
@@ -28,9 +28,8 @@ void bus_done(Manager *m);
int bus_fdset_add_all(Manager *m, FDSet *fds);
-void bus_track_serialize(sd_bus_track *t, FILE *f);
-int bus_track_deserialize_item(char ***l, const char *line);
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l);
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix);
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l);
int manager_sync_bus_names(Manager *m, sd_bus *bus);
diff --git a/src/core/device.c b/src/core/device.c
index 16e56efcc3..4b9e84aeb6 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -331,11 +331,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Device));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Device), e, &u);
if (r < 0)
goto fail;
@@ -369,7 +365,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
fail:
log_unit_warning_errno(u, r, "Failed to set up device unit: %m");
- if (delete)
+ if (delete && u)
unit_free(u);
return r;
@@ -464,6 +460,10 @@ static void device_update_found_one(Device *d, bool add, DeviceFound found, bool
if (!now)
return;
+ /* Didn't exist before, but does now? if so, generate a new invocation ID for it */
+ if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND)
+ (void) unit_acquire_invocation_id(UNIT(d));
+
if (d->found & DEVICE_FOUND_UDEV)
/* When the device is known to udev we consider it
* plugged. */
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
new file mode 100644
index 0000000000..e1846e1adb
--- /dev/null
+++ b/src/core/dynamic-user.c
@@ -0,0 +1,794 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <grp.h>
+#include <pwd.h>
+#include <sys/file.h>
+
+#include "dynamic-user.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "fileio.h"
+
+/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
+
+static DynamicUser* dynamic_user_free(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ if (d->manager)
+ (void) hashmap_remove(d->manager->dynamic_users, d->name);
+
+ safe_close_pair(d->storage_socket);
+ return mfree(d);
+}
+
+static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) {
+ DynamicUser *d = NULL;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(storage_socket);
+
+ r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1);
+ if (!d)
+ return -ENOMEM;
+
+ strcpy(d->name, name);
+
+ d->storage_socket[0] = storage_socket[0];
+ d->storage_socket[1] = storage_socket[1];
+
+ r = hashmap_put(m->dynamic_users, d->name, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ d->manager = m;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
+ _cleanup_close_pair_ int storage_socket[2] = { -1, -1 };
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for
+ * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really
+ * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do
+ * from PID 1. To allow the children and PID 1 share information about allocated UIDs we use an anonymous
+ * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the
+ * allocated UID number, plus an fd referencing the lock file for the UID
+ * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can
+ * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair
+ * may exist in three different states:
+ *
+ * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized.
+ *
+ * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a
+ * statically assigned UID by the same name, which we are reusing.
+ *
+ * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic
+ * UID and locked it in the file system, using the lock fd.
+ *
+ * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it
+ * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here:
+ * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in
+ * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX
+ * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous,
+ * nobody else could get any access to it except via our own fd) and we want to synchronize access between all
+ * processes that have access to it. */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (d) {
+ /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
+ d->n_ref++;
+ *ret = d;
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(name))
+ return -EINVAL;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0)
+ return -errno;
+
+ r = dynamic_user_add(m, name, storage_socket, &d);
+ if (r < 0)
+ return r;
+
+ storage_socket[0] = storage_socket[1] = -1;
+
+ if (ret) {
+ d->n_ref++;
+ *ret = d;
+ }
+
+ return 1;
+}
+
+static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
+
+ char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
+ const char *path2;
+ int r = 0, k;
+
+ /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
+ * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
+ * would be its own client then). We hence keep these world-readable symlinks in place, so that the
+ * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go
+ * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks
+ * on them and as those may be taken by any user with read access we can't make them world-readable. */
+
+ xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+ if (unlink(path1) < 0 && errno != ENOENT)
+ r = -errno;
+
+ if (b && symlink(name, path1) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1);
+ if (r == 0)
+ r = k;
+ }
+
+ path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
+ if (unlink(path2) < 0 && errno != ENOENT) {
+ k = -errno;
+ if (r == 0)
+ r = k;
+ }
+
+ if (b && symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path2);
+ if (r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int pick_uid(const char *name, uid_t *ret_uid) {
+
+ static const uint8_t hash_key[] = {
+ 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5,
+ 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59
+ };
+
+ unsigned n_tries = 100;
+ uid_t candidate;
+ int r;
+
+ /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We start with a UID
+ * generated as hash from the user name. */
+ candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key));
+
+ (void) mkdir("/run/systemd/dynamic-uid", 0755);
+
+ for (;;) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_close_ int lock_fd = -1;
+ ssize_t l;
+
+ if (--n_tries <= 0) /* Give up retrying eventually */
+ return -EBUSY;
+
+ if (!uid_is_dynamic(candidate))
+ goto next;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
+
+ for (;;) {
+ struct stat st;
+
+ lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (lock_fd < 0)
+ return -errno;
+
+ r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */
+ if (r < 0) {
+ if (errno == EBUSY || errno == EAGAIN)
+ goto next; /* already in use */
+
+ return -errno;
+ }
+
+ if (fstat(lock_fd, &st) < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and
+ * got the lock. Close it, and try again. */
+ lock_fd = safe_close(lock_fd);
+ }
+
+ /* Some superficial check whether this UID/GID might already be taken by some static user */
+ if (getpwuid(candidate) || getgrgid((gid_t) candidate)) {
+ (void) unlink(lock_path);
+ goto next;
+ }
+
+ /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
+ l = pwritev(lock_fd,
+ (struct iovec[2]) {
+ { .iov_base = (char*) name, .iov_len = strlen(name) },
+ { .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
+ }, 2, 0);
+ if (l < 0) {
+ (void) unlink(lock_path);
+ return -errno;
+ }
+
+ (void) ftruncate(lock_fd, l);
+ (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */
+
+ *ret_uid = candidate;
+ r = lock_fd;
+ lock_fd = -1;
+
+ return r;
+
+ next:
+ /* Pick another random UID, and see if that works for us. */
+ random_bytes(&candidate, sizeof(candidate));
+ candidate = UID_CLAMP_INTO_RANGE(candidate);
+ }
+}
+
+static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
+ uid_t uid = UID_INVALID;
+ struct iovec iov = {
+ .iov_base = &uid,
+ .iov_len = sizeof(uid),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ssize_t k;
+ int lock_fd = -1;
+
+ assert(d);
+ assert(ret_uid);
+ assert(ret_lock_fd);
+
+ /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock
+ * on the socket taken. */
+
+ k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC);
+ if (k < 0)
+ return -errno;
+
+ cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+ if (cmsg)
+ lock_fd = *(int*) CMSG_DATA(cmsg);
+ else
+ cmsg_close_all(&mh); /* just in case... */
+
+ *ret_uid = uid;
+ *ret_lock_fd = lock_fd;
+
+ return 0;
+}
+
+static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
+ struct iovec iov = {
+ .iov_base = &uid,
+ .iov_len = sizeof(uid),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ ssize_t k;
+
+ assert(d);
+
+ /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */
+
+ if (lock_fd >= 0) {
+ struct cmsghdr *cmsg;
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int));
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int));
+ } else {
+ mh.msg_control = NULL;
+ mh.msg_controllen = 0;
+ }
+
+ k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+
+ if (lock_fd < 0)
+ return;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ (void) unlink(lock_path);
+
+ (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */
+}
+
+int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
+
+ _cleanup_close_ int etc_passwd_lock_fd = -1, uid_lock_fd = -1;
+ uid_t uid = UID_INVALID;
+ int r;
+
+ assert(d);
+
+ /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist
+ * yet. If it already exists its existing UID/GID will be reused. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &uid_lock_fd);
+ if (r < 0) {
+ int new_uid_lock_fd;
+ uid_t new_uid;
+
+ if (r != -EAGAIN)
+ goto finish;
+
+ /* OK, nothing stored yet, let's try to find something useful. While we are working on this release the
+ * lock however, so that nobody else blocks on our NSS lookups. */
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+
+ /* Let's see if a proper, static user or group by this name exists. Try to take the lock on
+ * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't
+ * take the lock, given that users can't be added there anyway in this case. */
+ etc_passwd_lock_fd = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS)
+ return etc_passwd_lock_fd;
+
+ /* First, let's parse this as numeric UID */
+ r = parse_uid(d->name, &uid);
+ if (r < 0) {
+ struct passwd *p;
+ struct group *g;
+
+ /* OK, this is not a numeric UID. Let's see if there's a user by this name */
+ p = getpwnam(d->name);
+ if (p)
+ uid = p->pw_uid;
+
+ /* Let's see if there's a group by this name */
+ g = getgrnam(d->name);
+ if (g) {
+ /* If the UID/GID of the user/group of the same don't match, refuse operation */
+ if (uid != UID_INVALID && uid != (uid_t) g->gr_gid)
+ return -EILSEQ;
+
+ uid = (uid_t) g->gr_gid;
+ }
+ }
+
+ if (uid == UID_INVALID) {
+ /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */
+
+ uid_lock_fd = pick_uid(d->name, &uid);
+ if (uid_lock_fd < 0)
+ return uid_lock_fd;
+ }
+
+ /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) {
+ unlink_uid_lock(uid_lock_fd, uid, d->name);
+ return -errno;
+ }
+
+ r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd);
+ if (r < 0) {
+ if (r != -EAGAIN) {
+ /* OK, something bad happened, let's get rid of the bits we acquired. */
+ unlink_uid_lock(uid_lock_fd, uid, d->name);
+ goto finish;
+ }
+
+ /* Great! Nothing is stored here, still. Store our newly acquired data. */
+ } else {
+ /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
+ * acquired, and use what's stored now. */
+
+ unlink_uid_lock(uid_lock_fd, uid, d->name);
+ safe_close(uid_lock_fd);
+
+ uid = new_uid;
+ uid_lock_fd = new_uid_lock_fd;
+ }
+ }
+
+ /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already
+ * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID
+ * dynamically right here, push that in along with the lock fd for it. */
+ r = dynamic_user_push(d, uid, uid_lock_fd);
+ if (r < 0)
+ goto finish;
+
+ *ret = uid;
+ r = 0;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret) {
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r < 0)
+ goto finish;
+
+ r = dynamic_user_push(d, uid, lock_fd);
+ if (r < 0)
+ goto finish;
+
+ *ret = uid;
+ r = 0;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+DynamicUser* dynamic_user_ref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ assert(d->n_ref > 0);
+ d->n_ref++;
+
+ return d;
+}
+
+DynamicUser* dynamic_user_unref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed
+ * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries
+ * with no references, which is commonly the case right before a daemon reload. */
+
+ assert(d->n_ref > 0);
+ d->n_ref--;
+
+ return NULL;
+}
+
+static int dynamic_user_close(DynamicUser *d) {
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ /* Release the user ID, by releasing the lock on it, and emptying the storage socket. After this the user is
+ * unrealized again, much like it was after it the DynamicUser object was first allocated. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r == -EAGAIN) {
+ /* User wasn't realized yet, nothing to do. */
+ r = 0;
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+
+ /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
+ unlink_uid_lock(lock_fd, uid, d->name);
+ r = 1;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+DynamicUser* dynamic_user_destroy(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last
+ * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that
+ * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload
+ * cycle, where the dynamic users should not be destroyed, but our datastructures should. */
+
+ dynamic_user_unref(d);
+
+ if (d->n_ref > 0)
+ return NULL;
+
+ (void) dynamic_user_close(d);
+ return dynamic_user_free(d);
+}
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, d->storage_socket[0]);
+ if (copy0 < 0)
+ return copy0;
+
+ copy1 = fdset_put_dup(fds, d->storage_socket[1]);
+ if (copy1 < 0)
+ return copy1;
+
+ fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1);
+ }
+
+ return 0;
+}
+
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL;
+ int r, fd0, fd1;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ /* Parse the serialization again, after a daemon reload */
+
+ r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL);
+ if (r != 3 || !isempty(value)) {
+ log_debug("Unable to parse dynamic user line.");
+ return;
+ }
+
+ if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add dynamic user: %m");
+ return;
+ }
+
+ (void) fdset_remove(fds, fd0);
+ (void) fdset_remove(fds, fd1);
+}
+
+void dynamic_user_vacuum(Manager *m, bool close_user) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+
+ /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users
+ * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which
+ * might not be referenced anymore. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ if (d->n_ref > 0)
+ continue;
+
+ if (close_user) {
+ log_debug("Removing orphaned dynamic user %s", d->name);
+ (void) dynamic_user_close(d);
+ }
+
+ dynamic_user_free(d);
+ }
+}
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_free_ char *user = NULL;
+ uid_t check_uid;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ /* A friendly way to translate a dynamic user's UID into a name. */
+ if (!uid_is_dynamic(uid))
+ return -ESRCH;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ r = read_one_line_file(lock_path, &user);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* The lock file might be stale, hence let's verify the data before we return it */
+ r = dynamic_user_lookup_name(m, user, &check_uid);
+ if (r < 0)
+ return r;
+ if (check_uid != uid) /* lock file doesn't match our own idea */
+ return -ESRCH;
+
+ *ret = user;
+ user = NULL;
+
+ return 0;
+}
+
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(ret);
+
+ /* A friendly call for translating a dynamic user's name into its UID */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (!d)
+ return -ESRCH;
+
+ r = dynamic_user_current(d, ret);
+ if (r == -EAGAIN) /* not realized yet? */
+ return -ESRCH;
+
+ return r;
+}
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) {
+ bool acquired = false;
+ int r;
+
+ assert(creds);
+ assert(m);
+
+ /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some
+ * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user
+ * and group. This call allocates a pair. */
+
+ if (!creds->user && user) {
+ r = dynamic_user_acquire(m, user, &creds->user);
+ if (r < 0)
+ return r;
+
+ acquired = true;
+ }
+
+ if (!creds->group) {
+
+ if (creds->user && (!group || streq_ptr(user, group)))
+ creds->group = dynamic_user_ref(creds->user);
+ else {
+ r = dynamic_user_acquire(m, group, &creds->group);
+ if (r < 0) {
+ if (acquired)
+ creds->user = dynamic_user_unref(creds->user);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid) {
+ uid_t u = UID_INVALID;
+ gid_t g = GID_INVALID;
+ int r;
+
+ assert(creds);
+ assert(uid);
+ assert(gid);
+
+ /* Realize both the referenced user and group */
+
+ if (creds->user) {
+ r = dynamic_user_realize(creds->user, &u);
+ if (r < 0)
+ return r;
+ }
+
+ if (creds->group && creds->group != creds->user) {
+ r = dynamic_user_realize(creds->group, &g);
+ if (r < 0)
+ return r;
+ } else
+ g = u;
+
+ *uid = u;
+ *gid = g;
+
+ return 0;
+}
+
+void dynamic_creds_unref(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_unref(creds->user);
+ creds->group = dynamic_user_unref(creds->group);
+}
+
+void dynamic_creds_destroy(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_destroy(creds->user);
+ creds->group = dynamic_user_destroy(creds->group);
+}
diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h
new file mode 100644
index 0000000000..0b8bce1a72
--- /dev/null
+++ b/src/core/dynamic-user.h
@@ -0,0 +1,66 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+typedef struct DynamicUser DynamicUser;
+
+typedef struct DynamicCreds {
+ /* A combination of a dynamic user and group */
+ DynamicUser *user;
+ DynamicUser *group;
+} DynamicCreds;
+
+#include "manager.h"
+
+/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't
+ * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you
+ * need to allocated two of these objects. DynamicCreds below makes that easy. */
+struct DynamicUser {
+ int n_ref;
+ Manager *manager;
+
+ /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock
+ * file fd locking the user ID we picked. */
+ int storage_socket[2];
+
+ char name[];
+};
+
+int dynamic_user_acquire(Manager *m, const char *name, DynamicUser **ret);
+
+int dynamic_user_realize(DynamicUser *d, uid_t *ret);
+int dynamic_user_current(DynamicUser *d, uid_t *ret);
+
+DynamicUser* dynamic_user_ref(DynamicUser *d);
+DynamicUser* dynamic_user_unref(DynamicUser *d);
+DynamicUser* dynamic_user_destroy(DynamicUser *d);
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void dynamic_user_vacuum(Manager *m, bool close_user);
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret);
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret);
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group);
+int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid);
+
+void dynamic_creds_unref(DynamicCreds *creds);
+void dynamic_creds_destroy(DynamicCreds *creds);
diff --git a/src/core/failure-action.c b/src/core/emergency-action.c
index ddae46190f..90232bc57a 100644
--- a/src/core/failure-action.c
+++ b/src/core/emergency-action.c
@@ -23,59 +23,60 @@
#include "bus-error.h"
#include "bus-util.h"
-#include "failure-action.h"
+#include "emergency-action.h"
#include "special.h"
#include "string-table.h"
#include "terminal-util.h"
-static void log_and_status(Manager *m, const char *message) {
- log_warning("%s", message);
+static void log_and_status(Manager *m, const char *message, const char *reason) {
+ log_warning("%s: %s", message, reason);
manager_status_printf(m, STATUS_TYPE_EMERGENCY,
ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
- "%s", message);
+ "%s: %s", message, reason);
}
-int failure_action(
+int emergency_action(
Manager *m,
- FailureAction action,
- const char *reboot_arg) {
+ EmergencyAction action,
+ const char *reboot_arg,
+ const char *reason) {
assert(m);
assert(action >= 0);
- assert(action < _FAILURE_ACTION_MAX);
+ assert(action < _EMERGENCY_ACTION_MAX);
- if (action == FAILURE_ACTION_NONE)
+ if (action == EMERGENCY_ACTION_NONE)
return -ECANCELED;
if (!MANAGER_IS_SYSTEM(m)) {
/* Downgrade all options to simply exiting if we run
* in user mode */
- log_warning("Exiting as result of failure.");
+ log_warning("Exiting: %s", reason);
m->exit_code = MANAGER_EXIT;
return -ECANCELED;
}
switch (action) {
- case FAILURE_ACTION_REBOOT:
- log_and_status(m, "Rebooting as result of failure.");
+ case EMERGENCY_ACTION_REBOOT:
+ log_and_status(m, "Rebooting", reason);
(void) update_reboot_parameter_and_warn(reboot_arg);
(void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
break;
- case FAILURE_ACTION_REBOOT_FORCE:
- log_and_status(m, "Forcibly rebooting as result of failure.");
+ case EMERGENCY_ACTION_REBOOT_FORCE:
+ log_and_status(m, "Forcibly rebooting", reason);
(void) update_reboot_parameter_and_warn(reboot_arg);
m->exit_code = MANAGER_REBOOT;
break;
- case FAILURE_ACTION_REBOOT_IMMEDIATE:
- log_and_status(m, "Rebooting immediately as result of failure.");
+ case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
+ log_and_status(m, "Rebooting immediately", reason);
sync();
@@ -89,18 +90,18 @@ int failure_action(
reboot(RB_AUTOBOOT);
break;
- case FAILURE_ACTION_POWEROFF:
- log_and_status(m, "Powering off as result of failure.");
+ case EMERGENCY_ACTION_POWEROFF:
+ log_and_status(m, "Powering off", reason);
(void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
break;
- case FAILURE_ACTION_POWEROFF_FORCE:
- log_and_status(m, "Forcibly powering off as result of failure.");
+ case EMERGENCY_ACTION_POWEROFF_FORCE:
+ log_and_status(m, "Forcibly powering off", reason);
m->exit_code = MANAGER_POWEROFF;
break;
- case FAILURE_ACTION_POWEROFF_IMMEDIATE:
- log_and_status(m, "Powering off immediately as result of failure.");
+ case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
+ log_and_status(m, "Powering off immediately", reason);
sync();
@@ -109,19 +110,19 @@ int failure_action(
break;
default:
- assert_not_reached("Unknown failure action");
+ assert_not_reached("Unknown emergency action");
}
return -ECANCELED;
}
-static const char* const failure_action_table[_FAILURE_ACTION_MAX] = {
- [FAILURE_ACTION_NONE] = "none",
- [FAILURE_ACTION_REBOOT] = "reboot",
- [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force",
- [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
- [FAILURE_ACTION_POWEROFF] = "poweroff",
- [FAILURE_ACTION_POWEROFF_FORCE] = "poweroff-force",
- [FAILURE_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
+static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
+ [EMERGENCY_ACTION_NONE] = "none",
+ [EMERGENCY_ACTION_REBOOT] = "reboot",
+ [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force",
+ [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
+ [EMERGENCY_ACTION_POWEROFF] = "poweroff",
+ [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
+ [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
};
-DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction);
+DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
diff --git a/src/core/failure-action.h b/src/core/emergency-action.h
index 1adac4ad5c..8804b59752 100644
--- a/src/core/failure-action.h
+++ b/src/core/emergency-action.h
@@ -20,22 +20,22 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-typedef enum FailureAction {
- FAILURE_ACTION_NONE,
- FAILURE_ACTION_REBOOT,
- FAILURE_ACTION_REBOOT_FORCE,
- FAILURE_ACTION_REBOOT_IMMEDIATE,
- FAILURE_ACTION_POWEROFF,
- FAILURE_ACTION_POWEROFF_FORCE,
- FAILURE_ACTION_POWEROFF_IMMEDIATE,
- _FAILURE_ACTION_MAX,
- _FAILURE_ACTION_INVALID = -1
-} FailureAction;
+typedef enum EmergencyAction {
+ EMERGENCY_ACTION_NONE,
+ EMERGENCY_ACTION_REBOOT,
+ EMERGENCY_ACTION_REBOOT_FORCE,
+ EMERGENCY_ACTION_REBOOT_IMMEDIATE,
+ EMERGENCY_ACTION_POWEROFF,
+ EMERGENCY_ACTION_POWEROFF_FORCE,
+ EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+ _EMERGENCY_ACTION_MAX,
+ _EMERGENCY_ACTION_INVALID = -1
+} EmergencyAction;
#include "macro.h"
#include "manager.h"
-int failure_action(Manager *m, FailureAction action, const char *reboot_arg);
+int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason);
-const char* failure_action_to_string(FailureAction i) _const_;
-FailureAction failure_action_from_string(const char *s) _pure_;
+const char* emergency_action_to_string(EmergencyAction i) _const_;
+EmergencyAction emergency_action_from_string(const char *s) _pure_;
diff --git a/src/core/execute.c b/src/core/execute.c
index 7c178b97c3..85ee82c3e1 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -25,11 +25,14 @@
#include <signal.h>
#include <string.h>
#include <sys/capability.h>
+#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/prctl.h>
+#include <sys/shm.h>
#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
#include <utmpx.h>
@@ -90,6 +93,7 @@
#include "selinux-util.h"
#include "signal-util.h"
#include "smack-util.h"
+#include "special.h"
#include "string-table.h"
#include "string-util.h"
#include "strv.h"
@@ -219,12 +223,36 @@ static void exec_context_tty_reset(const ExecContext *context, const ExecParamet
(void) vt_disallocate(path);
}
+static bool is_terminal_input(ExecInput i) {
+ return IN_SET(i,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL);
+}
+
static bool is_terminal_output(ExecOutput o) {
- return
- o == EXEC_OUTPUT_TTY ||
- o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
- o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
- o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
+ return IN_SET(o,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
+}
+
+static bool exec_context_needs_term(const ExecContext *c) {
+ assert(c);
+
+ /* Return true if the execution context suggests we should set $TERM to something useful. */
+
+ if (is_terminal_input(c->std_input))
+ return true;
+
+ if (is_terminal_output(c->std_output))
+ return true;
+
+ if (is_terminal_output(c->std_error))
+ return true;
+
+ return !!c->tty_path;
}
static int open_null_as(int flags, int nfd) {
@@ -363,13 +391,6 @@ static int open_terminal_as(const char *path, mode_t mode, int nfd) {
return r;
}
-static bool is_terminal_input(ExecInput i) {
- return
- i == EXEC_INPUT_TTY ||
- i == EXEC_INPUT_TTY_FORCE ||
- i == EXEC_INPUT_TTY_FAIL;
-}
-
static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
if (is_terminal_input(std_input) && !apply_tty_stdin)
@@ -392,7 +413,8 @@ static int fixup_output(ExecOutput std_output, int socket_fd) {
static int setup_input(
const ExecContext *context,
const ExecParameters *params,
- int socket_fd) {
+ int socket_fd,
+ int named_iofds[3]) {
ExecInput i;
@@ -410,7 +432,7 @@ static int setup_input(
return STDIN_FILENO;
}
- i = fixup_input(context->std_input, socket_fd, params->apply_tty_stdin);
+ i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
switch (i) {
@@ -442,6 +464,10 @@ static int setup_input(
case EXEC_INPUT_SOCKET:
return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+ case EXEC_INPUT_NAMED_FD:
+ (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
+ return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
default:
assert_not_reached("Unknown input type");
}
@@ -453,6 +479,7 @@ static int setup_output(
const ExecParameters *params,
int fileno,
int socket_fd,
+ int named_iofds[3],
const char *ident,
uid_t uid,
gid_t gid,
@@ -485,7 +512,7 @@ static int setup_output(
return STDERR_FILENO;
}
- i = fixup_input(context->std_input, socket_fd, params->apply_tty_stdin);
+ i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
o = fixup_output(context->std_output, socket_fd);
if (fileno == STDERR_FILENO) {
@@ -504,7 +531,7 @@ static int setup_output(
return fileno;
/* Duplicate from stdout if possible */
- if (e == o || e == EXEC_OUTPUT_INHERIT)
+ if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
o = e;
@@ -566,6 +593,10 @@ static int setup_output(
assert(socket_fd >= 0);
return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
+ case EXEC_OUTPUT_NAMED_FD:
+ (void) fd_nonblock(named_iofds[fileno], false);
+ return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
+
default:
assert_not_reached("Unknown error type");
}
@@ -701,73 +732,157 @@ static int ask_for_confirmation(char *response, char **argv) {
return r;
}
-static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
- bool keep_groups = false;
+static int get_fixed_user(const ExecContext *c, const char **user,
+ uid_t *uid, gid_t *gid,
+ const char **home, const char **shell) {
int r;
+ const char *name;
- assert(context);
+ assert(c);
+
+ if (!c->user)
+ return 0;
+
+ /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
+ * (i.e. are "/" or "/bin/nologin"). */
+
+ name = c->user;
+ r = get_user_creds_clean(&name, uid, gid, home, shell);
+ if (r < 0)
+ return r;
- /* Lookup and set GID and supplementary group list. Here too
- * we avoid NSS lookups for gid=0. */
+ *user = name;
+ return 0;
+}
- if (context->group || username) {
+static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
+ int r;
+ const char *name;
+
+ assert(c);
+
+ if (!c->group)
+ return 0;
+
+ name = c->group;
+ r = get_group_creds(&name, gid);
+ if (r < 0)
+ return r;
+
+ *group = name;
+ return 0;
+}
+
+static int get_supplementary_groups(const ExecContext *c, const char *user,
+ const char *group, gid_t gid,
+ gid_t **supplementary_gids, int *ngids) {
+ char **i;
+ int r, k = 0;
+ int ngroups_max;
+ bool keep_groups = false;
+ gid_t *groups = NULL;
+ _cleanup_free_ gid_t *l_gids = NULL;
+
+ assert(c);
+
+ /*
+ * If user is given, then lookup GID and supplementary groups list.
+ * We avoid NSS lookups for gid=0. Also we have to initialize groups
+ * here and as early as possible so we keep the list of supplementary
+ * groups of the caller.
+ */
+ if (user && gid_is_valid(gid) && gid != 0) {
/* First step, initialize groups from /etc/groups */
- if (username && gid != 0) {
- if (initgroups(username, gid) < 0)
- return -errno;
+ if (initgroups(user, gid) < 0)
+ return -errno;
- keep_groups = true;
- }
+ keep_groups = true;
+ }
- /* Second step, set our gids */
- if (setresgid(gid, gid, gid) < 0)
+ if (!c->supplementary_groups)
+ return 0;
+
+ /*
+ * If SupplementaryGroups= was passed then NGROUPS_MAX has to
+ * be positive, otherwise fail.
+ */
+ errno = 0;
+ ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
+ if (ngroups_max <= 0) {
+ if (errno > 0)
return -errno;
+ else
+ return -EOPNOTSUPP; /* For all other values */
}
- if (context->supplementary_groups) {
- int ngroups_max, k;
- gid_t *gids;
- char **i;
+ l_gids = new(gid_t, ngroups_max);
+ if (!l_gids)
+ return -ENOMEM;
- /* Final step, initialize any manually set supplementary groups */
- assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
+ if (keep_groups) {
+ /*
+ * Lookup the list of groups that the user belongs to, we
+ * avoid NSS lookups here too for gid=0.
+ */
+ k = ngroups_max;
+ if (getgrouplist(user, gid, l_gids, &k) < 0)
+ return -EINVAL;
+ } else
+ k = 0;
- if (!(gids = new(gid_t, ngroups_max)))
- return -ENOMEM;
+ STRV_FOREACH(i, c->supplementary_groups) {
+ const char *g;
- if (keep_groups) {
- k = getgroups(ngroups_max, gids);
- if (k < 0) {
- free(gids);
- return -errno;
- }
- } else
- k = 0;
+ if (k >= ngroups_max)
+ return -E2BIG;
- STRV_FOREACH(i, context->supplementary_groups) {
- const char *g;
+ g = *i;
+ r = get_group_creds(&g, l_gids+k);
+ if (r < 0)
+ return r;
- if (k >= ngroups_max) {
- free(gids);
- return -E2BIG;
- }
+ k++;
+ }
- g = *i;
- r = get_group_creds(&g, gids+k);
- if (r < 0) {
- free(gids);
- return r;
- }
+ /*
+ * Sets ngids to zero to drop all supplementary groups, happens
+ * when we are under root and SupplementaryGroups= is empty.
+ */
+ if (k == 0) {
+ *ngids = 0;
+ return 0;
+ }
- k++;
- }
+ /* Otherwise get the final list of supplementary groups */
+ groups = memdup(l_gids, sizeof(gid_t) * k);
+ if (!groups)
+ return -ENOMEM;
- if (setgroups(k, gids) < 0) {
- free(gids);
- return -errno;
- }
+ *supplementary_gids = groups;
+ *ngids = k;
+
+ groups = NULL;
+
+ return 0;
+}
+
+static int enforce_groups(const ExecContext *context, gid_t gid,
+ gid_t *supplementary_gids, int ngids) {
+ int r;
+
+ assert(context);
+
+ /* Handle SupplementaryGroups= even if it is empty */
+ if (context->supplementary_groups) {
+ r = maybe_setgroups(ngids, supplementary_gids);
+ if (r < 0)
+ return r;
+ }
- free(gids);
+ if (gid_is_valid(gid)) {
+ /* Then set our gids */
+ if (setresgid(gid, gid, gid) < 0)
+ return -errno;
}
return 0;
@@ -776,6 +891,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_
static int enforce_user(const ExecContext *context, uid_t uid) {
assert(context);
+ if (!uid_is_valid(uid))
+ return 0;
+
/* Sets (but doesn't look up) the uid and make sure we keep the
* capabilities while doing so. */
@@ -818,14 +936,19 @@ static int null_conv(
return PAM_CONV_ERR;
}
+#endif
+
static int setup_pam(
const char *name,
const char *user,
uid_t uid,
+ gid_t gid,
const char *tty,
char ***env,
int fds[], unsigned n_fds) {
+#ifdef HAVE_PAM
+
static const struct pam_conv conv = {
.conv = null_conv,
.appdata_ptr = NULL
@@ -925,8 +1048,14 @@ static int setup_pam(
* and this will make PR_SET_PDEATHSIG work in most cases.
* If this fails, ignore the error - but expect sd-pam threads
* to fail to exit normally */
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
+ if (setresgid(gid, gid, gid) < 0)
+ log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
if (setresuid(uid, uid, uid) < 0)
- log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
+ log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
(void) ignore_signals(SIGPIPE, -1);
@@ -1019,8 +1148,10 @@ fail:
closelog();
return r;
-}
+#else
+ return 0;
#endif
+}
static void rename_process_from_path(const char *path) {
char process_name[11];
@@ -1055,15 +1186,29 @@ static void rename_process_from_path(const char *path) {
#ifdef HAVE_SECCOMP
-static int apply_seccomp(const ExecContext *c) {
+static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
+
+ if (is_seccomp_available())
+ return false;
+
+ log_open();
+ log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+ log_close();
+ return true;
+}
+
+static int apply_seccomp(const Unit* u, const ExecContext *c) {
uint32_t negative_action, action;
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
Iterator i;
void *id;
int r;
assert(c);
+ if (skip_seccomp_unavailable(u, "syscall filtering"))
+ return 0;
+
negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
@@ -1104,20 +1249,23 @@ finish:
return r;
}
-static int apply_address_families(const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+static int apply_address_families(const Unit* u, const ExecContext *c) {
+ scmp_filter_ctx seccomp;
Iterator i;
int r;
+#if defined(__i386__)
+ return 0;
+#endif
+
assert(c);
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
+ if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
+ return 0;
- r = seccomp_add_secondary_archs(seccomp);
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
if (r < 0)
- goto finish;
+ return r;
if (c->address_families_whitelist) {
int af, first = 0, last = 0;
@@ -1214,10 +1362,6 @@ static int apply_address_families(const ExecContext *c) {
}
}
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
r = seccomp_load(seccomp);
finish:
@@ -1225,15 +1369,18 @@ finish:
return r;
}
-static int apply_memory_deny_write_execute(const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
+ scmp_filter_ctx seccomp;
int r;
assert(c);
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
+ if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
+ return 0;
+
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
r = seccomp_rule_add(
seccomp,
@@ -1253,7 +1400,12 @@ static int apply_memory_deny_write_execute(const ExecContext *c) {
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
if (r < 0)
goto finish;
@@ -1264,22 +1416,25 @@ finish:
return r;
}
-static int apply_restrict_realtime(const ExecContext *c) {
+static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
static const int permitted_policies[] = {
SCHED_OTHER,
SCHED_BATCH,
SCHED_IDLE,
};
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
unsigned i;
int r, p, max_policy = 0;
assert(c);
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
+ if (skip_seccomp_unavailable(u, "RestrictRealtime="))
+ return 0;
+
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
/* Determine the highest policy constant we want to allow */
for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
@@ -1323,7 +1478,34 @@ static int apply_restrict_realtime(const ExecContext *c) {
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
+static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
+ scmp_filter_ctx seccomp;
+ int r;
+
+ assert(c);
+
+ /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
+ * let's protect even those systems where this is left on in the kernel. */
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
+ return 0;
+
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(_sysctl),
+ 0);
if (r < 0)
goto finish;
@@ -1334,12 +1516,33 @@ finish:
return r;
}
+static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
+ assert(c);
+
+ /* Turn off module syscalls on ProtectKernelModules=yes */
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+ return 0;
+
+ return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
+}
+
+static int apply_private_devices(const Unit *u, const ExecContext *c) {
+ assert(c);
+
+ /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
+
+ if (skip_seccomp_unavailable(u, "PrivateDevices="))
+ return 0;
+
+ return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
+}
+
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
assert(idle_pipe);
-
idle_pipe[1] = safe_close(idle_pipe[1]);
idle_pipe[2] = safe_close(idle_pipe[2]);
@@ -1366,6 +1569,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
}
static int build_environment(
+ Unit *u,
const ExecContext *c,
const ExecParameters *p,
unsigned n_fds,
@@ -1380,10 +1584,11 @@ static int build_environment(
unsigned n_env = 0;
char *x;
+ assert(u);
assert(c);
assert(ret);
- our_env = new0(char*, 12);
+ our_env = new0(char*, 14);
if (!our_env)
return -ENOMEM;
@@ -1408,7 +1613,7 @@ static int build_environment(
our_env[n_env++] = x;
}
- if (p->watchdog_usec > 0) {
+ if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
return -ENOMEM;
our_env[n_env++] = x;
@@ -1418,6 +1623,16 @@ static int build_environment(
our_env[n_env++] = x;
}
+ /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
+ * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
+ * check the database directly. */
+ if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
+ x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
if (home) {
x = strappend("HOME=", home);
if (!x)
@@ -1444,12 +1659,28 @@ static int build_environment(
our_env[n_env++] = x;
}
- if (is_terminal_input(c->std_input) ||
- c->std_output == EXEC_OUTPUT_TTY ||
- c->std_error == EXEC_OUTPUT_TTY ||
- c->tty_path) {
+ if (!sd_id128_is_null(u->invocation_id)) {
+ if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ if (exec_context_needs_term(c)) {
+ const char *tty_path, *term = NULL;
+
+ tty_path = exec_context_tty_path(c);
+
+ /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
+ * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
+ * passes to PID 1 ends up all the way in the console login shown. */
- x = strdup(default_term_for_tty(exec_context_tty_path(c)));
+ if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+ term = getenv("TERM");
+ if (!term)
+ term = default_term_for_tty(tty_path);
+
+ x = strappend("TERM=", term);
if (!x)
return -ENOMEM;
our_env[n_env++] = x;
@@ -1520,20 +1751,382 @@ static bool exec_needs_mount_namespace(
if (context->private_devices ||
context->protect_system != PROTECT_SYSTEM_NO ||
- context->protect_home != PROTECT_HOME_NO)
+ context->protect_home != PROTECT_HOME_NO ||
+ context->protect_kernel_tunables ||
+ context->protect_kernel_modules ||
+ context->protect_control_groups)
return true;
return false;
}
+static int setup_private_users(uid_t uid, gid_t gid) {
+ _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
+ _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
+ _cleanup_close_ int unshare_ready_fd = -1;
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ uint64_t c = 1;
+ siginfo_t si;
+ ssize_t n;
+ int r;
+
+ /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
+ * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
+ * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
+ * which waits for the parent to create the new user namespace while staying in the original namespace. The
+ * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
+ * continues execution normally. */
+
+ if (uid != 0 && uid_is_valid(uid))
+ asprintf(&uid_map,
+ "0 0 1\n" /* Map root → root */
+ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
+ uid, uid);
+ else
+ uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
+ if (!uid_map)
+ return -ENOMEM;
+
+ if (gid != 0 && gid_is_valid(gid))
+ asprintf(&gid_map,
+ "0 0 1\n" /* Map root → root */
+ GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
+ gid, gid);
+ else
+ gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
+ if (!gid_map)
+ return -ENOMEM;
+
+ /* Create a communication channel so that the parent can tell the child when it finished creating the user
+ * namespace. */
+ unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
+ if (unshare_ready_fd < 0)
+ return -errno;
+
+ /* Create a communication channel so that the child can tell the parent a proper error code in case it
+ * failed. */
+ if (pipe2(errno_pipe, O_CLOEXEC) < 0)
+ return -errno;
+
+ pid = fork();
+ if (pid < 0)
+ return -errno;
+
+ if (pid == 0) {
+ _cleanup_close_ int fd = -1;
+ const char *a;
+ pid_t ppid;
+
+ /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
+ * here, after the parent opened its own user namespace. */
+
+ ppid = getppid();
+ errno_pipe[0] = safe_close(errno_pipe[0]);
+
+ /* Wait until the parent unshared the user namespace */
+ if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* Disable the setgroups() system call in the child user namespace, for good. */
+ a = procfs_file_alloca(ppid, "setgroups");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* If the file is missing the kernel is too old, let's continue anyway. */
+ } else {
+ if (write(fd, "deny\n", 5) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ fd = safe_close(fd);
+ }
+
+ /* First write the GID map */
+ a = procfs_file_alloca(ppid, "gid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, gid_map, strlen(gid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ fd = safe_close(fd);
+
+ /* The write the UID map */
+ a = procfs_file_alloca(ppid, "uid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, uid_map, strlen(uid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe[1] = safe_close(errno_pipe[1]);
+
+ if (unshare(CLONE_NEWUSER) < 0)
+ return -errno;
+
+ /* Let the child know that the namespace is ready now */
+ if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
+ return -errno;
+
+ /* Try to read an error code from the child */
+ n = read(errno_pipe[0], &r, sizeof(r));
+ if (n < 0)
+ return -errno;
+ if (n == sizeof(r)) { /* an error code was sent to us */
+ if (r < 0)
+ return r;
+ return -EIO;
+ }
+ if (n != 0) /* on success we should have read 0 bytes */
+ return -EIO;
+
+ r = wait_for_terminate(pid, &si);
+ if (r < 0)
+ return r;
+ pid = 0;
+
+ /* If something strange happened with the child, let's consider this fatal, too */
+ if (si.si_code != CLD_EXITED || si.si_status != 0)
+ return -EIO;
+
+ return 0;
+}
+
+static int setup_runtime_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ uid_t uid,
+ gid_t gid) {
+
+ char **rt;
+ int r;
+
+ assert(context);
+ assert(params);
+
+ STRV_FOREACH(rt, context->runtime_directory) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(params->runtime_prefix, "/", *rt, NULL);
+ if (!p)
+ return -ENOMEM;
+
+ r = mkdir_p_label(p, context->runtime_directory_mode);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int setup_smack(
+ const ExecContext *context,
+ const ExecCommand *command) {
+
+#ifdef HAVE_SMACK
+ int r;
+
+ assert(context);
+ assert(command);
+
+ if (!mac_smack_use())
+ return 0;
+
+ if (context->smack_process_label) {
+ r = mac_smack_apply_pid(0, context->smack_process_label);
+ if (r < 0)
+ return r;
+ }
+#ifdef SMACK_DEFAULT_PROCESS_LABEL
+ else {
+ _cleanup_free_ char *exec_label = NULL;
+
+ r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
+ if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
+ return r;
+
+ r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
+ if (r < 0)
+ return r;
+ }
+#endif
+#endif
+
+ return 0;
+}
+
+static int compile_read_write_paths(
+ const ExecContext *context,
+ const ExecParameters *params,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ char **rt;
+
+ /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all
+ * runtime directories. */
+
+ if (strv_isempty(context->read_write_paths) &&
+ strv_isempty(context->runtime_directory)) {
+ *ret = NULL; /* NOP if neither is set */
+ return 0;
+ }
+
+ l = strv_copy(context->read_write_paths);
+ if (!l)
+ return -ENOMEM;
+
+ STRV_FOREACH(rt, context->runtime_directory) {
+ char *s;
+
+ s = strjoin(params->runtime_prefix, "/", *rt, NULL);
+ if (!s)
+ return -ENOMEM;
+
+ if (strv_consume(&l, s) < 0)
+ return -ENOMEM;
+ }
+
+ *ret = l;
+ l = NULL;
+
+ return 0;
+}
+
+static int apply_mount_namespace(Unit *u, const ExecContext *context,
+ const ExecParameters *params,
+ ExecRuntime *runtime) {
+ int r;
+ _cleanup_free_ char **rw = NULL;
+ char *tmp = NULL, *var = NULL;
+ const char *root_dir = NULL;
+ NameSpaceInfo ns_info = {
+ .private_dev = context->private_devices,
+ .protect_control_groups = context->protect_control_groups,
+ .protect_kernel_tunables = context->protect_kernel_tunables,
+ .protect_kernel_modules = context->protect_kernel_modules,
+ };
+
+ assert(context);
+
+ /* The runtime struct only contains the parent of the private /tmp,
+ * which is non-accessible to world users. Inside of it there's a /tmp
+ * that is sticky, and that's the one we want to use here. */
+
+ if (context->private_tmp && runtime) {
+ if (runtime->tmp_dir)
+ tmp = strjoina(runtime->tmp_dir, "/tmp");
+ if (runtime->var_tmp_dir)
+ var = strjoina(runtime->var_tmp_dir, "/tmp");
+ }
+
+ r = compile_read_write_paths(context, params, &rw);
+ if (r < 0)
+ return r;
+
+ if (params->flags & EXEC_APPLY_CHROOT)
+ root_dir = context->root_directory;
+
+ r = setup_namespace(root_dir, &ns_info, rw,
+ context->read_only_paths,
+ context->inaccessible_paths,
+ tmp,
+ var,
+ context->protect_home,
+ context->protect_system,
+ context->mount_flags);
+
+ /* If we couldn't set up the namespace this is probably due to a
+ * missing capability. In this case, silently proceeed. */
+ if (IN_SET(r, -EPERM, -EACCES)) {
+ log_open();
+ log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
+ log_close();
+ r = 0;
+ }
+
+ return r;
+}
+
+static int apply_working_directory(const ExecContext *context,
+ const ExecParameters *params,
+ const char *home,
+ const bool needs_mount_ns) {
+ const char *d;
+ const char *wd;
+
+ assert(context);
+
+ if (context->working_directory_home)
+ wd = home;
+ else if (context->working_directory)
+ wd = context->working_directory;
+ else
+ wd = "/";
+
+ if (params->flags & EXEC_APPLY_CHROOT) {
+ if (!needs_mount_ns && context->root_directory)
+ if (chroot(context->root_directory) < 0)
+ return -errno;
+
+ d = wd;
+ } else
+ d = strjoina(strempty(context->root_directory), "/", strempty(wd));
+
+ if (chdir(d) < 0 && !context->working_directory_missing_ok)
+ return -errno;
+
+ return 0;
+}
+
+static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
+ assert(array);
+ assert(n);
+
+ if (!pair)
+ return;
+
+ if (pair[0] >= 0)
+ array[(*n)++] = pair[0];
+ if (pair[1] >= 0)
+ array[(*n)++] = pair[1];
+}
+
static int close_remaining_fds(
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
+ int user_lookup_fd,
int socket_fd,
int *fds, unsigned n_fds) {
unsigned n_dont_close = 0;
- int dont_close[n_fds + 7];
+ int dont_close[n_fds + 12];
assert(params);
@@ -1551,37 +2144,109 @@ static int close_remaining_fds(
n_dont_close += n_fds;
}
- if (runtime) {
- if (runtime->netns_storage_socket[0] >= 0)
- dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
- if (runtime->netns_storage_socket[1] >= 0)
- dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
+ if (runtime)
+ append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
+
+ if (dcreds) {
+ if (dcreds->user)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
+ if (dcreds->group)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
}
+ if (user_lookup_fd >= 0)
+ dont_close[n_dont_close++] = user_lookup_fd;
+
return close_all_fds(dont_close, n_dont_close);
}
+static bool context_has_address_families(const ExecContext *c) {
+ assert(c);
+
+ return c->address_families_whitelist ||
+ !set_isempty(c->address_families);
+}
+
+static bool context_has_syscall_filters(const ExecContext *c) {
+ assert(c);
+
+ return c->syscall_whitelist ||
+ !set_isempty(c->syscall_filter) ||
+ !set_isempty(c->syscall_archs);
+}
+
+static bool context_has_no_new_privileges(const ExecContext *c) {
+ assert(c);
+
+ if (c->no_new_privileges)
+ return true;
+
+ if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+ return false;
+
+ return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
+ c->memory_deny_write_execute ||
+ c->restrict_realtime ||
+ c->protect_kernel_tunables ||
+ c->protect_kernel_modules ||
+ c->private_devices ||
+ context_has_syscall_filters(c);
+}
+
+static int send_user_lookup(
+ Unit *unit,
+ int user_lookup_fd,
+ uid_t uid,
+ gid_t gid) {
+
+ assert(unit);
+
+ /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
+ * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
+ * specified. */
+
+ if (user_lookup_fd < 0)
+ return 0;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ if (writev(user_lookup_fd,
+ (struct iovec[]) {
+ { .iov_base = &uid, .iov_len = sizeof(uid) },
+ { .iov_base = &gid, .iov_len = sizeof(gid) },
+ { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+ return -errno;
+
+ return 0;
+}
+
static int exec_child(
Unit *unit,
ExecCommand *command,
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
char **argv,
int socket_fd,
+ int named_iofds[3],
int *fds, unsigned n_fds,
char **files_env,
+ int user_lookup_fd,
int *exit_status) {
_cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
_cleanup_free_ char *mac_selinux_context_net = NULL;
- const char *username = NULL, *home = NULL, *shell = NULL, *wd;
+ _cleanup_free_ gid_t *supplementary_gids = NULL;
+ const char *username = NULL, *groupname = NULL;
+ const char *home = NULL, *shell = NULL;
dev_t journal_stream_dev = 0;
ino_t journal_stream_ino = 0;
bool needs_mount_namespace;
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
- int i, r;
+ int i, r, ngids = 0;
assert(unit);
assert(command);
@@ -1617,7 +2282,7 @@ static int exec_child(
log_forget_fds();
- r = close_remaining_fds(params, runtime, socket_fd, fds, n_fds);
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
return r;
@@ -1631,7 +2296,7 @@ static int exec_child(
exec_context_tty_reset(context, params);
- if (params->confirm_spawn) {
+ if (params->flags & EXEC_CONFIRM_SPAWN) {
char response;
r = ask_for_confirmation(&response, argv);
@@ -1650,44 +2315,76 @@ static int exec_child(
}
}
- if (context->user) {
- username = context->user;
- r = get_user_creds(&username, &uid, &gid, &home, &shell);
+ if (context->dynamic_user && dcreds) {
+
+ /* Make sure we bypass our own NSS module for any NSS checks */
+ if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
+ *exit_status = EXIT_USER;
+ return -errno;
+ }
+
+ r = dynamic_creds_realize(dcreds, &uid, &gid);
if (r < 0) {
*exit_status = EXIT_USER;
return r;
}
- }
- if (context->group) {
- const char *g = context->group;
+ if (!uid_is_valid(uid) || !gid_is_valid(gid)) {
+ *exit_status = EXIT_USER;
+ return -ESRCH;
+ }
+
+ if (dcreds->user)
+ username = dcreds->user->name;
+
+ } else {
+ r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
- r = get_group_creds(&g, &gid);
+ r = get_fixed_group(context, &groupname, &gid);
if (r < 0) {
*exit_status = EXIT_GROUP;
return r;
}
}
+ /* Initialize user supplementary groups and get SupplementaryGroups= ones */
+ r = get_supplementary_groups(context, username, groupname, gid,
+ &supplementary_gids, &ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return r;
+ }
+
+ r = send_user_lookup(unit, user_lookup_fd, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
+
+ user_lookup_fd = safe_close(user_lookup_fd);
/* If a socket is connected to STDIN/STDOUT/STDERR, we
* must sure to drop O_NONBLOCK */
if (socket_fd >= 0)
(void) fd_nonblock(socket_fd, false);
- r = setup_input(context, params, socket_fd);
+ r = setup_input(context, params, socket_fd, named_iofds);
if (r < 0) {
*exit_status = EXIT_STDIN;
return r;
}
- r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDOUT;
return r;
}
- r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDERR;
return r;
@@ -1774,7 +2471,7 @@ static int exec_child(
USER_PROCESS,
username ? "root" : context->user);
- if (context->user && is_terminal_input(context->std_input)) {
+ if (context->user) {
r = chown_terminal(STDIN_FILENO, uid);
if (r < 0) {
*exit_status = EXIT_STDIN;
@@ -1801,32 +2498,15 @@ static int exec_child(
}
if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
- char **rt;
-
- STRV_FOREACH(rt, context->runtime_directory) {
- _cleanup_free_ char *p;
-
- p = strjoin(params->runtime_prefix, "/", *rt, NULL);
- if (!p) {
- *exit_status = EXIT_RUNTIME_DIRECTORY;
- return -ENOMEM;
- }
-
- r = mkdir_p_label(p, context->runtime_directory_mode);
- if (r < 0) {
- *exit_status = EXIT_RUNTIME_DIRECTORY;
- return r;
- }
-
- r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
- if (r < 0) {
- *exit_status = EXIT_RUNTIME_DIRECTORY;
- return r;
- }
+ r = setup_runtime_directory(context, params, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_RUNTIME_DIRECTORY;
+ return r;
}
}
r = build_environment(
+ unit,
context,
params,
n_fds,
@@ -1860,49 +2540,16 @@ static int exec_child(
}
accum_env = strv_env_clean(accum_env);
- umask(context->umask);
+ (void) umask(context->umask);
- if (params->apply_permissions && !command->privileged) {
- r = enforce_groups(context, username, gid);
- if (r < 0) {
- *exit_status = EXIT_GROUP;
- return r;
- }
-#ifdef HAVE_SMACK
- if (context->smack_process_label) {
- r = mac_smack_apply_pid(0, context->smack_process_label);
- if (r < 0) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
- }
-#ifdef SMACK_DEFAULT_PROCESS_LABEL
- else {
- _cleanup_free_ char *exec_label = NULL;
-
- r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
- if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
-
- r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
- if (r < 0) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
- }
-#endif
-#endif
-#ifdef HAVE_PAM
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
if (context->pam_name && username) {
- r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
+ r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_PAM;
return r;
}
}
-#endif
}
if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
@@ -1914,80 +2561,37 @@ static int exec_child(
}
needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
-
if (needs_mount_namespace) {
- char *tmp = NULL, *var = NULL;
-
- /* The runtime struct only contains the parent
- * of the private /tmp, which is
- * non-accessible to world users. Inside of it
- * there's a /tmp that is sticky, and that's
- * the one we want to use here. */
-
- if (context->private_tmp && runtime) {
- if (runtime->tmp_dir)
- tmp = strjoina(runtime->tmp_dir, "/tmp");
- if (runtime->var_tmp_dir)
- var = strjoina(runtime->var_tmp_dir, "/tmp");
- }
-
- r = setup_namespace(
- params->apply_chroot ? context->root_directory : NULL,
- context->read_write_paths,
- context->read_only_paths,
- context->inaccessible_paths,
- tmp,
- var,
- context->private_devices,
- context->protect_home,
- context->protect_system,
- context->mount_flags);
-
- /* If we couldn't set up the namespace this is
- * probably due to a missing capability. In this case,
- * silently proceeed. */
- if (r == -EPERM || r == -EACCES) {
- log_open();
- log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
- log_close();
- } else if (r < 0) {
+ r = apply_mount_namespace(unit, context, params, runtime);
+ if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return r;
}
}
- if (context->working_directory_home)
- wd = home;
- else if (context->working_directory)
- wd = context->working_directory;
- else
- wd = "/";
-
- if (params->apply_chroot) {
- if (!needs_mount_namespace && context->root_directory)
- if (chroot(context->root_directory) < 0) {
- *exit_status = EXIT_CHROOT;
- return -errno;
- }
-
- if (chdir(wd) < 0 &&
- !context->working_directory_missing_ok) {
- *exit_status = EXIT_CHDIR;
- return -errno;
- }
- } else {
- const char *d;
+ /* Apply just after mount namespace setup */
+ r = apply_working_directory(context, params, home, needs_mount_namespace);
+ if (r < 0) {
+ *exit_status = EXIT_CHROOT;
+ return r;
+ }
- d = strjoina(strempty(context->root_directory), "/", strempty(wd));
- if (chdir(d) < 0 &&
- !context->working_directory_missing_ok) {
- *exit_status = EXIT_CHDIR;
- return -errno;
+ /* Drop groups as early as possbile */
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
+ r = enforce_groups(context, gid, supplementary_gids, ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return r;
}
}
#ifdef HAVE_SELINUX
- if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) {
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) &&
+ mac_selinux_use() &&
+ params->selinux_context_net &&
+ socket_fd >= 0 &&
+ !command->privileged) {
+
r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
if (r < 0) {
*exit_status = EXIT_SELINUX_CONTEXT;
@@ -1996,6 +2600,14 @@ static int exec_child(
}
#endif
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && context->private_users) {
+ r = setup_private_users(uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
+ }
+
/* We repeat the fd closing here, to make sure that
* nothing is leaked from the PAM modules. Note that
* we are more aggressive this time since socket_fd
@@ -2012,13 +2624,8 @@ static int exec_child(
return r;
}
- if (params->apply_permissions && !command->privileged) {
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
- bool use_address_families = context->address_families_whitelist ||
- !set_isempty(context->address_families);
- bool use_syscall_filter = context->syscall_whitelist ||
- !set_isempty(context->syscall_filter) ||
- !set_isempty(context->syscall_archs);
int secure_bits = context->secure_bits;
for (i = 0; i < _RLIMIT_MAX; i++) {
@@ -2085,6 +2692,41 @@ static int exec_child(
}
}
+ /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+ * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+ * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+ * are restricted. */
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_use()) {
+ char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+ if (exec_context) {
+ r = setexeccon(exec_context);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return r;
+ }
+ }
+ }
+#endif
+
+ r = setup_smack(context, command);
+ if (r < 0) {
+ *exit_status = EXIT_SMACK_PROCESS_LABEL;
+ return r;
+ }
+
+#ifdef HAVE_APPARMOR
+ if (context->apparmor_profile && mac_apparmor_use()) {
+ r = aa_change_onexec(context->apparmor_profile);
+ if (r < 0 && !context->apparmor_profile_ignore) {
+ *exit_status = EXIT_APPARMOR_PROFILE;
+ return -errno;
+ }
+ }
+#endif
+
/* PR_GET_SECUREBITS is not privileged, while
* PR_SET_SECUREBITS is. So to suppress
* potential EPERMs we'll try not to call
@@ -2095,16 +2737,15 @@ static int exec_child(
return -errno;
}
- if (context->no_new_privileges ||
- (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter)))
+ if (context_has_no_new_privileges(context))
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
*exit_status = EXIT_NO_NEW_PRIVILEGES;
return -errno;
}
#ifdef HAVE_SECCOMP
- if (use_address_families) {
- r = apply_address_families(context);
+ if (context_has_address_families(context)) {
+ r = apply_address_families(unit, context);
if (r < 0) {
*exit_status = EXIT_ADDRESS_FAMILIES;
return r;
@@ -2112,7 +2753,7 @@ static int exec_child(
}
if (context->memory_deny_write_execute) {
- r = apply_memory_deny_write_execute(context);
+ r = apply_memory_deny_write_execute(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
@@ -2120,42 +2761,44 @@ static int exec_child(
}
if (context->restrict_realtime) {
- r = apply_restrict_realtime(context);
+ r = apply_restrict_realtime(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
- if (use_syscall_filter) {
- r = apply_seccomp(context);
+ if (context->protect_kernel_tunables) {
+ r = apply_protect_sysctl(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
-#endif
-#ifdef HAVE_SELINUX
- if (mac_selinux_use()) {
- char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+ if (context->protect_kernel_modules) {
+ r = apply_protect_kernel_modules(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+ }
- if (exec_context) {
- r = setexeccon(exec_context);
- if (r < 0) {
- *exit_status = EXIT_SELINUX_CONTEXT;
- return r;
- }
+ if (context->private_devices) {
+ r = apply_private_devices(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
}
}
-#endif
-#ifdef HAVE_APPARMOR
- if (context->apparmor_profile && mac_apparmor_use()) {
- r = aa_change_onexec(context->apparmor_profile);
- if (r < 0 && !context->apparmor_profile_ignore) {
- *exit_status = EXIT_APPARMOR_PROFILE;
- return -errno;
+ /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+ * by the filter as little as possible. */
+ if (context_has_syscall_filters(context)) {
+ r = apply_seccomp(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
}
}
#endif
@@ -2192,12 +2835,14 @@ int exec_spawn(Unit *unit,
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
pid_t *ret) {
_cleanup_strv_free_ char **files_env = NULL;
int *fds = NULL; unsigned n_fds = 0;
_cleanup_free_ char *line = NULL;
int socket_fd, r;
+ int named_iofds[3] = { -1, -1, -1 };
char **argv;
pid_t pid;
@@ -2224,6 +2869,10 @@ int exec_spawn(Unit *unit,
n_fds = params->n_fds;
}
+ r = exec_context_named_iofds(unit, context, params, named_iofds);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
+
r = exec_context_load_environment(unit, context, &files_env);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
@@ -2250,10 +2899,13 @@ int exec_spawn(Unit *unit,
context,
params,
runtime,
+ dcreds,
argv,
socket_fd,
+ named_iofds,
fds, n_fds,
files_env,
+ unit->manager->user_lookup_fds[1],
&exit_status);
if (r < 0) {
log_open();
@@ -2313,6 +2965,9 @@ void exec_context_done(ExecContext *c) {
for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
c->rlimit[l] = mfree(c->rlimit[l]);
+ for (l = 0; l < 3; l++)
+ c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
+
c->working_directory = mfree(c->working_directory);
c->root_directory = mfree(c->root_directory);
c->tty_path = mfree(c->tty_path);
@@ -2411,6 +3066,56 @@ static void invalid_env(const char *p, void *userdata) {
log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
}
+const char* exec_context_fdname(const ExecContext *c, int fd_index) {
+ assert(c);
+
+ switch (fd_index) {
+ case STDIN_FILENO:
+ if (c->std_input != EXEC_INPUT_NAMED_FD)
+ return NULL;
+ return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
+ case STDOUT_FILENO:
+ if (c->std_output != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+ return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
+ case STDERR_FILENO:
+ if (c->std_error != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+ return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
+ default:
+ return NULL;
+ }
+}
+
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
+ unsigned i, targets;
+ const char *stdio_fdname[3];
+
+ assert(c);
+ assert(p);
+
+ targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
+ (c->std_output == EXEC_OUTPUT_NAMED_FD) +
+ (c->std_error == EXEC_OUTPUT_NAMED_FD);
+
+ for (i = 0; i < 3; i++)
+ stdio_fdname[i] = exec_context_fdname(c, i);
+
+ for (i = 0; i < p->n_fds && targets > 0; i++)
+ if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
+ named_iofds[STDIN_FILENO] = p->fds[i];
+ targets--;
+ } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
+ named_iofds[STDOUT_FILENO] = p->fds[i];
+ targets--;
+ } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
+ named_iofds[STDERR_FILENO] = p->fds[i];
+ targets--;
+ }
+
+ return (targets == 0 ? 0 : -ENOENT);
+}
+
int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
char **i, **r = NULL;
@@ -2555,8 +3260,12 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sRootDirectory: %s\n"
"%sNonBlocking: %s\n"
"%sPrivateTmp: %s\n"
- "%sPrivateNetwork: %s\n"
"%sPrivateDevices: %s\n"
+ "%sProtectKernelTunables: %s\n"
+ "%sProtectKernelModules: %s\n"
+ "%sProtectControlGroups: %s\n"
+ "%sPrivateNetwork: %s\n"
+ "%sPrivateUsers: %s\n"
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
"%sIgnoreSIGPIPE: %s\n"
@@ -2567,8 +3276,12 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, c->root_directory ? c->root_directory : "/",
prefix, yes_no(c->non_blocking),
prefix, yes_no(c->private_tmp),
- prefix, yes_no(c->private_network),
prefix, yes_no(c->private_devices),
+ prefix, yes_no(c->protect_kernel_tunables),
+ prefix, yes_no(c->protect_kernel_modules),
+ prefix, yes_no(c->protect_control_groups),
+ prefix, yes_no(c->private_network),
+ prefix, yes_no(c->private_users),
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
prefix, yes_no(c->ignore_sigpipe),
@@ -2723,6 +3436,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
if (c->group)
fprintf(f, "%sGroup: %s\n", prefix, c->group);
+ fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
+
if (strv_length(c->supplementary_groups) > 0) {
fprintf(f, "%sSupplementaryGroups:", prefix);
strv_fprintf(f, c->supplementary_groups);
@@ -2882,12 +3597,12 @@ void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
"%sPID: "PID_FMT"\n",
prefix, s->pid);
- if (s->start_timestamp.realtime > 0)
+ if (dual_timestamp_is_set(&s->start_timestamp))
fprintf(f,
"%sStart Timestamp: %s\n",
prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
- if (s->exit_timestamp.realtime > 0)
+ if (dual_timestamp_is_set(&s->exit_timestamp))
fprintf(f,
"%sExit Timestamp: %s\n"
"%sExit Code: %s\n"
@@ -2908,7 +3623,8 @@ char *exec_command_line(char **argv) {
STRV_FOREACH(a, argv)
k += strlen(*a)+3;
- if (!(n = new(char, k)))
+ n = new(char, k);
+ if (!n)
return NULL;
p = n;
@@ -3097,9 +3813,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
free(r->tmp_dir);
free(r->var_tmp_dir);
safe_close_pair(r->netns_storage_socket);
- free(r);
-
- return NULL;
+ return mfree(r);
}
int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
@@ -3255,7 +3969,8 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
[EXEC_INPUT_TTY] = "tty",
[EXEC_INPUT_TTY_FORCE] = "tty-force",
[EXEC_INPUT_TTY_FAIL] = "tty-fail",
- [EXEC_INPUT_SOCKET] = "socket"
+ [EXEC_INPUT_SOCKET] = "socket",
+ [EXEC_INPUT_NAMED_FD] = "fd",
};
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
@@ -3270,7 +3985,8 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
[EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
[EXEC_OUTPUT_JOURNAL] = "journal",
[EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
- [EXEC_OUTPUT_SOCKET] = "socket"
+ [EXEC_OUTPUT_SOCKET] = "socket",
+ [EXEC_OUTPUT_NAMED_FD] = "fd",
};
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
diff --git a/src/core/execute.h b/src/core/execute.h
index 189c4d0999..c7d0f7761e 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -50,6 +50,7 @@ typedef enum ExecInput {
EXEC_INPUT_TTY_FORCE,
EXEC_INPUT_TTY_FAIL,
EXEC_INPUT_SOCKET,
+ EXEC_INPUT_NAMED_FD,
_EXEC_INPUT_MAX,
_EXEC_INPUT_INVALID = -1
} ExecInput;
@@ -65,6 +66,7 @@ typedef enum ExecOutput {
EXEC_OUTPUT_JOURNAL,
EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
EXEC_OUTPUT_SOCKET,
+ EXEC_OUTPUT_NAMED_FD,
_EXEC_OUTPUT_MAX,
_EXEC_OUTPUT_INVALID = -1
} ExecOutput;
@@ -92,6 +94,8 @@ struct ExecRuntime {
char *tmp_dir;
char *var_tmp_dir;
+ /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
+ * namespace. */
int netns_storage_socket[2];
};
@@ -118,6 +122,7 @@ struct ExecContext {
ExecInput std_input;
ExecOutput std_output;
ExecOutput std_error;
+ char *stdio_fdname[3];
nsec_t timer_slack_nsec;
@@ -169,11 +174,18 @@ struct ExecContext {
bool private_tmp;
bool private_network;
bool private_devices;
+ bool private_users;
ProtectSystem protect_system;
ProtectHome protect_home;
+ bool protect_kernel_tunables;
+ bool protect_kernel_modules;
+ bool protect_control_groups;
bool no_new_privileges;
+ bool dynamic_user;
+ bool remove_ipc;
+
/* This is not exposed to the user but available
* internally. We need it to make sure that whenever we spawn
* /usr/bin/mount it is run in the same process group as us so
@@ -204,6 +216,19 @@ struct ExecContext {
bool no_new_privileges_set:1;
};
+typedef enum ExecFlags {
+ EXEC_CONFIRM_SPAWN = 1U << 0,
+ EXEC_APPLY_PERMISSIONS = 1U << 1,
+ EXEC_APPLY_CHROOT = 1U << 2,
+ EXEC_APPLY_TTY_STDIN = 1U << 3,
+
+ /* The following are not used by execute.c, but by consumers internally */
+ EXEC_PASS_FDS = 1U << 4,
+ EXEC_IS_CONTROL = 1U << 5,
+ EXEC_SETENV_RESULT = 1U << 6,
+ EXEC_SET_WATCHDOG = 1U << 7,
+} ExecFlags;
+
struct ExecParameters {
char **argv;
char **environment;
@@ -212,11 +237,7 @@ struct ExecParameters {
char **fd_names;
unsigned n_fds;
- bool apply_permissions:1;
- bool apply_chroot:1;
- bool apply_tty_stdin:1;
-
- bool confirm_spawn:1;
+ ExecFlags flags;
bool selinux_context_net:1;
bool cgroup_delegate:1;
@@ -235,12 +256,14 @@ struct ExecParameters {
};
#include "unit.h"
+#include "dynamic-user.h"
int exec_spawn(Unit *unit,
ExecCommand *command,
const ExecContext *context,
const ExecParameters *exec_params,
ExecRuntime *runtime,
+ DynamicCreds *dynamic_creds,
pid_t *ret);
void exec_command_done(ExecCommand *c);
@@ -264,6 +287,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix);
int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_root);
int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l);
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
+const char* exec_context_fdname(const ExecContext *c, int fd_index);
bool exec_context_may_touch_console(ExecContext *c);
bool exec_context_maintains_privileges(ExecContext *c);
diff --git a/src/core/job.c b/src/core/job.c
index 7557874d4d..ac6910a906 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
}
static void job_print_status_message(Unit *u, JobType t, JobResult result) {
- static struct {
+ static const struct {
const char *color, *word;
} const statuses[_JOB_RESULT_MAX] = {
- [JOB_DONE] = {ANSI_GREEN, " OK "},
- [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "},
- [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"},
- [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"},
- [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "},
- [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"},
- [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"},
+ [JOB_DONE] = { ANSI_GREEN, " OK " },
+ [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
+ [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
+ [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+ [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " },
+ [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+ [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
};
const char *format;
@@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
if (!format)
return;
+ /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof(buf), format, unit_description(u));
REENABLE_WARNING;
switch (t) {
@@ -927,7 +928,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
u = j->unit;
job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
- failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
+ emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out");
return 0;
}
@@ -997,7 +998,10 @@ char *job_dbus_path(Job *j) {
return p;
}
-int job_serialize(Job *j, FILE *f, FDSet *fds) {
+int job_serialize(Job *j, FILE *f) {
+ assert(j);
+ assert(f);
+
fprintf(f, "job-id=%u\n", j->id);
fprintf(f, "job-type=%s\n", job_type_to_string(j->type));
fprintf(f, "job-state=%s\n", job_state_to_string(j->state));
@@ -1008,15 +1012,16 @@ int job_serialize(Job *j, FILE *f, FDSet *fds) {
if (j->begin_usec > 0)
fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec);
- bus_track_serialize(j->clients, f);
+ bus_track_serialize(j->clients, f, "subscribed");
/* End marker */
fputc('\n', f);
return 0;
}
-int job_deserialize(Job *j, FILE *f, FDSet *fds) {
+int job_deserialize(Job *j, FILE *f) {
assert(j);
+ assert(f);
for (;;) {
char line[LINE_MAX], *l, *v;
@@ -1106,7 +1111,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) {
} else if (streq(l, "subscribed")) {
if (strv_extend(&j->deserialized_clients, v) < 0)
- return log_oom();
+ log_oom();
}
}
}
@@ -1118,9 +1123,8 @@ int job_coldplug(Job *j) {
/* After deserialization is complete and the bus connection
* set up again, let's start watching our subscribers again */
- r = bus_track_coldplug(j->manager, &j->clients, &j->deserialized_clients);
- if (r < 0)
- return r;
+ (void) bus_track_coldplug(j->manager, &j->clients, false, j->deserialized_clients);
+ j->deserialized_clients = strv_free(j->deserialized_clients);
if (j->state == JOB_WAITING)
job_add_to_run_queue(j);
diff --git a/src/core/job.h b/src/core/job.h
index d359e8bb3e..85368f0d30 100644
--- a/src/core/job.h
+++ b/src/core/job.h
@@ -177,8 +177,8 @@ Job* job_install(Job *j);
int job_install_deserialized(Job *j);
void job_uninstall(Job *j);
void job_dump(Job *j, FILE*f, const char *prefix);
-int job_serialize(Job *j, FILE *f, FDSet *fds);
-int job_deserialize(Job *j, FILE *f, FDSet *fds);
+int job_serialize(Job *j, FILE *f);
+int job_deserialize(Job *j, FILE *f);
int job_coldplug(Job *j);
JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 6a5c16a000..af2f9d960b 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -19,9 +19,9 @@ m4_dnl Define the context options only once
m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
`$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context)
$1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory)
-$1.User, config_parse_unit_string_printf, 0, offsetof($1, exec_context.user)
-$1.Group, config_parse_unit_string_printf, 0, offsetof($1, exec_context.group)
-$1.SupplementaryGroups, config_parse_strv, 0, offsetof($1, exec_context.supplementary_groups)
+$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user)
+$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group)
+$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups)
$1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context)
$1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context)
$1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context)
@@ -34,9 +34,10 @@ $1.UMask, config_parse_mode, 0,
$1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment)
$1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files)
$1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment)
-$1.StandardInput, config_parse_input, 0, offsetof($1, exec_context.std_input)
-$1.StandardOutput, config_parse_output, 0, offsetof($1, exec_context.std_output)
-$1.StandardError, config_parse_output, 0, offsetof($1, exec_context.std_error)
+$1.DynamicUser, config_parse_bool, 0, offsetof($1, exec_context.dynamic_user)
+$1.StandardInput, config_parse_exec_input, 0, offsetof($1, exec_context)
+$1.StandardOutput, config_parse_exec_output, 0, offsetof($1, exec_context)
+$1.StandardError, config_parse_exec_output, 0, offsetof($1, exec_context)
$1.TTYPath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.tty_path)
$1.TTYReset, config_parse_bool, 0, offsetof($1, exec_context.tty_reset)
$1.TTYVHangup, config_parse_bool, 0, offsetof($1, exec_context.tty_vhangup)
@@ -87,8 +88,12 @@ $1.ReadWritePaths, config_parse_namespace_path_strv, 0,
$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
-$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
+$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
+$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
+$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
+$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context)
$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context)
@@ -120,6 +125,8 @@ $1.KillSignal, config_parse_signal, 0,
m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight)
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
@@ -127,6 +134,7 @@ $1.MemoryAccounting, config_parse_bool, 0,
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemorySwapMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
@@ -180,13 +188,13 @@ Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0,
Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate)
Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0
Unit.JobTimeoutSec, config_parse_sec_fix_0, 0, offsetof(Unit, job_timeout)
-Unit.JobTimeoutAction, config_parse_failure_action, 0, offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action)
Unit.JobTimeoutRebootArgument, config_parse_string, 0, offsetof(Unit, job_timeout_reboot_arg)
Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
m4_dnl The following is a legacy alias name for compatibility
Unit.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
Unit.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst)
-Unit.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action)
+Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
Unit.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg)
Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions)
Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions)
@@ -243,9 +251,9 @@ Service.WatchdogSec, config_parse_sec, 0,
m4_dnl The following three only exist for compatibility, they moved into Unit, see above
Service.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
Service.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst)
-Service.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action)
+Service.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
Service.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg)
-Service.FailureAction, config_parse_failure_action, 0, offsetof(Service, failure_action)
+Service.FailureAction, config_parse_emergency_action, 0, offsetof(Service, emergency_action)
Service.Type, config_parse_service_type, 0, offsetof(Service, type)
Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart)
Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only)
@@ -285,13 +293,14 @@ Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC
Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command)
Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command)
Socket.TimeoutSec, config_parse_sec, 0, offsetof(Socket, timeout_usec)
-Socket.SocketUser, config_parse_unit_string_printf, 0, offsetof(Socket, user)
-Socket.SocketGroup, config_parse_unit_string_printf, 0, offsetof(Socket, group)
+Socket.SocketUser, config_parse_user_group, 0, offsetof(Socket, user)
+Socket.SocketGroup, config_parse_user_group, 0, offsetof(Socket, group)
Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode)
Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode)
Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept)
Socket.Writable, config_parse_bool, 0, offsetof(Socket, writable)
Socket.MaxConnections, config_parse_unsigned, 0, offsetof(Socket, max_connections)
+Socket.MaxConnectionsPerSource, config_parse_unsigned, 0, offsetof(Socket, max_connections_per_source)
Socket.KeepAlive, config_parse_bool, 0, offsetof(Socket, keep_alive)
Socket.KeepAliveTimeSec, config_parse_sec, 0, offsetof(Socket, keep_alive_time)
Socket.KeepAliveIntervalSec, config_parse_sec, 0, offsetof(Socket, keep_alive_interval)
@@ -350,6 +359,8 @@ Mount.Type, config_parse_string, 0,
Mount.TimeoutSec, config_parse_sec, 0, offsetof(Mount, timeout_usec)
Mount.DirectoryMode, config_parse_mode, 0, offsetof(Mount, directory_mode)
Mount.SloppyOptions, config_parse_bool, 0, offsetof(Mount, sloppy_options)
+Mount.LazyUnmount, config_parse_bool, 0, offsetof(Mount, lazy_unmount)
+Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount)
EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index a36953f766..cbc826809e 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -64,6 +64,7 @@
#include "unit-name.h"
#include "unit-printf.h"
#include "unit.h"
+#include "user-util.h"
#include "utf8.h"
#include "web-util.h"
@@ -490,16 +491,17 @@ int config_parse_socket_bind(const char *unit,
return 0;
}
-int config_parse_exec_nice(const char *unit,
- const char *filename,
- unsigned line,
- const char *section,
- unsigned section_line,
- const char *lvalue,
- int ltype,
- const char *rvalue,
- void *data,
- void *userdata) {
+int config_parse_exec_nice(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
ExecContext *c = data;
int priority, r;
@@ -509,14 +511,13 @@ int config_parse_exec_nice(const char *unit,
assert(rvalue);
assert(data);
- r = safe_atoi(rvalue, &priority);
+ r = parse_nice(rvalue, &priority);
if (r < 0) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority, ignoring: %s", rvalue);
- return 0;
- }
+ if (r == -ERANGE)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Nice priority out of range, ignoring: %s", rvalue);
+ else
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority, ignoring: %s", rvalue);
- if (priority < PRIO_MIN || priority >= PRIO_MAX) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Nice priority out of range, ignoring: %s", rvalue);
return 0;
}
@@ -775,8 +776,104 @@ int config_parse_socket_bindtodevice(
return 0;
}
-DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input literal specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output literal specifier");
+
+int config_parse_exec_input(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ ExecContext *c = data;
+ const char *name;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ name = startswith(rvalue, "fd:");
+ if (name) {
+ /* Strip prefix and validate fd name */
+ if (!fdname_is_valid(name)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+ return 0;
+ }
+ c->std_input = EXEC_INPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], name);
+ if (r < 0)
+ log_oom();
+ return r;
+ } else {
+ ExecInput ei = exec_input_from_string(rvalue);
+ if (ei == _EXEC_INPUT_INVALID)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue);
+ else
+ c->std_input = ei;
+ return 0;
+ }
+}
+
+int config_parse_exec_output(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ ExecContext *c = data;
+ ExecOutput eo;
+ const char *name;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(lvalue);
+ assert(rvalue);
+
+ name = startswith(rvalue, "fd:");
+ if (name) {
+ /* Strip prefix and validate fd name */
+ if (!fdname_is_valid(name)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+ return 0;
+ }
+ eo = EXEC_OUTPUT_NAMED_FD;
+ } else {
+ eo = exec_output_from_string(rvalue);
+ if (eo == _EXEC_OUTPUT_INVALID) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (streq(lvalue, "StandardOutput")) {
+ c->std_output = eo;
+ r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], name);
+ if (r < 0)
+ log_oom();
+ return r;
+ } else if (streq(lvalue, "StandardError")) {
+ c->std_error = eo;
+ r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], name);
+ if (r < 0)
+ log_oom();
+ return r;
+ } else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output property, ignoring: %s", lvalue);
+ return 0;
+ }
+}
int config_parse_exec_io_class(const char *unit,
const char *filename,
@@ -1337,10 +1434,13 @@ int config_parse_timer(const char *unit,
void *userdata) {
Timer *t = data;
- usec_t u = 0;
+ usec_t usec = 0;
TimerValue *v;
TimerBase b;
CalendarSpec *c = NULL;
+ Unit *u = userdata;
+ _cleanup_free_ char *k = NULL;
+ int r;
assert(filename);
assert(lvalue);
@@ -1359,14 +1459,20 @@ int config_parse_timer(const char *unit,
return 0;
}
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
if (b == TIMER_CALENDAR) {
- if (calendar_spec_from_string(rvalue, &c) < 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", rvalue);
+ if (calendar_spec_from_string(k, &c) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k);
return 0;
}
} else {
- if (parse_sec(rvalue, &u) < 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", rvalue);
+ if (parse_sec(k, &usec) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k);
return 0;
}
}
@@ -1378,7 +1484,7 @@ int config_parse_timer(const char *unit,
}
v->base = b;
- v->value = u;
+ v->value = usec;
v->calendar_spec = c;
LIST_PREPEND(value, t->values, v);
@@ -1581,11 +1687,7 @@ int config_parse_fdname(
return 0;
}
- free(s->fdname);
- s->fdname = p;
- p = NULL;
-
- return 0;
+ return free_and_replace(s->fdname, p);
}
int config_parse_service_sockets(
@@ -1763,6 +1865,123 @@ int config_parse_sec_fix_0(
return 0;
}
+int config_parse_user_group(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **user = data, *n;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue))
+ n = NULL;
+ else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k);
+ return 0;
+ }
+
+ n = k;
+ k = NULL;
+ }
+
+ free(*user);
+ *user = n;
+
+ return 0;
+}
+
+int config_parse_user_group_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***users = data;
+ Unit *u = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ char **empty;
+
+ empty = new0(char*, 1);
+ if (!empty)
+ return log_oom();
+
+ strv_free(*users);
+ *users = empty;
+
+ return 0;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, WHITESPACE, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k);
+ continue;
+ }
+
+ r = strv_push(users, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+
+ return 0;
+}
+
int config_parse_busname_service(
const char *unit,
const char *filename,
@@ -1925,9 +2144,7 @@ int config_parse_working_directory(
return 0;
}
- free(c->working_directory);
- c->working_directory = k;
- k = NULL;
+ free_and_replace(c->working_directory, k);
c->working_directory_home = false;
}
@@ -2306,7 +2523,7 @@ int config_parse_unit_condition_null(
}
DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_failure_action, failure_action, FailureAction, "Failed to parse failure action specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier");
int config_parse_unit_requires_mounts_for(
const char *unit,
@@ -2401,6 +2618,7 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
+
static int syscall_filter_parse_one(
const char *unit,
const char *filename,
@@ -2411,27 +2629,29 @@ static int syscall_filter_parse_one(
bool warn) {
int r;
- if (*t == '@') {
- const SystemCallFilterSet *set;
+ if (t[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
- for (set = syscall_filter_sets; set->set_name; set++)
- if (streq(set->set_name, t)) {
- const char *sys;
+ set = syscall_filter_set_find(t);
+ if (!set) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
+ return 0;
+ }
- NULSTR_FOREACH(sys, set->value) {
- r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
- if (r < 0)
- return r;
- }
- break;
- }
+ NULSTR_FOREACH(i, set->value) {
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
+ if (r < 0)
+ return r;
+ }
} else {
int id;
id = seccomp_syscall_resolve_name(t);
if (id == __NR_SCMP_ERROR) {
if (warn)
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
return 0;
}
@@ -2445,8 +2665,9 @@ static int syscall_filter_parse_one(
if (r < 0)
return log_oom();
} else
- set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
}
+
return 0;
}
@@ -2465,8 +2686,7 @@ int config_parse_syscall_filter(
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
- const char *word, *state;
- size_t l;
+ const char *p;
int r;
assert(filename);
@@ -2505,24 +2725,24 @@ int config_parse_syscall_filter(
}
}
- FOREACH_WORD_QUOTED(word, l, rvalue, state) {
- _cleanup_free_ char *t = NULL;
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
- t = strndup(word, l);
- if (!t)
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
- r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
if (r < 0)
return r;
}
- if (!isempty(state))
- log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
-
- /* Turn on NNP, but only if it wasn't configured explicitly
- * before, and only if we are in user mode. */
- if (!c->no_new_privileges_set && MANAGER_IS_USER(u->manager))
- c->no_new_privileges = true;
return 0;
}
@@ -2733,6 +2953,34 @@ int config_parse_unit_slice(
DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
+int config_parse_cpu_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *weight = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = cg_weight_parse(rvalue, weight);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
int config_parse_cpu_shares(
const char *unit,
const char *filename,
@@ -2785,7 +3033,7 @@ int config_parse_cpu_quota(
return 0;
}
- r = parse_percent(rvalue);
+ r = parse_percent_unbounded(rvalue);
if (r <= 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "CPU quota '%s' invalid. Ignoring.", rvalue);
return 0;
@@ -2835,8 +3083,12 @@ int config_parse_memory_limit(
c->memory_high = bytes;
else if (streq(lvalue, "MemoryMax"))
c->memory_max = bytes;
- else
+ else if (streq(lvalue, "MemorySwapMax"))
+ c->memory_swap_max = bytes;
+ else if (streq(lvalue, "MemoryLimit"))
c->memory_limit = bytes;
+ else
+ return -EINVAL;
return 0;
}
@@ -2853,30 +3105,36 @@ int config_parse_tasks_max(
void *data,
void *userdata) {
- uint64_t *tasks_max = data, u;
+ uint64_t *tasks_max = data, v;
+ Unit *u = userdata;
int r;
- if (isempty(rvalue) || streq(rvalue, "infinity")) {
- *tasks_max = (uint64_t) -1;
+ if (isempty(rvalue)) {
+ *tasks_max = u->manager->default_tasks_max;
+ return 0;
+ }
+
+ if (streq(rvalue, "infinity")) {
+ *tasks_max = CGROUP_LIMIT_MAX;
return 0;
}
r = parse_percent(rvalue);
if (r < 0) {
- r = safe_atou64(rvalue, &u);
+ r = safe_atou64(rvalue, &v);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
return 0;
}
} else
- u = system_tasks_max_scale(r, 100U);
+ v = system_tasks_max_scale(r, 100U);
- if (u <= 0 || u >= UINT64_MAX) {
+ if (v <= 0 || v >= UINT64_MAX) {
log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
return 0;
}
- *tasks_max = u;
+ *tasks_max = v;
return 0;
}
@@ -2919,9 +3177,7 @@ int config_parse_device_allow(
if (!path)
return log_oom();
- if (!startswith(path, "/dev/") &&
- !startswith(path, "block-") &&
- !startswith(path, "char-")) {
+ if (!is_deviceallow_pattern(path)) {
log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path);
return 0;
}
@@ -3576,7 +3832,7 @@ int config_parse_no_new_privileges(
return 0;
}
- c->no_new_privileges = !!k;
+ c->no_new_privileges = k;
c->no_new_privileges_set = true;
return 0;
@@ -4026,8 +4282,8 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_exec_cpu_affinity, "CPUAFFINITY" },
{ config_parse_mode, "MODE" },
{ config_parse_unit_env_file, "FILE" },
- { config_parse_output, "OUTPUT" },
- { config_parse_input, "INPUT" },
+ { config_parse_exec_output, "OUTPUT" },
+ { config_parse_exec_input, "INPUT" },
{ config_parse_log_facility, "FACILITY" },
{ config_parse_log_level, "LEVEL" },
{ config_parse_exec_secure_bits, "SECUREBITS" },
@@ -4062,7 +4318,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_unit_slice, "SLICE" },
{ config_parse_documentation, "URL" },
{ config_parse_service_timeout, "SECONDS" },
- { config_parse_failure_action, "ACTION" },
+ { config_parse_emergency_action, "ACTION" },
{ config_parse_set_status, "STATUS" },
{ config_parse_service_sockets, "SOCKETS" },
{ config_parse_environ, "ENVIRON" },
@@ -4073,6 +4329,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_address_families, "FAMILIES" },
#endif
{ config_parse_cpu_shares, "SHARES" },
+ { config_parse_cpu_weight, "WEIGHT" },
{ config_parse_memory_limit, "LIMIT" },
{ config_parse_device_allow, "DEVICE" },
{ config_parse_device_policy, "POLICY" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index b36a2e3a02..c05f205c37 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -45,7 +45,9 @@ int config_parse_service_timeout(const char *unit, const char *filename, unsigne
int config_parse_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_service_restart(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_socket_bindtodevice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_io_class(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_io_priority(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -73,7 +75,7 @@ int config_parse_unit_condition_string(const char *unit, const char *filename, u
int config_parse_unit_condition_null(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_failure_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_emergency_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_syscall_archs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -81,6 +83,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned
int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -111,6 +114,8 @@ int config_parse_exec_utmp_mode(const char *unit, const char *filename, unsigned
int config_parse_working_directory(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_fdname(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/core/main.c b/src/core/main.c
index 33e22e37dc..f07ed71b31 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -72,6 +72,9 @@
#include "process-util.h"
#include "raw-clone.h"
#include "rlimit-util.h"
+#ifdef HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
#include "selinux-setup.h"
#include "selinux-util.h"
#include "signal-util.h"
@@ -86,14 +89,14 @@
#include "user-util.h"
#include "virt.h"
#include "watchdog.h"
+#include "emergency-action.h"
static enum {
ACTION_RUN,
ACTION_HELP,
ACTION_VERSION,
ACTION_TEST,
- ACTION_DUMP_CONFIGURATION_ITEMS,
- ACTION_DONE
+ ACTION_DUMP_CONFIGURATION_ITEMS
} arg_action = ACTION_RUN;
static char *arg_default_unit = NULL;
static bool arg_system = false;
@@ -129,6 +132,7 @@ static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
static uint64_t arg_default_tasks_max = UINT64_MAX;
static sd_id128_t arg_machine_id = {};
+static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
noreturn static void freeze_or_reboot(void) {
@@ -200,7 +204,7 @@ noreturn static void crash(int sig) {
pid, sigchld_code_to_string(status.si_code),
status.si_status,
strna(status.si_code == CLD_EXITED
- ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
+ ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
: signal_to_string(status.si_status)));
else
log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
@@ -304,7 +308,7 @@ static int set_machine_id(const char *m) {
return 0;
}
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
int r;
@@ -700,6 +704,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
{ "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
+ { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
{}
};
@@ -713,7 +718,7 @@ static int parse_config_file(void) {
CONF_PATHS_NULSTR("systemd/system.conf.d") :
CONF_PATHS_NULSTR("systemd/user.conf.d");
- config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
+ config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
/* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
* like everywhere else. */
@@ -994,10 +999,8 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_MACHINE_ID:
r = set_machine_id(optarg);
- if (r < 0) {
- log_error("MachineID '%s' is not valid.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
break;
case 'h':
@@ -1120,7 +1123,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
* later when transitioning from the initrd to the main
* systemd or suchlike. */
if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
- return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
+ return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
/* Make sure forked processes get the default kernel setting */
if (!arg_default_rlimit[RLIMIT_NOFILE]) {
@@ -1137,7 +1140,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
nl.rlim_cur = nl.rlim_max = 64*1024;
r = setrlimit_closest(RLIMIT_NOFILE, &nl);
if (r < 0)
- return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
+ return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
return 0;
}
@@ -1187,6 +1190,9 @@ static int enforce_syscall_archs(Set *archs) {
void *id;
int r;
+ if (!is_seccomp_available())
+ return 0;
+
seccomp = seccomp_init(SCMP_ACT_ALLOW);
if (!seccomp)
return log_oom();
@@ -1319,7 +1325,7 @@ static int fixup_environment(void) {
return r;
if (r == 0) {
- term = strdup(default_term_for_tty("/dev/console") + 5);
+ term = strdup(default_term_for_tty("/dev/console"));
if (!term)
return -ENOMEM;
}
@@ -1415,12 +1421,12 @@ int main(int argc, char *argv[]) {
if (mac_selinux_setup(&loaded_policy) < 0) {
error_message = "Failed to load SELinux policy";
goto finish;
- } else if (ima_setup() < 0) {
- error_message = "Failed to load IMA policy";
- goto finish;
} else if (mac_smack_setup(&loaded_policy) < 0) {
error_message = "Failed to load SMACK policy";
goto finish;
+ } else if (ima_setup() < 0) {
+ error_message = "Failed to load IMA policy";
+ goto finish;
}
dual_timestamp_get(&security_finish_timestamp);
}
@@ -1506,7 +1512,8 @@ int main(int argc, char *argv[]) {
if (getpid() == 1) {
/* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
* will process core dumps for system services by default. */
- (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
+ if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+ log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
/* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
* until the systemd-coredump tool is enabled via sysctl. */
@@ -1524,15 +1531,9 @@ int main(int argc, char *argv[]) {
* need to do that for user instances since they never log
* into the console. */
log_show_color(colors_enabled());
- make_null_stdio();
- }
-
- /* Initialize default unit */
- r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
- if (r < 0) {
- log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
- error_message = "Failed to set default unit";
- goto finish;
+ r = make_null_stdio();
+ if (r < 0)
+ log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
}
r = initialize_join_controllers();
@@ -1560,7 +1561,7 @@ int main(int argc, char *argv[]) {
(void) reset_all_signal_handlers();
(void) ignore_signals(SIGNALS_IGNORE, -1);
- arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */
+ arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
if (parse_config_file() < 0) {
error_message = "Failed to parse config file";
@@ -1568,7 +1569,7 @@ int main(int argc, char *argv[]) {
}
if (arg_system) {
- r = parse_proc_cmdline(parse_proc_cmdline_item);
+ r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
if (r < 0)
log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
}
@@ -1582,6 +1583,16 @@ int main(int argc, char *argv[]) {
goto finish;
}
+ /* Initialize default unit */
+ if (!arg_default_unit) {
+ arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
+ if (!arg_default_unit) {
+ r = log_oom();
+ error_message = "Failed to set default unit";
+ goto finish;
+ }
+ }
+
if (arg_action == ACTION_TEST &&
geteuid() == 0) {
log_error("Don't run test mode as root.");
@@ -1602,11 +1613,10 @@ int main(int argc, char *argv[]) {
goto finish;
}
- if (arg_action == ACTION_TEST)
- skip_setup = true;
-
- if (arg_action == ACTION_TEST || arg_action == ACTION_HELP)
+ if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) {
pager_open(arg_no_pager, false);
+ skip_setup = true;
+ }
if (arg_action == ACTION_HELP) {
retval = help();
@@ -1615,12 +1625,10 @@ int main(int argc, char *argv[]) {
retval = version();
goto finish;
} else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
+ pager_open(arg_no_pager, false);
unit_dump_config_items(stdout);
retval = EXIT_SUCCESS;
goto finish;
- } else if (arg_action == ACTION_DONE) {
- retval = EXIT_SUCCESS;
- goto finish;
}
if (!arg_system &&
@@ -1766,10 +1774,10 @@ int main(int argc, char *argv[]) {
log_warning_errno(errno, "Failed to make us a subreaper: %m");
if (arg_system) {
- bump_rlimit_nofile(&saved_rlimit_nofile);
+ (void) bump_rlimit_nofile(&saved_rlimit_nofile);
if (empty_etc) {
- r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0);
+ r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
if (r < 0)
log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m");
else
@@ -1792,6 +1800,7 @@ int main(int argc, char *argv[]) {
m->initrd_timestamp = initrd_timestamp;
m->security_start_timestamp = security_start_timestamp;
m->security_finish_timestamp = security_finish_timestamp;
+ m->cad_burst_action = arg_cad_burst_action;
manager_set_defaults(m);
manager_set_show_status(m, arg_show_status);
diff --git a/src/core/manager.c b/src/core/manager.c
index 85bf858992..ffccfdcd5e 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -45,6 +45,7 @@
#include "bus-error.h"
#include "bus-kernel.h"
#include "bus-util.h"
+#include "clean-ipc.h"
#include "dbus-job.h"
#include "dbus-manager.h"
#include "dbus-unit.h"
@@ -81,6 +82,7 @@
#include "transaction.h"
#include "umask-util.h"
#include "unit-name.h"
+#include "user-util.h"
#include "util.h"
#include "virt.h"
#include "watchdog.h"
@@ -98,6 +100,7 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
static int manager_run_generators(Manager *m);
@@ -519,6 +522,7 @@ static void manager_clean_environment(Manager *m) {
"LISTEN_FDNAMES",
"WATCHDOG_PID",
"WATCHDOG_USEC",
+ "INVOCATION_ID",
NULL);
}
@@ -553,7 +557,6 @@ static int manager_default_environment(Manager *m) {
return 0;
}
-
int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
Manager *m;
int r;
@@ -580,17 +583,25 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
if (MANAGER_IS_SYSTEM(m)) {
m->unit_log_field = "UNIT=";
m->unit_log_format_string = "UNIT=%s";
+
+ m->invocation_log_field = "INVOCATION_ID=";
+ m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
} else {
m->unit_log_field = "USER_UNIT=";
m->unit_log_format_string = "USER_UNIT=%s";
+
+ m->invocation_log_field = "USER_INVOCATION_ID=";
+ m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
}
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
- m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd =
+ m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
m->ask_password_inotify_fd = -1;
+ m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
+
m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
m->have_ask_password = -EINVAL; /* we don't know */
@@ -657,9 +668,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
goto fail;
}
- /* Note that we set up neither kdbus, nor the notify fd
- * here. We do that after deserialization, since they might
- * have gotten serialized across the reexec. */
+ /* Note that we do not set up the notify fd here. We do that after deserialization,
+ * since they might have gotten serialized across the reexec. */
m->taint_usr = dir_is_empty("/usr") > 0;
@@ -767,7 +777,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
if (!MANAGER_IS_SYSTEM(m))
return 0;
- if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */
return 0;
if (m->cgroups_agent_fd < 0) {
@@ -813,6 +823,59 @@ static int manager_setup_cgroups_agent(Manager *m) {
return 0;
}
+static int manager_setup_user_lookup_fd(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
+ * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
+ * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
+ * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
+ * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
+ * hence we establish this communication channel so that forked off processes can pass their UID/GID
+ * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
+ * datagram, along with their unit name, so that we can share one communication socket pair among all units for
+ * this purpose.
+ *
+ * You might wonder why we need a communication channel for this that is independent of the usual notification
+ * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
+ * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
+ * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
+ *
+ * Note that this function is called under two circumstances: when we first initialize (in which case we
+ * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
+ * (in which case the socket pair already exists but we still need to allocate the event source for it). */
+
+ if (m->user_lookup_fds[0] < 0) {
+
+ /* Free all secondary fields */
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
+
+ (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
+ }
+
+ if (!m->user_lookup_event_source) {
+ r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+
+ /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
+ * resolutions */
+ r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m");
+
+ (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
+ }
+
+ return 0;
+}
+
static int manager_connect_bus(Manager *m, bool reexecuting) {
bool try_bus_connect;
@@ -822,7 +885,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) {
return 0;
try_bus_connect =
- m->kdbus_fd >= 0 ||
reexecuting ||
(MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"));
@@ -854,8 +916,7 @@ enum {
_GC_OFFSET_MAX
};
-static void unit_gc_mark_good(Unit *u, unsigned gc_marker)
-{
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
Iterator i;
Unit *other;
@@ -1004,7 +1065,11 @@ Manager* manager_free(Manager *m) {
bus_done(m);
+ dynamic_user_vacuum(m, false);
+ hashmap_free(m->dynamic_users);
+
hashmap_free(m->units);
+ hashmap_free(m->units_by_invocation_id);
hashmap_free(m->jobs);
hashmap_free(m->watch_pids1);
hashmap_free(m->watch_pids2);
@@ -1019,12 +1084,13 @@ Manager* manager_free(Manager *m) {
sd_event_source_unref(m->time_change_event_source);
sd_event_source_unref(m->jobs_in_progress_event_source);
sd_event_source_unref(m->run_queue_event_source);
+ sd_event_source_unref(m->user_lookup_event_source);
safe_close(m->signal_fd);
safe_close(m->notify_fd);
safe_close(m->cgroups_agent_fd);
safe_close(m->time_change_fd);
- safe_close(m->kdbus_fd);
+ safe_close_pair(m->user_lookup_fds);
manager_close_ask_password(m);
@@ -1050,8 +1116,10 @@ Manager* manager_free(Manager *m) {
assert(hashmap_isempty(m->units_requiring_mounts_for));
hashmap_free(m->units_requiring_mounts_for);
- free(m);
- return NULL;
+ hashmap_free(m->uid_refs);
+ hashmap_free(m->gid_refs);
+
+ return mfree(m);
}
void manager_enumerate(Manager *m) {
@@ -1175,9 +1243,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
return r;
/* Make sure the transient directory always exists, so that it remains in the search path */
- r = mkdir_p_label(m->lookup_paths.transient, 0755);
- if (r < 0)
- return r;
+ if (!m->test_run) {
+ r = mkdir_p_label(m->lookup_paths.transient, 0755);
+ if (r < 0)
+ return r;
+ }
dual_timestamp_get(&m->generators_start_timestamp);
r = manager_run_generators(m);
@@ -1219,14 +1289,26 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
if (q < 0 && r == 0)
r = q;
- /* We might have deserialized the kdbus control fd, but if we
- * didn't, then let's create the bus now. */
- manager_connect_bus(m, !!serialization);
- bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed);
+ q = manager_setup_user_lookup_fd(m);
+ if (q < 0 && r == 0)
+ r = q;
+
+ /* Let's connect to the bus now. */
+ (void) manager_connect_bus(m, !!serialization);
+
+ (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
/* Third, fire things up! */
manager_coldplug(m);
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
if (serialization) {
assert(m->n_reloading > 0);
m->n_reloading--;
@@ -1599,8 +1681,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const
if (UNIT_VTABLE(u)->notify_message)
UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
- else
- log_unit_debug(u, "Got notification message for unit. Ignoring.");
+ else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+
+ x = cescape(buf);
+ if (x)
+ y = ellipsize(x, 20, 90);
+ log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+ }
}
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
@@ -1626,7 +1714,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
struct cmsghdr *cmsg;
struct ucred *ucred = NULL;
- bool found = false;
Unit *u1, *u2, *u3;
int r, *fd_array = NULL;
unsigned n_fds = 0;
@@ -1640,16 +1727,15 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
return 0;
}
- n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
if (n < 0) {
- if (!IN_SET(errno, EAGAIN, EINTR))
- log_error("Failed to receive notification message: %m");
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0; /* Spurious wakeup, try again */
- /* It's not an option to return an error here since it
- * would disable the notification handler entirely. Services
- * wouldn't be able to send the WATCHDOG message for
- * example... */
- return 0;
+ /* If this is any other, real error, then let's stop processing this socket. This of course means we
+ * won't take notification messages anymore, but that's still better than busy looping around this:
+ * being woken up over and over again but being unable to actually read the message off the socket. */
+ return log_error_errno(errno, "Failed to receive notification message: %m");
}
CMSG_FOREACH(cmsg, &msghdr) {
@@ -1682,40 +1768,40 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
return 0;
}
- if ((size_t) n >= sizeof(buf)) {
+ if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
log_warning("Received notify message exceeded maximum size. Ignoring.");
return 0;
}
- /* The message should be a string. Here we make sure it's NUL-terminated,
- * but only the part until first NUL will be used anyway. */
+ /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
+ * trailing NUL byte in the message, but don't expect it. */
+ if (n > 1 && memchr(buf, 0, n-1)) {
+ log_warning("Received notify message with embedded NUL bytes. Ignoring.");
+ return 0;
+ }
+
+ /* Make sure it's NUL-terminated. */
buf[n] = 0;
/* Notify every unit that might be interested, but try
* to avoid notifying the same one multiple times. */
u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
- if (u1) {
+ if (u1)
manager_invoke_notify_message(m, u1, ucred->pid, buf, fds);
- found = true;
- }
u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
- if (u2 && u2 != u1) {
+ if (u2 && u2 != u1)
manager_invoke_notify_message(m, u2, ucred->pid, buf, fds);
- found = true;
- }
u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
- if (u3 && u3 != u2 && u3 != u1) {
+ if (u3 && u3 != u2 && u3 != u1)
manager_invoke_notify_message(m, u3, ucred->pid, buf, fds);
- found = true;
- }
- if (!found)
+ if (!u1 && !u2 && !u3)
log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
if (fdset_size(fds) > 0)
- log_warning("Got auxiliary fds with notification message, closing all.");
+ log_warning("Got extra auxiliary fds with notification message, closing them.");
return 0;
}
@@ -1820,6 +1906,18 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) {
return r;
}
+static void manager_handle_ctrl_alt_del(Manager *m) {
+ /* If the user presses C-A-D more than
+ * 7 times within 2s, we reboot/shutdown immediately,
+ * unless it was disabled in system.conf */
+
+ if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
+ manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
+ else
+ emergency_action(m, m->cad_burst_action, NULL,
+ "Ctrl-Alt-Del was pressed more than 7 times within 2s");
+}
+
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Manager *m = userdata;
ssize_t n;
@@ -1838,14 +1936,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
for (;;) {
n = read(m->signal_fd, &sfsi, sizeof(sfsi));
if (n != sizeof(sfsi)) {
+ if (n >= 0) {
+ log_warning("Truncated read from signal fd (%zu bytes)!", n);
+ return 0;
+ }
- if (n >= 0)
- return -EIO;
-
- if (errno == EINTR || errno == EAGAIN)
+ if (IN_SET(errno, EINTR, EAGAIN))
break;
- return -errno;
+ /* We return an error here, which will kill this handler,
+ * to avoid a busy loop on read error. */
+ return log_error_errno(errno, "Reading from signal fd failed: %m");
}
log_received_signal(sfsi.ssi_signo == SIGCHLD ||
@@ -1871,19 +1972,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
case SIGINT:
if (MANAGER_IS_SYSTEM(m)) {
-
- /* If the user presses C-A-D more than
- * 7 times within 2s, we reboot
- * immediately. */
-
- if (ratelimit_test(&m->ctrl_alt_del_ratelimit))
- manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
- else {
- log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
- status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
- m->exit_code = MANAGER_REBOOT;
- }
-
+ manager_handle_ctrl_alt_del(m);
break;
}
@@ -2169,6 +2258,7 @@ int manager_loop(Manager *m) {
int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
_cleanup_free_ char *n = NULL;
+ sd_id128_t invocation_id;
Unit *u;
int r;
@@ -2180,12 +2270,25 @@ int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e,
if (r < 0)
return r;
+ /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
+ * as invocation ID. */
+ r = sd_id128_from_string(n, &invocation_id);
+ if (r >= 0) {
+ u = hashmap_get(m->units_by_invocation_id, &invocation_id);
+ if (u) {
+ *_u = u;
+ return 0;
+ }
+
+ return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id));
+ }
+
+ /* If this didn't work, we use the suffix as unit name. */
r = manager_load_unit(m, n, NULL, e, &u);
if (r < 0)
return r;
*_u = u;
-
return 0;
}
@@ -2397,17 +2500,28 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
fprintf(f, "cgroups-agent-fd=%i\n", copy);
}
- if (m->kdbus_fd >= 0) {
- int copy;
+ if (m->user_lookup_fds[0] >= 0) {
+ int copy0, copy1;
- copy = fdset_put_dup(fds, m->kdbus_fd);
- if (copy < 0)
- return copy;
+ copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
+ if (copy0 < 0)
+ return copy0;
+
+ copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
+ if (copy1 < 0)
+ return copy1;
- fprintf(f, "kdbus-fd=%i\n", copy);
+ fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
}
- bus_track_serialize(m->subscribed, f);
+ bus_track_serialize(m->subscribed, f, "subscribed");
+
+ r = dynamic_user_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
+ manager_serialize_uid_refs(m, f);
+ manager_serialize_gid_refs(m, f);
fputc('\n', f);
@@ -2575,25 +2689,31 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
m->cgroups_agent_fd = fdset_remove(fds, fd);
}
- } else if (startswith(l, "kdbus-fd=")) {
- int fd;
+ } else if (startswith(l, "user-lookup=")) {
+ int fd0, fd1;
- if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
- log_debug("Failed to parse kdbus fd: %s", l + 9);
+ if (sscanf(l + 12, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
+ log_debug("Failed to parse user lookup fd: %s", l + 12);
else {
- safe_close(m->kdbus_fd);
- m->kdbus_fd = fdset_remove(fds, fd);
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_fds[0] = fdset_remove(fds, fd0);
+ m->user_lookup_fds[1] = fdset_remove(fds, fd1);
}
- } else {
- int k;
+ } else if (startswith(l, "dynamic-user="))
+ dynamic_user_deserialize_one(m, l + 13, fds);
+ else if (startswith(l, "destroy-ipc-uid="))
+ manager_deserialize_uid_refs_one(m, l + 16);
+ else if (startswith(l, "destroy-ipc-gid="))
+ manager_deserialize_gid_refs_one(m, l + 16);
+ else if (startswith(l, "subscribed=")) {
- k = bus_track_deserialize_item(&m->deserialized_subscribed, l);
- if (k < 0)
- log_debug_errno(k, "Failed to deserialize bus tracker object: %m");
- else if (k == 0)
- log_debug("Unknown serialization item '%s'", l);
- }
+ if (strv_extend(&m->deserialized_subscribed, l+11) < 0)
+ log_oom();
+
+ } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */
+ log_debug("Unknown serialization item '%s'", l);
}
for (;;) {
@@ -2666,6 +2786,9 @@ int manager_reload(Manager *m) {
manager_clear_jobs_and_units(m);
lookup_paths_flush_generator(&m->lookup_paths);
lookup_paths_free(&m->lookup_paths);
+ dynamic_user_vacuum(m, false);
+ m->uid_refs = hashmap_free(m->uid_refs);
+ m->gid_refs = hashmap_free(m->gid_refs);
q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
if (q < 0 && r >= 0)
@@ -2699,9 +2822,20 @@ int manager_reload(Manager *m) {
if (q < 0 && r >= 0)
r = q;
+ q = manager_setup_user_lookup_fd(m);
+ if (q < 0 && r >= 0)
+ r = q;
+
/* Third, fire things up! */
manager_coldplug(m);
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
/* Sync current state of bus names with our set of listening units */
if (m->api_bus)
manager_sync_bus_names(m, m->api_bus);
@@ -2947,7 +3081,7 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
if (!m->rlimit[i])
- return -ENOMEM;
+ return log_oom();
}
return 0;
@@ -3135,6 +3269,300 @@ ManagerState manager_state(Manager *m) {
return MANAGER_RUNNING;
}
+#define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
+
+static void manager_unref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool destroy_now,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ uint32_t c, n;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+ assert(_clean_ipc);
+
+ /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules.
+ *
+ * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
+ * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
+ * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
+ * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ assert(n > 0);
+ n--;
+
+ if (destroy_now && n == 0) {
+ hashmap_remove(*uid_refs, UID_TO_PTR(uid));
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+ } else {
+ c = n | (c & DESTROY_IPC_FLAG);
+ assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
+ }
+}
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
+}
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
+}
+
+static int manager_ref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool clean_ipc) {
+
+ uint32_t c, n;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+
+ /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return 0;
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ n++;
+
+ if (n & DESTROY_IPC_FLAG) /* check for overflow */
+ return -EOVERFLOW;
+
+ c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
+
+ return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+}
+
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
+}
+
+int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
+}
+
+static void manager_vacuum_uid_refs_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ Iterator i;
+ void *p, *k;
+
+ assert(m);
+ assert(uid_refs);
+ assert(_clean_ipc);
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+ uint32_t c, n;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ n = c & ~DESTROY_IPC_FLAG;
+ if (n > 0)
+ continue;
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+
+ assert_se(hashmap_remove(*uid_refs, k) == p);
+ }
+}
+
+void manager_vacuum_uid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
+}
+
+void manager_vacuum_gid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
+}
+
+static void manager_serialize_uid_refs_internal(
+ Manager *m,
+ FILE *f,
+ Hashmap **uid_refs,
+ const char *field_name) {
+
+ Iterator i;
+ void *p, *k;
+
+ assert(m);
+ assert(f);
+ assert(uid_refs);
+ assert(field_name);
+
+ /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter
+ * of it is better rebuild after a reload/reexec. */
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+ uint32_t c;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ if (!(c & DESTROY_IPC_FLAG))
+ continue;
+
+ fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
+ }
+}
+
+void manager_serialize_uid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
+}
+
+void manager_serialize_gid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
+}
+
+static void manager_deserialize_uid_refs_one_internal(
+ Manager *m,
+ Hashmap** uid_refs,
+ const char *value) {
+
+ uid_t uid;
+ uint32_t c;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(value);
+
+ r = parse_uid(value, &uid);
+ if (r < 0 || uid == 0) {
+ log_debug("Unable to parse UID reference serialization");
+ return;
+ }
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+ if (c & DESTROY_IPC_FLAG)
+ return;
+
+ c |= DESTROY_IPC_FLAG;
+
+ r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+ if (r < 0) {
+ log_debug("Failed to add UID reference entry");
+ return;
+ }
+}
+
+void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
+}
+
+void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
+}
+
+int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ struct buffer {
+ uid_t uid;
+ gid_t gid;
+ char unit_name[UNIT_NAME_MAX+1];
+ } _packed_ buffer;
+
+ Manager *m = userdata;
+ ssize_t l;
+ size_t n;
+ Unit *u;
+
+ assert_se(source);
+ assert_se(m);
+
+ /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
+ * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
+ * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
+
+ l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from user lookup fd: %m");
+ }
+
+ if ((size_t) l <= offsetof(struct buffer, unit_name)) {
+ log_warning("Received too short user lookup message, ignoring.");
+ return 0;
+ }
+
+ if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
+ log_warning("Received too long user lookup message, ignoring.");
+ return 0;
+ }
+
+ if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
+ log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
+ return 0;
+ }
+
+ n = (size_t) l - offsetof(struct buffer, unit_name);
+ if (memchr(buffer.unit_name, 0, n)) {
+ log_warning("Received lookup message with embedded NUL character, ignoring.");
+ return 0;
+ }
+
+ buffer.unit_name[n] = 0;
+ u = manager_get_unit(m, buffer.unit_name);
+ if (!u) {
+ log_debug("Got user lookup message but unit doesn't exist, ignoring.");
+ return 0;
+ }
+
+ log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
+
+ unit_notify_user_lookup(u, buffer.uid, buffer.gid);
+ return 0;
+}
+
static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
[MANAGER_INITIALIZING] = "initializing",
[MANAGER_STARTING] = "starting",
diff --git a/src/core/manager.h b/src/core/manager.h
index 6ed15c1a41..35172fdba9 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -81,6 +81,7 @@ struct Manager {
/* Active jobs and units */
Hashmap *units; /* name string => Unit object n:1 */
+ Hashmap *units_by_invocation_id;
Hashmap *jobs; /* job id => Job object 1:1 */
/* To make it easy to iterate through the units of a specific
@@ -143,6 +144,9 @@ struct Manager {
sd_event_source *jobs_in_progress_event_source;
+ int user_lookup_fds[2];
+ sd_event_source *user_lookup_event_source;
+
UnitFileScope unit_file_scope;
LookupPaths lookup_paths;
Set *unit_path_cache;
@@ -234,7 +238,6 @@ struct Manager {
bool dispatching_dbus_queue:1;
bool taint_usr:1;
-
bool test_run:1;
/* If non-zero, exit with the following value when the systemd
@@ -292,18 +295,26 @@ struct Manager {
* value where Unit objects are contained. */
Hashmap *units_requiring_mounts_for;
- /* Reference to the kdbus bus control fd */
- int kdbus_fd;
-
/* Used for processing polkit authorization responses */
Hashmap *polkit_registry;
- /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */
+ /* Dynamic users/groups, indexed by their name */
+ Hashmap *dynamic_users;
+
+ /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
+ Hashmap *uid_refs;
+ Hashmap *gid_refs;
+
+ /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
RateLimit ctrl_alt_del_ratelimit;
+ EmergencyAction cad_burst_action;
const char *unit_log_field;
const char *unit_log_format_string;
+ const char *invocation_log_field;
+ const char *invocation_log_format_string;
+
int first_boot; /* tri-state */
};
@@ -375,5 +386,20 @@ ManagerState manager_state(Manager *m);
int manager_update_failed_units(Manager *m, Unit *u, bool failed);
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
+int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now);
+
+void manager_vacuum_uid_refs(Manager *m);
+void manager_vacuum_gid_refs(Manager *m);
+
+void manager_serialize_uid_refs(Manager *m, FILE *f);
+void manager_deserialize_uid_refs_one(Manager *m, const char *value);
+
+void manager_serialize_gid_refs(Manager *m, FILE *f);
+void manager_deserialize_gid_refs_one(Manager *m, const char *value);
+
const char *manager_state_to_string(ManagerState m) _const_;
ManagerState manager_state_from_string(const char *s) _pure_;
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 5d8ab0ec70..ca63a93e8b 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -99,10 +99,12 @@ static const MountPoint mount_table[] = {
cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER },
{ "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_wanted, MNT_IN_CONTAINER },
+ cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER },
{ "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_NONE },
#ifdef ENABLE_EFI
diff --git a/src/core/mount.c b/src/core/mount.c
index fda4d65d6f..d749e49df5 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -159,17 +159,6 @@ static void mount_init(Unit *u) {
m->timeout_usec = u->manager->default_timeout_start_usec;
m->directory_mode = 0755;
- if (unit_has_name(u, "-.mount")) {
- /* Don't allow start/stop for root directory */
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
- } else {
- /* The stdio/kmsg bridge socket is on /, in order to avoid a
- * dep loop, don't use kmsg logging for -.mount */
- m->exec_context.std_output = u->manager->default_std_output;
- m->exec_context.std_error = u->manager->default_std_error;
- }
-
/* We need to make sure that /usr/bin/mount is always called
* in the same process group as us, so that the autofs kernel
* side doesn't send us another mount request while we are
@@ -245,6 +234,8 @@ static void mount_done(Unit *u) {
exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
m->control_command = NULL;
+ dynamic_creds_unref(&m->dynamic_creds);
+
mount_unwatch_control_pid(m);
m->timer_event_source = sd_event_source_unref(m->timer_event_source);
@@ -482,6 +473,7 @@ static int mount_add_default_dependencies(Mount *m) {
static int mount_verify(Mount *m) {
_cleanup_free_ char *e = NULL;
+ MountParameters *p;
int r;
assert(m);
@@ -506,7 +498,8 @@ static int mount_verify(Mount *m) {
return -EINVAL;
}
- if (UNIT(m)->fragment_path && !m->parameters_fragment.what) {
+ p = get_mount_parameters_fragment(m);
+ if (p && !p->what) {
log_unit_error(UNIT(m), "What= setting is missing. Refusing.");
return -EBADMSG;
}
@@ -573,6 +566,25 @@ static int mount_add_extras(Mount *m) {
return 0;
}
+static int mount_load_root_mount(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+ return 0;
+
+ u->perpetual = true;
+ u->default_dependencies = false;
+
+ /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+ MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+ MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+ if (!u->description)
+ u->description = strdup("Root Mount");
+
+ return 1;
+}
+
static int mount_load(Unit *u) {
Mount *m = MOUNT(u);
int r;
@@ -580,11 +592,14 @@ static int mount_load(Unit *u) {
assert(u);
assert(u->load_state == UNIT_STUB);
- if (m->from_proc_self_mountinfo)
+ r = mount_load_root_mount(u);
+ if (r < 0)
+ return r;
+
+ if (m->from_proc_self_mountinfo || u->perpetual)
r = unit_load_fragment_and_dropin_optional(u);
else
r = unit_load_fragment_and_dropin(u);
-
if (r < 0)
return r;
@@ -648,6 +663,9 @@ static int mount_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
mount_set_state(m, new_state);
return 0;
}
@@ -670,7 +688,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
"%sOptions: %s\n"
"%sFrom /proc/self/mountinfo: %s\n"
"%sFrom fragment: %s\n"
- "%sDirectoryMode: %04o\n",
+ "%sDirectoryMode: %04o\n"
+ "%sSloppyOptions: %s\n"
+ "%sLazyUnmount: %s\n"
+ "%sForceUnmount: %s\n",
prefix, mount_state_to_string(m->state),
prefix, mount_result_to_string(m->result),
prefix, m->where,
@@ -679,7 +700,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
prefix, p ? strna(p->options) : "n/a",
prefix, yes_no(m->from_proc_self_mountinfo),
prefix, yes_no(m->from_fragment),
- prefix, m->directory_mode);
+ prefix, m->directory_mode,
+ prefix, yes_no(m->sloppy_options),
+ prefix, yes_no(m->lazy_unmount),
+ prefix, yes_no(m->force_unmount));
if (m->control_pid > 0)
fprintf(f,
@@ -694,12 +718,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
pid_t pid;
int r;
ExecParameters exec_params = {
- .apply_permissions = true,
- .apply_chroot = true,
- .apply_tty_stdin = true,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
assert(m);
@@ -716,12 +738,16 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(m));
+ if (r < 0)
+ return r;
+
r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
if (r < 0)
return r;
exec_params.environment = UNIT(m)->manager->environment;
- exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(m)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(m)->cgroup_path;
exec_params.cgroup_delegate = m->cgroup_context.delegate;
@@ -732,6 +758,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
&m->exec_context,
&exec_params,
m->exec_runtime,
+ &m->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -749,21 +776,25 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
static void mount_enter_dead(Mount *m, MountResult f) {
assert(m);
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
+ mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+
exec_runtime_destroy(m->exec_runtime);
m->exec_runtime = exec_runtime_unref(m->exec_runtime);
exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager));
- mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+ unit_unref_uid_gid(UNIT(m), true);
+
+ dynamic_creds_destroy(&m->dynamic_creds);
}
static void mount_enter_mounted(Mount *m, MountResult f) {
assert(m);
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
mount_set_state(m, MOUNT_MOUNTED);
@@ -774,7 +805,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
assert(m);
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
r = unit_kill_context(
@@ -810,7 +841,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
fail:
log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
- if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL)
+ if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL))
mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES);
else
mount_enter_dead(m, MOUNT_FAILURE_RESOURCES);
@@ -832,6 +863,10 @@ static void mount_enter_unmounting(Mount *m) {
m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT;
r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL);
+ if (r >= 0 && m->lazy_unmount)
+ r = exec_command_append(m->control_command, "-l", NULL);
+ if (r >= 0 && m->force_unmount)
+ r = exec_command_append(m->control_command, "-f", NULL);
if (r < 0)
goto fail;
@@ -850,11 +885,6 @@ fail:
mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES);
}
-static int mount_get_opts(Mount *m, char **ret) {
- return fstab_filter_options(m->parameters_fragment.options,
- "nofail\0" "noauto\0" "auto\0", NULL, NULL, ret);
-}
-
static void mount_enter_mounting(Mount *m) {
int r;
MountParameters *p;
@@ -877,19 +907,18 @@ static void mount_enter_mounting(Mount *m) {
if (p && mount_is_bind(p))
(void) mkdir_p_label(p->what, m->directory_mode);
- if (m->from_fragment) {
+ if (p) {
_cleanup_free_ char *opts = NULL;
- r = mount_get_opts(m, &opts);
+ r = fstab_filter_options(p->options, "nofail\0" "noauto\0" "auto\0", NULL, NULL, &opts);
if (r < 0)
goto fail;
- r = exec_command_set(m->control_command, MOUNT_PATH,
- m->parameters_fragment.what, m->where, NULL);
+ r = exec_command_set(m->control_command, MOUNT_PATH, p->what, m->where, NULL);
if (r >= 0 && m->sloppy_options)
r = exec_command_append(m->control_command, "-s", NULL);
- if (r >= 0 && m->parameters_fragment.fstype)
- r = exec_command_append(m->control_command, "-t", m->parameters_fragment.fstype, NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
if (r >= 0 && !isempty(opts))
r = exec_command_append(m->control_command, "-o", opts, NULL);
} else
@@ -915,27 +944,29 @@ fail:
static void mount_enter_remounting(Mount *m) {
int r;
+ MountParameters *p;
assert(m);
m->control_command_id = MOUNT_EXEC_REMOUNT;
m->control_command = m->exec_command + MOUNT_EXEC_REMOUNT;
- if (m->from_fragment) {
+ p = get_mount_parameters_fragment(m);
+ if (p) {
const char *o;
- if (m->parameters_fragment.options)
- o = strjoina("remount,", m->parameters_fragment.options);
+ if (p->options)
+ o = strjoina("remount,", p->options);
else
o = "remount";
r = exec_command_set(m->control_command, MOUNT_PATH,
- m->parameters_fragment.what, m->where,
+ p->what, m->where,
"-o", o, NULL);
if (r >= 0 && m->sloppy_options)
r = exec_command_append(m->control_command, "-s", NULL);
- if (r >= 0 && m->parameters_fragment.fstype)
- r = exec_command_append(m->control_command, "-t", m->parameters_fragment.fstype, NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
} else
r = -ENOENT;
@@ -966,18 +997,19 @@ static int mount_start(Unit *u) {
/* We cannot fulfill this request right now, try again later
* please! */
- if (m->state == MOUNT_UNMOUNTING ||
- m->state == MOUNT_UNMOUNTING_SIGTERM ||
- m->state == MOUNT_UNMOUNTING_SIGKILL ||
- m->state == MOUNT_MOUNTING_SIGTERM ||
- m->state == MOUNT_MOUNTING_SIGKILL)
+ if (IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_MOUNTING_SIGTERM,
+ MOUNT_MOUNTING_SIGKILL))
return -EAGAIN;
/* Already on it! */
if (m->state == MOUNT_MOUNTING)
return 0;
- assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED);
+ assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
r = unit_start_limit_test(u);
if (r < 0) {
@@ -985,6 +1017,10 @@ static int mount_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
m->result = MOUNT_SUCCESS;
m->reload_result = MOUNT_SUCCESS;
m->reset_cpu_usage = true;
@@ -999,19 +1035,21 @@ static int mount_stop(Unit *u) {
assert(m);
/* Already on it */
- if (m->state == MOUNT_UNMOUNTING ||
- m->state == MOUNT_UNMOUNTING_SIGKILL ||
- m->state == MOUNT_UNMOUNTING_SIGTERM ||
- m->state == MOUNT_MOUNTING_SIGTERM ||
- m->state == MOUNT_MOUNTING_SIGKILL)
+ if (IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_MOUNTING_SIGTERM,
+ MOUNT_MOUNTING_SIGKILL))
return 0;
- assert(m->state == MOUNT_MOUNTING ||
- m->state == MOUNT_MOUNTING_DONE ||
- m->state == MOUNT_MOUNTED ||
- m->state == MOUNT_REMOUNTING ||
- m->state == MOUNT_REMOUNTING_SIGTERM ||
- m->state == MOUNT_REMOUNTING_SIGKILL);
+ assert(IN_SET(m->state,
+ MOUNT_MOUNTING,
+ MOUNT_MOUNTING_DONE,
+ MOUNT_MOUNTED,
+ MOUNT_REMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL));
mount_enter_unmounting(m);
return 1;
@@ -1139,7 +1177,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
m->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = MOUNT_SUCCESS;
else if (code == CLD_EXITED)
f = MOUNT_FAILURE_EXIT_CODE;
@@ -1150,7 +1188,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
else
assert_not_reached("Unknown code");
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
if (m->control_command) {
@@ -1177,9 +1215,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
case MOUNT_MOUNTING_SIGKILL:
case MOUNT_MOUNTING_SIGTERM:
- if (f == MOUNT_SUCCESS)
- mount_enter_mounted(m, f);
- else if (m->from_proc_self_mountinfo)
+ if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo)
+ /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are
+ * happy. If we see the first condition first, we should see the the second condition
+ * immediately after – or /bin/mount lies to us and is broken. */
mount_enter_mounted(m, f);
else
mount_enter_dead(m, f);
@@ -1365,11 +1404,7 @@ static int mount_setup_unit(
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Mount));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Mount), e, &u);
if (r < 0)
goto fail;
@@ -1456,17 +1491,9 @@ static int mount_setup_unit(
MOUNT(u)->from_proc_self_mountinfo = true;
- free(p->what);
- p->what = w;
- w = NULL;
-
- free(p->options);
- p->options = o;
- o = NULL;
-
- free(p->fstype);
- p->fstype = f;
- f = NULL;
+ free_and_replace(p->what, w);
+ free_and_replace(p->options, o);
+ free_and_replace(p->fstype, f);
if (load_extras) {
r = mount_add_extras(MOUNT(u));
@@ -1572,11 +1599,46 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) {
return 1;
}
+static int synthesize_root_mount(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+ * unconditionally synthesize it here and mark it as perpetual. */
+
+ u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
+ }
+
+ u->perpetual = true;
+ MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+static bool mount_is_mounted(Mount *m) {
+ assert(m);
+
+ return UNIT(m)->perpetual || m->is_mounted;
+}
+
static void mount_enumerate(Manager *m) {
int r;
assert(m);
+ r = synthesize_root_mount(m);
+ if (r < 0)
+ goto fail;
+
mnt_init_debug(0);
if (!m->mount_monitor) {
@@ -1683,7 +1745,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
Mount *mount = MOUNT(u);
- if (!mount->is_mounted) {
+ if (!mount_is_mounted(mount)) {
/* A mount point is not around right now. It
* might be gone, or might never have
@@ -1722,9 +1784,10 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
case MOUNT_DEAD:
case MOUNT_FAILED:
- /* This has just been mounted by
- * somebody else, follow the state
- * change. */
+
+ /* This has just been mounted by somebody else, follow the state change, but let's
+ * generate a new invocation ID for this implicitly and automatically. */
+ (void) unit_acquire_invocation_id(UNIT(mount));
mount_enter_mounted(mount, MOUNT_SUCCESS);
break;
@@ -1743,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
}
}
- if (mount->is_mounted &&
+ if (mount_is_mounted(mount) &&
mount->from_proc_self_mountinfo &&
mount->parameters_proc_self_mountinfo.what) {
@@ -1817,6 +1880,7 @@ const UnitVTable mount_vtable = {
.cgroup_context_offset = offsetof(Mount, cgroup_context),
.kill_context_offset = offsetof(Mount, kill_context),
.exec_runtime_offset = offsetof(Mount, exec_runtime),
+ .dynamic_creds_offset = offsetof(Mount, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/mount.h b/src/core/mount.h
index da529c44f4..9f7326ba6a 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -21,8 +21,8 @@
typedef struct Mount Mount;
-#include "execute.h"
#include "kill.h"
+#include "dynamic-user.h"
typedef enum MountExecCommand {
MOUNT_EXEC_MOUNT,
@@ -71,6 +71,9 @@ struct Mount {
bool sloppy_options;
+ bool lazy_unmount;
+ bool force_unmount;
+
MountResult result;
MountResult reload_result;
@@ -85,6 +88,7 @@ struct Mount {
CGroupContext cgroup_context;
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
MountState state, deserialized_state;
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 52a2505d94..1195e9a854 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -29,6 +29,7 @@
#include "alloc-util.h"
#include "dev-setup.h"
#include "fd-util.h"
+#include "fs-util.h"
#include "loopback-setup.h"
#include "missing.h"
#include "mkdir.h"
@@ -53,61 +54,245 @@ typedef enum MountMode {
PRIVATE_TMP,
PRIVATE_VAR_TMP,
PRIVATE_DEV,
- READWRITE
+ READWRITE,
} MountMode;
typedef struct BindMount {
- const char *path;
+ const char *path; /* stack memory, doesn't need to be freed explicitly */
+ char *chased; /* malloc()ed memory, needs to be freed */
MountMode mode;
- bool done;
- bool ignore;
+ bool ignore; /* Ignore if path does not exist */
} BindMount;
+typedef struct TargetMount {
+ const char *path;
+ MountMode mode;
+ bool ignore; /* Ignore if path does not exist */
+} TargetMount;
+
+/*
+ * The following Protect tables are to protect paths and mark some of them
+ * READONLY, in case a path is covered by an option from another table, then
+ * it is marked READWRITE in the current one, and the more restrictive mode is
+ * applied from that other table. This way all options can be combined in a
+ * safe and comprehensible way for users.
+ */
+
+/* ProtectKernelTunables= option and the related filesystem APIs */
+static const TargetMount protect_kernel_tunables_table[] = {
+ { "/proc/sys", READONLY, false },
+ { "/proc/sysrq-trigger", READONLY, true },
+ { "/proc/latency_stats", READONLY, true },
+ { "/proc/mtrr", READONLY, true },
+ { "/proc/apm", READONLY, true },
+ { "/proc/acpi", READONLY, true },
+ { "/proc/timer_stats", READONLY, true },
+ { "/proc/asound", READONLY, true },
+ { "/proc/bus", READONLY, true },
+ { "/proc/fs", READONLY, true },
+ { "/proc/irq", READONLY, true },
+ { "/sys", READONLY, false },
+ { "/sys/kernel/debug", READONLY, true },
+ { "/sys/kernel/tracing", READONLY, true },
+ { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */
+};
+
+/* ProtectKernelModules= option */
+static const TargetMount protect_kernel_modules_table[] = {
+#ifdef HAVE_SPLIT_USR
+ { "/lib/modules", INACCESSIBLE, true },
+#endif
+ { "/usr/lib/modules", INACCESSIBLE, true },
+};
+
+/*
+ * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
+ * system should be protected by ProtectSystem=
+ */
+static const TargetMount protect_home_read_only_table[] = {
+ { "/home", READONLY, true },
+ { "/run/user", READONLY, true },
+ { "/root", READONLY, true },
+};
+
+/* ProtectHome=yes table */
+static const TargetMount protect_home_yes_table[] = {
+ { "/home", INACCESSIBLE, true },
+ { "/run/user", INACCESSIBLE, true },
+ { "/root", INACCESSIBLE, true },
+};
+
+/* ProtectSystem=yes table */
+static const TargetMount protect_system_yes_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+};
+
+/* ProtectSystem=full includes ProtectSystem=yes */
+static const TargetMount protect_system_full_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+ { "/etc", READONLY, false },
+};
+
+/*
+ * ProtectSystem=strict table. In this strict mode, we mount everything
+ * read-only, except for /proc, /dev, /sys which are the kernel API VFS,
+ * which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal.
+ * (And of course /home and friends are also left writable, as ProtectHome=
+ * shall manage those, orthogonally).
+ */
+static const TargetMount protect_system_strict_table[] = {
+ { "/", READONLY, false },
+ { "/proc", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/sys", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/dev", READWRITE, false }, /* PrivateDevices= */
+ { "/home", READWRITE, true }, /* ProtectHome= */
+ { "/run/user", READWRITE, true }, /* ProtectHome= */
+ { "/root", READWRITE, true }, /* ProtectHome= */
+};
+
+static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) {
+ (*p)->path = path;
+ (*p)->mode = mode;
+ (*p)->ignore = ignore;
+}
+
static int append_mounts(BindMount **p, char **strv, MountMode mode) {
char **i;
assert(p);
STRV_FOREACH(i, strv) {
+ bool ignore = false;
- (*p)->ignore = false;
- (*p)->done = false;
-
- if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
- (*p)->ignore = true;
+ if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) {
(*i)++;
+ ignore = true;
}
if (!path_is_absolute(*i))
return -EINVAL;
- (*p)->path = *i;
- (*p)->mode = mode;
+ set_bind_mount(p, *i, mode, ignore);
(*p)++;
}
return 0;
}
-static int mount_path_compare(const void *a, const void *b) {
- const BindMount *p = a, *q = b;
- int d;
+static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) {
+ unsigned i;
- d = path_compare(p->path, q->path);
+ assert(p);
+ assert(mounts);
+
+ for (i = 0; i < size; i++) {
+ /*
+ * Here we assume that the ignore field is set during
+ * declaration we do not support "-" at the beginning.
+ */
+ const TargetMount *m = &mounts[i];
+ const char *path = prefix_roota(root_directory, m->path);
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ set_bind_mount(p, path, m->mode, m->ignore);
+ (*p)++;
+ }
+
+ return 0;
+}
+
+static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) {
+ assert(p);
+
+ return append_target_mounts(p, root_directory, protect_kernel_tunables_table,
+ ELEMENTSOF(protect_kernel_tunables_table));
+}
+
+static int append_protect_kernel_modules(BindMount **p, const char *root_directory) {
+ assert(p);
+
+ return append_target_mounts(p, root_directory, protect_kernel_modules_table,
+ ELEMENTSOF(protect_kernel_modules_table));
+}
+
+static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
+ int r = 0;
+
+ assert(p);
+
+ if (protect_home == PROTECT_HOME_NO)
+ return 0;
+
+ switch (protect_home) {
+ case PROTECT_HOME_READ_ONLY:
+ r = append_target_mounts(p, root_directory, protect_home_read_only_table,
+ ELEMENTSOF(protect_home_read_only_table));
+ break;
+ case PROTECT_HOME_YES:
+ r = append_target_mounts(p, root_directory, protect_home_yes_table,
+ ELEMENTSOF(protect_home_yes_table));
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
- if (d == 0) {
- /* If the paths are equal, check the mode */
- if (p->mode < q->mode)
- return -1;
+static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) {
+ int r = 0;
- if (p->mode > q->mode)
- return 1;
+ assert(p);
+ if (protect_system == PROTECT_SYSTEM_NO)
return 0;
+
+ switch (protect_system) {
+ case PROTECT_SYSTEM_STRICT:
+ r = append_target_mounts(p, root_directory, protect_system_strict_table,
+ ELEMENTSOF(protect_system_strict_table));
+ break;
+ case PROTECT_SYSTEM_YES:
+ r = append_target_mounts(p, root_directory, protect_system_yes_table,
+ ELEMENTSOF(protect_system_yes_table));
+ break;
+ case PROTECT_SYSTEM_FULL:
+ r = append_target_mounts(p, root_directory, protect_system_full_table,
+ ELEMENTSOF(protect_system_full_table));
+ break;
+ default:
+ r = -EINVAL;
+ break;
}
+ return r;
+}
+
+static int mount_path_compare(const void *a, const void *b) {
+ const BindMount *p = a, *q = b;
+ int d;
+
/* If the paths are not equal, then order prefixes first */
- return d;
+ d = path_compare(p->path, q->path);
+ if (d != 0)
+ return d;
+
+ /* If the paths are equal, check the mode */
+ if (p->mode < q->mode)
+ return -1;
+
+ if (p->mode > q->mode)
+ return 1;
+
+ return 0;
}
static void drop_duplicates(BindMount *m, unsigned *n) {
@@ -116,16 +301,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
assert(m);
assert(n);
+ /* Drops duplicate entries. Expects that the array is properly ordered already. */
+
for (f = m, t = m, previous = NULL; f < m+*n; f++) {
- /* The first one wins */
- if (previous && path_equal(f->path, previous->path))
+ /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
+ * above. */
+ if (previous && path_equal(f->path, previous->path)) {
+ log_debug("%s is duplicate.", f->path);
continue;
+ }
*t = *f;
-
previous = t;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_inaccessible(BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+ const char *clear = NULL;
+ assert(m);
+ assert(n);
+
+ /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
+ * ordered already. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
+ * it, as inaccessible paths really should drop the entire subtree. */
+ if (clear && path_startswith(f->path, clear)) {
+ log_debug("%s is masked by %s.", f->path, clear);
+ continue;
+ }
+
+ clear = f->mode == INACCESSIBLE ? f->path : NULL;
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_nop(BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+
+ assert(m);
+ assert(n);
+
+ /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
+ * list is ordered by prefixes. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ /* Only suppress such subtrees for READONLY and READWRITE entries */
+ if (IN_SET(f->mode, READONLY, READWRITE)) {
+ BindMount *p;
+ bool found = false;
+
+ /* Now let's find the first parent of the entry we are looking at. */
+ for (p = t-1; p >= m; p--) {
+ if (path_startswith(f->path, p->path)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* We found it, let's see if it's the same mode, if so, we can drop this entry */
+ if (found && p->mode == f->mode) {
+ log_debug("%s is redundant by %s", f->path, p->path);
+ continue;
+ }
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+
+ assert(m);
+ assert(n);
+
+ if (!root_directory)
+ return;
+
+ /* Drops all mounts that are outside of the root directory. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ if (!path_startswith(f->path, root_directory)) {
+ log_debug("%s is outside of root directory.", f->path);
+ continue;
+ }
+
+ *t = *f;
t++;
}
@@ -278,24 +557,23 @@ static int apply_mount(
const char *what;
int r;
- struct stat target;
assert(m);
+ log_debug("Applying namespace mount on %s", m->path);
+
switch (m->mode) {
- case INACCESSIBLE:
+ case INACCESSIBLE: {
+ struct stat target;
/* First, get rid of everything that is below if there
* is anything... Then, overmount it with an
* inaccessible path. */
- umount_recursive(m->path, 0);
+ (void) umount_recursive(m->path, 0);
- if (lstat(m->path, &target) < 0) {
- if (m->ignore && errno == ENOENT)
- return 0;
- return -errno;
- }
+ if (lstat(m->path, &target) < 0)
+ return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
what = mode_to_inaccessible_node(target.st_mode);
if (!what) {
@@ -303,11 +581,20 @@ static int apply_mount(
return -ELOOP;
}
break;
+ }
+
case READONLY:
case READWRITE:
- /* Nothing to mount here, we just later toggle the
- * MS_RDONLY bit for the mount point */
- return 0;
+
+ r = path_is_mount_point(m->path, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
+ if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
+ return 0;
+
+ /* This isn't a mount point yet, let's make it one. */
+ what = m->path;
+ break;
case PRIVATE_TMP:
what = tmp_dir;
@@ -326,68 +613,133 @@ static int apply_mount(
assert(what);
- r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
- if (r >= 0) {
- log_debug("Successfully mounted %s to %s", what, m->path);
- return r;
- } else {
- if (m->ignore && errno == ENOENT)
- return 0;
+ if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0)
return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
- }
+
+ log_debug("Successfully mounted %s to %s", what, m->path);
+ return 0;
}
-static int make_read_only(BindMount *m) {
- int r;
+static int make_read_only(BindMount *m, char **blacklist) {
+ int r = 0;
assert(m);
if (IN_SET(m->mode, INACCESSIBLE, READONLY))
- r = bind_remount_recursive(m->path, true);
- else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
- r = bind_remount_recursive(m->path, false);
- if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
- if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
- r = -errno;
+ r = bind_remount_recursive(m->path, true, blacklist);
+ else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
+ if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+ r = -errno;
} else
- r = 0;
-
- if (m->ignore && r == -ENOENT)
return 0;
+ /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
+ * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
+ * read-only mounts already applied. */
+
return r;
}
+static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+ int r;
+
+ assert(m);
+ assert(n);
+
+ /* Since mount() will always follow symlinks and we need to take the different root directory into account we
+ * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we
+ * can't resolve the path, and which have been marked for such removal. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ r = chase_symlinks(f->path, root_directory, &f->chased);
+ if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */
+ continue;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path);
+
+ if (path_equal(f->path, f->chased))
+ f->chased = mfree(f->chased);
+ else {
+ log_debug("Chased %s → %s", f->path, f->chased);
+ f->path = f->chased;
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+ return 0;
+}
+
+static unsigned namespace_calculate_mounts(
+ const NameSpaceInfo *ns_info,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ ProtectHome protect_home,
+ ProtectSystem protect_system) {
+
+ unsigned protect_home_cnt;
+ unsigned protect_system_cnt =
+ (protect_system == PROTECT_SYSTEM_STRICT ?
+ ELEMENTSOF(protect_system_strict_table) :
+ ((protect_system == PROTECT_SYSTEM_FULL) ?
+ ELEMENTSOF(protect_system_full_table) :
+ ((protect_system == PROTECT_SYSTEM_YES) ?
+ ELEMENTSOF(protect_system_yes_table) : 0)));
+
+ protect_home_cnt =
+ (protect_home == PROTECT_HOME_YES ?
+ ELEMENTSOF(protect_home_yes_table) :
+ ((protect_home == PROTECT_HOME_READ_ONLY) ?
+ ELEMENTSOF(protect_home_read_only_table) : 0));
+
+ return !!tmp_dir + !!var_tmp_dir +
+ strv_length(read_write_paths) +
+ strv_length(read_only_paths) +
+ strv_length(inaccessible_paths) +
+ ns_info->private_dev +
+ (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+ (ns_info->protect_control_groups ? 1 : 0) +
+ (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
+ protect_home_cnt + protect_system_cnt;
+}
+
int setup_namespace(
const char* root_directory,
+ const NameSpaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
const char* tmp_dir,
const char* var_tmp_dir,
- bool private_dev,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags) {
BindMount *m, *mounts = NULL;
+ bool make_slave = false;
unsigned n;
int r = 0;
if (mount_flags == 0)
mount_flags = MS_SHARED;
- if (unshare(CLONE_NEWNS) < 0)
- return -errno;
+ n = namespace_calculate_mounts(ns_info,
+ read_write_paths,
+ read_only_paths,
+ inaccessible_paths,
+ tmp_dir, var_tmp_dir,
+ protect_home, protect_system);
- n = !!tmp_dir + !!var_tmp_dir +
- strv_length(read_write_paths) +
- strv_length(read_only_paths) +
- strv_length(inaccessible_paths) +
- private_dev +
- (protect_home != PROTECT_HOME_NO ? 3 : 0) +
- (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
- (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
+ /* Set mount slave mode */
+ if (root_directory || n > 0)
+ make_slave = true;
if (n > 0) {
m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
@@ -415,100 +767,127 @@ int setup_namespace(
m++;
}
- if (private_dev) {
+ if (ns_info->private_dev) {
m->path = prefix_roota(root_directory, "/dev");
m->mode = PRIVATE_DEV;
m++;
}
- if (protect_home != PROTECT_HOME_NO) {
- const char *home_dir, *run_user_dir, *root_dir;
-
- home_dir = prefix_roota(root_directory, "/home");
- home_dir = strjoina("-", home_dir);
- run_user_dir = prefix_roota(root_directory, "/run/user");
- run_user_dir = strjoina("-", run_user_dir);
- root_dir = prefix_roota(root_directory, "/root");
- root_dir = strjoina("-", root_dir);
-
- r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
- protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ if (ns_info->protect_kernel_tunables) {
+ r = append_protect_kernel_tunables(&m, root_directory);
if (r < 0)
return r;
}
- if (protect_system != PROTECT_SYSTEM_NO) {
- const char *usr_dir, *boot_dir, *etc_dir;
-
- usr_dir = prefix_roota(root_directory, "/usr");
- boot_dir = prefix_roota(root_directory, "/boot");
- boot_dir = strjoina("-", boot_dir);
- etc_dir = prefix_roota(root_directory, "/etc");
-
- r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
- ? STRV_MAKE(usr_dir, boot_dir, etc_dir)
- : STRV_MAKE(usr_dir, boot_dir), READONLY);
+ if (ns_info->protect_kernel_modules) {
+ r = append_protect_kernel_modules(&m, root_directory);
if (r < 0)
return r;
}
+ if (ns_info->protect_control_groups) {
+ m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
+ m->mode = READONLY;
+ m++;
+ }
+
+ r = append_protect_home(&m, root_directory, protect_home);
+ if (r < 0)
+ return r;
+
+ r = append_protect_system(&m, root_directory, protect_system);
+ if (r < 0)
+ return r;
+
assert(mounts + n == m);
+ /* Resolve symlinks manually first, as mount() will always follow them relative to the host's
+ * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit
+ * racy. */
+ r = chase_all_symlinks(root_directory, mounts, &n);
+ if (r < 0)
+ goto finish;
+
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
+
drop_duplicates(mounts, &n);
+ drop_outside_root(root_directory, mounts, &n);
+ drop_inaccessible(mounts, &n);
+ drop_nop(mounts, &n);
}
- if (n > 0 || root_directory) {
+ if (unshare(CLONE_NEWNS) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (make_slave) {
/* Remount / as SLAVE so that nothing now mounted in the namespace
shows up in the parent */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
- return -errno;
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
}
if (root_directory) {
- /* Turn directory into bind mount */
- if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0)
- return -errno;
+ /* Turn directory into bind mount, if it isn't one yet */
+ r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
}
if (n > 0) {
+ char **blacklist;
+ unsigned j;
+
+ /* First round, add in all special mounts we need */
for (m = mounts; m < mounts + n; ++m) {
r = apply_mount(m, tmp_dir, var_tmp_dir);
if (r < 0)
- goto fail;
+ goto finish;
}
+ /* Create a blacklist we can pass to bind_mount_recursive() */
+ blacklist = newa(char*, n+1);
+ for (j = 0; j < n; j++)
+ blacklist[j] = (char*) mounts[j].path;
+ blacklist[j] = NULL;
+
+ /* Second round, flip the ro bits if necessary. */
for (m = mounts; m < mounts + n; ++m) {
- r = make_read_only(m);
+ r = make_read_only(m, blacklist);
if (r < 0)
- goto fail;
+ goto finish;
}
}
if (root_directory) {
/* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
r = mount_move_root(root_directory);
-
- /* at this point, we cannot rollback */
if (r < 0)
- return r;
+ goto finish;
}
/* Remount / as the desired mode. Not that this will not
* reestablish propagation from our side to the host, since
* what's disconnected is disconnected. */
- if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
- /* at this point, we cannot rollback */
- return -errno;
+ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
- return 0;
+ r = 0;
-fail:
- if (n > 0) {
- for (m = mounts; m < mounts + n; ++m)
- if (m->done)
- (void) umount2(m->path, MNT_DETACH);
- }
+finish:
+ for (m = mounts; m < mounts + n; m++)
+ free(m->chased);
return r;
}
@@ -658,6 +1037,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
[PROTECT_SYSTEM_NO] = "no",
[PROTECT_SYSTEM_YES] = "yes",
[PROTECT_SYSTEM_FULL] = "full",
+ [PROTECT_SYSTEM_STRICT] = "strict",
};
DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 1aedf5f208..6310638e9a 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -4,6 +4,7 @@
This file is part of systemd.
Copyright 2010 Lennart Poettering
+ Copyright 2016 Djalal Harouni
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +20,8 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
+typedef struct NameSpaceInfo NameSpaceInfo;
+
#include <stdbool.h>
#include "macro.h"
@@ -35,17 +38,25 @@ typedef enum ProtectSystem {
PROTECT_SYSTEM_NO,
PROTECT_SYSTEM_YES,
PROTECT_SYSTEM_FULL,
+ PROTECT_SYSTEM_STRICT,
_PROTECT_SYSTEM_MAX,
_PROTECT_SYSTEM_INVALID = -1
} ProtectSystem;
+struct NameSpaceInfo {
+ bool private_dev:1;
+ bool protect_control_groups:1;
+ bool protect_kernel_tunables:1;
+ bool protect_kernel_modules:1;
+};
+
int setup_namespace(const char *chroot,
+ const NameSpaceInfo *ns_info,
char **read_write_paths,
char **read_only_paths,
char **inaccessible_paths,
const char *tmp_dir,
const char *var_tmp_dir,
- bool private_dev,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags);
diff --git a/src/core/org.freedesktop.systemd1.conf b/src/core/org.freedesktop.systemd1.conf
index 3c64f20872..a61677e645 100644
--- a/src/core/org.freedesktop.systemd1.conf
+++ b/src/core/org.freedesktop.systemd1.conf
@@ -54,6 +54,10 @@
<allow send_destination="org.freedesktop.systemd1"
send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByInvocationID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
send_member="LoadUnit"/>
<allow send_destination="org.freedesktop.systemd1"
@@ -90,6 +94,10 @@
<allow send_destination="org.freedesktop.systemd1"
send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitFileLinks"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
send_member="ListJobs"/>
<allow send_destination="org.freedesktop.systemd1"
@@ -108,6 +116,14 @@
send_interface="org.freedesktop.systemd1.Manager"
send_member="GetDefaultTarget"/>
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByName"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByUID"/>
+
<!-- Managed via polkit or other criteria -->
<allow send_destination="org.freedesktop.systemd1"
@@ -176,6 +192,14 @@
<allow send_destination="org.freedesktop.systemd1"
send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="UnrefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
send_member="EnableUnitFiles"/>
<allow send_destination="org.freedesktop.systemd1"
diff --git a/src/core/path.c b/src/core/path.c
index 0dd0d375d8..83f794be89 100644
--- a/src/core/path.c
+++ b/src/core/path.c
@@ -454,7 +454,7 @@ static int path_coldplug(Unit *u) {
static void path_enter_dead(Path *p, PathResult f) {
assert(p);
- if (f != PATH_SUCCESS)
+ if (p->result == PATH_SUCCESS)
p->result = f;
path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD);
@@ -577,6 +577,10 @@ static int path_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
path_mkdir(p);
p->result = PATH_SUCCESS;
diff --git a/src/core/scope.c b/src/core/scope.c
index b45e238974..d6e1f8e392 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -147,6 +147,32 @@ static int scope_verify(Scope *s) {
return 0;
}
+static int scope_load_init_scope(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return 0;
+
+ u->transient = true;
+ u->perpetual = true;
+
+ /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+ * synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+ u->ignore_on_isolate = true;
+
+ SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
+
+ /* Prettify things, if we can. */
+ if (!u->description)
+ u->description = strdup("System and Service Manager");
+ if (!u->documentation)
+ (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+ return 1;
+}
+
static int scope_load(Unit *u) {
Scope *s = SCOPE(u);
int r;
@@ -158,6 +184,9 @@ static int scope_load(Unit *u) {
/* Refuse to load non-transient scope units, but allow them while reloading. */
return -ENOENT;
+ r = scope_load_init_scope(u);
+ if (r < 0)
+ return r;
r = unit_load_fragment_and_dropin_optional(u);
if (r < 0)
return r;
@@ -221,7 +250,7 @@ static void scope_dump(Unit *u, FILE *f, const char *prefix) {
static void scope_enter_dead(Scope *s, ScopeResult f) {
assert(s);
- if (f != SCOPE_SUCCESS)
+ if (s->result == SCOPE_SUCCESS)
s->result = f;
scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD);
@@ -233,7 +262,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
assert(s);
- if (f != SCOPE_SUCCESS)
+ if (s->result == SCOPE_SUCCESS)
s->result = f;
unit_watch_all_pids(UNIT(s));
@@ -298,6 +327,10 @@ static int scope_start(Unit *u) {
if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
return -ENOENT;
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
(void) unit_realize_cgroup(u);
(void) unit_reset_cpu_usage(u);
@@ -441,7 +474,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* If the PID set is empty now, then let's finish this off
(On unified we use proper notifications) */
- if (cg_unified() <= 0 && set_isempty(u->pids))
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
scope_notify_cgroup_empty_event(u);
}
@@ -530,34 +563,16 @@ static void scope_enumerate(Manager *m) {
u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
if (!u) {
- u = unit_new(m, sizeof(Scope));
- if (!u) {
- log_oom();
- return;
- }
-
- r = unit_add_name(u, SPECIAL_INIT_SCOPE);
+ r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
if (r < 0) {
- unit_free(u);
- log_error_errno(r, "Failed to add init.scope name");
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
return;
}
}
u->transient = true;
- u->default_dependencies = false;
- u->no_gc = true;
- u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
+ u->perpetual = true;
SCOPE(u)->deserialized_state = SCOPE_RUNNING;
- SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
-
- /* Prettify things, if we can. */
- if (!u->description)
- u->description = strdup("System and Service Manager");
- if (!u->documentation)
- (void) strv_extend(&u->documentation, "man:systemd(1)");
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
diff --git a/src/core/selinux-access.h b/src/core/selinux-access.h
index 8f1f058a32..f46370d020 100644
--- a/src/core/selinux-access.h
+++ b/src/core/selinux-access.h
@@ -33,7 +33,7 @@ int mac_selinux_generic_access_check(sd_bus_message *message, const char *path,
#define mac_selinux_unit_access_check(unit, message, permission, error) \
({ \
- Unit *_unit = (unit); \
+ const Unit *_unit = (unit); \
mac_selinux_generic_access_check((message), _unit->source_path ?: _unit->fragment_path, (permission), (error)); \
})
diff --git a/src/core/service.c b/src/core/service.c
index afb198507b..a7274a758f 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
free(fs);
}
-static void service_release_resources(Unit *u) {
+static void service_release_fd_store(Service *s) {
+ assert(s);
+
+ log_unit_debug(UNIT(s), "Releasing all stored fds");
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u, bool inactive) {
Service *s = SERVICE(u);
assert(s);
@@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) {
if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
return;
- log_unit_debug(u, "Releasing all resources.");
+ log_unit_debug(u, "Releasing resources.");
s->stdin_fd = safe_close(s->stdin_fd);
s->stdout_fd = safe_close(s->stdout_fd);
s->stderr_fd = safe_close(s->stderr_fd);
- while (s->fd_store)
- service_fd_store_unlink(s->fd_store);
-
- assert(s->n_fd_store == 0);
+ if (inactive)
+ service_release_fd_store(s);
}
static void service_done(Unit *u) {
@@ -322,6 +330,8 @@ static void service_done(Unit *u) {
s->control_command = NULL;
s->main_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
exit_status_set_free(&s->restart_prevent_status);
exit_status_set_free(&s->restart_force_status);
exit_status_set_free(&s->success_status);
@@ -340,6 +350,7 @@ static void service_done(Unit *u) {
s->bus_name_owner = mfree(s->bus_name_owner);
service_close_socket_fd(s);
+ s->peer = socket_peer_unref(s->peer);
unit_ref_unset(&s->accept_socket);
@@ -347,7 +358,7 @@ static void service_done(Unit *u) {
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
- service_release_resources(u);
+ service_release_resources(u, true);
}
static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
@@ -357,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us
assert(fs);
/* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ log_unit_debug(UNIT(fs->service),
+ "Received %s on stored fd %d (%s), closing.",
+ revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+ fs->fd, strna(fs->fdname));
service_fd_store_unlink(fs);
return 0;
}
@@ -365,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
ServiceFDStore *fs;
int r;
+ /* fd is always consumed if we return >= 0 */
+
assert(s);
assert(fd >= 0);
if (s->n_fd_store >= s->n_fd_store_max)
- return 0;
+ return -EXFULL; /* Our store is full.
+ * Use this errno rather than E[NM]FILE to distinguish from
+ * the case where systemd itself hits the file limit. */
LIST_FOREACH(fd_store, fs, s->fd_store) {
r = same_fd(fs->fd, fd);
if (r < 0)
return r;
if (r > 0) {
- /* Already included */
safe_close(fd);
- return 1;
+ return 0; /* fd already included */
}
}
@@ -406,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
LIST_PREPEND(fd_store, s->fd_store, fs);
s->n_fd_store++;
- return 1;
+ return 1; /* fd newly stored */
}
static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
@@ -414,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
assert(s);
- if (fdset_size(fds) <= 0)
- return 0;
-
- while (s->n_fd_store < s->n_fd_store_max) {
+ while (fdset_size(fds) > 0) {
_cleanup_close_ int fd = -1;
fd = fdset_steal_first(fds);
@@ -425,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
break;
r = service_add_fd_store(s, fd, name);
+ if (r == -EXFULL)
+ return log_unit_warning_errno(UNIT(s), r,
+ "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+ s->n_fd_store_max);
if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
- if (r > 0) {
- log_unit_debug(UNIT(s), "Added fd to fd store.");
- fd = -1;
- }
+ return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+ if (r > 0)
+ log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
+ fd = -1;
}
- if (fdset_size(fds) > 0)
- log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max);
-
return 0;
}
@@ -758,6 +773,11 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
prefix, s->bus_name,
prefix, yes_no(s->bus_name_good));
+ if (UNIT_ISSET(s->accept_socket))
+ fprintf(f,
+ "%sAccept Socket: %s\n",
+ prefix, UNIT_DEREF(s->accept_socket)->id);
+
kill_context_dump(&s->kill_context, f, prefix);
exec_context_dump(&s->exec_context, f, prefix);
@@ -1030,6 +1050,23 @@ static int service_coldplug(Unit *u) {
if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
service_start_watchdog(s);
+ if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
+ (void) unit_setup_dynamic_creds(u);
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ Socket* socket = SOCKET(UNIT_DEREF(s->accept_socket));
+
+ if (socket->max_connections_per_source > 0) {
+ SocketPeer *peer;
+
+ /* Make a best-effort attempt at bumping the connection count */
+ if (socket_acquire_peer(socket, s->socket_fd, &peer) > 0) {
+ socket_peer_unref(s->peer);
+ s->peer = peer;
+ }
+ }
+ }
+
service_set_state(s, s->deserialized_state);
return 0;
}
@@ -1146,11 +1183,7 @@ static int service_spawn(
Service *s,
ExecCommand *c,
usec_t timeout,
- bool pass_fds,
- bool apply_permissions,
- bool apply_chroot,
- bool apply_tty_stdin,
- bool is_control,
+ ExecFlags flags,
pid_t *_pid) {
_cleanup_strv_free_ char **argv = NULL, **final_env = NULL, **our_env = NULL, **fd_names = NULL;
@@ -1160,12 +1193,10 @@ static int service_spawn(
pid_t pid;
ExecParameters exec_params = {
- .apply_permissions = apply_permissions,
- .apply_chroot = apply_chroot,
- .apply_tty_stdin = apply_tty_stdin,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = flags,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
int r;
@@ -1174,6 +1205,14 @@ static int service_spawn(
assert(c);
assert(_pid);
+ if (flags & EXEC_IS_CONTROL) {
+ /* If this is a control process, mask the permissions/chroot application if this is requested. */
+ if (s->permissions_start_only)
+ exec_params.flags &= ~EXEC_APPLY_PERMISSIONS;
+ if (s->root_directory_start_only)
+ exec_params.flags &= ~EXEC_APPLY_CHROOT;
+ }
+
(void) unit_realize_cgroup(UNIT(s));
if (s->reset_cpu_usage) {
(void) unit_reset_cpu_usage(UNIT(s));
@@ -1184,7 +1223,11 @@ static int service_spawn(
if (r < 0)
return r;
- if (pass_fds ||
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
+ if ((flags & EXEC_PASS_FDS) ||
s->exec_context.std_input == EXEC_INPUT_SOCKET ||
s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
s->exec_context.std_error == EXEC_OUTPUT_SOCKET) {
@@ -1194,6 +1237,7 @@ static int service_spawn(
return r;
n_fds = r;
+ log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds);
}
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
@@ -1204,11 +1248,11 @@ static int service_spawn(
if (r < 0)
return r;
- our_env = new0(char*, 6);
+ our_env = new0(char*, 9);
if (!our_env)
return -ENOMEM;
- if (is_control ? s->notify_access == NOTIFY_ALL : s->notify_access != NOTIFY_NONE)
+ if ((flags & EXEC_IS_CONTROL) ? s->notify_access == NOTIFY_ALL : s->notify_access != NOTIFY_NONE)
if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0)
return -ENOMEM;
@@ -1216,7 +1260,7 @@ static int service_spawn(
if (asprintf(our_env + n_env++, "MAINPID="PID_FMT, s->main_pid) < 0)
return -ENOMEM;
- if (!MANAGER_IS_SYSTEM(UNIT(s)->manager))
+ if (MANAGER_IS_USER(UNIT(s)->manager))
if (asprintf(our_env + n_env++, "MANAGERPID="PID_FMT, getpid()) < 0)
return -ENOMEM;
@@ -1225,10 +1269,16 @@ static int service_spawn(
socklen_t salen = sizeof(sa);
r = getpeername(s->socket_fd, &sa.sa, &salen);
- if (r < 0)
- return -errno;
+ if (r < 0) {
+ r = -errno;
- if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
+ /* ENOTCONN is legitimate if the endpoint disappeared on shutdown.
+ * This connection is over, but the socket unit lives on. */
+ if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST))
+ return r;
+ }
+
+ if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
_cleanup_free_ char *addr = NULL;
char *t;
int port;
@@ -1252,22 +1302,40 @@ static int service_spawn(
}
}
+ if (flags & EXEC_SETENV_RESULT) {
+ if (asprintf(our_env + n_env++, "SERVICE_RESULT=%s", service_result_to_string(s->result)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.pid > 0 &&
+ dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
+ if (asprintf(our_env + n_env++, "EXIT_CODE=%s", sigchld_code_to_string(s->main_exec_status.code)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.code == CLD_EXITED)
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%i", s->main_exec_status.status);
+ else
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%s", signal_to_string(s->main_exec_status.status));
+ if (r < 0)
+ return -ENOMEM;
+ }
+ }
+
final_env = strv_env_merge(2, UNIT(s)->manager->environment, our_env, NULL);
if (!final_env)
return -ENOMEM;
- if (is_control && UNIT(s)->cgroup_path) {
+ if ((flags & EXEC_IS_CONTROL) && UNIT(s)->cgroup_path) {
path = strjoina(UNIT(s)->cgroup_path, "/control");
(void) cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
} else
path = UNIT(s)->cgroup_path;
exec_params.argv = argv;
+ exec_params.environment = final_env;
exec_params.fds = fds;
exec_params.fd_names = fd_names;
exec_params.n_fds = n_fds;
- exec_params.environment = final_env;
- exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = path;
exec_params.cgroup_delegate = s->cgroup_context.delegate;
@@ -1285,6 +1353,7 @@ static int service_spawn(
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -1392,14 +1461,14 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
int r;
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_set_state(s, s->result != SERVICE_SUCCESS ? SERVICE_FAILED : SERVICE_DEAD);
if (s->result != SERVICE_SUCCESS) {
log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result));
- failure_action(UNIT(s)->manager, s->failure_action, UNIT(s)->reboot_arg);
+ emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed");
}
if (allow_restart && service_shall_restart(s)) {
@@ -1418,9 +1487,15 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
- /* Also, remove the runtime directory in */
+ /* Also, remove the runtime directory */
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
+ /* Get rid of the IPC bits of the user */
+ unit_unref_uid_gid(UNIT(s), true);
+
+ /* Release the user, and destroy it if we are the only remaining owner */
+ dynamic_creds_destroy(&s->dynamic_creds);
+
/* Try to delete the pid file. At this point it will be
* out-of-date, and some software might be confused by it, so
* let's remove it. */
@@ -1438,7 +1513,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
int r;
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_unwatch_control_pid(s);
@@ -1451,11 +1526,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- true,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1495,7 +1566,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
unit_watch_all_pids(UNIT(s));
@@ -1553,7 +1624,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_unwatch_control_pid(s);
@@ -1566,11 +1637,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- false,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1609,7 +1676,7 @@ static bool service_good(Service *s) {
static void service_enter_running(Service *s, ServiceResult f) {
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_unwatch_control_pid(s);
@@ -1647,11 +1714,7 @@ static void service_enter_start_post(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- false,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1706,7 +1769,15 @@ static void service_enter_start(Service *s) {
}
if (!c) {
- assert(s->type == SERVICE_ONESHOT);
+ if (s->type != SERVICE_ONESHOT) {
+ /* There's no command line configured for the main command? Hmm, that is strange. This can only
+ * happen if the configuration changes at runtime. In this case, let's enter a failure
+ * state. */
+ log_unit_error(UNIT(s), "There's no 'start' task anymore we could start: %m");
+ r = -ENXIO;
+ goto fail;
+ }
+
service_enter_start_post(s);
return;
}
@@ -1721,11 +1792,7 @@ static void service_enter_start(Service *s) {
r = service_spawn(s,
c,
timeout,
- true,
- true,
- true,
- true,
- false,
+ EXEC_PASS_FDS|EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
&pid);
if (r < 0)
goto fail;
@@ -1784,11 +1851,7 @@ static void service_enter_start_pre(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- true,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1863,11 +1926,7 @@ static void service_enter_reload(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- false,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1905,12 +1964,9 @@ static void service_run_next_control(Service *s) {
r = service_spawn(s,
s->control_command,
timeout,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- s->control_command_id == SERVICE_EXEC_START_PRE ||
- s->control_command_id == SERVICE_EXEC_STOP_POST,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0),
&s->control_pid);
if (r < 0)
goto fail;
@@ -1948,11 +2004,7 @@ static void service_run_next_main(Service *s) {
r = service_spawn(s,
s->main_command,
s->timeout_start_usec,
- true,
- true,
- true,
- true,
- false,
+ EXEC_PASS_FDS|EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
&pid);
if (r < 0)
goto fail;
@@ -2002,6 +2054,10 @@ static int service_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
s->result = SERVICE_SUCCESS;
s->reload_result = SERVICE_SUCCESS;
s->main_pid_known = false;
@@ -2116,6 +2172,12 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
if (r < 0)
return r;
+ if (UNIT_ISSET(s->accept_socket)) {
+ r = unit_serialize_item(u, f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
+ if (r < 0)
+ return r;
+ }
+
r = unit_serialize_item_fd(u, f, fds, "socket-fd", s->socket_fd);
if (r < 0)
return r;
@@ -2246,6 +2308,17 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
s->control_command_id = id;
s->control_command = s->exec_command[id];
}
+ } else if (streq(key, "accept-socket")) {
+ Unit *socket;
+
+ r = manager_load_unit(u->manager, value, NULL, NULL, &socket);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to load accept-socket unit: %s", value);
+ else {
+ unit_ref_set(&s->accept_socket, socket);
+ SOCKET(socket)->n_connections++;
+ }
+
} else if (streq(key, "socket-fd")) {
int fd;
@@ -2276,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
r = service_add_fd_store(s, fd, t);
if (r < 0)
log_unit_error_errno(u, r, "Failed to add fd to store: %m");
- else if (r > 0)
+ else
fdset_remove(fds, fd);
}
@@ -2552,8 +2625,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
assert(s);
assert(pid >= 0);
- if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) :
- is_clean_exit_lsb(code, status, &s->success_status))
+ if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status))
f = SERVICE_SUCCESS;
else if (code == CLD_EXITED)
f = SERVICE_FAILURE_EXIT_CODE;
@@ -2595,7 +2667,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
f = SERVICE_SUCCESS;
}
- log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
+ * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
+ * that the service already logged the reason at a higher log level on its own. However, if the service
+ * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise
+ * our log level to WARNING then. */
+
+ log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG :
+ (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING),
LOG_UNIT_ID(u),
LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s",
sigchld_code_to_string(code), status,
@@ -2606,7 +2685,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"EXIT_STATUS=%i", status,
NULL);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
if (s->main_command &&
@@ -2687,7 +2766,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"Control process exited, code=%s status=%i",
sigchld_code_to_string(code), status);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
/* Immediately get rid of the cgroup, so that the
@@ -2827,7 +2906,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* If the PID set is empty now, then let's finish this off
(On unified we use proper notifications) */
- if (cg_unified() <= 0 && set_isempty(u->pids))
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
service_notify_cgroup_empty_event(u);
}
@@ -3037,9 +3116,7 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds)
if (!streq_ptr(s->status_text, t)) {
- free(s->status_text);
- s->status_text = t;
- t = NULL;
+ free_and_replace(s->status_text, t);
notify_dbus = true;
}
@@ -3323,6 +3400,7 @@ const UnitVTable service_vtable = {
.cgroup_context_offset = offsetof(Service, cgroup_context),
.kill_context_offset = offsetof(Service, kill_context),
.exec_runtime_offset = offsetof(Service, exec_runtime),
+ .dynamic_creds_offset = offsetof(Service, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/service.h b/src/core/service.h
index cfef375b03..2869144fcb 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -148,9 +148,11 @@ struct Service {
/* Runtime data of the execution context */
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
pid_t main_pid, control_pid;
int socket_fd;
+ SocketPeer *peer;
bool socket_fd_selinux_context_net;
bool permissions_start_only;
@@ -176,7 +178,7 @@ struct Service {
char *status_text;
int status_errno;
- FailureAction failure_action;
+ EmergencyAction emergency_action;
UnitRef accept_socket;
diff --git a/src/core/show-status.c b/src/core/show-status.c
index 59ebdc7219..65f9cb888a 100644
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@@ -61,6 +61,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
if (vasprintf(&s, format, ap) < 0)
return log_oom();
+ /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This
+ * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier
+ * for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
if (fd < 0)
return fd;
diff --git a/src/core/slice.c b/src/core/slice.c
index c7700b8857..ed5d3fd701 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -130,6 +130,28 @@ static int slice_verify(Slice *s) {
return 0;
}
+static int slice_load_root_slice(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ u->perpetual = true;
+
+ /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+ u->ignore_on_isolate = true;
+
+ if (!u->description)
+ u->description = strdup("Root Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)", NULL);
+
+ return 1;
+}
+
static int slice_load(Unit *u) {
Slice *s = SLICE(u);
int r;
@@ -137,6 +159,9 @@ static int slice_load(Unit *u) {
assert(s);
assert(u->load_state == UNIT_STUB);
+ r = slice_load_root_slice(u);
+ if (r < 0)
+ return r;
r = unit_load_fragment_and_dropin_optional(u);
if (r < 0)
return r;
@@ -187,10 +212,15 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) {
static int slice_start(Unit *u) {
Slice *t = SLICE(u);
+ int r;
assert(t);
assert(t->state == SLICE_DEAD);
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
(void) unit_realize_cgroup(u);
(void) unit_reset_cpu_usage(u);
@@ -269,32 +299,16 @@ static void slice_enumerate(Manager *m) {
u = manager_get_unit(m, SPECIAL_ROOT_SLICE);
if (!u) {
- u = unit_new(m, sizeof(Slice));
- if (!u) {
- log_oom();
- return;
- }
-
- r = unit_add_name(u, SPECIAL_ROOT_SLICE);
+ r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u);
if (r < 0) {
- unit_free(u);
- log_error_errno(r, "Failed to add -.slice name");
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m");
return;
}
}
- u->default_dependencies = false;
- u->no_gc = true;
- u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
+ u->perpetual = true;
SLICE(u)->deserialized_state = SLICE_ACTIVE;
- if (!u->description)
- u->description = strdup("Root Slice");
- if (!u->documentation)
- (void) strv_extend(&u->documentation, "man:systemd.special(7)");
-
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
}
diff --git a/src/core/socket.c b/src/core/socket.c
index e098055885..0b1c4acfec 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -57,6 +57,14 @@
#include "unit-printf.h"
#include "unit.h"
#include "user-util.h"
+#include "in-addr-util.h"
+
+struct SocketPeer {
+ unsigned n_ref;
+
+ Socket *socket;
+ union sockaddr_union peer;
+};
static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
[SOCKET_DEAD] = UNIT_INACTIVE,
@@ -141,15 +149,23 @@ void socket_free_ports(Socket *s) {
static void socket_done(Unit *u) {
Socket *s = SOCKET(u);
+ SocketPeer *p;
assert(s);
socket_free_ports(s);
+ while ((p = set_steal_first(s->peers_by_address)))
+ p->socket = NULL;
+
+ s->peers_by_address = set_free(s->peers_by_address);
+
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
s->control_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
socket_unwatch_control_pid(s);
unit_ref_unset(&s->service);
@@ -466,6 +482,40 @@ static int socket_verify(Socket *s) {
return 0;
}
+static void peer_address_hash_func(const void *p, struct siphash *state) {
+ const SocketPeer *s = p;
+
+ assert(s);
+ assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
+
+ if (s->peer.sa.sa_family == AF_INET)
+ siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state);
+ else
+ siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state);
+}
+
+static int peer_address_compare_func(const void *a, const void *b) {
+ const SocketPeer *x = a, *y = b;
+
+ if (x->peer.sa.sa_family < y->peer.sa.sa_family)
+ return -1;
+ if (x->peer.sa.sa_family > y->peer.sa.sa_family)
+ return 1;
+
+ switch(x->peer.sa.sa_family) {
+ case AF_INET:
+ return memcmp(&x->peer.in.sin_addr, &y->peer.in.sin_addr, sizeof(x->peer.in.sin_addr));
+ case AF_INET6:
+ return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
+ }
+ assert_not_reached("Black sheep in the family!");
+}
+
+const struct hash_ops peer_address_hash_ops = {
+ .hash = peer_address_hash_func,
+ .compare = peer_address_compare_func
+};
+
static int socket_load(Unit *u) {
Socket *s = SOCKET(u);
int r;
@@ -473,6 +523,10 @@ static int socket_load(Unit *u) {
assert(u);
assert(u->load_state == UNIT_STUB);
+ r = set_ensure_allocated(&s->peers_by_address, &peer_address_hash_ops);
+ if (r < 0)
+ return r;
+
r = unit_load_fragment_and_dropin(u);
if (r < 0)
return r;
@@ -487,6 +541,87 @@ static int socket_load(Unit *u) {
return socket_verify(s);
}
+static SocketPeer *socket_peer_new(void) {
+ SocketPeer *p;
+
+ p = new0(SocketPeer, 1);
+ if (!p)
+ return NULL;
+
+ p->n_ref = 1;
+
+ return p;
+}
+
+SocketPeer *socket_peer_ref(SocketPeer *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+ p->n_ref++;
+
+ return p;
+}
+
+SocketPeer *socket_peer_unref(SocketPeer *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+
+ p->n_ref--;
+
+ if (p->n_ref > 0)
+ return NULL;
+
+ if (p->socket)
+ set_remove(p->socket->peers_by_address, p);
+
+ return mfree(p);
+}
+
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) {
+ _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL;
+ SocketPeer sa = {}, *i;
+ socklen_t salen = sizeof(sa.peer);
+ int r;
+
+ assert(fd >= 0);
+ assert(s);
+
+ r = getpeername(fd, &sa.peer.sa, &salen);
+ if (r < 0)
+ return log_error_errno(errno, "getpeername failed: %m");
+
+ if (!IN_SET(sa.peer.sa.sa_family, AF_INET, AF_INET6)) {
+ *p = NULL;
+ return 0;
+ }
+
+ i = set_get(s->peers_by_address, &sa);
+ if (i) {
+ *p = socket_peer_ref(i);
+ return 1;
+ }
+
+ remote = socket_peer_new();
+ if (!remote)
+ return log_oom();
+
+ remote->peer = sa.peer;
+
+ r = set_put(s->peers_by_address, remote);
+ if (r < 0)
+ return r;
+
+ remote->socket = s;
+
+ *p = remote;
+ remote = NULL;
+
+ return 1;
+}
+
_const_ static const char* listen_lookup(int family, int type) {
if (family == AF_NETLINK)
@@ -1199,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) {
static int usbffs_dispatch_eps(SocketPort *p) {
_cleanup_free_ struct dirent **ent = NULL;
- _cleanup_free_ char *path = NULL;
int r, i, n, k;
- path = dirname_malloc(p->path);
- if (!path)
- return -ENOMEM;
-
- r = scandir(path, &ent, usbffs_select_ep, alphasort);
+ r = scandir(p->path, &ent, usbffs_select_ep, alphasort);
if (r < 0)
return -errno;
@@ -1221,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) {
for (i = 0; i < n; ++i) {
_cleanup_free_ char *ep = NULL;
- ep = path_make_absolute(ent[i]->d_name, path);
+ ep = path_make_absolute(ent[i]->d_name, p->path);
if (!ep)
return -ENOMEM;
@@ -1602,6 +1732,9 @@ static int socket_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
socket_set_state(s, s->deserialized_state);
return 0;
}
@@ -1611,12 +1744,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
pid_t pid;
int r;
ExecParameters exec_params = {
- .apply_permissions = true,
- .apply_chroot = true,
- .apply_tty_stdin = true,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
assert(s);
@@ -1633,6 +1764,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
if (r < 0)
return r;
@@ -1643,7 +1778,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
exec_params.argv = argv;
exec_params.environment = UNIT(s)->manager->environment;
- exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(s)->cgroup_path;
exec_params.cgroup_delegate = s->cgroup_context.delegate;
@@ -1654,6 +1789,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -1754,15 +1890,19 @@ fail:
static void socket_enter_dead(Socket *s, SocketResult f) {
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
+ socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
- socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
}
static void socket_enter_signal(Socket *s, SocketState state, SocketResult f);
@@ -1771,7 +1911,7 @@ static void socket_enter_stop_post(Socket *s, SocketResult f) {
int r;
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
socket_unwatch_control_pid(s);
@@ -1799,7 +1939,7 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
r = unit_kill_context(
@@ -1843,7 +1983,7 @@ static void socket_enter_stop_pre(Socket *s, SocketResult f) {
int r;
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
socket_unwatch_control_pid(s);
@@ -2038,14 +2178,34 @@ static void socket_enter_running(Socket *s, int cfd) {
socket_set_state(s, SOCKET_RUNNING);
} else {
_cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL;
+ _cleanup_(socket_peer_unrefp) SocketPeer *p = NULL;
Service *service;
if (s->n_connections >= s->max_connections) {
- log_unit_warning(UNIT(s), "Too many incoming connections (%u), refusing connection attempt.", s->n_connections);
+ log_unit_warning(UNIT(s), "Too many incoming connections (%u), dropping connection.",
+ s->n_connections);
safe_close(cfd);
return;
}
+ if (s->max_connections_per_source > 0) {
+ r = socket_acquire_peer(s, cfd, &p);
+ if (r < 0) {
+ safe_close(cfd);
+ return;
+ } else if (r > 0 && p->n_ref > s->max_connections_per_source) {
+ _cleanup_free_ char *t = NULL;
+
+ sockaddr_pretty(&p->peer.sa, FAMILY_ADDRESS_SIZE(p->peer.sa.sa_family), true, false, &t);
+
+ log_unit_warning(UNIT(s),
+ "Too many incoming connections (%u) from source %s, dropping connection.",
+ p->n_ref, strnull(t));
+ safe_close(cfd);
+ return;
+ }
+ }
+
r = socket_instantiate_service(s);
if (r < 0)
goto fail;
@@ -2087,6 +2247,9 @@ static void socket_enter_running(Socket *s, int cfd) {
cfd = -1; /* We passed ownership of the fd to the service now. Forget it here. */
s->n_connections++;
+ service->peer = p; /* Pass ownership of the peer reference */
+ p = NULL;
+
r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(service), JOB_REPLACE, &error, NULL);
if (r < 0) {
/* We failed to activate the new service, but it still exists. Let's make sure the service
@@ -2191,11 +2354,14 @@ static int socket_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
s->result = SOCKET_SUCCESS;
s->reset_cpu_usage = true;
socket_enter_start_pre(s);
-
return 1;
}
@@ -2286,6 +2452,11 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
return 0;
}
+static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) {
+ safe_close(p->fd);
+ p->fd = fdset_remove(fds, fd);
+}
+
static int socket_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
Socket *s = SOCKET(u);
@@ -2340,18 +2511,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse fifo value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_FIFO &&
- path_equal_or_files_same(p->path, value+skip))
+ path_equal_or_files_same(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "special")) {
int fd, skip = 0;
@@ -2359,18 +2525,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse special value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_SPECIAL &&
- path_equal_or_files_same(p->path, value+skip))
+ path_equal_or_files_same(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "mqueue")) {
int fd, skip = 0;
@@ -2378,18 +2539,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse mqueue value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_MQUEUE &&
- streq(p->path, value+skip))
+ streq(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "socket")) {
int fd, type, skip = 0;
@@ -2397,17 +2553,12 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %i %n", &fd, &type, &skip) < 2 || fd < 0 || type < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse socket value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
- if (socket_address_is(&p->address, value+skip, type))
+ if (socket_address_is(&p->address, value+skip, type)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "netlink")) {
int fd, skip = 0;
@@ -2415,17 +2566,12 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse socket value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
- if (socket_address_is_netlink(&p->address, value+skip))
+ if (socket_address_is_netlink(&p->address, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "ffs")) {
int fd, skip = 0;
@@ -2433,18 +2579,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse ffs value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_USB_FUNCTION &&
- path_equal_or_files_same(p->path, value+skip))
+ path_equal_or_files_same(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else
log_unit_debug(UNIT(s), "Unknown serialization key: %s", key);
@@ -2605,7 +2746,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
s->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = SOCKET_SUCCESS;
else if (code == CLD_EXITED)
f = SOCKET_FAILURE_EXIT_CODE;
@@ -2627,7 +2768,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"Control process exited, code=%s status=%i",
sigchld_code_to_string(code), status);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
if (s->control_command &&
@@ -2930,6 +3071,7 @@ const UnitVTable socket_vtable = {
.cgroup_context_offset = offsetof(Socket, cgroup_context),
.kill_context_offset = offsetof(Socket, kill_context),
.exec_runtime_offset = offsetof(Socket, exec_runtime),
+ .dynamic_creds_offset = offsetof(Socket, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/socket.h b/src/core/socket.h
index 0f1ac69c6f..89f4664510 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -20,6 +20,7 @@
***/
typedef struct Socket Socket;
+typedef struct SocketPeer SocketPeer;
#include "mount.h"
#include "service.h"
@@ -79,9 +80,12 @@ struct Socket {
LIST_HEAD(SocketPort, ports);
+ Set *peers_by_address;
+
unsigned n_accepted;
unsigned n_connections;
unsigned max_connections;
+ unsigned max_connections_per_source;
unsigned backlog;
unsigned keep_alive_cnt;
@@ -94,7 +98,9 @@ struct Socket {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
/* For Accept=no sockets refers to the one service we'll
activate. For Accept=yes sockets is either NULL, or filled
@@ -162,6 +168,12 @@ struct Socket {
RateLimit trigger_limit;
};
+SocketPeer *socket_peer_ref(SocketPeer *p);
+SocketPeer *socket_peer_unref(SocketPeer *p);
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref);
+
/* Called from the service code when collecting fds */
int socket_collect_fds(Socket *s, int **fds);
diff --git a/src/core/swap.c b/src/core/swap.c
index a532b15be8..2228a254bb 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -153,6 +153,8 @@ static void swap_done(Unit *u) {
exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
s->control_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
swap_unwatch_control_pid(s);
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
@@ -379,11 +381,7 @@ static int swap_setup_unit(
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Swap));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Swap), e, &u);
if (r < 0)
goto fail;
@@ -553,6 +551,9 @@ static int swap_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
swap_set_state(s, new_state);
return 0;
}
@@ -606,12 +607,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
pid_t pid;
int r;
ExecParameters exec_params = {
- .apply_permissions = true,
- .apply_chroot = true,
- .apply_tty_stdin = true,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
assert(s);
@@ -628,12 +627,16 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
goto fail;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
if (r < 0)
goto fail;
exec_params.environment = UNIT(s)->manager->environment;
- exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(s)->cgroup_path;
exec_params.cgroup_delegate = s->cgroup_context.delegate;
@@ -644,6 +647,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
goto fail;
@@ -665,21 +669,25 @@ fail:
static void swap_enter_dead(Swap *s, SwapResult f) {
assert(s);
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
+ swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
- swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
}
static void swap_enter_active(Swap *s, SwapResult f) {
assert(s);
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
swap_set_state(s, SWAP_ACTIVE);
@@ -690,7 +698,7 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
assert(s);
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
r = unit_kill_context(
@@ -849,6 +857,10 @@ static int swap_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
s->result = SWAP_SUCCESS;
s->reset_cpu_usage = true;
@@ -976,7 +988,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
s->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = SWAP_SUCCESS;
else if (code == CLD_EXITED)
f = SWAP_FAILURE_EXIT_CODE;
@@ -987,7 +999,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
else
assert_not_reached("Unknown code");
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
if (s->control_command) {
@@ -1177,6 +1189,7 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
case SWAP_DEAD:
case SWAP_FAILED:
+ (void) unit_acquire_invocation_id(UNIT(swap));
swap_enter_active(swap, SWAP_SUCCESS);
break;
@@ -1466,6 +1479,7 @@ const UnitVTable swap_vtable = {
.cgroup_context_offset = offsetof(Swap, cgroup_context),
.kill_context_offset = offsetof(Swap, kill_context),
.exec_runtime_offset = offsetof(Swap, exec_runtime),
+ .dynamic_creds_offset = offsetof(Swap, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/swap.h b/src/core/swap.h
index fbf66debdc..b0ef50f1e8 100644
--- a/src/core/swap.h
+++ b/src/core/swap.h
@@ -82,6 +82,7 @@ struct Swap {
CGroupContext cgroup_context;
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
SwapState state, deserialized_state;
diff --git a/src/core/system.conf b/src/core/system.conf
index c6bb050aac..746572b7ff 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -21,6 +21,7 @@
#CrashChangeVT=no
#CrashShell=no
#CrashReboot=no
+#CtrlAltDelBurstAction=reboot-force
#CPUAffinity=1 2
#JoinControllers=cpu,cpuacct net_cls,net_prio
#RuntimeWatchdogSec=0
diff --git a/src/core/target.c b/src/core/target.c
index 61a91aad07..765c1f3fa4 100644
--- a/src/core/target.c
+++ b/src/core/target.c
@@ -124,10 +124,15 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) {
static int target_start(Unit *u) {
Target *t = TARGET(u);
+ int r;
assert(t);
assert(t->state == TARGET_DEAD);
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
target_set_state(t, TARGET_ACTIVE);
return 1;
}
diff --git a/src/core/timer.c b/src/core/timer.c
index 3206296f09..2469a517ea 100644
--- a/src/core/timer.c
+++ b/src/core/timer.c
@@ -261,6 +261,8 @@ static void timer_set_state(Timer *t, TimerState state) {
if (state != TIMER_WAITING) {
t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+ t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+ t->next_elapse_realtime = USEC_INFINITY;
}
if (state != old_state)
@@ -291,7 +293,7 @@ static int timer_coldplug(Unit *u) {
static void timer_enter_dead(Timer *t, TimerResult f) {
assert(t);
- if (f != TIMER_SUCCESS)
+ if (t->result == TIMER_SUCCESS)
t->result = f;
timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD);
@@ -616,6 +618,10 @@ static int timer_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
t->last_trigger = DUAL_TIMESTAMP_NULL;
/* Reenable all timers that depend on unit activation time */
@@ -632,7 +638,7 @@ static int timer_start(Unit *u) {
/* The timer has never run before,
* make sure a stamp file exists.
*/
- touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+ (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
}
t->result = TIMER_SUCCESS;
diff --git a/src/core/transaction.c b/src/core/transaction.c
index 8370b864fb..e22e3b30c2 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -1085,10 +1085,8 @@ Transaction *transaction_new(bool irreversible) {
return NULL;
tr->jobs = hashmap_new(NULL);
- if (!tr->jobs) {
- free(tr);
- return NULL;
- }
+ if (!tr->jobs)
+ return mfree(tr);
tr->irreversible = irreversible;
diff --git a/src/core/umount.c b/src/core/umount.c
index c21a2be54e..1e5459ed80 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -375,7 +375,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
/* If we are in a container, don't attempt to
read-only mount anything as that brings no real
benefits, but might confuse the host, as we remount
- the superblock here, not the bind mound. */
+ the superblock here, not the bind mount. */
if (detect_container() <= 0) {
_cleanup_free_ char *options = NULL;
/* MS_REMOUNT requires that the data parameter
diff --git a/src/core/unit.c b/src/core/unit.c
index 4934a0e56f..e664e23892 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -37,6 +37,7 @@
#include "execute.h"
#include "fileio-label.h"
#include "formats-util.h"
+#include "id128-util.h"
#include "load-dropin.h"
#include "load-fragment.h"
#include "log.h"
@@ -87,10 +88,8 @@ Unit *unit_new(Manager *m, size_t size) {
return NULL;
u->names = set_new(&string_hash_ops);
- if (!u->names) {
- free(u);
- return NULL;
- }
+ if (!u->names)
+ return mfree(u);
u->manager = m;
u->type = _UNIT_TYPE_INVALID;
@@ -100,7 +99,9 @@ Unit *unit_new(Manager *m, size_t size) {
u->on_failure_job_mode = JOB_REPLACE;
u->cgroup_inotify_wd = -1;
u->job_timeout = USEC_INFINITY;
- u->sigchldgen = 0;
+ u->ref_uid = UID_INVALID;
+ u->ref_gid = GID_INVALID;
+ u->cpu_usage_last = NSEC_INFINITY;
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -108,11 +109,29 @@ Unit *unit_new(Manager *m, size_t size) {
return u;
}
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+ Unit *u;
+ int r;
+
+ u = unit_new(m, size);
+ if (!u)
+ return -ENOMEM;
+
+ r = unit_add_name(u, name);
+ if (r < 0) {
+ unit_free(u);
+ return r;
+ }
+
+ *ret = u;
+ return r;
+}
+
bool unit_has_name(Unit *u, const char *name) {
assert(u);
assert(name);
- return !!set_get(u->names, (char*) name);
+ return set_contains(u->names, (char*) name);
}
static void unit_init(Unit *u) {
@@ -301,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) {
bool unit_check_gc(Unit *u) {
UnitActiveState state;
+ bool inactive;
assert(u);
if (u->job)
@@ -310,24 +330,28 @@ bool unit_check_gc(Unit *u) {
return true;
state = unit_active_state(u);
+ inactive = state == UNIT_INACTIVE;
/* If the unit is inactive and failed and no job is queued for
* it, then release its runtime resources */
if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
UNIT_VTABLE(u)->release_resources)
- UNIT_VTABLE(u)->release_resources(u);
+ UNIT_VTABLE(u)->release_resources(u, inactive);
/* But we keep the unit object around for longer when it is
* referenced or configured to not be gc'ed */
- if (state != UNIT_INACTIVE)
+ if (!inactive)
return true;
- if (u->no_gc)
+ if (u->perpetual)
return true;
if (u->refs)
return true;
+ if (sd_bus_track_count(u->bus_track) > 0)
+ return true;
+
if (UNIT_VTABLE(u)->check_gc)
if (UNIT_VTABLE(u)->check_gc(u))
return true;
@@ -508,11 +532,17 @@ void unit_free(Unit *u) {
sd_bus_slot_unref(u->match_bus_slot);
+ sd_bus_track_unref(u->bus_track);
+ u->deserialized_refs = strv_free(u->deserialized_refs);
+
unit_free_requires_mounts_for(u);
SET_FOREACH(t, u->names, i)
hashmap_remove_value(u->manager->units, t, u);
+ if (!sd_id128_is_null(u->invocation_id))
+ hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
if (u->job) {
Job *j = u->job;
job_uninstall(j);
@@ -550,6 +580,8 @@ void unit_free(Unit *u) {
unit_release_cgroup(u);
+ unit_unref_uid_gid(u, false);
+
(void) manager_update_failed_units(u->manager, u, false);
set_remove(u->manager->startup_units, u);
@@ -846,18 +878,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
return r;
}
- if (c->std_output != EXEC_OUTPUT_KMSG &&
- c->std_output != EXEC_OUTPUT_SYSLOG &&
- c->std_output != EXEC_OUTPUT_JOURNAL &&
- c->std_output != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
- c->std_output != EXEC_OUTPUT_SYSLOG_AND_CONSOLE &&
- c->std_output != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
- c->std_error != EXEC_OUTPUT_KMSG &&
- c->std_error != EXEC_OUTPUT_SYSLOG &&
- c->std_error != EXEC_OUTPUT_JOURNAL &&
- c->std_error != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
- c->std_error != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
- c->std_error != EXEC_OUTPUT_SYSLOG_AND_CONSOLE)
+ if (!IN_SET(c->std_output,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) &&
+ !IN_SET(c->std_error,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE))
return 0;
/* If syslog or kernel logging is requested, make sure our own
@@ -894,6 +922,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
Unit *following;
_cleanup_set_free_ Set *following_set = NULL;
int r;
+ const char *n;
assert(u);
assert(u->type >= 0);
@@ -915,6 +944,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
"%s\tGC Check Good: %s\n"
"%s\tNeed Daemon Reload: %s\n"
"%s\tTransient: %s\n"
+ "%s\tPerpetual: %s\n"
"%s\tSlice: %s\n"
"%s\tCGroup: %s\n"
"%s\tCGroup realized: %s\n"
@@ -933,6 +963,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
prefix, yes_no(unit_check_gc(u)),
prefix, yes_no(unit_need_daemon_reload(u)),
prefix, yes_no(u->transient),
+ prefix, yes_no(u->perpetual),
prefix, strna(unit_slice_name(u)),
prefix, strna(u->cgroup_path),
prefix, yes_no(u->cgroup_realized),
@@ -942,6 +973,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
SET_FOREACH(t, u->names, i)
fprintf(f, "%s\tName: %s\n", prefix, t);
+ if (!sd_id128_is_null(u->invocation_id))
+ fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n",
+ prefix, SD_ID128_FORMAT_VAL(u->invocation_id));
+
STRV_FOREACH(j, u->documentation)
fprintf(f, "%s\tDocumentation: %s\n", prefix, *j);
@@ -969,8 +1004,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
if (u->job_timeout != USEC_INFINITY)
fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
- if (u->job_timeout_action != FAILURE_ACTION_NONE)
- fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action));
+ if (u->job_timeout_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action));
if (u->job_timeout_reboot_arg)
fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
@@ -1035,13 +1070,14 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
else if (u->load_state == UNIT_ERROR)
fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error));
+ for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track))
+ fprintf(f, "%s\tBus Ref: %s\n", prefix, n);
if (u->job)
job_dump(u->job, f, prefix2);
if (u->nop_job)
job_dump(u->nop_job, f, prefix2);
-
}
/* Common implementation for multiple backends */
@@ -1436,7 +1472,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
format = unit_get_status_message_format(u, t);
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof buf, format, unit_description(u));
REENABLE_WARNING;
mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING :
@@ -1476,7 +1512,7 @@ int unit_start_limit_test(Unit *u) {
log_unit_warning(u, "Start request repeated too quickly.");
u->start_limit_hit = true;
- return failure_action(u->manager, u->start_limit_action, u->reboot_arg);
+ return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed");
}
/* Errors:
@@ -1602,6 +1638,18 @@ int unit_stop(Unit *u) {
return UNIT_VTABLE(u)->stop(u);
}
+bool unit_can_stop(Unit *u) {
+ assert(u);
+
+ if (!unit_supported(u))
+ return false;
+
+ if (u->perpetual)
+ return false;
+
+ return !!UNIT_VTABLE(u)->stop;
+}
+
/* Errors:
* -EBADR: This unit type does not support reloading.
* -ENOEXEC: Unit is not started.
@@ -2136,13 +2184,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) {
case JOB_VERIFY_ACTIVE:
case JOB_START:
- case JOB_STOP:
case JOB_NOP:
+ /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+ * startable by us but may appear due to external events, and it thus makes sense to permit enqueing
+ * jobs for it. */
return true;
+ case JOB_STOP:
+ /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to
+ * external events), hence it makes no sense to permit enqueing such a request either. */
+ return !u->perpetual;
+
case JOB_RESTART:
case JOB_TRY_RESTART:
- return unit_can_start(u);
+ return unit_can_stop(u) && unit_can_start(u);
case JOB_RELOAD:
case JOB_TRY_RELOAD:
@@ -2212,6 +2267,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen
return 0;
}
+ if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) {
+ log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
+ return 0;
+ }
+
r = set_ensure_allocated(&u->dependencies[d], NULL);
if (r < 0)
return r;
@@ -2374,6 +2434,15 @@ char *unit_dbus_path(Unit *u) {
return unit_dbus_path_from_name(u->id);
}
+char *unit_dbus_path_invocation_id(Unit *u) {
+ assert(u);
+
+ if (sd_id128_is_null(u->invocation_id))
+ return NULL;
+
+ return unit_dbus_path_from_name(u->invocation_id_string);
+}
+
int unit_set_slice(Unit *u, Unit *slice) {
assert(u);
assert(slice);
@@ -2608,21 +2677,34 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
unit_serialize_item(u, f, "transient", yes_no(u->transient));
- unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base);
+
+ unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+ if (u->cpu_usage_last != NSEC_INFINITY)
+ unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
if (u->cgroup_path)
unit_serialize_item(u, f, "cgroup", u->cgroup_path);
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
+ if (uid_is_valid(u->ref_uid))
+ unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
+ if (gid_is_valid(u->ref_gid))
+ unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+ bus_track_serialize(u->bus_track, f, "ref");
+
if (serialize_jobs) {
if (u->job) {
fprintf(f, "job\n");
- job_serialize(u->job, f, fds);
+ job_serialize(u->job, f);
}
if (u->nop_job) {
fprintf(f, "job\n");
- job_serialize(u->nop_job, f, fds);
+ job_serialize(u->nop_job, f);
}
}
@@ -2752,7 +2834,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
if (!j)
return log_oom();
- r = job_deserialize(j, f, fds);
+ r = job_deserialize(j, f);
if (r < 0) {
job_free(j);
return r;
@@ -2824,11 +2906,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
- } else if (streq(l, "cpuacct-usage-base")) {
+ } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
+
+ r = safe_atou64(v, &u->cpu_usage_base);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "cpu-usage-last")) {
- r = safe_atou64(v, &u->cpuacct_usage_base);
+ r = safe_atou64(v, &u->cpu_usage_last);
if (r < 0)
- log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
+ log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v);
continue;
@@ -2851,6 +2941,47 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
u->cgroup_realized = b;
continue;
+
+ } else if (streq(l, "ref-uid")) {
+ uid_t uid;
+
+ r = parse_uid(v, &uid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, uid, GID_INVALID);
+
+ continue;
+
+ } else if (streq(l, "ref-gid")) {
+ gid_t gid;
+
+ r = parse_gid(v, &gid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, UID_INVALID, gid);
+
+ } else if (streq(l, "ref")) {
+
+ r = strv_extend(&u->deserialized_refs, v);
+ if (r < 0)
+ log_oom();
+
+ continue;
+ } else if (streq(l, "invocation-id")) {
+ sd_id128_t id;
+
+ r = sd_id128_from_string(v, &id);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v);
+ else {
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m");
+ }
+
+ continue;
}
if (unit_can_serialize(u)) {
@@ -2925,7 +3056,8 @@ int unit_add_node_link(Unit *u, const char *what, bool wants, UnitDependency dep
}
int unit_coldplug(Unit *u) {
- int r = 0, q = 0;
+ int r = 0, q;
+ char **i;
assert(u);
@@ -2936,21 +3068,29 @@ int unit_coldplug(Unit *u) {
u->coldplugged = true;
- if (UNIT_VTABLE(u)->coldplug)
- r = UNIT_VTABLE(u)->coldplug(u);
+ STRV_FOREACH(i, u->deserialized_refs) {
+ q = bus_unit_track_add_name(u, *i);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+ u->deserialized_refs = strv_free(u->deserialized_refs);
- if (u->job)
- q = job_coldplug(u->job);
+ if (UNIT_VTABLE(u)->coldplug) {
+ q = UNIT_VTABLE(u)->coldplug(u);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
- if (r < 0)
- return r;
- if (q < 0)
- return q;
+ if (u->job) {
+ q = job_coldplug(u->job);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
- return 0;
+ return r;
}
-static bool fragment_mtime_newer(const char *path, usec_t mtime) {
+static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) {
struct stat st;
if (!path)
@@ -2960,12 +3100,12 @@ static bool fragment_mtime_newer(const char *path, usec_t mtime) {
/* What, cannot access this anymore? */
return true;
- if (mtime > 0)
+ if (path_masked)
+ /* For masked files check if they are still so */
+ return !null_or_empty(&st);
+ else
/* For non-empty files check the mtime */
return timespec_load(&st.st_mtim) > mtime;
- else if (!null_or_empty(&st))
- /* For masked files check if they are still so */
- return true;
return false;
}
@@ -2976,18 +3116,22 @@ bool unit_need_daemon_reload(Unit *u) {
assert(u);
- if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime))
+ /* For unit files, we allow masking… */
+ if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime,
+ u->load_state == UNIT_MASKED))
return true;
- if (fragment_mtime_newer(u->source_path, u->source_mtime))
+ /* Source paths should not be masked… */
+ if (fragment_mtime_newer(u->source_path, u->source_mtime, false))
return true;
(void) unit_find_dropin_paths(u, &t);
if (!strv_equal(u->dropin_paths, t))
return true;
+ /* … any drop-ins that are masked are simply omitted from the list. */
STRV_FOREACH(path, u->dropin_paths)
- if (fragment_mtime_newer(*path, u->dropin_mtime))
+ if (fragment_mtime_newer(*path, u->dropin_mtime, false))
return true;
return false;
@@ -3224,6 +3368,33 @@ void unit_ref_unset(UnitRef *ref) {
ref->unit = NULL;
}
+static int user_from_unit_name(Unit *u, char **ret) {
+
+ static const uint8_t hash_key[] = {
+ 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96,
+ 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec
+ };
+
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ r = unit_name_to_prefix(u->id, &n);
+ if (r < 0)
+ return r;
+
+ if (valid_user_group_name(n)) {
+ *ret = n;
+ n = NULL;
+ return 0;
+ }
+
+ /* If we can't use the unit name as a user name, then let's hash it and use that */
+ if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
int unit_patch_contexts(Unit *u) {
CGroupContext *cc;
ExecContext *ec;
@@ -3267,7 +3438,33 @@ int unit_patch_contexts(Unit *u) {
ec->no_new_privileges = true;
if (ec->private_devices)
- ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
+ ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));
+
+ if (ec->protect_kernel_modules)
+ ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
+ if (ec->dynamic_user) {
+ if (!ec->user) {
+ r = user_from_unit_name(u, &ec->user);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ec->group) {
+ ec->group = strdup(ec->user);
+ if (!ec->group)
+ return -ENOMEM;
+ }
+
+ /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID
+ * around in the file system or on IPC objects. Hence enforce a strict sandbox. */
+
+ ec->private_tmp = true;
+ ec->remove_ipc = true;
+ ec->protect_system = PROTECT_SYSTEM_STRICT;
+ if (ec->protect_home == PROTECT_HOME_NO)
+ ec->protect_home = PROTECT_HOME_READ_ONLY;
+ }
}
cc = unit_get_cgroup_context(u);
@@ -3652,7 +3849,7 @@ int unit_kill_context(
* there we get proper events. Hence rely on
* them.*/
- if (cg_unified() > 0 ||
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
(detect_container() == 0 && !unit_cgroup_delegate(u)))
wait_for_exit = true;
@@ -3776,6 +3973,26 @@ int unit_setup_exec_runtime(Unit *u) {
return exec_runtime_make(rt, unit_get_exec_context(u), u->id);
}
+int unit_setup_dynamic_creds(Unit *u) {
+ ExecContext *ec;
+ DynamicCreds *dcreds;
+ size_t offset;
+
+ assert(u);
+
+ offset = UNIT_VTABLE(u)->dynamic_creds_offset;
+ assert(offset > 0);
+ dcreds = (DynamicCreds*) ((uint8_t*) u + offset);
+
+ ec = unit_get_exec_context(u);
+ assert(ec);
+
+ if (!ec->dynamic_user)
+ return 0;
+
+ return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group);
+}
+
bool unit_type_supported(UnitType t) {
if (_unlikely_(t < 0))
return false;
@@ -3869,3 +4086,198 @@ pid_t unit_main_pid(Unit *u) {
return 0;
}
+
+static void unit_unref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ bool destroy_now,
+ void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) {
+
+ assert(u);
+ assert(ref_uid);
+ assert(_manager_unref_uid);
+
+ /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and
+ * gid_t are actually the same time, with the same validity rules.
+ *
+ * Drops a reference to UID/GID from a unit. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (!uid_is_valid(*ref_uid))
+ return;
+
+ _manager_unref_uid(u->manager, *ref_uid, destroy_now);
+ *ref_uid = UID_INVALID;
+}
+
+void unit_unref_uid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid);
+}
+
+void unit_unref_gid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid);
+}
+
+static int unit_ref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ uid_t uid,
+ bool clean_ipc,
+ int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) {
+
+ int r;
+
+ assert(u);
+ assert(ref_uid);
+ assert(uid_is_valid(uid));
+ assert(_manager_ref_uid);
+
+ /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t
+ * are actually the same type, and have the same validity rules.
+ *
+ * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a
+ * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter
+ * drops to zero. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (*ref_uid == uid)
+ return 0;
+
+ if (uid_is_valid(*ref_uid)) /* Already set? */
+ return -EBUSY;
+
+ r = _manager_ref_uid(u->manager, uid, clean_ipc);
+ if (r < 0)
+ return r;
+
+ *ref_uid = uid;
+ return 1;
+}
+
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid);
+}
+
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid);
+}
+
+static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ /* Reference both a UID and a GID in one go. Either references both, or neither. */
+
+ if (uid_is_valid(uid)) {
+ r = unit_ref_uid(u, uid, clean_ipc);
+ if (r < 0)
+ return r;
+ }
+
+ if (gid_is_valid(gid)) {
+ q = unit_ref_gid(u, gid, clean_ipc);
+ if (q < 0) {
+ if (r > 0)
+ unit_unref_uid(u, false);
+
+ return q;
+ }
+ }
+
+ return r > 0 || q > 0;
+}
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) {
+ ExecContext *c;
+ int r;
+
+ assert(u);
+
+ c = unit_get_exec_context(u);
+
+ r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m");
+
+ return r;
+}
+
+void unit_unref_uid_gid(Unit *u, bool destroy_now) {
+ assert(u);
+
+ unit_unref_uid(u, destroy_now);
+ unit_unref_gid(u, destroy_now);
+}
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
+ int r;
+
+ assert(u);
+
+ /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names
+ * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC
+ * objects when no service references the UID/GID anymore. */
+
+ r = unit_ref_uid_gid(u, uid, gid);
+ if (r > 0)
+ bus_unit_send_change_signal(u);
+}
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id) {
+ int r;
+
+ assert(u);
+
+ /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */
+
+ if (sd_id128_equal(u->invocation_id, id))
+ return 0;
+
+ if (!sd_id128_is_null(u->invocation_id))
+ (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+ if (sd_id128_is_null(id)) {
+ r = 0;
+ goto reset;
+ }
+
+ r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops);
+ if (r < 0)
+ goto reset;
+
+ u->invocation_id = id;
+ sd_id128_to_string(id, u->invocation_id_string);
+
+ r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u);
+ if (r < 0)
+ goto reset;
+
+ return 0;
+
+reset:
+ u->invocation_id = SD_ID128_NULL;
+ u->invocation_id_string[0] = 0;
+ return r;
+}
+
+int unit_acquire_invocation_id(Unit *u) {
+ sd_id128_t id;
+ int r;
+
+ assert(u);
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m");
+
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+
+ return 0;
+}
diff --git a/src/core/unit.h b/src/core/unit.h
index 1eabfa51e2..991543664b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -29,7 +29,7 @@ typedef struct UnitRef UnitRef;
typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
#include "condition.h"
-#include "failure-action.h"
+#include "emergency-action.h"
#include "install.h"
#include "list.h"
#include "unit-name.h"
@@ -108,9 +108,13 @@ struct Unit {
/* The slot used for watching NameOwnerChanged signals */
sd_bus_slot *match_bus_slot;
+ /* References to this unit from clients */
+ sd_bus_track *bus_track;
+ char **deserialized_refs;
+
/* Job timeout and action to take */
usec_t job_timeout;
- FailureAction job_timeout_action;
+ EmergencyAction job_timeout_action;
char *job_timeout_reboot_arg;
/* References to this */
@@ -174,18 +178,23 @@ struct Unit {
/* Put a ratelimit on unit starting */
RateLimit start_limit;
- FailureAction start_limit_action;
+ EmergencyAction start_limit_action;
char *reboot_arg;
/* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
RateLimit auto_stop_ratelimit;
+ /* Reference to a specific UID/GID */
+ uid_t ref_uid;
+ gid_t ref_gid;
+
/* Cached unit file state and preset */
UnitFileState unit_file_state;
int unit_file_preset;
- /* Where the cpuacct.usage cgroup counter was at the time the unit was started */
- nsec_t cpuacct_usage_base;
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
+ nsec_t cpu_usage_last; /* the most recently read value */
/* Counterparts in the cgroup filesystem */
char *cgroup_path;
@@ -195,11 +204,13 @@ struct Unit {
CGroupMask cgroup_members_mask;
int cgroup_inotify_wd;
- uint32_t cgroup_netclass_id;
-
/* How to start OnFailure units */
JobMode on_failure_job_mode;
+ /* The current invocation ID */
+ sd_id128_t invocation_id;
+ char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */
+
/* Garbage collect us we nobody wants or requires us anymore */
bool stop_when_unneeded;
@@ -225,6 +236,9 @@ struct Unit {
/* Is this a transient unit? */
bool transient;
+ /* Is this a unit that is always running and cannot be stopped? */
+ bool perpetual;
+
bool in_load_queue:1;
bool in_dbus_queue:1;
bool in_cleanup_queue:1;
@@ -233,8 +247,6 @@ struct Unit {
bool sent_dbus_new_signal:1;
- bool no_gc:1;
-
bool in_audit:1;
bool cgroup_realized:1;
@@ -245,6 +257,9 @@ struct Unit {
/* Did we already invoke unit_coldplug() for this unit? */
bool coldplugged:1;
+
+ /* For transient units: whether to add a bus track reference after creating the unit */
+ bool bus_track_add:1;
};
struct UnitStatusMessageFormats {
@@ -291,6 +306,10 @@ struct UnitVTable {
* that */
size_t exec_runtime_offset;
+ /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type
+ * has that. */
+ size_t dynamic_creds_offset;
+
/* The name of the configuration file section with the private settings of this unit */
const char *private_section;
@@ -354,7 +373,7 @@ struct UnitVTable {
/* When the unit is not running and no job for it queued we
* shall release its runtime resources */
- void (*release_resources)(Unit *u);
+ void (*release_resources)(Unit *u, bool inactive);
/* Invoked on every child that died */
void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
@@ -369,8 +388,7 @@ struct UnitVTable {
/* Called whenever a process of this unit sends us a message */
void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds);
- /* Called whenever a name this Unit registered for comes or
- * goes away. */
+ /* Called whenever a name this Unit registered for comes or goes away. */
void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner);
/* Called for each property that is being set */
@@ -463,6 +481,7 @@ DEFINE_CAST(SCOPE, Scope);
Unit *unit_new(Manager *m, size_t size);
void unit_free(Unit *u);
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
int unit_add_name(Unit *u, const char *name);
int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference);
@@ -507,6 +526,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix);
bool unit_can_reload(Unit *u) _pure_;
bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
bool unit_can_isolate(Unit *u) _pure_;
int unit_start(Unit *u);
@@ -533,6 +553,7 @@ bool unit_job_is_applicable(Unit *u, JobType j);
int set_unit_path(const char *p);
char *unit_dbus_path(Unit *u);
+char *unit_dbus_path_invocation_id(Unit *u);
int unit_load_related_unit(Unit *u, const char *type, Unit **_found);
@@ -589,6 +610,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
int unit_setup_exec_runtime(Unit *u);
+int unit_setup_dynamic_creds(Unit *u);
int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data);
int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *format, ...) _printf_(4,5);
@@ -618,12 +640,26 @@ int unit_fail_if_symlink(Unit *u, const char* where);
int unit_start_limit_test(Unit *u);
+void unit_unref_uid(Unit *u, bool destroy_now);
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc);
+
+void unit_unref_gid(Unit *u, bool destroy_now);
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc);
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid);
+void unit_unref_uid_gid(Unit *u, bool destroy_now);
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id);
+int unit_acquire_invocation_id(Unit *u);
+
/* Macros which append UNIT= or USER_UNIT= to the message */
#define log_unit_full(unit, level, error, ...) \
({ \
- Unit *_u = (unit); \
- _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \
+ const Unit *_u = (unit); \
+ _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \
log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
})