summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
Diffstat (limited to 'src/core')
-rw-r--r--src/core/automount.c6
-rw-r--r--src/core/busname.c7
-rw-r--r--src/core/cgroup.c34
-rw-r--r--src/core/cgroup.h1
-rw-r--r--src/core/dbus-cgroup.c85
-rw-r--r--src/core/dbus-execute.c41
-rw-r--r--src/core/dbus-manager.c61
-rw-r--r--src/core/dbus-scope.c2
-rw-r--r--src/core/dbus-socket.c1
-rw-r--r--src/core/dynamic-user.c763
-rw-r--r--src/core/dynamic-user.h66
-rw-r--r--src/core/execute.c455
-rw-r--r--src/core/execute.h23
-rw-r--r--src/core/killall.c7
-rw-r--r--src/core/kmod-setup.c3
-rw-r--r--src/core/load-fragment-gperf.gperf.m423
-rw-r--r--src/core/load-fragment.c197
-rw-r--r--src/core/load-fragment.h2
-rw-r--r--src/core/machine-id-setup.c226
-rw-r--r--src/core/machine-id-setup.h2
-rw-r--r--src/core/macros.systemd.in8
-rw-r--r--src/core/main.c87
-rw-r--r--src/core/manager.c92
-rw-r--r--src/core/manager.h3
-rw-r--r--src/core/mount-setup.c16
-rw-r--r--src/core/mount.c16
-rw-r--r--src/core/mount.h3
-rw-r--r--src/core/namespace.c51
-rw-r--r--src/core/namespace.h6
-rw-r--r--src/core/scope.c21
-rw-r--r--src/core/scope.h3
-rw-r--r--src/core/selinux-access.c2
-rw-r--r--src/core/selinux-setup.c2
-rw-r--r--src/core/service.c97
-rw-r--r--src/core/service.h4
-rw-r--r--src/core/shutdown.c20
-rw-r--r--src/core/socket.c216
-rw-r--r--src/core/socket.h18
-rw-r--r--src/core/swap.c15
-rw-r--r--src/core/swap.h1
-rw-r--r--src/core/system.conf2
-rw-r--r--src/core/transaction.c7
-rw-r--r--src/core/unit.c172
-rw-r--r--src/core/unit.h9
44 files changed, 2341 insertions, 535 deletions
diff --git a/src/core/automount.c b/src/core/automount.c
index 10ea4ee17d..20a73c76f9 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -98,9 +98,6 @@ static void unmount_autofs(Automount *a) {
if (a->pipe_fd < 0)
return;
- automount_send_ready(a, a->tokens, -EHOSTDOWN);
- automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
-
a->pipe_event_source = sd_event_source_unref(a->pipe_event_source);
a->pipe_fd = safe_close(a->pipe_fd);
@@ -109,6 +106,9 @@ static void unmount_autofs(Automount *a) {
if (a->where &&
(UNIT(a)->manager->exit_code != MANAGER_RELOAD &&
UNIT(a)->manager->exit_code != MANAGER_REEXECUTE)) {
+ automount_send_ready(a, a->tokens, -EHOSTDOWN);
+ automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
+
r = repeat_unmount(a->where, MNT_DETACH);
if (r < 0)
log_error_errno(r, "Failed to unmount: %m");
diff --git a/src/core/busname.c b/src/core/busname.c
index f03a95c24e..730be2ee14 100644
--- a/src/core/busname.c
+++ b/src/core/busname.c
@@ -998,12 +998,7 @@ static int busname_get_timeout(Unit *u, usec_t *timeout) {
}
static bool busname_supported(void) {
- static int supported = -1;
-
- if (supported < 0)
- supported = is_kdbus_available();
-
- return supported;
+ return false;
}
static int busname_control_pid(Unit *u) {
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index f3e0c54b76..c19e43f571 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -36,8 +36,7 @@
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
-static void cgroup_compat_warn(void)
-{
+static void cgroup_compat_warn(void) {
static bool cgroup_compat_warned = false;
if (cgroup_compat_warned)
@@ -50,7 +49,7 @@ static void cgroup_compat_warn(void)
#define log_cgroup_compat(unit, fmt, ...) do { \
cgroup_compat_warn(); \
log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \
- } while (0)
+ } while (false)
void cgroup_context_init(CGroupContext *c) {
assert(c);
@@ -756,16 +755,20 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
cgroup_apply_unified_memory_limit(u, "memory.max", max);
} else {
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ uint64_t val = c->memory_limit;
- if (c->memory_limit != CGROUP_LIMIT_MAX)
- xsprintf(buf, "%" PRIu64 "\n", c->memory_limit);
- else {
- xsprintf(buf, "%" PRIu64 "\n", c->memory_max);
+ if (val == CGROUP_LIMIT_MAX) {
+ val = c->memory_max;
- if (c->memory_max != CGROUP_LIMIT_MAX)
- log_cgroup_compat(u, "Applying MemoryMax %" PRIu64 " as MemoryLimit", c->memory_max);
+ if (val != CGROUP_LIMIT_MAX)
+ log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
}
+ if (val == CGROUP_LIMIT_MAX)
+ strncpy(buf, "-1\n", sizeof(buf));
+ else
+ xsprintf(buf, "%" PRIu64 "\n", val);
+
r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
@@ -797,7 +800,10 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
"/dev/random\0" "rwm\0"
"/dev/urandom\0" "rwm\0"
"/dev/tty\0" "rwm\0"
- "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
+ "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */
+ /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
const char *x, *y;
@@ -1133,7 +1139,7 @@ int unit_watch_cgroup(Unit *u) {
/* Only applies to the unified hierarchy */
r = cg_unified();
if (r < 0)
- return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m");
+ return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
if (r == 0)
return 0;
@@ -1655,7 +1661,7 @@ int manager_setup_cgroup(Manager *m) {
/* 3. Install agent */
if (unified) {
- /* In the unified hierarchy we can can get
+ /* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
@@ -1702,7 +1708,7 @@ int manager_setup_cgroup(Manager *m) {
/* also, move all other userspace processes remaining
* in the root cgroup into that scope. */
- r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false);
+ r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
if (r < 0)
log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
@@ -1723,7 +1729,7 @@ int manager_setup_cgroup(Manager *m) {
return log_error_errno(r, "Failed to determine supported controllers: %m");
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
- log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & c));
+ log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
return 0;
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index f21409bd5d..a57403e79f 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -119,7 +119,6 @@ struct CGroupContext {
bool delegate;
};
-#include "cgroup-util.h"
#include "unit.h"
void cgroup_context_init(CGroupContext *c);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 8525fa1bf1..85b0c86a2f 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -641,7 +641,7 @@ int bus_cgroup_set_property(
return 1;
- } else if (streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth")) {
+ } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
const char *path;
bool read = true;
unsigned n = 0;
@@ -835,6 +835,8 @@ int bus_cgroup_set_property(
r = sd_bus_message_read(message, "t", &v);
if (r < 0)
return r;
+ if (v <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
if (streq(name, "MemoryLow"))
@@ -847,19 +849,54 @@ int bus_cgroup_set_property(
unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
if (v == CGROUP_LIMIT_MAX)
- unit_write_drop_in_private_format(u, mode, name, "%s=max", name);
+ unit_write_drop_in_private_format(u, mode, name, "%s=infinity", name);
else
unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64, name, v);
}
return 1;
+ } else if (STR_IN_SET(name, "MemoryLowScale", "MemoryHighScale", "MemoryMaxScale")) {
+ uint32_t raw;
+ uint64_t v;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ v = physical_memory_scale(raw, UINT32_MAX);
+ if (v <= 0 || v == UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ const char *e;
+
+ /* Chop off suffix */
+ assert_se(e = endswith(name, "Scale"));
+ name = strndupa(name, e - name);
+
+ if (streq(name, "MemoryLow"))
+ c->memory_low = v;
+ else if (streq(name, "MemoryHigh"))
+ c->memory_high = v;
+ else
+ c->memory_max = v;
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name,
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
+
} else if (streq(name, "MemoryLimit")) {
uint64_t limit;
r = sd_bus_message_read(message, "t", &limit);
if (r < 0)
return r;
+ if (limit <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
c->memory_limit = limit;
@@ -873,6 +910,27 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "MemoryLimitScale")) {
+ uint64_t limit;
+ uint32_t raw;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ limit = physical_memory_scale(raw, UINT32_MAX);
+ if (limit <= 0 || limit == UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ c->memory_limit = limit;
+ unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
+ unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
+
} else if (streq(name, "DevicePolicy")) {
const char *policy;
CGroupDevicePolicy p;
@@ -904,6 +962,7 @@ int bus_cgroup_set_property(
while ((r = sd_bus_message_read(message, "(ss)", &path, &rwm)) > 0) {
if ((!startswith(path, "/dev/") &&
+ !startswith(path, "/run/systemd/inaccessible/") &&
!startswith(path, "block-") &&
!startswith(path, "char-")) ||
strpbrk(path, WHITESPACE))
@@ -1003,6 +1062,8 @@ int bus_cgroup_set_property(
r = sd_bus_message_read(message, "t", &limit);
if (r < 0)
return r;
+ if (limit <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
c->tasks_max = limit;
@@ -1015,6 +1076,26 @@ int bus_cgroup_set_property(
}
return 1;
+ } else if (streq(name, "TasksMaxScale")) {
+ uint64_t limit;
+ uint32_t raw;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ limit = system_tasks_max_scale(raw, UINT32_MAX);
+ if (limit <= 0 || limit >= UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ c->tasks_max = limit;
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+ unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
}
if (u->transient && u->load_state == UNIT_STUB) {
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 4c88c41127..9c50cd93e5 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -44,6 +44,7 @@
#endif
#include "strv.h"
#include "syslog-util.h"
+#include "user-util.h"
#include "utf8.h"
BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput);
@@ -693,11 +694,15 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("AmbientCapabilities", "t", property_get_ambient_capabilities, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_dirs), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_dirs), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_dirs), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -720,6 +725,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, runtime_directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
@@ -836,6 +842,9 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
+ if (!isempty(uu) && !valid_user_group_name_or_id(uu))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user name: %s", uu);
+
if (mode != UNIT_CHECK) {
if (isempty(uu))
@@ -855,6 +864,9 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
+ if (!isempty(gg) && !valid_user_group_name_or_id(gg))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group name: %s", gg);
+
if (mode != UNIT_CHECK) {
if (isempty(gg))
@@ -1057,7 +1069,8 @@ int bus_exec_context_set_transient_property(
} else if (STR_IN_SET(name,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
"PrivateTmp", "PrivateDevices", "PrivateNetwork",
- "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute")) {
+ "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
+ "RestrictRealtime", "DynamicUser")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@@ -1083,6 +1096,10 @@ int bus_exec_context_set_transient_property(
c->syslog_level_prefix = b;
else if (streq(name, "MemoryDenyWriteExecute"))
c->memory_deny_write_execute = b;
+ else if (streq(name, "RestrictRealtime"))
+ c->restrict_realtime = b;
+ else if (streq(name, "DynamicUser"))
+ c->dynamic_user = b;
unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
}
@@ -1320,8 +1337,8 @@ int bus_exec_context_set_transient_property(
return 1;
- } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories")) {
-
+ } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
+ "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) {
_cleanup_strv_free_ char **l = NULL;
char ***dirs;
char **p;
@@ -1343,12 +1360,12 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
_cleanup_free_ char *joined = NULL;
- if (streq(name, "ReadWriteDirectories"))
- dirs = &c->read_write_dirs;
- else if (streq(name, "ReadOnlyDirectories"))
- dirs = &c->read_only_dirs;
- else /* "InaccessibleDirectories" */
- dirs = &c->inaccessible_dirs;
+ if (STR_IN_SET(name, "ReadWriteDirectories", "ReadWritePaths"))
+ dirs = &c->read_write_paths;
+ else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths"))
+ dirs = &c->read_only_paths;
+ else /* "InaccessiblePaths" */
+ dirs = &c->inaccessible_paths;
if (strv_length(l) == 0) {
*dirs = strv_free(*dirs);
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index 86722e1162..ef05a75a8b 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -43,6 +43,7 @@
#include "string-util.h"
#include "strv.h"
#include "syslog-util.h"
+#include "user-util.h"
#include "virt.h"
#include "watchdog.h"
@@ -781,6 +782,7 @@ static int transient_unit_from_message(
return r;
/* Now load the missing bits of the unit we just created */
+ unit_add_to_load_queue(u);
manager_dispatch_load_queue(m);
*unit = u;
@@ -1510,8 +1512,8 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd
}
static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) {
- uint8_t code;
Manager *m = userdata;
+ uint8_t code;
int r;
assert(message);
@@ -1533,6 +1535,61 @@ static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_
return sd_bus_reply_method_return(message, NULL);
}
+static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_basic(message, 's', &name);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!valid_user_group_name(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name);
+
+ r = dynamic_user_lookup_name(m, name, &uid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) uid);
+}
+
+static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid) == sizeof(uint32_t));
+ r = sd_bus_message_read_basic(message, 'u', &uid);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid);
+
+ r = dynamic_user_lookup_uid(m, uid, &name);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", name);
+}
+
static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
Manager *m = userdata;
@@ -2198,6 +2255,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_SIGNAL("UnitNew", "so", 0),
SD_BUS_SIGNAL("UnitRemoved", "so", 0),
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
index f557eedfc3..1abaf9f658 100644
--- a/src/core/dbus-scope.c
+++ b/src/core/dbus-scope.c
@@ -225,5 +225,5 @@ int bus_scope_send_request_stop(Scope *s) {
if (r < 0)
return r;
- return sd_bus_send_to(UNIT(s)->manager->api_bus, m, /* s->controller */ NULL, NULL);
+ return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL);
}
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index 961340608d..9a071a1355 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -137,6 +137,7 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("Symlinks", "as", NULL, offsetof(Socket, symlinks), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Mark", "i", bus_property_get_int, offsetof(Socket, mark), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MaxConnections", "u", bus_property_get_unsigned, offsetof(Socket, max_connections), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnectionsPerSource", "u", bus_property_get_unsigned, offsetof(Socket, max_connections_per_source), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MessageQueueMaxMessages", "x", bus_property_get_long, offsetof(Socket, mq_maxmsg), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MessageQueueMessageSize", "x", bus_property_get_long, offsetof(Socket, mq_msgsize), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReusePort", "b", bus_property_get_bool, offsetof(Socket, reuse_port), SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
new file mode 100644
index 0000000000..8035bee231
--- /dev/null
+++ b/src/core/dynamic-user.c
@@ -0,0 +1,763 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <grp.h>
+#include <pwd.h>
+#include <sys/file.h>
+
+#include "dynamic-user.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "fileio.h"
+
+/* Let's pick a UIDs within the 16bit range, so that we are compatible with containers using 16bit user namespacing. At
+ * least on Fedora normal users are allocated until UID 60000, hence do not allocate from below this. Also stay away
+ * from the upper end of the range as that is often used for overflow/nobody users. */
+#define UID_PICK_MIN ((uid_t) UINT32_C(0x0000EF00))
+#define UID_PICK_MAX ((uid_t) UINT32_C(0x0000FFEF))
+
+/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (UID_PICK_MAX - UID_PICK_MIN + 1)) + UID_PICK_MIN)
+
+static DynamicUser* dynamic_user_free(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ if (d->manager)
+ (void) hashmap_remove(d->manager->dynamic_users, d->name);
+
+ safe_close_pair(d->storage_socket);
+ free(d);
+
+ return NULL;
+}
+
+static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) {
+ DynamicUser *d = NULL;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(storage_socket);
+
+ r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1);
+ if (!d)
+ return -ENOMEM;
+
+ strcpy(d->name, name);
+
+ d->storage_socket[0] = storage_socket[0];
+ d->storage_socket[1] = storage_socket[1];
+
+ r = hashmap_put(m->dynamic_users, d->name, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ d->manager = m;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
+ _cleanup_close_pair_ int storage_socket[2] = { -1, -1 };
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for
+ * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really
+ * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do
+ * from PID 1. To allow the children and PID 1 share information about allocated UIDs we use an anonymous
+ * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the
+ * allocated UID number, plus an fd referencing the lock file for the UID
+ * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can
+ * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair
+ * may exist in three different states:
+ *
+ * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized.
+ *
+ * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a
+ * statically assigned UID by the same name, which we are reusing.
+ *
+ * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic
+ * UID and locked it in the file system, using the lock fd.
+ *
+ * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it
+ * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here:
+ * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in
+ * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX
+ * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous,
+ * nobody else could get any access to it except via our own fd) and we want to synchronize access between all
+ * processes that have access to it. */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (d) {
+ /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
+ d->n_ref++;
+ *ret = d;
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(name))
+ return -EINVAL;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0)
+ return -errno;
+
+ r = dynamic_user_add(m, name, storage_socket, &d);
+ if (r < 0)
+ return r;
+
+ storage_socket[0] = storage_socket[1] = -1;
+
+ if (ret) {
+ d->n_ref++;
+ *ret = d;
+ }
+
+ return 1;
+}
+
+static int pick_uid(const char *name, uid_t *ret_uid) {
+
+ static const uint8_t hash_key[] = {
+ 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5,
+ 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59
+ };
+
+ unsigned n_tries = 100;
+ uid_t candidate;
+ int r;
+
+ /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We start with a UID
+ * generated as hash from the user name. */
+ candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key));
+
+ (void) mkdir("/run/systemd/dynamic-uid", 0755);
+
+ for (;;) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_close_ int lock_fd = -1;
+ ssize_t l;
+
+ if (--n_tries <= 0) /* Give up retrying eventually */
+ return -EBUSY;
+
+ if (candidate < UID_PICK_MIN || candidate > UID_PICK_MAX)
+ goto next;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
+
+ for (;;) {
+ struct stat st;
+
+ lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (lock_fd < 0)
+ return -errno;
+
+ r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */
+ if (r < 0) {
+ if (errno == EBUSY || errno == EAGAIN)
+ goto next; /* already in use */
+
+ return -errno;
+ }
+
+ if (fstat(lock_fd, &st) < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ /* Oh, bummer, we got got the lock, but the file was unlinked between the time we opened it and
+ * got the lock. Close it, and try again. */
+ lock_fd = safe_close(lock_fd);
+ }
+
+ /* Some superficial check whether this UID/GID might already be taken by some static user */
+ if (getpwuid(candidate) || getgrgid((gid_t) candidate)) {
+ (void) unlink(lock_path);
+ goto next;
+ }
+
+ /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
+ l = pwritev(lock_fd,
+ (struct iovec[2]) {
+ { .iov_base = (char*) name, .iov_len = strlen(name) },
+ { .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
+ }, 2, 0);
+ if (l < 0) {
+ (void) unlink(lock_path);
+ return -errno;
+ }
+
+ (void) ftruncate(lock_fd, l);
+
+ *ret_uid = candidate;
+ r = lock_fd;
+ lock_fd = -1;
+
+ return r;
+
+ next:
+ /* Pick another random UID, and see if that works for us. */
+ random_bytes(&candidate, sizeof(candidate));
+ candidate = UID_CLAMP_INTO_RANGE(candidate);
+ }
+}
+
+static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
+ uid_t uid = UID_INVALID;
+ struct iovec iov = {
+ .iov_base = &uid,
+ .iov_len = sizeof(uid),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ssize_t k;
+ int lock_fd = -1;
+
+ assert(d);
+ assert(ret_uid);
+ assert(ret_lock_fd);
+
+ /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock
+ * on the socket taken. */
+
+ k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC);
+ if (k < 0)
+ return -errno;
+
+ cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+ if (cmsg)
+ lock_fd = *(int*) CMSG_DATA(cmsg);
+ else
+ cmsg_close_all(&mh); /* just in case... */
+
+ *ret_uid = uid;
+ *ret_lock_fd = lock_fd;
+
+ return 0;
+}
+
+static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
+ struct iovec iov = {
+ .iov_base = &uid,
+ .iov_len = sizeof(uid),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ ssize_t k;
+
+ assert(d);
+
+ /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */
+
+ if (lock_fd >= 0) {
+ struct cmsghdr *cmsg;
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int));
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int));
+ } else {
+ mh.msg_control = NULL;
+ mh.msg_controllen = 0;
+ }
+
+ k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void unlink_uid_lock(int lock_fd, uid_t uid) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+
+ if (lock_fd < 0)
+ return;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ (void) unlink_noerrno(lock_path);
+}
+
+int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
+
+ _cleanup_close_ int etc_passwd_lock_fd = -1, uid_lock_fd = -1;
+ uid_t uid = UID_INVALID;
+ int r;
+
+ assert(d);
+
+ /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist
+ * yet. If it already exists its existing UID/GID will be reused. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &uid_lock_fd);
+ if (r < 0) {
+ int new_uid_lock_fd;
+ uid_t new_uid;
+
+ if (r != -EAGAIN)
+ goto finish;
+
+ /* OK, nothing stored yet, let's try to find something useful. While we are working on this release the
+ * lock however, so that nobody else blocks on our NSS lookups. */
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+
+ /* Let's see if a proper, static user or group by this name exists. Try to take the lock on
+ * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't
+ * take the lock, given that users can't be added there anyway in this case. */
+ etc_passwd_lock_fd = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS)
+ return etc_passwd_lock_fd;
+
+ /* First, let's parse this as numeric UID */
+ r = parse_uid(d->name, &uid);
+ if (r < 0) {
+ struct passwd *p;
+ struct group *g;
+
+ /* OK, this is not a numeric UID. Let's see if there's a user by this name */
+ p = getpwnam(d->name);
+ if (p)
+ uid = p->pw_uid;
+
+ /* Let's see if there's a group by this name */
+ g = getgrnam(d->name);
+ if (g) {
+ /* If the UID/GID of the user/group of the same don't match, refuse operation */
+ if (uid != UID_INVALID && uid != (uid_t) g->gr_gid)
+ return -EILSEQ;
+
+ uid = (uid_t) g->gr_gid;
+ }
+ }
+
+ if (uid == UID_INVALID) {
+ /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */
+
+ uid_lock_fd = pick_uid(d->name, &uid);
+ if (uid_lock_fd < 0)
+ return uid_lock_fd;
+ }
+
+ /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) {
+ unlink_uid_lock(uid_lock_fd, uid);
+ return -errno;
+ }
+
+ r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd);
+ if (r < 0) {
+ if (r != -EAGAIN) {
+ /* OK, something bad happened, let's get rid of the bits we acquired. */
+ unlink_uid_lock(uid_lock_fd, uid);
+ goto finish;
+ }
+
+ /* Great! Nothing is stored here, still. Store our newly acquired data. */
+ } else {
+ /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
+ * acquired, and use what's stored now. */
+
+ unlink_uid_lock(uid_lock_fd, uid);
+ safe_close(uid_lock_fd);
+
+ uid = new_uid;
+ uid_lock_fd = new_uid_lock_fd;
+ }
+ }
+
+ /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already
+ * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID
+ * dynamically right here, push that in along with the lock fd for it. */
+ r = dynamic_user_push(d, uid, uid_lock_fd);
+ if (r < 0)
+ goto finish;
+
+ *ret = uid;
+ r = 0;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret) {
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r < 0)
+ goto finish;
+
+ r = dynamic_user_push(d, uid, lock_fd);
+ if (r < 0)
+ goto finish;
+
+ *ret = uid;
+ r = 0;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+DynamicUser* dynamic_user_ref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ assert(d->n_ref > 0);
+ d->n_ref++;
+
+ return d;
+}
+
+DynamicUser* dynamic_user_unref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed
+ * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries
+ * with no references, which is commonly the case right before a daemon reload. */
+
+ assert(d->n_ref > 0);
+ d->n_ref--;
+
+ return NULL;
+}
+
+static int dynamic_user_close(DynamicUser *d) {
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ /* Release the user ID, by releasing the lock on it, and emptying the storage socket. After this the user is
+ * unrealized again, much like it was after it the DynamicUser object was first allocated. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r == -EAGAIN) {
+ /* User wasn't realized yet, nothing to do. */
+ r = 0;
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+
+ /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
+ unlink_uid_lock(lock_fd, uid);
+ r = 1;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+DynamicUser* dynamic_user_destroy(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last
+ * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that
+ * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload
+ * cycle, where the dynamic users should not be destroyed, but our datastructures should. */
+
+ dynamic_user_unref(d);
+
+ if (d->n_ref > 0)
+ return NULL;
+
+ (void) dynamic_user_close(d);
+ return dynamic_user_free(d);
+}
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, d->storage_socket[0]);
+ if (copy0 < 0)
+ return copy0;
+
+ copy1 = fdset_put_dup(fds, d->storage_socket[1]);
+ if (copy1 < 0)
+ return copy1;
+
+ fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1);
+ }
+
+ return 0;
+}
+
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL;
+ int r, fd0, fd1;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ /* Parse the serialization again, after a daemon reload */
+
+ r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL);
+ if (r != 3 || !isempty(value)) {
+ log_debug("Unable to parse dynamic user line.");
+ return;
+ }
+
+ if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add dynamic user: %m");
+ return;
+ }
+
+ (void) fdset_remove(fds, fd0);
+ (void) fdset_remove(fds, fd1);
+}
+
+void dynamic_user_vacuum(Manager *m, bool close_user) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+
+ /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users
+ * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which
+ * might not be referenced anymore. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ if (d->n_ref > 0)
+ continue;
+
+ if (close_user) {
+ log_debug("Removing orphaned dynamic user %s", d->name);
+ (void) dynamic_user_close(d);
+ }
+
+ dynamic_user_free(d);
+ }
+}
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_free_ char *user = NULL;
+ uid_t check_uid;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ /* A friendly way to translate a dynamic user's UID into a his name. */
+
+ if (uid < UID_PICK_MIN)
+ return -ESRCH;
+ if (uid > UID_PICK_MAX)
+ return -ESRCH;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ r = read_one_line_file(lock_path, &user);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* The lock file might be stale, hence let's verify the data before we return it */
+ r = dynamic_user_lookup_name(m, user, &check_uid);
+ if (r < 0)
+ return r;
+ if (check_uid != uid) /* lock file doesn't match our own idea */
+ return -ESRCH;
+
+ *ret = user;
+ user = NULL;
+
+ return 0;
+}
+
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(ret);
+
+ /* A friendly call for translating a dynamic user's name into its UID */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (!d)
+ return -ESRCH;
+
+ r = dynamic_user_current(d, ret);
+ if (r == -EAGAIN) /* not realized yet? */
+ return -ESRCH;
+
+ return r;
+}
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) {
+ bool acquired = false;
+ int r;
+
+ assert(creds);
+ assert(m);
+
+ /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some
+ * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user
+ * and group. This call allocates a pair. */
+
+ if (!creds->user && user) {
+ r = dynamic_user_acquire(m, user, &creds->user);
+ if (r < 0)
+ return r;
+
+ acquired = true;
+ }
+
+ if (!creds->group) {
+
+ if (creds->user && (!group || streq_ptr(user, group)))
+ creds->group = dynamic_user_ref(creds->user);
+ else {
+ r = dynamic_user_acquire(m, group, &creds->group);
+ if (r < 0) {
+ if (acquired)
+ creds->user = dynamic_user_unref(creds->user);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid) {
+ uid_t u = UID_INVALID;
+ gid_t g = GID_INVALID;
+ int r;
+
+ assert(creds);
+ assert(uid);
+ assert(gid);
+
+ /* Realize both the referenced user and group */
+
+ if (creds->user) {
+ r = dynamic_user_realize(creds->user, &u);
+ if (r < 0)
+ return r;
+ }
+
+ if (creds->group && creds->group != creds->user) {
+ r = dynamic_user_realize(creds->group, &g);
+ if (r < 0)
+ return r;
+ } else
+ g = u;
+
+ *uid = u;
+ *gid = g;
+
+ return 0;
+}
+
+void dynamic_creds_unref(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_unref(creds->user);
+ creds->group = dynamic_user_unref(creds->group);
+}
+
+void dynamic_creds_destroy(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_destroy(creds->user);
+ creds->group = dynamic_user_destroy(creds->group);
+}
diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h
new file mode 100644
index 0000000000..0b8bce1a72
--- /dev/null
+++ b/src/core/dynamic-user.h
@@ -0,0 +1,66 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+typedef struct DynamicUser DynamicUser;
+
+typedef struct DynamicCreds {
+ /* A combination of a dynamic user and group */
+ DynamicUser *user;
+ DynamicUser *group;
+} DynamicCreds;
+
+#include "manager.h"
+
+/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't
+ * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you
+ * need to allocated two of these objects. DynamicCreds below makes that easy. */
+struct DynamicUser {
+ int n_ref;
+ Manager *manager;
+
+ /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock
+ * file fd locking the user ID we picked. */
+ int storage_socket[2];
+
+ char name[];
+};
+
+int dynamic_user_acquire(Manager *m, const char *name, DynamicUser **ret);
+
+int dynamic_user_realize(DynamicUser *d, uid_t *ret);
+int dynamic_user_current(DynamicUser *d, uid_t *ret);
+
+DynamicUser* dynamic_user_ref(DynamicUser *d);
+DynamicUser* dynamic_user_unref(DynamicUser *d);
+DynamicUser* dynamic_user_destroy(DynamicUser *d);
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void dynamic_user_vacuum(Manager *m, bool close_user);
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret);
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret);
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group);
+int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid);
+
+void dynamic_creds_unref(DynamicCreds *creds);
+void dynamic_creds_destroy(DynamicCreds *creds);
diff --git a/src/core/execute.c b/src/core/execute.c
index 2cef70e668..77a75245cb 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -219,12 +219,36 @@ static void exec_context_tty_reset(const ExecContext *context, const ExecParamet
(void) vt_disallocate(path);
}
+static bool is_terminal_input(ExecInput i) {
+ return IN_SET(i,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL);
+}
+
static bool is_terminal_output(ExecOutput o) {
- return
- o == EXEC_OUTPUT_TTY ||
- o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
- o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
- o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
+ return IN_SET(o,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
+}
+
+static bool exec_context_needs_term(const ExecContext *c) {
+ assert(c);
+
+ /* Return true if the execution context suggests we should set $TERM to something useful. */
+
+ if (is_terminal_input(c->std_input))
+ return true;
+
+ if (is_terminal_output(c->std_output))
+ return true;
+
+ if (is_terminal_output(c->std_error))
+ return true;
+
+ return !!c->tty_path;
}
static int open_null_as(int flags, int nfd) {
@@ -289,7 +313,15 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
return r;
}
-static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
+static int connect_logger_as(
+ Unit *unit,
+ const ExecContext *context,
+ ExecOutput output,
+ const char *ident,
+ int nfd,
+ uid_t uid,
+ gid_t gid) {
+
int fd, r;
assert(context);
@@ -310,7 +342,7 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons
return -errno;
}
- fd_inc_sndbuf(fd, SNDBUF_SIZE);
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
dprintf(fd,
"%s\n"
@@ -321,18 +353,18 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons
"%i\n"
"%i\n",
context->syslog_identifier ? context->syslog_identifier : ident,
- unit_id,
+ unit->id,
context->syslog_priority,
!!context->syslog_level_prefix,
output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
is_terminal_output(output));
- if (fd != nfd) {
- r = dup2(fd, nfd) < 0 ? -errno : nfd;
- safe_close(fd);
- } else
- r = nfd;
+ if (fd == nfd)
+ return nfd;
+
+ r = dup2(fd, nfd) < 0 ? -errno : nfd;
+ safe_close(fd);
return r;
}
@@ -355,13 +387,6 @@ static int open_terminal_as(const char *path, mode_t mode, int nfd) {
return r;
}
-static bool is_terminal_input(ExecInput i) {
- return
- i == EXEC_INPUT_TTY ||
- i == EXEC_INPUT_TTY_FORCE ||
- i == EXEC_INPUT_TTY_FAIL;
-}
-
static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
if (is_terminal_input(std_input) && !apply_tty_stdin)
@@ -446,7 +471,10 @@ static int setup_output(
int fileno,
int socket_fd,
const char *ident,
- uid_t uid, gid_t gid) {
+ uid_t uid,
+ gid_t gid,
+ dev_t *journal_stream_dev,
+ ino_t *journal_stream_ino) {
ExecOutput o;
ExecInput i;
@@ -456,6 +484,8 @@ static int setup_output(
assert(context);
assert(params);
assert(ident);
+ assert(journal_stream_dev);
+ assert(journal_stream_ino);
if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
@@ -531,10 +561,21 @@ static int setup_output(
case EXEC_OUTPUT_KMSG_AND_CONSOLE:
case EXEC_OUTPUT_JOURNAL:
case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
- r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
+ r = connect_logger_as(unit, context, o, ident, fileno, uid, gid);
if (r < 0) {
log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
r = open_null_as(O_WRONLY, fileno);
+ } else {
+ struct stat st;
+
+ /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
+ * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
+ * services to detect whether they are connected to the journal or not. */
+
+ if (fstat(fileno, &st) >= 0) {
+ *journal_stream_dev = st.st_dev;
+ *journal_stream_ino = st.st_ino;
+ }
}
return r;
@@ -552,6 +593,10 @@ static int chown_terminal(int fd, uid_t uid) {
assert(fd >= 0);
+ /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
+ if (isatty(fd) < 1)
+ return 0;
+
/* This might fail. What matters are the results. */
(void) fchown(fd, uid, -1);
(void) fchmod(fd, TTY_MODE);
@@ -795,7 +840,7 @@ static int setup_pam(
const char *user,
uid_t uid,
const char *tty,
- char ***pam_env,
+ char ***env,
int fds[], unsigned n_fds) {
static const struct pam_conv conv = {
@@ -807,14 +852,14 @@ static int setup_pam(
pam_handle_t *handle = NULL;
sigset_t old_ss;
int pam_code = PAM_SUCCESS, r;
- char **e = NULL;
+ char **nv, **e = NULL;
bool close_session = false;
pid_t pam_pid = 0, parent_pid;
int flags = 0;
assert(name);
assert(user);
- assert(pam_env);
+ assert(env);
/* We set up PAM in the parent process, then fork. The child
* will then stay around until killed via PR_GET_PDEATHSIG or
@@ -842,6 +887,12 @@ static int setup_pam(
goto fail;
}
+ STRV_FOREACH(nv, *env) {
+ pam_code = pam_putenv(handle, *nv);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+ }
+
pam_code = pam_acct_mgmt(handle, flags);
if (pam_code != PAM_SUCCESS)
goto fail;
@@ -962,8 +1013,8 @@ static int setup_pam(
if (!barrier_place_and_sync(&barrier))
log_error("PAM initialization failed");
- *pam_env = e;
- e = NULL;
+ strv_free(*env);
+ *env = e;
return 0;
@@ -1203,7 +1254,7 @@ static int apply_memory_deny_write_execute(const ExecContext *c) {
r = seccomp_rule_add(
seccomp,
- SCMP_ACT_KILL,
+ SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(mmap),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
@@ -1212,7 +1263,7 @@ static int apply_memory_deny_write_execute(const ExecContext *c) {
r = seccomp_rule_add(
seccomp,
- SCMP_ACT_KILL,
+ SCMP_ACT_ERRNO(EPERM),
SCMP_SYS(mprotect),
1,
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
@@ -1230,6 +1281,76 @@ finish:
return r;
}
+static int apply_restrict_realtime(const ExecContext *c) {
+ static const int permitted_policies[] = {
+ SCHED_OTHER,
+ SCHED_BATCH,
+ SCHED_IDLE,
+ };
+
+ scmp_filter_ctx *seccomp;
+ unsigned i;
+ int r, p, max_policy = 0;
+
+ assert(c);
+
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ /* Determine the highest policy constant we want to allow */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] > max_policy)
+ max_policy = permitted_policies[i];
+
+ /* Go through all policies with lower values than that, and block them -- unless they appear in the
+ * whitelist. */
+ for (p = 0; p < max_policy; p++) {
+ bool good = false;
+
+ /* Check if this is in the whitelist. */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] == p) {
+ good = true;
+ break;
+ }
+
+ if (good)
+ continue;
+
+ /* Deny this policy */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, p));
+ if (r < 0)
+ goto finish;
+ }
+
+ /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are unsigned here,
+ * hence no need no check for < 0 values. */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_GT, max_policy));
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -1268,6 +1389,8 @@ static int build_environment(
const char *home,
const char *username,
const char *shell,
+ dev_t journal_stream_dev,
+ ino_t journal_stream_ino,
char ***ret) {
_cleanup_strv_free_ char **our_env = NULL;
@@ -1277,7 +1400,7 @@ static int build_environment(
assert(c);
assert(ret);
- our_env = new0(char*, 11);
+ our_env = new0(char*, 12);
if (!our_env)
return -ENOMEM;
@@ -1338,19 +1461,35 @@ static int build_environment(
our_env[n_env++] = x;
}
- if (is_terminal_input(c->std_input) ||
- c->std_output == EXEC_OUTPUT_TTY ||
- c->std_error == EXEC_OUTPUT_TTY ||
- c->tty_path) {
+ if (exec_context_needs_term(c)) {
+ const char *tty_path, *term = NULL;
+
+ tty_path = exec_context_tty_path(c);
- x = strdup(default_term_for_tty(exec_context_tty_path(c)));
+ /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
+ * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
+ * passes to PID 1 ends up all the way in the console login shown. */
+
+ if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+ term = getenv("TERM");
+ if (!term)
+ term = default_term_for_tty(tty_path);
+
+ x = strappend("TERM=", term);
if (!x)
return -ENOMEM;
our_env[n_env++] = x;
}
+ if (journal_stream_dev != 0 && journal_stream_ino != 0) {
+ if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
our_env[n_env++] = NULL;
- assert(n_env <= 11);
+ assert(n_env <= 12);
*ret = our_env;
our_env = NULL;
@@ -1394,9 +1533,9 @@ static bool exec_needs_mount_namespace(
assert(context);
assert(params);
- if (!strv_isempty(context->read_write_dirs) ||
- !strv_isempty(context->read_only_dirs) ||
- !strv_isempty(context->inaccessible_dirs))
+ if (!strv_isempty(context->read_write_paths) ||
+ !strv_isempty(context->read_only_paths) ||
+ !strv_isempty(context->inaccessible_paths))
return true;
if (context->mount_flags != 0)
@@ -1413,14 +1552,28 @@ static bool exec_needs_mount_namespace(
return false;
}
+static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
+ assert(array);
+ assert(n);
+
+ if (!pair)
+ return;
+
+ if (pair[0] >= 0)
+ array[(*n)++] = pair[0];
+ if (pair[1] >= 0)
+ array[(*n)++] = pair[1];
+}
+
static int close_remaining_fds(
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
int socket_fd,
int *fds, unsigned n_fds) {
unsigned n_dont_close = 0;
- int dont_close[n_fds + 7];
+ int dont_close[n_fds + 11];
assert(params);
@@ -1438,11 +1591,14 @@ static int close_remaining_fds(
n_dont_close += n_fds;
}
- if (runtime) {
- if (runtime->netns_storage_socket[0] >= 0)
- dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
- if (runtime->netns_storage_socket[1] >= 0)
- dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
+ if (runtime)
+ append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
+
+ if (dcreds) {
+ if (dcreds->user)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
+ if (dcreds->group)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
}
return close_all_fds(dont_close, n_dont_close);
@@ -1454,19 +1610,22 @@ static int exec_child(
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
char **argv,
int socket_fd,
int *fds, unsigned n_fds,
char **files_env,
int *exit_status) {
- _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
+ _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
_cleanup_free_ char *mac_selinux_context_net = NULL;
const char *username = NULL, *home = NULL, *shell = NULL, *wd;
+ dev_t journal_stream_dev = 0;
+ ino_t journal_stream_ino = 0;
+ bool needs_mount_namespace;
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
int i, r;
- bool needs_mount_namespace;
assert(unit);
assert(command);
@@ -1502,7 +1661,7 @@ static int exec_child(
log_forget_fds();
- r = close_remaining_fds(params, runtime, socket_fd, fds, n_fds);
+ r = close_remaining_fds(params, runtime, dcreds, socket_fd, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
return r;
@@ -1535,25 +1694,59 @@ static int exec_child(
}
}
- if (context->user) {
- username = context->user;
- r = get_user_creds(&username, &uid, &gid, &home, &shell);
+ if (context->dynamic_user && dcreds) {
+
+ /* Make sure we bypass our own NSS module for any NSS checks */
+ if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
+ *exit_status = EXIT_USER;
+ return -errno;
+ }
+
+ r = dynamic_creds_realize(dcreds, &uid, &gid);
if (r < 0) {
*exit_status = EXIT_USER;
return r;
}
- }
- if (context->group) {
- const char *g = context->group;
+ if (uid == UID_INVALID || gid == GID_INVALID) {
+ *exit_status = EXIT_USER;
+ return -ESRCH;
+ }
- r = get_group_creds(&g, &gid);
- if (r < 0) {
- *exit_status = EXIT_GROUP;
- return r;
+ if (dcreds->user)
+ username = dcreds->user->name;
+
+ } else {
+ if (context->user) {
+ username = context->user;
+ r = get_user_creds(&username, &uid, &gid, &home, &shell);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
+
+ /* Don't set $HOME or $SHELL if they are are not particularly enlightening anyway. */
+ if (isempty(home) || path_equal(home, "/"))
+ home = NULL;
+
+ if (isempty(shell) || PATH_IN_SET(shell,
+ "/bin/nologin",
+ "/sbin/nologin",
+ "/usr/bin/nologin",
+ "/usr/sbin/nologin"))
+ shell = NULL;
}
- }
+ if (context->group) {
+ const char *g = context->group;
+
+ r = get_group_creds(&g, &gid);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return r;
+ }
+ }
+ }
/* If a socket is connected to STDIN/STDOUT/STDERR, we
* must sure to drop O_NONBLOCK */
@@ -1566,13 +1759,13 @@ static int exec_child(
return r;
}
- r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid);
+ r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDOUT;
return r;
}
- r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid);
+ r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDERR;
return r;
@@ -1711,9 +1904,43 @@ static int exec_child(
}
}
+ r = build_environment(
+ context,
+ params,
+ n_fds,
+ home,
+ username,
+ shell,
+ journal_stream_dev,
+ journal_stream_ino,
+ &our_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return r;
+ }
+
+ r = build_pass_environment(context, &pass_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return r;
+ }
+
+ accum_env = strv_env_merge(5,
+ params->environment,
+ our_env,
+ pass_env,
+ context->environment,
+ files_env,
+ NULL);
+ if (!accum_env) {
+ *exit_status = EXIT_MEMORY;
+ return -ENOMEM;
+ }
+ accum_env = strv_env_clean(accum_env);
+
umask(context->umask);
- if (params->apply_permissions) {
+ if (params->apply_permissions && !command->privileged) {
r = enforce_groups(context, username, gid);
if (r < 0) {
*exit_status = EXIT_GROUP;
@@ -1747,7 +1974,7 @@ static int exec_child(
#endif
#ifdef HAVE_PAM
if (context->pam_name && username) {
- r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
+ r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_PAM;
return r;
@@ -1784,9 +2011,9 @@ static int exec_child(
r = setup_namespace(
params->apply_chroot ? context->root_directory : NULL,
- context->read_write_dirs,
- context->read_only_dirs,
- context->inaccessible_dirs,
+ context->read_write_paths,
+ context->read_only_paths,
+ context->inaccessible_paths,
tmp,
var,
context->private_devices,
@@ -1838,7 +2065,7 @@ static int exec_child(
}
#ifdef HAVE_SELINUX
- if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
+ if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) {
r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
if (r < 0) {
*exit_status = EXIT_SELINUX_CONTEXT;
@@ -1863,7 +2090,7 @@ static int exec_child(
return r;
}
- if (params->apply_permissions) {
+ if (params->apply_permissions && !command->privileged) {
bool use_address_families = context->address_families_whitelist ||
!set_isempty(context->address_families);
@@ -1873,10 +2100,20 @@ static int exec_child(
int secure_bits = context->secure_bits;
for (i = 0; i < _RLIMIT_MAX; i++) {
+
if (!context->rlimit[i])
continue;
- if (setrlimit_closest(i, context->rlimit[i]) < 0) {
+ r = setrlimit_closest(i, context->rlimit[i]);
+ if (r < 0) {
+ *exit_status = EXIT_LIMITS;
+ return r;
+ }
+ }
+
+ /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
+ if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
+ if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
*exit_status = EXIT_LIMITS;
return -errno;
}
@@ -1937,7 +2174,7 @@ static int exec_child(
}
if (context->no_new_privileges ||
- (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter)))
+ (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter)))
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
*exit_status = EXIT_NO_NEW_PRIVILEGES;
return -errno;
@@ -1959,6 +2196,15 @@ static int exec_child(
return r;
}
}
+
+ if (context->restrict_realtime) {
+ r = apply_restrict_realtime(context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+ }
+
if (use_syscall_filter) {
r = apply_seccomp(context);
if (r < 0) {
@@ -1993,39 +2239,12 @@ static int exec_child(
#endif
}
- r = build_environment(context, params, n_fds, home, username, shell, &our_env);
- if (r < 0) {
- *exit_status = EXIT_MEMORY;
- return r;
- }
-
- r = build_pass_environment(context, &pass_env);
- if (r < 0) {
- *exit_status = EXIT_MEMORY;
- return r;
- }
-
- final_env = strv_env_merge(6,
- params->environment,
- our_env,
- pass_env,
- context->environment,
- files_env,
- pam_env,
- NULL);
- if (!final_env) {
- *exit_status = EXIT_MEMORY;
- return -ENOMEM;
- }
-
- final_argv = replace_env_argv(argv, final_env);
+ final_argv = replace_env_argv(argv, accum_env);
if (!final_argv) {
*exit_status = EXIT_MEMORY;
return -ENOMEM;
}
- final_env = strv_env_clean(final_env);
-
if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
_cleanup_free_ char *line;
@@ -2041,7 +2260,7 @@ static int exec_child(
}
}
- execve(command->path, final_argv, final_env);
+ execve(command->path, final_argv, accum_env);
*exit_status = EXIT_EXEC;
return -errno;
}
@@ -2051,6 +2270,7 @@ int exec_spawn(Unit *unit,
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
pid_t *ret) {
_cleanup_strv_free_ char **files_env = NULL;
@@ -2109,6 +2329,7 @@ int exec_spawn(Unit *unit,
context,
params,
runtime,
+ dcreds,
argv,
socket_fd,
fds, n_fds,
@@ -2183,9 +2404,9 @@ void exec_context_done(ExecContext *c) {
c->pam_name = mfree(c->pam_name);
- c->read_only_dirs = strv_free(c->read_only_dirs);
- c->read_write_dirs = strv_free(c->read_write_dirs);
- c->inaccessible_dirs = strv_free(c->inaccessible_dirs);
+ c->read_only_paths = strv_free(c->read_only_paths);
+ c->read_write_paths = strv_free(c->read_write_paths);
+ c->inaccessible_paths = strv_free(c->inaccessible_paths);
if (c->cpuset)
CPU_FREE(c->cpuset);
@@ -2419,7 +2640,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
"%sIgnoreSIGPIPE: %s\n"
- "%sMemoryDenyWriteExecute: %s\n",
+ "%sMemoryDenyWriteExecute: %s\n"
+ "%sRestrictRealtime: %s\n",
prefix, c->umask,
prefix, c->working_directory ? c->working_directory : "/",
prefix, c->root_directory ? c->root_directory : "/",
@@ -2430,7 +2652,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
prefix, yes_no(c->ignore_sigpipe),
- prefix, yes_no(c->memory_deny_write_execute));
+ prefix, yes_no(c->memory_deny_write_execute),
+ prefix, yes_no(c->restrict_realtime));
STRV_FOREACH(e, c->environment)
fprintf(f, "%sEnvironment: %s\n", prefix, *e);
@@ -2580,6 +2803,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
if (c->group)
fprintf(f, "%sGroup: %s\n", prefix, c->group);
+ fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
+
if (strv_length(c->supplementary_groups) > 0) {
fprintf(f, "%sSupplementaryGroups:", prefix);
strv_fprintf(f, c->supplementary_groups);
@@ -2589,21 +2814,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
if (c->pam_name)
fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
- if (strv_length(c->read_write_dirs) > 0) {
- fprintf(f, "%sReadWriteDirs:", prefix);
- strv_fprintf(f, c->read_write_dirs);
+ if (strv_length(c->read_write_paths) > 0) {
+ fprintf(f, "%sReadWritePaths:", prefix);
+ strv_fprintf(f, c->read_write_paths);
fputs("\n", f);
}
- if (strv_length(c->read_only_dirs) > 0) {
- fprintf(f, "%sReadOnlyDirs:", prefix);
- strv_fprintf(f, c->read_only_dirs);
+ if (strv_length(c->read_only_paths) > 0) {
+ fprintf(f, "%sReadOnlyPaths:", prefix);
+ strv_fprintf(f, c->read_only_paths);
fputs("\n", f);
}
- if (strv_length(c->inaccessible_dirs) > 0) {
- fprintf(f, "%sInaccessibleDirs:", prefix);
- strv_fprintf(f, c->inaccessible_dirs);
+ if (strv_length(c->inaccessible_paths) > 0) {
+ fprintf(f, "%sInaccessiblePaths:", prefix);
+ strv_fprintf(f, c->inaccessible_paths);
fputs("\n", f);
}
@@ -2684,7 +2909,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
bool exec_context_maintains_privileges(ExecContext *c) {
assert(c);
- /* Returns true if the process forked off would run run under
+ /* Returns true if the process forked off would run under
* an unchanged UID or as root. */
if (!c->user)
@@ -2919,7 +3144,7 @@ int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
return r;
if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
- if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
return -errno;
}
diff --git a/src/core/execute.h b/src/core/execute.h
index 464869d226..48cc18fbb3 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -30,6 +30,7 @@ typedef struct ExecParameters ExecParameters;
#include <stdio.h>
#include <sys/capability.h>
+#include "cgroup-util.h"
#include "fdset.h"
#include "list.h"
#include "missing.h"
@@ -81,7 +82,8 @@ struct ExecCommand {
char **argv;
ExecStatus exec_status;
LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
- bool ignore;
+ bool ignore:1;
+ bool privileged:1;
};
struct ExecRuntime {
@@ -90,6 +92,8 @@ struct ExecRuntime {
char *tmp_dir;
char *var_tmp_dir;
+ /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
+ * namespace. */
int netns_storage_socket[2];
};
@@ -129,7 +133,7 @@ struct ExecContext {
bool ignore_sigpipe;
- /* Since resolving these names might might involve socket
+ /* Since resolving these names might involve socket
* connections and we don't want to deadlock ourselves these
* names are resolved on execution only and in the child
* process. */
@@ -151,7 +155,7 @@ struct ExecContext {
bool smack_process_label_ignore;
char *smack_process_label;
- char **read_write_dirs, **read_only_dirs, **inaccessible_dirs;
+ char **read_write_paths, **read_only_paths, **inaccessible_paths;
unsigned long mount_flags;
uint64_t capability_bounding_set;
@@ -172,6 +176,8 @@ struct ExecContext {
bool no_new_privileges;
+ bool dynamic_user;
+
/* This is not exposed to the user but available
* internally. We need it to make sure that whenever we spawn
* /usr/bin/mount it is run in the same process group as us so
@@ -192,17 +198,16 @@ struct ExecContext {
char **runtime_directory;
mode_t runtime_directory_mode;
+ bool memory_deny_write_execute;
+ bool restrict_realtime;
+
bool oom_score_adjust_set:1;
bool nice_set:1;
bool ioprio_set:1;
bool cpu_sched_set:1;
bool no_new_privileges_set:1;
- bool memory_deny_write_execute;
};
-#include "cgroup-util.h"
-#include "cgroup.h"
-
struct ExecParameters {
char **argv;
char **environment;
@@ -233,11 +238,15 @@ struct ExecParameters {
int stderr_fd;
};
+#include "unit.h"
+#include "dynamic-user.h"
+
int exec_spawn(Unit *unit,
ExecCommand *command,
const ExecContext *context,
const ExecParameters *exec_params,
ExecRuntime *runtime,
+ DynamicCreds *dynamic_creds,
pid_t *ret);
void exec_command_done(ExecCommand *c);
diff --git a/src/core/killall.c b/src/core/killall.c
index 09378f7085..a8b814e868 100644
--- a/src/core/killall.c
+++ b/src/core/killall.c
@@ -23,6 +23,7 @@
#include <unistd.h>
#include "alloc-util.h"
+#include "def.h"
#include "fd-util.h"
#include "formats-util.h"
#include "killall.h"
@@ -33,8 +34,6 @@
#include "terminal-util.h"
#include "util.h"
-#define TIMEOUT_USEC (10 * USEC_PER_SEC)
-
static bool ignore_proc(pid_t pid, bool warn_rootfs) {
_cleanup_fclose_ FILE *f = NULL;
char c;
@@ -80,7 +79,7 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) {
get_process_comm(pid, &comm);
if (r)
- log_notice("Process " PID_FMT " (%s) has been been marked to be excluded from killing. It is "
+ log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is "
"running from the root file system, and thus likely to block re-mounting of the "
"root file system to read-only. Please consider moving it into an initrd file "
"system instead.", pid, strna(comm));
@@ -99,7 +98,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) {
if (set_isempty(pids))
return;
- until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
+ until = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
for (;;) {
struct timespec ts;
int k;
diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c
index 3503db52ed..fd1021f706 100644
--- a/src/core/kmod-setup.c
+++ b/src/core/kmod-setup.c
@@ -64,9 +64,6 @@ int kmod_setup(void) {
/* this should never be a module */
{ "unix", "/proc/net/unix", true, true, NULL },
- /* IPC is needed before we bring up any other services */
- { "kdbus", "/sys/fs/kdbus", false, false, is_kdbus_wanted },
-
#ifdef HAVE_LIBIPTC
/* netfilter is needed by networkd, nspawn among others, and cannot be autoloaded */
{ "ip_tables", "/proc/net/ip_tables_names", false, false, NULL },
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index eb58586523..396f847213 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -19,9 +19,9 @@ m4_dnl Define the context options only once
m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
`$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context)
$1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory)
-$1.User, config_parse_unit_string_printf, 0, offsetof($1, exec_context.user)
-$1.Group, config_parse_unit_string_printf, 0, offsetof($1, exec_context.group)
-$1.SupplementaryGroups, config_parse_strv, 0, offsetof($1, exec_context.supplementary_groups)
+$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user)
+$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group)
+$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups)
$1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context)
$1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context)
$1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context)
@@ -34,6 +34,7 @@ $1.UMask, config_parse_mode, 0,
$1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment)
$1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files)
$1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment)
+$1.DynamicUser, config_parse_bool, 0, offsetof($1, exec_context.dynamic_user)
$1.StandardInput, config_parse_input, 0, offsetof($1, exec_context.std_input)
$1.StandardOutput, config_parse_output, 0, offsetof($1, exec_context.std_output)
$1.StandardError, config_parse_output, 0, offsetof($1, exec_context.std_error)
@@ -56,11 +57,13 @@ m4_ifdef(`HAVE_SECCOMP',
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
+$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
@@ -78,9 +81,12 @@ $1.LimitMSGQUEUE, config_parse_limit, RLIMIT_MSGQ
$1.LimitNICE, config_parse_limit, RLIMIT_NICE, offsetof($1, exec_context.rlimit)
$1.LimitRTPRIO, config_parse_limit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit)
$1.LimitRTTIME, config_parse_limit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit)
-$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_dirs)
-$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_dirs)
-$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_dirs)
+$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
@@ -280,13 +286,14 @@ Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC
Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command)
Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command)
Socket.TimeoutSec, config_parse_sec, 0, offsetof(Socket, timeout_usec)
-Socket.SocketUser, config_parse_unit_string_printf, 0, offsetof(Socket, user)
-Socket.SocketGroup, config_parse_unit_string_printf, 0, offsetof(Socket, group)
+Socket.SocketUser, config_parse_user_group, 0, offsetof(Socket, user)
+Socket.SocketGroup, config_parse_user_group, 0, offsetof(Socket, group)
Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode)
Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode)
Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept)
Socket.Writable, config_parse_bool, 0, offsetof(Socket, writable)
Socket.MaxConnections, config_parse_unsigned, 0, offsetof(Socket, max_connections)
+Socket.MaxConnectionsPerSource, config_parse_unsigned, 0, offsetof(Socket, max_connections_per_source)
Socket.KeepAlive, config_parse_bool, 0, offsetof(Socket, keep_alive)
Socket.KeepAliveTimeSec, config_parse_sec, 0, offsetof(Socket, keep_alive_time)
Socket.KeepAliveIntervalSec, config_parse_sec, 0, offsetof(Socket, keep_alive_interval)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index b53301a147..d5f035b67f 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -64,6 +64,7 @@
#include "unit-name.h"
#include "unit-printf.h"
#include "unit.h"
+#include "user-util.h"
#include "utf8.h"
#include "web-util.h"
@@ -596,7 +597,7 @@ int config_parse_exec(
p = rvalue;
do {
_cleanup_free_ char *path = NULL, *firstword = NULL;
- bool separate_argv0 = false, ignore = false;
+ bool separate_argv0 = false, ignore = false, privileged = false;
_cleanup_free_ ExecCommand *nce = NULL;
_cleanup_strv_free_ char **n = NULL;
size_t nlen = 0, nbufsize = 0;
@@ -610,14 +611,18 @@ int config_parse_exec(
return 0;
f = firstword;
- for (i = 0; i < 2; i++) {
- /* We accept an absolute path as first argument, or
- * alternatively an absolute prefixed with @ to allow
- * overriding of argv[0]. */
+ for (i = 0; i < 3; i++) {
+ /* We accept an absolute path as first argument.
+ * If it's prefixed with - and the path doesn't exist,
+ * we ignore it instead of erroring out;
+ * if it's prefixed with @, we allow overriding of argv[0];
+ * and if it's prefixed with !, it will be run with full privileges */
if (*f == '-' && !ignore)
ignore = true;
else if (*f == '@' && !separate_argv0)
separate_argv0 = true;
+ else if (*f == '+' && !privileged)
+ privileged = true;
else
break;
f++;
@@ -715,6 +720,7 @@ int config_parse_exec(
nce->argv = n;
nce->path = path;
nce->ignore = ignore;
+ nce->privileged = privileged;
exec_command_append_list(e, nce);
@@ -1758,6 +1764,123 @@ int config_parse_sec_fix_0(
return 0;
}
+int config_parse_user_group(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **user = data, *n;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue))
+ n = NULL;
+ else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k);
+ return 0;
+ }
+
+ n = k;
+ k = NULL;
+ }
+
+ free(*user);
+ *user = n;
+
+ return 0;
+}
+
+int config_parse_user_group_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***users = data;
+ Unit *u = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ char **empty;
+
+ empty = new0(char*, 1);
+ if (!empty)
+ return log_oom();
+
+ strv_free(*users);
+ *users = empty;
+
+ return 0;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, WHITESPACE, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k);
+ continue;
+ }
+
+ r = strv_push(users, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+
+ return 0;
+}
+
int config_parse_busname_service(
const char *unit,
const char *filename,
@@ -2424,7 +2547,7 @@ static int syscall_filter_parse_one(
int id;
id = seccomp_syscall_resolve_name(t);
- if (id < 0) {
+ if (id == __NR_SCMP_ERROR) {
if (warn)
log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
return 0;
@@ -2769,7 +2892,7 @@ int config_parse_cpu_quota(
void *userdata) {
CGroupContext *c = data;
- double percent;
+ int r;
assert(filename);
assert(lvalue);
@@ -2780,18 +2903,13 @@ int config_parse_cpu_quota(
return 0;
}
- if (!endswith(rvalue, "%")) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "CPU quota '%s' not ending in '%%'. Ignoring.", rvalue);
+ r = parse_percent_unbounded(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "CPU quota '%s' invalid. Ignoring.", rvalue);
return 0;
}
- if (sscanf(rvalue, "%lf%%", &percent) != 1 || percent <= 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "CPU quota '%s' invalid. Ignoring.", rvalue);
- return 0;
- }
-
- c->cpu_quota_per_sec_usec = (usec_t) (percent * USEC_PER_SEC / 100);
-
+ c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 100U;
return 0;
}
@@ -2812,9 +2930,19 @@ int config_parse_memory_limit(
int r;
if (!isempty(rvalue) && !streq(rvalue, "infinity")) {
- r = parse_size(rvalue, 1024, &bytes);
- if (r < 0 || bytes < 1) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
+
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ r = parse_size(rvalue, 1024, &bytes);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ } else
+ bytes = physical_memory_scale(r, 100U);
+
+ if (bytes <= 0 || bytes >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range. Ignoring.", rvalue);
return 0;
}
}
@@ -2851,9 +2979,18 @@ int config_parse_tasks_max(
return 0;
}
- r = safe_atou64(rvalue, &u);
- if (r < 0 || u < 1) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ r = safe_atou64(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ } else
+ u = system_tasks_max_scale(r, 100U);
+
+ if (u <= 0 || u >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
return 0;
}
@@ -3584,7 +3721,7 @@ int config_parse_protect_home(
assert(data);
/* Our enum shall be a superset of booleans, hence first try
- * to parse as as boolean, and then as enum */
+ * to parse as boolean, and then as enum */
k = parse_boolean(rvalue);
if (k > 0)
@@ -3627,7 +3764,7 @@ int config_parse_protect_system(
assert(data);
/* Our enum shall be a superset of booleans, hence first try
- * to parse as as boolean, and then as enum */
+ * to parse as boolean, and then as enum */
k = parse_boolean(rvalue);
if (k > 0)
@@ -3743,7 +3880,7 @@ static int merge_by_names(Unit **u, Set *names, const char *id) {
/* If the symlink name we are looking at is unit template, then
we must search for instance of this template */
- if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE)) {
+ if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE) && (*u)->instance) {
_cleanup_free_ char *instance = NULL;
r = unit_name_replace_instance(k, (*u)->instance, &instance);
@@ -3825,7 +3962,15 @@ static int load_from_path(Unit *u, const char *path) {
if (r >= 0)
break;
filename = mfree(filename);
- if (r != -ENOENT)
+
+ /* ENOENT means that the file is missing or is a dangling symlink.
+ * ENOTDIR means that one of paths we expect to be is a directory
+ * is not a directory, we should just ignore that.
+ * EACCES means that the directory or file permissions are wrong.
+ */
+ if (r == -EACCES)
+ log_debug_errno(r, "Cannot access \"%s\": %m", filename);
+ else if (!IN_SET(r, -ENOENT, -ENOTDIR))
return r;
/* Empty the symlink names for the next run */
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index b36a2e3a02..213bce55a7 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -111,6 +111,8 @@ int config_parse_exec_utmp_mode(const char *unit, const char *filename, unsigned
int config_parse_working_directory(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_fdname(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c
index 0145fe2894..76dfcfa6d7 100644
--- a/src/core/machine-id-setup.c
+++ b/src/core/machine-id-setup.c
@@ -17,11 +17,8 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-#include <errno.h>
#include <fcntl.h>
#include <sched.h>
-#include <stdio.h>
-#include <string.h>
#include <sys/mount.h>
#include <unistd.h>
@@ -29,10 +26,8 @@
#include "alloc-util.h"
#include "fd-util.h"
-#include "fileio.h"
#include "fs-util.h"
-#include "hexdecoct.h"
-#include "io-util.h"
+#include "id128-util.h"
#include "log.h"
#include "machine-id-setup.h"
#include "macro.h"
@@ -46,101 +41,23 @@
#include "util.h"
#include "virt.h"
-static int shorten_uuid(char destination[34], const char source[36]) {
- unsigned i, j;
-
- assert(destination);
- assert(source);
-
- /* Converts a UUID into a machine ID, by lowercasing it and
- * removing dashes. Validates everything. */
-
- for (i = 0, j = 0; i < 36 && j < 32; i++) {
- int t;
-
- t = unhexchar(source[i]);
- if (t < 0)
- continue;
-
- destination[j++] = hexchar(t);
- }
-
- if (i != 36 || j != 32)
- return -EINVAL;
-
- destination[32] = '\n';
- destination[33] = 0;
- return 0;
-}
-
-static int read_machine_id(int fd, char id[34]) {
- char id_to_validate[34];
- int r;
-
- assert(fd >= 0);
- assert(id);
-
- /* Reads a machine ID from a file, validates it, and returns
- * it. The returned ID ends in a newline. */
-
- r = loop_read_exact(fd, id_to_validate, 33, false);
- if (r < 0)
- return r;
-
- if (id_to_validate[32] != '\n')
- return -EINVAL;
-
- id_to_validate[32] = 0;
-
- if (!id128_is_valid(id_to_validate))
- return -EINVAL;
-
- memcpy(id, id_to_validate, 32);
- id[32] = '\n';
- id[33] = 0;
- return 0;
-}
-
-static int write_machine_id(int fd, const char id[34]) {
- int r;
-
- assert(fd >= 0);
- assert(id);
-
- if (lseek(fd, 0, SEEK_SET) < 0)
- return -errno;
-
- r = loop_write(fd, id, 33, false);
- if (r < 0)
- return r;
-
- if (fsync(fd) < 0)
- return -errno;
-
- return 0;
-}
-
-static int generate_machine_id(char id[34], const char *root) {
- int fd, r;
- unsigned char *p;
- sd_id128_t buf;
- char *q;
+static int generate_machine_id(const char *root, sd_id128_t *ret) {
const char *dbus_machine_id;
+ _cleanup_close_ int fd = -1;
+ int r;
- assert(id);
-
- dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id");
+ assert(ret);
/* First, try reading the D-Bus machine id, unless it is a symlink */
+ dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id");
fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
if (fd >= 0) {
- r = read_machine_id(fd, id);
- safe_close(fd);
-
- if (r >= 0) {
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) {
log_info("Initializing machine ID from D-Bus machine ID.");
return 0;
}
+
+ fd = safe_close(fd);
}
if (isempty(root)) {
@@ -151,13 +68,10 @@ static int generate_machine_id(char id[34], const char *root) {
if (detect_container() > 0) {
_cleanup_free_ char *e = NULL;
- r = getenv_for_pid(1, "container_uuid", &e);
- if (r > 0) {
- r = shorten_uuid(id, e);
- if (r >= 0) {
- log_info("Initializing machine ID from container UUID.");
- return 0;
- }
+ if (getenv_for_pid(1, "container_uuid", &e) > 0 &&
+ sd_id128_from_string(e, ret) >= 0) {
+ log_info("Initializing machine ID from container UUID.");
+ return 0;
}
} else if (detect_vm() == VIRTUALIZATION_KVM) {
@@ -166,51 +80,29 @@ static int generate_machine_id(char id[34], const char *root) {
* running in qemu/kvm and a machine ID was passed in
* via -uuid on the qemu/kvm command line */
- char uuid[36];
-
- fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
- if (fd >= 0) {
- r = loop_read_exact(fd, uuid, 36, false);
- safe_close(fd);
-
- if (r >= 0) {
- r = shorten_uuid(id, uuid);
- if (r >= 0) {
- log_info("Initializing machine ID from KVM UUID.");
- return 0;
- }
- }
+ if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) {
+ log_info("Initializing machine ID from KVM UUID.");
+ return 0;
}
}
}
/* If that didn't work, generate a random machine id */
- r = sd_id128_randomize(&buf);
+ r = sd_id128_randomize(ret);
if (r < 0)
- return log_error_errno(r, "Failed to open /dev/urandom: %m");
-
- for (p = buf.bytes, q = id; p < buf.bytes + sizeof(buf); p++, q += 2) {
- q[0] = hexchar(*p >> 4);
- q[1] = hexchar(*p & 15);
- }
-
- id[32] = '\n';
- id[33] = 0;
+ return log_error_errno(r, "Failed to generate randomized : %m");
log_info("Initializing machine ID from random generator.");
-
return 0;
}
-int machine_id_setup(const char *root, sd_id128_t machine_id) {
+int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) {
const char *etc_machine_id, *run_machine_id;
_cleanup_close_ int fd = -1;
- bool writable = true;
- char id[34]; /* 32 + \n + \0 */
+ bool writable;
int r;
etc_machine_id = prefix_roota(root, "/etc/machine-id");
- run_machine_id = prefix_roota(root, "/run/machine-id");
RUN_WITH_UMASK(0000) {
/* We create this 0444, to indicate that this isn't really
@@ -218,7 +110,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
* will be owned by root it doesn't matter much, but maybe
* people look. */
- mkdir_parents(etc_machine_id, 0755);
+ (void) mkdir_parents(etc_machine_id, 0755);
fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444);
if (fd < 0) {
int old_errno = errno;
@@ -239,41 +131,41 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
}
writable = false;
- }
+ } else
+ writable = true;
}
- /* A machine id argument overrides all other machined-ids */
- if (!sd_id128_is_null(machine_id)) {
- sd_id128_to_string(machine_id, id);
- id[32] = '\n';
- id[33] = 0;
- } else {
- if (read_machine_id(fd, id) >= 0)
- return 0;
+ /* A we got a valid machine ID argument, that's what counts */
+ if (sd_id128_is_null(machine_id)) {
- /* Hmm, so, the id currently stored is not useful, then let's
- * generate one */
+ /* Try to read any existing machine ID */
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0)
+ return 0;
- r = generate_machine_id(id, root);
+ /* Hmm, so, the id currently stored is not useful, then let's generate one */
+ r = generate_machine_id(root, &machine_id);
if (r < 0)
return r;
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek: %m");
}
if (writable)
- if (write_machine_id(fd, id) >= 0)
- return 0;
+ if (id128_write_fd(fd, ID128_PLAIN, machine_id, true) >= 0)
+ goto finish;
fd = safe_close(fd);
- /* Hmm, we couldn't write it? So let's write it to
- * /run/machine-id as a replacement */
+ /* Hmm, we couldn't write it? So let's write it to /run/machine-id as a replacement */
- RUN_WITH_UMASK(0022) {
- r = write_string_file(run_machine_id, id, WRITE_STRING_FILE_CREATE);
- if (r < 0) {
- (void) unlink(run_machine_id);
- return log_error_errno(r, "Cannot write %s: %m", run_machine_id);
- }
+ run_machine_id = prefix_roota(root, "/run/machine-id");
+
+ RUN_WITH_UMASK(0022)
+ r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false);
+ if (r < 0) {
+ (void) unlink(run_machine_id);
+ return log_error_errno(r, "Cannot write %s: %m", run_machine_id);
}
/* And now, let's mount it over */
@@ -286,7 +178,11 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
/* Mark the mount read-only */
if (mount(NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL) < 0)
- log_warning_errno(errno, "Failed to make transient %s read-only: %m", etc_machine_id);
+ log_warning_errno(errno, "Failed to make transient %s read-only, ignoring: %m", etc_machine_id);
+
+finish:
+ if (ret)
+ *ret = machine_id;
return 0;
}
@@ -294,16 +190,20 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
int machine_id_commit(const char *root) {
_cleanup_close_ int fd = -1, initial_mntns_fd = -1;
const char *etc_machine_id;
- char id[34]; /* 32 + \n + \0 */
+ sd_id128_t id;
int r;
+ /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed
+ * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the
+ * original mount namespace, thus revealing the file that was just created. */
+
etc_machine_id = prefix_roota(root, "/etc/machine-id");
r = path_is_mount_point(etc_machine_id, 0);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", etc_machine_id);
if (r == 0) {
- log_debug("%s is is not a mount point. Nothing to do.", etc_machine_id);
+ log_debug("%s is not a mount point. Nothing to do.", etc_machine_id);
return 0;
}
@@ -312,10 +212,6 @@ int machine_id_commit(const char *root) {
if (fd < 0)
return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
- r = read_machine_id(fd, id);
- if (r < 0)
- return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id);
-
r = fd_is_temporary_fs(fd);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id);
@@ -324,6 +220,10 @@ int machine_id_commit(const char *root) {
return -EROFS;
}
+ r = id128_read_fd(fd, ID128_PLAIN, &id);
+ if (r < 0)
+ return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id);
+
fd = safe_close(fd);
/* Store current mount namespace */
@@ -342,15 +242,9 @@ int machine_id_commit(const char *root) {
return log_error_errno(errno, "Failed to unmount transient %s file in our private namespace: %m", etc_machine_id);
/* Update a persistent version of etc_machine_id */
- fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444);
- if (fd < 0)
- return log_error_errno(errno, "Cannot open for writing %s. This is mandatory to get a persistent machine-id: %m", etc_machine_id);
-
- r = write_machine_id(fd, id);
+ r = id128_write(etc_machine_id, ID128_PLAIN, id, true);
if (r < 0)
- return log_error_errno(r, "Cannot write %s: %m", etc_machine_id);
-
- fd = safe_close(fd);
+ return log_error_errno(r, "Cannot write %s. This is mandatory to get a persistent machine ID: %m", etc_machine_id);
/* Return to initial namespace and proceed a lazy tmpfs unmount */
r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1);
diff --git a/src/core/machine-id-setup.h b/src/core/machine-id-setup.h
index a7e7678ed9..29f4620646 100644
--- a/src/core/machine-id-setup.h
+++ b/src/core/machine-id-setup.h
@@ -20,4 +20,4 @@
***/
int machine_id_commit(const char *root);
-int machine_id_setup(const char *root, sd_id128_t machine_id);
+int machine_id_setup(const char *root, sd_id128_t requested, sd_id128_t *ret);
diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in
index 2cace3d3ba..6e8a3b3e3d 100644
--- a/src/core/macros.systemd.in
+++ b/src/core/macros.systemd.in
@@ -29,6 +29,8 @@
%_sysusersdir @sysusersdir@
%_sysctldir @sysctldir@
%_binfmtdir @binfmtdir@
+%_systemdgeneratordir @systemgeneratordir@
+%_systemdusergeneratordir @usergeneratordir@
%systemd_requires \
Requires(post): systemd \
@@ -36,6 +38,12 @@ Requires(preun): systemd \
Requires(postun): systemd \
%{nil}
+%systemd_ordering \
+OrderWithRequires(post): systemd \
+OrderWithRequires(preun): systemd \
+OrderWithRequires(postun): systemd \
+%{nil}
+
%systemd_post() \
if [ $1 -eq 1 ] ; then \
# Initial installation \
diff --git a/src/core/main.c b/src/core/main.c
index 93098daa9b..094bbef964 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -92,8 +92,7 @@ static enum {
ACTION_HELP,
ACTION_VERSION,
ACTION_TEST,
- ACTION_DUMP_CONFIGURATION_ITEMS,
- ACTION_DONE
+ ACTION_DUMP_CONFIGURATION_ITEMS
} arg_action = ACTION_RUN;
static char *arg_default_unit = NULL;
static bool arg_system = false;
@@ -127,7 +126,7 @@ static bool arg_default_io_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
-static uint64_t arg_default_tasks_max = UINT64_C(512);
+static uint64_t arg_default_tasks_max = UINT64_MAX;
static sd_id128_t arg_machine_id = {};
noreturn static void freeze_or_reboot(void) {
@@ -291,14 +290,16 @@ static int parse_crash_chvt(const char *value) {
}
static int set_machine_id(const char *m) {
+ sd_id128_t t;
assert(m);
- if (sd_id128_from_string(m, &arg_machine_id) < 0)
+ if (sd_id128_from_string(m, &t) < 0)
return -EINVAL;
- if (sd_id128_is_null(arg_machine_id))
+ if (sd_id128_is_null(t))
return -EINVAL;
+ arg_machine_id = t;
return 0;
}
@@ -409,7 +410,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value) {
if (detect_container() > 0)
log_set_target(LOG_TARGET_CONSOLE);
- } else if (!in_initrd() && !value) {
+ } else if (!value) {
const char *target;
/* SysV compatibility */
@@ -1294,6 +1295,40 @@ static int bump_unix_max_dgram_qlen(void) {
return 1;
}
+static int fixup_environment(void) {
+ _cleanup_free_ char *term = NULL;
+ int r;
+
+ /* We expect the environment to be set correctly
+ * if run inside a container. */
+ if (detect_container() > 0)
+ return 0;
+
+ /* When started as PID1, the kernel uses /dev/console
+ * for our stdios and uses TERM=linux whatever the
+ * backend device used by the console. We try to make
+ * a better guess here since some consoles might not
+ * have support for color mode for example.
+ *
+ * However if TERM was configured through the kernel
+ * command line then leave it alone. */
+
+ r = get_proc_cmdline_key("TERM=", &term);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ term = strdup(default_term_for_tty("/dev/console"));
+ if (!term)
+ return -ENOMEM;
+ }
+
+ if (setenv("TERM", term, 1) < 0)
+ return -errno;
+
+ return 0;
+}
+
int main(int argc, char *argv[]) {
Manager *m = NULL;
int r, retval = EXIT_FAILURE;
@@ -1353,7 +1388,6 @@ int main(int argc, char *argv[]) {
saved_argv = argv;
saved_argc = argc;
- log_show_color(colors_enabled());
log_set_upgrade_syslog_to_journal(true);
/* Disable the umask logic */
@@ -1364,7 +1398,6 @@ int main(int argc, char *argv[]) {
/* Running outside of a container as PID 1 */
arg_system = true;
- make_null_stdio();
log_set_target(LOG_TARGET_KMSG);
log_open();
@@ -1381,12 +1414,12 @@ int main(int argc, char *argv[]) {
if (mac_selinux_setup(&loaded_policy) < 0) {
error_message = "Failed to load SELinux policy";
goto finish;
- } else if (ima_setup() < 0) {
- error_message = "Failed to load IMA policy";
- goto finish;
} else if (mac_smack_setup(&loaded_policy) < 0) {
error_message = "Failed to load SMACK policy";
goto finish;
+ } else if (ima_setup() < 0) {
+ error_message = "Failed to load IMA policy";
+ goto finish;
}
dual_timestamp_get(&security_finish_timestamp);
}
@@ -1417,7 +1450,7 @@ int main(int argc, char *argv[]) {
/*
* Do a dummy very first call to seal the kernel's time warp magic.
*
- * Do not call this this from inside the initrd. The initrd might not
+ * Do not call this from inside the initrd. The initrd might not
* carry /etc/adjtime with LOCAL, but the real system could be set up
* that way. In such case, we need to delay the time-warp or the sealing
* until we reach the real system.
@@ -1480,6 +1513,19 @@ int main(int argc, char *argv[]) {
(void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
}
+ if (arg_system) {
+ if (fixup_environment() < 0) {
+ error_message = "Failed to fix up PID1 environment";
+ goto finish;
+ }
+
+ /* Try to figure out if we can use colors with the console. No
+ * need to do that for user instances since they never log
+ * into the console. */
+ log_show_color(colors_enabled());
+ make_null_stdio();
+ }
+
/* Initialize default unit */
r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
if (r < 0) {
@@ -1513,6 +1559,8 @@ int main(int argc, char *argv[]) {
(void) reset_all_signal_handlers();
(void) ignore_signals(SIGNALS_IGNORE, -1);
+ arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */
+
if (parse_config_file() < 0) {
error_message = "Failed to parse config file";
goto finish;
@@ -1569,9 +1617,6 @@ int main(int argc, char *argv[]) {
unit_dump_config_items(stdout);
retval = EXIT_SUCCESS;
goto finish;
- } else if (arg_action == ACTION_DONE) {
- retval = EXIT_SUCCESS;
- goto finish;
}
if (!arg_system &&
@@ -1674,7 +1719,7 @@ int main(int argc, char *argv[]) {
status_welcome();
hostname_setup();
- machine_id_setup(NULL, arg_machine_id);
+ machine_id_setup(NULL, arg_machine_id, NULL);
loopback_setup();
bump_unix_max_dgram_qlen();
@@ -1967,6 +2012,9 @@ finish:
log_error_errno(r, "Failed to switch root, trying to continue: %m");
}
+ /* Reopen the console */
+ (void) make_console_stdio();
+
args_size = MAX(6, argc+1);
args = newa(const char*, args_size);
@@ -1992,10 +2040,6 @@ finish:
args[i++] = sfd;
args[i++] = NULL;
- /* do not pass along the environment we inherit from the kernel or initrd */
- if (switch_root_dir)
- (void) clearenv();
-
assert(i <= args_size);
/*
@@ -2018,9 +2062,6 @@ finish:
arg_serialization = safe_fclose(arg_serialization);
fds = fdset_free(fds);
- /* Reopen the console */
- (void) make_console_stdio();
-
for (j = 1, i = 1; j < (unsigned) argc; j++)
args[i++] = argv[j];
args[i++] = NULL;
diff --git a/src/core/manager.c b/src/core/manager.c
index 14d97a87d0..c20e185d78 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -64,7 +64,6 @@
#include "manager.h"
#include "missing.h"
#include "mkdir.h"
-#include "mkdir.h"
#include "parse-util.h"
#include "path-lookup.h"
#include "path-util.h"
@@ -136,23 +135,28 @@ static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned po
if (pos > 1) {
if (pos > 2)
p = mempset(p, ' ', pos-2);
- p = stpcpy(p, ANSI_RED);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
*p++ = '*';
}
if (pos > 0 && pos <= width) {
- p = stpcpy(p, ANSI_HIGHLIGHT_RED);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_HIGHLIGHT_RED);
*p++ = '*';
}
- p = stpcpy(p, ANSI_NORMAL);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_NORMAL);
if (pos < width) {
- p = stpcpy(p, ANSI_RED);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
*p++ = '*';
if (pos < width-1)
p = mempset(p, ' ', width-1-pos);
- strcpy(p, ANSI_NORMAL);
+ if (log_get_show_color())
+ strcpy(p, ANSI_NORMAL);
}
}
@@ -549,7 +553,6 @@ static int manager_default_environment(Manager *m) {
return 0;
}
-
int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
Manager *m;
int r;
@@ -565,7 +568,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
m->exit_code = _MANAGER_EXIT_CODE_INVALID;
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->default_tasks_accounting = true;
- m->default_tasks_max = UINT64_C(512);
+ m->default_tasks_max = UINT64_MAX;
#ifdef ENABLE_EFI
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@@ -809,28 +812,6 @@ static int manager_setup_cgroups_agent(Manager *m) {
return 0;
}
-static int manager_setup_kdbus(Manager *m) {
- _cleanup_free_ char *p = NULL;
-
- assert(m);
-
- if (m->test_run || m->kdbus_fd >= 0)
- return 0;
- if (!is_kdbus_available())
- return -ESOCKTNOSUPPORT;
-
- m->kdbus_fd = bus_kernel_create_bus(
- MANAGER_IS_SYSTEM(m) ? "system" : "user",
- MANAGER_IS_SYSTEM(m), &p);
-
- if (m->kdbus_fd < 0)
- return log_debug_errno(m->kdbus_fd, "Failed to set up kdbus: %m");
-
- log_debug("Successfully set up kdbus on %s", p);
-
- return 0;
-}
-
static int manager_connect_bus(Manager *m, bool reexecuting) {
bool try_bus_connect;
@@ -872,6 +853,19 @@ enum {
_GC_OFFSET_MAX
};
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker)
+{
+ Iterator i;
+ Unit *other;
+
+ u->gc_marker = gc_marker + GC_OFFSET_GOOD;
+
+ /* Recursively mark referenced units as GOOD as well */
+ SET_FOREACH(other, u->dependencies[UNIT_REFERENCES], i)
+ if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE)
+ unit_gc_mark_good(other, gc_marker);
+}
+
static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
Iterator i;
Unit *other;
@@ -881,6 +875,7 @@ static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
if (u->gc_marker == gc_marker + GC_OFFSET_GOOD ||
u->gc_marker == gc_marker + GC_OFFSET_BAD ||
+ u->gc_marker == gc_marker + GC_OFFSET_UNSURE ||
u->gc_marker == gc_marker + GC_OFFSET_IN_PATH)
return;
@@ -921,7 +916,7 @@ bad:
return;
good:
- u->gc_marker = gc_marker + GC_OFFSET_GOOD;
+ unit_gc_mark_good(u, gc_marker);
}
static unsigned manager_dispatch_gc_queue(Manager *m) {
@@ -1008,6 +1003,9 @@ Manager* manager_free(Manager *m) {
bus_done(m);
+ dynamic_user_vacuum(m, false);
+ hashmap_free(m->dynamic_users);
+
hashmap_free(m->units);
hashmap_free(m->jobs);
hashmap_free(m->watch_pids1);
@@ -1225,13 +1223,15 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
/* We might have deserialized the kdbus control fd, but if we
* didn't, then let's create the bus now. */
- manager_setup_kdbus(m);
manager_connect_bus(m, !!serialization);
bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed);
/* Third, fire things up! */
manager_coldplug(m);
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
if (serialization) {
assert(m->n_reloading > 0);
m->n_reloading--;
@@ -1720,16 +1720,28 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
}
static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) {
+ uint64_t iteration;
+
assert(m);
assert(u);
assert(si);
+ sd_event_get_iteration(m->event, &iteration);
+
log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, u->id);
unit_unwatch_pid(u, si->si_pid);
- if (UNIT_VTABLE(u)->sigchld_event)
- UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
+ if (UNIT_VTABLE(u)->sigchld_event) {
+ if (set_size(u->pids) <= 1 ||
+ iteration != u->sigchldgen ||
+ unit_main_pid(u) == si->si_pid ||
+ unit_control_pid(u) == si->si_pid) {
+ UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
+ u->sigchldgen = iteration;
+ } else
+ log_debug("%s already issued a sigchld this iteration %" PRIu64 ", skipping. Pids still being watched %d", u->id, iteration, set_size(u->pids));
+ }
}
static int manager_dispatch_sigchld(Manager *m) {
@@ -2396,6 +2408,10 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
bus_track_serialize(m->subscribed, f);
+ r = dynamic_user_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
fputc('\n', f);
HASHMAP_FOREACH_KEY(u, t, m->units, i) {
@@ -2572,7 +2588,9 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
m->kdbus_fd = fdset_remove(fds, fd);
}
- } else {
+ } else if (startswith(l, "dynamic-user="))
+ dynamic_user_deserialize_one(m, l + 13, fds);
+ else {
int k;
k = bus_track_deserialize_item(&m->deserialized_subscribed, l);
@@ -2653,6 +2671,7 @@ int manager_reload(Manager *m) {
manager_clear_jobs_and_units(m);
lookup_paths_flush_generator(&m->lookup_paths);
lookup_paths_free(&m->lookup_paths);
+ dynamic_user_vacuum(m, false);
q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
if (q < 0 && r >= 0)
@@ -2689,6 +2708,9 @@ int manager_reload(Manager *m) {
/* Third, fire things up! */
manager_coldplug(m);
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
/* Sync current state of bus names with our set of listening units */
if (m->api_bus)
manager_sync_bus_names(m, m->api_bus);
diff --git a/src/core/manager.h b/src/core/manager.h
index 6ed15c1a41..c681d5dc46 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -298,6 +298,9 @@ struct Manager {
/* Used for processing polkit authorization responses */
Hashmap *polkit_registry;
+ /* Dynamic users/groups, indexed by their name */
+ Hashmap *dynamic_users;
+
/* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */
RateLimit ctrl_alt_del_ratelimit;
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 40fc548b42..5d8ab0ec70 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -28,6 +28,7 @@
#include "cgroup-util.h"
#include "dev-setup.h"
#include "efivars.h"
+#include "fs-util.h"
#include "label.h"
#include "log.h"
#include "macro.h"
@@ -108,8 +109,6 @@ static const MountPoint mount_table[] = {
{ "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
is_efi_boot, MNT_NONE },
#endif
- { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
- is_kdbus_wanted, MNT_IN_CONTAINER },
};
/* These are API file systems that might be mounted by other software,
@@ -405,9 +404,16 @@ int mount_setup(bool loaded_policy) {
* really needs to stay for good, otherwise software that
* copied sd-daemon.c into their sources will misdetect
* systemd. */
- mkdir_label("/run/systemd", 0755);
- mkdir_label("/run/systemd/system", 0755);
- mkdir_label("/run/systemd/inaccessible", 0000);
+ (void) mkdir_label("/run/systemd", 0755);
+ (void) mkdir_label("/run/systemd/system", 0755);
+ (void) mkdir_label("/run/systemd/inaccessible", 0000);
+ /* Set up inaccessible items */
+ (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0);
+ (void) mkdir_label("/run/systemd/inaccessible/dir", 0000);
+ (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0));
+ (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0));
+ (void) mkfifo("/run/systemd/inaccessible/fifo", 0000);
+ (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0);
return 0;
}
diff --git a/src/core/mount.c b/src/core/mount.c
index 7b20892b0d..afb20af9e2 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -245,6 +245,8 @@ static void mount_done(Unit *u) {
exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
m->control_command = NULL;
+ dynamic_creds_unref(&m->dynamic_creds);
+
mount_unwatch_control_pid(m);
m->timer_event_source = sd_event_source_unref(m->timer_event_source);
@@ -650,6 +652,9 @@ static int mount_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
mount_set_state(m, new_state);
return 0;
}
@@ -718,6 +723,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(m));
+ if (r < 0)
+ return r;
+
r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
if (r < 0)
return r;
@@ -734,6 +743,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
&m->exec_context,
&exec_params,
m->exec_runtime,
+ &m->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -754,12 +764,14 @@ static void mount_enter_dead(Mount *m, MountResult f) {
if (f != MOUNT_SUCCESS)
m->result = f;
+ mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+
exec_runtime_destroy(m->exec_runtime);
m->exec_runtime = exec_runtime_unref(m->exec_runtime);
exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager));
- mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+ dynamic_creds_destroy(&m->dynamic_creds);
}
static void mount_enter_mounted(Mount *m, MountResult f) {
@@ -1704,6 +1716,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
/* This has just been unmounted by
* somebody else, follow the state
* change. */
+ mount->result = MOUNT_SUCCESS; /* make sure we forget any earlier umount failures */
mount_enter_dead(mount, MOUNT_SUCCESS);
break;
@@ -1814,6 +1827,7 @@ const UnitVTable mount_vtable = {
.cgroup_context_offset = offsetof(Mount, cgroup_context),
.kill_context_offset = offsetof(Mount, kill_context),
.exec_runtime_offset = offsetof(Mount, exec_runtime),
+ .dynamic_creds_offset = offsetof(Mount, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/mount.h b/src/core/mount.h
index da529c44f4..ac27b518cc 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -21,8 +21,8 @@
typedef struct Mount Mount;
-#include "execute.h"
#include "kill.h"
+#include "dynamic-user.h"
typedef enum MountExecCommand {
MOUNT_EXEC_MOUNT,
@@ -85,6 +85,7 @@ struct Mount {
CGroupContext cgroup_context;
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
MountState state, deserialized_state;
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 203d122810..52a2505d94 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -278,6 +278,7 @@ static int apply_mount(
const char *what;
int r;
+ struct stat target;
assert(m);
@@ -287,12 +288,21 @@ static int apply_mount(
/* First, get rid of everything that is below if there
* is anything... Then, overmount it with an
- * inaccessible directory. */
+ * inaccessible path. */
umount_recursive(m->path, 0);
- what = "/run/systemd/inaccessible";
- break;
+ if (lstat(m->path, &target) < 0) {
+ if (m->ignore && errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+ what = mode_to_inaccessible_node(target.st_mode);
+ if (!what) {
+ log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed");
+ return -ELOOP;
+ }
+ break;
case READONLY:
case READWRITE:
/* Nothing to mount here, we just later toggle the
@@ -317,12 +327,14 @@ static int apply_mount(
assert(what);
r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
- if (r >= 0)
+ if (r >= 0) {
log_debug("Successfully mounted %s to %s", what, m->path);
- else if (m->ignore && errno == ENOENT)
- return 0;
-
- return r;
+ return r;
+ } else {
+ if (m->ignore && errno == ENOENT)
+ return 0;
+ return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
+ }
}
static int make_read_only(BindMount *m) {
@@ -335,7 +347,8 @@ static int make_read_only(BindMount *m) {
else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
r = bind_remount_recursive(m->path, false);
if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
- r = mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL);
+ if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+ r = -errno;
} else
r = 0;
@@ -347,9 +360,9 @@ static int make_read_only(BindMount *m) {
int setup_namespace(
const char* root_directory,
- char** read_write_dirs,
- char** read_only_dirs,
- char** inaccessible_dirs,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
const char* tmp_dir,
const char* var_tmp_dir,
bool private_dev,
@@ -368,9 +381,9 @@ int setup_namespace(
return -errno;
n = !!tmp_dir + !!var_tmp_dir +
- strv_length(read_write_dirs) +
- strv_length(read_only_dirs) +
- strv_length(inaccessible_dirs) +
+ strv_length(read_write_paths) +
+ strv_length(read_only_paths) +
+ strv_length(inaccessible_paths) +
private_dev +
(protect_home != PROTECT_HOME_NO ? 3 : 0) +
(protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
@@ -378,15 +391,15 @@ int setup_namespace(
if (n > 0) {
m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
- r = append_mounts(&m, read_write_dirs, READWRITE);
+ r = append_mounts(&m, read_write_paths, READWRITE);
if (r < 0)
return r;
- r = append_mounts(&m, read_only_dirs, READONLY);
+ r = append_mounts(&m, read_only_paths, READONLY);
if (r < 0)
return r;
- r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
+ r = append_mounts(&m, inaccessible_paths, INACCESSIBLE);
if (r < 0)
return r;
@@ -629,7 +642,7 @@ int setup_netns(int netns_storage_socket[2]) {
}
fail:
- lockf(netns_storage_socket[0], F_ULOCK, 0);
+ (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
return r;
}
diff --git a/src/core/namespace.h b/src/core/namespace.h
index b54b7b47d6..1aedf5f208 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -40,9 +40,9 @@ typedef enum ProtectSystem {
} ProtectSystem;
int setup_namespace(const char *chroot,
- char **read_write_dirs,
- char **read_only_dirs,
- char **inaccessible_dirs,
+ char **read_write_paths,
+ char **read_only_paths,
+ char **inaccessible_paths,
const char *tmp_dir,
const char *var_tmp_dir,
bool private_dev,
diff --git a/src/core/scope.c b/src/core/scope.c
index 238f63a729..b45e238974 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -240,7 +240,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
/* If we have a controller set let's ask the controller nicely
* to terminate the scope, instead of us going directly into
- * SIGTERM beserk mode */
+ * SIGTERM berserk mode */
if (state == SCOPE_STOP_SIGTERM)
skip_signal = bus_scope_send_request_stop(s) > 0;
@@ -248,7 +248,9 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
r = unit_kill_context(
UNIT(s),
&s->kill_context,
- state != SCOPE_STOP_SIGTERM ? KILL_KILL : KILL_TERMINATE,
+ state != SCOPE_STOP_SIGTERM ? KILL_KILL :
+ s->was_abandoned ? KILL_TERMINATE_AND_LOG :
+ KILL_TERMINATE,
-1, -1, false);
if (r < 0)
goto fail;
@@ -369,6 +371,7 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(fds);
unit_serialize_item(u, f, "state", scope_state_to_string(s->state));
+ unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned));
return 0;
}
@@ -389,6 +392,14 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F
else
s->deserialized_state = state;
+ } else if (streq(key, "was-abandoned")) {
+ int k;
+
+ k = parse_boolean(value);
+ if (k < 0)
+ log_unit_debug(u, "Failed to parse boolean value: %s", value);
+ else
+ s->was_abandoned = k;
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@@ -428,8 +439,9 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
unit_tidy_watch_pids(u, 0, 0);
unit_watch_all_pids(u);
- /* If the PID set is empty now, then let's finish this off */
- if (set_isempty(u->pids))
+ /* If the PID set is empty now, then let's finish this off
+ (On unified we use proper notifications) */
+ if (cg_unified() <= 0 && set_isempty(u->pids))
scope_notify_cgroup_empty_event(u);
}
@@ -473,6 +485,7 @@ int scope_abandon(Scope *s) {
if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED))
return -ESTALE;
+ s->was_abandoned = true;
s->controller = mfree(s->controller);
/* The client is no longer watching the remaining processes,
diff --git a/src/core/scope.h b/src/core/scope.h
index 2dc86325c5..eaf8e8b447 100644
--- a/src/core/scope.h
+++ b/src/core/scope.h
@@ -21,7 +21,9 @@
typedef struct Scope Scope;
+#include "cgroup.h"
#include "kill.h"
+#include "unit.h"
typedef enum ScopeResult {
SCOPE_SUCCESS,
@@ -43,6 +45,7 @@ struct Scope {
usec_t timeout_stop_usec;
char *controller;
+ bool was_abandoned;
sd_event_source *timer_event_source;
};
diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c
index cc287d602d..2b96a9551b 100644
--- a/src/core/selinux-access.c
+++ b/src/core/selinux-access.c
@@ -191,7 +191,7 @@ int mac_selinux_generic_access_check(
const char *tclass = NULL, *scon = NULL;
struct audit_info audit_info = {};
_cleanup_free_ char *cl = NULL;
- security_context_t fcon = NULL;
+ char *fcon = NULL;
char **cmdline = NULL;
int r = 0;
diff --git a/src/core/selinux-setup.c b/src/core/selinux-setup.c
index 4072df58e6..527aa8add0 100644
--- a/src/core/selinux-setup.c
+++ b/src/core/selinux-setup.c
@@ -44,7 +44,7 @@ int mac_selinux_setup(bool *loaded_policy) {
#ifdef HAVE_SELINUX
int enforce = 0;
usec_t before_load, after_load;
- security_context_t con;
+ char *con;
int r;
union selinux_callback cb;
bool initialized = false;
diff --git a/src/core/service.c b/src/core/service.c
index 7ebabca5d6..eb125cb999 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -200,16 +200,27 @@ static void service_stop_watchdog(Service *s) {
s->watchdog_timestamp = DUAL_TIMESTAMP_NULL;
}
+static usec_t service_get_watchdog_usec(Service *s) {
+ assert(s);
+
+ if (s->watchdog_override_enable)
+ return s->watchdog_override_usec;
+ else
+ return s->watchdog_usec;
+}
+
static void service_start_watchdog(Service *s) {
int r;
+ usec_t watchdog_usec;
assert(s);
- if (s->watchdog_usec <= 0)
+ watchdog_usec = service_get_watchdog_usec(s);
+ if (watchdog_usec == 0 || watchdog_usec == USEC_INFINITY)
return;
if (s->watchdog_event_source) {
- r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, s->watchdog_usec));
+ r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec));
if (r < 0) {
log_unit_warning_errno(UNIT(s), r, "Failed to reset watchdog timer: %m");
return;
@@ -221,7 +232,7 @@ static void service_start_watchdog(Service *s) {
UNIT(s)->manager->event,
&s->watchdog_event_source,
CLOCK_MONOTONIC,
- usec_add(s->watchdog_timestamp.monotonic, s->watchdog_usec), 0,
+ usec_add(s->watchdog_timestamp.monotonic, watchdog_usec), 0,
service_dispatch_watchdog, s);
if (r < 0) {
log_unit_warning_errno(UNIT(s), r, "Failed to add watchdog timer: %m");
@@ -246,6 +257,17 @@ static void service_reset_watchdog(Service *s) {
service_start_watchdog(s);
}
+static void service_reset_watchdog_timeout(Service *s, usec_t watchdog_override_usec) {
+ assert(s);
+
+ s->watchdog_override_enable = true;
+ s->watchdog_override_usec = watchdog_override_usec;
+ service_reset_watchdog(s);
+
+ log_unit_debug(UNIT(s), "watchdog_usec="USEC_FMT, s->watchdog_usec);
+ log_unit_debug(UNIT(s), "watchdog_override_usec="USEC_FMT, s->watchdog_override_usec);
+}
+
static void service_fd_store_unlink(ServiceFDStore *fs) {
if (!fs)
@@ -300,6 +322,8 @@ static void service_done(Unit *u) {
s->control_command = NULL;
s->main_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
exit_status_set_free(&s->restart_prevent_status);
exit_status_set_free(&s->restart_force_status);
exit_status_set_free(&s->success_status);
@@ -318,6 +342,7 @@ static void service_done(Unit *u) {
s->bus_name_owner = mfree(s->bus_name_owner);
service_close_socket_fd(s);
+ s->peer = socket_peer_unref(s->peer);
unit_ref_unset(&s->accept_socket);
@@ -574,20 +599,9 @@ static int service_setup_bus_name(Service *s) {
if (!s->bus_name)
return 0;
- if (is_kdbus_available()) {
- const char *n;
-
- n = strjoina(s->bus_name, ".busname");
- r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, n, NULL, true);
- if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Failed to add dependency to .busname unit: %m");
-
- } else {
- /* If kdbus is not available, we know the dbus socket is required, hence pull it in, and require it */
- r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true);
- if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
- }
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
/* Regardless if kdbus is used or not, we always want to be ordered against dbus.socket if both are in the transaction. */
r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, NULL, true);
@@ -1019,6 +1033,9 @@ static int service_coldplug(Unit *u) {
if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
service_start_watchdog(s);
+ if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
+ (void) unit_setup_dynamic_creds(u);
+
service_set_state(s, s->deserialized_state);
return 0;
}
@@ -1173,6 +1190,10 @@ static int service_spawn(
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
if (pass_fds ||
s->exec_context.std_input == EXEC_INPUT_SOCKET ||
s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
@@ -1274,6 +1295,7 @@ static int service_spawn(
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -1407,9 +1429,12 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
- /* Also, remove the runtime directory in */
+ /* Also, remove the runtime directory */
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
+ /* Release the user, and destroy it if we are the only remaining owner */
+ dynamic_creds_destroy(&s->dynamic_creds);
+
/* Try to delete the pid file. At this point it will be
* out-of-date, and some software might be confused by it, so
* let's remove it. */
@@ -1663,7 +1688,7 @@ static void service_kill_control_processes(Service *s) {
return;
p = strjoina(UNIT(s)->cgroup_path, "/control");
- cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL);
+ cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, CGROUP_SIGCONT|CGROUP_IGNORE_SELF|CGROUP_REMOVE, NULL, NULL, NULL);
}
static void service_enter_start(Service *s) {
@@ -2003,6 +2028,9 @@ static int service_start(Unit *u) {
s->notify_state = NOTIFY_UNKNOWN;
+ s->watchdog_override_enable = false;
+ s->watchdog_override_usec = 0;
+
service_enter_start_pre(s);
return 1;
}
@@ -2134,6 +2162,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart));
+ if (s->watchdog_override_enable)
+ unit_serialize_item_format(u, f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec);
+
return 0;
}
@@ -2328,6 +2359,14 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
s->stderr_fd = fdset_remove(fds, fd);
s->exec_context.stdio_as_fds = true;
}
+ } else if (streq(key, "watchdog-override-usec")) {
+ usec_t watchdog_override_usec;
+ if (timestamp_deserialize(value, &watchdog_override_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value);
+ else {
+ s->watchdog_override_enable = true;
+ s->watchdog_override_usec = watchdog_override_usec;
+ }
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@@ -2800,8 +2839,9 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
unit_tidy_watch_pids(u, s->main_pid, s->control_pid);
unit_watch_all_pids(u);
- /* If the PID set is empty now, then let's finish this off */
- if (set_isempty(u->pids))
+ /* If the PID set is empty now, then let's finish this off
+ (On unified we use proper notifications) */
+ if (cg_unified() <= 0 && set_isempty(u->pids))
service_notify_cgroup_empty_event(u);
}
@@ -2905,12 +2945,15 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata) {
Service *s = SERVICE(userdata);
char t[FORMAT_TIMESPAN_MAX];
+ usec_t watchdog_usec;
assert(s);
assert(source == s->watchdog_event_source);
+ watchdog_usec = service_get_watchdog_usec(s);
+
log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!",
- format_timespan(t, sizeof(t), s->watchdog_usec, 1));
+ format_timespan(t, sizeof(t), watchdog_usec, 1));
service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG);
@@ -3047,6 +3090,15 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds)
service_add_fd_store_set(s, fds, name);
}
+ e = strv_find_startswith(tags, "WATCHDOG_USEC=");
+ if (e) {
+ usec_t watchdog_override_usec;
+ if (safe_atou64(e, &watchdog_override_usec) < 0)
+ log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e);
+ else
+ service_reset_watchdog_timeout(s, watchdog_override_usec);
+ }
+
/* Notify clients about changed status or main pid */
if (notify_dbus)
unit_add_to_dbus_queue(u);
@@ -3285,6 +3337,7 @@ const UnitVTable service_vtable = {
.cgroup_context_offset = offsetof(Service, cgroup_context),
.kill_context_offset = offsetof(Service, kill_context),
.exec_runtime_offset = offsetof(Service, exec_runtime),
+ .dynamic_creds_offset = offsetof(Service, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/service.h b/src/core/service.h
index 4af3d40439..888007cc0b 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -120,6 +120,8 @@ struct Service {
dual_timestamp watchdog_timestamp;
usec_t watchdog_usec;
+ usec_t watchdog_override_usec;
+ bool watchdog_override_enable;
sd_event_source *watchdog_event_source;
ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
@@ -146,9 +148,11 @@ struct Service {
/* Runtime data of the execution context */
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
pid_t main_pid, control_pid;
int socket_fd;
+ SocketPeer *peer;
bool socket_fd_selinux_context_net;
bool permissions_start_only;
diff --git a/src/core/shutdown.c b/src/core/shutdown.c
index e14755d84e..a795d875bb 100644
--- a/src/core/shutdown.c
+++ b/src/core/shutdown.c
@@ -157,7 +157,6 @@ static int switch_root_initramfs(void) {
return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
}
-
int main(int argc, char *argv[]) {
bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
bool in_container, use_watchdog = false;
@@ -203,20 +202,25 @@ int main(int argc, char *argv[]) {
}
(void) cg_get_root_path(&cgroup);
+ in_container = detect_container() > 0;
use_watchdog = !!getenv("WATCHDOG_USEC");
- /* lock us into memory */
+ /* Lock us into memory */
mlockall(MCL_CURRENT|MCL_FUTURE);
+ /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
+ * slow IO is processed here already and the final process killing spree is not impacted by processes
+ * desperately trying to sync IO to disk within their timeout. */
+ if (!in_container)
+ sync();
+
log_info("Sending SIGTERM to remaining processes...");
broadcast_signal(SIGTERM, true, true);
log_info("Sending SIGKILL to remaining processes...");
broadcast_signal(SIGKILL, true, false);
- in_container = detect_container() > 0;
-
need_umount = !in_container;
need_swapoff = !in_container;
need_loop_detach = !in_container;
@@ -345,10 +349,10 @@ int main(int argc, char *argv[]) {
need_loop_detach ? " loop devices," : "",
need_dm_detach ? " DM devices," : "");
- /* The kernel will automaticall flush ATA disks and suchlike
- * on reboot(), but the file systems need to be synce'd
- * explicitly in advance. So let's do this here, but not
- * needlessly slow down containers. */
+ /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
+ * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
+ * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
+ * let's doit once more. */
if (!in_container)
sync();
diff --git a/src/core/socket.c b/src/core/socket.c
index f6204d04bf..ff55885fb3 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -57,6 +57,7 @@
#include "unit-printf.h"
#include "unit.h"
#include "user-util.h"
+#include "in-addr-util.h"
static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
[SOCKET_DEAD] = UNIT_INACTIVE,
@@ -77,6 +78,9 @@ static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
+SocketPeer *socket_peer_new(void);
+int socket_find_peer(Socket *s, int fd, SocketPeer **p);
+
static void socket_init(Unit *u) {
Socket *s = SOCKET(u);
@@ -141,15 +145,23 @@ void socket_free_ports(Socket *s) {
static void socket_done(Unit *u) {
Socket *s = SOCKET(u);
+ SocketPeer *p;
assert(s);
socket_free_ports(s);
+ while ((p = hashmap_steal_first(s->peers_by_address)))
+ p->socket = NULL;
+
+ s->peers_by_address = hashmap_free(s->peers_by_address);
+
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
s->control_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
socket_unwatch_control_pid(s);
unit_ref_unset(&s->service);
@@ -466,6 +478,40 @@ static int socket_verify(Socket *s) {
return 0;
}
+static void peer_address_hash_func(const void *p, struct siphash *state) {
+ const SocketPeer *s = p;
+
+ assert(s);
+
+ if (s->peer.sa.sa_family == AF_INET)
+ siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state);
+ else if (s->peer.sa.sa_family == AF_INET6)
+ siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state);
+}
+
+static int peer_address_compare_func(const void *a, const void *b) {
+ const SocketPeer *x = a, *y = b;
+
+ if (x->peer.sa.sa_family < y->peer.sa.sa_family)
+ return -1;
+ if (x->peer.sa.sa_family > y->peer.sa.sa_family)
+ return 1;
+
+ switch(x->peer.sa.sa_family) {
+ case AF_INET:
+ return memcmp(&x->peer.in.sin_addr, &y->peer.in.sin_addr, sizeof(x->peer.in.sin_addr));
+ case AF_INET6:
+ return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
+ }
+
+ return -1;
+}
+
+const struct hash_ops peer_address_hash_ops = {
+ .hash = peer_address_hash_func,
+ .compare = peer_address_compare_func
+};
+
static int socket_load(Unit *u) {
Socket *s = SOCKET(u);
int r;
@@ -473,6 +519,10 @@ static int socket_load(Unit *u) {
assert(u);
assert(u->load_state == UNIT_STUB);
+ r = hashmap_ensure_allocated(&s->peers_by_address, &peer_address_hash_ops);
+ if (r < 0)
+ return r;
+
r = unit_load_fragment_and_dropin(u);
if (r < 0)
return r;
@@ -730,16 +780,16 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
case AF_INET: {
uint32_t
- a = ntohl(local.in.sin_addr.s_addr),
- b = ntohl(remote.in.sin_addr.s_addr);
+ a = be32toh(local.in.sin_addr.s_addr),
+ b = be32toh(remote.in.sin_addr.s_addr);
if (asprintf(&r,
"%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
nr,
a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
- ntohs(local.in.sin_port),
+ be16toh(local.in.sin_port),
b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF,
- ntohs(remote.in.sin_port)) < 0)
+ be16toh(remote.in.sin_port)) < 0)
return -ENOMEM;
break;
@@ -760,9 +810,9 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
"%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
nr,
a[0], a[1], a[2], a[3],
- ntohs(local.in6.sin6_port),
+ be16toh(local.in6.sin6_port),
b[0], b[1], b[2], b[3],
- ntohs(remote.in6.sin6_port)) < 0)
+ be16toh(remote.in6.sin6_port)) < 0)
return -ENOMEM;
} else {
char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
@@ -771,9 +821,9 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
"%u-%s:%u-%s:%u",
nr,
inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)),
- ntohs(local.in6.sin6_port),
+ be16toh(local.in6.sin6_port),
inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)),
- ntohs(remote.in6.sin6_port)) < 0)
+ be16toh(remote.in6.sin6_port)) < 0)
return -ENOMEM;
}
@@ -1602,6 +1652,9 @@ static int socket_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
socket_set_state(s, s->deserialized_state);
return 0;
}
@@ -1633,6 +1686,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
if (r < 0)
return r;
@@ -1654,6 +1711,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -1757,12 +1815,14 @@ static void socket_enter_dead(Socket *s, SocketResult f) {
if (f != SOCKET_SUCCESS)
s->result = f;
+ socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
- socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+ dynamic_creds_destroy(&s->dynamic_creds);
}
static void socket_enter_signal(Socket *s, SocketState state, SocketResult f);
@@ -2038,6 +2098,7 @@ static void socket_enter_running(Socket *s, int cfd) {
socket_set_state(s, SOCKET_RUNNING);
} else {
_cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL;
+ _cleanup_(socket_peer_unrefp) SocketPeer *p = NULL;
Service *service;
if (s->n_connections >= s->max_connections) {
@@ -2046,6 +2107,21 @@ static void socket_enter_running(Socket *s, int cfd) {
return;
}
+ if (s->max_connections_per_source > 0) {
+ r = socket_find_peer(s, cfd, &p);
+ if (r < 0) {
+ safe_close(cfd);
+ return;
+ }
+
+ if (p->n_ref > s->max_connections_per_source) {
+ log_unit_warning(UNIT(s), "Too many incoming connections (%u) from source, refusing connection attempt.", p->n_ref);
+ safe_close(cfd);
+ p = NULL;
+ return;
+ }
+ }
+
r = socket_instantiate_service(s);
if (r < 0)
goto fail;
@@ -2087,6 +2163,11 @@ static void socket_enter_running(Socket *s, int cfd) {
cfd = -1; /* We passed ownership of the fd to the service now. Forget it here. */
s->n_connections++;
+ if (s->max_connections_per_source > 0) {
+ service->peer = socket_peer_ref(p);
+ p = NULL;
+ }
+
r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(service), JOB_REPLACE, &error, NULL);
if (r < 0) {
/* We failed to activate the new service, but it still exists. Let's make sure the service
@@ -2232,7 +2313,9 @@ static int socket_stop(Unit *u) {
static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
Socket *s = SOCKET(u);
+ SocketPeer *k;
SocketPort *p;
+ Iterator i;
int r;
assert(u);
@@ -2283,6 +2366,16 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
}
}
+ HASHMAP_FOREACH(k, s->peers_by_address, i) {
+ _cleanup_free_ char *t = NULL;
+
+ r = sockaddr_pretty(&k->peer.sa, FAMILY_ADDRESS_SIZE(k->peer.sa.sa_family), true, true, &t);
+ if (r < 0)
+ return r;
+
+ unit_serialize_item_format(u, f, "peer", "%u %s", k->n_ref, t);
+ }
+
return 0;
}
@@ -2446,6 +2539,33 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
}
}
+ } else if (streq(key, "peer")) {
+ _cleanup_(socket_peer_unrefp) SocketPeer *p;
+ int n_ref, skip = 0;
+ SocketAddress a;
+ int r;
+
+ if (sscanf(value, "%u %n", &n_ref, &skip) < 1 || n_ref < 1)
+ log_unit_debug(u, "Failed to parse socket peer value: %s", value);
+ else {
+ r = socket_address_parse(&a, value+skip);
+ if (r < 0)
+ return r;
+
+ p = socket_peer_new();
+ if (!p)
+ return log_oom();
+
+ p->n_ref = n_ref;
+ memcpy(&p->peer, &a.sockaddr, sizeof(a.sockaddr));
+ p->socket = s;
+
+ r = hashmap_put(s->peers_by_address, p, p);
+ if (r < 0)
+ return r;
+
+ p = NULL;
+ }
} else
log_unit_debug(UNIT(s), "Unknown serialization key: %s", key);
@@ -2542,6 +2662,83 @@ _pure_ static bool socket_check_gc(Unit *u) {
return s->n_connections > 0;
}
+SocketPeer *socket_peer_new(void) {
+ SocketPeer *p;
+
+ p = new0(SocketPeer, 1);
+ if (!p)
+ return NULL;
+
+ p->n_ref = 1;
+
+ return p;
+}
+
+SocketPeer *socket_peer_ref(SocketPeer *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+ p->n_ref++;
+
+ return p;
+}
+
+SocketPeer *socket_peer_unref(SocketPeer *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+
+ p->n_ref--;
+
+ if (p->n_ref > 0)
+ return NULL;
+
+ if (p->socket)
+ (void) hashmap_remove(p->socket->peers_by_address, p);
+
+ free(p);
+
+ return NULL;
+}
+
+int socket_find_peer(Socket *s, int fd, SocketPeer **p) {
+ _cleanup_free_ SocketPeer *remote = NULL;
+ SocketPeer sa, *i;
+ socklen_t salen = sizeof(sa.peer);
+ int r;
+
+ assert(fd >= 0);
+ assert(s);
+
+ r = getpeername(fd, &sa.peer.sa, &salen);
+ if (r < 0)
+ return log_error_errno(errno, "getpeername failed: %m");
+
+ i = hashmap_get(s->peers_by_address, &sa);
+ if (i) {
+ *p = i;
+ return 1;
+ }
+
+ remote = socket_peer_new();
+ if (!remote)
+ return log_oom();
+
+ memcpy(&remote->peer, &sa.peer, sizeof(union sockaddr_union));
+ remote->socket = s;
+
+ r = hashmap_put(s->peers_by_address, remote, remote);
+ if (r < 0)
+ return r;
+
+ *p = remote;
+ remote = NULL;
+
+ return 0;
+}
+
static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
SocketPort *p = userdata;
int cfd = -1;
@@ -2930,6 +3127,7 @@ const UnitVTable socket_vtable = {
.cgroup_context_offset = offsetof(Socket, cgroup_context),
.kill_context_offset = offsetof(Socket, kill_context),
.exec_runtime_offset = offsetof(Socket, exec_runtime),
+ .dynamic_creds_offset = offsetof(Socket, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/socket.h b/src/core/socket.h
index 0f1ac69c6f..2fe38ef2aa 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -20,6 +20,7 @@
***/
typedef struct Socket Socket;
+typedef struct SocketPeer SocketPeer;
#include "mount.h"
#include "service.h"
@@ -79,9 +80,12 @@ struct Socket {
LIST_HEAD(SocketPort, ports);
+ Hashmap *peers_by_address;
+
unsigned n_accepted;
unsigned n_connections;
unsigned max_connections;
+ unsigned max_connections_per_source;
unsigned backlog;
unsigned keep_alive_cnt;
@@ -94,7 +98,9 @@ struct Socket {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
/* For Accept=no sockets refers to the one service we'll
activate. For Accept=yes sockets is either NULL, or filled
@@ -162,6 +168,18 @@ struct Socket {
RateLimit trigger_limit;
};
+struct SocketPeer {
+ unsigned n_ref;
+
+ Socket *socket;
+ union sockaddr_union peer;
+};
+
+SocketPeer *socket_peer_ref(SocketPeer *p);
+SocketPeer *socket_peer_unref(SocketPeer *p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref);
+
/* Called from the service code when collecting fds */
int socket_collect_fds(Socket *s, int **fds);
diff --git a/src/core/swap.c b/src/core/swap.c
index a532b15be8..66a318d01f 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -153,6 +153,8 @@ static void swap_done(Unit *u) {
exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
s->control_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
swap_unwatch_control_pid(s);
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
@@ -553,6 +555,9 @@ static int swap_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
swap_set_state(s, new_state);
return 0;
}
@@ -628,6 +633,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
goto fail;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
if (r < 0)
goto fail;
@@ -644,6 +653,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
goto fail;
@@ -668,12 +678,14 @@ static void swap_enter_dead(Swap *s, SwapResult f) {
if (f != SWAP_SUCCESS)
s->result = f;
+ swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
- swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+ dynamic_creds_destroy(&s->dynamic_creds);
}
static void swap_enter_active(Swap *s, SwapResult f) {
@@ -1466,6 +1478,7 @@ const UnitVTable swap_vtable = {
.cgroup_context_offset = offsetof(Swap, cgroup_context),
.kill_context_offset = offsetof(Swap, kill_context),
.exec_runtime_offset = offsetof(Swap, exec_runtime),
+ .dynamic_creds_offset = offsetof(Swap, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/core/swap.h b/src/core/swap.h
index fbf66debdc..b0ef50f1e8 100644
--- a/src/core/swap.h
+++ b/src/core/swap.h
@@ -82,6 +82,7 @@ struct Swap {
CGroupContext cgroup_context;
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
SwapState state, deserialized_state;
diff --git a/src/core/system.conf b/src/core/system.conf
index db8b7acd78..c6bb050aac 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -42,7 +42,7 @@
#DefaultBlockIOAccounting=no
#DefaultMemoryAccounting=no
#DefaultTasksAccounting=yes
-#DefaultTasksMax=512
+#DefaultTasksMax=15%
#DefaultLimitCPU=
#DefaultLimitFSIZE=
#DefaultLimitDATA=
diff --git a/src/core/transaction.c b/src/core/transaction.c
index e06a48a2f1..8370b864fb 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -373,7 +373,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
delete = NULL;
for (k = from; k; k = ((k->generation == generation && k->marker != k) ? k->marker : NULL)) {
- /* logging for j not k here here to provide consistent narrative */
+ /* logging for j not k here to provide consistent narrative */
log_unit_warning(j->unit,
"Found dependency on %s/%s",
k->unit->id, job_type_to_string(k->type));
@@ -392,7 +392,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
if (delete) {
const char *status;
- /* logging for j not k here here to provide consistent narrative */
+ /* logging for j not k here to provide consistent narrative */
log_unit_warning(j->unit,
"Breaking ordering cycle by deleting job %s/%s",
delete->unit->id, job_type_to_string(delete->type));
@@ -591,6 +591,9 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) {
HASHMAP_FOREACH(j, m->jobs, i) {
assert(j->installed);
+ if (j->unit->ignore_on_isolate)
+ continue;
+
if (hashmap_get(tr->jobs, j->unit))
continue;
diff --git a/src/core/unit.c b/src/core/unit.c
index e98086a3f6..ff7c562fba 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -100,6 +100,7 @@ Unit *unit_new(Manager *m, size_t size) {
u->on_failure_job_mode = JOB_REPLACE;
u->cgroup_inotify_wd = -1;
u->job_timeout = USEC_INFINITY;
+ u->sigchldgen = 0;
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -1682,7 +1683,7 @@ static void unit_check_unneeded(Unit *u) {
if (unit_active_or_pending(other))
return;
- /* If stopping a unit fails continously we might enter a stop
+ /* If stopping a unit fails continuously we might enter a stop
* loop here, hence stop acting on the service being
* unnecessary after a while. */
if (!ratelimit_test(&u->auto_stop_ratelimit)) {
@@ -1727,7 +1728,7 @@ static void unit_check_binds_to(Unit *u) {
if (!stop)
return;
- /* If stopping a unit fails continously we might enter a stop
+ /* If stopping a unit fails continuously we might enter a stop
* loop here, hence stop acting on the service being
* unnecessary after a while. */
if (!ratelimit_test(&u->auto_stop_ratelimit)) {
@@ -3143,7 +3144,7 @@ int unit_kill_common(
if (!pid_set)
return -ENOMEM;
- q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, false, false, pid_set);
+ q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, NULL, NULL);
if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT)
r = q;
else
@@ -3223,6 +3224,33 @@ void unit_ref_unset(UnitRef *ref) {
ref->unit = NULL;
}
+static int user_from_unit_name(Unit *u, char **ret) {
+
+ static const uint8_t hash_key[] = {
+ 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96,
+ 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec
+ };
+
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ r = unit_name_to_prefix(u->id, &n);
+ if (r < 0)
+ return r;
+
+ if (valid_user_group_name(n)) {
+ *ret = n;
+ n = NULL;
+ return 0;
+ }
+
+ /* If we can't use the unit name as a user name, then let's hash it and use that */
+ if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
int unit_patch_contexts(Unit *u) {
CGroupContext *cc;
ExecContext *ec;
@@ -3267,6 +3295,22 @@ int unit_patch_contexts(Unit *u) {
if (ec->private_devices)
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
+
+ if (ec->dynamic_user) {
+ if (!ec->user) {
+ r = user_from_unit_name(u, &ec->user);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ec->group) {
+ ec->group = strdup(ec->user);
+ if (!ec->group)
+ return -ENOMEM;
+ }
+
+ ec->private_tmp = true;
+ }
}
cc = unit_get_cgroup_context(u);
@@ -3364,6 +3408,7 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co
/* When this is a transient unit file in creation, then let's not create a new drop-in but instead
* write to the transient unit file. */
fputs(data, u->transient_file);
+ fputc('\n', u->transient_file);
return 0;
}
@@ -3375,7 +3420,7 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co
return -EINVAL;
wrapped = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\"\n"
- "or an equivalent operation. Do not edit.\n",
+ "# or an equivalent operation. Do not edit.\n",
data,
"\n");
@@ -3503,7 +3548,6 @@ int unit_make_transient(Unit *u) {
unit_add_to_dbus_queue(u);
unit_add_to_gc_queue(u);
- unit_add_to_load_queue(u);
fputs("# This is a transient unit file, created programmatically via the systemd API. Do not edit.\n",
u->transient_file);
@@ -3511,6 +3555,43 @@ int unit_make_transient(Unit *u) {
return 0;
}
+static void log_kill(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ /* Don't log about processes marked with brackets, under the assumption that these are temporary processes
+ only, like for example systemd's own PAM stub process. */
+ if (comm && comm[0] == '(')
+ return;
+
+ log_unit_notice(userdata,
+ "Killing process " PID_FMT " (%s) with signal SIG%s.",
+ pid,
+ strna(comm),
+ signal_to_string(sig));
+}
+
+static int operation_to_signal(KillContext *c, KillOperation k) {
+ assert(c);
+
+ switch (k) {
+
+ case KILL_TERMINATE:
+ case KILL_TERMINATE_AND_LOG:
+ return c->kill_signal;
+
+ case KILL_KILL:
+ return SIGKILL;
+
+ case KILL_ABORT:
+ return SIGABRT;
+
+ default:
+ assert_not_reached("KillOperation unknown");
+ }
+}
+
int unit_kill_context(
Unit *u,
KillContext *c,
@@ -3519,58 +3600,63 @@ int unit_kill_context(
pid_t control_pid,
bool main_pid_alien) {
- bool wait_for_exit = false;
+ bool wait_for_exit = false, send_sighup;
+ cg_kill_log_func_t log_func;
int sig, r;
assert(u);
assert(c);
+ /* Kill the processes belonging to this unit, in preparation for shutting the unit down. Returns > 0 if we
+ * killed something worth waiting for, 0 otherwise. */
+
if (c->kill_mode == KILL_NONE)
return 0;
- switch (k) {
- case KILL_KILL:
- sig = SIGKILL;
- break;
- case KILL_ABORT:
- sig = SIGABRT;
- break;
- case KILL_TERMINATE:
- sig = c->kill_signal;
- break;
- default:
- assert_not_reached("KillOperation unknown");
- }
+ sig = operation_to_signal(c, k);
+
+ send_sighup =
+ c->send_sighup &&
+ IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) &&
+ sig != SIGHUP;
+
+ log_func =
+ k != KILL_TERMINATE ||
+ IN_SET(sig, SIGKILL, SIGABRT) ? log_kill : NULL;
if (main_pid > 0) {
- r = kill_and_sigcont(main_pid, sig);
+ if (log_func)
+ log_func(main_pid, sig, u);
+ r = kill_and_sigcont(main_pid, sig);
if (r < 0 && r != -ESRCH) {
_cleanup_free_ char *comm = NULL;
- get_process_comm(main_pid, &comm);
+ (void) get_process_comm(main_pid, &comm);
log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm));
} else {
if (!main_pid_alien)
wait_for_exit = true;
- if (c->send_sighup && k == KILL_TERMINATE)
+ if (r != -ESRCH && send_sighup)
(void) kill(main_pid, SIGHUP);
}
}
if (control_pid > 0) {
- r = kill_and_sigcont(control_pid, sig);
+ if (log_func)
+ log_func(control_pid, sig, u);
+ r = kill_and_sigcont(control_pid, sig);
if (r < 0 && r != -ESRCH) {
_cleanup_free_ char *comm = NULL;
- get_process_comm(control_pid, &comm);
+ (void) get_process_comm(control_pid, &comm);
log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm));
} else {
wait_for_exit = true;
- if (c->send_sighup && k == KILL_TERMINATE)
+ if (r != -ESRCH && send_sighup)
(void) kill(control_pid, SIGHUP);
}
}
@@ -3584,7 +3670,11 @@ int unit_kill_context(
if (!pid_set)
return -ENOMEM;
- r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, k != KILL_TERMINATE, false, pid_set);
+ r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ sig,
+ CGROUP_SIGCONT|CGROUP_IGNORE_SELF,
+ pid_set,
+ log_func, u);
if (r < 0) {
if (r != -EAGAIN && r != -ESRCH && r != -ENOENT)
log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path);
@@ -3609,14 +3699,18 @@ int unit_kill_context(
(detect_container() == 0 && !unit_cgroup_delegate(u)))
wait_for_exit = true;
- if (c->send_sighup && k != KILL_KILL) {
+ if (send_sighup) {
set_free(pid_set);
pid_set = unit_pid_set(main_pid, control_pid);
if (!pid_set)
return -ENOMEM;
- cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, false, true, false, pid_set);
+ cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ SIGHUP,
+ CGROUP_IGNORE_SELF,
+ pid_set,
+ NULL, NULL);
}
}
}
@@ -3725,6 +3819,26 @@ int unit_setup_exec_runtime(Unit *u) {
return exec_runtime_make(rt, unit_get_exec_context(u), u->id);
}
+int unit_setup_dynamic_creds(Unit *u) {
+ ExecContext *ec;
+ DynamicCreds *dcreds;
+ size_t offset;
+
+ assert(u);
+
+ offset = UNIT_VTABLE(u)->dynamic_creds_offset;
+ assert(offset > 0);
+ dcreds = (DynamicCreds*) ((uint8_t*) u + offset);
+
+ ec = unit_get_exec_context(u);
+ assert(ec);
+
+ if (!ec->dynamic_user)
+ return 0;
+
+ return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group);
+}
+
bool unit_type_supported(UnitType t) {
if (_unlikely_(t < 0))
return false;
@@ -3789,7 +3903,7 @@ bool unit_is_pristine(Unit *u) {
/* Check if the unit already exists or is already around,
* in a number of different ways. Note that to cater for unit
* types such as slice, we are generally fine with units that
- * are marked UNIT_LOADED even even though nothing was
+ * are marked UNIT_LOADED even though nothing was
* actually loaded, as those unit types don't require a file
* on disk to validly load. */
diff --git a/src/core/unit.h b/src/core/unit.h
index 08a927962d..47eb8d50a6 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -36,6 +36,7 @@ typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
typedef enum KillOperation {
KILL_TERMINATE,
+ KILL_TERMINATE_AND_LOG,
KILL_KILL,
KILL_ABORT,
_KILL_OPERATION_MAX,
@@ -162,6 +163,9 @@ struct Unit {
* process SIGCHLD for */
Set *pids;
+ /* Used in sigchld event invocation to avoid repeat events being invoked */
+ uint64_t sigchldgen;
+
/* Used during GC sweeps */
unsigned gc_marker;
@@ -287,6 +291,10 @@ struct UnitVTable {
* that */
size_t exec_runtime_offset;
+ /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type
+ * has that. */
+ size_t dynamic_creds_offset;
+
/* The name of the configuration file section with the private settings of this unit */
const char *private_section;
@@ -585,6 +593,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
int unit_setup_exec_runtime(Unit *u);
+int unit_setup_dynamic_creds(Unit *u);
int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data);
int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *format, ...) _printf_(4,5);