summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/basic/log.c2
-rw-r--r--src/basic/mount-util.c1
-rw-r--r--src/basic/special.h3
-rw-r--r--src/core/dbus-unit.c6
-rw-r--r--src/core/device.c6
-rw-r--r--src/core/execute.c98
-rw-r--r--src/core/job.c19
-rw-r--r--src/core/mount.c82
-rw-r--r--src/core/scope.c17
-rw-r--r--src/core/service.c61
-rw-r--r--src/core/slice.c17
-rw-r--r--src/core/swap.c6
-rw-r--r--src/core/unit.c53
-rw-r--r--src/core/unit.h9
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.c6
-rw-r--r--src/network/networkd-link.c7
-rw-r--r--src/network/networkd-ndisc.c13
-rw-r--r--src/network/networkd-ndisc.h1
-rw-r--r--src/nspawn/nspawn.c14
-rw-r--r--src/shared/seccomp-util.c178
-rw-r--r--src/shared/seccomp-util.h2
-rw-r--r--src/shared/switch-root.c25
-rw-r--r--src/sysctl/sysctl.c62
-rw-r--r--src/systemctl/systemctl.c39
-rw-r--r--src/test/test-seccomp.c33
-rw-r--r--src/test/test-unit-file.c2
-rw-r--r--src/udev/collect/collect.c6
-rw-r--r--src/udev/udev-builtin-net_id.c41
-rw-r--r--src/udev/udev-node.c4
-rw-r--r--src/udev/udev-watch.c6
30 files changed, 554 insertions, 265 deletions
diff --git a/src/basic/log.c b/src/basic/log.c
index 2ff70be255..4919d175da 100644
--- a/src/basic/log.c
+++ b/src/basic/log.c
@@ -782,7 +782,7 @@ static void log_assert(
return;
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buffer, format, text, file, line, func);
+ snprintf(buffer, sizeof buffer, format, text, file, line, func);
REENABLE_WARNING;
log_abort_msg = buffer;
diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c
index 2985cc475a..c8f8022578 100644
--- a/src/basic/mount-util.c
+++ b/src/basic/mount-util.c
@@ -525,6 +525,7 @@ bool fstype_is_network(const char *fstype) {
"glusterfs\0"
"pvfs2\0" /* OrangeFS */
"ocfs2\0"
+ "lustre\0"
;
const char *x;
diff --git a/src/basic/special.h b/src/basic/special.h
index 084d3dfa23..5276bcf598 100644
--- a/src/basic/special.h
+++ b/src/basic/special.h
@@ -117,3 +117,6 @@
/* The scope unit systemd itself lives in. */
#define SPECIAL_INIT_SCOPE "init.scope"
+
+/* The root directory. */
+#define SPECIAL_ROOT_MOUNT "-.mount"
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 8f34fa1a52..69e249c844 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -263,10 +263,7 @@ static int property_get_can_stop(
assert(reply);
assert(u);
- /* On the lower levels we assume that every unit we can start
- * we can also stop */
-
- return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop);
+ return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop);
}
static int property_get_can_reload(
@@ -760,6 +757,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0),
SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
diff --git a/src/core/device.c b/src/core/device.c
index 8a3e888e5e..bd87a447cd 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -331,11 +331,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Device));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Device), e, &u);
if (r < 0)
goto fail;
diff --git a/src/core/execute.c b/src/core/execute.c
index ae9df41b99..3f053602b5 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -787,6 +787,20 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
assert(c);
+ /*
+ * If user is given, then lookup GID and supplementary groups list.
+ * We avoid NSS lookups for gid=0. Also we have to initialize groups
+ * as early as possible so we keep the list of supplementary groups
+ * of the caller.
+ */
+ if (user && gid_is_valid(gid) && gid != 0) {
+ /* First step, initialize groups from /etc/group */
+ if (initgroups(user, gid) < 0)
+ return -errno;
+
+ keep_groups = true;
+ }
+
if (!c->supplementary_groups)
return 0;
@@ -803,18 +817,6 @@ static int get_fixed_supplementary_groups(const ExecContext *c,
return -EOPNOTSUPP; /* For all other values */
}
- /*
- * If user is given, then lookup GID and supplementary group list.
- * We avoid NSS lookups for gid=0.
- */
- if (user && gid_is_valid(gid) && gid != 0) {
- /* First step, initialize groups from /etc/groups */
- if (initgroups(user, gid) < 0)
- return -errno;
-
- keep_groups = true;
- }
-
l_gids = new(gid_t, ngroups_max);
if (!l_gids)
return -ENOMEM;
@@ -2538,12 +2540,6 @@ static int exec_child(
(void) umask(context->umask);
if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
- r = setup_smack(context, command);
- if (r < 0) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
-
if (context->pam_name && username) {
r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
@@ -2577,7 +2573,7 @@ static int exec_child(
return r;
}
- /* Drop group as early as possbile */
+ /* Drop groups as early as possible */
if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
r = enforce_groups(context, gid, supplementary_gids, ngids);
if (r < 0) {
@@ -2693,6 +2689,41 @@ static int exec_child(
}
}
+ /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+ * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+ * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+ * are restricted. */
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_use()) {
+ char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+ if (exec_context) {
+ r = setexeccon(exec_context);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return r;
+ }
+ }
+ }
+#endif
+
+ r = setup_smack(context, command);
+ if (r < 0) {
+ *exit_status = EXIT_SMACK_PROCESS_LABEL;
+ return r;
+ }
+
+#ifdef HAVE_APPARMOR
+ if (context->apparmor_profile && mac_apparmor_use()) {
+ r = aa_change_onexec(context->apparmor_profile);
+ if (r < 0 && !context->apparmor_profile_ignore) {
+ *exit_status = EXIT_APPARMOR_PROFILE;
+ return -errno;
+ }
+ }
+#endif
+
/* PR_GET_SECUREBITS is not privileged, while
* PR_SET_SECUREBITS is. So to suppress
* potential EPERMs we'll try not to call
@@ -2758,6 +2789,8 @@ static int exec_child(
}
}
+ /* This really should remain the last step before the execve(), to make sure our own code is affected
+ * by the filter as little as possible. */
if (context_has_syscall_filters(context)) {
r = apply_seccomp(unit, context);
if (r < 0) {
@@ -2766,30 +2799,6 @@ static int exec_child(
}
}
#endif
-
-#ifdef HAVE_SELINUX
- if (mac_selinux_use()) {
- char *exec_context = mac_selinux_context_net ?: context->selinux_context;
-
- if (exec_context) {
- r = setexeccon(exec_context);
- if (r < 0) {
- *exit_status = EXIT_SELINUX_CONTEXT;
- return r;
- }
- }
- }
-#endif
-
-#ifdef HAVE_APPARMOR
- if (context->apparmor_profile && mac_apparmor_use()) {
- r = aa_change_onexec(context->apparmor_profile);
- if (r < 0 && !context->apparmor_profile_ignore) {
- *exit_status = EXIT_APPARMOR_PROFILE;
- return -errno;
- }
- }
-#endif
}
final_argv = replace_env_argv(argv, accum_env);
@@ -3611,7 +3620,8 @@ char *exec_command_line(char **argv) {
STRV_FOREACH(a, argv)
k += strlen(*a)+3;
- if (!(n = new(char, k)))
+ n = new(char, k);
+ if (!n)
return NULL;
p = n;
diff --git a/src/core/job.c b/src/core/job.c
index 3ecc8a1a73..ac6910a906 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
}
static void job_print_status_message(Unit *u, JobType t, JobResult result) {
- static struct {
+ static const struct {
const char *color, *word;
} const statuses[_JOB_RESULT_MAX] = {
- [JOB_DONE] = {ANSI_GREEN, " OK "},
- [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "},
- [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"},
- [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"},
- [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "},
- [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"},
- [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"},
+ [JOB_DONE] = { ANSI_GREEN, " OK " },
+ [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
+ [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
+ [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+ [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " },
+ [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+ [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
};
const char *format;
@@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
if (!format)
return;
+ /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof(buf), format, unit_description(u));
REENABLE_WARNING;
switch (t) {
diff --git a/src/core/mount.c b/src/core/mount.c
index da480001e1..d749e49df5 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -159,17 +159,6 @@ static void mount_init(Unit *u) {
m->timeout_usec = u->manager->default_timeout_start_usec;
m->directory_mode = 0755;
- if (unit_has_name(u, "-.mount")) {
- /* Don't allow start/stop for root directory */
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
- } else {
- /* The stdio/kmsg bridge socket is on /, in order to avoid a
- * dep loop, don't use kmsg logging for -.mount */
- m->exec_context.std_output = u->manager->default_std_output;
- m->exec_context.std_error = u->manager->default_std_error;
- }
-
/* We need to make sure that /usr/bin/mount is always called
* in the same process group as us, so that the autofs kernel
* side doesn't send us another mount request while we are
@@ -577,6 +566,25 @@ static int mount_add_extras(Mount *m) {
return 0;
}
+/* Applies the special settings of the root mount unit. Returns 0 without touching the unit when it is not
+ * named SPECIAL_ROOT_MOUNT; otherwise marks it perpetual, turns off default dependencies, sets its
+ * stdout/stdin to the NULL variants and returns 1. */
+static int mount_load_root_mount(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+ return 0;
+
+ u->perpetual = true;
+ u->default_dependencies = false;
+
+ /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+ MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+ MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+ /* Only fill in a description if none is set yet; the strdup() result is not checked here. */
+ if (!u->description)
+ u->description = strdup("Root Mount");
+
+ return 1;
+}
+
static int mount_load(Unit *u) {
Mount *m = MOUNT(u);
int r;
@@ -584,11 +592,14 @@ static int mount_load(Unit *u) {
assert(u);
assert(u->load_state == UNIT_STUB);
- if (m->from_proc_self_mountinfo)
+ r = mount_load_root_mount(u);
+ if (r < 0)
+ return r;
+
+ if (m->from_proc_self_mountinfo || u->perpetual)
r = unit_load_fragment_and_dropin_optional(u);
else
r = unit_load_fragment_and_dropin(u);
-
if (r < 0)
return r;
@@ -1393,11 +1404,7 @@ static int mount_setup_unit(
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Mount));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Mount), e, &u);
if (r < 0)
goto fail;
@@ -1592,11 +1599,46 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) {
return 1;
}
+/* Makes sure a unit named SPECIAL_ROOT_MOUNT exists in the manager: looks it up, allocates it via
+ * unit_new_for_name() if missing, marks it perpetual with a deserialized state of MOUNT_MOUNTED, and adds
+ * it to the load and D-Bus queues. Returns 0 on success; if allocation fails the error is logged and the
+ * value of log_error_errno() is returned. */
+static int synthesize_root_mount(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+ * unconditionally synthesize it here and mark it as perpetual. */
+
+ u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
+ }
+
+ u->perpetual = true;
+ MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+/* Tells whether a mount counts as mounted: a perpetual unit always does, otherwise the is_mounted flag
+ * decides. */
+static bool mount_is_mounted(Mount *m) {
+ assert(m);
+
+ return UNIT(m)->perpetual || m->is_mounted;
+}
+
static void mount_enumerate(Manager *m) {
int r;
assert(m);
+ r = synthesize_root_mount(m);
+ if (r < 0)
+ goto fail;
+
mnt_init_debug(0);
if (!m->mount_monitor) {
@@ -1703,7 +1745,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
Mount *mount = MOUNT(u);
- if (!mount->is_mounted) {
+ if (!mount_is_mounted(mount)) {
/* A mount point is not around right now. It
* might be gone, or might never have
@@ -1764,7 +1806,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
}
}
- if (mount->is_mounted &&
+ if (mount_is_mounted(mount) &&
mount->from_proc_self_mountinfo &&
mount->parameters_proc_self_mountinfo.what) {
diff --git a/src/core/scope.c b/src/core/scope.c
index af0c43c7da..d6e1f8e392 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -154,15 +154,13 @@ static int scope_load_init_scope(Unit *u) {
return 0;
u->transient = true;
- u->no_gc = true;
+ u->perpetual = true;
/* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
* synthesize it here, instead of relying on the unit file on disk. */
u->default_dependencies = false;
u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
@@ -565,22 +563,15 @@ static void scope_enumerate(Manager *m) {
u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
if (!u) {
- u = unit_new(m, sizeof(Scope));
- if (!u) {
- log_oom();
- return;
- }
-
- r = unit_add_name(u, SPECIAL_INIT_SCOPE);
+ r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
if (r < 0) {
- unit_free(u);
- log_error_errno(r, "Failed to add the " SPECIAL_INIT_SCOPE " name: %m");
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
return;
}
}
u->transient = true;
- u->no_gc = true;
+ u->perpetual = true;
SCOPE(u)->deserialized_state = SCOPE_RUNNING;
unit_add_to_load_queue(u);
diff --git a/src/core/service.c b/src/core/service.c
index ee4f4983fc..a7274a758f 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
free(fs);
}
-static void service_release_resources(Unit *u) {
+/* Empties the fd store of a service by unlinking the list head until no entry is left. */
+static void service_release_fd_store(Service *s) {
+ assert(s);
+
+ log_unit_debug(UNIT(s), "Releasing all stored fds");
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ /* With the list empty the entry counter is expected to be back at zero */
+ assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u, bool inactive) {
Service *s = SERVICE(u);
assert(s);
@@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) {
if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
return;
- log_unit_debug(u, "Releasing all resources.");
+ log_unit_debug(u, "Releasing resources.");
s->stdin_fd = safe_close(s->stdin_fd);
s->stdout_fd = safe_close(s->stdout_fd);
s->stderr_fd = safe_close(s->stderr_fd);
- while (s->fd_store)
- service_fd_store_unlink(s->fd_store);
-
- assert(s->n_fd_store == 0);
+ if (inactive)
+ service_release_fd_store(s);
}
static void service_done(Unit *u) {
@@ -350,7 +358,7 @@ static void service_done(Unit *u) {
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
- service_release_resources(u);
+ service_release_resources(u, true);
}
static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
@@ -360,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us
assert(fs);
/* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ log_unit_debug(UNIT(fs->service),
+ "Received %s on stored fd %d (%s), closing.",
+ revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+ fs->fd, strna(fs->fdname));
service_fd_store_unlink(fs);
return 0;
}
@@ -368,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
ServiceFDStore *fs;
int r;
+ /* fd is always consumed if we return >= 0 */
+
assert(s);
assert(fd >= 0);
if (s->n_fd_store >= s->n_fd_store_max)
- return 0;
+ return -EXFULL; /* Our store is full.
+ * Use this errno rather than E[NM]FILE to distinguish from
+ * the case where systemd itself hits the file limit. */
LIST_FOREACH(fd_store, fs, s->fd_store) {
r = same_fd(fs->fd, fd);
if (r < 0)
return r;
if (r > 0) {
- /* Already included */
safe_close(fd);
- return 1;
+ return 0; /* fd already included */
}
}
@@ -409,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
LIST_PREPEND(fd_store, s->fd_store, fs);
s->n_fd_store++;
- return 1;
+ return 1; /* fd newly stored */
}
static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
@@ -417,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
assert(s);
- if (fdset_size(fds) <= 0)
- return 0;
-
- while (s->n_fd_store < s->n_fd_store_max) {
+ while (fdset_size(fds) > 0) {
_cleanup_close_ int fd = -1;
fd = fdset_steal_first(fds);
@@ -428,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
break;
r = service_add_fd_store(s, fd, name);
+ if (r == -EXFULL)
+ return log_unit_warning_errno(UNIT(s), r,
+ "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+ s->n_fd_store_max);
if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
- if (r > 0) {
- log_unit_debug(UNIT(s), "Added fd to fd store.");
- fd = -1;
- }
+ return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+ if (r > 0)
+ log_unit_debug(UNIT(s), "Added fd %i (%s) to fd store.", fd, strna(name));
+ /* service_add_fd_store() consumed the fd on any non-negative return, so disarm the cleanup handler */
+ fd = -1;
}
- if (fdset_size(fds) > 0)
- log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max);
-
return 0;
}
@@ -1225,6 +1237,7 @@ static int service_spawn(
return r;
n_fds = r;
+ log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds);
}
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
@@ -2336,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
r = service_add_fd_store(s, fd, t);
if (r < 0)
log_unit_error_errno(u, r, "Failed to add fd to store: %m");
- else if (r > 0)
+ else
fdset_remove(fds, fd);
}
diff --git a/src/core/slice.c b/src/core/slice.c
index 0fef29661f..ed5d3fd701 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -136,15 +136,13 @@ static int slice_load_root_slice(Unit *u) {
if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
return 0;
- u->no_gc = true;
+ u->perpetual = true;
/* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
* special semantics we synthesize it here, instead of relying on the unit file on disk. */
u->default_dependencies = false;
u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
if (!u->description)
u->description = strdup("Root Slice");
@@ -301,21 +299,14 @@ static void slice_enumerate(Manager *m) {
u = manager_get_unit(m, SPECIAL_ROOT_SLICE);
if (!u) {
- u = unit_new(m, sizeof(Slice));
- if (!u) {
- log_oom();
- return;
- }
-
- r = unit_add_name(u, SPECIAL_ROOT_SLICE);
+ r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u);
if (r < 0) {
- unit_free(u);
- log_error_errno(r, "Failed to add the "SPECIAL_ROOT_SLICE " name: %m");
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m");
return;
}
}
- u->no_gc = true;
+ u->perpetual = true;
SLICE(u)->deserialized_state = SLICE_ACTIVE;
unit_add_to_load_queue(u);
diff --git a/src/core/swap.c b/src/core/swap.c
index b592abb9fb..2228a254bb 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -381,11 +381,7 @@ static int swap_setup_unit(
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Swap));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Swap), e, &u);
if (r < 0)
goto fail;
diff --git a/src/core/unit.c b/src/core/unit.c
index cabb1050a8..e664e23892 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -109,6 +109,24 @@ Unit *unit_new(Manager *m, size_t size) {
return u;
}
+/* Allocates a unit with unit_new() and names it with unit_add_name() in one step. On success the new unit
+ * is stored in *ret and the result of unit_add_name() is returned. Returns -ENOMEM if the allocation
+ * fails; if naming fails the unit is freed again and that error is returned, leaving *ret untouched. */
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+ Unit *u;
+ int r;
+
+ u = unit_new(m, size);
+ if (!u)
+ return -ENOMEM;
+
+ r = unit_add_name(u, name);
+ if (r < 0) {
+ unit_free(u);
+ return r;
+ }
+
+ *ret = u;
+ return r;
+}
+
bool unit_has_name(Unit *u, const char *name) {
assert(u);
assert(name);
@@ -302,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) {
bool unit_check_gc(Unit *u) {
UnitActiveState state;
+ bool inactive;
assert(u);
if (u->job)
@@ -311,19 +330,20 @@ bool unit_check_gc(Unit *u) {
return true;
state = unit_active_state(u);
+ inactive = state == UNIT_INACTIVE;
/* If the unit is inactive and failed and no job is queued for
* it, then release its runtime resources */
if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
UNIT_VTABLE(u)->release_resources)
- UNIT_VTABLE(u)->release_resources(u);
+ UNIT_VTABLE(u)->release_resources(u, inactive);
/* But we keep the unit object around for longer when it is
* referenced or configured to not be gc'ed */
- if (state != UNIT_INACTIVE)
+ if (!inactive)
return true;
- if (u->no_gc)
+ if (u->perpetual)
return true;
if (u->refs)
@@ -924,6 +944,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
"%s\tGC Check Good: %s\n"
"%s\tNeed Daemon Reload: %s\n"
"%s\tTransient: %s\n"
+ "%s\tPerpetual: %s\n"
"%s\tSlice: %s\n"
"%s\tCGroup: %s\n"
"%s\tCGroup realized: %s\n"
@@ -942,6 +963,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
prefix, yes_no(unit_check_gc(u)),
prefix, yes_no(unit_need_daemon_reload(u)),
prefix, yes_no(u->transient),
+ prefix, yes_no(u->perpetual),
prefix, strna(unit_slice_name(u)),
prefix, strna(u->cgroup_path),
prefix, yes_no(u->cgroup_realized),
@@ -1450,7 +1472,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
format = unit_get_status_message_format(u, t);
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof buf, format, unit_description(u));
REENABLE_WARNING;
mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING :
@@ -1616,6 +1638,18 @@ int unit_stop(Unit *u) {
return UNIT_VTABLE(u)->stop(u);
}
+/* Tells whether the unit can be stopped: never if unit_supported() fails or the unit is perpetual,
+ * otherwise only if its vtable provides a stop() method. */
+bool unit_can_stop(Unit *u) {
+ assert(u);
+
+ if (!unit_supported(u))
+ return false;
+
+ if (u->perpetual)
+ return false;
+
+ return !!UNIT_VTABLE(u)->stop;
+}
+
/* Errors:
* -EBADR: This unit type does not support reloading.
* -ENOEXEC: Unit is not started.
@@ -2150,13 +2184,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) {
case JOB_VERIFY_ACTIVE:
case JOB_START:
- case JOB_STOP:
case JOB_NOP:
+ /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+ * startable by us but may appear due to external events, and it thus makes sense to permit enqueuing
+ * jobs for them. */
return true;
+ case JOB_STOP:
+ /* Similar to the above. However, perpetual units can never be stopped (neither explicitly nor due to
+ * external events), hence it makes no sense to permit enqueuing such a request either. */
+ return !u->perpetual;
+
case JOB_RESTART:
case JOB_TRY_RESTART:
- return unit_can_start(u);
+ return unit_can_stop(u) && unit_can_start(u);
case JOB_RELOAD:
case JOB_TRY_RELOAD:
diff --git a/src/core/unit.h b/src/core/unit.h
index adcdee6db6..991543664b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -236,6 +236,9 @@ struct Unit {
/* Is this a transient unit? */
bool transient;
+ /* Is this a unit that is always running and cannot be stopped? */
+ bool perpetual;
+
bool in_load_queue:1;
bool in_dbus_queue:1;
bool in_cleanup_queue:1;
@@ -244,8 +247,6 @@ struct Unit {
bool sent_dbus_new_signal:1;
- bool no_gc:1;
-
bool in_audit:1;
bool cgroup_realized:1;
@@ -372,7 +373,7 @@ struct UnitVTable {
/* When the unit is not running and no job for it queued we
* shall release its runtime resources */
- void (*release_resources)(Unit *u);
+ void (*release_resources)(Unit *u, bool inactive);
/* Invoked on every child that died */
void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
@@ -480,6 +481,7 @@ DEFINE_CAST(SCOPE, Scope);
Unit *unit_new(Manager *m, size_t size);
void unit_free(Unit *u);
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
int unit_add_name(Unit *u, const char *name);
int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference);
@@ -524,6 +526,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix);
bool unit_can_reload(Unit *u) _pure_;
bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
bool unit_can_isolate(Unit *u) _pure_;
int unit_start(Unit *u);
diff --git a/src/libsystemd/sd-bus/bus-kernel.c b/src/libsystemd/sd-bus/bus-kernel.c
index 59398b841d..ad468572f3 100644
--- a/src/libsystemd/sd-bus/bus-kernel.c
+++ b/src/libsystemd/sd-bus/bus-kernel.c
@@ -848,8 +848,7 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k) {
if (k->src_id == KDBUS_SRC_ID_KERNEL)
bus_message_set_sender_driver(bus, m);
else {
- xsprintf(m->sender_buffer, ":1.%llu",
- (unsigned long long)k->src_id);
+ xsprintf(m->sender_buffer, ":1.%"PRIu64, k->src_id);
m->sender = m->creds.unique_name = m->sender_buffer;
}
@@ -860,8 +859,7 @@ static int bus_kernel_make_message(sd_bus *bus, struct kdbus_msg *k) {
else if (k->dst_id == KDBUS_DST_ID_NAME)
m->destination = bus->unique_name; /* fill in unique name if the well-known name is missing */
else {
- xsprintf(m->destination_buffer, ":1.%llu",
- (unsigned long long)k->dst_id);
+ xsprintf(m->destination_buffer, ":1.%"PRIu64, k->dst_id);
m->destination = m->destination_buffer;
}
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
index d9e060b6cf..aefe7335b9 100644
--- a/src/network/networkd-link.c
+++ b/src/network/networkd-link.c
@@ -514,13 +514,12 @@ static void link_free(Link *link) {
sd_lldp_unref(link->lldp);
free(link->lldp_file);
+ ndisc_flush(link);
+
sd_ipv4ll_unref(link->ipv4ll);
sd_dhcp6_client_unref(link->dhcp6_client);
sd_ndisc_unref(link->ndisc);
- set_free_free(link->ndisc_rdnss);
- set_free_free(link->ndisc_dnssl);
-
if (link->manager)
hashmap_remove(link->manager->links, INT_TO_PTR(link->ifindex));
@@ -2427,6 +2426,8 @@ static int link_drop_config(Link *link) {
return r;
}
+ ndisc_flush(link);
+
return 0;
}
diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c
index b282634e4b..4853791aa5 100644
--- a/src/network/networkd-ndisc.c
+++ b/src/network/networkd-ndisc.c
@@ -680,13 +680,22 @@ void ndisc_vacuum(Link *link) {
SET_FOREACH(r, link->ndisc_rdnss, i)
if (r->valid_until < time_now) {
- (void) set_remove(link->ndisc_rdnss, r);
+ free(set_remove(link->ndisc_rdnss, r));
link_dirty(link);
}
SET_FOREACH(d, link->ndisc_dnssl, i)
if (d->valid_until < time_now) {
- (void) set_remove(link->ndisc_dnssl, d);
+ free(set_remove(link->ndisc_dnssl, d));
link_dirty(link);
}
}
+
+/* Drops the link's RDNSS and DNSSL sets and stores what set_free_free() returns back into the link. */
+void ndisc_flush(Link *link) {
+ assert(link);
+
+ /* Removes all RDNSS and DNSSL entries, without exception */
+
+ link->ndisc_rdnss = set_free_free(link->ndisc_rdnss);
+ link->ndisc_dnssl = set_free_free(link->ndisc_dnssl);
+}
diff --git a/src/network/networkd-ndisc.h b/src/network/networkd-ndisc.h
index 2002f55107..127126190e 100644
--- a/src/network/networkd-ndisc.h
+++ b/src/network/networkd-ndisc.h
@@ -37,3 +37,4 @@ static inline char* NDISC_DNSSL_DOMAIN(const NDiscDNSSL *n) {
int ndisc_configure(Link *link);
void ndisc_vacuum(Link *link);
+void ndisc_flush(Link *link);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 295293858e..c56af6e6f4 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2260,7 +2260,7 @@ static int dissect_image(
static int mount_device(const char *what, const char *where, const char *directory, bool rw) {
#ifdef HAVE_BLKID
_cleanup_blkid_free_probe_ blkid_probe b = NULL;
- const char *fstype, *p;
+ const char *fstype, *p, *options = NULL; /* stays NULL (no mount options) unless set for a loop device further down */
int r;
assert(what);
@@ -2309,7 +2309,17 @@ static int mount_device(const char *what, const char *where, const char *directo
return -EOPNOTSUPP;
}
- return mount_verbose(LOG_ERR, what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), NULL);
+ /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains
+ * sparse when possible. */
+ if (STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs")) {
+ const char *l;
+
+ l = path_startswith(what, "/dev");
+ if (l && startswith(l, "loop"))
+ options = "discard";
+ }
+
+ return mount_verbose(LOG_ERR, what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
#else
log_error("--image= is not supported, compiled without blkid support.");
return -EOPNOTSUPP;
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 6252cd16a6..c9b24f1065 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -29,23 +29,49 @@
#include "util.h"
const char* seccomp_arch_to_string(uint32_t c) {
+ /* Maintain order used in <seccomp.h>.
+ *
+ * Names used here should be the same as those used for ConditionArchitecture=,
+ * except for "subarchitectures" like x32. */
- if (c == SCMP_ARCH_NATIVE)
+ switch(c) {
+ case SCMP_ARCH_NATIVE:
return "native";
- if (c == SCMP_ARCH_X86)
+ case SCMP_ARCH_X86:
return "x86";
- if (c == SCMP_ARCH_X86_64)
+ case SCMP_ARCH_X86_64:
return "x86-64";
- if (c == SCMP_ARCH_X32)
+ case SCMP_ARCH_X32:
return "x32";
- if (c == SCMP_ARCH_ARM)
+ case SCMP_ARCH_ARM:
return "arm";
- if (c == SCMP_ARCH_S390)
+ case SCMP_ARCH_AARCH64:
+ return "arm64";
+ case SCMP_ARCH_MIPS:
+ return "mips";
+ case SCMP_ARCH_MIPS64:
+ return "mips64";
+ case SCMP_ARCH_MIPS64N32:
+ return "mips64-n32";
+ case SCMP_ARCH_MIPSEL:
+ return "mips-le";
+ case SCMP_ARCH_MIPSEL64:
+ return "mips64-le";
+ case SCMP_ARCH_MIPSEL64N32:
+ return "mips64-le-n32";
+ case SCMP_ARCH_PPC:
+ return "ppc";
+ case SCMP_ARCH_PPC64:
+ return "ppc64";
+ case SCMP_ARCH_PPC64LE:
+ return "ppc64-le";
+ case SCMP_ARCH_S390:
return "s390";
- if (c == SCMP_ARCH_S390X)
+ case SCMP_ARCH_S390X:
return "s390x";
-
- return NULL;
+ default:
+ return NULL;
+ }
}
int seccomp_arch_from_string(const char *n, uint32_t *ret) {
@@ -64,6 +90,26 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) {
*ret = SCMP_ARCH_X32;
else if (streq(n, "arm"))
*ret = SCMP_ARCH_ARM;
+ else if (streq(n, "arm64"))
+ *ret = SCMP_ARCH_AARCH64;
+ else if (streq(n, "mips"))
+ *ret = SCMP_ARCH_MIPS;
+ else if (streq(n, "mips64"))
+ *ret = SCMP_ARCH_MIPS64;
+ else if (streq(n, "mips64-n32"))
+ *ret = SCMP_ARCH_MIPS64N32;
+ else if (streq(n, "mips-le"))
+ *ret = SCMP_ARCH_MIPSEL;
+ else if (streq(n, "mips64-le"))
+ *ret = SCMP_ARCH_MIPSEL64;
+ else if (streq(n, "mips64-le-n32"))
+ *ret = SCMP_ARCH_MIPSEL64N32;
+ else if (streq(n, "ppc"))
+ *ret = SCMP_ARCH_PPC;
+ else if (streq(n, "ppc64"))
+ *ret = SCMP_ARCH_PPC64;
+ else if (streq(n, "ppc64-le"))
+ *ret = SCMP_ARCH_PPC64LE;
else if (streq(n, "s390"))
*ret = SCMP_ARCH_S390;
else if (streq(n, "s390x"))
@@ -101,41 +147,52 @@ finish:
return r;
}
-int seccomp_add_secondary_archs(scmp_filter_ctx c) {
-
-#if defined(__i386__) || defined(__x86_64__)
- int r;
+int seccomp_add_secondary_archs(scmp_filter_ctx ctx) {
/* Add in all possible secondary archs we are aware of that
* this kernel might support. */
- r = seccomp_arch_add(c, SCMP_ARCH_X86);
- if (r < 0 && r != -EEXIST)
- return r;
-
- r = seccomp_arch_add(c, SCMP_ARCH_X86_64);
- if (r < 0 && r != -EEXIST)
- return r;
-
- r = seccomp_arch_add(c, SCMP_ARCH_X32);
- if (r < 0 && r != -EEXIST)
- return r;
+ static const int seccomp_arches[] = {
+#if defined(__i386__) || defined(__x86_64__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X86_64,
+ SCMP_ARCH_X32,
+
+#elif defined(__arm__) || defined(__aarch64__)
+ SCMP_ARCH_ARM,
+ SCMP_ARCH_AARCH64,
+
+#elif defined(__arm__) || defined(__aarch64__)
+ SCMP_ARCH_ARM,
+ SCMP_ARCH_AARCH64,
+
+#elif defined(__mips__) || defined(__mips64__)
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPSEL64N32,
+
+#elif defined(__powerpc__) || defined(__powerpc64__)
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64,
+ SCMP_ARCH_PPC64LE,
#elif defined(__s390__) || defined(__s390x__)
- int r;
-
- /* Add in all possible secondary archs we are aware of that
- * this kernel might support. */
-
- r = seccomp_arch_add(c, SCMP_ARCH_S390);
- if (r < 0 && r != -EEXIST)
- return r;
+ SCMP_ARCH_S390,
+ SCMP_ARCH_S390X,
+#endif
+ };
- r = seccomp_arch_add(c, SCMP_ARCH_S390X);
- if (r < 0 && r != -EEXIST)
- return r;
+ unsigned i;
+ int r;
-#endif
+ for (i = 0; i < ELEMENTSOF(seccomp_arches); i++) {
+ r = seccomp_arch_add(ctx, seccomp_arches[i]);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
return 0;
}
@@ -160,6 +217,24 @@ bool is_seccomp_available(void) {
}
const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_BASIC_IO] = {
+ /* Basic IO */
+ .name = "@basic-io",
+ .value =
+ "close\0"
+ "dup2\0"
+ "dup3\0"
+ "dup\0"
+ "lseek\0"
+ "pread64\0"
+ "preadv\0"
+ "pwrite64\0"
+ "pwritev\0"
+ "read\0"
+ "readv\0"
+ "write\0"
+ "writev\0"
+ },
[SYSCALL_FILTER_SET_CLOCK] = {
/* Clock */
.name = "@clock",
@@ -196,15 +271,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"sys_debug_setcontext\0"
},
[SYSCALL_FILTER_SET_DEFAULT] = {
- /* Default list */
+ /* Default list: the most basic of operations */
.name = "@default",
.value =
+ "clock_getres\0"
+ "clock_gettime\0"
+ "clock_nanosleep\0"
"execve\0"
"exit\0"
"exit_group\0"
"getrlimit\0" /* make sure processes can query stack size and such */
+ "gettimeofday\0"
+ "nanosleep\0"
+ "pause\0"
"rt_sigreturn\0"
"sigreturn\0"
+ "time\0"
},
[SYSCALL_FILTER_SET_IO_EVENT] = {
/* Event loop use */
@@ -226,9 +308,10 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"select\0"
},
[SYSCALL_FILTER_SET_IPC] = {
- /* Message queues, SYSV IPC or other IPC: unusual */
+ /* Message queues, SYSV IPC or other IPC */
.name = "@ipc",
.value = "ipc\0"
+ "memfd_create\0"
"mq_getsetattr\0"
"mq_notify\0"
"mq_open\0"
@@ -239,6 +322,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"msgget\0"
"msgrcv\0"
"msgsnd\0"
+ "pipe2\0"
+ "pipe\0"
"process_vm_readv\0"
"process_vm_writev\0"
"semctl\0"
@@ -379,7 +464,6 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.value =
"arch_prctl\0"
"clone\0"
- "execve\0"
"execveat\0"
"fork\0"
"kill\0"
@@ -406,6 +490,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"s390_pci_mmio_write\0"
#endif
},
+ [SYSCALL_FILTER_SET_RESOURCES] = {
+ /* Alter resource settings */
+ .name = "@resources",
+ .value =
+ "sched_setparam\0"
+ "sched_setscheduler\0"
+ "sched_setaffinity\0"
+ "setpriority\0"
+ "setrlimit\0"
+ "set_mempolicy\0"
+ "migrate_pages\0"
+ "move_pages\0"
+ "mbind\0"
+ "sched_setattr\0"
+ "prlimit64\0"
+ },
};
const SyscallFilterSet *syscall_filter_set_find(const char *name) {
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 8050fc6fbf..8e209efef2 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -38,6 +38,7 @@ typedef struct SyscallFilterSet {
} SyscallFilterSet;
enum {
+ SYSCALL_FILTER_SET_BASIC_IO,
SYSCALL_FILTER_SET_CLOCK,
SYSCALL_FILTER_SET_CPU_EMULATION,
SYSCALL_FILTER_SET_DEBUG,
@@ -52,6 +53,7 @@ enum {
SYSCALL_FILTER_SET_PRIVILEGED,
SYSCALL_FILTER_SET_PROCESS,
SYSCALL_FILTER_SET_RAW_IO,
+ SYSCALL_FILTER_SET_RESOURCES,
_SYSCALL_FILTER_SET_MAX
};
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
index 47d3a5a1fa..4eff4f692e 100644
--- a/src/shared/switch-root.c
+++ b/src/shared/switch-root.c
@@ -75,17 +75,29 @@ int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot,
NULSTR_FOREACH(i, move_mounts) {
char new_mount[PATH_MAX];
struct stat sb;
+ size_t n;
- xsprintf(new_mount, "%s%s", new_root, i);
+ n = snprintf(new_mount, sizeof new_mount, "%s%s", new_root, i);
+ if (n >= sizeof new_mount) {
+ bool move = mountflags & MS_MOVE;
+
+ log_warning("New path is too long, %s: %s%s",
+ move ? "forcing unmount instead" : "ignoring",
+ new_root, i);
+
+ if (move)
+ if (umount2(i, MNT_FORCE) < 0)
+ log_warning_errno(errno, "Failed to unmount %s: %m", i);
+ continue;
+ }
mkdir_p_label(new_mount, 0755);
- if ((stat(new_mount, &sb) < 0) ||
+ if (stat(new_mount, &sb) < 0 ||
sb.st_dev != new_root_stat.st_dev) {
/* Mount point seems to be mounted already or
- * stat failed. Unmount the old mount
- * point. */
+ * stat failed. Unmount the old mount point. */
if (umount2(i, MNT_DETACH) < 0)
log_warning_errno(errno, "Failed to unmount %s: %m", i);
continue;
@@ -97,10 +109,9 @@ int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot,
if (umount2(i, MNT_FORCE) < 0)
log_warning_errno(errno, "Failed to unmount %s: %m", i);
- }
- if (mountflags & MS_BIND)
- log_error_errno(errno, "Failed to bind mount %s to %s: %m", i, new_mount);
+ } else if (mountflags & MS_BIND)
+ log_error_errno(errno, "Failed to bind mount %s to %s: %m", i, new_mount);
}
}
diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c
index fbc1e0eb1a..b3587e249d 100644
--- a/src/sysctl/sysctl.c
+++ b/src/sysctl/sysctl.c
@@ -51,19 +51,46 @@ static int apply_all(OrderedHashmap *sysctl_options) {
k = sysctl_write(property, value);
if (k < 0) {
- log_full_errno(k == -ENOENT ? LOG_INFO : LOG_WARNING, k,
- "Couldn't write '%s' to '%s', ignoring: %m", value, property);
-
- if (r == 0 && k != -ENOENT)
- r = k;
+ /* If the sysctl is not available in the kernel or we are running with reduced privileges and
+ * cannot write it, then log about the issue at LOG_NOTICE level, and proceed without
+ * failing. (EROFS is treated as a permission problem here, since that's how container managers
+ * usually protected their sysctls.) In all other cases log an error and make the tool fail. */
+
+ if (IN_SET(k, -EPERM, -EACCES, -EROFS, -ENOENT))
+ log_notice_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", value, property);
+ else {
+ log_error_errno(k, "Couldn't write '%s' to '%s': %m", value, property);
+ if (r == 0)
+ r = k;
+ }
}
}
return r;
}
+static bool test_prefix(const char *p) {
+ char **i;
+
+ if (strv_isempty(arg_prefixes))
+ return true;
+
+ STRV_FOREACH(i, arg_prefixes) {
+ const char *t;
+
+ t = path_startswith(*i, "/proc/sys/");
+ if (!t)
+ t = *i;
+ if (path_startswith(p, t))
+ return true;
+ }
+
+ return false;
+}
+
static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ignore_enoent) {
_cleanup_fclose_ FILE *f = NULL;
+ unsigned c = 0;
int r;
assert(path);
@@ -77,7 +104,7 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign
}
log_debug("Parsing %s", path);
- while (!feof(f)) {
+ for (;;) {
char l[LINE_MAX], *p, *value, *new_value, *property, *existing;
void *v;
int k;
@@ -89,6 +116,8 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign
return log_error_errno(errno, "Failed to read file '%s', ignoring: %m", path);
}
+ c++;
+
p = strstrip(l);
if (!*p)
continue;
@@ -98,7 +127,7 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign
value = strchr(p, '=');
if (!value) {
- log_error("Line is not an assignment in file '%s': %s", path, value);
+ log_error("Line is not an assignment at '%s:%u': %s", path, c, value);
if (r == 0)
r = -EINVAL;
@@ -111,26 +140,15 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign
p = sysctl_normalize(strstrip(p));
value = strstrip(value);
- if (!strv_isempty(arg_prefixes)) {
- char **i, *t;
- STRV_FOREACH(i, arg_prefixes) {
- t = path_startswith(*i, "/proc/sys/");
- if (t == NULL)
- t = *i;
- if (path_startswith(p, t))
- goto found;
- }
- /* not found */
+ if (!test_prefix(p))
continue;
- }
-found:
existing = ordered_hashmap_get2(sysctl_options, p, &v);
if (existing) {
if (streq(value, existing))
continue;
- log_debug("Overwriting earlier assignment of %s in file '%s'.", p, path);
+ log_debug("Overwriting earlier assignment of %s at '%s:%u'.", p, path, c);
free(ordered_hashmap_remove(sysctl_options, p));
free(v);
}
@@ -229,12 +247,12 @@ static int parse_argv(int argc, char *argv[]) {
}
int main(int argc, char *argv[]) {
+ OrderedHashmap *sysctl_options = NULL;
int r = 0, k;
- OrderedHashmap *sysctl_options;
r = parse_argv(argc, argv);
if (r <= 0)
- return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+ goto finish;
log_set_target(LOG_TARGET_AUTO);
log_parse_environment();
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index d311bbec1a..dd3b931cd6 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -410,23 +410,24 @@ static bool output_show_unit(const UnitInfo *u, char **patterns) {
}
static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
- unsigned circle_len = 0, id_len, max_id_len, load_len, active_len, sub_len, job_len;
+ unsigned circle_len = 0, id_len, max_id_len, load_len, active_len, sub_len, job_len, desc_len, max_desc_len;
const UnitInfo *u;
unsigned n_shown = 0;
- int job_count = 0, desc_len;
+ int job_count = 0;
max_id_len = strlen("UNIT");
load_len = strlen("LOAD");
active_len = strlen("ACTIVE");
sub_len = strlen("SUB");
job_len = strlen("JOB");
- desc_len = 0;
+ max_desc_len = strlen("DESCRIPTION");
for (u = unit_infos; u < unit_infos + c; u++) {
max_id_len = MAX(max_id_len, strlen(u->id) + (u->machine ? strlen(u->machine)+1 : 0));
load_len = MAX(load_len, strlen(u->load_state));
active_len = MAX(active_len, strlen(u->active_state));
sub_len = MAX(sub_len, strlen(u->sub_state));
+ max_desc_len = MAX(max_desc_len, strlen(u->description));
if (u->job_id != 0) {
job_len = MAX(job_len, strlen(u->job_type));
@@ -442,7 +443,7 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
if (!arg_full && original_stdout_is_tty) {
unsigned basic_len;
- id_len = MIN(max_id_len, 25u);
+ id_len = MIN(max_id_len, 25u); /* as much as it needs, but at most 25 for now */
basic_len = circle_len + 5 + id_len + 5 + active_len + sub_len;
if (job_count)
@@ -455,19 +456,21 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
/* Either UNIT already got 25, or is fully satisfied.
* Grant up to 25 to DESC now. */
incr = MIN(extra_len, 25u);
- desc_len += incr;
+ desc_len = incr;
extra_len -= incr;
- /* split the remaining space between UNIT and DESC,
- * but do not give UNIT more than it needs. */
+ /* Of the remainder give as much as the ID needs to the ID, and give the rest to the
+ * description but not more than it needs. */
if (extra_len > 0) {
- incr = MIN(extra_len / 2, max_id_len - id_len);
+ incr = MIN(max_id_len - id_len, extra_len);
id_len += incr;
- desc_len += extra_len - incr;
+ desc_len += MIN(extra_len - incr, max_desc_len - desc_len);
}
}
- } else
+ } else {
id_len = max_id_len;
+ desc_len = max_desc_len;
+ }
for (u = unit_infos; u < unit_infos + c; u++) {
_cleanup_free_ char *e = NULL, *j = NULL;
@@ -493,8 +496,9 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
if (job_count)
printf("%-*s ", job_len, "JOB");
- printf("%.*s%s\n",
- !arg_full && arg_no_pager ? desc_len : -1,
+ printf("%-*.*s%s\n",
+ desc_len,
+ !arg_full && arg_no_pager ? (int) desc_len : -1,
"DESCRIPTION",
ansi_normal());
}
@@ -513,13 +517,13 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
off_circle = ansi_normal();
circle = true;
on_loaded = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
- off_loaded = on_underline;
+ off_loaded = underline ? on_underline : ansi_normal();
} else if (streq(u->active_state, "failed") && !arg_plain) {
on_circle = ansi_highlight_red();
off_circle = ansi_normal();
circle = true;
on_active = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
- off_active = on_underline;
+ off_active = underline ? on_underline : ansi_normal();
}
if (u->machine) {
@@ -550,8 +554,9 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
sub_len, u->sub_state, off_active,
job_count ? job_len + 1 : 0, u->job_id ? u->job_type : "");
- printf("%.*s%s\n",
- desc_len > 0 ? desc_len : -1,
+ printf("%-*.*s%s\n",
+ desc_len,
+ !arg_full && arg_no_pager ? (int) desc_len : -1,
u->description,
off_underline);
}
@@ -5272,7 +5277,7 @@ static int cat(int argc, char *argv[], void *userdata) {
else
puts("");
- if (need_daemon_reload(bus, *name))
+ if (need_daemon_reload(bus, *name) > 0) /* ignore errors (<0), this is informational output */
fprintf(stderr,
"%s# Warning: %s changed on disk, the version systemd has loaded is outdated.\n"
"%s# This output shows the current version of the unit's original fragment and drop-in files.\n"
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
index 0060ecdf02..43d1567288 100644
--- a/src/test/test-seccomp.c
+++ b/src/test/test-seccomp.c
@@ -25,6 +25,8 @@
#include "macro.h"
#include "process-util.h"
#include "seccomp-util.h"
+#include "string-util.h"
+#include "util.h"
static void test_seccomp_arch_to_string(void) {
uint32_t a, b;
@@ -38,6 +40,36 @@ static void test_seccomp_arch_to_string(void) {
assert_se(a == b);
}
+static void test_architecture_table(void) {
+ const char *n, *n2;
+
+ NULSTR_FOREACH(n,
+ "native\0"
+ "x86\0"
+ "x86-64\0"
+ "x32\0"
+ "arm\0"
+ "arm64\0"
+ "mips\0"
+ "mips64\0"
+ "mips64-n32\0"
+ "mips-le\0"
+ "mips64-le\0"
+ "mips64-le-n32\0"
+ "ppc\0"
+ "ppc64\0"
+ "ppc64-le\0"
+ "s390\0"
+ "s390x\0") {
+ uint32_t c;
+
+ assert_se(seccomp_arch_from_string(n, &c) >= 0);
+ n2 = seccomp_arch_to_string(c);
+ log_info("seccomp-arch: %s → 0x%"PRIx32" → %s", n, c, n2);
+ assert_se(streq_ptr(n, n2));
+ }
+}
+
static void test_syscall_filter_set_find(void) {
assert_se(!syscall_filter_set_find(NULL));
assert_se(!syscall_filter_set_find(""));
@@ -96,6 +128,7 @@ static void test_filter_sets(void) {
int main(int argc, char *argv[]) {
test_seccomp_arch_to_string();
+ test_architecture_table();
test_syscall_filter_set_find();
test_filter_sets();
diff --git a/src/test/test-unit-file.c b/src/test/test-unit-file.c
index 7ef087a2e3..12f48bf435 100644
--- a/src/test/test-unit-file.c
+++ b/src/test/test-unit-file.c
@@ -589,7 +589,7 @@ static void test_install_printf(void) {
assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid);
assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid);
assert_se((host = gethostname_malloc()));
- assert_se((user = getusername_malloc()));
+ assert_se((user = uid_to_name(getuid())));
assert_se(asprintf(&uid, UID_FMT, getuid()) >= 0);
#define expect(src, pattern, result) \
diff --git a/src/udev/collect/collect.c b/src/udev/collect/collect.c
index 349585b634..0e973cd521 100644
--- a/src/udev/collect/collect.c
+++ b/src/udev/collect/collect.c
@@ -85,16 +85,16 @@ static void usage(void)
*/
static int prepare(char *dir, char *filename)
{
- char buf[512];
+ char buf[PATH_MAX];
int r, fd;
r = mkdir(dir, 0700);
if (r < 0 && errno != EEXIST)
return -errno;
- xsprintf(buf, "%s/%s", dir, filename);
+ snprintf(buf, sizeof buf, "%s/%s", dir, filename);
- fd = open(buf,O_RDWR|O_CREAT|O_CLOEXEC, S_IRUSR|S_IWUSR);
+ fd = open(buf, O_RDWR|O_CREAT|O_CLOEXEC, S_IRUSR|S_IWUSR);
if (fd < 0)
fprintf(stderr, "Cannot open %s: %m\n", buf);
diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c
index a7be2a4eed..fe9d6f4482 100644
--- a/src/udev/udev-builtin-net_id.c
+++ b/src/udev/udev-builtin-net_id.c
@@ -35,10 +35,12 @@
* Type of names:
* b<number> — BCMA bus core number
* c<bus_id> — CCW bus group name, without leading zeros [s390]
- * o<index>[d<dev_port>] — on-board device index number
- * s<slot>[f<function>][d<dev_port>] — hotplug slot index number
+ * o<index>[n<phys_port_name>|d<dev_port>]
+ * — on-board device index number
+ * s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
+ * — hotplug slot index number
* x<MAC> — MAC address
- * [P<domain>]p<bus>s<slot>[f<function>][d<dev_port>]
+ * [P<domain>]p<bus>s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
* — PCI geographical location
* [P<domain>]p<bus>s<slot>[f<function>][u<port>][..][c<config>][i<interface>]
* — USB port number chain
@@ -137,7 +139,7 @@ static int dev_pci_onboard(struct udev_device *dev, struct netnames *names) {
unsigned dev_port = 0;
size_t l;
char *s;
- const char *attr;
+ const char *attr, *port_name;
int idx;
/* ACPI _DSM — device specific method for naming a PCI or PCI Express device */
@@ -164,10 +166,15 @@ static int dev_pci_onboard(struct udev_device *dev, struct netnames *names) {
if (attr)
dev_port = strtol(attr, NULL, 10);
+ /* kernel provided front panel port name for multiple port PCI device */
+ port_name = udev_device_get_sysattr_value(dev, "phys_port_name");
+
s = names->pci_onboard;
l = sizeof(names->pci_onboard);
l = strpcpyf(&s, l, "o%d", idx);
- if (dev_port > 0)
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
l = strpcpyf(&s, l, "d%d", dev_port);
if (l == 0)
names->pci_onboard[0] = '\0';
@@ -202,9 +209,9 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
unsigned domain, bus, slot, func, dev_port = 0;
size_t l;
char *s;
- const char *attr;
+ const char *attr, *port_name;
struct udev_device *pci = NULL;
- char slots[256], str[256];
+ char slots[PATH_MAX];
_cleanup_closedir_ DIR *dir = NULL;
struct dirent *dent;
int hotplug_slot = 0, err = 0;
@@ -217,6 +224,9 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
if (attr)
dev_port = strtol(attr, NULL, 10);
+ /* kernel provided front panel port name for multiple port PCI device */
+ port_name = udev_device_get_sysattr_value(dev, "phys_port_name");
+
/* compose a name based on the raw kernel's PCI bus, slot numbers */
s = names->pci_path;
l = sizeof(names->pci_path);
@@ -225,7 +235,9 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
l = strpcpyf(&s, l, "p%us%u", bus, slot);
if (func > 0 || is_pci_multifunction(names->pcidev))
l = strpcpyf(&s, l, "f%u", func);
- if (dev_port > 0)
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
l = strpcpyf(&s, l, "d%u", dev_port);
if (l == 0)
names->pci_path[0] = '\0';
@@ -236,7 +248,8 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
err = -ENOENT;
goto out;
}
- xsprintf(slots, "%s/slots", udev_device_get_syspath(pci));
+
+ snprintf(slots, sizeof slots, "%s/slots", udev_device_get_syspath(pci));
dir = opendir(slots);
if (!dir) {
err = -errno;
@@ -245,8 +258,7 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
int i;
- char *rest;
- char *address;
+ char *rest, *address, str[PATH_MAX];
if (dent->d_name[0] == '.')
continue;
@@ -255,7 +267,8 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
continue;
if (i < 1)
continue;
- xsprintf(str, "%s/%s/address", slots, dent->d_name);
+
+ snprintf(str, sizeof str, "%s/%s/address", slots, dent->d_name);
if (read_one_line_file(str, &address) >= 0) {
/* match slot address with device by stripping the function */
if (strneq(address, udev_device_get_sysname(names->pcidev), strlen(address)))
@@ -275,7 +288,9 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) {
l = strpcpyf(&s, l, "s%d", hotplug_slot);
if (func > 0 || is_pci_multifunction(names->pcidev))
l = strpcpyf(&s, l, "f%d", func);
- if (dev_port > 0)
+ if (port_name)
+ l = strpcpyf(&s, l, "n%s", port_name);
+ else if (dev_port > 0)
l = strpcpyf(&s, l, "d%d", dev_port);
if (l == 0)
names->pci_slot[0] = '\0';
diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c
index 5d2997fd8f..43004bc0bc 100644
--- a/src/udev/udev-node.c
+++ b/src/udev/udev-node.c
@@ -337,7 +337,7 @@ out:
void udev_node_add(struct udev_device *dev, bool apply,
mode_t mode, uid_t uid, gid_t gid,
struct udev_list *seclabel_list) {
- char filename[UTIL_PATH_SIZE];
+ char filename[sizeof("/dev/block/:") + 2*DECIMAL_STR_MAX(unsigned)];
struct udev_list_entry *list_entry;
log_debug("handling device node '%s', devnum=%s, mode=%#o, uid="UID_FMT", gid="GID_FMT,
@@ -360,7 +360,7 @@ void udev_node_add(struct udev_device *dev, bool apply,
void udev_node_remove(struct udev_device *dev) {
struct udev_list_entry *list_entry;
- char filename[UTIL_PATH_SIZE];
+ char filename[sizeof("/dev/block/:") + 2*DECIMAL_STR_MAX(unsigned)];
/* remove/update symlinks, remove symlinks from name index */
udev_list_entry_foreach(list_entry, udev_device_get_devlinks_list_entry(dev))
diff --git a/src/udev/udev-watch.c b/src/udev/udev-watch.c
index 9ce5e975de..bc9096ed0c 100644
--- a/src/udev/udev-watch.c
+++ b/src/udev/udev-watch.c
@@ -89,7 +89,7 @@ unlink:
}
void udev_watch_begin(struct udev *udev, struct udev_device *dev) {
- char filename[UTIL_PATH_SIZE];
+ char filename[sizeof("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
int wd;
int r;
@@ -116,7 +116,7 @@ void udev_watch_begin(struct udev *udev, struct udev_device *dev) {
void udev_watch_end(struct udev *udev, struct udev_device *dev) {
int wd;
- char filename[UTIL_PATH_SIZE];
+ char filename[sizeof("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
if (inotify_fd < 0)
return;
@@ -135,7 +135,7 @@ void udev_watch_end(struct udev *udev, struct udev_device *dev) {
}
struct udev_device *udev_watch_lookup(struct udev *udev, int wd) {
- char filename[UTIL_PATH_SIZE];
+ char filename[sizeof("/run/udev/watch/") + DECIMAL_STR_MAX(int)];
char device[UTIL_NAME_SIZE];
ssize_t len;