diff options
Diffstat (limited to 'src')
66 files changed, 2068 insertions, 579 deletions
diff --git a/src/analyze/analyze-verify.c b/src/analyze/analyze-verify.c index 5fd3ee49eb..0ce0276d92 100644 --- a/src/analyze/analyze-verify.c +++ b/src/analyze/analyze-verify.c @@ -71,6 +71,7 @@ static int prepare_filename(const char *filename, char **ret) { } static int generate_path(char **var, char **filenames) { + const char *old; char **filename; _cleanup_strv_free_ char **ans = NULL; @@ -90,9 +91,19 @@ static int generate_path(char **var, char **filenames) { assert_se(strv_uniq(ans)); - r = strv_extend(&ans, ""); - if (r < 0) - return r; + /* First, prepend our directories. Second, if some path was specified, use that, and + * otherwise use the defaults. Any duplicates will be filtered out in path-lookup.c. + * Treat explicit empty path to mean that nothing should be appended. + */ + old = getenv("SYSTEMD_UNIT_PATH"); + if (!streq_ptr(old, "")) { + if (!old) + old = ":"; + + r = strv_extend(&ans, old); + if (r < 0) + return r; + } *var = strv_join(ans, ":"); if (!*var) diff --git a/src/backlight/backlight.c b/src/backlight/backlight.c index 45be135a23..7c59f60d5f 100644 --- a/src/backlight/backlight.c +++ b/src/backlight/backlight.c @@ -167,7 +167,7 @@ static bool validate_device(struct udev *udev, struct udev_device *device) { continue; v = udev_device_get_sysattr_value(other, "type"); - if (!streq_ptr(v, "platform") && !streq_ptr(v, "firmware")) + if (!STRPTR_IN_SET(v, "platform", "firmware")) continue; /* OK, so there's another backlight device, and it's a diff --git a/src/basic/escape.c b/src/basic/escape.c index 01daf11ce7..4a1ec4505e 100644 --- a/src/basic/escape.c +++ b/src/basic/escape.c @@ -333,7 +333,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi assert(remaining > 0); if (*f != '\\') { - /* A literal literal, copy verbatim */ + /* A literal, copy verbatim */ *(t++) = *f; continue; } diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index ce87257bc1..86d9ad7e36 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -597,3 +597,190 @@ int inotify_add_watch_fd(int fd, int what, uint32_t mask) { return r; } + +int chase_symlinks(const char *path, const char *_root, char **ret) { + _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL; + _cleanup_close_ int fd = -1; + unsigned max_follow = 32; /* how many symlinks to follow before giving up and returning ELOOP */ + char *todo; + int r; + + assert(path); + + /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following + * symlinks relative to a root directory, instead of the root of the host. + * + * Note that "root" matters only if we encounter an absolute symlink, it's unused otherwise. Most importantly + * this means the path parameter passed in is not prefixed by it. + * + * Algorithmically this operates on two path buffers: "done" are the components of the path we already + * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to + * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning + * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no + * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races + * at a minimum. */ + + r = path_make_absolute_cwd(path, &buffer); + if (r < 0) + return r; + + if (_root) { + r = path_make_absolute_cwd(_root, &root); + if (r < 0) + return r; + } + + fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + todo = buffer; + for (;;) { + _cleanup_free_ char *first = NULL; + _cleanup_close_ int child = -1; + struct stat st; + size_t n, m; + + /* Determine length of first component in the path */ + n = strspn(todo, "/"); /* The slashes */ + m = n + strcspn(todo + n, "/"); /* The entire length of the component */ + + /* Extract the first component. */ + first = strndup(todo, m); + if (!first) + return -ENOMEM; + + todo += m; + + /* Just a single slash? Then we reached the end. */ + if (isempty(first) || path_equal(first, "/")) + break; + + /* Just a dot? Then let's eat this up. */ + if (path_equal(first, "/.")) + continue; + + /* Two dots? Then chop off the last bit of what we already found out. */ + if (path_equal(first, "/..")) { + _cleanup_free_ char *parent = NULL; + int fd_parent = -1; + + if (isempty(done) || path_equal(done, "/")) + return -EINVAL; + + parent = dirname_malloc(done); + if (!parent) + return -ENOMEM; + + /* Don't allow this to leave the root dir */ + if (root && + path_startswith(done, root) && + !path_startswith(parent, root)) + return -EINVAL; + + free(done); + done = parent; + parent = NULL; + + fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd_parent < 0) + return -errno; + + safe_close(fd); + fd = fd_parent; + + continue; + } + + /* Otherwise let's see what this is. */ + child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (child < 0) + return -errno; + + if (fstat(child, &st) < 0) + return -errno; + + if (S_ISLNK(st.st_mode)) { + _cleanup_free_ char *destination = NULL; + + /* This is a symlink, in this case read the destination. But let's make sure we don't follow + * symlinks without bounds. */ + if (--max_follow <= 0) + return -ELOOP; + + r = readlinkat_malloc(fd, first + n, &destination); + if (r < 0) + return r; + if (isempty(destination)) + return -EINVAL; + + if (path_is_absolute(destination)) { + + /* An absolute destination. Start the loop from the beginning, but use the root + * directory as base. */ + + safe_close(fd); + fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + free(buffer); + buffer = destination; + destination = NULL; + + todo = buffer; + free(done); + + /* Note that we do not revalidate the root, we take it as is. */ + if (isempty(root)) + done = NULL; + else { + done = strdup(root); + if (!done) + return -ENOMEM; + } + + } else { + char *joined; + + /* A relative destination. If so, this is what we'll prefix what's left to do with what + * we just read, and start the loop again, but remain in the current directory. */ + + joined = strjoin("/", destination, todo, NULL); + if (!joined) + return -ENOMEM; + + free(buffer); + todo = buffer = joined; + } + + continue; + } + + /* If this is not a symlink, then let's just add the name we read to what we already verified. */ + if (!done) { + done = first; + first = NULL; + } else { + if (!strextend(&done, first, NULL)) + return -ENOMEM; + } + + /* And iterate again, but go one directory further down. */ + safe_close(fd); + fd = child; + child = -1; + } + + if (!done) { + /* Special case, turn the empty string into "/", to indicate the root directory. */ + done = strdup("/"); + if (!done) + return -ENOMEM; + } + + *ret = done; + done = NULL; + + return 0; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index 2c3b9a1c74..31df47cf1e 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -77,3 +77,5 @@ union inotify_event_buffer { }; int inotify_add_watch_fd(int fd, int what, uint32_t mask); + +int chase_symlinks(const char *path, const char *_root, char **ret); diff --git a/src/basic/list.h b/src/basic/list.h index 5962aa4211..c3771a177f 100644 --- a/src/basic/list.h +++ b/src/basic/list.h @@ -142,6 +142,8 @@ } else { \ if ((_b->name##_prev = _a->name##_prev)) \ _b->name##_prev->name##_next = _b; \ + else \ + *_head = _b; \ _b->name##_next = _a; \ _a->name##_prev = _b; \ } \ diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index bfa04394fe..b9affb4e70 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -36,6 +36,7 @@ #include "set.h" #include "stdio-util.h" #include "string-util.h" +#include "strv.h" static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; @@ -287,10 +288,12 @@ int umount_recursive(const char *prefix, int flags) { continue; if (umount2(p, flags) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to umount %s: %m", p); continue; } + log_debug("Successfully unmounted %s", p); + again = true; n++; @@ -311,24 +314,21 @@ static int get_mount_flags(const char *path, unsigned long *flags) { return 0; } -int bind_remount_recursive(const char *prefix, bool ro) { +int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) { _cleanup_set_free_free_ Set *done = NULL; _cleanup_free_ char *cleaned = NULL; int r; - /* Recursively remount a directory (and all its submounts) - * read-only or read-write. If the directory is already - * mounted, we reuse the mount and simply mark it - * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write - * operation). If it isn't we first make it one. Afterwards we - * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all - * submounts we can access, too. When mounts are stacked on - * the same mount point we only care for each individual - * "top-level" mount on each point, as we cannot - * influence/access the underlying mounts anyway. We do not - * have any effect on future submounts that might get - * propagated, they migt be writable. This includes future - * submounts that have been triggered via autofs. */ + /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already + * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write + * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to + * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each + * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We + * do not have any effect on future submounts that might get propagated, they migt be writable. This includes + * future submounts that have been triggered via autofs. + * + * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the + * remount operation. Note that we'll ignore the blacklist for the top-level path. */ cleaned = strdup(prefix); if (!cleaned) @@ -385,6 +385,33 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (r < 0) return r; + if (!path_startswith(p, cleaned)) + continue; + + /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall + * operate on. */ + if (!path_equal(cleaned, p)) { + bool blacklisted = false; + char **i; + + STRV_FOREACH(i, blacklist) { + + if (path_equal(*i, cleaned)) + continue; + + if (!path_startswith(*i, cleaned)) + continue; + + if (path_startswith(p, *i)) { + blacklisted = true; + log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned); + break; + } + } + if (blacklisted) + continue; + } + /* Let's ignore autofs mounts. If they aren't * triggered yet, we want to avoid triggering * them, as we don't make any guarantees for @@ -396,12 +423,9 @@ int bind_remount_recursive(const char *prefix, bool ro) { continue; } - if (path_startswith(p, cleaned) && - !set_contains(done, p)) { - + if (!set_contains(done, p)) { r = set_consume(todo, p); p = NULL; - if (r == -EEXIST) continue; if (r < 0) @@ -418,8 +442,7 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (!set_contains(done, cleaned) && !set_contains(todo, cleaned)) { - /* The prefix directory itself is not yet a - * mount, make it one. */ + /* The prefix directory itself is not yet a mount, make it one. */ if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0) return -errno; @@ -430,6 +453,8 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) return -errno; + log_debug("Made top-level directory %s a mount point.", prefix); + x = strdup(cleaned); if (!x) return -ENOMEM; @@ -447,8 +472,7 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (r < 0) return r; - /* Deal with mount points that are obstructed by a - * later mount */ + /* Deal with mount points that are obstructed by a later mount */ r = path_is_mount_point(x, 0); if (r == -ENOENT || r == 0) continue; @@ -463,6 +487,7 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) return -errno; + log_debug("Remounted %s read-only.", x); } } } diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h index f46989ebb3..74730de663 100644 --- a/src/basic/mount-util.h +++ b/src/basic/mount-util.h @@ -35,7 +35,7 @@ int path_is_mount_point(const char *path, int flags); int repeat_unmount(const char *path, int flags); int umount_recursive(const char *target, int flags); -int bind_remount_recursive(const char *prefix, bool ro); +int bind_remount_recursive(const char *prefix, bool ro, char **blacklist); int mount_move_root(const char *path); diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c index 6093e47172..5c829e0e7e 100644 --- a/src/basic/socket-util.c +++ b/src/basic/socket-util.c @@ -441,7 +441,7 @@ const char* socket_address_get_path(const SocketAddress *a) { } bool socket_ipv6_is_supported(void) { - if (access("/proc/net/sockstat6", F_OK) != 0) + if (access("/proc/net/if_inet6", F_OK) != 0) return false; return true; diff --git a/src/basic/strv.h b/src/basic/strv.h index 683ce83a2a..fec2597db0 100644 --- a/src/basic/strv.h +++ b/src/basic/strv.h @@ -141,6 +141,11 @@ void strv_print(char **l); }) #define STR_IN_SET(x, ...) strv_contains(STRV_MAKE(__VA_ARGS__), x) +#define STRPTR_IN_SET(x, ...) \ + ({ \ + const char* _x = (x); \ + _x && strv_contains(STRV_MAKE(__VA_ARGS__), _x); \ + }) #define FOREACH_STRING(x, ...) \ for (char **_l = ({ \ diff --git a/src/basic/user-util.c b/src/basic/user-util.c index 122d9a0c7c..0522bce1d1 100644 --- a/src/basic/user-util.c +++ b/src/basic/user-util.c @@ -31,14 +31,15 @@ #include <unistd.h> #include <utmp.h> -#include "missing.h" #include "alloc-util.h" #include "fd-util.h" #include "formats-util.h" #include "macro.h" +#include "missing.h" #include "parse-util.h" #include "path-util.h" #include "string-util.h" +#include "strv.h" #include "user-util.h" #include "utf8.h" @@ -175,6 +176,35 @@ int get_user_creds( return 0; } +int get_user_creds_clean( + const char **username, + uid_t *uid, gid_t *gid, + const char **home, + const char **shell) { + + int r; + + /* Like get_user_creds(), but resets home/shell to NULL if they don't contain anything relevant. */ + + r = get_user_creds(username, uid, gid, home, shell); + if (r < 0) + return r; + + if (shell && + (isempty(*shell) || PATH_IN_SET(*shell, + "/bin/nologin", + "/sbin/nologin", + "/usr/bin/nologin", + "/usr/sbin/nologin"))) + *shell = NULL; + + if (home && + (isempty(*home) || path_equal(*home, "/"))) + *home = NULL; + + return 0; +} + int get_group_creds(const char **groupname, gid_t *gid) { struct group *g; gid_t id; diff --git a/src/basic/user-util.h b/src/basic/user-util.h index f569363811..6c61f63cae 100644 --- a/src/basic/user-util.h +++ b/src/basic/user-util.h @@ -40,6 +40,7 @@ char* getlogname_malloc(void); char* getusername_malloc(void); int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell); +int get_user_creds_clean(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell); int get_group_creds(const char **groupname, gid_t *gid); char* uid_to_name(uid_t uid); diff --git a/src/basic/util.c b/src/basic/util.c index 9d66d28eb7..ec7939dc83 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -467,7 +467,7 @@ bool in_initrd(void) { * 2. the root file system must be a memory file system * * The second check is extra paranoia, since misdetecting an - * initrd can have bad bad consequences due the initrd + * initrd can have bad consequences due the initrd * emptying when transititioning to the main systemd. */ diff --git a/src/core/automount.c b/src/core/automount.c index 00295cf769..bdc0e06965 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) { return r; (void) sd_event_source_set_description(a->pipe_event_source, "automount-io"); + if (a->deserialized_state == AUTOMOUNT_RUNNING) { + r = automount_start_expire(a); + if (r < 0) + log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m"); + } } automount_set_state(a, a->deserialized_state); diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 7e33a2d201..eec4500c8c 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -707,6 +707,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1072,7 +1074,8 @@ int bus_exec_context_set_transient_property( "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", - "RestrictRealtime", "DynamicUser", "RemoveIPC")) { + "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", + "ProtectControlGroups")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1106,6 +1109,10 @@ int bus_exec_context_set_transient_property( c->dynamic_user = b; else if (streq(name, "RemoveIPC")) c->remove_ipc = b; + else if (streq(name, "ProtectKernelTunables")) + c->protect_kernel_tunables = b; + else if (streq(name, "ProtectControlGroups")) + c->protect_control_groups = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c index 310aaa94e1..1043da3eb7 100644 --- a/src/core/dynamic-user.c +++ b/src/core/dynamic-user.c @@ -233,7 +233,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) { if (st.st_nlink > 0) break; - /* Oh, bummer, we got got the lock, but the file was unlinked between the time we opened it and + /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and * got the lock. Close it, and try again. */ lock_fd = safe_close(lock_fd); } diff --git a/src/core/execute.c b/src/core/execute.c index 2026137721..82d8c978c1 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -837,6 +837,8 @@ static int null_conv( return PAM_CONV_ERR; } +#endif + static int setup_pam( const char *name, const char *user, @@ -845,6 +847,8 @@ static int setup_pam( char ***env, int fds[], unsigned n_fds) { +#ifdef HAVE_PAM + static const struct pam_conv conv = { .conv = null_conv, .appdata_ptr = NULL @@ -1038,8 +1042,10 @@ fail: closelog(); return r; -} +#else + return 0; #endif +} static void rename_process_from_path(const char *path) { char process_name[11]; @@ -1273,6 +1279,10 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (!seccomp) return -ENOMEM; + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + r = seccomp_rule_add( seccomp, SCMP_ACT_ERRNO(EPERM), @@ -1322,6 +1332,10 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (!seccomp) return -ENOMEM; + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) if (permitted_policies[i] > max_policy) @@ -1375,12 +1389,121 @@ finish: return r; } +static int apply_protect_sysctl(Unit *u, const ExecContext *c) { + scmp_filter_ctx *seccomp; + int r; + + assert(c); + + /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but + * let's protect even those systems where this is left on in the kernel. */ + + if (skip_seccomp_unavailable(u, "ProtectKernelTunables=")) + return 0; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(_sysctl), + 0); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + +static int apply_private_devices(Unit *u, const ExecContext *c) { + const SystemCallFilterSet *set; + scmp_filter_ctx *seccomp; + const char *sys; + bool syscalls_found = false; + int r; + + assert(c); + + /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ + + if (skip_seccomp_unavailable(u, "PrivateDevices=")) + return 0; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + for (set = syscall_filter_sets; set->set_name; set++) + if (streq(set->set_name, "@raw-io")) { + syscalls_found = true; + break; + } + + /* We should never fail here */ + if (!syscalls_found) { + r = -EOPNOTSUPP; + goto finish; + } + + NULSTR_FOREACH(sys, set->value) { + int id; + bool add = true; + +#ifndef __NR_s390_pci_mmio_read + if (streq(sys, "s390_pci_mmio_read")) + add = false; +#endif +#ifndef __NR_s390_pci_mmio_write + if (streq(sys, "s390_pci_mmio_write")) + add = false; +#endif + + if (!add) + continue; + + id = seccomp_syscall_resolve_name(sys); + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + id, 0); + if (r < 0) + goto finish; + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { assert(idle_pipe); - idle_pipe[1] = safe_close(idle_pipe[1]); idle_pipe[2] = safe_close(idle_pipe[2]); @@ -1581,7 +1704,9 @@ static bool exec_needs_mount_namespace( if (context->private_devices || context->protect_system != PROTECT_SYSTEM_NO || - context->protect_home != PROTECT_HOME_NO) + context->protect_home != PROTECT_HOME_NO || + context->protect_kernel_tunables || + context->protect_control_groups) return true; return false; @@ -1740,6 +1865,111 @@ static int setup_private_users(uid_t uid, gid_t gid) { return 0; } +static int setup_runtime_directory( + const ExecContext *context, + const ExecParameters *params, + uid_t uid, + gid_t gid) { + + char **rt; + int r; + + assert(context); + assert(params); + + STRV_FOREACH(rt, context->runtime_directory) { + _cleanup_free_ char *p; + + p = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!p) + return -ENOMEM; + + r = mkdir_p_label(p, context->runtime_directory_mode); + if (r < 0) + return r; + + r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); + if (r < 0) + return r; + } + + return 0; +} + +static int setup_smack( + const ExecContext *context, + const ExecCommand *command) { + +#ifdef HAVE_SMACK + int r; + + assert(context); + assert(command); + + if (!mac_smack_use()) + return 0; + + if (context->smack_process_label) { + r = mac_smack_apply_pid(0, context->smack_process_label); + if (r < 0) + return r; + } +#ifdef SMACK_DEFAULT_PROCESS_LABEL + else { + _cleanup_free_ char *exec_label = NULL; + + r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); + if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) + return r; + + r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); + if (r < 0) + return r; + } +#endif +#endif + + return 0; +} + +static int compile_read_write_paths( + const ExecContext *context, + const ExecParameters *params, + char ***ret) { + + _cleanup_strv_free_ char **l = NULL; + char **rt; + + /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all + * runtime directories. */ + + if (strv_isempty(context->read_write_paths) && + strv_isempty(context->runtime_directory)) { + *ret = NULL; /* NOP if neither is set */ + return 0; + } + + l = strv_copy(context->read_write_paths); + if (!l) + return -ENOMEM; + + STRV_FOREACH(rt, context->runtime_directory) { + char *s; + + s = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!s) + return -ENOMEM; + + if (strv_consume(&l, s) < 0) + return -ENOMEM; + } + + *ret = l; + l = NULL; + + return 0; +} + static void append_socket_pair(int *array, unsigned *n, int pair[2]) { assert(array); assert(n); @@ -1796,6 +2026,37 @@ static int close_remaining_fds( return close_all_fds(dont_close, n_dont_close); } +static bool context_has_address_families(const ExecContext *c) { + assert(c); + + return c->address_families_whitelist || + !set_isempty(c->address_families); +} + +static bool context_has_syscall_filters(const ExecContext *c) { + assert(c); + + return c->syscall_whitelist || + !set_isempty(c->syscall_filter) || + !set_isempty(c->syscall_archs); +} + +static bool context_has_no_new_privileges(const ExecContext *c) { + assert(c); + + if (c->no_new_privileges) + return true; + + if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */ + return false; + + return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */ + c->memory_deny_write_execute || + c->restrict_realtime || + c->protect_kernel_tunables || + context_has_syscall_filters(c); +} + static int send_user_lookup( Unit *unit, int user_lookup_fd, @@ -1940,22 +2201,14 @@ static int exec_child( } else { if (context->user) { username = context->user; - r = get_user_creds(&username, &uid, &gid, &home, &shell); + r = get_user_creds_clean(&username, &uid, &gid, &home, &shell); if (r < 0) { *exit_status = EXIT_USER; return r; } - /* Don't set $HOME or $SHELL if they are are not particularly enlightening anyway. */ - if (isempty(home) || path_equal(home, "/")) - home = NULL; - - if (isempty(shell) || PATH_IN_SET(shell, - "/bin/nologin", - "/sbin/nologin", - "/usr/bin/nologin", - "/usr/sbin/nologin")) - shell = NULL; + /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway + * (i.e. are "/" or "/bin/nologin"). */ } if (context->group) { @@ -2108,28 +2361,10 @@ static int exec_child( } if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) { - char **rt; - - STRV_FOREACH(rt, context->runtime_directory) { - _cleanup_free_ char *p; - - p = strjoin(params->runtime_prefix, "/", *rt, NULL); - if (!p) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return -ENOMEM; - } - - r = mkdir_p_label(p, context->runtime_directory_mode); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } - - r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } + r = setup_runtime_directory(context, params, uid, gid); + if (r < 0) { + *exit_status = EXIT_RUNTIME_DIRECTORY; + return r; } } @@ -2168,41 +2403,15 @@ static int exec_child( } accum_env = strv_env_clean(accum_env); - umask(context->umask); + (void) umask(context->umask); if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - r = enforce_groups(context, username, gid); + r = setup_smack(context, command); if (r < 0) { - *exit_status = EXIT_GROUP; + *exit_status = EXIT_SMACK_PROCESS_LABEL; return r; } -#ifdef HAVE_SMACK - if (context->smack_process_label) { - r = mac_smack_apply_pid(0, context->smack_process_label); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#ifdef SMACK_DEFAULT_PROCESS_LABEL - else { - _cleanup_free_ char *exec_label = NULL; - r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); - if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - - r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#endif -#endif -#ifdef HAVE_PAM if (context->pam_name && username) { r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { @@ -2210,7 +2419,6 @@ static int exec_child( return r; } } -#endif } if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) { @@ -2222,8 +2430,8 @@ static int exec_child( } needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); - if (needs_mount_namespace) { + _cleanup_free_ char **rw = NULL; char *tmp = NULL, *var = NULL; /* The runtime struct only contains the parent @@ -2239,14 +2447,22 @@ static int exec_child( var = strjoina(runtime->var_tmp_dir, "/tmp"); } + r = compile_read_write_paths(context, params, &rw); + if (r < 0) { + *exit_status = EXIT_NAMESPACE; + return r; + } + r = setup_namespace( (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL, - context->read_write_paths, + rw, context->read_only_paths, context->inaccessible_paths, tmp, var, context->private_devices, + context->protect_kernel_tunables, + context->protect_control_groups, context->protect_home, context->protect_system, context->mount_flags); @@ -2264,6 +2480,14 @@ static int exec_child( } } + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { + r = enforce_groups(context, username, gid); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; + } + } + if (context->working_directory_home) wd = home; else if (context->working_directory) @@ -2335,11 +2559,6 @@ static int exec_child( if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - bool use_address_families = context->address_families_whitelist || - !set_isempty(context->address_families); - bool use_syscall_filter = context->syscall_whitelist || - !set_isempty(context->syscall_filter) || - !set_isempty(context->syscall_archs); int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { @@ -2416,15 +2635,14 @@ static int exec_child( return -errno; } - if (context->no_new_privileges || - (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter))) + if (context_has_no_new_privileges(context)) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; } #ifdef HAVE_SECCOMP - if (use_address_families) { + if (context_has_address_families(context)) { r = apply_address_families(unit, context); if (r < 0) { *exit_status = EXIT_ADDRESS_FAMILIES; @@ -2448,7 +2666,23 @@ static int exec_child( } } - if (use_syscall_filter) { + if (context->protect_kernel_tunables) { + r = apply_protect_sysctl(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + + if (context->private_devices) { + r = apply_private_devices(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + + if (context_has_syscall_filters(context)) { r = apply_seccomp(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; @@ -2880,6 +3114,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sNonBlocking: %s\n" "%sPrivateTmp: %s\n" "%sPrivateDevices: %s\n" + "%sProtectKernelTunables: %s\n" + "%sProtectControlGroups: %s\n" "%sPrivateNetwork: %s\n" "%sPrivateUsers: %s\n" "%sProtectHome: %s\n" @@ -2893,6 +3129,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->non_blocking), prefix, yes_no(c->private_tmp), prefix, yes_no(c->private_devices), + prefix, yes_no(c->protect_kernel_tunables), + prefix, yes_no(c->protect_control_groups), prefix, yes_no(c->private_network), prefix, yes_no(c->private_users), prefix, protect_home_to_string(c->protect_home), diff --git a/src/core/execute.h b/src/core/execute.h index 6082c42aba..449180c903 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -174,6 +174,8 @@ struct ExecContext { bool private_users; ProtectSystem protect_system; ProtectHome protect_home; + bool protect_kernel_tunables; + bool protect_control_groups; bool no_new_privileges; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 2e6c965aec..c49c1d6732 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -89,6 +89,8 @@ $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, $1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) +$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) +$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users) $1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context) diff --git a/src/core/main.c b/src/core/main.c index 803307c9d5..be0cb0b6d1 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -996,10 +996,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_MACHINE_ID: r = set_machine_id(optarg); - if (r < 0) { - log_error("MachineID '%s' is not valid.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "MachineID '%s' is not valid.", optarg); break; case 'h': diff --git a/src/core/manager.c b/src/core/manager.c index fa8deb9b1b..dd0d1fa984 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1236,9 +1236,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { return r; /* Make sure the transient directory always exists, so that it remains in the search path */ - r = mkdir_p_label(m->lookup_paths.transient, 0755); - if (r < 0) - return r; + if (!m->test_run) { + r = mkdir_p_label(m->lookup_paths.transient, 0755); + if (r < 0) + return r; + } dual_timestamp_get(&m->generators_start_timestamp); r = manager_run_generators(m); @@ -1657,13 +1659,12 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui return 0; } -static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, size_t n, FDSet *fds) { +static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, FDSet *fds) { _cleanup_strv_free_ char **tags = NULL; assert(m); assert(u); assert(buf); - assert(n > 0); tags = strv_split(buf, "\n\r"); if (!tags) { @@ -1673,8 +1674,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const if (UNIT_VTABLE(u)->notify_message) UNIT_VTABLE(u)->notify_message(u, pid, tags, fds); - else - log_unit_debug(u, "Got notification message for unit. Ignoring."); + else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { + _cleanup_free_ char *x = NULL, *y = NULL; + + x = cescape(buf); + if (x) + y = ellipsize(x, 20, 90); + log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y)); + } } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { @@ -1700,7 +1707,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t struct cmsghdr *cmsg; struct ucred *ucred = NULL; - bool found = false; Unit *u1, *u2, *u3; int r, *fd_array = NULL; unsigned n_fds = 0; @@ -1716,10 +1722,14 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); if (n < 0) { - if (errno == EAGAIN || errno == EINTR) - return 0; + if (!IN_SET(errno, EAGAIN, EINTR)) + log_error("Failed to receive notification message: %m"); - return -errno; + /* It's not an option to return an error here since it + * would disable the notification handler entirely. Services + * wouldn't be able to send the WATCHDOG message for + * example... */ + return 0; } CMSG_FOREACH(cmsg, &msghdr) { @@ -1742,7 +1752,8 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t r = fdset_new_array(&fds, fd_array, n_fds); if (r < 0) { close_many(fd_array, n_fds); - return log_oom(); + log_oom(); + return 0; } } @@ -1756,33 +1767,29 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } + /* The message should be a string. Here we make sure it's NUL-terminated, + * but only the part until first NUL will be used anyway. */ buf[n] = 0; /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); - if (u1) { - manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds); - found = true; - } + if (u1) + manager_invoke_notify_message(m, u1, ucred->pid, buf, fds); u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); - if (u2 && u2 != u1) { - manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds); - found = true; - } + if (u2 && u2 != u1) + manager_invoke_notify_message(m, u2, ucred->pid, buf, fds); u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); - if (u3 && u3 != u2 && u3 != u1) { - manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds); - found = true; - } + if (u3 && u3 != u2 && u3 != u1) + manager_invoke_notify_message(m, u3, ucred->pid, buf, fds); - if (!found) + if (!u1 && !u2 && !u3) log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid); if (fdset_size(fds) > 0) - log_warning("Got auxiliary fds with notification message, closing all."); + log_warning("Got extra auxiliary fds with notification message, closing them."); return 0; } diff --git a/src/core/namespace.c b/src/core/namespace.c index 52a2505d94..43a2f4ba6e 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "dev-setup.h" #include "fd-util.h" +#include "fs-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -53,61 +54,230 @@ typedef enum MountMode { PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV, - READWRITE + READWRITE, } MountMode; typedef struct BindMount { - const char *path; + const char *path; /* stack memory, doesn't need to be freed explicitly */ + char *chased; /* malloc()ed memory, needs to be freed */ MountMode mode; - bool done; - bool ignore; + bool ignore; /* Ignore if path does not exist */ } BindMount; +typedef struct TargetMount { + const char *path; + MountMode mode; + bool ignore; /* Ignore if path does not exist */ +} TargetMount; + +/* + * The following Protect tables are to protect paths and mark some of them + * READONLY, in case a path is covered by an option from another table, then + * it is marked READWRITE in the current one, and the more restrictive mode is + * applied from that other table. This way all options can be combined in a + * safe and comprehensible way for users. + */ + +/* ProtectKernelTunables= option and the related filesystem APIs */ +static const TargetMount protect_kernel_tunables_table[] = { + { "/proc/sys", READONLY, false }, + { "/proc/sysrq-trigger", READONLY, true }, + { "/proc/latency_stats", READONLY, true }, + { "/proc/mtrr", READONLY, true }, + { "/proc/apm", READONLY, true }, + { "/proc/acpi", READONLY, true }, + { "/proc/timer_stats", READONLY, true }, + { "/proc/asound", READONLY, true }, + { "/proc/bus", READONLY, true }, + { "/proc/fs", READONLY, true }, + { "/proc/irq", READONLY, true }, + { "/sys", READONLY, false }, + { "/sys/kernel/debug", READONLY, true }, + { "/sys/kernel/tracing", READONLY, true }, + { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */ +}; + +/* + * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of + * system should be protected by ProtectSystem= + */ +static const TargetMount protect_home_read_only_table[] = { + { "/home", READONLY, true }, + { "/run/user", READONLY, true }, + { "/root", READONLY, true }, +}; + +/* ProtectHome=yes table */ +static const TargetMount protect_home_yes_table[] = { + { "/home", INACCESSIBLE, true }, + { "/run/user", INACCESSIBLE, true }, + { "/root", INACCESSIBLE, true }, +}; + +/* ProtectSystem=yes table */ +static const TargetMount protect_system_yes_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, +}; + +/* ProtectSystem=full includes ProtectSystem=yes */ +static const TargetMount protect_system_full_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, + { "/etc", READONLY, false }, +}; + +/* + * ProtectSystem=strict table. In this strict mode, we mount everything + * read-only, except for /proc, /dev, /sys which are the kernel API VFS, + * which are left writable, but PrivateDevices= + ProtectKernelTunables= + * protect those, and these options should be fully orthogonal. + * (And of course /home and friends are also left writable, as ProtectHome= + * shall manage those, orthogonally). + */ +static const TargetMount protect_system_strict_table[] = { + { "/", READONLY, false }, + { "/proc", READWRITE, false }, /* ProtectKernelTunables= */ + { "/sys", READWRITE, false }, /* ProtectKernelTunables= */ + { "/dev", READWRITE, false }, /* PrivateDevices= */ + { "/home", READWRITE, true }, /* ProtectHome= */ + { "/run/user", READWRITE, true }, /* ProtectHome= */ + { "/root", READWRITE, true }, /* ProtectHome= */ +}; + +static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) { + (*p)->path = path; + (*p)->mode = mode; + (*p)->ignore = ignore; +} + static int append_mounts(BindMount **p, char **strv, MountMode mode) { char **i; assert(p); STRV_FOREACH(i, strv) { + bool ignore = false; - (*p)->ignore = false; - (*p)->done = false; - - if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') { - (*p)->ignore = true; + if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) { (*i)++; + ignore = true; } if (!path_is_absolute(*i)) return -EINVAL; - (*p)->path = *i; - (*p)->mode = mode; + set_bind_mount(p, *i, mode, ignore); (*p)++; } return 0; } -static int mount_path_compare(const void *a, const void *b) { - const BindMount *p = a, *q = b; - int d; +static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) { + unsigned i; - d = path_compare(p->path, q->path); + assert(p); + assert(mounts); + + for (i = 0; i < size; i++) { + /* + * Here we assume that the ignore field is set during + * declaration we do not support "-" at the beginning. + */ + const TargetMount *m = &mounts[i]; + const char *path = prefix_roota(root_directory, m->path); + + if (!path_is_absolute(path)) + return -EINVAL; + + set_bind_mount(p, path, m->mode, m->ignore); + (*p)++; + } + + return 0; +} + +static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_tunables_table, + ELEMENTSOF(protect_kernel_tunables_table)); +} - if (d == 0) { - /* If the paths are equal, check the mode */ - if (p->mode < q->mode) - return -1; +static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) { + int r = 0; - if (p->mode > q->mode) - return 1; + assert(p); + if (protect_home == PROTECT_HOME_NO) return 0; + + switch (protect_home) { + case PROTECT_HOME_READ_ONLY: + r = append_target_mounts(p, root_directory, protect_home_read_only_table, + ELEMENTSOF(protect_home_read_only_table)); + break; + case PROTECT_HOME_YES: + r = append_target_mounts(p, root_directory, protect_home_yes_table, + ELEMENTSOF(protect_home_yes_table)); + break; + default: + r = -EINVAL; + break; } + return r; +} + +static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) { + int r = 0; + + assert(p); + + if (protect_system == PROTECT_SYSTEM_NO) + return 0; + + switch (protect_system) { + case PROTECT_SYSTEM_STRICT: + r = append_target_mounts(p, root_directory, protect_system_strict_table, + ELEMENTSOF(protect_system_strict_table)); + break; + case PROTECT_SYSTEM_YES: + r = append_target_mounts(p, root_directory, protect_system_yes_table, + ELEMENTSOF(protect_system_yes_table)); + break; + case PROTECT_SYSTEM_FULL: + r = append_target_mounts(p, root_directory, protect_system_full_table, + ELEMENTSOF(protect_system_full_table)); + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +static int mount_path_compare(const void *a, const void *b) { + const BindMount *p = a, *q = b; + int d; + /* If the paths are not equal, then order prefixes first */ - return d; + d = path_compare(p->path, q->path); + if (d != 0) + return d; + + /* If the paths are equal, check the mode */ + if (p->mode < q->mode) + return -1; + + if (p->mode > q->mode) + return 1; + + return 0; } static void drop_duplicates(BindMount *m, unsigned *n) { @@ -116,16 +286,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) { assert(m); assert(n); + /* Drops duplicate entries. Expects that the array is properly ordered already. */ + for (f = m, t = m, previous = NULL; f < m+*n; f++) { - /* The first one wins */ - if (previous && path_equal(f->path, previous->path)) + /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare() + * above. */ + if (previous && path_equal(f->path, previous->path)) { + log_debug("%s is duplicate.", f->path); continue; + } *t = *f; - previous = t; + t++; + } + + *n = t - m; +} + +static void drop_inaccessible(BindMount *m, unsigned *n) { + BindMount *f, *t; + const char *clear = NULL; + + assert(m); + assert(n); + + /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly + * ordered already. */ + for (f = m, t = m; f < m+*n; f++) { + + /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop + * it, as inaccessible paths really should drop the entire subtree. */ + if (clear && path_startswith(f->path, clear)) { + log_debug("%s is masked by %s.", f->path, clear); + continue; + } + + clear = f->mode == INACCESSIBLE ? f->path : NULL; + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_nop(BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the + * list is ordered by prefixes. */ + + for (f = m, t = m; f < m+*n; f++) { + + /* Only suppress such subtrees for READONLY and READWRITE entries */ + if (IN_SET(f->mode, READONLY, READWRITE)) { + BindMount *p; + bool found = false; + + /* Now let's find the first parent of the entry we are looking at. */ + for (p = t-1; p >= m; p--) { + if (path_startswith(f->path, p->path)) { + found = true; + break; + } + } + + /* We found it, let's see if it's the same mode, if so, we can drop this entry */ + if (found && p->mode == f->mode) { + log_debug("%s is redundant by %s", f->path, p->path); + continue; + } + } + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + if (!root_directory) + return; + + /* Drops all mounts that are outside of the root directory. */ + + for (f = m, t = m; f < m+*n; f++) { + + if (!path_startswith(f->path, root_directory)) { + log_debug("%s is outside of root directory.", f->path); + continue; + } + + *t = *f; t++; } @@ -278,24 +542,23 @@ static int apply_mount( const char *what; int r; - struct stat target; assert(m); + log_debug("Applying namespace mount on %s", m->path); + switch (m->mode) { - case INACCESSIBLE: + case INACCESSIBLE: { + struct stat target; /* First, get rid of everything that is below if there * is anything... Then, overmount it with an * inaccessible path. */ - umount_recursive(m->path, 0); + (void) umount_recursive(m->path, 0); - if (lstat(m->path, &target) < 0) { - if (m->ignore && errno == ENOENT) - return 0; - return -errno; - } + if (lstat(m->path, &target) < 0) + return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path); what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -303,11 +566,20 @@ static int apply_mount( return -ELOOP; } break; + } + case READONLY: case READWRITE: - /* Nothing to mount here, we just later toggle the - * MS_RDONLY bit for the mount point */ - return 0; + + r = path_is_mount_point(m->path, 0); + if (r < 0) + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); + if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ + return 0; + + /* This isn't a mount point yet, let's make it one. */ + what = m->path; + break; case PRIVATE_TMP: what = tmp_dir; @@ -326,38 +598,104 @@ static int apply_mount( assert(what); - r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) { - log_debug("Successfully mounted %s to %s", what, m->path); - return r; - } else { - if (m->ignore && errno == ENOENT) - return 0; + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); - } + + log_debug("Successfully mounted %s to %s", what, m->path); + return 0; } -static int make_read_only(BindMount *m) { - int r; +static int make_read_only(BindMount *m, char **blacklist) { + int r = 0; assert(m); if (IN_SET(m->mode, INACCESSIBLE, READONLY)) - r = bind_remount_recursive(m->path, true); - else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { - r = bind_remount_recursive(m->path, false); - if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + r = bind_remount_recursive(m->path, true, blacklist); + else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else - r = 0; - - if (m->ignore && r == -ENOENT) return 0; + /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only + * already stays this way. This improves compatibility with container managers, where we won't attempt to undo + * read-only mounts already applied. */ + return r; } +static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + int r; + + assert(m); + assert(n); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we + * can't resolve the path, and which have been marked for such removal. */ + + for (f = m, t = m; f < m+*n; f++) { + + r = chase_symlinks(f->path, root_directory, &f->chased); + if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */ + continue; + if (r < 0) + return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path); + + if (path_equal(f->path, f->chased)) + f->chased = mfree(f->chased); + else { + log_debug("Chased %s → %s", f->path, f->chased); + f->path = f->chased; + } + + *t = *f; + t++; + } + + *n = t - m; + return 0; +} + +static unsigned namespace_calculate_mounts( + char** read_write_paths, + char** read_only_paths, + char** inaccessible_paths, + const char* tmp_dir, + const char* var_tmp_dir, + bool private_dev, + bool protect_sysctl, + bool protect_cgroups, + ProtectHome protect_home, + ProtectSystem protect_system) { + + unsigned protect_home_cnt; + unsigned protect_system_cnt = + (protect_system == PROTECT_SYSTEM_STRICT ? + ELEMENTSOF(protect_system_strict_table) : + ((protect_system == PROTECT_SYSTEM_FULL) ? + ELEMENTSOF(protect_system_full_table) : + ((protect_system == PROTECT_SYSTEM_YES) ? + ELEMENTSOF(protect_system_yes_table) : 0))); + + protect_home_cnt = + (protect_home == PROTECT_HOME_YES ? + ELEMENTSOF(protect_home_yes_table) : + ((protect_home == PROTECT_HOME_READ_ONLY) ? + ELEMENTSOF(protect_home_read_only_table) : 0)); + + return !!tmp_dir + !!var_tmp_dir + + strv_length(read_write_paths) + + strv_length(read_only_paths) + + strv_length(inaccessible_paths) + + private_dev + + (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + + (protect_cgroups ? 1 : 0) + + protect_home_cnt + protect_system_cnt; +} + int setup_namespace( const char* root_directory, char** read_write_paths, @@ -366,28 +704,31 @@ int setup_namespace( const char* tmp_dir, const char* var_tmp_dir, bool private_dev, + bool protect_sysctl, + bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags) { BindMount *m, *mounts = NULL; + bool make_slave = false; unsigned n; int r = 0; if (mount_flags == 0) mount_flags = MS_SHARED; - if (unshare(CLONE_NEWNS) < 0) - return -errno; + n = namespace_calculate_mounts(read_write_paths, + read_only_paths, + inaccessible_paths, + tmp_dir, var_tmp_dir, + private_dev, protect_sysctl, + protect_cgroups, protect_home, + protect_system); - n = !!tmp_dir + !!var_tmp_dir + - strv_length(read_write_paths) + - strv_length(read_only_paths) + - strv_length(inaccessible_paths) + - private_dev + - (protect_home != PROTECT_HOME_NO ? 3 : 0) + - (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) + - (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0); + /* Set mount slave mode */ + if (root_directory || n > 0) + make_slave = true; if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); @@ -421,94 +762,112 @@ int setup_namespace( m++; } - if (protect_home != PROTECT_HOME_NO) { - const char *home_dir, *run_user_dir, *root_dir; + if (protect_sysctl) + append_protect_kernel_tunables(&m, root_directory); - home_dir = prefix_roota(root_directory, "/home"); - home_dir = strjoina("-", home_dir); - run_user_dir = prefix_roota(root_directory, "/run/user"); - run_user_dir = strjoina("-", run_user_dir); - root_dir = prefix_roota(root_directory, "/root"); - root_dir = strjoina("-", root_dir); - - r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir), - protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE); - if (r < 0) - return r; + if (protect_cgroups) { + m->path = prefix_roota(root_directory, "/sys/fs/cgroup"); + m->mode = READONLY; + m++; } - if (protect_system != PROTECT_SYSTEM_NO) { - const char *usr_dir, *boot_dir, *etc_dir; - - usr_dir = prefix_roota(root_directory, "/usr"); - boot_dir = prefix_roota(root_directory, "/boot"); - boot_dir = strjoina("-", boot_dir); - etc_dir = prefix_roota(root_directory, "/etc"); + r = append_protect_home(&m, root_directory, protect_home); + if (r < 0) + return r; - r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL - ? STRV_MAKE(usr_dir, boot_dir, etc_dir) - : STRV_MAKE(usr_dir, boot_dir), READONLY); - if (r < 0) - return r; - } + r = append_protect_system(&m, root_directory, protect_system); + if (r < 0) + return r; assert(mounts + n == m); + /* Resolve symlinks manually first, as mount() will always follow them relative to the host's + * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit + * racy. */ + r = chase_all_symlinks(root_directory, mounts, &n); + if (r < 0) + goto finish; + qsort(mounts, n, sizeof(BindMount), mount_path_compare); + drop_duplicates(mounts, &n); + drop_outside_root(root_directory, mounts, &n); + drop_inaccessible(mounts, &n); + drop_nop(mounts, &n); + } + + if (unshare(CLONE_NEWNS) < 0) { + r = -errno; + goto finish; } - if (n > 0 || root_directory) { + if (make_slave) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - return -errno; + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (root_directory) { - /* Turn directory into bind mount */ - if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) - return -errno; + /* Turn directory into bind mount, if it isn't one yet */ + r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW); + if (r < 0) + goto finish; + if (r == 0) { + if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } + } } if (n > 0) { + char **blacklist; + unsigned j; + + /* First round, add in all special mounts we need */ for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto fail; + goto finish; } + /* Create a blacklist we can pass to bind_mount_recursive() */ + blacklist = newa(char*, n+1); + for (j = 0; j < n; j++) + blacklist[j] = (char*) mounts[j].path; + blacklist[j] = NULL; + + /* Second round, flip the ro bits if necessary. */ for (m = mounts; m < mounts + n; ++m) { - r = make_read_only(m); + r = make_read_only(m, blacklist); if (r < 0) - goto fail; + goto finish; } } if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - - /* at this point, we cannot rollback */ if (r < 0) - return r; + goto finish; } /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ - if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - /* at this point, we cannot rollback */ - return -errno; + if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } - return 0; + r = 0; -fail: - if (n > 0) { - for (m = mounts; m < mounts + n; ++m) - if (m->done) - (void) umount2(m->path, MNT_DETACH); - } +finish: + for (m = mounts; m < mounts + n; m++) + free(m->chased); return r; } @@ -658,6 +1017,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { [PROTECT_SYSTEM_NO] = "no", [PROTECT_SYSTEM_YES] = "yes", [PROTECT_SYSTEM_FULL] = "full", + [PROTECT_SYSTEM_STRICT] = "strict", }; DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem); diff --git a/src/core/namespace.h b/src/core/namespace.h index 1aedf5f208..6505bcc499 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -35,6 +35,7 @@ typedef enum ProtectSystem { PROTECT_SYSTEM_NO, PROTECT_SYSTEM_YES, PROTECT_SYSTEM_FULL, + PROTECT_SYSTEM_STRICT, _PROTECT_SYSTEM_MAX, _PROTECT_SYSTEM_INVALID = -1 } ProtectSystem; @@ -46,6 +47,8 @@ int setup_namespace(const char *chroot, const char *tmp_dir, const char *var_tmp_dir, bool private_dev, + bool protect_sysctl, + bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags); diff --git a/src/core/socket.c b/src/core/socket.c index 70d55dd9ed..b9032fa5c9 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1334,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) { static int usbffs_dispatch_eps(SocketPort *p) { _cleanup_free_ struct dirent **ent = NULL; - _cleanup_free_ char *path = NULL; int r, i, n, k; - path = dirname_malloc(p->path); - if (!path) - return -ENOMEM; - - r = scandir(path, &ent, usbffs_select_ep, alphasort); + r = scandir(p->path, &ent, usbffs_select_ep, alphasort); if (r < 0) return -errno; @@ -1356,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) { for (i = 0; i < n; ++i) { _cleanup_free_ char *ep = NULL; - ep = path_make_absolute(ent[i]->d_name, path); + ep = path_make_absolute(ent[i]->d_name, p->path); if (!ep) return -ENOMEM; diff --git a/src/core/unit.c b/src/core/unit.c index de22f657c6..693f75c928 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2225,6 +2225,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen return 0; } + if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) { + log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id); + return 0; + } + r = set_ensure_allocated(&u->dependencies[d], NULL); if (r < 0) return r; @@ -3377,8 +3382,14 @@ int unit_patch_contexts(Unit *u) { return -ENOMEM; } + /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID + * around in the file system or on IPC objects. Hence enforce a strict sandbox. */ + ec->private_tmp = true; ec->remove_ipc = true; + ec->protect_system = PROTECT_SYSTEM_STRICT; + if (ec->protect_home == PROTECT_HOME_NO) + ec->protect_home = PROTECT_HOME_READ_ONLY; } } diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 9dea10b3e1..db60d0af7a 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -28,9 +28,10 @@ #include <elfutils/libdwfl.h> #endif +#include "sd-daemon.h" #include "sd-journal.h" #include "sd-login.h" -#include "sd-daemon.h" +#include "sd-messages.h" #include "acl-util.h" #include "alloc-util.h" @@ -93,7 +94,6 @@ typedef enum CoredumpStorage { COREDUMP_STORAGE_NONE, COREDUMP_STORAGE_EXTERNAL, COREDUMP_STORAGE_JOURNAL, - COREDUMP_STORAGE_BOTH, _COREDUMP_STORAGE_MAX, _COREDUMP_STORAGE_INVALID = -1 } CoredumpStorage; @@ -102,7 +102,6 @@ static const char* const coredump_storage_table[_COREDUMP_STORAGE_MAX] = { [COREDUMP_STORAGE_NONE] = "none", [COREDUMP_STORAGE_EXTERNAL] = "external", [COREDUMP_STORAGE_JOURNAL] = "journal", - [COREDUMP_STORAGE_BOTH] = "both", }; DEFINE_PRIVATE_STRING_TABLE_LOOKUP(coredump_storage, CoredumpStorage); @@ -135,6 +134,10 @@ static int parse_config(void) { false, NULL); } +static inline uint64_t storage_size_max(void) { + return arg_storage == COREDUMP_STORAGE_EXTERNAL ? arg_external_size_max : arg_journal_size_max; +} + static int fix_acl(int fd, uid_t uid) { #ifdef HAVE_ACL @@ -247,7 +250,7 @@ static int maybe_remove_external_coredump(const char *filename, uint64_t size) { /* Returns 1 if might remove, 0 if will not remove, < 0 on error. */ - if (IN_SET(arg_storage, COREDUMP_STORAGE_EXTERNAL, COREDUMP_STORAGE_BOTH) && + if (arg_storage == COREDUMP_STORAGE_EXTERNAL && size <= arg_external_size_max) return 0; @@ -331,12 +334,13 @@ static int save_external_coredump( /* Is coredumping disabled? Then don't bother saving/processing the coredump. * Anything below PAGE_SIZE cannot give a readable coredump (the kernel uses * ELF_EXEC_PAGESIZE which is not easily accessible, but is usually the same as PAGE_SIZE. */ - log_info("Core dumping has been disabled for process %s (%s).", context[CONTEXT_PID], context[CONTEXT_COMM]); + log_info("Resource limits disable core dumping for process %s (%s).", + context[CONTEXT_PID], context[CONTEXT_COMM]); return -EBADSLT; } /* Never store more than the process configured, or than we actually shall keep or process */ - max_size = MIN(rlimit, MAX(arg_process_size_max, arg_external_size_max)); + max_size = MIN(rlimit, MAX(arg_process_size_max, storage_size_max())); r = make_filename(context, &fn); if (r < 0) @@ -349,19 +353,18 @@ static int save_external_coredump( return log_error_errno(fd, "Failed to create temporary file for coredump %s: %m", fn); r = copy_bytes(input_fd, fd, max_size, false); - if (r == -EFBIG) { - log_error("Coredump of %s (%s) is larger than configured processing limit, refusing.", context[CONTEXT_PID], context[CONTEXT_COMM]); - goto fail; - } else if (IN_SET(r, -EDQUOT, -ENOSPC)) { - log_error("Not enough disk space for coredump of %s (%s), refusing.", context[CONTEXT_PID], context[CONTEXT_COMM]); - goto fail; - } else if (r < 0) { - log_error_errno(r, "Failed to dump coredump to file: %m"); + if (r < 0) { + log_error_errno(r, "Cannot store coredump of %s (%s): %m", context[CONTEXT_PID], context[CONTEXT_COMM]); goto fail; - } + } else if (r == 1) + log_struct(LOG_INFO, + LOG_MESSAGE("Core file was truncated to %zu bytes.", max_size), + "SIZE_LIMIT=%zu", max_size, + LOG_MESSAGE_ID(SD_MESSAGE_TRUNCATED_CORE), + NULL); if (fstat(fd, &st) < 0) { - log_error_errno(errno, "Failed to fstat coredump %s: %m", coredump_tmpfile_name(tmp)); + log_error_errno(errno, "Failed to fstat core file %s: %m", coredump_tmpfile_name(tmp)); goto fail; } @@ -372,8 +375,7 @@ static int save_external_coredump( #if defined(HAVE_XZ) || defined(HAVE_LZ4) /* If we will remove the coredump anyway, do not compress. */ - if (maybe_remove_external_coredump(NULL, st.st_size) == 0 - && arg_compress) { + if (arg_compress && !maybe_remove_external_coredump(NULL, st.st_size)) { _cleanup_free_ char *fn_compressed = NULL, *tmp_compressed = NULL; _cleanup_close_ int fd_compressed = -1; @@ -678,7 +680,7 @@ static int submit_coredump( _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; _cleanup_free_ char *core_message = NULL, *filename = NULL, *coredump_data = NULL; - uint64_t coredump_size; + uint64_t coredump_size = UINT64_MAX; int r; assert(context); @@ -705,7 +707,9 @@ static int submit_coredump( coredump_filename = strjoina("COREDUMP_FILENAME=", filename); IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename); - } + } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL) + log_info("The core will not be stored: size %zu is greater than %zu (the configured maximum)", + coredump_size, arg_external_size_max); /* Vacuum again, but exclude the coredump we just created */ (void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use); @@ -730,7 +734,9 @@ static int submit_coredump( log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno())); else log_warning_errno(r, "Failed to generate stack trace: %m"); - } + } else + log_debug("Not generating stack trace: core size %zu is greater than %zu (the configured maximum)", + coredump_size, arg_process_size_max); if (!core_message) #endif @@ -740,18 +746,22 @@ log: IOVEC_SET_STRING(iovec[n_iovec++], core_message); /* Optionally store the entire coredump in the journal */ - if (IN_SET(arg_storage, COREDUMP_STORAGE_JOURNAL, COREDUMP_STORAGE_BOTH) && - coredump_size <= arg_journal_size_max) { - size_t sz = 0; - - /* Store the coredump itself in the journal */ - - r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); - if (r >= 0) { - iovec[n_iovec].iov_base = coredump_data; - iovec[n_iovec].iov_len = sz; - n_iovec++; - } + if (arg_storage == COREDUMP_STORAGE_JOURNAL) { + if (coredump_size <= arg_journal_size_max) { + size_t sz = 0; + + /* Store the coredump itself in the journal */ + + r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz); + if (r >= 0) { + iovec[n_iovec].iov_base = coredump_data; + iovec[n_iovec].iov_len = sz; + n_iovec++; + } else + log_warning_errno(r, "Failed to attach the core to the journal entry: %m"); + } else + log_info("The core will not be stored: size %zu is greater than %zu (the configured maximum)", + coredump_size, arg_journal_size_max); } assert(n_iovec <= n_iovec_allocated); diff --git a/src/coredump/coredumpctl.c b/src/coredump/coredumpctl.c index 8ba7c08eed..0e5351e621 100644 --- a/src/coredump/coredumpctl.c +++ b/src/coredump/coredumpctl.c @@ -280,11 +280,10 @@ static int retrieve(const void *data, free(*var); *var = v; - return 0; + return 1; } -static void print_field(FILE* file, sd_journal *j) { - _cleanup_free_ char *value = NULL; +static int print_field(FILE* file, sd_journal *j) { const void *d; size_t l; @@ -293,37 +292,59 @@ static void print_field(FILE* file, sd_journal *j) { assert(arg_field); - SD_JOURNAL_FOREACH_DATA(j, d, l) - retrieve(d, l, arg_field, &value); + /* A (user-specified) field may appear more than once for a given entry. + * We will print all of the occurences. + * This is different below for fields that systemd-coredump uses, + * because they cannot meaningfully appear more than once. + */ + SD_JOURNAL_FOREACH_DATA(j, d, l) { + _cleanup_free_ char *value = NULL; + int r; + + r = retrieve(d, l, arg_field, &value); + if (r < 0) + return r; + if (r > 0) + fprintf(file, "%s\n", value); + } - if (value) - fprintf(file, "%s\n", value); + return 0; } +#define RETRIEVE(d, l, name, arg) \ + { \ + int _r = retrieve(d, l, name, &arg); \ + if (_r < 0) \ + return _r; \ + if (_r > 0) \ + continue; \ + } + static int print_list(FILE* file, sd_journal *j, int had_legend) { _cleanup_free_ char *pid = NULL, *uid = NULL, *gid = NULL, *sgnl = NULL, *exe = NULL, *comm = NULL, *cmdline = NULL, - *filename = NULL; + *filename = NULL, *coredump = NULL; const void *d; size_t l; usec_t t; char buf[FORMAT_TIMESTAMP_MAX]; int r; - bool present; + const char *present; assert(file); assert(j); SD_JOURNAL_FOREACH_DATA(j, d, l) { - retrieve(d, l, "COREDUMP_PID", &pid); - retrieve(d, l, "COREDUMP_UID", &uid); - retrieve(d, l, "COREDUMP_GID", &gid); - retrieve(d, l, "COREDUMP_SIGNAL", &sgnl); - retrieve(d, l, "COREDUMP_EXE", &exe); - retrieve(d, l, "COREDUMP_COMM", &comm); - retrieve(d, l, "COREDUMP_CMDLINE", &cmdline); - retrieve(d, l, "COREDUMP_FILENAME", &filename); + RETRIEVE(d, l, "COREDUMP_PID", pid); + RETRIEVE(d, l, "COREDUMP_UID", uid); + RETRIEVE(d, l, "COREDUMP_GID", gid); + RETRIEVE(d, l, "COREDUMP_SIGNAL", sgnl); + RETRIEVE(d, l, "COREDUMP_EXE", exe); + RETRIEVE(d, l, "COREDUMP_COMM", comm); + RETRIEVE(d, l, "COREDUMP_CMDLINE", cmdline); + RETRIEVE(d, l, "COREDUMP_FILENAME", filename); + RETRIEVE(d, l, "COREDUMP", coredump); } if (!pid && !uid && !gid && !sgnl && !exe && !comm && !cmdline && !filename) { @@ -336,7 +357,6 @@ static int print_list(FILE* file, sd_journal *j, int had_legend) { return log_error_errno(r, "Failed to get realtime timestamp: %m"); format_timestamp(buf, sizeof(buf), t); - present = filename && access(filename, F_OK) == 0; if (!had_legend && !arg_no_legend) fprintf(file, "%-*s %*s %*s %*s %*s %*s %s\n", @@ -345,16 +365,28 @@ static int print_list(FILE* file, sd_journal *j, int had_legend) { 5, "UID", 5, "GID", 3, "SIG", - 1, "PRESENT", + 8, "COREFILE", "EXE"); - fprintf(file, "%-*s %*s %*s %*s %*s %*s %s\n", + if (filename) + if (access(filename, R_OK) == 0) + present = "present"; + else if (errno == ENOENT) + present = "missing"; + else + present = "error"; + else if (coredump) + present = "journal"; + else + present = "none"; + + fprintf(file, "%-*s %*s %*s %*s %*s %-*s %s\n", FORMAT_TIMESTAMP_WIDTH, buf, 6, strna(pid), 5, strna(uid), 5, strna(gid), 3, strna(sgnl), - 1, present ? "*" : "", + 8, present, strna(exe ?: (comm ?: cmdline))); return 0; @@ -367,7 +399,8 @@ static int print_info(FILE *file, sd_journal *j, bool need_space) { *unit = NULL, *user_unit = NULL, *session = NULL, *boot_id = NULL, *machine_id = NULL, *hostname = NULL, *slice = NULL, *cgroup = NULL, *owner_uid = NULL, - *message = NULL, *timestamp = NULL, *filename = NULL; + *message = NULL, *timestamp = NULL, *filename = NULL, + *coredump = NULL; const void *d; size_t l; int r; @@ -376,25 +409,26 @@ static int print_info(FILE *file, sd_journal *j, bool need_space) { assert(j); SD_JOURNAL_FOREACH_DATA(j, d, l) { - retrieve(d, l, "COREDUMP_PID", &pid); - retrieve(d, l, "COREDUMP_UID", &uid); - retrieve(d, l, "COREDUMP_GID", &gid); - retrieve(d, l, "COREDUMP_SIGNAL", &sgnl); - retrieve(d, l, "COREDUMP_EXE", &exe); - retrieve(d, l, "COREDUMP_COMM", &comm); - retrieve(d, l, "COREDUMP_CMDLINE", &cmdline); - retrieve(d, l, "COREDUMP_UNIT", &unit); - retrieve(d, l, "COREDUMP_USER_UNIT", &user_unit); - retrieve(d, l, "COREDUMP_SESSION", &session); - retrieve(d, l, "COREDUMP_OWNER_UID", &owner_uid); - retrieve(d, l, "COREDUMP_SLICE", &slice); - retrieve(d, l, "COREDUMP_CGROUP", &cgroup); - retrieve(d, l, "COREDUMP_TIMESTAMP", ×tamp); - retrieve(d, l, "COREDUMP_FILENAME", &filename); - retrieve(d, l, "_BOOT_ID", &boot_id); - retrieve(d, l, "_MACHINE_ID", &machine_id); - retrieve(d, l, "_HOSTNAME", &hostname); - retrieve(d, l, "MESSAGE", &message); + RETRIEVE(d, l, "COREDUMP_PID", pid); + RETRIEVE(d, l, "COREDUMP_UID", uid); + RETRIEVE(d, l, "COREDUMP_GID", gid); + RETRIEVE(d, l, "COREDUMP_SIGNAL", sgnl); + RETRIEVE(d, l, "COREDUMP_EXE", exe); + RETRIEVE(d, l, "COREDUMP_COMM", comm); + RETRIEVE(d, l, "COREDUMP_CMDLINE", cmdline); + RETRIEVE(d, l, "COREDUMP_UNIT", unit); + RETRIEVE(d, l, "COREDUMP_USER_UNIT", user_unit); + RETRIEVE(d, l, "COREDUMP_SESSION", session); + RETRIEVE(d, l, "COREDUMP_OWNER_UID", owner_uid); + RETRIEVE(d, l, "COREDUMP_SLICE", slice); + RETRIEVE(d, l, "COREDUMP_CGROUP", cgroup); + RETRIEVE(d, l, "COREDUMP_TIMESTAMP", timestamp); + RETRIEVE(d, l, "COREDUMP_FILENAME", filename); + RETRIEVE(d, l, "COREDUMP", coredump); + RETRIEVE(d, l, "_BOOT_ID", boot_id); + RETRIEVE(d, l, "_MACHINE_ID", machine_id); + RETRIEVE(d, l, "_HOSTNAME", hostname); + RETRIEVE(d, l, "MESSAGE", message); } if (need_space) @@ -477,7 +511,7 @@ static int print_info(FILE *file, sd_journal *j, bool need_space) { if (unit) fprintf(file, " Unit: %s\n", unit); if (user_unit) - fprintf(file, " User Unit: %s\n", unit); + fprintf(file, " User Unit: %s\n", user_unit); if (slice) fprintf(file, " Slice: %s\n", slice); if (session) @@ -505,8 +539,13 @@ static int print_info(FILE *file, sd_journal *j, bool need_space) { if (hostname) fprintf(file, " Hostname: %s\n", hostname); - if (filename && access(filename, F_OK) == 0) - fprintf(file, " Coredump: %s\n", filename); + if (filename) + fprintf(file, " Storage: %s%s\n", filename, + access(filename, R_OK) < 0 ? " (inaccessible)" : ""); + else if (coredump) + fprintf(file, " Storage: journal\n"); + else + fprintf(file, " Storage: none\n"); if (message) { _cleanup_free_ char *m = NULL; @@ -534,15 +573,15 @@ static int focus(sd_journal *j) { return r; } -static void print_entry(sd_journal *j, unsigned n_found) { +static int print_entry(sd_journal *j, unsigned n_found) { assert(j); if (arg_action == ACTION_INFO) - print_info(stdout, j, n_found); + return print_info(stdout, j, n_found); else if (arg_field) - print_field(stdout, j); + return print_field(stdout, j); else - print_list(stdout, j, n_found); + return print_list(stdout, j, n_found); } static int dump_list(sd_journal *j) { @@ -561,10 +600,13 @@ static int dump_list(sd_journal *j) { if (r < 0) return r; - print_entry(j, 0); + return print_entry(j, 0); } else { - SD_JOURNAL_FOREACH(j) - print_entry(j, n_found++); + SD_JOURNAL_FOREACH(j) { + r = print_entry(j, n_found++); + if (r < 0) + return r; + } if (!arg_field && n_found <= 0) { log_notice("No coredumps found."); @@ -575,122 +617,142 @@ static int dump_list(sd_journal *j) { return 0; } -static int save_core(sd_journal *j, int fd, char **path, bool *unlink_temp) { +static int save_core(sd_journal *j, FILE *file, char **path, bool *unlink_temp) { const char *data; _cleanup_free_ char *filename = NULL; size_t len; - int r; + int r, fd; + _cleanup_close_ int fdt = -1; + char *temp = NULL; - assert((fd >= 0) != !!path); - assert(!!path == !!unlink_temp); + assert(!(file && path)); /* At most one can be specified */ + assert(!!path == !!unlink_temp); /* Those must be specified together */ - /* Prefer uncompressed file to journal (probably cached) to - * compressed file (probably uncached). */ + /* Look for a coredump on disk first. */ r = sd_journal_get_data(j, "COREDUMP_FILENAME", (const void**) &data, &len); - if (r < 0 && r != -ENOENT) - log_warning_errno(r, "Failed to retrieve COREDUMP_FILENAME: %m"); - else if (r == 0) + if (r == 0) retrieve(data, len, "COREDUMP_FILENAME", &filename); + else { + if (r != -ENOENT) + return log_error_errno(r, "Failed to retrieve COREDUMP_FILENAME field: %m"); + /* Check that we can have a COREDUMP field. We still haven't set a high + * data threshold, so we'll get a few kilobytes at most. + */ - if (filename && access(filename, R_OK) < 0) { - log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, - "File %s is not readable: %m", filename); - filename = mfree(filename); + r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len); + if (r == -ENOENT) + return log_error_errno(r, "Coredump entry has no core attached (neither internally in the journal nor externally on disk)."); + if (r < 0) + return log_error_errno(r, "Failed to retrieve COREDUMP field: %m"); } - if (filename && !endswith(filename, ".xz") && !endswith(filename, ".lz4")) { - if (path) { + if (filename) { + if (access(filename, R_OK) < 0) + return log_error_errno(errno, "File \"%s\" is not readable: %m", filename); + + if (path && !endswith(filename, ".xz") && !endswith(filename, ".lz4")) { *path = filename; filename = NULL; + + return 0; } + } - return 0; - } else { - _cleanup_close_ int fdt = -1; - char *temp = NULL; + if (path) { + const char *vt; - if (fd < 0) { - const char *vt; + /* Create a temporary file to write the uncompressed core to. */ - r = var_tmp_dir(&vt); - if (r < 0) - return log_error_errno(r, "Failed to acquire temporary directory path: %m"); + r = var_tmp_dir(&vt); + if (r < 0) + return log_error_errno(r, "Failed to acquire temporary directory path: %m"); - temp = strjoin(vt, "/coredump-XXXXXX", NULL); - if (!temp) - return log_oom(); + temp = strjoin(vt, "/coredump-XXXXXX", NULL); + if (!temp) + return log_oom(); - fdt = mkostemp_safe(temp); - if (fdt < 0) - return log_error_errno(fdt, "Failed to create temporary file: %m"); - log_debug("Created temporary file %s", temp); + fdt = mkostemp_safe(temp); + if (fdt < 0) + return log_error_errno(fdt, "Failed to create temporary file: %m"); + log_debug("Created temporary file %s", temp); - fd = fdt; + fd = fdt; + } else { + /* If neither path or file are specified, we will write to stdout. Let's now check + * if stdout is connected to a tty. We checked that the file exists, or that the + * core might be stored in the journal. In this second case, if we found the entry, + * in all likelyhood we will be able to access the COREDUMP= field. In either case, + * we stop before doing any "real" work, i.e. before starting decompression or + * reading from the file or creating temporary files. + */ + if (!file) { + if (on_tty()) + return log_error_errno(ENOTTY, "Refusing to dump core to tty" + " (use shell redirection or specify --output)."); + file = stdout; } - r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len); - if (r == 0) { - ssize_t sz; - - assert(len >= 9); - data += 9; - len -= 9; - - sz = write(fdt, data, len); - if (sz < 0) { - r = log_error_errno(errno, - "Failed to write temporary file: %m"); - goto error; - } - if (sz != (ssize_t) len) { - log_error("Short write to temporary file."); - r = -EIO; - goto error; - } - } else if (filename) { + fd = fileno(file); + } + + if (filename) { #if defined(HAVE_XZ) || defined(HAVE_LZ4) - _cleanup_close_ int fdf; - - fdf = open(filename, O_RDONLY | O_CLOEXEC); - if (fdf < 0) { - r = log_error_errno(errno, - "Failed to open %s: %m", - filename); - goto error; - } + _cleanup_close_ int fdf; - r = decompress_stream(filename, fdf, fd, -1); - if (r < 0) { - log_error_errno(r, "Failed to decompress %s: %m", filename); - goto error; - } -#else - log_error("Cannot decompress file. Compiled without compression support."); - r = -EOPNOTSUPP; + fdf = open(filename, O_RDONLY | O_CLOEXEC); + if (fdf < 0) { + r = log_error_errno(errno, "Failed to open %s: %m", filename); goto error; -#endif - } else { - if (r == -ENOENT) - log_error("Cannot retrieve coredump from journal or disk."); - else - log_error_errno(r, "Failed to retrieve COREDUMP field: %m"); + } + + r = decompress_stream(filename, fdf, fd, -1); + if (r < 0) { + log_error_errno(r, "Failed to decompress %s: %m", filename); goto error; } +#else + log_error("Cannot decompress file. Compiled without compression support."); + r = -EOPNOTSUPP; + goto error; +#endif + } else { + ssize_t sz; + + /* We want full data, nothing truncated. */ + sd_journal_set_data_threshold(j, 0); + + r = sd_journal_get_data(j, "COREDUMP", (const void**) &data, &len); + if (r < 0) + return log_error_errno(r, "Failed to retrieve COREDUMP field: %m"); - if (temp) { - *path = temp; - *unlink_temp = true; + assert(len >= 9); + data += 9; + len -= 9; + + sz = write(fd, data, len); + if (sz < 0) { + r = log_error_errno(errno, "Failed to write output: %m"); + goto error; + } + if (sz != (ssize_t) len) { + log_error("Short write to output."); + r = -EIO; + goto error; } + } - return 0; + if (temp) { + *path = temp; + *unlink_temp = true; + } + return 0; error: - if (temp) { - unlink(temp); - log_debug("Removed temporary file %s", temp); - } - return r; + if (temp) { + unlink(temp); + log_debug("Removed temporary file %s", temp); } + return r; } static int dump_core(sd_journal* j) { @@ -704,17 +766,12 @@ static int dump_core(sd_journal* j) { print_info(arg_output ? stdout : stderr, j, false); - if (on_tty() && !arg_output) { - log_error("Refusing to dump core to tty."); - return -ENOTTY; - } - - r = save_core(j, arg_output ? fileno(arg_output) : STDOUT_FILENO, NULL, NULL); + r = save_core(j, arg_output, NULL, NULL); if (r < 0) - return log_error_errno(r, "Coredump retrieval failed: %m"); + return r; r = sd_journal_previous(j); - if (r >= 0) + if (r > 0) log_warning("More than one entry matches, ignoring rest."); return 0; @@ -760,9 +817,9 @@ static int run_gdb(sd_journal *j) { return -ENOENT; } - r = save_core(j, -1, &path, &unlink_path); + r = save_core(j, NULL, &path, &unlink_path); if (r < 0) - return log_error_errno(r, "Failed to retrieve core: %m"); + return r; pid = fork(); if (pid < 0) { @@ -836,9 +893,6 @@ int main(int argc, char *argv[]) { } } - /* We want full data, nothing truncated. */ - sd_journal_set_data_threshold(j, 0); - SET_FOREACH(match, matches, it) { r = sd_journal_add_match(j, match, strlen(match)); if (r != 0) { diff --git a/src/hostname/hostnamectl.c b/src/hostname/hostnamectl.c index 4795324667..07c57fb567 100644 --- a/src/hostname/hostnamectl.c +++ b/src/hostname/hostnamectl.c @@ -278,7 +278,7 @@ static int set_hostname(sd_bus *bus, char **args, unsigned n) { /* Now that we set the pretty hostname, let's clean up the parameter and use that as static * hostname. If the hostname was already valid as static hostname, this will only chop off the trailing * dot if there is one. If it was not valid, then it will be made fully valid by truncating, dropping - * multiple dots, and and dropping weird chars. Note that we clean the name up only if we also are + * multiple dots, and dropping weird chars. Note that we clean the name up only if we also are * supposed to set the pretty name. If the pretty name is not being set we assume the user knows what * he does and pass the name as-is. */ h = strdup(hostname); diff --git a/src/journal-remote/journal-gatewayd.c b/src/journal-remote/journal-gatewayd.c index 54f42b8bf3..7325adee8f 100644 --- a/src/journal-remote/journal-gatewayd.c +++ b/src/journal-remote/journal-gatewayd.c @@ -782,11 +782,11 @@ static int request_handler_machine( r = sd_journal_get_usage(m->journal, &usage); if (r < 0) - return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %s"); + return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %m"); r = sd_journal_get_cutoff_realtime_usec(m->journal, &cutoff_from, &cutoff_to); if (r < 0) - return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %s"); + return mhd_respondf(connection, r, MHD_HTTP_INTERNAL_SERVER_ERROR, "Failed to determine disk usage: %m"); if (parse_env_file("/etc/os-release", NEWLINE, "PRETTY_NAME", &os_name, NULL) == -ENOENT) (void) parse_env_file("/usr/lib/os-release", NEWLINE, "PRETTY_NAME", &os_name, NULL); diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 7504326bff..349ef74e81 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -333,8 +333,13 @@ JournalFile* journal_file_close(JournalFile *f) { #ifdef HAVE_GCRYPT /* Write the final tag */ - if (f->seal && f->writable) - journal_file_append_tag(f); + if (f->seal && f->writable) { + int r; + + r = journal_file_append_tag(f); + if (r < 0) + log_error_errno(r, "Failed to append tag when closing journal: %m"); + } #endif if (f->post_change_timer) { @@ -1369,6 +1374,12 @@ static int journal_file_append_data( if (r < 0) return r; +#ifdef HAVE_GCRYPT + r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p); + if (r < 0) + return r; +#endif + /* The linking might have altered the window, so let's * refresh our pointer */ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); @@ -1393,12 +1404,6 @@ static int journal_file_append_data( fo->field.head_data_offset = le64toh(p); } -#ifdef HAVE_GCRYPT - r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p); - if (r < 0) - return r; -#endif - if (ret) *ret = o; diff --git a/src/journal/journald-rate-limit.c b/src/journal/journald-rate-limit.c index fce799a6ce..f48639cf58 100644 --- a/src/journal/journald-rate-limit.c +++ b/src/journal/journald-rate-limit.c @@ -190,7 +190,7 @@ static unsigned burst_modulate(unsigned burst, uint64_t available) { if (k <= 20) return burst; - burst = (burst * (k-20)) / 4; + burst = (burst * (k-16)) / 4; /* * Example: @@ -261,7 +261,7 @@ int journal_rate_limit_test(JournalRateLimit *r, const char *id, int priority, u return 1 + s; } - if (p->num <= burst) { + if (p->num < burst) { p->num++; return 1; } diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c index 4ad16ee41c..bc092f3c12 100644 --- a/src/journal/journald-stream.c +++ b/src/journal/journald-stream.c @@ -393,6 +393,9 @@ static int stdout_stream_scan(StdoutStream *s, bool force_flush) { p = s->buffer; remaining = s->length; + + /* XXX: This function does nothing if (s->length == 0) */ + for (;;) { char *end; size_t skip; diff --git a/src/kernel-install/kernel-install b/src/kernel-install/kernel-install index c66bcfc092..0c0ee718ac 100644 --- a/src/kernel-install/kernel-install +++ b/src/kernel-install/kernel-install @@ -19,6 +19,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with systemd; If not, see <http://www.gnu.org/licenses/>. +SKIP_REMAINING=77 + usage() { echo "Usage:" @@ -123,7 +125,11 @@ case $COMMAND in for f in "${PLUGINS[@]}"; do if [[ -x $f ]]; then "$f" add "$KERNEL_VERSION" "$BOOT_DIR_ABS" "$KERNEL_IMAGE" - ((ret+=$?)) + x=$? + if [[ $x == $SKIP_REMAINING ]]; then + return 0 + fi + ((ret+=$x)) fi done ;; @@ -132,7 +138,11 @@ case $COMMAND in for f in "${PLUGINS[@]}"; do if [[ -x $f ]]; then "$f" remove "$KERNEL_VERSION" "$BOOT_DIR_ABS" - ((ret+=$?)) + x=$? + if [[ $x == $SKIP_REMAINING ]]; then + return 0 + fi + ((ret+=$x)) fi done diff --git a/src/libsystemd/sd-bus/busctl.c b/src/libsystemd/sd-bus/busctl.c index eb042e9c81..2c3f591053 100644 --- a/src/libsystemd/sd-bus/busctl.c +++ b/src/libsystemd/sd-bus/busctl.c @@ -2003,8 +2003,7 @@ int main(int argc, char *argv[]) { goto finish; } - if (streq_ptr(argv[optind], "monitor") || - streq_ptr(argv[optind], "capture")) { + if (STRPTR_IN_SET(argv[optind], "monitor", "capture")) { r = sd_bus_set_monitor(bus, true); if (r < 0) { diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c index 82237af115..6fdcfa4128 100644 --- a/src/libsystemd/sd-bus/test-bus-creds.c +++ b/src/libsystemd/sd-bus/test-bus-creds.c @@ -27,12 +27,17 @@ int main(int argc, char *argv[]) { _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; int r; + log_set_max_level(LOG_DEBUG); + log_parse_environment(); + log_open(); + if (cg_all_unified() == -ENOMEDIUM) { - puts("Skipping test: /sys/fs/cgroup/ not available"); + log_info("Skipping test: /sys/fs/cgroup/ not available"); return EXIT_TEST_SKIP; } r = sd_bus_creds_new_from_pid(&creds, 0, _SD_BUS_CREDS_ALL); + log_full_errno(r < 0 ? LOG_ERR : LOG_DEBUG, r, "sd_bus_creds_new_from_pid: %m"); assert_se(r >= 0); bus_creds_dump(creds, NULL, true); diff --git a/src/login/logind-session.c b/src/login/logind-session.c index b6da237397..ba1bcc2630 100644 --- a/src/login/logind-session.c +++ b/src/login/logind-session.c @@ -611,7 +611,7 @@ static int session_stop_scope(Session *s, bool force) { return 0; /* Let's always abandon the scope first. This tells systemd that we are not interested anymore, and everything - * that is left in in the scope is "left-over". Informing systemd about this has the benefit that it will log + * that is left in the scope is "left-over". Informing systemd about this has the benefit that it will log * when killing any processes left after this point. */ r = manager_abandon_scope(s->manager, s->scope, &error); if (r < 0) diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 74e1a349bc..e9de31e184 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -1326,10 +1326,12 @@ static int process_forward(sd_event *event, PTYForward **forward, int master, PT assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0); - if (streq(name, ".host")) - log_info("Connected to the local host. Press ^] three times within 1s to exit session."); - else - log_info("Connected to machine %s. Press ^] three times within 1s to exit session.", name); + if (!arg_quiet) { + if (streq(name, ".host")) + log_info("Connected to the local host. Press ^] three times within 1s to exit session."); + else + log_info("Connected to machine %s. Press ^] three times within 1s to exit session.", name); + } sd_event_add_signal(event, NULL, SIGINT, NULL, NULL); sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL); @@ -1353,17 +1355,54 @@ static int process_forward(sd_event *event, PTYForward **forward, int master, PT if (last_char != '\n') fputc('\n', stdout); - if (machine_died) - log_info("Machine %s terminated.", name); - else if (streq(name, ".host")) - log_info("Connection to the local host terminated."); - else - log_info("Connection to machine %s terminated.", name); + if (!arg_quiet) { + if (machine_died) + log_info("Machine %s terminated.", name); + else if (streq(name, ".host")) + log_info("Connection to the local host terminated."); + else + log_info("Connection to machine %s terminated.", name); + } sd_event_get_exit_code(event, &ret); return ret; } +static int parse_machine_uid(const char *spec, const char **machine, char **uid) { + /* + * Whatever is specified in the spec takes priority over global arguments. + */ + char *_uid = NULL; + const char *_machine = NULL; + + if (spec) { + const char *at; + + at = strchr(spec, '@'); + if (at) { + if (at == spec) + /* Do the same as ssh and refuse "@host". */ + return -EINVAL; + + _machine = at + 1; + _uid = strndup(spec, at - spec); + if (!_uid) + return -ENOMEM; + } else + _machine = spec; + }; + + if (arg_uid && !_uid) { + _uid = strdup(arg_uid); + if (!_uid) + return -ENOMEM; + } + + *uid = _uid; + *machine = isempty(_machine) ? ".host" : _machine; + return 0; +} + static int login_machine(int argc, char *argv[], void *userdata) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -1439,7 +1478,8 @@ static int shell_machine(int argc, char *argv[], void *userdata) { _cleanup_(sd_event_unrefp) sd_event *event = NULL; int master = -1, r; sd_bus *bus = userdata; - const char *pty, *match, *machine, *path, *uid = NULL; + const char *pty, *match, *machine, *path; + _cleanup_free_ char *uid = NULL; assert(bus); @@ -1470,22 +1510,9 @@ static int shell_machine(int argc, char *argv[], void *userdata) { if (r < 0) return log_error_errno(r, "Failed to attach bus to event loop: %m"); - machine = argc < 2 || isempty(argv[1]) ? NULL : argv[1]; - - if (arg_uid) - uid = arg_uid; - else if (machine) { - const char *at; - - at = strchr(machine, '@'); - if (at) { - uid = strndupa(machine, at - machine); - machine = at + 1; - } - } - - if (isempty(machine)) - machine = ".host"; + r = parse_machine_uid(argc >= 2 ? argv[1] : NULL, &machine, &uid); + if (r < 0) + return log_error_errno(r, "Failed to parse machine specification: %m"); match = strjoina("type='signal'," "sender='org.freedesktop.machine1'," diff --git a/src/network/networkctl.c b/src/network/networkctl.c index d2df9b7560..6f7f41bf7d 100644 --- a/src/network/networkctl.c +++ b/src/network/networkctl.c @@ -122,7 +122,7 @@ static void setup_state_to_color(const char *state, const char **on, const char } else if (streq_ptr(state, "configuring")) { *on = ansi_highlight_yellow(); *off = ansi_normal(); - } else if (streq_ptr(state, "failed") || streq_ptr(state, "linger")) { + } else if (STRPTR_IN_SET(state, "failed", "linger")) { *on = ansi_highlight_red(); *off = ansi_normal(); } else diff --git a/src/network/networkd-dhcp4.c b/src/network/networkd-dhcp4.c index 12fb8e3fce..76d3d132ea 100644 --- a/src/network/networkd-dhcp4.c +++ b/src/network/networkd-dhcp4.c @@ -95,6 +95,7 @@ static int link_set_dhcp_routes(Link *link) { route_gw->scope = RT_SCOPE_LINK; route_gw->protocol = RTPROT_DHCP; route_gw->priority = link->network->dhcp_route_metric; + route_gw->table = link->network->dhcp_route_table; r = route_configure(route_gw, link, dhcp4_route_handler); if (r < 0) @@ -106,6 +107,7 @@ static int link_set_dhcp_routes(Link *link) { route->gw.in = gateway; route->prefsrc.in = address; route->priority = link->network->dhcp_route_metric; + route->table = link->network->dhcp_route_table; r = route_configure(route, link, dhcp4_route_handler); if (r < 0) { @@ -136,6 +138,7 @@ static int link_set_dhcp_routes(Link *link) { assert_se(sd_dhcp_route_get_destination(static_routes[i], &route->dst.in) >= 0); assert_se(sd_dhcp_route_get_destination_prefix_length(static_routes[i], &route->dst_prefixlen) >= 0); route->priority = link->network->dhcp_route_metric; + route->table = link->network->dhcp_route_table; r = route_configure(route, link, dhcp4_route_handler); if (r < 0) diff --git a/src/network/networkd-fdb.c b/src/network/networkd-fdb.c index be8aebee2d..ed5a47589e 100644 --- a/src/network/networkd-fdb.c +++ b/src/network/networkd-fdb.c @@ -107,20 +107,28 @@ int fdb_entry_configure(Link *link, FdbEntry *fdb_entry) { _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; sd_netlink *rtnl; int r; + uint8_t flags; + Bridge *bridge; assert(link); + assert(link->network); assert(link->manager); assert(fdb_entry); rtnl = link->manager->rtnl; + bridge = BRIDGE(link->network->bridge); /* create new RTM message */ r = sd_rtnl_message_new_neigh(rtnl, &req, RTM_NEWNEIGH, link->ifindex, PF_BRIDGE); if (r < 0) return rtnl_log_create_error(r); - /* only NTF_SELF flag supported. */ - r = sd_rtnl_message_neigh_set_flags(req, NTF_SELF); + if (bridge) + flags = NTF_MASTER; + else + flags = NTF_SELF; + + r = sd_rtnl_message_neigh_set_flags(req, flags); if (r < 0) return rtnl_log_create_error(r); diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c index 1687d9bf31..d9e060b6cf 100644 --- a/src/network/networkd-link.c +++ b/src/network/networkd-link.c @@ -942,6 +942,19 @@ static int link_push_ntp_to_dhcp_server(Link *link, sd_dhcp_server *s) { return sd_dhcp_server_set_ntp(s, addresses, n_addresses); } +static int link_set_bridge_fdb(Link *link) { + FdbEntry *fdb_entry; + int r; + + LIST_FOREACH(static_fdb_entries, fdb_entry, link->network->static_fdb_entries) { + r = fdb_entry_configure(link, fdb_entry); + if (r < 0) + return log_link_error_errno(link, r, "Failed to add MAC entry to static MAC table: %m"); + } + + return 0; +} + static int link_enter_set_addresses(Link *link) { Address *ad; int r; @@ -950,6 +963,10 @@ static int link_enter_set_addresses(Link *link) { assert(link->network); assert(link->state != _LINK_STATE_INVALID); + r = link_set_bridge_fdb(link); + if (r < 0) + return r; + link_set_state(link, LINK_STATE_SETTING_ADDRESSES); LIST_FOREACH(addresses, ad, link->network->static_addresses) { @@ -1119,21 +1136,6 @@ static int link_set_bridge_vlan(Link *link) { return r; } -static int link_set_bridge_fdb(Link *link) { - FdbEntry *fdb_entry; - int r = 0; - - LIST_FOREACH(static_fdb_entries, fdb_entry, link->network->static_fdb_entries) { - r = fdb_entry_configure(link, fdb_entry); - if (r < 0) { - log_link_error_errno(link, r, "Failed to add MAC entry to static MAC table: %m"); - break; - } - } - - return r; -} - static int link_set_proxy_arp(Link *link) { const char *p = NULL; int r; @@ -2477,10 +2479,6 @@ static int link_configure(Link *link) { return r; } - r = link_set_bridge_fdb(link); - if (r < 0) - return r; - r = link_set_proxy_arp(link); if (r < 0) return r; @@ -2995,7 +2993,8 @@ static int link_carrier_lost(Link *link) { if (r < 0) return r; - if (link->state != LINK_STATE_UNMANAGED) { + if (!IN_SET(link->state, LINK_STATE_UNMANAGED, LINK_STATE_PENDING)) { + log_link_debug(link, "State is %s, dropping config", link_state_to_string(link->state)); r = link_drop_foreign_config(link); if (r < 0) return r; diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c index d9c18b32a5..c2b7970623 100644 --- a/src/network/networkd-ndisc.c +++ b/src/network/networkd-ndisc.c @@ -94,7 +94,7 @@ static void ndisc_router_process_default(Link *link, sd_ndisc_router *rt) { } route->family = AF_INET6; - route->table = RT_TABLE_MAIN; + route->table = link->network->ipv6_accept_ra_route_table; route->protocol = RTPROT_RA; route->pref = preference; route->gw.in6 = gateway; @@ -214,7 +214,7 @@ static void ndisc_router_process_onlink_prefix(Link *link, sd_ndisc_router *rt) } route->family = AF_INET6; - route->table = RT_TABLE_MAIN; + route->table = link->network->ipv6_accept_ra_route_table; route->protocol = RTPROT_RA; route->flags = RTM_F_PREFIX; route->dst_prefixlen = prefixlen; @@ -285,7 +285,7 @@ static void ndisc_router_process_route(Link *link, sd_ndisc_router *rt) { } route->family = AF_INET6; - route->table = RT_TABLE_MAIN; + route->table = link->network->ipv6_accept_ra_route_table; route->protocol = RTPROT_RA; route->pref = preference; route->gw.in6 = gateway; diff --git a/src/network/networkd-netdev-bridge.c b/src/network/networkd-netdev-bridge.c index bdbea7d770..002ad94210 100644 --- a/src/network/networkd-netdev-bridge.c +++ b/src/network/networkd-netdev-bridge.c @@ -39,7 +39,7 @@ static int netdev_bridge_set_handler(sd_netlink *rtnl, sd_netlink_message *m, vo return 1; } - log_netdev_debug(netdev, "Bridge parametres set success"); + log_netdev_debug(netdev, "Bridge parameters set success"); return 1; } diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf index b96f0b7210..62779c7c48 100644 --- a/src/network/networkd-network-gperf.gperf +++ b/src/network/networkd-network-gperf.gperf @@ -92,10 +92,12 @@ DHCP.VendorClassIdentifier, config_parse_string, DHCP.DUIDType, config_parse_duid_type, 0, offsetof(Network, duid.type) DHCP.DUIDRawData, config_parse_duid_rawdata, 0, offsetof(Network, duid) DHCP.RouteMetric, config_parse_unsigned, 0, offsetof(Network, dhcp_route_metric) +DHCP.RouteTable, config_parse_dhcp_route_table, 0, offsetof(Network, dhcp_route_table) DHCP.UseTimezone, config_parse_bool, 0, offsetof(Network, dhcp_use_timezone) DHCP.IAID, config_parse_iaid, 0, offsetof(Network, iaid) IPv6AcceptRA.UseDNS, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_dns) IPv6AcceptRA.UseDomains, config_parse_dhcp_use_domains, 0, offsetof(Network, ipv6_accept_ra_use_domains) +IPv6AcceptRA.RouteTable, config_parse_dhcp_route_table, 0, offsetof(Network, ipv6_accept_ra_route_table) DHCPServer.MaxLeaseTimeSec, config_parse_sec, 0, offsetof(Network, dhcp_server_max_lease_time_usec) DHCPServer.DefaultLeaseTimeSec, config_parse_sec, 0, offsetof(Network, dhcp_server_default_lease_time_usec) DHCPServer.EmitDNS, config_parse_bool, 0, offsetof(Network, dhcp_server_emit_dns) diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c index 313abca762..584cb96979 100644 --- a/src/network/networkd-network.c +++ b/src/network/networkd-network.c @@ -111,6 +111,7 @@ static int network_load_one(Manager *manager, const char *filename) { network->dhcp_send_hostname = true; network->dhcp_route_metric = DHCP_ROUTE_METRIC; network->dhcp_client_identifier = DHCP_CLIENT_ID_DUID; + network->dhcp_route_table = RT_TABLE_MAIN; network->dhcp_server_emit_dns = true; network->dhcp_server_emit_ntp = true; @@ -137,6 +138,7 @@ static int network_load_one(Manager *manager, const char *filename) { network->proxy_arp = -1; network->arp = -1; network->ipv6_accept_ra_use_dns = true; + network->ipv6_accept_ra_route_table = RT_TABLE_MAIN; dropin_dirname = strjoina(network->name, ".network.d"); @@ -1033,6 +1035,36 @@ int config_parse_dnssec_negative_trust_anchors( return 0; } +int config_parse_dhcp_route_table(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + uint32_t rt; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = safe_atou32(rvalue, &rt); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, + "Unable to read RouteTable, ignoring assignment: %s", rvalue); + return 0; + } + + *((uint32_t *)data) = rt; + + return 0; +} + DEFINE_CONFIG_PARSE_ENUM(config_parse_dhcp_use_domains, dhcp_use_domains, DHCPUseDomains, "Failed to parse DHCP use domains setting"); static const char* const dhcp_use_domains_table[_DHCP_USE_DOMAINS_MAX] = { diff --git a/src/network/networkd-network.h b/src/network/networkd-network.h index 5460eb4d1c..ef4b499ab9 100644 --- a/src/network/networkd-network.h +++ b/src/network/networkd-network.h @@ -123,6 +123,7 @@ struct Network { bool dhcp_use_routes; bool dhcp_use_timezone; unsigned dhcp_route_metric; + uint32_t dhcp_route_table; /* DHCP Server Support */ bool dhcp_server; @@ -166,6 +167,7 @@ struct Network { bool ipv6_accept_ra_use_dns; DHCPUseDomains ipv6_accept_ra_use_domains; + uint32_t ipv6_accept_ra_route_table; union in_addr_union ipv6_token; IPv6PrivacyExtensions ipv6_privacy_extensions; @@ -228,6 +230,7 @@ int config_parse_dhcp_server_ntp(const char *unit, const char *filename, unsigne int config_parse_dnssec_negative_trust_anchors(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_dhcp_use_domains(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_lldp_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_dhcp_route_table(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* Legacy IPv4LL support */ int config_parse_ipv4ll(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 295b75341f..25d38aa742 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -314,19 +314,21 @@ int mount_all(const char *dest, } MountPoint; static const MountPoint mount_table[] = { - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/ - { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */ - { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false }, + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/ + { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */ + { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, false, true, false }, /* Bind mount first ...*/ + { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, true, false }, /* ... then, make it r/o */ + { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, + { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ #endif }; @@ -356,7 +358,7 @@ int mount_all(const char *dest, continue; r = mkdir_p(where, 0755); - if (r < 0) { + if (r < 0 && r != -EEXIST) { if (mount_table[k].fatal) return log_error_errno(r, "Failed to create directory %s: %m", where); @@ -476,7 +478,7 @@ static int mount_bind(const char *dest, CustomMount *m) { return log_error_errno(errno, "mount(%s) failed: %m", where); if (m->read_only) { - r = bind_remount_recursive(where, true); + r = bind_remount_recursive(where, true, NULL); if (r < 0) return log_error_errno(r, "Read-only bind mount failed: %m"); } @@ -990,7 +992,7 @@ int setup_volatile_state( /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */ - r = bind_remount_recursive(directory, true); + r = bind_remount_recursive(directory, true, NULL); if (r < 0) return log_error_errno(r, "Failed to remount %s read-only: %m", directory); @@ -1065,7 +1067,7 @@ int setup_volatile( bind_mounted = true; - r = bind_remount_recursive(t, true); + r = bind_remount_recursive(t, true, NULL); if (r < 0) { log_error_errno(r, "Failed to remount %s read-only: %m", t); goto fail; diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 632c12898f..9a8274c8de 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1045,7 +1045,8 @@ static int parse_argv(int argc, char *argv[]) { parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS); parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_SYSTEM", CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS); - if (arg_clone_ns_flags != (CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)) { + if (!(arg_clone_ns_flags & CLONE_NEWPID) || + !(arg_clone_ns_flags & CLONE_NEWUTS)) { arg_register = false; if (arg_start_mode != START_PID1) { log_error("--boot cannot be used without namespacing."); @@ -1221,7 +1222,7 @@ static int setup_timezone(const char *dest) { if (r < 0) { log_warning("host's /etc/localtime is not a symlink, not updating container timezone."); /* to handle warning, delete /etc/localtime and replace it - * it /w a symbolic link to a time zone data file. + * with a symbolic link to a time zone data file. * * Example: * ln -s /usr/share/zoneinfo/UTC /etc/localtime @@ -1394,6 +1395,12 @@ static int copy_devnodes(const char *dest) { } else { if (mknod(to, st.st_mode, st.st_rdev) < 0) { + /* + * This is some sort of protection too against + * recursive userns chown on shared /dev/ + */ + if (errno == EEXIST) + log_notice("%s/dev/ should be an empty directory", dest); if (errno != EPERM) return log_error_errno(errno, "mknod(%s) failed: %m", to); @@ -1754,6 +1761,11 @@ static int setup_propagate(const char *root) { if (mount(NULL, q, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) return log_error_errno(errno, "Failed to make propagation mount read-only"); + /* machined will MS_MOVE into that directory, and that's only + * supported for non-shared mounts. */ + if (mount(NULL, q, NULL, MS_SLAVE, NULL) < 0) + return log_error_errno(errno, "Failed to make propagation mount slave"); + return 0; } @@ -2989,6 +3001,15 @@ static int outer_child( if (mount(directory, directory, NULL, MS_BIND|MS_REC, NULL) < 0) return log_error_errno(errno, "Failed to make bind mount: %m"); + /* Mark everything as shared so our mounts get propagated down. This is + * required to make new bind mounts available in systemd services + * inside the containter that create a new mount namespace. + * See https://github.com/systemd/systemd/issues/3860 + * Further submounts (such as /dev) done after this will inherit the + * shared propagation mode.*/ + if (mount(NULL, directory, NULL, MS_SHARED|MS_REC, NULL) < 0) + return log_error_errno(errno, "MS_SHARED|MS_REC failed: %m"); + r = recursive_chown(directory, arg_uid_shift, arg_uid_range); if (r < 0) return r; @@ -3018,7 +3039,7 @@ static int outer_child( return r; if (arg_read_only) { - r = bind_remount_recursive(directory, true); + r = bind_remount_recursive(directory, true, NULL); if (r < 0) return log_error_errno(r, "Failed to make tree read-only: %m"); } diff --git a/src/nss-resolve/nss-resolve.c b/src/nss-resolve/nss-resolve.c index 5ce10f1cbd..eea91e3e88 100644 --- a/src/nss-resolve/nss-resolve.c +++ b/src/nss-resolve/nss-resolve.c @@ -279,9 +279,12 @@ fallback: } fail: + /* When we arrive here, resolved runs and has answered (fallback to + * "dns" is handled earlier). So we have a definitive "no" answer and + * should not fall back to subsequent NSS modules via "UNAVAIL". */ *errnop = -r; *h_errnop = NO_RECOVERY; - return NSS_STATUS_UNAVAIL; + return NSS_STATUS_NOTFOUND; } enum nss_status _nss_resolve_gethostbyname3_r( @@ -476,7 +479,7 @@ fallback: fail: *errnop = -r; *h_errnop = NO_RECOVERY; - return NSS_STATUS_UNAVAIL; + return NSS_STATUS_NOTFOUND; } enum nss_status _nss_resolve_gethostbyaddr2_r( @@ -558,9 +561,7 @@ enum nss_status _nss_resolve_gethostbyaddr2_r( goto fallback; - *errnop = -r; - *h_errnop = NO_RECOVERY; - return NSS_STATUS_UNAVAIL; + goto fail; } r = sd_bus_message_enter_container(reply, 'a', "(is)"); @@ -668,7 +669,7 @@ fallback: fail: *errnop = -r; *h_errnop = NO_RECOVERY; - return NSS_STATUS_UNAVAIL; + return NSS_STATUS_NOTFOUND; } NSS_GETHOSTBYNAME_FALLBACKS(resolve); diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c index ed0c6aa105..03811ac8e7 100644 --- a/src/resolve/resolved-dns-scope.c +++ b/src/resolve/resolved-dns-scope.c @@ -407,6 +407,7 @@ int dns_scope_socket_tcp(DnsScope *s, int family, const union in_addr_union *add DnsScopeMatch dns_scope_good_domain(DnsScope *s, int ifindex, uint64_t flags, const char *domain) { DnsSearchDomain *d; + DnsServer *dns_server; assert(s); assert(domain); @@ -447,6 +448,13 @@ DnsScopeMatch dns_scope_good_domain(DnsScope *s, int ifindex, uint64_t flags, co if (dns_name_endswith(domain, d->name) > 0) return DNS_SCOPE_YES; + /* If the DNS server has route-only domains, don't send other requests + * to it. This would be a privacy violation, will most probably fail + * anyway, and adds unnecessary load. */ + dns_server = dns_scope_get_dns_server(s); + if (dns_server && dns_server_limited_domains(dns_server)) + return DNS_SCOPE_NO; + switch (s->protocol) { case DNS_PROTOCOL_DNS: diff --git a/src/resolve/resolved-dns-server.c b/src/resolve/resolved-dns-server.c index 9b7b471600..97cc8c0e09 100644 --- a/src/resolve/resolved-dns-server.c +++ b/src/resolve/resolved-dns-server.c @@ -576,6 +576,27 @@ void dns_server_warn_downgrade(DnsServer *server) { server->warned_downgrade = true; } +bool dns_server_limited_domains(DnsServer *server) +{ + DnsSearchDomain *domain; + bool domain_restricted = false; + + /* Check if the server has route-only domains without ~., i. e. whether + * it should only be used for particular domains */ + if (!server->link) + return false; + + LIST_FOREACH(domains, domain, server->link->search_domains) + if (domain->route_only) { + domain_restricted = true; + /* ~. means "any domain", thus it is a global server */ + if (streq(DNS_SEARCH_DOMAIN_NAME(domain), ".")) + return false; + } + + return domain_restricted; +} + static void dns_server_hash_func(const void *p, struct siphash *state) { const DnsServer *s = p; diff --git a/src/resolve/resolved-dns-server.h b/src/resolve/resolved-dns-server.h index c1732faffd..83e288a202 100644 --- a/src/resolve/resolved-dns-server.h +++ b/src/resolve/resolved-dns-server.h @@ -128,6 +128,8 @@ bool dns_server_dnssec_supported(DnsServer *server); void dns_server_warn_downgrade(DnsServer *server); +bool dns_server_limited_domains(DnsServer *server); + DnsServer *dns_server_find(DnsServer *first, int family, const union in_addr_union *in_addr, int ifindex); void dns_server_unlink_all(DnsServer *first); diff --git a/src/resolve/resolved-resolv-conf.c b/src/resolve/resolved-resolv-conf.c index 31b25ca50f..801014caf5 100644 --- a/src/resolve/resolved-resolv-conf.c +++ b/src/resolve/resolved-resolv-conf.c @@ -154,6 +154,16 @@ static void write_resolv_conf_server(DnsServer *s, FILE *f, unsigned *count) { return; } + /* Check if the DNS server is limited to particular domains; + * resolv.conf does not have a syntax to express that, so it must not + * appear as a global name server to avoid routing unrelated domains to + * it (which is a privacy violation, will most probably fail anyway, + * and adds unnecessary load) */ + if (dns_server_limited_domains(s)) { + log_debug("DNS server %s has route-only domains, not using as global name server", dns_server_string(s)); + return; + } + if (*count == MAXNS) fputs("# Too many DNS servers configured, the following entries may be ignored.\n", f); (*count)++; diff --git a/src/run/run.c b/src/run/run.c index 2dd229868c..81b53fdfab 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -1168,17 +1168,21 @@ static int start_transient_scope( uid_t uid; gid_t gid; - r = get_user_creds(&arg_exec_user, &uid, &gid, &home, &shell); + r = get_user_creds_clean(&arg_exec_user, &uid, &gid, &home, &shell); if (r < 0) return log_error_errno(r, "Failed to resolve user %s: %m", arg_exec_user); - r = strv_extendf(&user_env, "HOME=%s", home); - if (r < 0) - return log_oom(); + if (home) { + r = strv_extendf(&user_env, "HOME=%s", home); + if (r < 0) + return log_oom(); + } - r = strv_extendf(&user_env, "SHELL=%s", shell); - if (r < 0) - return log_oom(); + if (shell) { + r = strv_extendf(&user_env, "SHELL=%s", shell); + if (r < 0) + return log_oom(); + } r = strv_extendf(&user_env, "USER=%s", arg_exec_user); if (r < 0) diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index feb4a06737..c6bd2f145c 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -204,7 +204,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute", - "RestrictRealtime", "DynamicUser", "RemoveIPC")) { + "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectControlGroups")) { r = parse_boolean(eq); if (r < 0) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 2f42381fc1..8116c7671f 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -39,6 +39,10 @@ const char* seccomp_arch_to_string(uint32_t c) { return "x32"; if (c == SCMP_ARCH_ARM) return "arm"; + if (c == SCMP_ARCH_S390) + return "s390"; + if (c == SCMP_ARCH_S390X) + return "s390x"; return NULL; } @@ -59,6 +63,10 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) { *ret = SCMP_ARCH_X32; else if (streq(n, "arm")) *ret = SCMP_ARCH_ARM; + else if (streq(n, "s390")) + *ret = SCMP_ARCH_S390; + else if (streq(n, "s390x")) + *ret = SCMP_ARCH_S390X; else return -EINVAL; @@ -85,6 +93,20 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) { if (r < 0 && r != -EEXIST) return r; +#elif defined(__s390__) || defined(__s390x__) + int r; + + /* Add in all possible secondary archs we are aware of that + * this kernel might support. */ + + r = seccomp_arch_add(c, SCMP_ARCH_S390); + if (r < 0 && r != -EEXIST) + return r; + + r = seccomp_arch_add(c, SCMP_ARCH_S390X); + if (r < 0 && r != -EEXIST) + return r; + #endif return 0; diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index ce7c26e7d3..fbc1e0eb1a 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -41,12 +41,12 @@ static char **arg_prefixes = NULL; static const char conf_file_dirs[] = CONF_PATHS_NULSTR("sysctl.d"); -static int apply_all(Hashmap *sysctl_options) { +static int apply_all(OrderedHashmap *sysctl_options) { char *property, *value; Iterator i; int r = 0; - HASHMAP_FOREACH_KEY(value, property, sysctl_options, i) { + ORDERED_HASHMAP_FOREACH_KEY(value, property, sysctl_options, i) { int k; k = sysctl_write(property, value); @@ -62,7 +62,7 @@ static int apply_all(Hashmap *sysctl_options) { return r; } -static int parse_file(Hashmap *sysctl_options, const char *path, bool ignore_enoent) { +static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ignore_enoent) { _cleanup_fclose_ FILE *f = NULL; int r; @@ -125,13 +125,13 @@ static int parse_file(Hashmap *sysctl_options, const char *path, bool ignore_eno } found: - existing = hashmap_get2(sysctl_options, p, &v); + existing = ordered_hashmap_get2(sysctl_options, p, &v); if (existing) { if (streq(value, existing)) continue; log_debug("Overwriting earlier assignment of %s in file '%s'.", p, path); - free(hashmap_remove(sysctl_options, p)); + free(ordered_hashmap_remove(sysctl_options, p)); free(v); } @@ -145,7 +145,7 @@ found: return log_oom(); } - k = hashmap_put(sysctl_options, property, new_value); + k = ordered_hashmap_put(sysctl_options, property, new_value); if (k < 0) { log_error_errno(k, "Failed to add sysctl variable %s to hashmap: %m", property); free(property); @@ -230,7 +230,7 @@ static int parse_argv(int argc, char *argv[]) { int main(int argc, char *argv[]) { int r = 0, k; - Hashmap *sysctl_options; + OrderedHashmap *sysctl_options; r = parse_argv(argc, argv); if (r <= 0) @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) { umask(0022); - sysctl_options = hashmap_new(&string_hash_ops); + sysctl_options = ordered_hashmap_new(&string_hash_ops); if (!sysctl_options) { r = log_oom(); goto finish; @@ -280,7 +280,7 @@ int main(int argc, char *argv[]) { r = k; finish: - hashmap_free_free_free(sysctl_options); + ordered_hashmap_free_free_free(sysctl_options); strv_free(arg_prefixes); return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 5912441168..bb6002e8ef 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -118,6 +118,7 @@ static enum dependency { } arg_dependency = DEPENDENCY_FORWARD; static const char *arg_job_mode = "replace"; static UnitFileScope arg_scope = UNIT_FILE_SYSTEM; +static bool arg_wait = false; static bool arg_no_block = false; static bool arg_no_legend = false; static bool arg_no_pager = false; @@ -2679,13 +2680,86 @@ static const char *method_to_verb(const char *method) { return "n/a"; } +typedef struct { + sd_bus_slot *match; + sd_event *event; + Set *unit_paths; + bool any_failed; +} WaitContext; + +static void wait_context_free(WaitContext *c) { + c->match = sd_bus_slot_unref(c->match); + c->event = sd_event_unref(c->event); + c->unit_paths = set_free(c->unit_paths); +} + +static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) { + WaitContext *c = userdata; + const char *path; + int r; + + path = sd_bus_message_get_path(m); + if (!set_contains(c->unit_paths, path)) + return 0; + + /* Check if ActiveState changed to inactive/failed */ + /* (s interface, a{sv} changed_properties, as invalidated_properties) */ + r = sd_bus_message_skip(m, "s"); + if (r < 0) + return bus_log_parse_error(r); + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}"); + if (r < 0) + return bus_log_parse_error(r); + + while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) { + const char *s; + bool is_failed; + + r = sd_bus_message_read(m, "s", &s); + if (r < 0) + return bus_log_parse_error(r); + if (streq(s, "ActiveState")) { + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, "s"); + if (r < 0) + return bus_log_parse_error(r); + r = sd_bus_message_read(m, "s", &s); + if (r < 0) + return bus_log_parse_error(r); + is_failed = streq(s, "failed"); + if (streq(s, "inactive") || is_failed) { + log_debug("%s became %s, dropping from --wait tracking", path, s); + set_remove(c->unit_paths, path); + c->any_failed |= is_failed; + } else + log_debug("ActiveState on %s changed to %s", path, s); + break; /* no need to dissect the rest of the message */ + } else { + /* other property */ + r = sd_bus_message_skip(m, "v"); + if (r < 0) + return bus_log_parse_error(r); + } + r = sd_bus_message_exit_container(m); + if (r < 0) + return bus_log_parse_error(r); + } + if (r < 0) + return bus_log_parse_error(r); + + if (set_isempty(c->unit_paths)) + sd_event_exit(c->event, EXIT_SUCCESS); + + return 0; +} + static int start_unit_one( sd_bus *bus, const char *method, const char *name, const char *mode, sd_bus_error *error, - BusWaitForJobs *w) { + BusWaitForJobs *w, + WaitContext *wait_context) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; const char *path; @@ -2696,6 +2770,40 @@ static int start_unit_one( assert(mode); assert(error); + if (wait_context) { + _cleanup_free_ char *unit_path = NULL; + const char* mt; + + log_debug("Watching for property changes of %s", name); + r = sd_bus_call_method( + bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "RefUnit", + error, + NULL, + "s", name); + if (r < 0) + return log_error_errno(r, "Failed to RefUnit %s: %s", name, bus_error_message(error, r)); + + unit_path = unit_dbus_path_from_name(name); + if (!unit_path) + return log_oom(); + + r = set_put_strdup(wait_context->unit_paths, unit_path); + if (r < 0) + return log_error_errno(r, "Failed to add unit path %s to set: %m", unit_path); + + mt = strjoina("type='signal'," + "interface='org.freedesktop.DBus.Properties'," + "path='", unit_path, "'," + "member='PropertiesChanged'"); + r = sd_bus_add_match(bus, &wait_context->match, mt, on_properties_changed, wait_context); + if (r < 0) + return log_error_errno(r, "Failed to add match for PropertiesChanged signal: %m"); + } + log_debug("Calling manager for %s on %s, %s", method, name, mode); r = sd_bus_call_method( @@ -2841,10 +2949,18 @@ static int start_unit(int argc, char *argv[], void *userdata) { const char *method, *mode, *one_name, *suffix = NULL; _cleanup_strv_free_ char **names = NULL; sd_bus *bus; + _cleanup_(wait_context_free) WaitContext wait_context = {}; char **name; int r = 0; - r = acquire_bus(BUS_MANAGER, &bus); + if (arg_wait && !strstr(argv[0], "start")) { + log_error("--wait may only be used with a command that starts units."); + return -EINVAL; + } + + /* we cannot do sender tracking on the private bus, so we need the full + * one for RefUnit to implement --wait */ + r = acquire_bus(arg_wait ? BUS_FULL : BUS_MANAGER, &bus); if (r < 0) return r; @@ -2888,11 +3004,36 @@ static int start_unit(int argc, char *argv[], void *userdata) { return log_error_errno(r, "Could not watch jobs: %m"); } + if (arg_wait) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + + wait_context.unit_paths = set_new(&string_hash_ops); + if (!wait_context.unit_paths) + return log_oom(); + + r = sd_bus_call_method( + bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "Subscribe", + &error, + NULL, NULL); + if (r < 0) + return log_error_errno(r, "Failed to enable subscription: %s", bus_error_message(&error, r)); + r = sd_event_default(&wait_context.event); + if (r < 0) + return log_error_errno(r, "Failed to allocate event loop: %m"); + r = sd_bus_attach_event(bus, wait_context.event, 0); + if (r < 0) + return log_error_errno(r, "Failed to attach bus to event loop: %m"); + } + STRV_FOREACH(name, names) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; int q; - q = start_unit_one(bus, method, *name, mode, &error, w); + q = start_unit_one(bus, method, *name, mode, &error, w, arg_wait ? &wait_context : NULL); if (r >= 0 && q < 0) r = translate_bus_error_to_exit_status(q, &error); } @@ -2924,6 +3065,15 @@ static int start_unit(int argc, char *argv[], void *userdata) { check_triggering_units(bus, *name); } + if (r >= 0 && arg_wait) { + int q; + q = sd_event_loop(wait_context.event); + if (q < 0) + return log_error_errno(q, "Failed to run event loop: %m"); + if (wait_context.any_failed) + r = EXIT_FAILURE; + } + return r; } @@ -3121,7 +3271,7 @@ static int logind_check_inhibitors(enum action a) { if (sd_session_get_class(*s, &class) < 0 || !streq(class, "user")) continue; - if (sd_session_get_type(*s, &type) < 0 || (!streq(type, "x11") && !streq(type, "tty"))) + if (sd_session_get_type(*s, &type) < 0 || !STR_IN_SET(type, "x11", "tty")) continue; sd_session_get_tty(*s, &tty); @@ -3622,7 +3772,7 @@ static void print_status_info( if (streq_ptr(i->active_state, "failed")) { active_on = ansi_highlight_red(); active_off = ansi_normal(); - } else if (streq_ptr(i->active_state, "active") || streq_ptr(i->active_state, "reloading")) { + } else if (STRPTR_IN_SET(i->active_state, "active", "reloading")) { active_on = ansi_highlight_green(); active_off = ansi_normal(); } else @@ -3703,12 +3853,10 @@ static void print_status_info( if (!isempty(i->result) && !streq(i->result, "success")) printf(" (Result: %s)", i->result); - timestamp = (streq_ptr(i->active_state, "active") || - streq_ptr(i->active_state, "reloading")) ? i->active_enter_timestamp : - (streq_ptr(i->active_state, "inactive") || - streq_ptr(i->active_state, "failed")) ? i->inactive_enter_timestamp : - streq_ptr(i->active_state, "activating") ? i->inactive_exit_timestamp : - i->active_exit_timestamp; + timestamp = STRPTR_IN_SET(i->active_state, "active", "reloading") ? i->active_enter_timestamp : + STRPTR_IN_SET(i->active_state, "inactive", "failed") ? i->inactive_enter_timestamp : + STRPTR_IN_SET(i->active_state, "activating") ? i->inactive_exit_timestamp : + i->active_exit_timestamp; s1 = format_timestamp_relative(since1, sizeof(since1), timestamp); s2 = format_timestamp(since2, sizeof(since2), timestamp); @@ -4583,7 +4731,8 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte return 0; - } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IODeviceWeight") || streq(name, "BlockIODeviceWeight"))) { + } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && + STR_IN_SET(name, "IODeviceWeight", "BlockIODeviceWeight")) { const char *path; uint64_t weight; @@ -4602,8 +4751,9 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte return 0; - } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (cgroup_io_limit_type_from_string(name) >= 0 || - streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) { + } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && + (cgroup_io_limit_type_from_string(name) >= 0 || + STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth"))) { const char *path; uint64_t bandwidth; @@ -4695,12 +4845,14 @@ static int show_one( return log_error_errno(r, "Failed to map properties: %s", bus_error_message(&error, r)); if (streq_ptr(info.load_state, "not-found") && streq_ptr(info.active_state, "inactive")) { - log_error("Unit %s could not be found.", unit); + log_full(streq(verb, "status") ? LOG_ERR : LOG_DEBUG, + "Unit %s could not be found.", unit); if (streq(verb, "status")) return EXIT_PROGRAM_OR_SERVICES_STATUS_UNKNOWN; - return -ENOENT; + if (!streq(verb, "show")) + return -ENOENT; } r = sd_bus_message_rewind(reply, true); @@ -4765,10 +4917,11 @@ static int show_one( r = 0; if (show_properties) { char **pp; + int not_found_level = streq(verb, "show") ? LOG_DEBUG : LOG_WARNING; STRV_FOREACH(pp, arg_properties) if (!set_contains(found_properties, *pp)) { - log_warning("Property %s does not exist.", *pp); + log_full(not_found_level, "Property %s does not exist.", *pp); r = -ENXIO; } @@ -6584,6 +6737,7 @@ static void systemctl_help(void) { " -s --signal=SIGNAL Which signal to send\n" " --now Start or stop unit in addition to enabling or disabling it\n" " -q --quiet Suppress output\n" + " --wait For (re)start, wait until service stopped again\n" " --no-block Do not wait until operation finished\n" " --no-wall Don't send wall message before halt/power-off/reboot\n" " --no-reload Don't reload daemon after en-/dis-abling unit files\n" @@ -6854,6 +7008,7 @@ static int systemctl_parse_argv(int argc, char *argv[]) { ARG_FIRMWARE_SETUP, ARG_NOW, ARG_MESSAGE, + ARG_WAIT, }; static const struct option options[] = { @@ -6877,6 +7032,7 @@ static int systemctl_parse_argv(int argc, char *argv[]) { { "user", no_argument, NULL, ARG_USER }, { "system", no_argument, NULL, ARG_SYSTEM }, { "global", no_argument, NULL, ARG_GLOBAL }, + { "wait", no_argument, NULL, ARG_WAIT }, { "no-block", no_argument, NULL, ARG_NO_BLOCK }, { "no-legend", no_argument, NULL, ARG_NO_LEGEND }, { "no-pager", no_argument, NULL, ARG_NO_PAGER }, @@ -7057,6 +7213,10 @@ static int systemctl_parse_argv(int argc, char *argv[]) { arg_scope = UNIT_FILE_GLOBAL; break; + case ARG_WAIT: + arg_wait = true; + break; + case ARG_NO_BLOCK: arg_no_block = true; break; @@ -7232,6 +7392,11 @@ static int systemctl_parse_argv(int argc, char *argv[]) { return -EINVAL; } + if (arg_wait && arg_no_block) { + log_error("--wait may not be combined with --no-block."); + return -EINVAL; + } + return 1; } diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index 3c44d63021..79246ae060 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -40,6 +40,7 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_JOURNAL_USAGE SD_ID128_MAKE(ec,38,7f,57,7b,84,4b,8f,a9,48,f3,3c,ad,9a,75,e6) #define SD_MESSAGE_COREDUMP SD_ID128_MAKE(fc,2e,22,bc,6e,e6,47,b6,b9,07,29,ab,34,a2,50,b1) +#define SD_MESSAGE_TRUNCATED_CORE SD_ID128_MAKE(5a,ad,d8,e9,54,dc,4b,1a,8c,95,4d,63,fd,9e,11,37) #define SD_MESSAGE_SESSION_START SD_ID128_MAKE(8d,45,62,0c,1a,43,48,db,b1,74,10,da,57,c6,0c,66) #define SD_MESSAGE_SESSION_STOP SD_ID128_MAKE(33,54,93,94,24,b4,45,6d,98,02,ca,83,33,ed,42,4a) diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 25489cefbc..8b4ff22495 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -133,6 +133,28 @@ static void test_exec_privatedevices(Manager *m) { test(m, "exec-privatedevices-no.service", 0, CLD_EXITED); } +static void test_exec_privatedevices_capabilities(Manager *m) { + if (detect_container() > 0) { + log_notice("testing in container, skipping private device tests"); + return; + } + test(m, "exec-privatedevices-yes-capability-mknod.service", 0, CLD_EXITED); + test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED); +} + +static void test_exec_readonlypaths(Manager *m) { + test(m, "exec-readonlypaths.service", 0, CLD_EXITED); + test(m, "exec-readonlypaths-mount-propagation.service", 0, CLD_EXITED); +} + +static void test_exec_readwritepaths(Manager *m) { + test(m, "exec-readwritepaths-mount-propagation.service", 0, CLD_EXITED); +} + +static void test_exec_inaccessiblepaths(Manager *m) { + test(m, "exec-inaccessiblepaths-mount-propagation.service", 0, CLD_EXITED); +} + static void test_exec_systemcallfilter(Manager *m) { #ifdef HAVE_SECCOMP if (!is_seccomp_available()) @@ -345,6 +367,10 @@ int main(int argc, char *argv[]) { test_exec_ignoresigpipe, test_exec_privatetmp, test_exec_privatedevices, + test_exec_privatedevices_capabilities, + test_exec_readonlypaths, + test_exec_readwritepaths, + test_exec_inaccessiblepaths, test_exec_privatenetwork, test_exec_systemcallfilter, test_exec_systemcallerrornumber, diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c index b35a2ea2c8..53a3cdc663 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -20,16 +20,109 @@ #include <unistd.h> #include "alloc-util.h" -#include "fileio.h" #include "fd-util.h" +#include "fileio.h" #include "fs-util.h" #include "macro.h" #include "mkdir.h" +#include "path-util.h" #include "rm-rf.h" #include "string-util.h" #include "strv.h" #include "util.h" +static void test_chase_symlinks(void) { + _cleanup_free_ char *result = NULL; + char temp[] = "/tmp/test-chase.XXXXXX"; + const char *top, *p, *q; + int r; + + assert_se(mkdtemp(temp)); + + top = strjoina(temp, "/top"); + assert_se(mkdir(top, 0700) >= 0); + + p = strjoina(top, "/dot"); + assert_se(symlink(".", p) >= 0); + + p = strjoina(top, "/dotdot"); + assert_se(symlink("..", p) >= 0); + + p = strjoina(top, "/dotdota"); + assert_se(symlink("../a", p) >= 0); + + p = strjoina(temp, "/a"); + assert_se(symlink("b", p) >= 0); + + p = strjoina(temp, "/b"); + assert_se(symlink("/usr", p) >= 0); + + p = strjoina(temp, "/start"); + assert_se(symlink("top/dot/dotdota", p) >= 0); + + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/usr")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r == -ENOENT); + + q = strjoina(temp, "/usr"); + assert_se(mkdir(q, 0700) >= 0); + + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, q)); + + p = strjoina(temp, "/slash"); + assert_se(symlink("/", p) >= 0); + + result = mfree(result); + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, temp)); + + p = strjoina(temp, "/slashslash"); + assert_se(symlink("///usr///", p) >= 0); + + result = mfree(result); + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/usr")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, q)); + + result = mfree(result); + r = chase_symlinks("/etc/./.././", NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/")); + + result = mfree(result); + r = chase_symlinks("/etc/./.././", "/etc", &result); + assert_se(r == -EINVAL); + + result = mfree(result); + r = chase_symlinks("/etc/machine-id/foo", NULL, &result); + assert_se(r == -ENOTDIR); + + result = mfree(result); + p = strjoina(temp, "/recursive-symlink"); + assert_se(symlink("recursive-symlink", p) >= 0); + r = chase_symlinks(p, NULL, &result); + assert_se(r == -ELOOP); + + assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); +} + static void test_unlink_noerrno(void) { char name[] = "/tmp/test-close_nointr.XXXXXX"; int fd; @@ -144,6 +237,7 @@ int main(int argc, char *argv[]) { test_readlink_and_make_absolute(); test_get_files_in_directory(); test_var_tmp(); + test_chase_symlinks(); return 0; } diff --git a/src/test/test-list.c b/src/test/test-list.c index 160064d06a..0ccd745cc9 100644 --- a/src/test/test-list.c +++ b/src/test/test-list.c @@ -132,6 +132,29 @@ int main(int argc, const char *argv[]) { assert_se(items[1].item_prev == &items[3]); assert_se(items[3].item_prev == NULL); + LIST_INSERT_BEFORE(item, head, &items[3], &items[0]); + assert_se(items[2].item_next == NULL); + assert_se(items[1].item_next == &items[2]); + assert_se(items[3].item_next == &items[1]); + assert_se(items[0].item_next == &items[3]); + + assert_se(items[2].item_prev == &items[1]); + assert_se(items[1].item_prev == &items[3]); + assert_se(items[3].item_prev == &items[0]); + assert_se(items[0].item_prev == NULL); + assert_se(head == &items[0]); + + LIST_REMOVE(item, head, &items[0]); + assert_se(LIST_JUST_US(item, &items[0])); + + assert_se(items[2].item_next == NULL); + assert_se(items[1].item_next == &items[2]); + assert_se(items[3].item_next == &items[1]); + + assert_se(items[2].item_prev == &items[1]); + assert_se(items[1].item_prev == &items[3]); + assert_se(items[3].item_prev == NULL); + LIST_INSERT_BEFORE(item, head, NULL, &items[0]); assert_se(items[0].item_next == NULL); assert_se(items[2].item_next == &items[0]); diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 9248f2987c..c4d4da6d05 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -26,13 +26,18 @@ int main(int argc, char *argv[]) { const char * const writable[] = { "/home", + "-/home/lennart/projects/foobar", /* this should be masked automatically */ NULL }; const char * const readonly[] = { - "/", - "/usr", + /* "/", */ + /* "/usr", */ "/boot", + "/lib", + "/usr/lib", + "-/lib64", + "-/usr/lib64", NULL }; @@ -42,11 +47,12 @@ int main(int argc, char *argv[]) { }; char *root_directory; char *projects_directory; - int r; char tmp_dir[] = "/tmp/systemd-private-XXXXXX", var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX"; + log_set_max_level(LOG_DEBUG); + assert_se(mkdtemp(tmp_dir)); assert_se(mkdtemp(var_tmp_dir)); @@ -69,6 +75,8 @@ int main(int argc, char *argv[]) { tmp_dir, var_tmp_dir, true, + true, + true, PROTECT_HOME_NO, PROTECT_SYSTEM_NO, 0); diff --git a/src/test/test-strv.c b/src/test/test-strv.c index 841a36782f..ce20f2dd5b 100644 --- a/src/test/test-strv.c +++ b/src/test/test-strv.c @@ -54,6 +54,25 @@ static void test_specifier_printf(void) { puts(w); } +static void test_str_in_set(void) { + assert_se(STR_IN_SET("x", "x", "y", "z")); + assert_se(!STR_IN_SET("X", "x", "y", "z")); + assert_se(!STR_IN_SET("", "x", "y", "z")); + assert_se(STR_IN_SET("x", "w", "x")); +} + +static void test_strptr_in_set(void) { + assert_se(STRPTR_IN_SET("x", "x", "y", "z")); + assert_se(!STRPTR_IN_SET("X", "x", "y", "z")); + assert_se(!STRPTR_IN_SET("", "x", "y", "z")); + assert_se(STRPTR_IN_SET("x", "w", "x")); + + assert_se(!STRPTR_IN_SET(NULL, "x", "y", "z")); + assert_se(!STRPTR_IN_SET(NULL, "")); + /* strv cannot contain a null, hence the result below */ + assert_se(!STRPTR_IN_SET(NULL, NULL)); +} + static const char* const input_table_multiple[] = { "one", "two", @@ -703,6 +722,8 @@ static void test_strv_fnmatch(void) { int main(int argc, char *argv[]) { test_specifier_printf(); + test_str_in_set(); + test_strptr_in_set(); test_strv_foreach(); test_strv_foreach_backwards(); test_strv_foreach_pair(); diff --git a/src/udev/udev-builtin-path_id.c b/src/udev/udev-builtin-path_id.c index 6e9adc6e96..1825ee75a7 100644 --- a/src/udev/udev-builtin-path_id.c +++ b/src/udev/udev-builtin-path_id.c @@ -693,6 +693,15 @@ static int builtin_path_id(struct udev_device *dev, int argc, char *argv[], bool parent = skip_subsystem(parent, "iucv"); supported_transport = true; supported_parent = true; + } else if (streq(subsys, "nvme")) { + const char *nsid = udev_device_get_sysattr_value(dev, "nsid"); + + if (nsid) { + path_prepend(&path, "nvme-%s", nsid); + parent = skip_subsystem(parent, "nvme"); + supported_parent = true; + supported_transport = true; + } } if (parent) diff --git a/src/vconsole/vconsole-setup.c b/src/vconsole/vconsole-setup.c index c0d76f9685..ac4ceb1486 100644 --- a/src/vconsole/vconsole-setup.c +++ b/src/vconsole/vconsole-setup.c @@ -75,7 +75,7 @@ static bool is_settable(int fd) { r = ioctl(fd, KDGKBMODE, &curr_mode); /* * Make sure we only adjust consoles in K_XLATE or K_UNICODE mode. - * Oterwise we would (likely) interfere with X11's processing of the + * Otherwise we would (likely) interfere with X11's processing of the * key events. * * http://lists.freedesktop.org/archives/systemd-devel/2013-February/008573.html |