diff options
Diffstat (limited to 'src')
28 files changed, 729 insertions, 211 deletions
diff --git a/src/basic/conf-files.c b/src/basic/conf-files.c index c0c22610d7..b5780194df 100644 --- a/src/basic/conf-files.c +++ b/src/basic/conf-files.c @@ -43,7 +43,6 @@ static int files_add(Hashmap *h, const char *root, const char *path, const char int r; assert(path); - assert(suffix); dirpath = prefix_roota(root, path); @@ -94,7 +93,6 @@ static int conf_files_list_strv_internal(char ***strv, const char *suffix, const int r; assert(strv); - assert(suffix); /* This alters the dirs string array */ if (!path_strv_resolve_uniq(dirs, root)) @@ -126,7 +124,6 @@ int conf_files_list_strv(char ***strv, const char *suffix, const char *root, con _cleanup_strv_free_ char **copy = NULL; assert(strv); - assert(suffix); copy = strv_copy((char**) dirs); if (!copy) diff --git a/src/basic/dirent-util.c b/src/basic/dirent-util.c index 59067121b7..6b9d26773e 100644 --- a/src/basic/dirent-util.c +++ b/src/basic/dirent-util.c @@ -70,5 +70,8 @@ bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) { if (de->d_name[0] == '.') return false; + if (!suffix) + return true; + return endswith(de->d_name, suffix); } diff --git a/src/basic/raw-clone.h b/src/basic/raw-clone.h index d473828999..c6e531ada4 100644 --- a/src/basic/raw-clone.h +++ b/src/basic/raw-clone.h @@ -47,8 +47,8 @@ static inline int raw_clone(unsigned long flags) { assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID| CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0); -#if defined(__s390__) || defined(__CRIS__) - /* On s390 and cris the order of the first and second arguments +#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) + /* On s390/s390x and cris the order of the first and second arguments * of the raw clone() system call is reversed. */ return (int) syscall(__NR_clone, NULL, flags); #elif defined(__sparc__) && defined(__arch64__) diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 9876251438..0136d38833 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -19,6 +19,7 @@ #include <errno.h> #include <sys/prctl.h> +#include <sys/statvfs.h> #include <unistd.h> #include "alloc-util.h" @@ -38,6 +39,7 @@ #include "fs-util.h" #include "install.h" #include "log.h" +#include "parse-util.h" #include "path-util.h" #include "selinux-access.h" #include "stat-util.h" @@ -48,6 +50,10 @@ #include "virt.h" #include "watchdog.h" +/* Require 16MiB free in /run/systemd for reloading/reexecing. After all we need to serialize our state there, and if + * we can't we'll fail badly. */ +#define RELOAD_DISK_SPACE_MIN (UINT64_C(16) * UINT64_C(1024) * UINT64_C(1024)) + static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) { return (runtime ? UNIT_FILE_RUNTIME : 0) | (force ? UNIT_FILE_FORCE : 0); @@ -1312,6 +1318,40 @@ static int method_refuse_snapshot(sd_bus_message *message, void *userdata, sd_bu return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Support for snapshots has been removed."); } +static int verify_run_space(const char *message, sd_bus_error *error) { + struct statvfs svfs; + uint64_t available; + + if (statvfs("/run/systemd", &svfs) < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m"); + + available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize; + + if (available < RELOAD_DISK_SPACE_MIN) { + char fb_available[FORMAT_BYTES_MAX], fb_need[FORMAT_BYTES_MAX]; + return sd_bus_error_setf(error, + BUS_ERROR_DISK_FULL, + "%s, not enough space available on /run/systemd. " + "Currently, %s are free, but a safety buffer of %s is enforced.", + message, + format_bytes(fb_available, sizeof(fb_available), available), + format_bytes(fb_need, sizeof(fb_need), RELOAD_DISK_SPACE_MIN)); + } + + return 0; +} + +int verify_run_space_and_log(const char *message) { + sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + r = verify_run_space(message, &error); + if (r < 0) + log_error_errno(r, "%s", bus_error_message(&error, r)); + + return r; +} + static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *error) { Manager *m = userdata; int r; @@ -1319,6 +1359,10 @@ static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error * assert(message); assert(m); + r = verify_run_space("Refusing to reload", error); + if (r < 0) + return r; + r = mac_selinux_access_check(message, "reload", error); if (r < 0) return r; @@ -1351,6 +1395,10 @@ static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_erro assert(message); assert(m); + r = verify_run_space("Refusing to reexecute", error); + if (r < 0) + return r; + r = mac_selinux_access_check(message, "reload", error); if (r < 0) return r; @@ -1469,11 +1517,26 @@ static int method_switch_root(sd_bus_message *message, void *userdata, sd_bus_er char *ri = NULL, *rt = NULL; const char *root, *init; Manager *m = userdata; + struct statvfs svfs; + uint64_t available; int r; assert(message); assert(m); + if (statvfs("/run/systemd", &svfs) < 0) + return sd_bus_error_set_errnof(error, errno, "Failed to statvfs(/run/systemd): %m"); + + available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize; + + if (available < RELOAD_DISK_SPACE_MIN) { + char fb_available[FORMAT_BYTES_MAX], fb_need[FORMAT_BYTES_MAX]; + log_warning("Dangerously low amount of free space on /run/systemd, root switching operation might not complete successfuly. " + "Currently, %s are free, but %s are suggested. Proceeding anyway.", + format_bytes(fb_available, sizeof(fb_available), available), + format_bytes(fb_need, sizeof(fb_need), RELOAD_DISK_SPACE_MIN)); + } + r = mac_selinux_access_check(message, "reboot", error); if (r < 0) return r; diff --git a/src/core/dbus-manager.h b/src/core/dbus-manager.h index 36a2e9481b..9f3222da28 100644 --- a/src/core/dbus-manager.h +++ b/src/core/dbus-manager.h @@ -26,3 +26,5 @@ extern const sd_bus_vtable bus_manager_vtable[]; void bus_manager_send_finished(Manager *m, usec_t firmware_usec, usec_t loader_usec, usec_t kernel_usec, usec_t initrd_usec, usec_t userspace_usec, usec_t total_usec); void bus_manager_send_reloading(Manager *m, bool active); void bus_manager_send_change_signal(Manager *m); + +int verify_run_space_and_log(const char *message); diff --git a/src/core/load-dropin.c b/src/core/load-dropin.c index fc07151d37..ff3636149a 100644 --- a/src/core/load-dropin.c +++ b/src/core/load-dropin.c @@ -19,53 +19,121 @@ #include "conf-parser.h" +#include "fs-util.h" #include "load-dropin.h" #include "load-fragment.h" #include "log.h" +#include "stat-util.h" +#include "string-util.h" #include "strv.h" #include "unit-name.h" #include "unit.h" -static int add_dependency_consumer( - UnitDependency dependency, - const char *entry, - const char* filepath, - void *arg) { - Unit *u = arg; +static bool unit_name_compatible(const char *a, const char *b) { + _cleanup_free_ char *prefix = NULL; int r; - assert(u); + /* the straightforward case: the symlink name matches the target */ + if (streq(a, b)) + return true; + + r = unit_name_template(a, &prefix); + if (r < 0) { + log_oom(); + return true; + } + + /* an instance name points to a target that is just the template name */ + if (streq(prefix, b)) + return true; + + return false; +} + +static int process_deps(Unit *u, UnitDependency dependency, const char *dir_suffix) { + _cleanup_strv_free_ char **paths = NULL; + char **p; + int r; - r = unit_add_dependency_by_name(u, dependency, entry, filepath, true); + r = unit_file_find_dropin_paths(NULL, + u->manager->lookup_paths.search_path, + u->manager->unit_path_cache, + dir_suffix, + NULL, + u->names, + &paths); if (r < 0) - log_error_errno(r, "Cannot add dependency %s to %s, ignoring: %m", entry, u->id); + return r; + + STRV_FOREACH(p, paths) { + const char *entry; + _cleanup_free_ char *target = NULL; + + entry = basename(*p); + + if (null_or_empty_path(*p) > 0) { + /* an error usually means an invalid symlink, which is not a mask */ + log_unit_debug(u, "%s dependency on %s is masked by %s, ignoring.", + unit_dependency_to_string(dependency), entry, *p); + continue; + } + + r = is_symlink(*p); + if (r < 0) { + log_unit_warning_errno(u, r, "%s dropin %s unreadable, ignoring: %m", + unit_dependency_to_string(dependency), *p); + continue; + } + if (r == 0) { + log_unit_warning(u, "%s dependency dropin %s is not a symlink, ignoring.", + unit_dependency_to_string(dependency), *p); + continue; + } + + if (!unit_name_is_valid(entry, UNIT_NAME_ANY)) { + log_unit_warning(u, "%s dependency dropin %s is not a valid unit name, ignoring.", + unit_dependency_to_string(dependency), *p); + continue; + } + + r = readlink_malloc(*p, &target); + if (r < 0) { + log_unit_warning_errno(u, r, "readlink(\"%s\") failed, ignoring: %m", *p); + continue; + } + + /* We don't treat this as an error, especially because we didn't check this for a + * long time. Nevertheless, we warn, because such mismatch can be mighty confusing. */ + if (!unit_name_compatible(entry, basename(target))) + log_unit_warning(u, "%s dependency dropin %s target %s has different name", + unit_dependency_to_string(dependency), *p, target); + + r = unit_add_dependency_by_name(u, dependency, entry, *p, true); + if (r < 0) + log_unit_error_errno(u, r, "cannot add %s dependency on %s, ignoring: %m", + unit_dependency_to_string(dependency), entry); + } return 0; } int unit_load_dropin(Unit *u) { _cleanup_strv_free_ char **l = NULL; - Iterator i; - char *t, **f; + char **f; int r; assert(u); - /* Load dependencies from supplementary drop-in directories */ - - SET_FOREACH(t, u->names, i) { - char **p; + /* Load dependencies from .wants and .requires directories */ + r = process_deps(u, UNIT_WANTS, ".wants"); + if (r < 0) + return r; - STRV_FOREACH(p, u->manager->lookup_paths.search_path) { - unit_file_process_dir(NULL, u->manager->unit_path_cache, *p, t, - ".wants", UNIT_WANTS, - add_dependency_consumer, u, NULL); - unit_file_process_dir(NULL, u->manager->unit_path_cache, *p, t, - ".requires", UNIT_REQUIRES, - add_dependency_consumer, u, NULL); - } - } + r = process_deps(u, UNIT_REQUIRES, ".requires"); + if (r < 0) + return r; + /* Load .conf dropins */ r = unit_find_dropin_paths(u, &l); if (r <= 0) return 0; diff --git a/src/core/load-dropin.h b/src/core/load-dropin.h index 319827dfb9..5828a223ce 100644 --- a/src/core/load-dropin.h +++ b/src/core/load-dropin.h @@ -25,11 +25,11 @@ /* Read service data supplementary drop-in directories */ static inline int unit_find_dropin_paths(Unit *u, char ***paths) { - return unit_file_find_dropin_paths(NULL, - u->manager->lookup_paths.search_path, - u->manager->unit_path_cache, - u->names, - paths); + return unit_file_find_dropin_conf_paths(NULL, + u->manager->lookup_paths.search_path, + u->manager->unit_path_cache, + u->names, + paths); } int unit_load_dropin(Unit *u); diff --git a/src/core/manager.c b/src/core/manager.c index d83c5ef5e2..e4da945777 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1984,7 +1984,9 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t if (MANAGER_IS_SYSTEM(m)) { /* This is for compatibility with the * original sysvinit */ - m->exit_code = MANAGER_REEXECUTE; + r = verify_run_space_and_log("Refusing to reexecute"); + if (r >= 0) + m->exit_code = MANAGER_REEXECUTE; break; } @@ -2061,7 +2063,9 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t } case SIGHUP: - m->exit_code = MANAGER_RELOAD; + r = verify_run_space_and_log("Refusing to reload"); + if (r >= 0) + m->exit_code = MANAGER_RELOAD; break; default: { @@ -2432,18 +2436,22 @@ void manager_send_unit_plymouth(Manager *m, Unit *u) { } int manager_open_serialization(Manager *m, FILE **_f) { - const char *path; int fd = -1; FILE *f; assert(_f); - path = MANAGER_IS_SYSTEM(m) ? "/run/systemd" : "/tmp"; - fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC); - if (fd < 0) - return -errno; + fd = memfd_create("systemd-serialization", MFD_CLOEXEC); + if (fd < 0) { + const char *path; - log_debug("Serializing state to %s", path); + path = MANAGER_IS_SYSTEM(m) ? "/run/systemd" : "/tmp"; + fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC); + if (fd < 0) + return -errno; + log_debug("Serializing state to %s.", path); + } else + log_debug("Serializing state to memfd."); f = fdopen(fd, "w+"); if (!f) { diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c index c9fd79e3b4..b40ba2520c 100644 --- a/src/libsystemd/sd-bus/bus-common-errors.c +++ b/src/libsystemd/sd-bus/bus-common-errors.c @@ -47,6 +47,7 @@ BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = { SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN), SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH), SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED, EUNATCH), + SD_BUS_ERROR_MAP(BUS_ERROR_DISK_FULL, ENOSPC), SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE, ENXIO), SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE, ENOENT), diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h index 525b79fa77..4523be05ce 100644 --- a/src/libsystemd/sd-bus/bus-common-errors.h +++ b/src/libsystemd/sd-bus/bus-common-errors.h @@ -43,6 +43,7 @@ #define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning" #define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser" #define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced" +#define BUS_ERROR_DISK_FULL "org.freedesktop.systemd1.DiskFull" #define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine" #define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage" diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 4816bd1f67..b4686d0065 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -730,7 +730,6 @@ static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig /* If all the mask is all-zero we can get rid of the structure */ hashmap_remove(e->signal_data, &d->priority); - assert(!d->current); safe_close(d->fd); free(d); return; diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf index c0fa4bfa1f..e5fdf63162 100644 --- a/src/nspawn/nspawn-gperf.gperf +++ b/src/nspawn/nspawn-gperf.gperf @@ -26,6 +26,7 @@ Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id) Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory) +Exec.PivotRoot, config_parse_pivot_root, 0, 0 Exec.PrivateUsers, config_parse_private_users, 0, 0 Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready) Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 72c007f204..4b2838b752 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -1349,3 +1349,116 @@ fail: (void) rmdir(template); return r; } + +/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */ +int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) { + _cleanup_free_ char *root_new = NULL, *root_old = NULL; + const char *p = s; + int r; + + assert(pivot_root_new); + assert(pivot_root_old); + + r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + if (isempty(p)) + root_old = NULL; + else { + root_old = strdup(p); + if (!root_old) + return -ENOMEM; + } + + if (!path_is_absolute(root_new)) + return -EINVAL; + if (root_old && !path_is_absolute(root_old)) + return -EINVAL; + + free_and_replace(*pivot_root_new, root_new); + free_and_replace(*pivot_root_old, root_old); + + return 0; +} + +int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) { + _cleanup_free_ char *directory_pivot_root_new = NULL; + _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL; + char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX"; + bool remove_pivot_tmp = false; + int r; + + assert(directory); + + if (!pivot_root_new) + return 0; + + /* Pivot pivot_root_new to / and the existing / to pivot_root_old. + * If pivot_root_old is NULL, the existing / disappears. + * This requires a temporary directory, pivot_tmp, which is + * not a child of either. + * + * This is typically used for OSTree-style containers, where + * the root partition contains several sysroots which could be + * run. Normally, one would be chosen by the bootloader and + * pivoted to / by initramfs. + * + * For example, for an OSTree deployment, pivot_root_new + * would be: /ostree/deploy/$os/deploy/$checksum. Note that this + * code doesn’t do the /var mount which OSTree expects: use + * --bind +/sysroot/ostree/deploy/$os/var:/var for that. + * + * So in the OSTree case, we’ll end up with something like: + * - directory = /tmp/nspawn-root-123456 + * - pivot_root_new = /ostree/deploy/os/deploy/123abc + * - pivot_root_old = /sysroot + * - directory_pivot_root_new = + * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc + * - pivot_tmp = /tmp/nspawn-pivot-123456 + * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot + * + * Requires all file systems at directory and below to be mounted + * MS_PRIVATE or MS_SLAVE so they can be moved. + */ + directory_pivot_root_new = prefix_root(directory, pivot_root_new); + + /* Remount directory_pivot_root_new to make it movable. */ + r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL); + if (r < 0) + goto done; + + if (pivot_root_old) { + if (!mkdtemp(pivot_tmp)) { + r = log_error_errno(errno, "Failed to create temporary directory: %m"); + goto done; + } + + remove_pivot_tmp = true; + pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old); + + r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + + r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + + r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + } else { + r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL); + if (r < 0) + goto done; + } + +done: + if (remove_pivot_tmp) + (void) rmdir(pivot_tmp); + + return r; +} diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 6b33fbff57..2777d2169b 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -70,3 +70,6 @@ int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns, int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); + +int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s); +int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old); diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c index 22b74d88e4..5217d10665 100644 --- a/src/nspawn/nspawn-settings.c +++ b/src/nspawn/nspawn-settings.c @@ -90,6 +90,8 @@ Settings* settings_free(Settings *s) { strv_free(s->parameters); strv_free(s->environment); free(s->user); + free(s->pivot_root_new); + free(s->pivot_root_old); free(s->working_directory); strv_free(s->network_interfaces); @@ -237,6 +239,34 @@ int config_parse_id128( return 0; } +int config_parse_pivot_root( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = pivot_root_parse(&settings->pivot_root_new, &settings->pivot_root_old, rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue); + return 0; + } + + return 0; +} + int config_parse_bind( const char *unit, const char *filename, diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h index e9ea087191..021403258f 100644 --- a/src/nspawn/nspawn-settings.h +++ b/src/nspawn/nspawn-settings.h @@ -57,7 +57,8 @@ typedef enum SettingsMask { SETTING_WORKING_DIRECTORY = 1 << 12, SETTING_USERNS = 1 << 13, SETTING_NOTIFY_READY = 1 << 14, - _SETTINGS_MASK_ALL = (1 << 15) -1 + SETTING_PIVOT_ROOT = 1 << 15, + _SETTINGS_MASK_ALL = (1 << 16) -1 } SettingsMask; typedef struct Settings { @@ -72,6 +73,8 @@ typedef struct Settings { unsigned long personality; sd_id128_t machine_id; char *working_directory; + char *pivot_root_new; + char *pivot_root_old; UserNamespaceMode userns_mode; uid_t uid_shift, uid_range; bool notify_ready; @@ -109,6 +112,7 @@ int config_parse_capability(const char *unit, const char *filename, unsigned lin int config_parse_id128(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_expose_port(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_pivot_root(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_overlay(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 213f50f796..b172b44933 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -132,6 +132,8 @@ typedef enum LinkJournal { static char *arg_directory = NULL; static char *arg_template = NULL; static char *arg_chdir = NULL; +static char *arg_pivot_root_new = NULL; +static char *arg_pivot_root_old = NULL; static char *arg_user = NULL; static sd_id128_t arg_uuid = {}; static char *arg_machine = NULL; @@ -221,6 +223,8 @@ static void help(void) { " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n" " -b --boot Boot up full system (i.e. invoke init)\n" " --chdir=PATH Set working directory in the container\n" + " --pivot-root=PATH[:PATH]\n" + " Pivot root to given directory in the container\n" " -u --user=USER Run the command under specified user or uid\n" " -M --machine=NAME Set the machine name for the container\n" " --uuid=UUID Set a specific machine UUID for the container\n" @@ -427,6 +431,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_KILL_SIGNAL, ARG_SETTINGS, ARG_CHDIR, + ARG_PIVOT_ROOT, ARG_PRIVATE_USERS_CHOWN, ARG_NOTIFY_READY, ARG_ROOT_HASH, @@ -478,6 +483,7 @@ static int parse_argv(int argc, char *argv[]) { { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, { "settings", required_argument, NULL, ARG_SETTINGS }, { "chdir", required_argument, NULL, ARG_CHDIR }, + { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT }, { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY }, { "root-hash", required_argument, NULL, ARG_ROOT_HASH }, {} @@ -1012,6 +1018,14 @@ static int parse_argv(int argc, char *argv[]) { arg_settings_mask |= SETTING_WORKING_DIRECTORY; break; + case ARG_PIVOT_ROOT: + r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg); + + arg_settings_mask |= SETTING_PIVOT_ROOT; + break; + case ARG_NOTIFY_READY: r = parse_boolean(optarg); if (r < 0) { @@ -2493,6 +2507,13 @@ static int outer_child( if (r < 0) return r; + r = setup_pivot_root( + directory, + arg_pivot_root_new, + arg_pivot_root_old); + if (r < 0) + return r; + r = setup_volatile( directory, arg_volatile_mode, @@ -2915,6 +2936,12 @@ static int load_settings(void) { settings->parameters = NULL; } + if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 && + settings->pivot_root_new) { + free_and_replace(arg_pivot_root_new, settings->pivot_root_new); + free_and_replace(arg_pivot_root_old, settings->pivot_root_old); + } + if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 && settings->working_directory) { free(arg_chdir); @@ -3872,6 +3899,8 @@ finish: free(arg_image); free(arg_machine); free(arg_user); + free(arg_pivot_root_new); + free(arg_pivot_root_old); free(arg_chdir); strv_free(arg_setenv); free(arg_network_bridge); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 66ddf3a872..410a7764ed 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -352,9 +352,6 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI sd_id128_t type_id, id; bool rw = true; - if (pflags & GPT_FLAG_NO_AUTO) - continue; - sid = blkid_partition_get_uuid(pp); if (!sid) continue; @@ -368,18 +365,37 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI continue; if (sd_id128_equal(type_id, GPT_HOME)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + designator = PARTITION_HOME; rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_SRV)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + designator = PARTITION_SRV; rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_ESP)) { + + /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined + * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the + * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */ + + if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL) + continue; + designator = PARTITION_ESP; fstype = "vfat"; } #ifdef GPT_ROOT_NATIVE else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { + if (pflags & GPT_FLAG_NO_AUTO) + continue; + /* If a root ID is specified, ignore everything but the root id */ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) continue; @@ -389,6 +405,9 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) { + if (pflags & GPT_FLAG_NO_AUTO) + continue; + m->can_verity = true; /* Ignore verity unless a root hash is specified */ @@ -404,6 +423,9 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI #ifdef GPT_ROOT_SECONDARY else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) { + if (pflags & GPT_FLAG_NO_AUTO) + continue; + /* If a root ID is specified, ignore everything but the root id */ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) continue; @@ -412,6 +434,10 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI architecture = SECONDARY_ARCHITECTURE; rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + m->can_verity = true; /* Ignore verity unless root has is specified */ @@ -425,10 +451,17 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI } #endif else if (sd_id128_equal(type_id, GPT_SWAP)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + designator = PARTITION_SWAP; fstype = "swap"; } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) { + if (pflags & GPT_FLAG_NO_AUTO) + continue; + if (generic_node) multiple_generic = true; else { diff --git a/src/shared/dropin.c b/src/shared/dropin.c index 06cf3de620..3917eb8f23 100644 --- a/src/shared/dropin.c +++ b/src/shared/dropin.c @@ -117,17 +117,12 @@ int write_drop_in_format(const char *dir, const char *unit, unsigned level, return write_drop_in(dir, unit, level, name, p); } -static int iterate_dir( - const char *path, +static int unit_file_find_dir( const char *original_root, - UnitDependency dependency, - dependency_consumer_t consumer, - void *arg, - char ***strv) { + const char *path, + char ***dirs) { _cleanup_free_ char *chased = NULL; - _cleanup_closedir_ DIR *d = NULL; - struct dirent *de; int r; assert(path); @@ -137,52 +132,21 @@ static int iterate_dir( return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to canonicalize path %s: %m", path); - /* The config directories are special, since the order of the - * drop-ins matters */ - if (dependency < 0) { - r = strv_push(strv, chased); - if (r < 0) - return log_oom(); - - chased = NULL; - return 0; - } - - assert(consumer); - - d = opendir(chased); - if (!d) { - if (errno == ENOENT) - return 0; - - return log_warning_errno(errno, "Failed to open directory %s: %m", path); - } - - FOREACH_DIRENT(de, d, return log_warning_errno(errno, "Failed to read directory %s: %m", path)) { - _cleanup_free_ char *f = NULL; - - f = strjoin(path, "/", de->d_name); - if (!f) - return log_oom(); - - r = consumer(dependency, de->d_name, f, arg); - if (r < 0) - return r; - } + r = strv_push(dirs, chased); + if (r < 0) + return log_oom(); + chased = NULL; return 0; } -int unit_file_process_dir( +static int unit_file_find_dirs( const char *original_root, Set *unit_path_cache, const char *unit_path, const char *name, const char *suffix, - UnitDependency dependency, - dependency_consumer_t consumer, - void *arg, - char ***strv) { + char ***dirs) { _cleanup_free_ char *path = NULL; int r; @@ -195,8 +159,11 @@ int unit_file_process_dir( if (!path) return log_oom(); - if (!unit_path_cache || set_get(unit_path_cache, path)) - (void) iterate_dir(path, original_root, dependency, consumer, arg, strv); + if (!unit_path_cache || set_get(unit_path_cache, path)) { + r = unit_file_find_dir(original_root, path, dirs); + if (r < 0) + return r; + } if (unit_name_is_valid(name, UNIT_NAME_INSTANCE)) { _cleanup_free_ char *template = NULL, *p = NULL; @@ -210,8 +177,11 @@ int unit_file_process_dir( if (!p) return log_oom(); - if (!unit_path_cache || set_get(unit_path_cache, p)) - (void) iterate_dir(p, original_root, dependency, consumer, arg, strv); + if (!unit_path_cache || set_get(unit_path_cache, p)) { + r = unit_file_find_dir(original_root, p, dirs); + if (r < 0) + return r; + } } return 0; @@ -221,30 +191,28 @@ int unit_file_find_dropin_paths( const char *original_root, char **lookup_path, Set *unit_path_cache, + const char *dir_suffix, + const char *file_suffix, Set *names, char ***paths) { - _cleanup_strv_free_ char **strv = NULL, **ans = NULL; + _cleanup_strv_free_ char **dirs = NULL, **ans = NULL; Iterator i; - char *t; + char *t, **p; int r; assert(paths); - SET_FOREACH(t, names, i) { - char **p; - + SET_FOREACH(t, names, i) STRV_FOREACH(p, lookup_path) - unit_file_process_dir(original_root, unit_path_cache, *p, t, ".d", - _UNIT_DEPENDENCY_INVALID, NULL, NULL, &strv); - } + unit_file_find_dirs(original_root, unit_path_cache, *p, t, dir_suffix, &dirs); - if (strv_isempty(strv)) + if (strv_isempty(dirs)) return 0; - r = conf_files_list_strv(&ans, ".conf", NULL, (const char**) strv); + r = conf_files_list_strv(&ans, file_suffix, NULL, (const char**) dirs); if (r < 0) - return log_warning_errno(r, "Failed to get list of configuration files: %m"); + return log_warning_errno(r, "Failed to sort the list of configuration files: %m"); *paths = ans; ans = NULL; diff --git a/src/shared/dropin.h b/src/shared/dropin.h index 761b250886..a2b8cdce61 100644 --- a/src/shared/dropin.h +++ b/src/shared/dropin.h @@ -33,31 +33,24 @@ int write_drop_in(const char *dir, const char *unit, unsigned level, int write_drop_in_format(const char *dir, const char *unit, unsigned level, const char *name, const char *format, ...) _printf_(5, 6); -/** - * This callback will be called for each directory entry @entry, - * with @filepath being the full path to the entry. - * - * If return value is negative, loop will be aborted. - */ -typedef int (*dependency_consumer_t)(UnitDependency dependency, - const char *entry, - const char* filepath, - void *arg); - -int unit_file_process_dir( - const char *original_root, - Set * unit_path_cache, - const char *unit_path, - const char *name, - const char *suffix, - UnitDependency dependency, - dependency_consumer_t consumer, - void *arg, - char ***strv); - int unit_file_find_dropin_paths( const char *original_root, char **lookup_path, Set *unit_path_cache, + const char *dir_suffix, + const char *file_suffix, Set *names, char ***paths); + +static inline int unit_file_find_dropin_conf_paths( + const char *original_root, + char **lookup_path, + Set *unit_path_cache, + Set *names, + char ***paths) { + return unit_file_find_dropin_paths(original_root, + lookup_path, + unit_path_cache, + ".d", ".conf", + names, paths); +} diff --git a/src/shared/gpt.h b/src/shared/gpt.h index 13d80d611c..cc752006fa 100644 --- a/src/shared/gpt.h +++ b/src/shared/gpt.h @@ -71,6 +71,8 @@ # define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY #endif +#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1) + /* Flags we recognize on the root, swap, home and srv partitions when * doing auto-discovery. These happen to be identical to what * Microsoft defines for its own Basic Data Partitions, but that's diff --git a/src/shared/install.c b/src/shared/install.c index 478abac8ab..f25ed685f6 100644 --- a/src/shared/install.c +++ b/src/shared/install.c @@ -208,7 +208,7 @@ static int path_is_control(const LookupPaths *p, const char *path) { path_equal_ptr(parent, p->runtime_control); } -static int path_is_config(const LookupPaths *p, const char *path) { +static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) { _cleanup_free_ char *parent = NULL; assert(p); @@ -217,15 +217,19 @@ static int path_is_config(const LookupPaths *p, const char *path) { /* Note that we do *not* have generic checks for /etc or /run in place, since with * them we couldn't discern configuration from transient or generated units */ - parent = dirname_malloc(path); - if (!parent) - return -ENOMEM; + if (check_parent) { + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; - return path_equal_ptr(parent, p->persistent_config) || - path_equal_ptr(parent, p->runtime_config); + path = parent; + } + + return path_equal_ptr(path, p->persistent_config) || + path_equal_ptr(path, p->runtime_config); } -static int path_is_runtime(const LookupPaths *p, const char *path) { +static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) { _cleanup_free_ char *parent = NULL; const char *rpath; @@ -239,16 +243,20 @@ static int path_is_runtime(const LookupPaths *p, const char *path) { if (rpath && path_startswith(rpath, "/run")) return true; - parent = dirname_malloc(path); - if (!parent) - return -ENOMEM; + if (check_parent) { + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; - return path_equal_ptr(parent, p->runtime_config) || - path_equal_ptr(parent, p->generator) || - path_equal_ptr(parent, p->generator_early) || - path_equal_ptr(parent, p->generator_late) || - path_equal_ptr(parent, p->transient) || - path_equal_ptr(parent, p->runtime_control); + path = parent; + } + + return path_equal_ptr(path, p->runtime_config) || + path_equal_ptr(path, p->generator) || + path_equal_ptr(path, p->generator_early) || + path_equal_ptr(path, p->generator_late) || + path_equal_ptr(path, p->transient) || + path_equal_ptr(path, p->runtime_control); } static int path_is_vendor(const LookupPaths *p, const char *path) { @@ -677,7 +685,6 @@ static int find_symlinks_fd( int fd, const char *path, const char *config_path, - const LookupPaths *lp, bool *same_name_link) { _cleanup_closedir_ DIR *d = NULL; @@ -688,7 +695,6 @@ static int find_symlinks_fd( assert(fd >= 0); assert(path); assert(config_path); - assert(lp); assert(same_name_link); d = fdopendir(fd); @@ -722,7 +728,7 @@ static int find_symlinks_fd( } /* This will close nfd, regardless whether it succeeds or not */ - q = find_symlinks_fd(root_dir, name, nfd, p, config_path, lp, same_name_link); + q = find_symlinks_fd(root_dir, name, nfd, p, config_path, same_name_link); if (q > 0) return 1; if (r == 0) @@ -800,7 +806,6 @@ static int find_symlinks( const char *root_dir, const char *name, const char *config_path, - const LookupPaths *lp, bool *same_name_link) { int fd; @@ -817,44 +822,82 @@ static int find_symlinks( } /* This takes possession of fd and closes it */ - return find_symlinks_fd(root_dir, name, fd, config_path, config_path, lp, same_name_link); + return find_symlinks_fd(root_dir, name, fd, config_path, config_path, same_name_link); } static int find_symlinks_in_scope( - UnitFileScope scope, const LookupPaths *paths, const char *name, UnitFileState *state) { - bool same_name_link_runtime = false, same_name_link = false; + bool same_name_link_runtime = false, same_name_link_config = false; + bool enabled_in_runtime = false, enabled_at_all = false; + char **p; int r; - assert(scope >= 0); - assert(scope < _UNIT_FILE_SCOPE_MAX); assert(paths); assert(name); - /* First look in the persistent config path */ - r = find_symlinks(paths->root_dir, name, paths->persistent_config, paths, &same_name_link); - if (r < 0) - return r; - if (r > 0) { - *state = UNIT_FILE_ENABLED; - return r; + STRV_FOREACH(p, paths->search_path) { + bool same_name_link = false; + + r = find_symlinks(paths->root_dir, name, *p, &same_name_link); + if (r < 0) + return r; + if (r > 0) { + /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */ + + r = path_is_config(paths, *p, false); + if (r < 0) + return r; + if (r > 0) { + /* This is the best outcome, let's return it immediately. */ + *state = UNIT_FILE_ENABLED; + return 1; + } + + r = path_is_runtime(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + enabled_in_runtime = true; + else + enabled_at_all = true; + + } else if (same_name_link) { + + r = path_is_config(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + same_name_link_config = true; + else { + r = path_is_runtime(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + same_name_link_runtime = true; + } + } } - /* Then look in runtime config path */ - r = find_symlinks(paths->root_dir, name, paths->runtime_config, paths, &same_name_link_runtime); - if (r < 0) - return r; - if (r > 0) { + if (enabled_in_runtime) { *state = UNIT_FILE_ENABLED_RUNTIME; - return r; + return 1; + } + + /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path + * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only + * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate + * something, and hence are a much stronger concept. */ + if (enabled_at_all && unit_name_is_valid(name, UNIT_NAME_INSTANCE)) { + *state = UNIT_FILE_STATIC; + return 1; } /* Hmm, we didn't find it, but maybe we found the same name * link? */ - if (same_name_link) { + if (same_name_link_config) { *state = UNIT_FILE_LINKED; return 1; } @@ -1354,7 +1397,8 @@ static int install_info_follow( InstallContext *c, UnitFileInstallInfo *i, const char *root_dir, - SearchFlags flags) { + SearchFlags flags, + bool ignore_different_name) { assert(c); assert(i); @@ -1367,7 +1411,7 @@ static int install_info_follow( /* If the basename doesn't match, the caller should add a * complete new entry for this. */ - if (!streq(basename(i->symlink_target), i->name)) + if (!ignore_different_name && !streq(basename(i->symlink_target), i->name)) return -EXDEV; free_and_replace(i->path, i->symlink_target); @@ -1408,14 +1452,14 @@ static int install_info_traverse( return -ELOOP; if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) { - r = path_is_config(paths, i->path); + r = path_is_config(paths, i->path, true); if (r < 0) return r; if (r > 0) return -ELOOP; } - r = install_info_follow(c, i, paths->root_dir, flags); + r = install_info_follow(c, i, paths->root_dir, flags, false); if (r == -EXDEV) { _cleanup_free_ char *buffer = NULL; const char *bn; @@ -1439,6 +1483,18 @@ static int install_info_traverse( if (r < 0) return r; + if (streq(buffer, i->name)) { + + /* We filled in the instance, and the target stayed the same? If so, then let's + * honour the link as it is. */ + + r = install_info_follow(c, i, paths->root_dir, flags, true); + if (r < 0) + return r; + + continue; + } + bn = buffer; } @@ -2027,7 +2083,7 @@ static int path_shall_revert(const LookupPaths *paths, const char *path) { /* Checks whether the path is one where the drop-in directories shall be removed. */ - r = path_is_config(paths, path); + r = path_is_config(paths, path, true); if (r != 0) return r; @@ -2135,7 +2191,7 @@ int unit_file_revert( if (errno != ENOENT) return -errno; } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { - r = path_is_config(&paths, path); + r = path_is_config(&paths, path, true); if (r < 0) return r; if (r > 0) { @@ -2481,7 +2537,7 @@ static int unit_file_lookup_state( switch (i->type) { case UNIT_FILE_TYPE_MASKED: - r = path_is_runtime(paths, i->path); + r = path_is_runtime(paths, i->path, true); if (r < 0) return r; @@ -2505,7 +2561,7 @@ static int unit_file_lookup_state( break; } - r = find_symlinks_in_scope(scope, paths, i->name, &state); + r = find_symlinks_in_scope(paths, i->name, &state); if (r < 0) return r; if (r == 0) { diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 609e0619af..e35f18471c 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -750,10 +750,35 @@ int seccomp_restrict_namespaces(unsigned long retain) { SECCOMP_FOREACH_LOCAL_ARCH(arch) { _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int clone_reversed_order = -1; unsigned i; log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + switch (arch) { + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X86: + case SCMP_ARCH_X32: + clone_reversed_order = 0; + break; + + case SCMP_ARCH_S390: + case SCMP_ARCH_S390X: + /* On s390/s390x the first two parameters to clone are switched */ + clone_reversed_order = 1; + break; + + /* Please add more definitions here, if you port systemd to other architectures! */ + +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__s390__) && !defined(__s390x__) +#warning "Consider adding the right clone() syscall definitions here!" +#endif + } + + if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */ + continue; + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); if (r < 0) return r; @@ -802,12 +827,20 @@ int seccomp_restrict_namespaces(unsigned long retain) { break; } - r = seccomp_rule_add_exact( - seccomp, - SCMP_ACT_ERRNO(EPERM), - SCMP_SYS(clone), - 1, - SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + if (clone_reversed_order == 0) + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + else + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); if (r < 0) { log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); break; @@ -1086,27 +1119,81 @@ int seccomp_restrict_realtime(void) { } int seccomp_memory_deny_write_execute(void) { + uint32_t arch; int r; SECCOMP_FOREACH_LOCAL_ARCH(arch) { _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0; log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + switch (arch) { + + case SCMP_ARCH_X86: + filter_syscall = SCMP_SYS(mmap2); + block_syscall = SCMP_SYS(mmap); + + /* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We + * ignore that here, which means there's still a way to get writable/executable memory, if an + * IPC key is mapped like this on i386. That's a pity, but no total loss. */ + break; + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + filter_syscall = SCMP_SYS(mmap); + shmat_syscall = SCMP_SYS(shmat); + break; + + /* Please add more definitions here, if you port systemd to other architectures! */ + +#if !defined(__i386__) && !defined(__x86_64__) +#warning "Consider adding the right mmap() syscall definitions here!" +#endif + } + + /* Can't filter mmap() on this arch, then skip it */ + if (filter_syscall == 0) + continue; + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); if (r < 0) return r; - r = seccomp_rule_add_exact( - seccomp, - SCMP_ACT_ERRNO(EPERM), - SCMP_SYS(mmap), - 1, - SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); - if (r < 0) { - log_debug_errno(r, "Failed to add mmap() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); - continue; + if (filter_syscall != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + filter_syscall, + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) { + _cleanup_free_ char *n = NULL; + + n = seccomp_syscall_resolve_num_arch(arch, filter_syscall); + log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m", + strna(n), + seccomp_arch_to_string(arch)); + continue; + } + } + + if (block_syscall != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + block_syscall, + 0); + if (r < 0) { + _cleanup_free_ char *n = NULL; + + n = seccomp_syscall_resolve_num_arch(arch, block_syscall); + log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m", + strna(n), + seccomp_arch_to_string(arch)); + continue; + } } r = seccomp_rule_add_exact( @@ -1120,15 +1207,17 @@ int seccomp_memory_deny_write_execute(void) { continue; } - r = seccomp_rule_add_exact( - seccomp, - SCMP_ACT_ERRNO(EPERM), - SCMP_SYS(shmat), - 1, - SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); - if (r < 0) { - log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); - continue; + if (shmat_syscall != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(shmat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); + if (r < 0) { + log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } } r = seccomp_load(seccomp); diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 2563fcd38a..61f94de638 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -84,6 +84,20 @@ int seccomp_memory_deny_write_execute(void); #define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0 #endif +/* mmap() blocking is only available on some archs for now */ +#if defined(__x86_64__) || defined(__i386__) +#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 0 +#else +#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1 +#endif + +/* we don't know the right order of the clone() parameters except for these archs, for now */ +#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__) || defined(__s390__) +#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 0 +#else +#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 1 +#endif + extern const uint32_t seccomp_local_archs[]; #define SECCOMP_FOREACH_LOCAL_ARCH(arch) \ diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 2964b4e6b2..2809dece50 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -2601,7 +2601,8 @@ static int unit_find_paths( return log_error_errno(r, "Failed to add unit name: %m"); if (dropin_paths) { - r = unit_file_find_dropin_paths(arg_root, lp->search_path, NULL, names, &dropins); + r = unit_file_find_dropin_conf_paths(arg_root, lp->search_path, + NULL, names, &dropins); if (r < 0) return r; } @@ -6217,21 +6218,25 @@ static int enable_unit(int argc, char *argv[], void *userdata) { "4) In case of template units, the unit is meant to be enabled with some\n" " instance name specified."); - if (arg_now && n_changes > 0 && STR_IN_SET(argv[0], "enable", "disable", "mask")) { - char *new_args[n_changes + 2]; + if (arg_now && STR_IN_SET(argv[0], "enable", "disable", "mask")) { sd_bus *bus; - unsigned i; + unsigned len, i; r = acquire_bus(BUS_MANAGER, &bus); if (r < 0) goto finish; - new_args[0] = (char*) (streq(argv[0], "enable") ? "start" : "stop"); - for (i = 0; i < n_changes; i++) - new_args[i + 1] = basename(changes[i].path); - new_args[i + 1] = NULL; + len = strv_length(names); + { + char *new_args[len + 2]; - r = start_unit(strv_length(new_args), new_args, userdata); + new_args[0] = (char*) (streq(argv[0], "enable") ? "start" : "stop"); + for (i = 0; i < len; i++) + new_args[i + 1] = basename(names[i]); + new_args[i + 1] = NULL; + + r = start_unit(len + 1, new_args, userdata); + } } finish: diff --git a/src/test/test-install-root.c b/src/test/test-install-root.c index d0bc8004f3..575401cb91 100644 --- a/src/test/test-install-root.c +++ b/src/test/test-install-root.c @@ -736,6 +736,28 @@ static void test_preset_order(const char *root) { assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "prefix-2.service", &state) >= 0 && state == UNIT_FILE_DISABLED); } +static void test_static_instance(const char *root) { + UnitFileState state; + const char *p; + + assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) == -ENOENT); + assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) == -ENOENT); + + p = strjoina(root, "/usr/lib/systemd/system/static-instance@.service"); + assert_se(write_string_file(p, + "[Install]\n" + "WantedBy=multi-user.target\n", WRITE_STRING_FILE_CREATE) >= 0); + + assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) >= 0 && state == UNIT_FILE_DISABLED); + assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) >= 0 && state == UNIT_FILE_DISABLED); + + p = strjoina(root, "/usr/lib/systemd/system/static-instance@foo.service"); + assert_se(symlink("static-instance@.service", p) >= 0); + + assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@.service", &state) >= 0 && state == UNIT_FILE_DISABLED); + assert_se(unit_file_get_state(UNIT_FILE_SYSTEM, root, "static-instance@foo.service", &state) >= 0 && state == UNIT_FILE_STATIC); +} + int main(int argc, char *argv[]) { char root[] = "/tmp/rootXXXXXX"; const char *p; @@ -766,6 +788,7 @@ int main(int argc, char *argv[]) { test_preset_and_list(root); test_preset_order(root); test_revert(root); + test_static_instance(root); assert_se(rm_rf(root, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); diff --git a/src/test/test-nss.c b/src/test/test-nss.c index 4ccfe75147..b59cb7aa69 100644 --- a/src/test/test-nss.c +++ b/src/test/test-nss.c @@ -511,7 +511,7 @@ int main(int argc, char **argv) { _cleanup_free_ char *dir = NULL; _cleanup_strv_free_ char **modules = NULL, **names = NULL; _cleanup_free_ struct local_address *addresses = NULL; - int n_addresses; + int n_addresses = 0; char **module; int r; diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c index 54e7947c2f..34a1275162 100644 --- a/src/test/test-seccomp.c +++ b/src/test/test-seccomp.c @@ -158,6 +158,8 @@ static void test_restrict_namespace(void) { assert_se(streq(s, "cgroup ipc net mnt pid user uts")); assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL); +#if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0 + if (!is_seccomp_available()) return; if (geteuid() != 0) @@ -216,6 +218,7 @@ static void test_restrict_namespace(void) { } assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS); +#endif } static void test_protect_sysctl(void) { @@ -384,11 +387,21 @@ static void test_memory_deny_write_execute(void) { assert_se(p != MAP_FAILED); assert_se(munmap(p, page_size()) >= 0); - seccomp_memory_deny_write_execute(); + p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0); + assert_se(p != MAP_FAILED); + assert_se(munmap(p, page_size()) >= 0); + + assert_se(seccomp_memory_deny_write_execute() >= 0); +#if SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN + p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0); + assert_se(p != MAP_FAILED); + assert_se(munmap(p, page_size()) >= 0); +#else p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0); assert_se(p == MAP_FAILED); assert_se(errno == EPERM); +#endif p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0); assert_se(p != MAP_FAILED); |