diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/automount.c | 5 | ||||
-rw-r--r-- | src/core/busname.c | 2 | ||||
-rw-r--r-- | src/core/dbus-execute.c | 9 | ||||
-rw-r--r-- | src/core/dbus-unit.c | 2 | ||||
-rw-r--r-- | src/core/dbus.c | 4 | ||||
-rw-r--r-- | src/core/dynamic-user.c | 2 | ||||
-rw-r--r-- | src/core/execute.c | 408 | ||||
-rw-r--r-- | src/core/execute.h | 2 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/main.c | 37 | ||||
-rw-r--r-- | src/core/manager.c | 165 | ||||
-rw-r--r-- | src/core/manager.h | 17 | ||||
-rw-r--r-- | src/core/mount.c | 48 | ||||
-rw-r--r-- | src/core/namespace.c | 586 | ||||
-rw-r--r-- | src/core/namespace.h | 3 | ||||
-rw-r--r-- | src/core/service.c | 15 | ||||
-rw-r--r-- | src/core/socket.c | 11 | ||||
-rw-r--r-- | src/core/swap.c | 2 | ||||
-rw-r--r-- | src/core/system.conf | 1 | ||||
-rw-r--r-- | src/core/umount.c | 2 | ||||
-rw-r--r-- | src/core/unit.c | 11 |
21 files changed, 1002 insertions, 332 deletions
diff --git a/src/core/automount.c b/src/core/automount.c index 00295cf769..bdc0e06965 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -271,6 +271,11 @@ static int automount_coldplug(Unit *u) { return r; (void) sd_event_source_set_description(a->pipe_event_source, "automount-io"); + if (a->deserialized_state == AUTOMOUNT_RUNNING) { + r = automount_start_expire(a); + if (r < 0) + log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m"); + } } automount_set_state(a, a->deserialized_state); diff --git a/src/core/busname.c b/src/core/busname.c index 7952cd31aa..a69e3831f6 100644 --- a/src/core/busname.c +++ b/src/core/busname.c @@ -868,7 +868,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) { n->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = BUSNAME_SUCCESS; else if (code == CLD_EXITED) f = BUSNAME_FAILURE_EXIT_CODE; diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 7e33a2d201..eec4500c8c 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -707,6 +707,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1072,7 +1074,8 @@ int bus_exec_context_set_transient_property( "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", - "RestrictRealtime", "DynamicUser", "RemoveIPC")) { + "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", + "ProtectControlGroups")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1106,6 +1109,10 @@ int bus_exec_context_set_transient_property( c->dynamic_user = b; else if (streq(name, "RemoveIPC")) c->remove_ipc = b; + else if (streq(name, "ProtectKernelTunables")) + c->protect_kernel_tunables = b; + else if (streq(name, "ProtectControlGroups")) + c->protect_control_groups = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 1b86bdde43..5020dfba4b 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1167,7 +1167,7 @@ void bus_unit_send_removed_signal(Unit *u) { int r; assert(u); - if (!u->sent_dbus_new_signal) + if (!u->sent_dbus_new_signal || u->in_dbus_queue) bus_unit_send_change_signal(u); if (!u->id) diff --git a/src/core/dbus.c b/src/core/dbus.c index 1e41a42aa6..070974fe66 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -964,10 +964,6 @@ static int bus_init_private(Manager *m) { if (m->private_listen_fd >= 0) return 0; - /* We don't need the private socket if we have kdbus */ - if (m->kdbus_fd >= 0) - return 0; - if (MANAGER_IS_SYSTEM(m)) { /* We want the private bus only when running as init */ diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c index 310aaa94e1..1043da3eb7 100644 --- a/src/core/dynamic-user.c +++ b/src/core/dynamic-user.c @@ -233,7 +233,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) { if (st.st_nlink > 0) break; - /* Oh, bummer, we got got the lock, but the file was unlinked between the time we opened it and + /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and * got the lock. Close it, and try again. */ lock_fd = safe_close(lock_fd); } diff --git a/src/core/execute.c b/src/core/execute.c index 2026137721..d5c4e60796 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -781,9 +781,10 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ k++; } - if (setgroups(k, gids) < 0) { + r = maybe_setgroups(k, gids); + if (r < 0) { free(gids); - return -errno; + return r; } free(gids); @@ -837,14 +838,19 @@ static int null_conv( return PAM_CONV_ERR; } +#endif + static int setup_pam( const char *name, const char *user, uid_t uid, + gid_t gid, const char *tty, char ***env, int fds[], unsigned n_fds) { +#ifdef HAVE_PAM + static const struct pam_conv conv = { .conv = null_conv, .appdata_ptr = NULL @@ -944,8 +950,14 @@ static int setup_pam( * and this will make PR_SET_PDEATHSIG work in most cases. * If this fails, ignore the error - but expect sd-pam threads * to fail to exit normally */ + + r = maybe_setgroups(0, NULL); + if (r < 0) + log_warning_errno(r, "Failed to setgroups() in sd-pam: %m"); + if (setresgid(gid, gid, gid) < 0) + log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m"); if (setresuid(uid, uid, uid) < 0) - log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m"); + log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m"); (void) ignore_signals(SIGPIPE, -1); @@ -1038,8 +1050,10 @@ fail: closelog(); return r; -} +#else + return 0; #endif +} static void rename_process_from_path(const char *path) { char process_name[11]; @@ -1273,6 +1287,10 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (!seccomp) return -ENOMEM; + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + r = seccomp_rule_add( seccomp, SCMP_ACT_ERRNO(EPERM), @@ -1322,6 +1340,10 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (!seccomp) return -ENOMEM; + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) if (permitted_policies[i] > max_policy) @@ -1375,12 +1397,121 @@ finish: return r; } +static int apply_protect_sysctl(Unit *u, const ExecContext *c) { + scmp_filter_ctx *seccomp; + int r; + + assert(c); + + /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but + * let's protect even those systems where this is left on in the kernel. */ + + if (skip_seccomp_unavailable(u, "ProtectKernelTunables=")) + return 0; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(_sysctl), + 0); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + +static int apply_private_devices(Unit *u, const ExecContext *c) { + const SystemCallFilterSet *set; + scmp_filter_ctx *seccomp; + const char *sys; + bool syscalls_found = false; + int r; + + assert(c); + + /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ + + if (skip_seccomp_unavailable(u, "PrivateDevices=")) + return 0; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + for (set = syscall_filter_sets; set->set_name; set++) + if (streq(set->set_name, "@raw-io")) { + syscalls_found = true; + break; + } + + /* We should never fail here */ + if (!syscalls_found) { + r = -EOPNOTSUPP; + goto finish; + } + + NULSTR_FOREACH(sys, set->value) { + int id; + bool add = true; + +#ifndef __NR_s390_pci_mmio_read + if (streq(sys, "s390_pci_mmio_read")) + add = false; +#endif +#ifndef __NR_s390_pci_mmio_write + if (streq(sys, "s390_pci_mmio_write")) + add = false; +#endif + + if (!add) + continue; + + id = seccomp_syscall_resolve_name(sys); + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + id, 0); + if (r < 0) + goto finish; + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { assert(idle_pipe); - idle_pipe[1] = safe_close(idle_pipe[1]); idle_pipe[2] = safe_close(idle_pipe[2]); @@ -1581,7 +1712,9 @@ static bool exec_needs_mount_namespace( if (context->private_devices || context->protect_system != PROTECT_SYSTEM_NO || - context->protect_home != PROTECT_HOME_NO) + context->protect_home != PROTECT_HOME_NO || + context->protect_kernel_tunables || + context->protect_control_groups) return true; return false; @@ -1740,6 +1873,111 @@ static int setup_private_users(uid_t uid, gid_t gid) { return 0; } +static int setup_runtime_directory( + const ExecContext *context, + const ExecParameters *params, + uid_t uid, + gid_t gid) { + + char **rt; + int r; + + assert(context); + assert(params); + + STRV_FOREACH(rt, context->runtime_directory) { + _cleanup_free_ char *p; + + p = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!p) + return -ENOMEM; + + r = mkdir_p_label(p, context->runtime_directory_mode); + if (r < 0) + return r; + + r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); + if (r < 0) + return r; + } + + return 0; +} + +static int setup_smack( + const ExecContext *context, + const ExecCommand *command) { + +#ifdef HAVE_SMACK + int r; + + assert(context); + assert(command); + + if (!mac_smack_use()) + return 0; + + if (context->smack_process_label) { + r = mac_smack_apply_pid(0, context->smack_process_label); + if (r < 0) + return r; + } +#ifdef SMACK_DEFAULT_PROCESS_LABEL + else { + _cleanup_free_ char *exec_label = NULL; + + r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); + if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) + return r; + + r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); + if (r < 0) + return r; + } +#endif +#endif + + return 0; +} + +static int compile_read_write_paths( + const ExecContext *context, + const ExecParameters *params, + char ***ret) { + + _cleanup_strv_free_ char **l = NULL; + char **rt; + + /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all + * runtime directories. */ + + if (strv_isempty(context->read_write_paths) && + strv_isempty(context->runtime_directory)) { + *ret = NULL; /* NOP if neither is set */ + return 0; + } + + l = strv_copy(context->read_write_paths); + if (!l) + return -ENOMEM; + + STRV_FOREACH(rt, context->runtime_directory) { + char *s; + + s = strjoin(params->runtime_prefix, "/", *rt, NULL); + if (!s) + return -ENOMEM; + + if (strv_consume(&l, s) < 0) + return -ENOMEM; + } + + *ret = l; + l = NULL; + + return 0; +} + static void append_socket_pair(int *array, unsigned *n, int pair[2]) { assert(array); assert(n); @@ -1796,6 +2034,37 @@ static int close_remaining_fds( return close_all_fds(dont_close, n_dont_close); } +static bool context_has_address_families(const ExecContext *c) { + assert(c); + + return c->address_families_whitelist || + !set_isempty(c->address_families); +} + +static bool context_has_syscall_filters(const ExecContext *c) { + assert(c); + + return c->syscall_whitelist || + !set_isempty(c->syscall_filter) || + !set_isempty(c->syscall_archs); +} + +static bool context_has_no_new_privileges(const ExecContext *c) { + assert(c); + + if (c->no_new_privileges) + return true; + + if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */ + return false; + + return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */ + c->memory_deny_write_execute || + c->restrict_realtime || + c->protect_kernel_tunables || + context_has_syscall_filters(c); +} + static int send_user_lookup( Unit *unit, int user_lookup_fd, @@ -1940,22 +2209,14 @@ static int exec_child( } else { if (context->user) { username = context->user; - r = get_user_creds(&username, &uid, &gid, &home, &shell); + r = get_user_creds_clean(&username, &uid, &gid, &home, &shell); if (r < 0) { *exit_status = EXIT_USER; return r; } - /* Don't set $HOME or $SHELL if they are are not particularly enlightening anyway. */ - if (isempty(home) || path_equal(home, "/")) - home = NULL; - - if (isempty(shell) || PATH_IN_SET(shell, - "/bin/nologin", - "/sbin/nologin", - "/usr/bin/nologin", - "/usr/sbin/nologin")) - shell = NULL; + /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway + * (i.e. are "/" or "/bin/nologin"). */ } if (context->group) { @@ -2108,28 +2369,10 @@ static int exec_child( } if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) { - char **rt; - - STRV_FOREACH(rt, context->runtime_directory) { - _cleanup_free_ char *p; - - p = strjoin(params->runtime_prefix, "/", *rt, NULL); - if (!p) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return -ENOMEM; - } - - r = mkdir_p_label(p, context->runtime_directory_mode); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } - - r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid); - if (r < 0) { - *exit_status = EXIT_RUNTIME_DIRECTORY; - return r; - } + r = setup_runtime_directory(context, params, uid, gid); + if (r < 0) { + *exit_status = EXIT_RUNTIME_DIRECTORY; + return r; } } @@ -2168,49 +2411,22 @@ static int exec_child( } accum_env = strv_env_clean(accum_env); - umask(context->umask); + (void) umask(context->umask); if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - r = enforce_groups(context, username, gid); + r = setup_smack(context, command); if (r < 0) { - *exit_status = EXIT_GROUP; + *exit_status = EXIT_SMACK_PROCESS_LABEL; return r; } -#ifdef HAVE_SMACK - if (context->smack_process_label) { - r = mac_smack_apply_pid(0, context->smack_process_label); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#ifdef SMACK_DEFAULT_PROCESS_LABEL - else { - _cleanup_free_ char *exec_label = NULL; - - r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label); - if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - } -#endif -#endif -#ifdef HAVE_PAM if (context->pam_name && username) { - r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds); + r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { *exit_status = EXIT_PAM; return r; } } -#endif } if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) { @@ -2222,8 +2438,8 @@ static int exec_child( } needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); - if (needs_mount_namespace) { + _cleanup_free_ char **rw = NULL; char *tmp = NULL, *var = NULL; /* The runtime struct only contains the parent @@ -2239,14 +2455,22 @@ static int exec_child( var = strjoina(runtime->var_tmp_dir, "/tmp"); } + r = compile_read_write_paths(context, params, &rw); + if (r < 0) { + *exit_status = EXIT_NAMESPACE; + return r; + } + r = setup_namespace( (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL, - context->read_write_paths, + rw, context->read_only_paths, context->inaccessible_paths, tmp, var, context->private_devices, + context->protect_kernel_tunables, + context->protect_control_groups, context->protect_home, context->protect_system, context->mount_flags); @@ -2264,6 +2488,14 @@ static int exec_child( } } + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { + r = enforce_groups(context, username, gid); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; + } + } + if (context->working_directory_home) wd = home; else if (context->working_directory) @@ -2335,11 +2567,6 @@ static int exec_child( if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - bool use_address_families = context->address_families_whitelist || - !set_isempty(context->address_families); - bool use_syscall_filter = context->syscall_whitelist || - !set_isempty(context->syscall_filter) || - !set_isempty(context->syscall_archs); int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { @@ -2416,15 +2643,14 @@ static int exec_child( return -errno; } - if (context->no_new_privileges || - (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter))) + if (context_has_no_new_privileges(context)) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; } #ifdef HAVE_SECCOMP - if (use_address_families) { + if (context_has_address_families(context)) { r = apply_address_families(unit, context); if (r < 0) { *exit_status = EXIT_ADDRESS_FAMILIES; @@ -2448,7 +2674,23 @@ static int exec_child( } } - if (use_syscall_filter) { + if (context->protect_kernel_tunables) { + r = apply_protect_sysctl(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + + if (context->private_devices) { + r = apply_private_devices(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + + if (context_has_syscall_filters(context)) { r = apply_seccomp(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; @@ -2880,6 +3122,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sNonBlocking: %s\n" "%sPrivateTmp: %s\n" "%sPrivateDevices: %s\n" + "%sProtectKernelTunables: %s\n" + "%sProtectControlGroups: %s\n" "%sPrivateNetwork: %s\n" "%sPrivateUsers: %s\n" "%sProtectHome: %s\n" @@ -2893,6 +3137,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->non_blocking), prefix, yes_no(c->private_tmp), prefix, yes_no(c->private_devices), + prefix, yes_no(c->protect_kernel_tunables), + prefix, yes_no(c->protect_control_groups), prefix, yes_no(c->private_network), prefix, yes_no(c->private_users), prefix, protect_home_to_string(c->protect_home), diff --git a/src/core/execute.h b/src/core/execute.h index 6082c42aba..449180c903 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -174,6 +174,8 @@ struct ExecContext { bool private_users; ProtectSystem protect_system; ProtectHome protect_home; + bool protect_kernel_tunables; + bool protect_control_groups; bool no_new_privileges; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 2e6c965aec..c49c1d6732 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -89,6 +89,8 @@ $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, $1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) +$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) +$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users) $1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context) diff --git a/src/core/main.c b/src/core/main.c index 7d8322ebd8..4b82a57b3c 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -131,6 +131,7 @@ static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; +static CADBurstAction arg_cad_burst_action = CAD_BURST_ACTION_REBOOT; noreturn static void freeze_or_reboot(void) { @@ -202,7 +203,7 @@ noreturn static void crash(int sig) { pid, sigchld_code_to_string(status.si_code), status.si_status, strna(status.si_code == CLD_EXITED - ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL) + ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL) : signal_to_string(status.si_status))); else log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid); @@ -648,6 +649,8 @@ static int config_parse_join_controllers(const char *unit, return 0; } +static DEFINE_CONFIG_PARSE_ENUM(config_parse_cad_burst_action, cad_burst_action, CADBurstAction, "Failed to parse service restart specifier"); + static int parse_config_file(void) { const ConfigTableItem items[] = { @@ -702,6 +705,7 @@ static int parse_config_file(void) { { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting }, { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting }, { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max }, + { "Manager", "CtrlAltDelBurstAction", config_parse_cad_burst_action, 0, &arg_cad_burst_action}, {} }; @@ -715,7 +719,7 @@ static int parse_config_file(void) { CONF_PATHS_NULSTR("systemd/system.conf.d") : CONF_PATHS_NULSTR("systemd/user.conf.d"); - config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL); + config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL); /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY * like everywhere else. */ @@ -996,10 +1000,8 @@ static int parse_argv(int argc, char *argv[]) { case ARG_MACHINE_ID: r = set_machine_id(optarg); - if (r < 0) { - log_error("MachineID '%s' is not valid.", optarg); - return r; - } + if (r < 0) + return log_error_errno(r, "MachineID '%s' is not valid.", optarg); break; case 'h': @@ -1530,15 +1532,9 @@ int main(int argc, char *argv[]) { * need to do that for user instances since they never log * into the console. */ log_show_color(colors_enabled()); - make_null_stdio(); - } - - /* Initialize default unit */ - r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET); - if (r < 0) { - log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET); - error_message = "Failed to set default unit"; - goto finish; + r = make_null_stdio(); + if (r < 0) + log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m"); } r = initialize_join_controllers(); @@ -1588,6 +1584,16 @@ int main(int argc, char *argv[]) { goto finish; } + /* Initialize default unit */ + if (!arg_default_unit) { + arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET); + if (!arg_default_unit) { + r = log_oom(); + error_message = "Failed to set default unit"; + goto finish; + } + } + if (arg_action == ACTION_TEST && geteuid() == 0) { log_error("Don't run test mode as root."); @@ -1796,6 +1802,7 @@ int main(int argc, char *argv[]) { m->initrd_timestamp = initrd_timestamp; m->security_start_timestamp = security_start_timestamp; m->security_finish_timestamp = security_finish_timestamp; + m->cad_burst_action = arg_cad_burst_action; manager_set_defaults(m); manager_set_show_status(m, arg_show_status); diff --git a/src/core/manager.c b/src/core/manager.c index b58f68fa7a..c1dce62a18 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -590,7 +590,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd = - m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd = + m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd = m->ask_password_inotify_fd = -1; m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1; @@ -661,9 +661,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { goto fail; } - /* Note that we set up neither kdbus, nor the notify fd - * here. We do that after deserialization, since they might - * have gotten serialized across the reexec. */ + /* Note that we do not set up the notify fd here. We do that after deserialization, + * since they might have gotten serialized across the reexec. */ m->taint_usr = dir_is_empty("/usr") > 0; @@ -879,7 +878,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) { return 0; try_bus_connect = - m->kdbus_fd >= 0 || reexecuting || (MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS")); @@ -1084,7 +1082,6 @@ Manager* manager_free(Manager *m) { safe_close(m->notify_fd); safe_close(m->cgroups_agent_fd); safe_close(m->time_change_fd); - safe_close(m->kdbus_fd); safe_close_pair(m->user_lookup_fds); manager_close_ask_password(m); @@ -1239,9 +1236,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { return r; /* Make sure the transient directory always exists, so that it remains in the search path */ - r = mkdir_p_label(m->lookup_paths.transient, 0755); - if (r < 0) - return r; + if (!m->test_run) { + r = mkdir_p_label(m->lookup_paths.transient, 0755); + if (r < 0) + return r; + } dual_timestamp_get(&m->generators_start_timestamp); r = manager_run_generators(m); @@ -1287,7 +1286,7 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { if (q < 0 && r == 0) r = q; - /* We might have deserialized the kdbus control fd, but if we didn't, then let's create the bus now. */ + /* Let's connect to the bus now. */ (void) manager_connect_bus(m, !!serialization); (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed); @@ -1660,13 +1659,12 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui return 0; } -static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, size_t n, FDSet *fds) { +static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, FDSet *fds) { _cleanup_strv_free_ char **tags = NULL; assert(m); assert(u); assert(buf); - assert(n > 0); tags = strv_split(buf, "\n\r"); if (!tags) { @@ -1676,8 +1674,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const if (UNIT_VTABLE(u)->notify_message) UNIT_VTABLE(u)->notify_message(u, pid, tags, fds); - else - log_unit_debug(u, "Got notification message for unit. Ignoring."); + else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { + _cleanup_free_ char *x = NULL, *y = NULL; + + x = cescape(buf); + if (x) + y = ellipsize(x, 20, 90); + log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y)); + } } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { @@ -1703,7 +1707,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t struct cmsghdr *cmsg; struct ucred *ucred = NULL; - bool found = false; Unit *u1, *u2, *u3; int r, *fd_array = NULL; unsigned n_fds = 0; @@ -1717,12 +1720,15 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } - n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC); if (n < 0) { - if (errno == EAGAIN || errno == EINTR) - return 0; + if (IN_SET(errno, EAGAIN, EINTR)) + return 0; /* Spurious wakeup, try again */ - return -errno; + /* If this is any other, real error, then let's stop processing this socket. This of course means we + * won't take notification messages anymore, but that's still better than busy looping around this: + * being woken up over and over again but being unable to actually read the message off the socket. */ + return log_error_errno(errno, "Failed to receive notification message: %m"); } CMSG_FOREACH(cmsg, &msghdr) { @@ -1745,7 +1751,8 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t r = fdset_new_array(&fds, fd_array, n_fds); if (r < 0) { close_many(fd_array, n_fds); - return log_oom(); + log_oom(); + return 0; } } @@ -1754,38 +1761,40 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t return 0; } - if ((size_t) n >= sizeof(buf)) { + if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) { log_warning("Received notify message exceeded maximum size. Ignoring."); return 0; } + /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one + * trailing NUL byte in the message, but don't expect it. */ + if (n > 1 && memchr(buf, 0, n-1)) { + log_warning("Received notify message with embedded NUL bytes. Ignoring."); + return 0; + } + + /* Make sure it's NUL-terminated. */ buf[n] = 0; /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); - if (u1) { - manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds); - found = true; - } + if (u1) + manager_invoke_notify_message(m, u1, ucred->pid, buf, fds); u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); - if (u2 && u2 != u1) { - manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds); - found = true; - } + if (u2 && u2 != u1) + manager_invoke_notify_message(m, u2, ucred->pid, buf, fds); u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); - if (u3 && u3 != u2 && u3 != u1) { - manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds); - found = true; - } + if (u3 && u3 != u2 && u3 != u1) + manager_invoke_notify_message(m, u3, ucred->pid, buf, fds); - if (!found) + if (!u1 && !u2 && !u3) log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid); if (fdset_size(fds) > 0) - log_warning("Got auxiliary fds with notification message, closing all."); + log_warning("Got extra auxiliary fds with notification message, closing them."); return 0; } @@ -1890,6 +1899,35 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) { return r; } +static void manager_handle_ctrl_alt_del(Manager *m) { + /* If the user presses C-A-D more than + * 7 times within 2s, we reboot/shutdown immediately, + * unless it was disabled in system.conf */ + + if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == CAD_BURST_ACTION_IGNORE) + manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); + else { + switch (m->cad_burst_action) { + + case CAD_BURST_ACTION_REBOOT: + m->exit_code = MANAGER_REBOOT; + break; + + case CAD_BURST_ACTION_POWEROFF: + m->exit_code = MANAGER_POWEROFF; + break; + + default: + assert_not_reached("Unknown action."); + } + + log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, performing immediate %s.", + cad_burst_action_to_string(m->cad_burst_action)); + status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, performing immediate %s.", + cad_burst_action_to_string(m->cad_burst_action)); + } +} + static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; ssize_t n; @@ -1908,14 +1946,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t for (;;) { n = read(m->signal_fd, &sfsi, sizeof(sfsi)); if (n != sizeof(sfsi)) { + if (n >= 0) { + log_warning("Truncated read from signal fd (%zu bytes)!", n); + return 0; + } - if (n >= 0) - return -EIO; - - if (errno == EINTR || errno == EAGAIN) + if (IN_SET(errno, EINTR, EAGAIN)) break; - return -errno; + /* We return an error here, which will kill this handler, + * to avoid a busy loop on read error. */ + return log_error_errno(errno, "Reading from signal fd failed: %m"); } log_received_signal(sfsi.ssi_signo == SIGCHLD || @@ -1941,19 +1982,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t case SIGINT: if (MANAGER_IS_SYSTEM(m)) { - - /* If the user presses C-A-D more than - * 7 times within 2s, we reboot - * immediately. */ - - if (ratelimit_test(&m->ctrl_alt_del_ratelimit)) - manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY); - else { - log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately."); - status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately."); - m->exit_code = MANAGER_REBOOT; - } - + manager_handle_ctrl_alt_del(m); break; } @@ -2481,16 +2510,6 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { fprintf(f, "user-lookup=%i %i\n", copy0, copy1); } - if (m->kdbus_fd >= 0) { - int copy; - - copy = fdset_put_dup(fds, m->kdbus_fd); - if (copy < 0) - return copy; - - fprintf(f, "kdbus-fd=%i\n", copy); - } - bus_track_serialize(m->subscribed, f, "subscribed"); r = dynamic_user_serialize(m, f, fds); @@ -2678,16 +2697,6 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { m->user_lookup_fds[1] = fdset_remove(fds, fd1); } - } else if (startswith(l, "kdbus-fd=")) { - int fd; - - if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) - log_debug("Failed to parse kdbus fd: %s", l + 9); - else { - safe_close(m->kdbus_fd); - m->kdbus_fd = fdset_remove(fds, fd); - } - } else if (startswith(l, "dynamic-user=")) dynamic_user_deserialize_one(m, l + 13, fds); else if (startswith(l, "destroy-ipc-uid=")) @@ -2699,7 +2708,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { if (strv_extend(&m->deserialized_subscribed, l+11) < 0) log_oom(); - } else + } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */ log_debug("Unknown serialization item '%s'", l); } @@ -3560,3 +3569,11 @@ static const char *const manager_state_table[_MANAGER_STATE_MAX] = { }; DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState); + +static const char *const cad_burst_action_table[_CAD_BURST_ACTION_MAX] = { + [CAD_BURST_ACTION_IGNORE] = "ignore", + [CAD_BURST_ACTION_REBOOT] = "reboot-force", + [CAD_BURST_ACTION_POWEROFF] = "poweroff-force", +}; + +DEFINE_STRING_TABLE_LOOKUP(cad_burst_action, CADBurstAction); diff --git a/src/core/manager.h b/src/core/manager.h index b9f2e4b5a1..495440b446 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -62,6 +62,14 @@ typedef enum ManagerExitCode { _MANAGER_EXIT_CODE_INVALID = -1 } ManagerExitCode; +typedef enum CADBurstAction { + CAD_BURST_ACTION_IGNORE, + CAD_BURST_ACTION_REBOOT, + CAD_BURST_ACTION_POWEROFF, + _CAD_BURST_ACTION_MAX, + _CAD_BURST_ACTION_INVALID = -1 +} CADBurstAction; + typedef enum StatusType { STATUS_TYPE_EPHEMERAL, STATUS_TYPE_NORMAL, @@ -294,9 +302,6 @@ struct Manager { * value where Unit objects are contained. */ Hashmap *units_requiring_mounts_for; - /* Reference to the kdbus bus control fd */ - int kdbus_fd; - /* Used for processing polkit authorization responses */ Hashmap *polkit_registry; @@ -307,8 +312,9 @@ struct Manager { Hashmap *uid_refs; Hashmap *gid_refs; - /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */ + /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */ RateLimit ctrl_alt_del_ratelimit; + CADBurstAction cad_burst_action; const char *unit_log_field; const char *unit_log_format_string; @@ -401,3 +407,6 @@ void manager_deserialize_gid_refs_one(Manager *m, const char *value); const char *manager_state_to_string(ManagerState m) _const_; ManagerState manager_state_from_string(const char *s) _pure_; + +const char *cad_burst_action_to_string(CADBurstAction a) _const_; +CADBurstAction cad_burst_action_from_string(const char *s) _pure_; diff --git a/src/core/mount.c b/src/core/mount.c index 04025b83b9..f5e67b1d78 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -830,7 +830,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) { fail: log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m"); - if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL) + if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL)) mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES); else mount_enter_dead(m, MOUNT_FAILURE_RESOURCES); @@ -986,18 +986,19 @@ static int mount_start(Unit *u) { /* We cannot fulfill this request right now, try again later * please! */ - if (m->state == MOUNT_UNMOUNTING || - m->state == MOUNT_UNMOUNTING_SIGTERM || - m->state == MOUNT_UNMOUNTING_SIGKILL || - m->state == MOUNT_MOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGKILL) + if (IN_SET(m->state, + MOUNT_UNMOUNTING, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_MOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGKILL)) return -EAGAIN; /* Already on it! */ if (m->state == MOUNT_MOUNTING) return 0; - assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED); + assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED)); r = unit_start_limit_test(u); if (r < 0) { @@ -1019,19 +1020,21 @@ static int mount_stop(Unit *u) { assert(m); /* Already on it */ - if (m->state == MOUNT_UNMOUNTING || - m->state == MOUNT_UNMOUNTING_SIGKILL || - m->state == MOUNT_UNMOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGTERM || - m->state == MOUNT_MOUNTING_SIGKILL) + if (IN_SET(m->state, + MOUNT_UNMOUNTING, + MOUNT_UNMOUNTING_SIGKILL, + MOUNT_UNMOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGTERM, + MOUNT_MOUNTING_SIGKILL)) return 0; - assert(m->state == MOUNT_MOUNTING || - m->state == MOUNT_MOUNTING_DONE || - m->state == MOUNT_MOUNTED || - m->state == MOUNT_REMOUNTING || - m->state == MOUNT_REMOUNTING_SIGTERM || - m->state == MOUNT_REMOUNTING_SIGKILL); + assert(IN_SET(m->state, + MOUNT_MOUNTING, + MOUNT_MOUNTING_DONE, + MOUNT_MOUNTED, + MOUNT_REMOUNTING, + MOUNT_REMOUNTING_SIGTERM, + MOUNT_REMOUNTING_SIGKILL)); mount_enter_unmounting(m); return 1; @@ -1159,7 +1162,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { m->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = MOUNT_SUCCESS; else if (code == CLD_EXITED) f = MOUNT_FAILURE_EXIT_CODE; @@ -1197,9 +1200,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) { case MOUNT_MOUNTING_SIGKILL: case MOUNT_MOUNTING_SIGTERM: - if (f == MOUNT_SUCCESS) - mount_enter_mounted(m, f); - else if (m->from_proc_self_mountinfo) + if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo) + /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are + * happy. If we see the first condition first, we should see the the second condition + * immediately after – or /bin/mount lies to us and is broken. */ mount_enter_mounted(m, f); else mount_enter_dead(m, f); diff --git a/src/core/namespace.c b/src/core/namespace.c index 52a2505d94..43a2f4ba6e 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "dev-setup.h" #include "fd-util.h" +#include "fs-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -53,61 +54,230 @@ typedef enum MountMode { PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV, - READWRITE + READWRITE, } MountMode; typedef struct BindMount { - const char *path; + const char *path; /* stack memory, doesn't need to be freed explicitly */ + char *chased; /* malloc()ed memory, needs to be freed */ MountMode mode; - bool done; - bool ignore; + bool ignore; /* Ignore if path does not exist */ } BindMount; +typedef struct TargetMount { + const char *path; + MountMode mode; + bool ignore; /* Ignore if path does not exist */ +} TargetMount; + +/* + * The following Protect tables are to protect paths and mark some of them + * READONLY, in case a path is covered by an option from another table, then + * it is marked READWRITE in the current one, and the more restrictive mode is + * applied from that other table. This way all options can be combined in a + * safe and comprehensible way for users. + */ + +/* ProtectKernelTunables= option and the related filesystem APIs */ +static const TargetMount protect_kernel_tunables_table[] = { + { "/proc/sys", READONLY, false }, + { "/proc/sysrq-trigger", READONLY, true }, + { "/proc/latency_stats", READONLY, true }, + { "/proc/mtrr", READONLY, true }, + { "/proc/apm", READONLY, true }, + { "/proc/acpi", READONLY, true }, + { "/proc/timer_stats", READONLY, true }, + { "/proc/asound", READONLY, true }, + { "/proc/bus", READONLY, true }, + { "/proc/fs", READONLY, true }, + { "/proc/irq", READONLY, true }, + { "/sys", READONLY, false }, + { "/sys/kernel/debug", READONLY, true }, + { "/sys/kernel/tracing", READONLY, true }, + { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */ +}; + +/* + * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of + * system should be protected by ProtectSystem= + */ +static const TargetMount protect_home_read_only_table[] = { + { "/home", READONLY, true }, + { "/run/user", READONLY, true }, + { "/root", READONLY, true }, +}; + +/* ProtectHome=yes table */ +static const TargetMount protect_home_yes_table[] = { + { "/home", INACCESSIBLE, true }, + { "/run/user", INACCESSIBLE, true }, + { "/root", INACCESSIBLE, true }, +}; + +/* ProtectSystem=yes table */ +static const TargetMount protect_system_yes_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, +}; + +/* ProtectSystem=full includes ProtectSystem=yes */ +static const TargetMount protect_system_full_table[] = { + { "/usr", READONLY, false }, + { "/boot", READONLY, true }, + { "/efi", READONLY, true }, + { "/etc", READONLY, false }, +}; + +/* + * ProtectSystem=strict table. In this strict mode, we mount everything + * read-only, except for /proc, /dev, /sys which are the kernel API VFS, + * which are left writable, but PrivateDevices= + ProtectKernelTunables= + * protect those, and these options should be fully orthogonal. + * (And of course /home and friends are also left writable, as ProtectHome= + * shall manage those, orthogonally). + */ +static const TargetMount protect_system_strict_table[] = { + { "/", READONLY, false }, + { "/proc", READWRITE, false }, /* ProtectKernelTunables= */ + { "/sys", READWRITE, false }, /* ProtectKernelTunables= */ + { "/dev", READWRITE, false }, /* PrivateDevices= */ + { "/home", READWRITE, true }, /* ProtectHome= */ + { "/run/user", READWRITE, true }, /* ProtectHome= */ + { "/root", READWRITE, true }, /* ProtectHome= */ +}; + +static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) { + (*p)->path = path; + (*p)->mode = mode; + (*p)->ignore = ignore; +} + static int append_mounts(BindMount **p, char **strv, MountMode mode) { char **i; assert(p); STRV_FOREACH(i, strv) { + bool ignore = false; - (*p)->ignore = false; - (*p)->done = false; - - if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') { - (*p)->ignore = true; + if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) { (*i)++; + ignore = true; } if (!path_is_absolute(*i)) return -EINVAL; - (*p)->path = *i; - (*p)->mode = mode; + set_bind_mount(p, *i, mode, ignore); (*p)++; } return 0; } -static int mount_path_compare(const void *a, const void *b) { - const BindMount *p = a, *q = b; - int d; +static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) { + unsigned i; - d = path_compare(p->path, q->path); + assert(p); + assert(mounts); + + for (i = 0; i < size; i++) { + /* + * Here we assume that the ignore field is set during + * declaration we do not support "-" at the beginning. + */ + const TargetMount *m = &mounts[i]; + const char *path = prefix_roota(root_directory, m->path); + + if (!path_is_absolute(path)) + return -EINVAL; + + set_bind_mount(p, path, m->mode, m->ignore); + (*p)++; + } + + return 0; +} + +static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_tunables_table, + ELEMENTSOF(protect_kernel_tunables_table)); +} - if (d == 0) { - /* If the paths are equal, check the mode */ - if (p->mode < q->mode) - return -1; +static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) { + int r = 0; - if (p->mode > q->mode) - return 1; + assert(p); + if (protect_home == PROTECT_HOME_NO) return 0; + + switch (protect_home) { + case PROTECT_HOME_READ_ONLY: + r = append_target_mounts(p, root_directory, protect_home_read_only_table, + ELEMENTSOF(protect_home_read_only_table)); + break; + case PROTECT_HOME_YES: + r = append_target_mounts(p, root_directory, protect_home_yes_table, + ELEMENTSOF(protect_home_yes_table)); + break; + default: + r = -EINVAL; + break; } + return r; +} + +static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) { + int r = 0; + + assert(p); + + if (protect_system == PROTECT_SYSTEM_NO) + return 0; + + switch (protect_system) { + case PROTECT_SYSTEM_STRICT: + r = append_target_mounts(p, root_directory, protect_system_strict_table, + ELEMENTSOF(protect_system_strict_table)); + break; + case PROTECT_SYSTEM_YES: + r = append_target_mounts(p, root_directory, protect_system_yes_table, + ELEMENTSOF(protect_system_yes_table)); + break; + case PROTECT_SYSTEM_FULL: + r = append_target_mounts(p, root_directory, protect_system_full_table, + ELEMENTSOF(protect_system_full_table)); + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +static int mount_path_compare(const void *a, const void *b) { + const BindMount *p = a, *q = b; + int d; + /* If the paths are not equal, then order prefixes first */ - return d; + d = path_compare(p->path, q->path); + if (d != 0) + return d; + + /* If the paths are equal, check the mode */ + if (p->mode < q->mode) + return -1; + + if (p->mode > q->mode) + return 1; + + return 0; } static void drop_duplicates(BindMount *m, unsigned *n) { @@ -116,16 +286,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) { assert(m); assert(n); + /* Drops duplicate entries. Expects that the array is properly ordered already. */ + for (f = m, t = m, previous = NULL; f < m+*n; f++) { - /* The first one wins */ - if (previous && path_equal(f->path, previous->path)) + /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare() + * above. */ + if (previous && path_equal(f->path, previous->path)) { + log_debug("%s is duplicate.", f->path); continue; + } *t = *f; - previous = t; + t++; + } + + *n = t - m; +} + +static void drop_inaccessible(BindMount *m, unsigned *n) { + BindMount *f, *t; + const char *clear = NULL; + + assert(m); + assert(n); + + /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly + * ordered already. */ + for (f = m, t = m; f < m+*n; f++) { + + /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop + * it, as inaccessible paths really should drop the entire subtree. */ + if (clear && path_startswith(f->path, clear)) { + log_debug("%s is masked by %s.", f->path, clear); + continue; + } + + clear = f->mode == INACCESSIBLE ? f->path : NULL; + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_nop(BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the + * list is ordered by prefixes. */ + + for (f = m, t = m; f < m+*n; f++) { + + /* Only suppress such subtrees for READONLY and READWRITE entries */ + if (IN_SET(f->mode, READONLY, READWRITE)) { + BindMount *p; + bool found = false; + + /* Now let's find the first parent of the entry we are looking at. */ + for (p = t-1; p >= m; p--) { + if (path_startswith(f->path, p->path)) { + found = true; + break; + } + } + + /* We found it, let's see if it's the same mode, if so, we can drop this entry */ + if (found && p->mode == f->mode) { + log_debug("%s is redundant by %s", f->path, p->path); + continue; + } + } + + *t = *f; + t++; + } + + *n = t - m; +} + +static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + + assert(m); + assert(n); + + if (!root_directory) + return; + + /* Drops all mounts that are outside of the root directory. */ + + for (f = m, t = m; f < m+*n; f++) { + + if (!path_startswith(f->path, root_directory)) { + log_debug("%s is outside of root directory.", f->path); + continue; + } + + *t = *f; t++; } @@ -278,24 +542,23 @@ static int apply_mount( const char *what; int r; - struct stat target; assert(m); + log_debug("Applying namespace mount on %s", m->path); + switch (m->mode) { - case INACCESSIBLE: + case INACCESSIBLE: { + struct stat target; /* First, get rid of everything that is below if there * is anything... Then, overmount it with an * inaccessible path. */ - umount_recursive(m->path, 0); + (void) umount_recursive(m->path, 0); - if (lstat(m->path, &target) < 0) { - if (m->ignore && errno == ENOENT) - return 0; - return -errno; - } + if (lstat(m->path, &target) < 0) + return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path); what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -303,11 +566,20 @@ static int apply_mount( return -ELOOP; } break; + } + case READONLY: case READWRITE: - /* Nothing to mount here, we just later toggle the - * MS_RDONLY bit for the mount point */ - return 0; + + r = path_is_mount_point(m->path, 0); + if (r < 0) + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); + if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ + return 0; + + /* This isn't a mount point yet, let's make it one. */ + what = m->path; + break; case PRIVATE_TMP: what = tmp_dir; @@ -326,38 +598,104 @@ static int apply_mount( assert(what); - r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) { - log_debug("Successfully mounted %s to %s", what, m->path); - return r; - } else { - if (m->ignore && errno == ENOENT) - return 0; + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); - } + + log_debug("Successfully mounted %s to %s", what, m->path); + return 0; } -static int make_read_only(BindMount *m) { - int r; +static int make_read_only(BindMount *m, char **blacklist) { + int r = 0; assert(m); if (IN_SET(m->mode, INACCESSIBLE, READONLY)) - r = bind_remount_recursive(m->path, true); - else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { - r = bind_remount_recursive(m->path, false); - if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + r = bind_remount_recursive(m->path, true, blacklist); + else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else - r = 0; - - if (m->ignore && r == -ENOENT) return 0; + /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only + * already stays this way. This improves compatibility with container managers, where we won't attempt to undo + * read-only mounts already applied. */ + return r; } +static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + int r; + + assert(m); + assert(n); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we + * can't resolve the path, and which have been marked for such removal. */ + + for (f = m, t = m; f < m+*n; f++) { + + r = chase_symlinks(f->path, root_directory, &f->chased); + if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */ + continue; + if (r < 0) + return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path); + + if (path_equal(f->path, f->chased)) + f->chased = mfree(f->chased); + else { + log_debug("Chased %s → %s", f->path, f->chased); + f->path = f->chased; + } + + *t = *f; + t++; + } + + *n = t - m; + return 0; +} + +static unsigned namespace_calculate_mounts( + char** read_write_paths, + char** read_only_paths, + char** inaccessible_paths, + const char* tmp_dir, + const char* var_tmp_dir, + bool private_dev, + bool protect_sysctl, + bool protect_cgroups, + ProtectHome protect_home, + ProtectSystem protect_system) { + + unsigned protect_home_cnt; + unsigned protect_system_cnt = + (protect_system == PROTECT_SYSTEM_STRICT ? + ELEMENTSOF(protect_system_strict_table) : + ((protect_system == PROTECT_SYSTEM_FULL) ? + ELEMENTSOF(protect_system_full_table) : + ((protect_system == PROTECT_SYSTEM_YES) ? + ELEMENTSOF(protect_system_yes_table) : 0))); + + protect_home_cnt = + (protect_home == PROTECT_HOME_YES ? + ELEMENTSOF(protect_home_yes_table) : + ((protect_home == PROTECT_HOME_READ_ONLY) ? + ELEMENTSOF(protect_home_read_only_table) : 0)); + + return !!tmp_dir + !!var_tmp_dir + + strv_length(read_write_paths) + + strv_length(read_only_paths) + + strv_length(inaccessible_paths) + + private_dev + + (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + + (protect_cgroups ? 1 : 0) + + protect_home_cnt + protect_system_cnt; +} + int setup_namespace( const char* root_directory, char** read_write_paths, @@ -366,28 +704,31 @@ int setup_namespace( const char* tmp_dir, const char* var_tmp_dir, bool private_dev, + bool protect_sysctl, + bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags) { BindMount *m, *mounts = NULL; + bool make_slave = false; unsigned n; int r = 0; if (mount_flags == 0) mount_flags = MS_SHARED; - if (unshare(CLONE_NEWNS) < 0) - return -errno; + n = namespace_calculate_mounts(read_write_paths, + read_only_paths, + inaccessible_paths, + tmp_dir, var_tmp_dir, + private_dev, protect_sysctl, + protect_cgroups, protect_home, + protect_system); - n = !!tmp_dir + !!var_tmp_dir + - strv_length(read_write_paths) + - strv_length(read_only_paths) + - strv_length(inaccessible_paths) + - private_dev + - (protect_home != PROTECT_HOME_NO ? 3 : 0) + - (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) + - (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0); + /* Set mount slave mode */ + if (root_directory || n > 0) + make_slave = true; if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); @@ -421,94 +762,112 @@ int setup_namespace( m++; } - if (protect_home != PROTECT_HOME_NO) { - const char *home_dir, *run_user_dir, *root_dir; + if (protect_sysctl) + append_protect_kernel_tunables(&m, root_directory); - home_dir = prefix_roota(root_directory, "/home"); - home_dir = strjoina("-", home_dir); - run_user_dir = prefix_roota(root_directory, "/run/user"); - run_user_dir = strjoina("-", run_user_dir); - root_dir = prefix_roota(root_directory, "/root"); - root_dir = strjoina("-", root_dir); - - r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir), - protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE); - if (r < 0) - return r; + if (protect_cgroups) { + m->path = prefix_roota(root_directory, "/sys/fs/cgroup"); + m->mode = READONLY; + m++; } - if (protect_system != PROTECT_SYSTEM_NO) { - const char *usr_dir, *boot_dir, *etc_dir; - - usr_dir = prefix_roota(root_directory, "/usr"); - boot_dir = prefix_roota(root_directory, "/boot"); - boot_dir = strjoina("-", boot_dir); - etc_dir = prefix_roota(root_directory, "/etc"); + r = append_protect_home(&m, root_directory, protect_home); + if (r < 0) + return r; - r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL - ? STRV_MAKE(usr_dir, boot_dir, etc_dir) - : STRV_MAKE(usr_dir, boot_dir), READONLY); - if (r < 0) - return r; - } + r = append_protect_system(&m, root_directory, protect_system); + if (r < 0) + return r; assert(mounts + n == m); + /* Resolve symlinks manually first, as mount() will always follow them relative to the host's + * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit + * racy. */ + r = chase_all_symlinks(root_directory, mounts, &n); + if (r < 0) + goto finish; + qsort(mounts, n, sizeof(BindMount), mount_path_compare); + drop_duplicates(mounts, &n); + drop_outside_root(root_directory, mounts, &n); + drop_inaccessible(mounts, &n); + drop_nop(mounts, &n); + } + + if (unshare(CLONE_NEWNS) < 0) { + r = -errno; + goto finish; } - if (n > 0 || root_directory) { + if (make_slave) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - return -errno; + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (root_directory) { - /* Turn directory into bind mount */ - if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) - return -errno; + /* Turn directory into bind mount, if it isn't one yet */ + r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW); + if (r < 0) + goto finish; + if (r == 0) { + if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } + } } if (n > 0) { + char **blacklist; + unsigned j; + + /* First round, add in all special mounts we need */ for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto fail; + goto finish; } + /* Create a blacklist we can pass to bind_mount_recursive() */ + blacklist = newa(char*, n+1); + for (j = 0; j < n; j++) + blacklist[j] = (char*) mounts[j].path; + blacklist[j] = NULL; + + /* Second round, flip the ro bits if necessary. */ for (m = mounts; m < mounts + n; ++m) { - r = make_read_only(m); + r = make_read_only(m, blacklist); if (r < 0) - goto fail; + goto finish; } } if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - - /* at this point, we cannot rollback */ if (r < 0) - return r; + goto finish; } /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ - if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - /* at this point, we cannot rollback */ - return -errno; + if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } - return 0; + r = 0; -fail: - if (n > 0) { - for (m = mounts; m < mounts + n; ++m) - if (m->done) - (void) umount2(m->path, MNT_DETACH); - } +finish: + for (m = mounts; m < mounts + n; m++) + free(m->chased); return r; } @@ -658,6 +1017,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { [PROTECT_SYSTEM_NO] = "no", [PROTECT_SYSTEM_YES] = "yes", [PROTECT_SYSTEM_FULL] = "full", + [PROTECT_SYSTEM_STRICT] = "strict", }; DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem); diff --git a/src/core/namespace.h b/src/core/namespace.h index 1aedf5f208..6505bcc499 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -35,6 +35,7 @@ typedef enum ProtectSystem { PROTECT_SYSTEM_NO, PROTECT_SYSTEM_YES, PROTECT_SYSTEM_FULL, + PROTECT_SYSTEM_STRICT, _PROTECT_SYSTEM_MAX, _PROTECT_SYSTEM_INVALID = -1 } ProtectSystem; @@ -46,6 +47,8 @@ int setup_namespace(const char *chroot, const char *tmp_dir, const char *var_tmp_dir, bool private_dev, + bool protect_sysctl, + bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags); diff --git a/src/core/service.c b/src/core/service.c index 969c62bd83..98edc437a2 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1256,10 +1256,16 @@ static int service_spawn( socklen_t salen = sizeof(sa); r = getpeername(s->socket_fd, &sa.sa, &salen); - if (r < 0) - return -errno; + if (r < 0) { + r = -errno; + + /* ENOTCONN is legitimate if the endpoint disappeared on shutdown. + * This connection is over, but the socket unit lives on. */ + if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST)) + return r; + } - if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) { + if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) { _cleanup_free_ char *addr = NULL; char *t; int port; @@ -2594,8 +2600,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { assert(s); assert(pid >= 0); - if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) : - is_clean_exit_lsb(code, status, &s->success_status)) + if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status)) f = SERVICE_SUCCESS; else if (code == CLD_EXITED) f = SERVICE_FAILURE_EXIT_CODE; diff --git a/src/core/socket.c b/src/core/socket.c index 70d55dd9ed..1b4a1b3dc3 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1334,14 +1334,9 @@ static int usbffs_select_ep(const struct dirent *d) { static int usbffs_dispatch_eps(SocketPort *p) { _cleanup_free_ struct dirent **ent = NULL; - _cleanup_free_ char *path = NULL; int r, i, n, k; - path = dirname_malloc(p->path); - if (!path) - return -ENOMEM; - - r = scandir(path, &ent, usbffs_select_ep, alphasort); + r = scandir(p->path, &ent, usbffs_select_ep, alphasort); if (r < 0) return -errno; @@ -1356,7 +1351,7 @@ static int usbffs_dispatch_eps(SocketPort *p) { for (i = 0; i < n; ++i) { _cleanup_free_ char *ep = NULL; - ep = path_make_absolute(ent[i]->d_name, path); + ep = path_make_absolute(ent[i]->d_name, p->path); if (!ep) return -ENOMEM; @@ -2748,7 +2743,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) { s->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = SOCKET_SUCCESS; else if (code == CLD_EXITED) f = SOCKET_FAILURE_EXIT_CODE; diff --git a/src/core/swap.c b/src/core/swap.c index fb222b6858..fee9e7b0e6 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -988,7 +988,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) { s->control_pid = 0; - if (is_clean_exit(code, status, NULL)) + if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL)) f = SWAP_SUCCESS; else if (code == CLD_EXITED) f = SWAP_FAILURE_EXIT_CODE; diff --git a/src/core/system.conf b/src/core/system.conf index c6bb050aac..746572b7ff 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -21,6 +21,7 @@ #CrashChangeVT=no #CrashShell=no #CrashReboot=no +#CtrlAltDelBurstAction=reboot-force #CPUAffinity=1 2 #JoinControllers=cpu,cpuacct net_cls,net_prio #RuntimeWatchdogSec=0 diff --git a/src/core/umount.c b/src/core/umount.c index c21a2be54e..1e5459ed80 100644 --- a/src/core/umount.c +++ b/src/core/umount.c @@ -375,7 +375,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e /* If we are in a container, don't attempt to read-only mount anything as that brings no real benefits, but might confuse the host, as we remount - the superblock here, not the bind mound. */ + the superblock here, not the bind mount. */ if (detect_container() <= 0) { _cleanup_free_ char *options = NULL; /* MS_REMOUNT requires that the data parameter diff --git a/src/core/unit.c b/src/core/unit.c index de22f657c6..693f75c928 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2225,6 +2225,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen return 0; } + if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) { + log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id); + return 0; + } + r = set_ensure_allocated(&u->dependencies[d], NULL); if (r < 0) return r; @@ -3377,8 +3382,14 @@ int unit_patch_contexts(Unit *u) { return -ENOMEM; } + /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID + * around in the file system or on IPC objects. Hence enforce a strict sandbox. */ + ec->private_tmp = true; ec->remove_ipc = true; + ec->protect_system = PROTECT_SYSTEM_STRICT; + if (ec->protect_home == PROTECT_HOME_NO) + ec->protect_home = PROTECT_HOME_READ_ONLY; } } |