diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/execute.c | 70 | ||||
-rw-r--r-- | src/core/job.c | 19 | ||||
-rw-r--r-- | src/core/service.c | 61 | ||||
-rw-r--r-- | src/core/unit.c | 6 | ||||
-rw-r--r-- | src/core/unit.h | 2 | ||||
-rw-r--r-- | src/shared/seccomp-util.c | 49 | ||||
-rw-r--r-- | src/shared/seccomp-util.h | 2 | ||||
-rw-r--r-- | src/systemctl/systemctl.c | 2 | ||||
-rw-r--r-- | src/test/test-unit-file.c | 2 |
9 files changed, 141 insertions, 72 deletions
diff --git a/src/core/execute.c b/src/core/execute.c index a1bd0c1238..3f053602b5 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2540,12 +2540,6 @@ static int exec_child( (void) umask(context->umask); if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - r = setup_smack(context, command); - if (r < 0) { - *exit_status = EXIT_SMACK_PROCESS_LABEL; - return r; - } - if (context->pam_name && username) { r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { @@ -2695,6 +2689,41 @@ static int exec_child( } } + /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to + * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires + * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls + * are restricted. */ + +#ifdef HAVE_SELINUX + if (mac_selinux_use()) { + char *exec_context = mac_selinux_context_net ?: context->selinux_context; + + if (exec_context) { + r = setexeccon(exec_context); + if (r < 0) { + *exit_status = EXIT_SELINUX_CONTEXT; + return r; + } + } + } +#endif + + r = setup_smack(context, command); + if (r < 0) { + *exit_status = EXIT_SMACK_PROCESS_LABEL; + return r; + } + +#ifdef HAVE_APPARMOR + if (context->apparmor_profile && mac_apparmor_use()) { + r = aa_change_onexec(context->apparmor_profile); + if (r < 0 && !context->apparmor_profile_ignore) { + *exit_status = EXIT_APPARMOR_PROFILE; + return -errno; + } + } +#endif + /* PR_GET_SECUREBITS is not privileged, while * PR_SET_SECUREBITS is. So to suppress * potential EPERMs we'll try not to call @@ -2760,6 +2789,8 @@ static int exec_child( } } + /* This really should remain the last step before the execve(), to make sure our own code is unaffected + * by the filter as little as possible. */ if (context_has_syscall_filters(context)) { r = apply_seccomp(unit, context); if (r < 0) { @@ -2768,30 +2799,6 @@ static int exec_child( } } #endif - -#ifdef HAVE_SELINUX - if (mac_selinux_use()) { - char *exec_context = mac_selinux_context_net ?: context->selinux_context; - - if (exec_context) { - r = setexeccon(exec_context); - if (r < 0) { - *exit_status = EXIT_SELINUX_CONTEXT; - return r; - } - } - } -#endif - -#ifdef HAVE_APPARMOR - if (context->apparmor_profile && mac_apparmor_use()) { - r = aa_change_onexec(context->apparmor_profile); - if (r < 0 && !context->apparmor_profile_ignore) { - *exit_status = EXIT_APPARMOR_PROFILE; - return -errno; - } - } -#endif } final_argv = replace_env_argv(argv, accum_env); @@ -3613,7 +3620,8 @@ char *exec_command_line(char **argv) { STRV_FOREACH(a, argv) k += strlen(*a)+3; - if (!(n = new(char, k))) + n = new(char, k); + if (!n) return NULL; p = n; diff --git a/src/core/job.c b/src/core/job.c index 3ecc8a1a73..ac6910a906 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR } static void job_print_status_message(Unit *u, JobType t, JobResult result) { - static struct { + static const struct { const char *color, *word; } const statuses[_JOB_RESULT_MAX] = { - [JOB_DONE] = {ANSI_GREEN, " OK "}, - [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "}, - [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"}, - [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"}, - [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "}, - [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"}, - [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"}, + [JOB_DONE] = { ANSI_GREEN, " OK " }, + [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " }, + [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" }, + [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" }, + [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " }, + [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" }, + [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" }, }; const char *format; @@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) { if (!format) return; + /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */ DISABLE_WARNING_FORMAT_NONLITERAL; - xsprintf(buf, format, unit_description(u)); + snprintf(buf, sizeof(buf), format, unit_description(u)); REENABLE_WARNING; switch (t) { diff --git a/src/core/service.c b/src/core/service.c index ee4f4983fc..a7274a758f 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) { free(fs); } -static void service_release_resources(Unit *u) { +static void service_release_fd_store(Service *s) { + assert(s); + + log_unit_debug(UNIT(s), "Releasing all stored fds"); + while (s->fd_store) + service_fd_store_unlink(s->fd_store); + + assert(s->n_fd_store == 0); +} + +static void service_release_resources(Unit *u, bool inactive) { Service *s = SERVICE(u); assert(s); @@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) { if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0) return; - log_unit_debug(u, "Releasing all resources."); + log_unit_debug(u, "Releasing resources."); s->stdin_fd = safe_close(s->stdin_fd); s->stdout_fd = safe_close(s->stdout_fd); s->stderr_fd = safe_close(s->stderr_fd); - while (s->fd_store) - service_fd_store_unlink(s->fd_store); - - assert(s->n_fd_store == 0); + if (inactive) + service_release_fd_store(s); } static void service_done(Unit *u) { @@ -350,7 +358,7 @@ static void service_done(Unit *u) { s->timer_event_source = sd_event_source_unref(s->timer_event_source); - service_release_resources(u); + service_release_resources(u, true); } static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) { @@ -360,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us assert(fs); /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */ + log_unit_debug(UNIT(fs->service), + "Received %s on stored fd %d (%s), closing.", + revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP", + fs->fd, strna(fs->fdname)); service_fd_store_unlink(fs); return 0; } @@ -368,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) { ServiceFDStore *fs; int r; + /* fd is always consumed if we return >= 0 */ + assert(s); assert(fd >= 0); if (s->n_fd_store >= s->n_fd_store_max) - return 0; + return -EXFULL; /* Our store is full. + * Use this errno rather than E[NM]FILE to distinguish from + * the case where systemd itself hits the file limit. */ LIST_FOREACH(fd_store, fs, s->fd_store) { r = same_fd(fs->fd, fd); if (r < 0) return r; if (r > 0) { - /* Already included */ safe_close(fd); - return 1; + return 0; /* fd already included */ } } @@ -409,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) { LIST_PREPEND(fd_store, s->fd_store, fs); s->n_fd_store++; - return 1; + return 1; /* fd newly stored */ } static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { @@ -417,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { assert(s); - if (fdset_size(fds) <= 0) - return 0; - - while (s->n_fd_store < s->n_fd_store_max) { + while (fdset_size(fds) > 0) { _cleanup_close_ int fd = -1; fd = fdset_steal_first(fds); @@ -428,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) { break; r = service_add_fd_store(s, fd, name); + if (r == -EXFULL) + return log_unit_warning_errno(UNIT(s), r, + "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.", + s->n_fd_store_max); if (r < 0) - return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m"); - if (r > 0) { - log_unit_debug(UNIT(s), "Added fd to fd store."); - fd = -1; - } + return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m"); + if (r > 0) + log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name)); + fd = -1; } - if (fdset_size(fds) > 0) - log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max); - return 0; } @@ -1225,6 +1237,7 @@ static int service_spawn( return r; n_fds = r; + log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds); } r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout)); @@ -2336,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, r = service_add_fd_store(s, fd, t); if (r < 0) log_unit_error_errno(u, r, "Failed to add fd to store: %m"); - else if (r > 0) + else fdset_remove(fds, fd); } diff --git a/src/core/unit.c b/src/core/unit.c index 068d319206..463e6d6a62 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -320,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) { bool unit_check_gc(Unit *u) { UnitActiveState state; + bool inactive; assert(u); if (u->job) @@ -329,16 +330,17 @@ bool unit_check_gc(Unit *u) { return true; state = unit_active_state(u); + inactive = state == UNIT_INACTIVE; /* If the unit is inactive and failed and no job is queued for * it, then release its runtime resources */ if (UNIT_IS_INACTIVE_OR_FAILED(state) && UNIT_VTABLE(u)->release_resources) - UNIT_VTABLE(u)->release_resources(u); + UNIT_VTABLE(u)->release_resources(u, inactive); /* But we keep the unit object around for longer when it is * referenced or configured to not be gc'ed */ - if (state != UNIT_INACTIVE) + if (!inactive) return true; if (u->perpetual) diff --git a/src/core/unit.h b/src/core/unit.h index 899cd62c92..991543664b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -373,7 +373,7 @@ struct UnitVTable { /* When the unit is not running and no job for it queued we * shall release its runtime resources */ - void (*release_resources)(Unit *u); + void (*release_resources)(Unit *u, bool inactive); /* Invoked on every child that died */ void (*sigchld_event)(Unit *u, pid_t pid, int code, int status); diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 1cbbb9d757..c9b24f1065 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -217,6 +217,24 @@ bool is_seccomp_available(void) { } const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { + [SYSCALL_FILTER_SET_BASIC_IO] = { + /* Basic IO */ + .name = "@basic-io", + .value = + "close\0" + "dup2\0" + "dup3\0" + "dup\0" + "lseek\0" + "pread64\0" + "preadv\0" + "pwrite64\0" + "pwritev\0" + "read\0" + "readv\0" + "write\0" + "writev\0" + }, [SYSCALL_FILTER_SET_CLOCK] = { /* Clock */ .name = "@clock", @@ -253,15 +271,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "sys_debug_setcontext\0" }, [SYSCALL_FILTER_SET_DEFAULT] = { - /* Default list */ + /* Default list: the most basic of operations */ .name = "@default", .value = + "clock_getres\0" + "clock_gettime\0" + "clock_nanosleep\0" "execve\0" "exit\0" "exit_group\0" "getrlimit\0" /* make sure processes can query stack size and such */ + "gettimeofday\0" + "nanosleep\0" + "pause\0" "rt_sigreturn\0" "sigreturn\0" + "time\0" }, [SYSCALL_FILTER_SET_IO_EVENT] = { /* Event loop use */ @@ -283,9 +308,10 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "select\0" }, [SYSCALL_FILTER_SET_IPC] = { - /* Message queues, SYSV IPC or other IPC: unusual */ + /* Message queues, SYSV IPC or other IPC */ .name = "@ipc", .value = "ipc\0" + "memfd_create\0" "mq_getsetattr\0" "mq_notify\0" "mq_open\0" @@ -296,6 +322,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "msgget\0" "msgrcv\0" "msgsnd\0" + "pipe2\0" + "pipe\0" "process_vm_readv\0" "process_vm_writev\0" "semctl\0" @@ -436,7 +464,6 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { .value = "arch_prctl\0" "clone\0" - "execve\0" "execveat\0" "fork\0" "kill\0" @@ -463,6 +490,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "s390_pci_mmio_write\0" #endif }, + [SYSCALL_FILTER_SET_RESOURCES] = { + /* Alter resource settings */ + .name = "@resources", + .value = + "sched_setparam\0" + "sched_setscheduler\0" + "sched_setaffinity\0" + "setpriority\0" + "setrlimit\0" + "set_mempolicy\0" + "migrate_pages\0" + "move_pages\0" + "mbind\0" + "sched_setattr\0" + "prlimit64\0" + }, }; const SyscallFilterSet *syscall_filter_set_find(const char *name) { diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 8050fc6fbf..8e209efef2 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -38,6 +38,7 @@ typedef struct SyscallFilterSet { } SyscallFilterSet; enum { + SYSCALL_FILTER_SET_BASIC_IO, SYSCALL_FILTER_SET_CLOCK, SYSCALL_FILTER_SET_CPU_EMULATION, SYSCALL_FILTER_SET_DEBUG, @@ -52,6 +53,7 @@ enum { SYSCALL_FILTER_SET_PRIVILEGED, SYSCALL_FILTER_SET_PROCESS, SYSCALL_FILTER_SET_RAW_IO, + SYSCALL_FILTER_SET_RESOURCES, _SYSCALL_FILTER_SET_MAX }; diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index fb5a539bc6..dd3b931cd6 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -5277,7 +5277,7 @@ static int cat(int argc, char *argv[], void *userdata) { else puts(""); - if (need_daemon_reload(bus, *name)) + if (need_daemon_reload(bus, *name) > 0) /* ignore errors (<0), this is informational output */ fprintf(stderr, "%s# Warning: %s changed on disk, the version systemd has loaded is outdated.\n" "%s# This output shows the current version of the unit's original fragment and drop-in files.\n" diff --git a/src/test/test-unit-file.c b/src/test/test-unit-file.c index 7ef087a2e3..12f48bf435 100644 --- a/src/test/test-unit-file.c +++ b/src/test/test-unit-file.c @@ -589,7 +589,7 @@ static void test_install_printf(void) { assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid); assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid); assert_se((host = gethostname_malloc())); - assert_se((user = getusername_malloc())); + assert_se((user = uid_to_name(getuid()))); assert_se(asprintf(&uid, UID_FMT, getuid()) >= 0); #define expect(src, pattern, result) \ |