summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/execute.c70
-rw-r--r--src/core/job.c19
-rw-r--r--src/core/service.c61
-rw-r--r--src/core/unit.c6
-rw-r--r--src/core/unit.h2
-rw-r--r--src/shared/seccomp-util.c49
-rw-r--r--src/shared/seccomp-util.h2
-rw-r--r--src/systemctl/systemctl.c2
-rw-r--r--src/test/test-unit-file.c2
9 files changed, 141 insertions, 72 deletions
diff --git a/src/core/execute.c b/src/core/execute.c
index a1bd0c1238..3f053602b5 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2540,12 +2540,6 @@ static int exec_child(
(void) umask(context->umask);
if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
- r = setup_smack(context, command);
- if (r < 0) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
-
if (context->pam_name && username) {
r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
@@ -2695,6 +2689,41 @@ static int exec_child(
}
}
+ /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+ * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+ * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+ * are restricted. */
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_use()) {
+ char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+ if (exec_context) {
+ r = setexeccon(exec_context);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return r;
+ }
+ }
+ }
+#endif
+
+ r = setup_smack(context, command);
+ if (r < 0) {
+ *exit_status = EXIT_SMACK_PROCESS_LABEL;
+ return r;
+ }
+
+#ifdef HAVE_APPARMOR
+ if (context->apparmor_profile && mac_apparmor_use()) {
+ r = aa_change_onexec(context->apparmor_profile);
+ if (r < 0 && !context->apparmor_profile_ignore) {
+ *exit_status = EXIT_APPARMOR_PROFILE;
+ return -errno;
+ }
+ }
+#endif
+
/* PR_GET_SECUREBITS is not privileged, while
* PR_SET_SECUREBITS is. So to suppress
* potential EPERMs we'll try not to call
@@ -2760,6 +2789,8 @@ static int exec_child(
}
}
+ /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+ * by the filter as little as possible. */
if (context_has_syscall_filters(context)) {
r = apply_seccomp(unit, context);
if (r < 0) {
@@ -2768,30 +2799,6 @@ static int exec_child(
}
}
#endif
-
-#ifdef HAVE_SELINUX
- if (mac_selinux_use()) {
- char *exec_context = mac_selinux_context_net ?: context->selinux_context;
-
- if (exec_context) {
- r = setexeccon(exec_context);
- if (r < 0) {
- *exit_status = EXIT_SELINUX_CONTEXT;
- return r;
- }
- }
- }
-#endif
-
-#ifdef HAVE_APPARMOR
- if (context->apparmor_profile && mac_apparmor_use()) {
- r = aa_change_onexec(context->apparmor_profile);
- if (r < 0 && !context->apparmor_profile_ignore) {
- *exit_status = EXIT_APPARMOR_PROFILE;
- return -errno;
- }
- }
-#endif
}
final_argv = replace_env_argv(argv, accum_env);
@@ -3613,7 +3620,8 @@ char *exec_command_line(char **argv) {
STRV_FOREACH(a, argv)
k += strlen(*a)+3;
- if (!(n = new(char, k)))
+ n = new(char, k);
+ if (!n)
return NULL;
p = n;
diff --git a/src/core/job.c b/src/core/job.c
index 3ecc8a1a73..ac6910a906 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -690,16 +690,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
}
static void job_print_status_message(Unit *u, JobType t, JobResult result) {
- static struct {
+ static const struct {
const char *color, *word;
} const statuses[_JOB_RESULT_MAX] = {
- [JOB_DONE] = {ANSI_GREEN, " OK "},
- [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "},
- [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"},
- [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"},
- [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "},
- [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"},
- [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"},
+ [JOB_DONE] = { ANSI_GREEN, " OK " },
+ [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
+ [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
+ [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+ [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " },
+ [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+ [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
};
const char *format;
@@ -767,8 +767,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
if (!format)
return;
+ /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof(buf), format, unit_description(u));
REENABLE_WARNING;
switch (t) {
diff --git a/src/core/service.c b/src/core/service.c
index ee4f4983fc..a7274a758f 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -289,7 +289,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
free(fs);
}
-static void service_release_resources(Unit *u) {
+static void service_release_fd_store(Service *s) {
+ assert(s);
+
+ log_unit_debug(UNIT(s), "Releasing all stored fds");
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u, bool inactive) {
Service *s = SERVICE(u);
assert(s);
@@ -297,16 +307,14 @@ static void service_release_resources(Unit *u) {
if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
return;
- log_unit_debug(u, "Releasing all resources.");
+ log_unit_debug(u, "Releasing resources.");
s->stdin_fd = safe_close(s->stdin_fd);
s->stdout_fd = safe_close(s->stdout_fd);
s->stderr_fd = safe_close(s->stderr_fd);
- while (s->fd_store)
- service_fd_store_unlink(s->fd_store);
-
- assert(s->n_fd_store == 0);
+ if (inactive)
+ service_release_fd_store(s);
}
static void service_done(Unit *u) {
@@ -350,7 +358,7 @@ static void service_done(Unit *u) {
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
- service_release_resources(u);
+ service_release_resources(u, true);
}
static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
@@ -360,6 +368,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us
assert(fs);
/* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ log_unit_debug(UNIT(fs->service),
+ "Received %s on stored fd %d (%s), closing.",
+ revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+ fs->fd, strna(fs->fdname));
service_fd_store_unlink(fs);
return 0;
}
@@ -368,20 +380,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
ServiceFDStore *fs;
int r;
+ /* fd is always consumed if we return >= 0 */
+
assert(s);
assert(fd >= 0);
if (s->n_fd_store >= s->n_fd_store_max)
- return 0;
+ return -EXFULL; /* Our store is full.
+ * Use this errno rather than E[NM]FILE to distinguish from
+ * the case where systemd itself hits the file limit. */
LIST_FOREACH(fd_store, fs, s->fd_store) {
r = same_fd(fs->fd, fd);
if (r < 0)
return r;
if (r > 0) {
- /* Already included */
safe_close(fd);
- return 1;
+ return 0; /* fd already included */
}
}
@@ -409,7 +424,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
LIST_PREPEND(fd_store, s->fd_store, fs);
s->n_fd_store++;
- return 1;
+ return 1; /* fd newly stored */
}
static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
@@ -417,10 +432,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
assert(s);
- if (fdset_size(fds) <= 0)
- return 0;
-
- while (s->n_fd_store < s->n_fd_store_max) {
+ while (fdset_size(fds) > 0) {
_cleanup_close_ int fd = -1;
fd = fdset_steal_first(fds);
@@ -428,17 +440,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
break;
r = service_add_fd_store(s, fd, name);
+ if (r == -EXFULL)
+ return log_unit_warning_errno(UNIT(s), r,
+ "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+ s->n_fd_store_max);
if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
- if (r > 0) {
- log_unit_debug(UNIT(s), "Added fd to fd store.");
- fd = -1;
- }
+ return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+ if (r > 0)
+ log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
+ fd = -1;
}
- if (fdset_size(fds) > 0)
- log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max);
-
return 0;
}
@@ -1225,6 +1237,7 @@ static int service_spawn(
return r;
n_fds = r;
+ log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds);
}
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
@@ -2336,7 +2349,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
r = service_add_fd_store(s, fd, t);
if (r < 0)
log_unit_error_errno(u, r, "Failed to add fd to store: %m");
- else if (r > 0)
+ else
fdset_remove(fds, fd);
}
diff --git a/src/core/unit.c b/src/core/unit.c
index 068d319206..463e6d6a62 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -320,6 +320,7 @@ int unit_set_description(Unit *u, const char *description) {
bool unit_check_gc(Unit *u) {
UnitActiveState state;
+ bool inactive;
assert(u);
if (u->job)
@@ -329,16 +330,17 @@ bool unit_check_gc(Unit *u) {
return true;
state = unit_active_state(u);
+ inactive = state == UNIT_INACTIVE;
/* If the unit is inactive and failed and no job is queued for
* it, then release its runtime resources */
if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
UNIT_VTABLE(u)->release_resources)
- UNIT_VTABLE(u)->release_resources(u);
+ UNIT_VTABLE(u)->release_resources(u, inactive);
/* But we keep the unit object around for longer when it is
* referenced or configured to not be gc'ed */
- if (state != UNIT_INACTIVE)
+ if (!inactive)
return true;
if (u->perpetual)
diff --git a/src/core/unit.h b/src/core/unit.h
index 899cd62c92..991543664b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -373,7 +373,7 @@ struct UnitVTable {
/* When the unit is not running and no job for it queued we
* shall release its runtime resources */
- void (*release_resources)(Unit *u);
+ void (*release_resources)(Unit *u, bool inactive);
/* Invoked on every child that died */
void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 1cbbb9d757..c9b24f1065 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -217,6 +217,24 @@ bool is_seccomp_available(void) {
}
const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_BASIC_IO] = {
+ /* Basic IO */
+ .name = "@basic-io",
+ .value =
+ "close\0"
+ "dup2\0"
+ "dup3\0"
+ "dup\0"
+ "lseek\0"
+ "pread64\0"
+ "preadv\0"
+ "pwrite64\0"
+ "pwritev\0"
+ "read\0"
+ "readv\0"
+ "write\0"
+ "writev\0"
+ },
[SYSCALL_FILTER_SET_CLOCK] = {
/* Clock */
.name = "@clock",
@@ -253,15 +271,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"sys_debug_setcontext\0"
},
[SYSCALL_FILTER_SET_DEFAULT] = {
- /* Default list */
+ /* Default list: the most basic of operations */
.name = "@default",
.value =
+ "clock_getres\0"
+ "clock_gettime\0"
+ "clock_nanosleep\0"
"execve\0"
"exit\0"
"exit_group\0"
"getrlimit\0" /* make sure processes can query stack size and such */
+ "gettimeofday\0"
+ "nanosleep\0"
+ "pause\0"
"rt_sigreturn\0"
"sigreturn\0"
+ "time\0"
},
[SYSCALL_FILTER_SET_IO_EVENT] = {
/* Event loop use */
@@ -283,9 +308,10 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"select\0"
},
[SYSCALL_FILTER_SET_IPC] = {
- /* Message queues, SYSV IPC or other IPC: unusual */
+ /* Message queues, SYSV IPC or other IPC */
.name = "@ipc",
.value = "ipc\0"
+ "memfd_create\0"
"mq_getsetattr\0"
"mq_notify\0"
"mq_open\0"
@@ -296,6 +322,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"msgget\0"
"msgrcv\0"
"msgsnd\0"
+ "pipe2\0"
+ "pipe\0"
"process_vm_readv\0"
"process_vm_writev\0"
"semctl\0"
@@ -436,7 +464,6 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
.value =
"arch_prctl\0"
"clone\0"
- "execve\0"
"execveat\0"
"fork\0"
"kill\0"
@@ -463,6 +490,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"s390_pci_mmio_write\0"
#endif
},
+ [SYSCALL_FILTER_SET_RESOURCES] = {
+ /* Alter resource settings */
+ .name = "@resources",
+ .value =
+ "sched_setparam\0"
+ "sched_setscheduler\0"
+ "sched_setaffinity\0"
+ "setpriority\0"
+ "setrlimit\0"
+ "set_mempolicy\0"
+ "migrate_pages\0"
+ "move_pages\0"
+ "mbind\0"
+ "sched_setattr\0"
+ "prlimit64\0"
+ },
};
const SyscallFilterSet *syscall_filter_set_find(const char *name) {
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 8050fc6fbf..8e209efef2 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -38,6 +38,7 @@ typedef struct SyscallFilterSet {
} SyscallFilterSet;
enum {
+ SYSCALL_FILTER_SET_BASIC_IO,
SYSCALL_FILTER_SET_CLOCK,
SYSCALL_FILTER_SET_CPU_EMULATION,
SYSCALL_FILTER_SET_DEBUG,
@@ -52,6 +53,7 @@ enum {
SYSCALL_FILTER_SET_PRIVILEGED,
SYSCALL_FILTER_SET_PROCESS,
SYSCALL_FILTER_SET_RAW_IO,
+ SYSCALL_FILTER_SET_RESOURCES,
_SYSCALL_FILTER_SET_MAX
};
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index fb5a539bc6..dd3b931cd6 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -5277,7 +5277,7 @@ static int cat(int argc, char *argv[], void *userdata) {
else
puts("");
- if (need_daemon_reload(bus, *name))
+ if (need_daemon_reload(bus, *name) > 0) /* ignore errors (<0), this is informational output */
fprintf(stderr,
"%s# Warning: %s changed on disk, the version systemd has loaded is outdated.\n"
"%s# This output shows the current version of the unit's original fragment and drop-in files.\n"
diff --git a/src/test/test-unit-file.c b/src/test/test-unit-file.c
index 7ef087a2e3..12f48bf435 100644
--- a/src/test/test-unit-file.c
+++ b/src/test/test-unit-file.c
@@ -589,7 +589,7 @@ static void test_install_printf(void) {
assert_se(specifier_machine_id('m', NULL, NULL, &mid) >= 0 && mid);
assert_se(specifier_boot_id('b', NULL, NULL, &bid) >= 0 && bid);
assert_se((host = gethostname_malloc()));
- assert_se((user = getusername_malloc()));
+ assert_se((user = uid_to_name(getuid())));
assert_se(asprintf(&uid, UID_FMT, getuid()) >= 0);
#define expect(src, pattern, result) \