summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lennart Poettering <lennart@poettering.net>, 2016-10-21 21:50:05 +0200
committer: Lennart Poettering <lennart@poettering.net>, 2016-10-24 17:32:50 +0200
commit: 8130926d32d76193e98ba783ba932816f276bfad (patch)
tree: 1c2d5281da29c9cc10480e95b186d88941b1acf8
parent: e0f3720e399573134657458f4c8bd20c68fc092a (diff)
core: rework syscall filter set handling

A variety of fixes:

- rename the SystemCallFilterSet structure to SyscallFilterSet. So far the main instance of it (the syscall_filter_sets[] array) used to abbreviate "SystemCall" as "Syscall". Let's stick to one of the two syntaxes, and not mix and match too wildly. Let's pick the shorter name in this case, as it is sufficiently well established to not confuse hackers reading this.

- Export explicit indexes into the syscall_filter_sets[] array via an enum. This way, code that wants to make use of a specific filter set, can index it directly via the enum, instead of having to search for it. This makes apply_private_devices() in particular a lot simpler.

- Provide two new helper calls in seccomp-util.c: syscall_filter_set_find() to find a set by its name, seccomp_add_syscall_filter_set() to add a set to a seccomp object.

- Update SystemCallFilter= parser to use extract_first_word(). Let's work on deprecating FOREACH_WORD_QUOTED().

- Simplify apply_private_devices() using this functionality

-rw-r--r--  src/core/execute.c          41
-rw-r--r--  src/core/load-fragment.c    54
-rw-r--r--  src/shared/seccomp-util.c  128
-rw-r--r--  src/shared/seccomp-util.h   32
4 files changed, 155 insertions, 100 deletions
diff --git a/src/core/execute.c b/src/core/execute.c
index e63a12f934..18bb67cda9 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1578,10 +1578,7 @@ finish:
}
static int apply_private_devices(Unit *u, const ExecContext *c) {
- const SystemCallFilterSet *set;
scmp_filter_ctx *seccomp;
- const char *sys;
- bool syscalls_found = false;
int r;
assert(c);
@@ -1599,43 +1596,9 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
if (r < 0)
goto finish;
- for (set = syscall_filter_sets; set->set_name; set++)
- if (streq(set->set_name, "@raw-io")) {
- syscalls_found = true;
- break;
- }
-
- /* We should never fail here */
- if (!syscalls_found) {
- r = -EOPNOTSUPP;
+ r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
+ if (r < 0)
goto finish;
- }
-
- NULSTR_FOREACH(sys, set->value) {
- int id;
- bool add = true;
-
-#ifndef __NR_s390_pci_mmio_read
- if (streq(sys, "s390_pci_mmio_read"))
- add = false;
-#endif
-#ifndef __NR_s390_pci_mmio_write
- if (streq(sys, "s390_pci_mmio_write"))
- add = false;
-#endif
-
- if (!add)
- continue;
-
- id = seccomp_syscall_resolve_name(sys);
-
- r = seccomp_rule_add(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- id, 0);
- if (r < 0)
- goto finish;
- }
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
if (r < 0)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 6f68e23340..118b39c1cf 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2618,6 +2618,7 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
+
static int syscall_filter_parse_one(
const char *unit,
const char *filename,
@@ -2628,27 +2629,29 @@ static int syscall_filter_parse_one(
bool warn) {
int r;
- if (*t == '@') {
- const SystemCallFilterSet *set;
+ if (t[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
- for (set = syscall_filter_sets; set->set_name; set++)
- if (streq(set->set_name, t)) {
- const char *sys;
+ set = syscall_filter_set_find(t);
+ if (!set) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
+ return 0;
+ }
- NULSTR_FOREACH(sys, set->value) {
- r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
- if (r < 0)
- return r;
- }
- break;
- }
+ NULSTR_FOREACH(i, set->value) {
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
+ if (r < 0)
+ return r;
+ }
} else {
int id;
id = seccomp_syscall_resolve_name(t);
if (id == __NR_SCMP_ERROR) {
if (warn)
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
return 0;
}
@@ -2662,8 +2665,9 @@ static int syscall_filter_parse_one(
if (r < 0)
return log_oom();
} else
- set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
}
+
return 0;
}
@@ -2682,8 +2686,7 @@ int config_parse_syscall_filter(
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
- const char *word, *state;
- size_t l;
+ const char *p;
int r;
assert(filename);
@@ -2722,19 +2725,24 @@ int config_parse_syscall_filter(
}
}
- FOREACH_WORD_QUOTED(word, l, rvalue, state) {
- _cleanup_free_ char *t = NULL;
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
- t = strndup(word, l);
- if (!t)
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
- r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
if (r < 0)
return r;
}
- if (!isempty(state))
- log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
/* Turn on NNP, but only if it wasn't configured explicitly
* before, and only if we are in user mode. */
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 8116c7671f..1d51f3fd1f 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -26,6 +26,7 @@
#include "macro.h"
#include "seccomp-util.h"
#include "string-util.h"
+#include "util.h"
const char* seccomp_arch_to_string(uint32_t c) {
@@ -132,28 +133,30 @@ bool is_seccomp_available(void) {
return cached_enabled;
}
-const SystemCallFilterSet syscall_filter_sets[] = {
- {
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_CLOCK] = {
/* Clock */
- .set_name = "@clock",
+ .name = "@clock",
.value =
"adjtimex\0"
"clock_adjtime\0"
"clock_settime\0"
"settimeofday\0"
"stime\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_CPU_EMULATION] = {
/* CPU emulation calls */
- .set_name = "@cpu-emulation",
+ .name = "@cpu-emulation",
.value =
"modify_ldt\0"
"subpage_prot\0"
"switch_endian\0"
"vm86\0"
"vm86old\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_DEBUG] = {
/* Debugging/Performance Monitoring/Tracing */
- .set_name = "@debug",
+ .name = "@debug",
.value =
"lookup_dcookie\0"
"perf_event_open\0"
@@ -161,11 +164,14 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"process_vm_writev\0"
"ptrace\0"
"rtas\0"
+#ifdef __NR_s390_runtime_instr
"s390_runtime_instr\0"
+#endif
"sys_debug_setcontext\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_DEFAULT] = {
/* Default list */
- .set_name = "@default",
+ .name = "@default",
.value =
"execve\0"
"exit\0"
@@ -173,9 +179,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"getrlimit\0" /* make sure processes can query stack size and such */
"rt_sigreturn\0"
"sigreturn\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_IO_EVENT] = {
/* Event loop use */
- .set_name = "@io-event",
+ .name = "@io-event",
.value =
"_newselect\0"
"epoll_create1\0"
@@ -191,9 +198,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"ppoll\0"
"pselect6\0"
"select\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_IPC] = {
/* Message queues, SYSV IPC or other IPC: unusual */
- .set_name = "@ipc",
+ .name = "@ipc",
.value = "ipc\0"
"mq_getsetattr\0"
"mq_notify\0"
@@ -215,23 +223,26 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"shmctl\0"
"shmdt\0"
"shmget\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_KEYRING] = {
/* Keyring */
- .set_name = "@keyring",
+ .name = "@keyring",
.value =
"add_key\0"
"keyctl\0"
"request_key\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_MODULE] = {
/* Kernel module control */
- .set_name = "@module",
+ .name = "@module",
.value =
"delete_module\0"
"finit_module\0"
"init_module\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_MOUNT] = {
/* Mounting */
- .set_name = "@mount",
+ .name = "@mount",
.value =
"chroot\0"
"mount\0"
@@ -239,9 +250,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"pivot_root\0"
"umount2\0"
"umount\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_NETWORK_IO] = {
/* Network or Unix socket IO, should not be needed if not network facing */
- .set_name = "@network-io",
+ .name = "@network-io",
.value =
"accept4\0"
"accept\0"
@@ -264,9 +276,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"socket\0"
"socketcall\0"
"socketpair\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_OBSOLETE] = {
/* Unusual, obsolete or unimplemented, some unknown even to libseccomp */
- .set_name = "@obsolete",
+ .name = "@obsolete",
.value =
"_sysctl\0"
"afs_syscall\0"
@@ -292,9 +305,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"uselib\0"
"ustat\0"
"vserver\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_PRIVILEGED] = {
/* Nice grab-bag of all system calls which need superuser capabilities */
- .set_name = "@privileged",
+ .name = "@privileged",
.value =
"@clock\0"
"@module\0"
@@ -333,9 +347,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"swapon\0"
"sysctl\0"
"vhangup\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_PROCESS] = {
/* Process control, execution, namespaces */
- .set_name = "@process",
+ .name = "@process",
.value =
"arch_prctl\0"
"clone\0"
@@ -349,19 +364,66 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"tkill\0"
"unshare\0"
"vfork\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_RAW_IO] = {
/* Raw I/O ports */
- .set_name = "@raw-io",
+ .name = "@raw-io",
.value =
"ioperm\0"
"iopl\0"
"pciconfig_iobase\0"
"pciconfig_read\0"
"pciconfig_write\0"
+#ifdef __NR_s390_pci_mmio_read
"s390_pci_mmio_read\0"
+#endif
+#ifdef __NR_s390_pci_mmio_write
"s390_pci_mmio_write\0"
- }, {
- .set_name = NULL,
- .value = NULL
- }
+#endif
+ },
};
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+ unsigned i;
+
+ if (isempty(name) || name[0] != '@')
+ return NULL;
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
+ if (streq(syscall_filter_sets[i].name, name))
+ return syscall_filter_sets + i;
+
+ return NULL;
+}
+
+int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) {
+ const char *sys;
+ int r;
+
+ assert(seccomp);
+ assert(set);
+
+ NULSTR_FOREACH(sys, set->value) {
+ int id;
+
+ if (sys[0] == '@') {
+ const SyscallFilterSet *other;
+
+ other = syscall_filter_set_find(sys);
+ if (!other)
+ return -EINVAL;
+
+ r = seccomp_add_syscall_filter_set(seccomp, other, action);
+ } else {
+ id = seccomp_syscall_resolve_name(sys);
+ if (id == __NR_SCMP_ERROR)
+ return -EINVAL;
+
+ r = seccomp_rule_add(seccomp, action, id, 0);
+ }
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index cca7c17912..34fd49c122 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -29,9 +29,31 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c);
bool is_seccomp_available(void);
-typedef struct SystemCallFilterSet {
- const char *set_name;
+typedef struct SyscallFilterSet {
+ const char *name;
const char *value;
-} SystemCallFilterSet;
-
-extern const SystemCallFilterSet syscall_filter_sets[];
+} SyscallFilterSet;
+
+enum {
+ SYSCALL_FILTER_SET_CLOCK,
+ SYSCALL_FILTER_SET_CPU_EMULATION,
+ SYSCALL_FILTER_SET_DEBUG,
+ SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_IO_EVENT,
+ SYSCALL_FILTER_SET_IPC,
+ SYSCALL_FILTER_SET_KEYRING,
+ SYSCALL_FILTER_SET_MODULE,
+ SYSCALL_FILTER_SET_MOUNT,
+ SYSCALL_FILTER_SET_NETWORK_IO,
+ SYSCALL_FILTER_SET_OBSOLETE,
+ SYSCALL_FILTER_SET_PRIVILEGED,
+ SYSCALL_FILTER_SET_PROCESS,
+ SYSCALL_FILTER_SET_RAW_IO,
+ _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action);