From ec2ebfd524447caf14e63b49b3f63117b93c7c78 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 18:26:30 +0200 Subject: update-done: minor clean-ups This is a follow-up for fb8b0869a7bc30e23be175cf978df23192d59118, and makes a couple of minor clean-up changes: - The field name in the timestamp file is changed from "TimestampNSec=" to "TIMESTAMP_NSEC=". This is done simply to reflect the fact that we parse the file with the env var file parser, and hence the contents should better follow the usual capitalization of env vars, i.e. be all uppercase. - Needless negation of the errno parameter of log_error_errno() and friends has been removed. - Instead of manually calculating the nsec remainder of the timestamp, use timespec_store(). - We now check whether we were able to write the timestamp file in full with fflush_and_check() the way we usually do it. --- src/shared/condition.c | 9 ++++----- src/update-done/update-done.c | 15 +++++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/shared/condition.c b/src/shared/condition.c index f13fa6a9fd..69b4837e1f 100644 --- a/src/shared/condition.c +++ b/src/shared/condition.c @@ -329,9 +329,9 @@ static int condition_test_needs_update(Condition *c) { uint64_t timestamp; int r; - r = parse_env_file(p, NULL, "TimestampNSec", &timestamp_str, NULL); + r = parse_env_file(p, NULL, "TIMESTAMP_NSEC", &timestamp_str, NULL); if (r < 0) { - log_error_errno(-r, "Failed to parse timestamp file '%s', using mtime: %m", p); + log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p); return true; } else if (r == 0) { log_debug("No data in timestamp file '%s', using mtime", p); @@ -340,12 +340,11 @@ static int condition_test_needs_update(Condition *c) { r = safe_atou64(timestamp_str, &timestamp); if (r < 0) { - log_error_errno(-r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", - timestamp_str, p); + log_error_errno(r, "Failed to parse timestamp value 
'%s' in file '%s', using mtime: %m", timestamp_str, p); return true; } - other.st_mtim.tv_nsec = timestamp % NSEC_PER_SEC; + timespec_store(&other.st_mtim, timestamp); } return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec; diff --git a/src/update-done/update-done.c b/src/update-done/update-done.c index 5cc5abfddf..48c2a3fff4 100644 --- a/src/update-done/update-done.c +++ b/src/update-done/update-done.c @@ -18,6 +18,7 @@ ***/ #include "fd-util.h" +#include "fileio.h" #include "io-util.h" #include "selinux-util.h" #include "util.h" @@ -32,8 +33,8 @@ static int apply_timestamp(const char *path, struct timespec *ts) { *ts, *ts }; - int fd = -1; _cleanup_fclose_ FILE *f = NULL; + int fd = -1; int r; assert(path); @@ -59,18 +60,20 @@ static int apply_timestamp(const char *path, struct timespec *ts) { return log_error_errno(errno, "Failed to create/open timestamp file %s: %m", path); } - f = fdopen(fd, "w"); + f = fdopen(fd, "we"); if (!f) { safe_close(fd); return log_error_errno(errno, "Failed to fdopen() timestamp file %s: %m", path); } (void) fprintf(f, - "%s" - "TimestampNSec=" NSEC_FMT "\n", - MESSAGE, timespec_load_nsec(ts)); + MESSAGE + "TIMESTAMP_NSEC=" NSEC_FMT "\n", + timespec_load_nsec(ts)); - fflush(f); + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write timestamp file: %m"); if (futimens(fd, twice) < 0) return log_error_errno(errno, "Failed to update timestamp on %s: %m", path); -- cgit v1.2.3-54-g00ecf From f673b62df67dfa67ddfa4f5a72d78ea3c002278f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:03:51 +0200 Subject: core: simplify skip_seccomp_unavailable() a bit Let's prefer early-exit over deep-indented if blocks. No behavioural change. 
--- src/core/execute.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index 53356c3c06..b69297241b 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1185,13 +1185,14 @@ static void rename_process_from_path(const char *path) { #ifdef HAVE_SECCOMP static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { - if (!is_seccomp_available()) { - log_open(); - log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); - log_close(); - return true; - } - return false; + + if (is_seccomp_available()) + return false; + + log_open(); + log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); + log_close(); + return true; } static int apply_seccomp(const Unit* u, const ExecContext *c) { -- cgit v1.2.3-54-g00ecf From e0f3720e399573134657458f4c8bd20c68fc092a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:05:49 +0200 Subject: core: move misplaced comment to the right place --- src/core/execute.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index b69297241b..e63a12f934 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1891,9 +1891,9 @@ static int setup_private_users(uid_t uid, gid_t gid) { asprintf(&uid_map, "0 0 1\n" /* Map root → root */ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */ - uid, uid); /* The case where the above is the same */ + uid, uid); else - uid_map = strdup("0 0 1\n"); + uid_map = strdup("0 0 1\n"); /* The case where the above is the same */ if (!uid_map) return -ENOMEM; -- cgit v1.2.3-54-g00ecf From 8130926d32d76193e98ba783ba932816f276bfad Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:50:05 +0200 Subject: core: rework syscall filter set handling A variety of fixes: - rename the SystemCallFilterSet structure to SyscallFilterSet. 
So far the main instance of it (the syscall_filter_sets[] array) used to abbreviate "SystemCall" as "Syscall". Let's stick to one of the two syntaxes, and not mix and match too wildly. Let's pick the shorter name in this case, as it is sufficiently well established to not confuse hackers reading this. - Export explicit indexes into the syscall_filter_sets[] array via an enum. This way, code that wants to make use of a specific filter set, can index it directly via the enum, instead of having to search for it. This makes apply_private_devices() in particular a lot simpler. - Provide two new helper calls in seccomp-util.c: syscall_filter_set_find() to find a set by its name, seccomp_add_syscall_filter_set() to add a set to a seccomp object. - Update SystemCallFilter= parser to use extract_first_word(). Let's work on deprecating FOREACH_WORD_QUOTED(). - Simplify apply_private_devices() using this functionality --- src/core/execute.c | 41 +-------------- src/core/load-fragment.c | 54 ++++++++++--------- src/shared/seccomp-util.c | 128 ++++++++++++++++++++++++++++++++++------------ src/shared/seccomp-util.h | 32 ++++++++++-- 4 files changed, 155 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index e63a12f934..18bb67cda9 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1578,10 +1578,7 @@ finish: } static int apply_private_devices(Unit *u, const ExecContext *c) { - const SystemCallFilterSet *set; scmp_filter_ctx *seccomp; - const char *sys; - bool syscalls_found = false; int r; assert(c); @@ -1599,43 +1596,9 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, "@raw-io")) { - syscalls_found = true; - break; - } - - /* We should never fail here */ - if (!syscalls_found) { - r = -EOPNOTSUPP; + r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, 
SCMP_ACT_ERRNO(EPERM)); + if (r < 0) goto finish; - } - - NULSTR_FOREACH(sys, set->value) { - int id; - bool add = true; - -#ifndef __NR_s390_pci_mmio_read - if (streq(sys, "s390_pci_mmio_read")) - add = false; -#endif -#ifndef __NR_s390_pci_mmio_write - if (streq(sys, "s390_pci_mmio_write")) - add = false; -#endif - - if (!add) - continue; - - id = seccomp_syscall_resolve_name(sys); - - r = seccomp_rule_add( - seccomp, - SCMP_ACT_ERRNO(EPERM), - id, 0); - if (r < 0) - goto finish; - } r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); if (r < 0) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 6f68e23340..118b39c1cf 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2618,6 +2618,7 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP + static int syscall_filter_parse_one( const char *unit, const char *filename, @@ -2628,27 +2629,29 @@ static int syscall_filter_parse_one( bool warn) { int r; - if (*t == '@') { - const SystemCallFilterSet *set; + if (t[0] == '@') { + const SyscallFilterSet *set; + const char *i; - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, t)) { - const char *sys; + set = syscall_filter_set_find(t); + if (!set) { + if (warn) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t); + return 0; + } - NULSTR_FOREACH(sys, set->value) { - r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); - if (r < 0) - return r; - } - break; - } + NULSTR_FOREACH(i, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false); + if (r < 0) + return r; + } } else { int id; id = seccomp_syscall_resolve_name(t); if (id == __NR_SCMP_ERROR) { if (warn) - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t); return 0; } @@ -2662,8 +2665,9 @@ 
static int syscall_filter_parse_one( if (r < 0) return log_oom(); } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); } + return 0; } @@ -2682,8 +2686,7 @@ int config_parse_syscall_filter( ExecContext *c = data; Unit *u = userdata; bool invert = false; - const char *word, *state; - size_t l; + const char *p; int r; assert(filename); @@ -2722,19 +2725,24 @@ int config_parse_syscall_filter( } } - FOREACH_WORD_QUOTED(word, l, rvalue, state) { - _cleanup_free_ char *t = NULL; + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL; - t = strndup(word, l); - if (!t) + r = extract_first_word(&p, &word, NULL, 0); + if (r == 0) + break; + if (r == -ENOMEM) return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } - r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true); if (r < 0) return r; } - if (!isempty(state)) - log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); /* Turn on NNP, but only if it wasn't configured explicitly * before, and only if we are in user mode. 
*/ diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 8116c7671f..1d51f3fd1f 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -26,6 +26,7 @@ #include "macro.h" #include "seccomp-util.h" #include "string-util.h" +#include "util.h" const char* seccomp_arch_to_string(uint32_t c) { @@ -132,28 +133,30 @@ bool is_seccomp_available(void) { return cached_enabled; } -const SystemCallFilterSet syscall_filter_sets[] = { - { +const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { + [SYSCALL_FILTER_SET_CLOCK] = { /* Clock */ - .set_name = "@clock", + .name = "@clock", .value = "adjtimex\0" "clock_adjtime\0" "clock_settime\0" "settimeofday\0" "stime\0" - }, { + }, + [SYSCALL_FILTER_SET_CPU_EMULATION] = { /* CPU emulation calls */ - .set_name = "@cpu-emulation", + .name = "@cpu-emulation", .value = "modify_ldt\0" "subpage_prot\0" "switch_endian\0" "vm86\0" "vm86old\0" - }, { + }, + [SYSCALL_FILTER_SET_DEBUG] = { /* Debugging/Performance Monitoring/Tracing */ - .set_name = "@debug", + .name = "@debug", .value = "lookup_dcookie\0" "perf_event_open\0" @@ -161,11 +164,14 @@ const SystemCallFilterSet syscall_filter_sets[] = { "process_vm_writev\0" "ptrace\0" "rtas\0" +#ifdef __NR_s390_runtime_instr "s390_runtime_instr\0" +#endif "sys_debug_setcontext\0" - }, { + }, + [SYSCALL_FILTER_SET_DEFAULT] = { /* Default list */ - .set_name = "@default", + .name = "@default", .value = "execve\0" "exit\0" @@ -173,9 +179,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "getrlimit\0" /* make sure processes can query stack size and such */ "rt_sigreturn\0" "sigreturn\0" - }, { + }, + [SYSCALL_FILTER_SET_IO_EVENT] = { /* Event loop use */ - .set_name = "@io-event", + .name = "@io-event", .value = "_newselect\0" "epoll_create1\0" @@ -191,9 +198,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "ppoll\0" "pselect6\0" "select\0" - }, { + }, + [SYSCALL_FILTER_SET_IPC] = { /* Message queues, SYSV IPC or other IPC: unusual */ 
- .set_name = "@ipc", + .name = "@ipc", .value = "ipc\0" "mq_getsetattr\0" "mq_notify\0" @@ -215,23 +223,26 @@ const SystemCallFilterSet syscall_filter_sets[] = { "shmctl\0" "shmdt\0" "shmget\0" - }, { + }, + [SYSCALL_FILTER_SET_KEYRING] = { /* Keyring */ - .set_name = "@keyring", + .name = "@keyring", .value = "add_key\0" "keyctl\0" "request_key\0" - }, { + }, + [SYSCALL_FILTER_SET_MODULE] = { /* Kernel module control */ - .set_name = "@module", + .name = "@module", .value = "delete_module\0" "finit_module\0" "init_module\0" - }, { + }, + [SYSCALL_FILTER_SET_MOUNT] = { /* Mounting */ - .set_name = "@mount", + .name = "@mount", .value = "chroot\0" "mount\0" @@ -239,9 +250,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "pivot_root\0" "umount2\0" "umount\0" - }, { + }, + [SYSCALL_FILTER_SET_NETWORK_IO] = { /* Network or Unix socket IO, should not be needed if not network facing */ - .set_name = "@network-io", + .name = "@network-io", .value = "accept4\0" "accept\0" @@ -264,9 +276,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "socket\0" "socketcall\0" "socketpair\0" - }, { + }, + [SYSCALL_FILTER_SET_OBSOLETE] = { /* Unusual, obsolete or unimplemented, some unknown even to libseccomp */ - .set_name = "@obsolete", + .name = "@obsolete", .value = "_sysctl\0" "afs_syscall\0" @@ -292,9 +305,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "uselib\0" "ustat\0" "vserver\0" - }, { + }, + [SYSCALL_FILTER_SET_PRIVILEGED] = { /* Nice grab-bag of all system calls which need superuser capabilities */ - .set_name = "@privileged", + .name = "@privileged", .value = "@clock\0" "@module\0" @@ -333,9 +347,10 @@ const SystemCallFilterSet syscall_filter_sets[] = { "swapon\0" "sysctl\0" "vhangup\0" - }, { + }, + [SYSCALL_FILTER_SET_PROCESS] = { /* Process control, execution, namespaces */ - .set_name = "@process", + .name = "@process", .value = "arch_prctl\0" "clone\0" @@ -349,19 +364,66 @@ const SystemCallFilterSet syscall_filter_sets[] = { "tkill\0" 
"unshare\0" "vfork\0" - }, { + }, + [SYSCALL_FILTER_SET_RAW_IO] = { /* Raw I/O ports */ - .set_name = "@raw-io", + .name = "@raw-io", .value = "ioperm\0" "iopl\0" "pciconfig_iobase\0" "pciconfig_read\0" "pciconfig_write\0" +#ifdef __NR_s390_pci_mmio_read "s390_pci_mmio_read\0" +#endif +#ifdef __NR_s390_pci_mmio_write "s390_pci_mmio_write\0" - }, { - .set_name = NULL, - .value = NULL - } +#endif + }, }; + +const SyscallFilterSet *syscall_filter_set_find(const char *name) { + unsigned i; + + if (isempty(name) || name[0] != '@') + return NULL; + + for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) + if (streq(syscall_filter_sets[i].name, name)) + return syscall_filter_sets + i; + + return NULL; +} + +int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) { + const char *sys; + int r; + + assert(seccomp); + assert(set); + + NULSTR_FOREACH(sys, set->value) { + int id; + + if (sys[0] == '@') { + const SyscallFilterSet *other; + + other = syscall_filter_set_find(sys); + if (!other) + return -EINVAL; + + r = seccomp_add_syscall_filter_set(seccomp, other, action); + } else { + id = seccomp_syscall_resolve_name(sys); + if (id == __NR_SCMP_ERROR) + return -EINVAL; + + r = seccomp_rule_add(seccomp, action, id, 0); + } + if (r < 0) + return r; + } + + return 0; +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index cca7c17912..34fd49c122 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -29,9 +29,31 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c); bool is_seccomp_available(void); -typedef struct SystemCallFilterSet { - const char *set_name; +typedef struct SyscallFilterSet { + const char *name; const char *value; -} SystemCallFilterSet; - -extern const SystemCallFilterSet syscall_filter_sets[]; +} SyscallFilterSet; + +enum { + SYSCALL_FILTER_SET_CLOCK, + SYSCALL_FILTER_SET_CPU_EMULATION, + SYSCALL_FILTER_SET_DEBUG, + SYSCALL_FILTER_SET_DEFAULT, + SYSCALL_FILTER_SET_IO_EVENT, + 
SYSCALL_FILTER_SET_IPC, + SYSCALL_FILTER_SET_KEYRING, + SYSCALL_FILTER_SET_MODULE, + SYSCALL_FILTER_SET_MOUNT, + SYSCALL_FILTER_SET_NETWORK_IO, + SYSCALL_FILTER_SET_OBSOLETE, + SYSCALL_FILTER_SET_PRIVILEGED, + SYSCALL_FILTER_SET_PROCESS, + SYSCALL_FILTER_SET_RAW_IO, + _SYSCALL_FILTER_SET_MAX +}; + +extern const SyscallFilterSet syscall_filter_sets[]; + +const SyscallFilterSet *syscall_filter_set_find(const char *name); + +int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action); -- cgit v1.2.3-54-g00ecf From 25a8d8a0cb297f75b6b9fd3cc15747ba7f56031e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:12:33 +0200 Subject: core: rework apply_protect_kernel_modules() to use seccomp_add_syscall_filter_set() Let's simplify this call, by making use of the new infrastructure. This is actually more in line with Djalal's original patch but instead of searching for the filter set in the array by its name we can now use the set index and jump directly to it. 
--- src/core/execute.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index 18bb67cda9..f435a079c7 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1534,19 +1534,14 @@ finish: } static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - static const int module_syscalls[] = { - SCMP_SYS(delete_module), - SCMP_SYS(finit_module), - SCMP_SYS(init_module), - }; scmp_filter_ctx *seccomp; - unsigned i; + const char *sys; int r; assert(c); - /* Turn of module syscalls on ProtectKernelModules=yes */ + /* Turn off module syscalls on ProtectKernelModules=yes */ if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; @@ -1559,12 +1554,9 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - for (i = 0; i < ELEMENTSOF(module_syscalls); i++) { - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), - module_syscalls[i], 0); - if (r < 0) - goto finish; - } + r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); + if (r < 0) + goto finish; r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); if (r < 0) -- cgit v1.2.3-54-g00ecf From 8d7b0c8fd780e88ab5a6d1d79e09e27247245bee Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 20:28:05 +0200 Subject: seccomp: add new seccomp_init_conservative() helper This adds a new seccomp_init_conservative() helper call that is mostly just a wrapper around seccomp_init(), but turns off NNP and adds in all secondary archs, for best compatibility with everything else. Pretty much all of our code used the very same constructs for these three steps, hence unifying this in one small function makes things a lot shorter. This also changes incorrect usage of the "scmp_filter_ctx" type at various places. libseccomp defines it as typedef to "void*", i.e. 
it is a pointer type (pretty poor choice already!) that casts implicitly to and from all other pointer types (even poorer choice: you defined a confusing type now, and don't even gain any bit of type safety through it...). A lot of the code assumed the type would refer to a structure, and hence added additional "*" here and there. Remove that. --- src/core/execute.c | 88 ++++++++++----------------------------------- src/nspawn/nspawn-seccomp.c | 18 ++-------- src/shared/seccomp-util.c | 30 ++++++++++++++-- src/shared/seccomp-util.h | 4 ++- 4 files changed, 53 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index f435a079c7..668504c5cf 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1197,7 +1197,7 @@ static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { static int apply_seccomp(const Unit* u, const ExecContext *c) { uint32_t negative_action, action; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; void *id; int r; @@ -1248,7 +1248,7 @@ finish: } static int apply_address_families(const Unit* u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; int r; @@ -1257,13 +1257,9 @@ static int apply_address_families(const Unit* u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "RestrictAddressFamilies=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; if (c->address_families_whitelist) { int af, first = 0, last = 0; @@ -1360,10 +1356,6 @@ static int apply_address_families(const Unit* u, const ExecContext *c) { } } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1372,7 +1364,7 @@ finish: } static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) { - 
scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1380,13 +1372,9 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_rule_add( seccomp, @@ -1406,10 +1394,6 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1424,7 +1408,7 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { SCHED_IDLE, }; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; unsigned i; int r, p, max_policy = 0; @@ -1433,13 +1417,9 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "RestrictRealtime=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) @@ -1483,10 +1463,6 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1495,7 +1471,7 @@ finish: } static int apply_protect_sysctl(Unit *u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1506,13 +1482,9 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, 
"ProtectKernelTunables=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_rule_add( seccomp, @@ -1522,10 +1494,6 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1534,9 +1502,7 @@ finish: } static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - - scmp_filter_ctx *seccomp; - const char *sys; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1546,22 +1512,14 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1570,7 +1528,7 @@ finish: } static int apply_private_devices(Unit *u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1580,22 +1538,14 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + 
SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c index 44a0b397ab..03a397d30c 100644 --- a/src/nspawn/nspawn-seccomp.c +++ b/src/nspawn/nspawn-seccomp.c @@ -135,15 +135,9 @@ int setup_seccomp(uint64_t cap_list_retain) { return 0; } - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return log_oom(); - - r = seccomp_add_secondary_archs(seccomp); - if (r < 0) { - log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); - goto finish; - } + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return log_error_errno(r, "Failed to allocate seccomp object: %m"); r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain); if (r < 0) @@ -171,12 +165,6 @@ int setup_seccomp(uint64_t cap_list_retain) { goto finish; } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) { - log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); - goto finish; - } - r = seccomp_load(seccomp); if (r < 0) { log_error_errno(r, "Failed to install seccomp audit filter: %m"); diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 1d51f3fd1f..0b9fa47c44 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -74,7 +74,34 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) { return 0; } -int seccomp_add_secondary_archs(scmp_filter_ctx *c) { +int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) { + scmp_filter_ctx seccomp; + int r; + + /* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are + * added by default, and NNP is turned off. 
*/ + + seccomp = seccomp_init(default_action); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + *ret = seccomp; + return 0; + +finish: + seccomp_release(seccomp); + return r; +} + +int seccomp_add_secondary_archs(scmp_filter_ctx c) { #if defined(__i386__) || defined(__x86_64__) int r; @@ -111,7 +138,6 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) { #endif return 0; - } static bool is_basic_seccomp_available(void) { diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 34fd49c122..2de429a772 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -25,7 +25,9 @@ const char* seccomp_arch_to_string(uint32_t c); int seccomp_arch_from_string(const char *n, uint32_t *ret); -int seccomp_add_secondary_archs(scmp_filter_ctx *c); +int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action); + +int seccomp_add_secondary_archs(scmp_filter_ctx c); bool is_seccomp_available(void); -- cgit v1.2.3-54-g00ecf From 60f547cf684d27e8c0e7ff44663650e90f9e0bcf Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:15:43 +0200 Subject: seccomp: two fixes for the syscall set tables "oldumount()" is not a syscall, but simply a wrapper for it, the actual syscall nr is called "umount" (and the nr of umount() is called umount2 internally). "sysctl()" is not a syscall, but "_sysctl()" is. Fix this in the table. Without these changes libseccomp cannot actually translate the tables in full. This wasn't noticed before as the code was written defensively for this case. 
--- src/shared/seccomp-util.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 0b9fa47c44..f1e9de05b2 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -272,7 +272,6 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { .value = "chroot\0" "mount\0" - "oldumount\0" "pivot_root\0" "umount2\0" "umount\0" @@ -371,7 +370,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "setuid\0" "swapoff\0" "swapon\0" - "sysctl\0" + "_sysctl\0" "vhangup\0" }, [SYSCALL_FILTER_SET_PROCESS] = { -- cgit v1.2.3-54-g00ecf From a3be2849b2570482757f83181b999febbfc7bbef Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:18:46 +0200 Subject: seccomp: add new helper call seccomp_load_filter_set() This allows us to unify most of the code in apply_protect_kernel_modules() and apply_private_devices(). --- src/core/execute.c | 34 ++-------------------------------- src/shared/seccomp-util.c | 24 ++++++++++++++++++++++++ src/shared/seccomp-util.h | 2 ++ 3 files changed, 28 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index 668504c5cf..5e7d7c25d7 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1502,9 +1502,6 @@ finish: } static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - scmp_filter_ctx seccomp; - int r; - assert(c); /* Turn off module syscalls on ProtectKernelModules=yes */ @@ -1512,25 +1509,10 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; - r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); - if (r < 0) - return r; - - r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); - if (r < 0) - goto finish; - - r = seccomp_load(seccomp); - -finish: - 
seccomp_release(seccomp); - return r; + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); } static int apply_private_devices(Unit *u, const ExecContext *c) { - scmp_filter_ctx seccomp; - int r; - assert(c); /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ @@ -1538,19 +1520,7 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; - r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); - if (r < 0) - return r; - - r = seccomp_add_syscall_filter_set(seccomp, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); - if (r < 0) - goto finish; - - r = seccomp_load(seccomp); - -finish: - seccomp_release(seccomp); - return r; + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); } #endif diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index f1e9de05b2..6252cd16a6 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -452,3 +452,27 @@ int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterS return 0; } + +int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) { + scmp_filter_ctx seccomp; + int r; + + assert(set); + + /* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */ + + r = seccomp_init_conservative(&seccomp, default_action); + if (r < 0) + return r; + + r = seccomp_add_syscall_filter_set(seccomp, set, action); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; + +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 2de429a772..667687b14f 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -59,3 +59,5 @@ extern const SyscallFilterSet syscall_filter_sets[]; const 
SyscallFilterSet *syscall_filter_set_find(const char *name); int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action); + +int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action); -- cgit v1.2.3-54-g00ecf From f6281133def1da2d7ac875b8cf5af5c32bc63fd8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Oct 2016 21:48:10 +0200 Subject: seccomp: add test-seccomp test tool This validates the system call set table and many of our seccomp-util.c APIs. --- Makefile.am | 11 +++++ src/shared/seccomp-util.h | 1 + src/test/test-seccomp.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 src/test/test-seccomp.c (limited to 'src') diff --git a/Makefile.am b/Makefile.am index 18a5f4a82a..8ef3b42c41 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1570,6 +1570,11 @@ tests += \ test-acl-util endif +if HAVE_SECCOMP +tests += \ + test-seccomp +endif + EXTRA_DIST += \ test/a.service \ test/basic.target \ @@ -2038,6 +2043,12 @@ test_acl_util_SOURCES = \ test_acl_util_LDADD = \ libsystemd-shared.la +test_seccomp_SOURCES = \ + src/test/test-seccomp.c + +test_seccomp_LDADD = \ + libsystemd-shared.la + test_namespace_LDADD = \ libcore.la diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 667687b14f..8050fc6fbf 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -20,6 +20,7 @@ ***/ #include <seccomp.h> +#include <stdbool.h> #include <stdint.h> const char* seccomp_arch_to_string(uint32_t c); diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c new file mode 100644 index 0000000000..0060ecdf02 --- /dev/null +++ b/src/test/test-seccomp.c @@ -0,0 +1,103 @@ +/*** + This file is part of systemd.
+ + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <stdlib.h> +#include <sys/eventfd.h> +#include <unistd.h> + +#include "fd-util.h" +#include "macro.h" +#include "process-util.h" +#include "seccomp-util.h" + +static void test_seccomp_arch_to_string(void) { + uint32_t a, b; + const char *name; + + a = seccomp_arch_native(); + assert_se(a > 0); + name = seccomp_arch_to_string(a); + assert_se(name); + assert_se(seccomp_arch_from_string(name, &b) >= 0); + assert_se(a == b); +} + +static void test_syscall_filter_set_find(void) { + assert_se(!syscall_filter_set_find(NULL)); + assert_se(!syscall_filter_set_find("")); + assert_se(!syscall_filter_set_find("quux")); + assert_se(!syscall_filter_set_find("@quux")); + + assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK); + assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT); + assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO); +} + +static void test_filter_sets(void) { + unsigned i; + int r; + + if (!is_seccomp_available()) + return; + + if (geteuid() != 0) + return; + + for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) { + pid_t pid; + + log_info("Testing %s", syscall_filter_sets[i].name); + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { /* Child? 
*/ + int fd; + + if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */ + r = seccomp_load_filter_set(SCMP_ACT_ERRNO(EPERM), syscall_filter_sets + i, SCMP_ACT_ALLOW); + else + r = seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EPERM)); + if (r < 0) + _exit(EXIT_FAILURE); + + /* Test the syscall filter with one random system call */ + fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC); + if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT)) + assert_se(fd < 0 && errno == EPERM); + else { + assert_se(fd >= 0); + safe_close(fd); + } + + _exit(EXIT_SUCCESS); + } + + assert_se(wait_for_terminate_and_warn(syscall_filter_sets[i].name, pid, true) == EXIT_SUCCESS); + } +} + +int main(int argc, char *argv[]) { + + test_seccomp_arch_to_string(); + test_syscall_filter_set_find(); + test_filter_sets(); + + return 0; +} -- cgit v1.2.3-54-g00ecf