diff options
Diffstat (limited to 'src/shared/seccomp-util.c')
-rw-r--r-- | src/shared/seccomp-util.c | 236 |
1 files changed, 197 insertions, 39 deletions
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 2c73cb8fa4..84964f750f 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -36,31 +36,72 @@ const uint32_t seccomp_local_archs[] = { -#if defined(__i386__) || defined(__x86_64__) + /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */ + +#if defined(__x86_64__) && defined(__ILP32__) SCMP_ARCH_X86, SCMP_ARCH_X86_64, + SCMP_ARCH_X32, /* native */ +#elif defined(__x86_64__) && !defined(__ILP32__) + SCMP_ARCH_X86, SCMP_ARCH_X32, - -#elif defined(__arm__) || defined(__aarch64__) + SCMP_ARCH_X86_64, /* native */ +#elif defined(__i386__) + SCMP_ARCH_X86, +#elif defined(__aarch64__) SCMP_ARCH_ARM, - SCMP_ARCH_AARCH64, - -#elif defined(__mips__) || defined(__mips64__) + SCMP_ARCH_AARCH64, /* native */ +#elif defined(__arm__) + SCMP_ARCH_ARM, +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32 SCMP_ARCH_MIPS, - SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32 SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64, SCMP_ARCH_MIPSEL64N32, - -#elif defined(__powerpc__) || defined(__powerpc64__) + SCMP_ARCH_MIPS64N32, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64N32, /* native */ +#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN SCMP_ARCH_PPC, - SCMP_ARCH_PPC64, SCMP_ARCH_PPC64LE, - -#elif defined(__s390__) || defined(__s390x__) + SCMP_ARCH_PPC64, /* native */ +#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN + SCMP_ARCH_PPC, + SCMP_ARCH_PPC64, + SCMP_ARCH_PPC64LE, /* native */ +#elif defined(__powerpc__) + SCMP_ARCH_PPC, +#elif defined(__s390x__) + SCMP_ARCH_S390, + SCMP_ARCH_S390X, /* native */ +#elif defined(__s390__) SCMP_ARCH_S390, - SCMP_ARCH_S390X, #endif (uint32_t) -1 }; @@ -171,11 +212,11 @@ int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_ if (arch != SCMP_ARCH_NATIVE && arch != seccomp_arch_native()) { - r = seccomp_arch_add(seccomp, arch); + r = seccomp_arch_remove(seccomp, seccomp_arch_native()); if (r < 0) goto finish; - r = seccomp_arch_remove(seccomp, seccomp_arch_native()); + r = seccomp_arch_add(seccomp, arch); if (r < 0) goto finish; @@ -344,6 +385,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "mknodat\0" "mmap2\0" "mmap\0" + "munmap\0" "newfstatat\0" "open\0" "openat\0" @@ -750,10 +792,35 @@ int seccomp_restrict_namespaces(unsigned long retain) { SECCOMP_FOREACH_LOCAL_ARCH(arch) { _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int clone_reversed_order = -1; unsigned i; log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + switch (arch) { + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X86: + case SCMP_ARCH_X32: + clone_reversed_order = 0; + break; + + case SCMP_ARCH_S390: + case SCMP_ARCH_S390X: + /* On s390/s390x the first two parameters to clone are switched */ + clone_reversed_order = 1; + break; + + /* Please add more definitions here, if you port systemd to other architectures! */ + +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__s390__) && !defined(__s390x__) +#warning "Consider adding the right clone() syscall definitions here!" +#endif + } + + if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */ + continue; + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); if (r < 0) return r; @@ -802,12 +869,20 @@ int seccomp_restrict_namespaces(unsigned long retain) { break; } - r = seccomp_rule_add_exact( - seccomp, - SCMP_ACT_ERRNO(EPERM), - SCMP_SYS(clone), - 1, - SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + if (clone_reversed_order == 0) + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + else + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); if (r < 0) { log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); break; @@ -878,10 +953,37 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist) { SECCOMP_FOREACH_LOCAL_ARCH(arch) { _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + bool supported; Iterator i; log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + switch (arch) { + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + case SCMP_ARCH_ARM: + case SCMP_ARCH_AARCH64: + /* These we know we support (i.e. are the ones that do not use socketcall()) */ + supported = true; + break; + + case SCMP_ARCH_X86: + case SCMP_ARCH_S390: + case SCMP_ARCH_S390X: + case SCMP_ARCH_PPC: + case SCMP_ARCH_PPC64: + case SCMP_ARCH_PPC64LE: + default: + /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we + * don't know */ + supported = false; + break; + } + + if (!supported) + continue; + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); if (r < 0) return r; @@ -1083,27 +1185,81 @@ int seccomp_restrict_realtime(void) { } int seccomp_memory_deny_write_execute(void) { + uint32_t arch; int r; SECCOMP_FOREACH_LOCAL_ARCH(arch) { _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0; log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + switch (arch) { + + case SCMP_ARCH_X86: + filter_syscall = SCMP_SYS(mmap2); + block_syscall = SCMP_SYS(mmap); + + /* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We + * ignore that here, which means there's still a way to get writable/executable memory, if an + * IPC key is mapped like this on i386. That's a pity, but no total loss. */ + break; + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + filter_syscall = SCMP_SYS(mmap); + shmat_syscall = SCMP_SYS(shmat); + break; + + /* Please add more definitions here, if you port systemd to other architectures! */ + +#if !defined(__i386__) && !defined(__x86_64__) +#warning "Consider adding the right mmap() syscall definitions here!" +#endif + } + + /* Can't filter mmap() on this arch, then skip it */ + if (filter_syscall == 0) + continue; + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); if (r < 0) return r; - r = seccomp_rule_add_exact( - seccomp, - SCMP_ACT_ERRNO(EPERM), - SCMP_SYS(mmap), - 1, - SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); - if (r < 0) { - log_debug_errno(r, "Failed to add mmap() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); - continue; + if (filter_syscall != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + filter_syscall, + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) { + _cleanup_free_ char *n = NULL; + + n = seccomp_syscall_resolve_num_arch(arch, filter_syscall); + log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m", + strna(n), + seccomp_arch_to_string(arch)); + continue; + } + } + + if (block_syscall != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + block_syscall, + 0); + if (r < 0) { + _cleanup_free_ char *n = NULL; + + n = seccomp_syscall_resolve_num_arch(arch, block_syscall); + log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m", + strna(n), + seccomp_arch_to_string(arch)); + continue; + } } r = seccomp_rule_add_exact( @@ -1117,15 +1273,17 @@ int seccomp_memory_deny_write_execute(void) { continue; } - r = seccomp_rule_add_exact( - seccomp, - SCMP_ACT_ERRNO(EPERM), - SCMP_SYS(shmat), - 1, - SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); - if (r < 0) { - log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); - continue; + if (shmat_syscall != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(shmat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); + if (r < 0) { + log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } } r = seccomp_load(seccomp); |