summaryrefslogtreecommitdiff
path: root/src/shared/seccomp-util.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/shared/seccomp-util.c')
-rw-r--r--src/shared/seccomp-util.c236
1 files changed, 197 insertions, 39 deletions
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 2c73cb8fa4..84964f750f 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -36,31 +36,72 @@
const uint32_t seccomp_local_archs[] = {
-#if defined(__i386__) || defined(__x86_64__)
+ /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */
+
+#if defined(__x86_64__) && defined(__ILP32__)
SCMP_ARCH_X86,
SCMP_ARCH_X86_64,
+ SCMP_ARCH_X32, /* native */
+#elif defined(__x86_64__) && !defined(__ILP32__)
+ SCMP_ARCH_X86,
SCMP_ARCH_X32,
-
-#elif defined(__arm__) || defined(__aarch64__)
+ SCMP_ARCH_X86_64, /* native */
+#elif defined(__i386__)
+ SCMP_ARCH_X86,
+#elif defined(__aarch64__)
SCMP_ARCH_ARM,
- SCMP_ARCH_AARCH64,
-
-#elif defined(__mips__) || defined(__mips64__)
+ SCMP_ARCH_AARCH64, /* native */
+#elif defined(__arm__)
+ SCMP_ARCH_ARM,
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
SCMP_ARCH_MIPS,
- SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64,
SCMP_ARCH_MIPSEL64N32,
-
-#elif defined(__powerpc__) || defined(__powerpc64__)
+ SCMP_ARCH_MIPS64N32, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
SCMP_ARCH_PPC,
- SCMP_ARCH_PPC64,
SCMP_ARCH_PPC64LE,
-
-#elif defined(__s390__) || defined(__s390x__)
+ SCMP_ARCH_PPC64, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64,
+ SCMP_ARCH_PPC64LE, /* native */
+#elif defined(__powerpc__)
+ SCMP_ARCH_PPC,
+#elif defined(__s390x__)
+ SCMP_ARCH_S390,
+ SCMP_ARCH_S390X, /* native */
+#elif defined(__s390__)
SCMP_ARCH_S390,
- SCMP_ARCH_S390X,
#endif
(uint32_t) -1
};
@@ -171,11 +212,11 @@ int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_
if (arch != SCMP_ARCH_NATIVE &&
arch != seccomp_arch_native()) {
- r = seccomp_arch_add(seccomp, arch);
+ r = seccomp_arch_remove(seccomp, seccomp_arch_native());
if (r < 0)
goto finish;
- r = seccomp_arch_remove(seccomp, seccomp_arch_native());
+ r = seccomp_arch_add(seccomp, arch);
if (r < 0)
goto finish;
@@ -344,6 +385,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
"mknodat\0"
"mmap2\0"
"mmap\0"
+ "munmap\0"
"newfstatat\0"
"open\0"
"openat\0"
@@ -750,10 +792,35 @@ int seccomp_restrict_namespaces(unsigned long retain) {
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int clone_reversed_order = -1;
unsigned i;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_X32:
+ clone_reversed_order = 0;
+ break;
+
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ /* On s390/s390x the first two parameters to clone are switched */
+ clone_reversed_order = 1;
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__s390__) && !defined(__s390x__)
+#warning "Consider adding the right clone() syscall definitions here!"
+#endif
+ }
+
+ if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */
+ continue;
+
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
@@ -802,12 +869,20 @@ int seccomp_restrict_namespaces(unsigned long retain) {
break;
}
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(clone),
- 1,
- SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ if (clone_reversed_order == 0)
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ else
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
if (r < 0) {
log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
break;
@@ -878,10 +953,37 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ bool supported;
Iterator i;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_ARM:
+ case SCMP_ARCH_AARCH64:
+ /* These we know we support (i.e. are the ones that do not use socketcall()) */
+ supported = true;
+ break;
+
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ default:
+ /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
+ * don't know */
+ supported = false;
+ break;
+ }
+
+ if (!supported)
+ continue;
+
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
@@ -1083,27 +1185,81 @@ int seccomp_restrict_realtime(void) {
}
int seccomp_memory_deny_write_execute(void) {
+
uint32_t arch;
int r;
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+ switch (arch) {
+
+ case SCMP_ARCH_X86:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+
+ /* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We
+ * ignore that here, which means there's still a way to get writable/executable memory, if an
+ * IPC key is mapped like this on i386. That's a pity, but no total loss. */
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ filter_syscall = SCMP_SYS(mmap);
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(mmap),
- 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
- if (r < 0) {
- log_debug_errno(r, "Failed to add mmap() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
- continue;
+ if (filter_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, filter_syscall);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ if (block_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ block_syscall,
+ 0);
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, block_syscall);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ continue;
+ }
}
r = seccomp_rule_add_exact(
@@ -1117,15 +1273,17 @@ int seccomp_memory_deny_write_execute(void) {
continue;
}
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(shmat),
- 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
- if (r < 0) {
- log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
- continue;
+ if (shmat_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
}
r = seccomp_load(seccomp);