summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2017-02-08 15:14:02 +0100
committerMartin Pitt <martinpitt@users.noreply.github.com>2017-02-08 15:14:02 +0100
commit8a50cf6957f12dbb1f90411659da9b959a1983ff (patch)
tree478a9a32d69f6af15cee06ec346e140d686143a5
parentb6f08ecda90b5ccb6c9c09e5976a627f5918dc0b (diff)
seccomp: MemoryDenyWriteExecute= should affect both mmap() and mmap2() (#5254)
On i386 we block the old mmap() call entirely, since we cannot properly filter it. Thankfully it hasn't been used by glibc since quite some time. Fixes: #5240
-rw-r--r--man/systemd.exec.xml30
-rw-r--r--src/shared/seccomp-util.c92
-rw-r--r--src/shared/seccomp-util.h7
-rw-r--r--src/test/test-seccomp.c12
4 files changed, 106 insertions, 35 deletions
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index bb38ea2467..fd47b0a20a 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1607,22 +1607,20 @@
<term><varname>MemoryDenyWriteExecute=</varname></term>
<listitem><para>Takes a boolean argument. If set, attempts to create memory mappings that are writable and
- executable at the same time, or to change existing memory mappings to become executable, or mapping shared memory
- segments as executable are prohibited.
- Specifically, a system call filter is added that rejects
- <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set,
- <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- system calls with <constant>PROT_EXEC</constant> set and
- <citerefentry><refentrytitle>shmat</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- system calls with <constant>SHM_EXEC</constant> set. Note that this option is incompatible with programs
- that generate program code dynamically at runtime, such as JIT execution engines, or programs compiled making
- use of the code "trampoline" feature of various C compilers. This option improves service security, as it makes
- harder for software exploits to change running code dynamically.
- If running in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant>
- capability (e.g. setting <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname>
- is implied.
- </para></listitem>
+ executable at the same time, or to change existing memory mappings to become executable, or mapping shared
+ memory segments as executable are prohibited. Specifically, a system call filter is added that rejects
+ <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls with both
+ <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set,
+ <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls with
+ <constant>PROT_EXEC</constant> set and
+ <citerefentry><refentrytitle>shmat</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls with
+ <constant>SHM_EXEC</constant> set. Note that this option is incompatible with programs that generate program
+ code dynamically at runtime, such as JIT execution engines, or programs compiled making use of the code
+ "trampoline" feature of various C compilers. This option improves service security, as it makes harder for
+ software exploits to change running code dynamically. Note that this feature is fully available on x86-64, and
+ partially on x86. Specifically, the <function>shmat()</function> protection is not available on x86. If running
+ in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
+ <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied. </para></listitem>
</varlistentry>
<varlistentry>
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 609e0619af..44706669b4 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -1086,27 +1086,81 @@ int seccomp_restrict_realtime(void) {
}
int seccomp_memory_deny_write_execute(void) {
+
uint32_t arch;
int r;
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+ switch (arch) {
+
+ case SCMP_ARCH_X86:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+
+ /* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We
+ * ignore that here, which means there's still a way to get writable/executable memory, if an
+ * IPC key is mapped like this on i386. That's a pity, but no total loss. */
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ filter_syscall = SCMP_SYS(mmap);
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(mmap),
- 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
- if (r < 0) {
- log_debug_errno(r, "Failed to add mmap() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
- continue;
+ if (filter_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, filter_syscall);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ if (block_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ block_syscall,
+ 0);
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, block_syscall);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ continue;
+ }
}
r = seccomp_rule_add_exact(
@@ -1120,15 +1174,17 @@ int seccomp_memory_deny_write_execute(void) {
continue;
}
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(shmat),
- 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
- if (r < 0) {
- log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
- continue;
+ if (shmat_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
}
r = seccomp_load(seccomp);
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 2563fcd38a..bfbfb5ab3d 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -84,6 +84,13 @@ int seccomp_memory_deny_write_execute(void);
#define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
#endif
+/* mmap() blocking is only available on some archs for now */
+#if defined(__x86_64__) || defined(__i386__)
+#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 0
+#else
+#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
+#endif
+
extern const uint32_t seccomp_local_archs[];
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
index 54e7947c2f..3659238810 100644
--- a/src/test/test-seccomp.c
+++ b/src/test/test-seccomp.c
@@ -384,11 +384,21 @@ static void test_memory_deny_write_execute(void) {
assert_se(p != MAP_FAILED);
assert_se(munmap(p, page_size()) >= 0);
- seccomp_memory_deny_write_execute();
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+ assert_se(seccomp_memory_deny_write_execute() >= 0);
+
+#if SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+#else
p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
assert_se(p == MAP_FAILED);
assert_se(errno == EPERM);
+#endif
p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
assert_se(p != MAP_FAILED);