summaryrefslogtreecommitdiff
path: root/src/test/test-seccomp.c
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2016-11-01 20:25:19 -0600
committerLennart Poettering <lennart@poettering.net>2016-11-04 07:40:13 -0600
commitadd005357d535681c7075ced8eec2b6e61b43728 (patch)
treeb780280f06df0b09c738173602cb90c599597996 /src/test/test-seccomp.c
parent9156493171cf2d78e1ac1a3746c385b0e281acf1 (diff)
core: add new RestrictNamespaces= unit file setting
This new setting permits restricting whether namespaces may be created and managed by processes started by a unit. It installs a seccomp filter blocking certain invocations of unshare(), clone() and setns(). RestrictNamespaces=no is the default, and does not restrict namespaces in any way. RestrictNamespaces=yes takes away the ability to create or manage any kind of namespace. "RestrictNamespaces=mnt ipc" restricts the creation of namespaces so that only mount and IPC namespaces may be created/managed, but no other kind of namespaces. This setting should improve security quite a bit, as in particular user namespacing was a major source of CVEs in the kernel in the past, and is accessible to unprivileged processes. With this setting the entire attack surface may be removed for system services that do not make use of namespaces.
Diffstat (limited to 'src/test/test-seccomp.c')
-rw-r--r--src/test/test-seccomp.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
index 43d1567288..beb6a7f422 100644
--- a/src/test/test-seccomp.c
+++ b/src/test/test-seccomp.c
@@ -20,10 +20,15 @@
#include <stdlib.h>
#include <sys/eventfd.h>
#include <unistd.h>
+#include <sched.h>
+#include "alloc-util.h"
#include "fd-util.h"
#include "macro.h"
+#include "missing.h"
+#include "nsflags.h"
#include "process-util.h"
+#include "raw-clone.h"
#include "seccomp-util.h"
#include "string-util.h"
#include "util.h"
@@ -125,12 +130,101 @@ static void test_filter_sets(void) {
}
}
+static void test_restrict_namespace(void) {
+ _cleanup_free_ char *s = NULL;
+ pid_t pid;
+ unsigned long ul;
+
+ assert_se(namespace_flag_to_string(0) == NULL);
+ assert_se(streq(namespace_flag_to_string(CLONE_NEWNS), "mnt"));
+ assert_se(namespace_flag_to_string(CLONE_NEWNS|CLONE_NEWIPC) == NULL);
+ assert_se(streq(namespace_flag_to_string(CLONE_NEWCGROUP), "cgroup"));
+
+ assert_se(namespace_flag_from_string("mnt") == CLONE_NEWNS);
+ assert_se(namespace_flag_from_string(NULL) == 0);
+ assert_se(namespace_flag_from_string("") == 0);
+ assert_se(namespace_flag_from_string("uts") == CLONE_NEWUTS);
+ assert_se(namespace_flag_from_string(namespace_flag_to_string(CLONE_NEWUTS)) == CLONE_NEWUTS);
+ assert_se(streq(namespace_flag_to_string(namespace_flag_from_string("ipc")), "ipc"));
+
+ assert_se(namespace_flag_from_string_many(NULL, &ul) == 0 && ul == 0);
+ assert_se(namespace_flag_from_string_many("", &ul) == 0 && ul == 0);
+ assert_se(namespace_flag_from_string_many("mnt uts ipc", &ul) == 0 && ul == (CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC));
+
+ assert_se(namespace_flag_to_string_many(NAMESPACE_FLAGS_ALL, &s) == 0);
+ assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
+ assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
+
+ if (!is_seccomp_available())
+ return;
+
+ if (geteuid() != 0)
+ return;
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+
+ assert_se(seccomp_restrict_namespaces(CLONE_NEWNS|CLONE_NEWNET) >= 0);
+
+ assert_se(unshare(CLONE_NEWNS) == 0);
+ assert_se(unshare(CLONE_NEWNET) == 0);
+ assert_se(unshare(CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(unshare(CLONE_NEWIPC) == -1);
+ assert_se(errno == EPERM);
+ assert_se(unshare(CLONE_NEWNET|CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+
+ /* We use fd 0 (stdin) here, which is not a namespace fd, so setns() fails with EINVAL. Unless, of course,
+ * our seccomp filter matches the call first, in which case it fails with EPERM instead. */
+ assert_se(setns(0, CLONE_NEWNS) == -1);
+ assert_se(errno == EINVAL);
+ assert_se(setns(0, CLONE_NEWNET) == -1);
+ assert_se(errno == EINVAL);
+ assert_se(setns(0, CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, CLONE_NEWIPC) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, CLONE_NEWNET|CLONE_NEWUTS) == -1);
+ assert_se(errno == EPERM);
+ assert_se(setns(0, 0) == -1);
+ assert_se(errno == EPERM);
+
+ pid = raw_clone(CLONE_NEWNS);
+ assert_se(pid >= 0);
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+ pid = raw_clone(CLONE_NEWNET);
+ assert_se(pid >= 0);
+ if (pid == 0)
+ _exit(EXIT_SUCCESS);
+ pid = raw_clone(CLONE_NEWUTS);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+ pid = raw_clone(CLONE_NEWIPC);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+ pid = raw_clone(CLONE_NEWNET|CLONE_NEWUTS);
+ assert_se(pid < 0);
+ assert_se(errno == EPERM);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
+}
+
int main(int argc, char *argv[]) {
+ log_set_max_level(LOG_DEBUG);
+
test_seccomp_arch_to_string();
test_architecture_table();
test_syscall_filter_set_find();
test_filter_sets();
+ test_restrict_namespace();
return 0;
}