From 28650077f36466d9c5ee27ef2006fae3171a2430 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 30 Jun 2014 16:22:12 +0200 Subject: nspawn: block open_by_handle_at() and others via seccomp Let's protect ourselves against the recently reported docker security issue. Our man page makes clear that we do not make any security promises anyway, but well, this one is easy to mitigate, so let's do it. While we are at it block a couple of more syscalls that are no good in containers, too. --- src/nspawn/nspawn.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index fd61d07761..656c1bf9f5 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1864,22 +1864,25 @@ static int setup_macvlan(pid_t pid) { return 0; } -static int audit_still_doesnt_work_in_containers(void) { +static int setup_seccomp(void) { #ifdef HAVE_SECCOMP + static const int blacklist[] = { + SCMP_SYS(kexec_load), + SCMP_SYS(open_by_handle_at), + SCMP_SYS(init_module), + SCMP_SYS(finit_module), + SCMP_SYS(delete_module), + SCMP_SYS(iopl), + SCMP_SYS(ioperm), + SCMP_SYS(swapon), + SCMP_SYS(swapoff), + }; + scmp_filter_ctx seccomp; + unsigned i; int r; - /* - Audit is broken in containers, much of the userspace audit - hookup will fail if running inside a container. We don't - care and just turn off creation of audit sockets. - - This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail - with EAFNOSUPPORT which audit userspace uses as indication - that audit is disabled in the kernel. - */ - seccomp = seccomp_init(SCMP_ACT_ALLOW); if (!seccomp) return log_oom(); @@ -1890,6 +1893,26 @@ static int audit_still_doesnt_work_in_containers(void) { goto finish; } + for (i = 0; i < ELEMENTSOF(blacklist); i++) { + r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0); + if (r == -EFAULT) + continue; /* unknown syscall */ + if (r < 0) { + log_error("Failed to block syscall: %s", strerror(-r)); + goto finish; + } + } + + /* + Audit is broken in containers, much of the userspace audit + hookup will fail if running inside a container. We don't + care and just turn off creation of audit sockets. + + This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail + with EAFNOSUPPORT which audit userspace uses as indication + that audit is disabled in the kernel. + */ + r = seccomp_rule_add( seccomp, SCMP_ACT_ERRNO(EAFNOSUPPORT), @@ -3050,7 +3073,7 @@ int main(int argc, char *argv[]) { dev_setup(arg_directory); - if (audit_still_doesnt_work_in_containers() < 0) + if (setup_seccomp() < 0) goto child_fail; if (setup_dev_console(arg_directory, console) < 0) -- cgit v1.2.3-54-g00ecf