From f011b0b87a5ab3b2c85849558d464fcfcad85923 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Thu, 26 May 2016 22:42:29 +0200 Subject: nspawn: split out seccomp call into nspawn-seccomp.[ch] Split seccomp into nspawn-seccomp.[ch]. Currently there are no changes, but this will make it easy in the future to share or use the seccomp logic from systemd core. --- Makefile.am | 2 + src/nspawn/nspawn-seccomp.c | 143 ++++++++++++++++++++++++++++++++++++++++++++ src/nspawn/nspawn-seccomp.h | 24 ++++++++ src/nspawn/nspawn.c | 102 +------------------------------ 4 files changed, 171 insertions(+), 100 deletions(-) create mode 100644 src/nspawn/nspawn-seccomp.c create mode 100644 src/nspawn/nspawn-seccomp.h diff --git a/Makefile.am b/Makefile.am index 305099ab66..f8e1fac967 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3016,6 +3016,8 @@ systemd_nspawn_SOURCES = \ src/nspawn/nspawn-expose-ports.h \ src/nspawn/nspawn-cgroup.c \ src/nspawn/nspawn-cgroup.h \ + src/nspawn/nspawn-seccomp.c \ + src/nspawn/nspawn-seccomp.h \ src/nspawn/nspawn-register.c \ src/nspawn/nspawn-register.h \ src/nspawn/nspawn-setuid.c \ diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c new file mode 100644 index 0000000000..2d145b68a7 --- /dev/null +++ b/src/nspawn/nspawn-seccomp.c @@ -0,0 +1,143 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include +#include + +#ifdef HAVE_SECCOMP +#include +#endif + +#include "log.h" + +#ifdef HAVE_SECCOMP +#include "seccomp-util.h" +#endif + +#include "nspawn-seccomp.h" + +#ifdef HAVE_SECCOMP + +static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx, + uint64_t cap_list_retain) { + unsigned i; + int r; + static const struct { + uint64_t capability; + int syscall_num; + } blacklist[] = { + { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, + { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, + { CAP_SYS_BOOT, SCMP_SYS(kexec_load) }, + { CAP_SYS_ADMIN, SCMP_SYS(swapon) }, + { CAP_SYS_ADMIN, SCMP_SYS(swapoff) }, + { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at) }, + { CAP_SYS_MODULE, SCMP_SYS(init_module) }, + { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, + { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, + { CAP_SYSLOG, SCMP_SYS(syslog) }, + }; + + for (i = 0; i < ELEMENTSOF(blacklist); i++) { + if (cap_list_retain & (1ULL << blacklist[i].capability)) + continue; + + r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); + if (r == -EFAULT) + continue; /* unknown syscall */ + if (r < 0) { + log_error_errno(r, "Failed to block syscall: %m"); + return r; + } + } + + return 0; +} + +int setup_seccomp(uint64_t cap_list_retain) { + scmp_filter_ctx seccomp; + int r; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return log_oom(); + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) { + log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); + goto finish; + } + + r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain); + if (r < 0) + goto finish; + + /* + Audit is broken in containers, much of the userspace audit + hookup will fail if running inside a container. We don't + care and just turn off creation of audit sockets. + + This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail + with EAFNOSUPPORT which audit userspace uses as indication + that audit is disabled in the kernel. + */ + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 2, + SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), + SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); + if (r < 0) { + log_error_errno(r, "Failed to add audit seccomp rule: %m"); + goto finish; + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) { + log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); + goto finish; + } + + r = seccomp_load(seccomp); + if (r == -EINVAL) { + log_debug_errno(r, "Kernel is probably not configured with CONFIG_SECCOMP. Disabling seccomp audit filter: %m"); + r = 0; + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to install seccomp audit filter: %m"); + goto finish; + } + +finish: + seccomp_release(seccomp); + return r; +} + +#else + +int setup_seccomp(uint64_t cap_list_retain) { + return 0; +} + +#endif diff --git a/src/nspawn/nspawn-seccomp.h b/src/nspawn/nspawn-seccomp.h new file mode 100644 index 0000000000..5bde16faf9 --- /dev/null +++ b/src/nspawn/nspawn-seccomp.h @@ -0,0 +1,24 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include + +int setup_seccomp(uint64_t cap_list_retain); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index ac11bcea5a..646ef96f9b 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -26,9 +26,6 @@ #include #include #include -#ifdef HAVE_SECCOMP -#include -#endif #ifdef HAVE_SELINUX #include #endif @@ -82,15 +79,13 @@ #include "nspawn-settings.h" #include "nspawn-setuid.h" #include "nspawn-stub-pid1.h" +#include "nspawn-seccomp.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" #include "ptyfwd.h" #include "random-util.h" #include "rm-rf.h" -#ifdef HAVE_SECCOMP -#include "seccomp-util.h" -#endif #include "selinux-util.h" #include "signal-util.h" #include "socket-util.h" @@ -1667,99 +1662,6 @@ static int reset_audit_loginuid(void) { return 0; } -static int setup_seccomp(void) { - -#ifdef HAVE_SECCOMP - static const struct { - uint64_t capability; - int syscall_num; - } blacklist[] = { - { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, - { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, - { CAP_SYS_BOOT, SCMP_SYS(kexec_load) }, - { CAP_SYS_ADMIN, SCMP_SYS(swapon) }, - { CAP_SYS_ADMIN, SCMP_SYS(swapoff) }, - { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at) }, - { CAP_SYS_MODULE, SCMP_SYS(init_module) }, - { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, - { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, - { CAP_SYSLOG, SCMP_SYS(syslog) }, - }; - - scmp_filter_ctx seccomp; - unsigned i; - int r; - - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return log_oom(); - - r = seccomp_add_secondary_archs(seccomp); - if (r < 0) { - log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); - goto finish; - } - - for (i = 0; i < ELEMENTSOF(blacklist); i++) { - if (arg_retain & (1ULL << blacklist[i].capability)) - continue; - - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); - if (r == -EFAULT) - continue; /* unknown syscall */ - if (r < 0) { - log_error_errno(r, "Failed to block syscall: %m"); - goto finish; - } - } - - /* - Audit is broken in containers, much of the userspace audit - hookup will fail if running inside a container. We don't - care and just turn off creation of audit sockets. - - This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail - with EAFNOSUPPORT which audit userspace uses as indication - that audit is disabled in the kernel. - */ - - r = seccomp_rule_add( - seccomp, - SCMP_ACT_ERRNO(EAFNOSUPPORT), - SCMP_SYS(socket), - 2, - SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), - SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); - if (r < 0) { - log_error_errno(r, "Failed to add audit seccomp rule: %m"); - goto finish; - } - - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) { - log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); - goto finish; - } - - r = seccomp_load(seccomp); - if (r == -EINVAL) { - log_debug_errno(r, "Kernel is probably not configured with CONFIG_SECCOMP. Disabling seccomp audit filter: %m"); - r = 0; - goto finish; - } - if (r < 0) { - log_error_errno(r, "Failed to install seccomp audit filter: %m"); - goto finish; - } - -finish: - seccomp_release(seccomp); - return r; -#else - return 0; -#endif - -} static int setup_propagate(const char *root) { const char *p, *q; @@ -2988,7 +2890,7 @@ static int outer_child( if (r < 0) return r; - r = setup_seccomp(); + r = setup_seccomp(arg_retain); if (r < 0) return r; -- cgit v1.2.3-54-g00ecf