diff options
Diffstat (limited to 'src/systemd-nspawn/nspawn-seccomp.c')
-rw-r--r-- | src/systemd-nspawn/nspawn-seccomp.c | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/src/systemd-nspawn/nspawn-seccomp.c b/src/systemd-nspawn/nspawn-seccomp.c new file mode 100644 index 0000000000..e5cfe789a1 --- /dev/null +++ b/src/systemd-nspawn/nspawn-seccomp.c @@ -0,0 +1,186 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <errno.h> +#include <sys/capability.h> +#include <sys/types.h> + +#include <linux/netlink.h> + +#ifdef HAVE_SECCOMP +#include <seccomp.h> +#endif + +#include "systemd-basic/log.h" + +#ifdef HAVE_SECCOMP +#include "systemd-shared/seccomp-util.h" +#endif + +#include "nspawn-seccomp.h" + +#ifdef HAVE_SECCOMP + +static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx, + uint64_t cap_list_retain) { + unsigned i; + int r; + static const struct { + uint64_t capability; + int syscall_num; + } blacklist[] = { + { 0, SCMP_SYS(_sysctl) }, /* obsolete syscall */ + { 0, SCMP_SYS(add_key) }, /* keyring is not namespaced */ + { 0, SCMP_SYS(afs_syscall) }, /* obsolete syscall */ + { 0, SCMP_SYS(bdflush) }, +#ifdef __NR_bpf + { 0, SCMP_SYS(bpf) }, +#endif + { 0, SCMP_SYS(break) }, /* obsolete syscall */ + { 0, SCMP_SYS(create_module) }, /* obsolete syscall */ + { 0, SCMP_SYS(ftime) }, /* obsolete syscall */ + { 0, SCMP_SYS(get_kernel_syms) }, /* obsolete syscall */ + { 0, SCMP_SYS(getpmsg) }, /* obsolete syscall */ + { 0, SCMP_SYS(gtty) }, /* obsolete syscall */ +#ifdef __NR_kexec_file_load + { 0, SCMP_SYS(kexec_file_load) }, +#endif + { 0, SCMP_SYS(kexec_load) }, + { 0, SCMP_SYS(keyctl) }, /* keyring is not namespaced */ + { 0, SCMP_SYS(lock) }, /* obsolete syscall */ + { 0, SCMP_SYS(lookup_dcookie) }, + { 0, SCMP_SYS(mpx) }, /* obsolete syscall */ + { 0, SCMP_SYS(nfsservctl) }, /* obsolete syscall */ + { 0, SCMP_SYS(open_by_handle_at) }, + { 0, SCMP_SYS(perf_event_open) }, + { 0, SCMP_SYS(prof) }, /* obsolete syscall */ + { 0, SCMP_SYS(profil) }, /* obsolete syscall */ + { 0, SCMP_SYS(putpmsg) }, /* obsolete syscall */ + { 0, SCMP_SYS(query_module) }, /* obsolete syscall */ + { 0, SCMP_SYS(quotactl) }, + { 0, SCMP_SYS(request_key) }, /* keyring is not namespaced */ + { 0, SCMP_SYS(security) }, /* obsolete syscall */ + { 0, SCMP_SYS(sgetmask) }, /* obsolete syscall */ + { 0, SCMP_SYS(ssetmask) }, /* obsolete syscall */ + { 0, SCMP_SYS(stty) }, /* obsolete syscall */ + { 0, SCMP_SYS(swapoff) }, + { 0, SCMP_SYS(swapon) }, + { 0, SCMP_SYS(sysfs) }, /* obsolete syscall */ + { 0, SCMP_SYS(tuxcall) }, /* obsolete syscall */ + { 0, SCMP_SYS(ulimit) }, /* obsolete syscall */ + { 0, SCMP_SYS(uselib) }, /* obsolete syscall */ + { 0, SCMP_SYS(ustat) }, /* obsolete syscall */ + { 0, SCMP_SYS(vserver) }, /* obsolete syscall */ + { CAP_SYSLOG, SCMP_SYS(syslog) }, + { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, + { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, + { CAP_SYS_MODULE, SCMP_SYS(init_module) }, + { CAP_SYS_PACCT, SCMP_SYS(acct) }, + { CAP_SYS_PTRACE, SCMP_SYS(process_vm_readv) }, + { CAP_SYS_PTRACE, SCMP_SYS(process_vm_writev) }, + { CAP_SYS_PTRACE, SCMP_SYS(ptrace) }, + { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, + { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, + { CAP_SYS_RAWIO, SCMP_SYS(pciconfig_iobase) }, + { CAP_SYS_RAWIO, SCMP_SYS(pciconfig_read) }, + { CAP_SYS_RAWIO, SCMP_SYS(pciconfig_write) }, +#ifdef __NR_s390_pci_mmio_read + { CAP_SYS_RAWIO, SCMP_SYS(s390_pci_mmio_read) }, +#endif +#ifdef __NR_s390_pci_mmio_write + { CAP_SYS_RAWIO, SCMP_SYS(s390_pci_mmio_write) }, +#endif + { CAP_SYS_TIME, SCMP_SYS(adjtimex) }, + { CAP_SYS_TIME, SCMP_SYS(clock_adjtime) }, + { CAP_SYS_TIME, SCMP_SYS(clock_settime) }, + { CAP_SYS_TIME, SCMP_SYS(settimeofday) }, + { CAP_SYS_TIME, SCMP_SYS(stime) }, + }; + + for (i = 0; i < ELEMENTSOF(blacklist); i++) { + if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability))) + continue; + + r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); + if (r == -EFAULT) + continue; /* unknown syscall */ + if (r < 0) + return log_error_errno(r, "Failed to block syscall: %m"); + } + + return 0; +} + +int setup_seccomp(uint64_t cap_list_retain) { + scmp_filter_ctx seccomp; + int r; + + if (!is_seccomp_available()) { + log_debug("SECCOMP features not detected in the kernel, disabling SECCOMP audit filter"); + return 0; + } + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return log_error_errno(r, "Failed to allocate seccomp object: %m"); + + r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain); + if (r < 0) + goto finish; + + /* + Audit is broken in containers, much of the userspace audit + hookup will fail if running inside a container. We don't + care and just turn off creation of audit sockets. + + This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail + with EAFNOSUPPORT which audit userspace uses as indication + that audit is disabled in the kernel. + */ + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 2, + SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), + SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); + if (r < 0) { + log_error_errno(r, "Failed to add audit seccomp rule: %m"); + goto finish; + } + + r = seccomp_load(seccomp); + if (r < 0) { + log_error_errno(r, "Failed to install seccomp audit filter: %m"); + goto finish; + } + +finish: + seccomp_release(seccomp); + return r; +} + +#else + +int setup_seccomp(uint64_t cap_list_retain) { + return 0; +} + +#endif |