From 50b52222f2d54a3c4d81e0e5987a0400cbcefb53 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 10 Jun 2016 17:31:06 +0200 Subject: nspawn: order caps to retain alphabetically --- src/nspawn/nspawn.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index ea24de7608..73c56d7310 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -137,6 +137,8 @@ static bool arg_ephemeral = false; static LinkJournal arg_link_journal = LINK_AUTO; static bool arg_link_journal_try = false; static uint64_t arg_caps_retain = + (1ULL << CAP_AUDIT_CONTROL) | + (1ULL << CAP_AUDIT_WRITE) | (1ULL << CAP_CHOWN) | (1ULL << CAP_DAC_OVERRIDE) | (1ULL << CAP_DAC_READ_SEARCH) | @@ -146,23 +148,21 @@ static uint64_t arg_caps_retain = (1ULL << CAP_KILL) | (1ULL << CAP_LEASE) | (1ULL << CAP_LINUX_IMMUTABLE) | + (1ULL << CAP_MKNOD) | (1ULL << CAP_NET_BIND_SERVICE) | (1ULL << CAP_NET_BROADCAST) | (1ULL << CAP_NET_RAW) | - (1ULL << CAP_SETGID) | (1ULL << CAP_SETFCAP) | + (1ULL << CAP_SETGID) | (1ULL << CAP_SETPCAP) | (1ULL << CAP_SETUID) | (1ULL << CAP_SYS_ADMIN) | + (1ULL << CAP_SYS_BOOT) | (1ULL << CAP_SYS_CHROOT) | (1ULL << CAP_SYS_NICE) | (1ULL << CAP_SYS_PTRACE) | - (1ULL << CAP_SYS_TTY_CONFIG) | (1ULL << CAP_SYS_RESOURCE) | - (1ULL << CAP_SYS_BOOT) | - (1ULL << CAP_AUDIT_WRITE) | - (1ULL << CAP_AUDIT_CONTROL) | - (1ULL << CAP_MKNOD); + (1ULL << CAP_SYS_TTY_CONFIG); static CustomMount *arg_custom_mounts = NULL; static unsigned arg_n_custom_mounts = 0; static char **arg_setenv = NULL; -- cgit v1.2.3-54-g00ecf From 1f9ac68b5bc671f1f8b0a32084810d39394208a6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 10 Jun 2016 17:43:38 +0200 Subject: core: improve seccomp syscall grouping a bit This adds three new seccomp syscall groups: @keyring for kernel keyring access, @cpu-emulation for CPU emulation features, for exampe vm86() for dosemu and suchlike, and @debug for ptrace() and related calls. Also, the @clock group is updated with more syscalls that alter the system clock. capset() is added to @privileged, and pciconfig_iobase() is added to @raw-io. Finally, @obsolete is a cleaned up. A number of syscalls that never existed on Linux and have no number assigned on any architecture are removed, as they only exist in the man pages and other operating sytems, but not in code at all. create_module() is moved from @module to @obsolete, as it is an obsolete system call. mem_getpolicy() is removed from the @obsolete list, as it is not obsolete, but simply a NUMA API. --- man/systemd.exec.xml | 38 ++++++++++++++++++-------------- src/shared/seccomp-util.c | 55 +++++++++++++++++++++++++++++++---------------- 2 files changed, 58 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 1c3256a662..a39e800854 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1218,49 +1218,55 @@ @clock - System calls for changing the system clock (adjtimex(), - settimeofday()) + System calls for changing the system clock (adjtimex2, settimeofday2, and related calls) + + + @cpu-emulation + System calls for CPU emulation functionality (vm862 and related calls) + + + @debug + Debugging, performance monitoring and tracing functionality (ptrace2, perf_event_open2 and related calls) @io-event - Event loop use (poll(), select(), - epoll7, - eventfd()...) + Event loop system calls (poll2, select2, epoll7, eventfd2 and related calls) @ipc - SysV IPC, POSIX Message Queues or other IPC (mq_overview7, - svipc7) + SysV IPC, POSIX Message Queues or other IPC (mq_overview7, svipc7) + + + @keyring + Kernel keyring access (keyctl2 and related calls) @module - Kernel module control (create_module(), init_module()...) + Kernel module control (init_module2, delete_module2 and related calls) @mount - File system mounting and unmounting (chroot(), mount()...) + File system mounting and unmounting (mount2, chroot2, and related calls) @network-io - Socket I/O (including local AF_UNIX): - socket7, - unix7 + Socket I/O (including local AF_UNIX): socket7, unix7 @obsolete - Unusual, obsolete or unimplemented (fattach(), gtty(), vm86()...) + Unusual, obsolete or unimplemented (create_module2, gtty2, …) @privileged - All system calls which need superuser capabilities (capabilities7) + All system calls which need super-user capabilities (capabilities7) @process - Process control, execution, namespaces (execve(), kill(), namespaces7...) + Process control, execution, namespaces (execve2, kill2, namespaces7, … @raw-io - Raw I/O ports (ioperm(), iopl(), pciconfig_read()...) + Raw I/O port access (ioperm2, iopl2, pciconfig_read(), … diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 30d22d2242..8656d112b8 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -95,7 +95,31 @@ const SystemCallFilterSet syscall_filter_sets[] = { .set_name = "@clock", .value = "adjtimex\0" + "clock_adjtime\0" + "clock_settime\0" "settimeofday\0" + "stime\0" + }, { + /* CPU emulation calls */ + .set_name = "@cpu-emulation", + .value = + "modify_ldt\0" + "subpage_prot\0" + "switch_endian\0" + "vm86\0" + "vm86old\0" + }, { + /* Debugging/Performance Monitoring/Tracing */ + .set_name = "@debug", + .value = + "lookup_dcookie\0" + "perf_event_open\0" + "process_vm_readv\0" + "process_vm_writev\0" + "ptrace\0" + "rtas\0" + "s390_runtime_instr\0" + "sys_debug_setcontext\0" }, { /* Default list */ .set_name = "@default", @@ -147,11 +171,17 @@ const SystemCallFilterSet syscall_filter_sets[] = { "shmctl\0" "shmdt\0" "shmget\0" + }, { + /* Keyring */ + .set_name = "@keyring", + .value = + "add_key\0" + "keyctl\0" + "request_key\0" }, { /* Kernel module control */ .set_name = "@module", .value = - "create_module\0" "delete_module\0" "finit_module\0" "init_module\0" @@ -197,40 +227,26 @@ const SystemCallFilterSet syscall_filter_sets[] = { "_sysctl\0" "afs_syscall\0" "break\0" - "fattach\0" - "fdetach\0" + "create_module\0" "ftime\0" "get_kernel_syms\0" - "get_mempolicy\0" - "getmsg\0" "getpmsg\0" "gtty\0" - "isastream\0" "lock\0" - "madvise1\0" - "modify_ldt\0" "mpx\0" - "pciconfig_iobase\0" - "perf_event_open\0" "prof\0" "profil\0" - "putmsg\0" "putpmsg\0" "query_module\0" - "rtas\0" - "s390_runtime_instr\0" "security\0" "sgetmask\0" "ssetmask\0" "stty\0" - "subpage_prot\0" - "switch_endian\0" - "sys_debug_setcontext\0" + "sysfs\0" "tuxcall\0" "ulimit\0" "uselib\0" - "vm86\0" - "vm86old\0" + "ustat\0" "vserver\0" }, { /* Nice grab-bag of all system calls which need superuser capabilities */ @@ -242,6 +258,7 @@ const SystemCallFilterSet syscall_filter_sets[] = { "acct\0" "bdflush\0" "bpf\0" + "capset\0" "chown32\0" "chown\0" "chroot\0" @@ -268,7 +285,6 @@ const SystemCallFilterSet syscall_filter_sets[] = { "setreuid\0" "setuid32\0" "setuid\0" - "stime\0" "swapoff\0" "swapon\0" "sysctl\0" @@ -295,6 +311,7 @@ const SystemCallFilterSet syscall_filter_sets[] = { .value = "ioperm\0" "iopl\0" + "pciconfig_iobase\0" "pciconfig_read\0" "pciconfig_write\0" "s390_pci_mmio_read\0" -- cgit v1.2.3-54-g00ecf From 54a17e01de048a2275f8861b211f10d11e56407d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 10 Jun 2016 18:04:02 +0200 Subject: nspawn: lock down system call filter a bit Let's block access to the kernel keyring and a number of obsolete system calls. Also, update list of syscalls that may alter the system clock, and do raw IO access. Filter ptrace() if CAP_SYS_PTRACE is not passed to the container and acct() if CAP_SYS_PACCT is not passed. This also changes things so that kexec(), some profiling calls, the swap calls and quotactl() is never available to containers, not even if CAP_SYS_ADMIN is passed. After all we currently permit CAP_SYS_ADMIN to containers by default, but these calls should not be available, even then. --- src/nspawn/nspawn-seccomp.c | 78 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c index 2d145b68a7..54db1b47f8 100644 --- a/src/nspawn/nspawn-seccomp.c +++ b/src/nspawn/nspawn-seccomp.c @@ -44,20 +44,76 @@ static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx, uint64_t capability; int syscall_num; } blacklist[] = { - { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, - { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, - { CAP_SYS_BOOT, SCMP_SYS(kexec_load) }, - { CAP_SYS_ADMIN, SCMP_SYS(swapon) }, - { CAP_SYS_ADMIN, SCMP_SYS(swapoff) }, - { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at) }, - { CAP_SYS_MODULE, SCMP_SYS(init_module) }, - { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, - { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, - { CAP_SYSLOG, SCMP_SYS(syslog) }, + { 0, SCMP_SYS(_sysctl) }, /* obsolete syscall */ + { 0, SCMP_SYS(add_key) }, /* keyring is not namespaced */ + { 0, SCMP_SYS(afs_syscall) }, /* obsolete syscall */ + { 0, SCMP_SYS(bdflush) }, +#ifdef __NR_bpf + { 0, SCMP_SYS(bpf) }, +#endif + { 0, SCMP_SYS(break) }, /* obsolete syscall */ + { 0, SCMP_SYS(create_module) }, /* obsolete syscall */ + { 0, SCMP_SYS(ftime) }, /* obsolete syscall */ + { 0, SCMP_SYS(get_kernel_syms) }, /* obsolete syscall */ + { 0, SCMP_SYS(getpmsg) }, /* obsolete syscall */ + { 0, SCMP_SYS(gtty) }, /* obsolete syscall */ +#ifdef __NR_kexec_file_load + { 0, SCMP_SYS(kexec_file_load) }, +#endif + { 0, SCMP_SYS(kexec_load) }, + { 0, SCMP_SYS(keyctl) }, /* keyring is not namespaced */ + { 0, SCMP_SYS(lock) }, /* obsolete syscall */ + { 0, SCMP_SYS(lookup_dcookie) }, + { 0, SCMP_SYS(mpx) }, /* obsolete syscall */ + { 0, SCMP_SYS(nfsservctl) }, /* obsolete syscall */ + { 0, SCMP_SYS(open_by_handle_at) }, + { 0, SCMP_SYS(perf_event_open) }, + { 0, SCMP_SYS(prof) }, /* obsolete syscall */ + { 0, SCMP_SYS(profil) }, /* obsolete syscall */ + { 0, SCMP_SYS(putpmsg) }, /* obsolete syscall */ + { 0, SCMP_SYS(query_module) }, /* obsolete syscall */ + { 0, SCMP_SYS(quotactl) }, + { 0, SCMP_SYS(request_key) }, /* keyring is not namespaced */ + { 0, SCMP_SYS(security) }, /* obsolete syscall */ + { 0, SCMP_SYS(sgetmask) }, /* obsolete syscall */ + { 0, SCMP_SYS(ssetmask) }, /* obsolete syscall */ + { 0, SCMP_SYS(stty) }, /* obsolete syscall */ + { 0, SCMP_SYS(swapoff) }, + { 0, SCMP_SYS(swapon) }, + { 0, SCMP_SYS(sysfs) }, /* obsolete syscall */ + { 0, SCMP_SYS(tuxcall) }, /* obsolete syscall */ + { 0, SCMP_SYS(ulimit) }, /* obsolete syscall */ + { 0, SCMP_SYS(uselib) }, /* obsolete syscall */ + { 0, SCMP_SYS(ustat) }, /* obsolete syscall */ + { 0, SCMP_SYS(vserver) }, /* obsolete syscall */ + { CAP_SYSLOG, SCMP_SYS(syslog) }, + { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, + { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, + { CAP_SYS_MODULE, SCMP_SYS(init_module) }, + { CAP_SYS_PACCT, SCMP_SYS(acct) }, + { CAP_SYS_PTRACE, SCMP_SYS(process_vm_readv) }, + { CAP_SYS_PTRACE, SCMP_SYS(process_vm_writev) }, + { CAP_SYS_PTRACE, SCMP_SYS(ptrace) }, + { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, + { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, + { CAP_SYS_RAWIO, SCMP_SYS(pciconfig_iobase) }, + { CAP_SYS_RAWIO, SCMP_SYS(pciconfig_read) }, + { CAP_SYS_RAWIO, SCMP_SYS(pciconfig_write) }, +#ifdef __NR_s390_pci_mmio_read + { CAP_SYS_RAWIO, SCMP_SYS(s390_pci_mmio_read) }, +#endif +#ifdef __NR_s390_pci_mmio_write + { CAP_SYS_RAWIO, SCMP_SYS(s390_pci_mmio_write) }, +#endif + { CAP_SYS_TIME, SCMP_SYS(adjtimex) }, + { CAP_SYS_TIME, SCMP_SYS(clock_adjtime) }, + { CAP_SYS_TIME, SCMP_SYS(clock_settime) }, + { CAP_SYS_TIME, SCMP_SYS(settimeofday) }, + { CAP_SYS_TIME, SCMP_SYS(stime) }, }; for (i = 0; i < ELEMENTSOF(blacklist); i++) { - if (cap_list_retain & (1ULL << blacklist[i].capability)) + if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability))) continue; r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); -- cgit v1.2.3-54-g00ecf