From c79aff9a82abf361aea47b5c745ed9729c5f0212 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 25 Oct 2016 15:38:36 +0200 Subject: seccomp: add clock query and sleeping syscalls to "@default" group Timing and sleep are such basic operations that it makes very little sense to ever block them, hence don't. --- man/systemd.exec.xml | 38 ++++++++++++++------------------------ src/shared/seccomp-util.c | 9 ++++++++- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 11029ca186..e7d8bb23a4 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1255,30 +1255,20 @@ SystemCallFilter= - Takes a space-separated list of system call - names. If this setting is used, all system calls executed by - the unit processes except for the listed ones will result in - immediate process termination with the - SIGSYS signal (whitelisting). If the - first character of the list is ~, the - effect is inverted: only the listed system calls will result - in immediate process termination (blacklisting). If running in - user mode, or in system mode, but without the - CAP_SYS_ADMIN capability (e.g. setting - User=nobody), - NoNewPrivileges=yes is implied. This - feature makes use of the Secure Computing Mode 2 interfaces of - the kernel ('seccomp filtering') and is useful for enforcing a - minimal sandboxing environment. Note that the - execve, - rt_sigreturn, - sigreturn, - exit_group, exit - system calls are implicitly whitelisted and do not need to be - listed explicitly. This option may be specified more than once, - in which case the filter masks are merged. If the empty string - is assigned, the filter is reset, all prior assignments will - have no effect. This does not affect commands prefixed with +. + Takes a space-separated list of system call names. 
If this setting is used, all system calls + executed by the unit processes except for the listed ones will result in immediate process termination with the + SIGSYS signal (whitelisting). If the first character of the list is ~, + the effect is inverted: only the listed system calls will result in immediate process termination + (blacklisting). If running in user mode, or in system mode, but without the CAP_SYS_ADMIN + capability (e.g. setting User=nobody), NoNewPrivileges=yes is + implied. This feature makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering') + and is useful for enforcing a minimal sandboxing environment. Note that the execve, + exit, exit_group, getrlimit, + rt_sigreturn, sigreturn system calls and the system calls for + querying time and sleeping are implicitly whitelisted and do not need to be listed explicitly. This option may + be specified more than once, in which case the filter masks are merged. If the empty string is assigned, the + filter is reset, all prior assignments will have no effect. This does not affect commands prefixed with + +. If you specify both types of this option (i.e. whitelisting and blacklisting), the first encountered will diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 1cbbb9d757..ad5782fb29 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -253,15 +253,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "sys_debug_setcontext\0" }, [SYSCALL_FILTER_SET_DEFAULT] = { - /* Default list */ + /* Default list: the most basic of operations */ .name = "@default", .value = + "clock_getres\0" + "clock_gettime\0" + "clock_nanosleep\0" "execve\0" "exit\0" "exit_group\0" "getrlimit\0" /* make sure processes can query stack size and such */ + "gettimeofday\0" + "nanosleep\0" + "pause\0" "rt_sigreturn\0" "sigreturn\0" + "time\0" }, [SYSCALL_FILTER_SET_IO_EVENT] = { /* Event loop use */ -- cgit v1.2.3-54-g00ecf