From c79aff9a82abf361aea47b5c745ed9729c5f0212 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 25 Oct 2016 15:38:36 +0200 Subject: seccomp: add clock query and sleeping syscalls to "@default" group Timing and sleep are such basic operations that it makes very little sense to ever block them, hence don't. --- src/shared/seccomp-util.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/shared') diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 1cbbb9d757..ad5782fb29 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -253,15 +253,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "sys_debug_setcontext\0" }, [SYSCALL_FILTER_SET_DEFAULT] = { - /* Default list */ + /* Default list: the most basic of operations */ .name = "@default", .value = + "clock_getres\0" + "clock_gettime\0" + "clock_nanosleep\0" "execve\0" "exit\0" "exit_group\0" "getrlimit\0" /* make sure processes can query stack size and such */ + "gettimeofday\0" + "nanosleep\0" + "pause\0" "rt_sigreturn\0" "sigreturn\0" + "time\0" }, [SYSCALL_FILTER_SET_IO_EVENT] = { /* Event loop use */ -- cgit v1.2.3-54-g00ecf From a8c157ff3081ee963adb0d046015abf9a271fa67 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 25 Oct 2016 15:42:10 +0200 Subject: seccomp: drop execve() from @process list The system call is already part of @default, hence implicitly allowed anyway. Also, if it were actually blocked, systemd could no longer execute the service in question, since the application of seccomp is immediately followed by it. 
--- man/systemd.exec.xml | 2 +- src/shared/seccomp-util.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'src/shared') diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index e7d8bb23a4..d45e5362dc 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1347,7 +1347,7 @@ @process - Process control, execution, namespaces (execve2, kill2, namespaces7, … + Process control, execution, namespaces (clone2, kill2, namespaces7, … @raw-io diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index ad5782fb29..70723e9e4e 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -443,7 +443,6 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { .value = "arch_prctl\0" "clone\0" - "execve\0" "execveat\0" "fork\0" "kill\0" -- cgit v1.2.3-54-g00ecf From cd5bfd7e60c08cfad41bcf881f550c424b2f3e44 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 25 Oct 2016 15:43:31 +0200 Subject: seccomp: include pipes and memfd in @ipc These system calls clearly fall in the @ipc category, hence should be listed there, simply to avoid confusion and surprise by the user. 
--- man/systemd.exec.xml | 2 +- src/shared/seccomp-util.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/shared') diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index d45e5362dc..466511aaf3 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1319,7 +1319,7 @@ @ipc - SysV IPC, POSIX Message Queues or other IPC (mq_overview7, svipc7) + Pipes, SysV IPC, POSIX Message Queues and other IPC (mq_overview7, svipc7) @keyring diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 70723e9e4e..e0a61aa358 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -290,9 +290,10 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "select\0" }, [SYSCALL_FILTER_SET_IPC] = { - /* Message queues, SYSV IPC or other IPC: unusual */ + /* Message queues, SYSV IPC or other IPC */ .name = "@ipc", .value = "ipc\0" + "memfd_create\0" "mq_getsetattr\0" "mq_notify\0" "mq_open\0" @@ -303,6 +304,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "msgget\0" "msgrcv\0" "msgsnd\0" + "pipe2\0" + "pipe\0" "process_vm_readv\0" "process_vm_writev\0" "semctl\0" -- cgit v1.2.3-54-g00ecf From 133ddbbeae74fc06173633605b3e612e934bc2dd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 2 Nov 2016 08:46:18 -0600 Subject: seccomp: add two new syscall groups @resources contains various syscalls that alter resource limits and memory and scheduling parameters of processes. As such they are good candidates to block for most services. @basic-io contains a number of basic syscalls for I/O, similar to the list seccomp v1 permitted but slightly more complete. 
It should be useful for building basic whitelisting for minimal sandboxes --- man/systemd.exec.xml | 8 ++++++++ src/shared/seccomp-util.c | 34 ++++++++++++++++++++++++++++++++++ src/shared/seccomp-util.h | 2 ++ 3 files changed, 44 insertions(+) (limited to 'src/shared') diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 3b80bcccd0..7daa3ae78e 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1301,6 +1301,10 @@ + + @basic-io + System calls for basic I/O: reading, writing, seeking, file descriptor duplication and closing (read2, write2, and related calls) + @clock System calls for changing the system clock (adjtimex2, settimeofday2, and related calls) @@ -1353,6 +1357,10 @@ @raw-io Raw I/O port access (ioperm2, iopl2, pciconfig_read(), …) + + @resources + System calls for changing resource limits, memory and scheduling parameters (setrlimit2, setpriority2, …) + diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index e0a61aa358..c9b24f1065 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -217,6 +217,24 @@ bool is_seccomp_available(void) { } const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { + [SYSCALL_FILTER_SET_BASIC_IO] = { + /* Basic IO */ + .name = "@basic-io", + .value = + "close\0" + "dup2\0" + "dup3\0" + "dup\0" + "lseek\0" + "pread64\0" + "preadv\0" + "pwrite64\0" + "pwritev\0" + "read\0" + "readv\0" + "write\0" + "writev\0" + }, [SYSCALL_FILTER_SET_CLOCK] = { /* Clock */ .name = "@clock", @@ -472,6 +490,22 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { "s390_pci_mmio_write\0" #endif }, + [SYSCALL_FILTER_SET_RESOURCES] = { + /* Alter resource settings */ + .name = "@resources", + .value = + "sched_setparam\0" + "sched_setscheduler\0" + "sched_setaffinity\0" + "setpriority\0" + "setrlimit\0" + "set_mempolicy\0" + "migrate_pages\0" + "move_pages\0" + "mbind\0" + "sched_setattr\0" + "prlimit64\0" + }, }; const SyscallFilterSet 
*syscall_filter_set_find(const char *name) { diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 8050fc6fbf..8e209efef2 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -38,6 +38,7 @@ typedef struct SyscallFilterSet { } SyscallFilterSet; enum { + SYSCALL_FILTER_SET_BASIC_IO, SYSCALL_FILTER_SET_CLOCK, SYSCALL_FILTER_SET_CPU_EMULATION, SYSCALL_FILTER_SET_DEBUG, @@ -52,6 +53,7 @@ enum { SYSCALL_FILTER_SET_PRIVILEGED, SYSCALL_FILTER_SET_PROCESS, SYSCALL_FILTER_SET_RAW_IO, + SYSCALL_FILTER_SET_RESOURCES, _SYSCALL_FILTER_SET_MAX }; -- cgit v1.2.3-54-g00ecf