summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--man/systemd.exec.xml73
-rw-r--r--src/core/load-fragment.c95
-rw-r--r--src/shared/seccomp-util.c216
-rw-r--r--src/shared/seccomp-util.h7
4 files changed, 350 insertions, 41 deletions
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 4d52982b64..58f18f3a9e 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1193,7 +1193,78 @@
<function>read</function> and <function>write</function>, and
right after it add a blacklisting of
<function>write</function>, then <function>write</function>
- will be removed from the set.) </para></listitem>
+ will be removed from the set.)</para>
+
+ <para>As the number of possible system
+ calls is large, predefined sets of system calls are provided.
+ A set starts with <literal>@</literal> character, followed by
+ name of the set.
+
+ <table>
+ <title>Currently predefined system call sets</title>
+
+ <tgroup cols='2'>
+ <colspec colname='set' />
+ <colspec colname='description' />
+ <thead>
+ <row>
+ <entry>Set</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry>@clock</entry>
+ <entry>System calls for changing the system clock (<function>adjtimex()</function>,
+ <function>settimeofday()</function>)</entry>
+ </row>
+ <row>
+ <entry>@io-event</entry>
+ <entry>Event loop use (<function>poll()</function>, <function>select()</function>,
+ <citerefentry project='man-pages'><refentrytitle>epoll</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
+ <function>eventfd()</function>...)</entry>
+ </row>
+ <row>
+ <entry>@ipc</entry>
+ <entry>SysV IPC, POSIX Message Queues or other IPC (<citerefentry project='man-pages'><refentrytitle>mq_overview</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
+ <citerefentry project='man-pages'><refentrytitle>svipc</refentrytitle><manvolnum>7</manvolnum></citerefentry>)</entry>
+ </row>
+ <row>
+ <entry>@module</entry>
+ <entry>Kernel module control (<function>create_module()</function>, <function>init_module()</function>...)</entry>
+ </row>
+ <row>
+ <entry>@mount</entry>
+ <entry>File system mounting and unmounting (<function>chroot()</function>, <function>mount()</function>...)</entry>
+ </row>
+ <row>
+ <entry>@network-io</entry>
+ <entry>Socket I/O (including local AF_UNIX):
+ <citerefentry project='man-pages'><refentrytitle>socket</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
+ <citerefentry project='man-pages'><refentrytitle>unix</refentrytitle><manvolnum>7</manvolnum></citerefentry></entry>
+ </row>
+ <row>
+ <entry>@obsolete</entry>
+ <entry>Unusual, obsolete or unimplemented (<function>fattach()</function>, <function>gtty()</function>, <function>vm86()</function>...)</entry>
+ </row>
+ <row>
+ <entry>@privileged</entry>
+ <entry>All system calls which need superuser capabilities (<citerefentry project='man-pages'><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>)</entry>
+ </row>
+ <row>
+ <entry>@process</entry>
+ <entry>Process control, execution, namespaces (<function>execve()</function>, <function>kill()</function>, <citerefentry project='man-pages'><refentrytitle>namespaces</refentrytitle><manvolnum>7</manvolnum></citerefentry>...)</entry>
+ </row>
+ <row>
+ <entry>@raw-io</entry>
+ <entry>Raw I/O ports (<function>ioperm()</function>, <function>iopl()</function>, <function>pciconfig_read()</function>...)</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ Note, that as new system calls are added to the kernel, additional system calls might be added to the groups
+ above, so the contents of the sets may change between systemd versions.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 09d3f65c77..50ff718aab 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2396,6 +2396,55 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
+static int syscall_filter_parse_one(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ ExecContext *c,
+ bool invert,
+ const char *t,
+ bool warn) {
+ int r;
+
+ if (*t == '@') {
+ const SystemCallFilterSet *set;
+
+ for (set = syscall_filter_sets; set->set_name; set++)
+ if (streq(set->set_name, t)) {
+ const char *sys;
+
+ NULSTR_FOREACH(sys, set->value) {
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
+ if (r < 0)
+ return r;
+ }
+ break;
+ }
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(t);
+ if (id < 0) {
+ if (warn)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+ return 0;
+ }
+
+ /* If we previously wanted to forbid a syscall and now
+ * we want to allow it, then remove it from the list
+ */
+ if (!invert == c->syscall_whitelist) {
+ r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
+ if (r == 0)
+ return 0;
+ if (r < 0)
+ return log_oom();
+ } else
+ set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ }
+ return 0;
+}
+
int config_parse_syscall_filter(
const char *unit,
const char *filename,
@@ -2408,13 +2457,6 @@ int config_parse_syscall_filter(
void *data,
void *userdata) {
- static const char default_syscalls[] =
- "execve\0"
- "exit\0"
- "exit_group\0"
- "rt_sigreturn\0"
- "sigreturn\0";
-
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
@@ -2448,53 +2490,26 @@ int config_parse_syscall_filter(
/* Allow everything but the ones listed */
c->syscall_whitelist = false;
else {
- const char *i;
-
/* Allow nothing but the ones listed */
c->syscall_whitelist = true;
/* Accept default syscalls if we are on a whitelist */
- NULSTR_FOREACH(i, default_syscalls) {
- int id;
-
- id = seccomp_syscall_resolve_name(i);
- if (id < 0)
- continue;
-
- r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
- if (r == 0)
- continue;
- if (r < 0)
- return log_oom();
- }
+ r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false);
+ if (r < 0)
+ return r;
}
}
FOREACH_WORD_QUOTED(word, l, rvalue, state) {
_cleanup_free_ char *t = NULL;
- int id;
t = strndup(word, l);
if (!t)
return log_oom();
- id = seccomp_syscall_resolve_name(t);
- if (id < 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
- continue;
- }
-
- /* If we previously wanted to forbid a syscall and now
- * we want to allow it, then remove it from the list
- */
- if (!invert == c->syscall_whitelist) {
- r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
- if (r == 0)
- continue;
- if (r < 0)
- return log_oom();
- } else
- set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+ if (r < 0)
+ return r;
}
if (!isempty(state))
log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index cebe0fce2a..30d22d2242 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -88,3 +88,219 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
return 0;
}
+
+const SystemCallFilterSet syscall_filter_sets[] = {
+ {
+ /* Clock */
+ .set_name = "@clock",
+ .value =
+ "adjtimex\0"
+ "settimeofday\0"
+ }, {
+ /* Default list */
+ .set_name = "@default",
+ .value =
+ "execve\0"
+ "exit\0"
+ "exit_group\0"
+ "rt_sigreturn\0"
+ "sigreturn\0"
+ }, {
+ /* Event loop use */
+ .set_name = "@io-event",
+ .value =
+ "_newselect\0"
+ "epoll_create1\0"
+ "epoll_create\0"
+ "epoll_ctl\0"
+ "epoll_ctl_old\0"
+ "epoll_pwait\0"
+ "epoll_wait\0"
+ "epoll_wait_old\0"
+ "eventfd2\0"
+ "eventfd\0"
+ "poll\0"
+ "ppoll\0"
+ "pselect6\0"
+ "select\0"
+ }, {
+ /* Message queues, SYSV IPC or other IPC: unusual */
+ .set_name = "@ipc",
+ .value = "ipc\0"
+ "mq_getsetattr\0"
+ "mq_notify\0"
+ "mq_open\0"
+ "mq_timedreceive\0"
+ "mq_timedsend\0"
+ "mq_unlink\0"
+ "msgctl\0"
+ "msgget\0"
+ "msgrcv\0"
+ "msgsnd\0"
+ "process_vm_readv\0"
+ "process_vm_writev\0"
+ "semctl\0"
+ "semget\0"
+ "semop\0"
+ "semtimedop\0"
+ "shmat\0"
+ "shmctl\0"
+ "shmdt\0"
+ "shmget\0"
+ }, {
+ /* Kernel module control */
+ .set_name = "@module",
+ .value =
+ "create_module\0"
+ "delete_module\0"
+ "finit_module\0"
+ "init_module\0"
+ }, {
+ /* Mounting */
+ .set_name = "@mount",
+ .value =
+ "chroot\0"
+ "mount\0"
+ "oldumount\0"
+ "pivot_root\0"
+ "umount2\0"
+ "umount\0"
+ }, {
+ /* Network or Unix socket IO, should not be needed if not network facing */
+ .set_name = "@network-io",
+ .value =
+ "accept4\0"
+ "accept\0"
+ "bind\0"
+ "connect\0"
+ "getpeername\0"
+ "getsockname\0"
+ "getsockopt\0"
+ "listen\0"
+ "recv\0"
+ "recvfrom\0"
+ "recvmmsg\0"
+ "recvmsg\0"
+ "send\0"
+ "sendmmsg\0"
+ "sendmsg\0"
+ "sendto\0"
+ "setsockopt\0"
+ "shutdown\0"
+ "socket\0"
+ "socketcall\0"
+ "socketpair\0"
+ }, {
+ /* Unusual, obsolete or unimplemented, some unknown even to libseccomp */
+ .set_name = "@obsolete",
+ .value =
+ "_sysctl\0"
+ "afs_syscall\0"
+ "break\0"
+ "fattach\0"
+ "fdetach\0"
+ "ftime\0"
+ "get_kernel_syms\0"
+ "get_mempolicy\0"
+ "getmsg\0"
+ "getpmsg\0"
+ "gtty\0"
+ "isastream\0"
+ "lock\0"
+ "madvise1\0"
+ "modify_ldt\0"
+ "mpx\0"
+ "pciconfig_iobase\0"
+ "perf_event_open\0"
+ "prof\0"
+ "profil\0"
+ "putmsg\0"
+ "putpmsg\0"
+ "query_module\0"
+ "rtas\0"
+ "s390_runtime_instr\0"
+ "security\0"
+ "sgetmask\0"
+ "ssetmask\0"
+ "stty\0"
+ "subpage_prot\0"
+ "switch_endian\0"
+ "sys_debug_setcontext\0"
+ "tuxcall\0"
+ "ulimit\0"
+ "uselib\0"
+ "vm86\0"
+ "vm86old\0"
+ "vserver\0"
+ }, {
+ /* Nice grab-bag of all system calls which need superuser capabilities */
+ .set_name = "@privileged",
+ .value =
+ "@clock\0"
+ "@module\0"
+ "@raw-io\0"
+ "acct\0"
+ "bdflush\0"
+ "bpf\0"
+ "chown32\0"
+ "chown\0"
+ "chroot\0"
+ "fchown32\0"
+ "fchown\0"
+ "fchownat\0"
+ "kexec_file_load\0"
+ "kexec_load\0"
+ "lchown32\0"
+ "lchown\0"
+ "nfsservctl\0"
+ "pivot_root\0"
+ "quotactl\0"
+ "reboot\0"
+ "setdomainname\0"
+ "setfsuid32\0"
+ "setfsuid\0"
+ "setgroups32\0"
+ "setgroups\0"
+ "sethostname\0"
+ "setresuid32\0"
+ "setresuid\0"
+ "setreuid32\0"
+ "setreuid\0"
+ "setuid32\0"
+ "setuid\0"
+ "stime\0"
+ "swapoff\0"
+ "swapon\0"
+ "sysctl\0"
+ "vhangup\0"
+ }, {
+ /* Process control, execution, namespaces */
+ .set_name = "@process",
+ .value =
+ "arch_prctl\0"
+ "clone\0"
+ "execve\0"
+ "execveat\0"
+ "fork\0"
+ "kill\0"
+ "prctl\0"
+ "setns\0"
+ "tgkill\0"
+ "tkill\0"
+ "unshare\0"
+ "vfork\0"
+ }, {
+ /* Raw I/O ports */
+ .set_name = "@raw-io",
+ .value =
+ "ioperm\0"
+ "iopl\0"
+ "pciconfig_read\0"
+ "pciconfig_write\0"
+ "s390_pci_mmio_read\0"
+ "s390_pci_mmio_write\0"
+ }, {
+ .set_name = NULL,
+ .value = NULL
+ }
+};
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 4ed2afc1b2..be33eecb85 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -26,3 +26,10 @@ const char* seccomp_arch_to_string(uint32_t c);
int seccomp_arch_from_string(const char *n, uint32_t *ret);
int seccomp_add_secondary_archs(scmp_filter_ctx *c);
+
+typedef struct SystemCallFilterSet {
+ const char *set_name;
+ const char *value;
+} SystemCallFilterSet;
+
+extern const SystemCallFilterSet syscall_filter_sets[];