summary refs log tree commit diff
diff options
context:
space:
mode:
author Djalal Harouni <tixxdz@opendz.org> 2016-09-25 12:52:27 +0200
committer Djalal Harouni <tixxdz@opendz.org> 2016-09-25 12:52:27 +0200
commit 8f81a5f61bcf745bae3acad599d7a9da686643e3 (patch)
tree 6da524c69d7f9323efaa6db955741298872954e7
parent b6c432ca7ed930c7e9078ac2266ae439aa242632 (diff)
core: Use @raw-io syscall group to filter I/O syscalls when PrivateDevices= is set
Instead of having a local syscall list, use the @raw-io group which contains the same set of syscalls to filter.
-rw-r--r-- man/systemd.exec.xml 6
-rw-r--r-- src/core/execute.c 55
2 files changed, 39 insertions, 22 deletions
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index f19e7f6ee9..f70e5c36d4 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -933,8 +933,10 @@
<filename>/dev/random</filename> (as well as the pseudo TTY subsystem) to it, but no physical devices such as
<filename>/dev/sda</filename>, system memory <filename>/dev/mem</filename>, system ports
<filename>/dev/port</filename> and others. This is useful to securely turn off physical device access by the
- executed process. Defaults to false. Enabling this option will also remove <constant>CAP_MKNOD</constant> from
- the capability bounding set for the unit (see above), and set <varname>DevicePolicy=closed</varname> (see
+ executed process. Defaults to false. Enabling this option will install a system call filter to block low-level
+ I/O system calls that are grouped in the <varname>@raw-io</varname> set, will also remove
+ <constant>CAP_MKNOD</constant> from the capability bounding set for the unit (see above), and set
+ <varname>DevicePolicy=closed</varname> (see
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details). Note that using this setting will disconnect propagation of mounts from the service to the host
(propagation in the opposite direction continues to work). This means that this setting may not be used for
diff --git a/src/core/execute.c b/src/core/execute.c
index 0488ba2ca9..3da7ef3be6 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1429,28 +1429,15 @@ finish:
}
static int apply_private_devices(Unit *u, const ExecContext *c) {
-
- static const int device_syscalls[] = {
- SCMP_SYS(ioperm),
- SCMP_SYS(iopl),
- SCMP_SYS(pciconfig_iobase),
- SCMP_SYS(pciconfig_read),
- SCMP_SYS(pciconfig_write),
-#ifdef __NR_s390_pci_mmio_read
- SCMP_SYS(s390_pci_mmio_read),
-#endif
-#ifdef __NR_s390_pci_mmio_write
- SCMP_SYS(s390_pci_mmio_write),
-#endif
- };
-
+ const SystemCallFilterSet *set;
scmp_filter_ctx *seccomp;
- unsigned i;
+ const char *sys;
+ bool syscalls_found = false;
int r;
assert(c);
- /* If PrivateDevices= is set, also turn off iopl and friends. */
+ /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
if (skip_seccomp_unavailable(u, "PrivateDevices="))
return 0;
@@ -1463,12 +1450,40 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
if (r < 0)
goto finish;
- for (i = 0; i < ELEMENTSOF(device_syscalls); i++) {
+ for (set = syscall_filter_sets; set->set_name; set++)
+ if (streq(set->set_name, "@raw-io")) {
+ syscalls_found = true;
+ break;
+ }
+
+ /* We should never fail here */
+ if (!syscalls_found) {
+ r = -EOPNOTSUPP;
+ goto finish;
+ }
+
+ NULSTR_FOREACH(sys, set->value) {
+ int id;
+ bool add = true;
+
+#ifndef __NR_s390_pci_mmio_read
+ if (streq(sys, "s390_pci_mmio_read"))
+ add = false;
+#endif
+#ifndef __NR_s390_pci_mmio_write
+ if (streq(sys, "s390_pci_mmio_write"))
+ add = false;
+#endif
+
+ if (!add)
+ continue;
+
+ id = seccomp_syscall_resolve_name(sys);
+
r = seccomp_rule_add(
seccomp,
SCMP_ACT_ERRNO(EPERM),
- device_syscalls[i],
- 0);
+ id, 0);
if (r < 0)
goto finish;
}