summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--man/systemd.exec.xml17
-rw-r--r--src/core/dbus-execute.c5
-rw-r--r--src/core/execute.c52
-rw-r--r--src/core/execute.h1
-rw-r--r--src/core/load-fragment-gperf.gperf.m41
-rw-r--r--src/core/unit.c3
-rw-r--r--src/shared/bus-unit-util.c3
7 files changed, 80 insertions, 2 deletions
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 986985ad35..3bea4976b3 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1405,6 +1405,23 @@
</varlistentry>
<varlistentry>
+ <term><varname>ProtectKernelModules=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, explicit module loading will
+ be denied. This allows to turn off module load and unload operations on modular
+ kernels. It is recomended to turn this on for most services that do not need special
+ file systems or extra kernel modules to work. Default to off. Enabling this option
+ removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
+ the unit, and installs a system call filter to block module system calls.
+ Note that limited automatic module loading due to user configuration or kernel
+ mapping tables might still happen as side effect of requested user operations,
+ both privileged and unprivileged. To disable module auto-load feature please see
+ <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ <constant>kernel.modules_disabled</constant> mechanism and
+ <filename>/proc/sys/kernel/modules_disabled</filename> documentation.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>Personality=</varname></term>
<listitem><para>Controls which kernel architecture <citerefentry
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index eec4500c8c..b8720d7d3d 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -708,6 +708,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1075,7 +1076,7 @@ int bus_exec_context_set_transient_property(
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
- "ProtectControlGroups")) {
+ "ProtectKernelModules", "ProtectControlGroups")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@@ -1111,6 +1112,8 @@ int bus_exec_context_set_transient_property(
c->remove_ipc = b;
else if (streq(name, "ProtectKernelTunables"))
c->protect_kernel_tunables = b;
+ else if (streq(name, "ProtectKernelModules"))
+ c->protect_kernel_modules = b;
else if (streq(name, "ProtectControlGroups"))
c->protect_control_groups = b;
diff --git a/src/core/execute.c b/src/core/execute.c
index 0c983f4953..7a278b7d31 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1436,6 +1436,50 @@ finish:
return r;
}
+static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
+ static const int module_syscalls[] = {
+ SCMP_SYS(delete_module),
+ SCMP_SYS(finit_module),
+ SCMP_SYS(init_module),
+ };
+
+ scmp_filter_ctx *seccomp;
+ unsigned i;
+ int r;
+
+ assert(c);
+
+ /* Turn of module syscalls on ProtectKernelModules=yes */
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+ return 0;
+
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ r = seccomp_add_secondary_archs(seccomp);
+ if (r < 0)
+ goto finish;
+
+ for (i = 0; i < ELEMENTSOF(module_syscalls); i++) {
+ r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM),
+ module_syscalls[i], 0);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
static int apply_private_devices(Unit *u, const ExecContext *c) {
const SystemCallFilterSet *set;
scmp_filter_ctx *seccomp;
@@ -2690,6 +2734,14 @@ static int exec_child(
}
}
+ if (context->protect_kernel_modules) {
+ r = apply_protect_kernel_modules(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+ }
+
if (context->private_devices) {
r = apply_private_devices(unit, context);
if (r < 0) {
diff --git a/src/core/execute.h b/src/core/execute.h
index 449180c903..1de439c3ad 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -175,6 +175,7 @@ struct ExecContext {
ProtectSystem protect_system;
ProtectHome protect_home;
bool protect_kernel_tunables;
+ bool protect_kernel_modules;
bool protect_control_groups;
bool no_new_privileges;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index c49c1d6732..a700d853cc 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -90,6 +90,7 @@ $1.InaccessiblePaths, config_parse_namespace_path_strv, 0,
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
diff --git a/src/core/unit.c b/src/core/unit.c
index 690f7f7dd9..71f95c0b96 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3401,6 +3401,9 @@ int unit_patch_contexts(Unit *u) {
if (ec->private_devices)
ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
+ if (ec->protect_kernel_modules)
+ ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
if (ec->dynamic_user) {
if (!ec->user) {
r = user_from_unit_name(u, &ec->user);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index a550a370b5..f639e0e832 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -204,7 +204,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
- "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectControlGroups")) {
+ "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
+ "ProtectKernelModules", "ProtectControlGroups")) {
r = parse_boolean(eq);
if (r < 0)