From 3ae33295f00be5e2836f009bf1991b0caddf80b7 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Fri, 7 Oct 2016 19:17:34 +0200 Subject: test: add capability tests for ProtectKernelModules= This just adds capabilities test. --- src/test/test-execute.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/test') diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 8b4ff22495..f7d38fb0f3 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -142,6 +142,16 @@ static void test_exec_privatedevices_capabilities(Manager *m) { test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED); } +static void test_exec_protectkernelmodules_capabilities(Manager *m) { + if (detect_container() > 0) { + log_notice("testing in container, skipping protectkernelmodules tests"); + return; + } + + test(m, "exec-protectkernelmodules-no-capabilities.service", 0, CLD_EXITED); + test(m, "exec-protectkernelmodules-yes-capabilities.service", 0, CLD_EXITED); +} + static void test_exec_readonlypaths(Manager *m) { test(m, "exec-readonlypaths.service", 0, CLD_EXITED); test(m, "exec-readonlypaths-mount-propagation.service", 0, CLD_EXITED); @@ -368,6 +378,7 @@ int main(int argc, char *argv[]) { test_exec_privatetmp, test_exec_privatedevices, test_exec_privatedevices_capabilities, + test_exec_protectkernelmodules_capabilities, test_exec_readonlypaths, test_exec_readwritepaths, test_exec_inaccessiblepaths, -- cgit v1.2.3-54-g00ecf From 625d8769fa6394a302b024eaee45043e6eb0c87a Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Fri, 7 Oct 2016 20:41:38 +0200 Subject: test: add test to make sure that CAP_SYS_RAWIO was removed on PrivateDevices=yes --- src/test/test-execute.c | 2 ++ .../exec-privatedevices-no-capability-sys-rawio.service | 7 +++++++ .../exec-privatedevices-yes-capability-sys-rawio.service | 7 +++++++ 3 files changed, 16 insertions(+) create mode 100644 test/test-execute/exec-privatedevices-no-capability-sys-rawio.service create mode 100644 test/test-execute/exec-privatedevices-yes-capability-sys-rawio.service (limited to 'src/test') diff --git a/src/test/test-execute.c b/src/test/test-execute.c index f7d38fb0f3..1eade98ed3 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -140,6 +140,8 @@ static void test_exec_privatedevices_capabilities(Manager *m) { } test(m, "exec-privatedevices-yes-capability-mknod.service", 0, CLD_EXITED); test(m, "exec-privatedevices-no-capability-mknod.service", 0, CLD_EXITED); + test(m, "exec-privatedevices-yes-capability-sys-rawio.service", 0, CLD_EXITED); + test(m, "exec-privatedevices-no-capability-sys-rawio.service", 0, CLD_EXITED); } static void test_exec_protectkernelmodules_capabilities(Manager *m) { diff --git a/test/test-execute/exec-privatedevices-no-capability-sys-rawio.service b/test/test-execute/exec-privatedevices-no-capability-sys-rawio.service new file mode 100644 index 0000000000..e7f529c44c --- /dev/null +++ b/test/test-execute/exec-privatedevices-no-capability-sys-rawio.service @@ -0,0 +1,7 @@ +[Unit] +Description=Test CAP_SYS_RAWIO capability for PrivateDevices=no + +[Service] +PrivateDevices=no +ExecStart=/bin/sh -x -c 'capsh --print | grep cap_sys_rawio' +Type=oneshot diff --git a/test/test-execute/exec-privatedevices-yes-capability-sys-rawio.service b/test/test-execute/exec-privatedevices-yes-capability-sys-rawio.service new file mode 100644 index 0000000000..cebc493a7a --- /dev/null +++ b/test/test-execute/exec-privatedevices-yes-capability-sys-rawio.service @@ -0,0 +1,7 @@ +[Unit] +Description=Test CAP_SYS_RAWIO capability for PrivateDevices=yes + +[Service] +PrivateDevices=yes +ExecStart=/bin/sh -x -c '! capsh --print | grep cap_sys_rawio' +Type=oneshot -- cgit v1.2.3-54-g00ecf From c575770b75b6cd15684fbacd249147bf5fd6ead7 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Wed, 12 Oct 2016 14:11:16 +0200 Subject: core:sandbox: lets make /lib/modules/ inaccessible on ProtectKernelModules= Lets go further and make /lib/modules/ inaccessible for services that do not have business with modules, this is a minor improvment but it may help on setups with custom modules and they are limited... in regard of kernel auto-load feature. This change introduce NameSpaceInfo struct which we may embed later inside ExecContext but for now lets just reduce the argument number to setup_namespace() and merge ProtectKernelModules feature. --- man/systemd.exec.xml | 5 ++++- src/core/execute.c | 11 ++++++++--- src/core/namespace.c | 54 +++++++++++++++++++++++++++++++++++----------------- src/core/namespace.h | 14 +++++++++++--- src/test/test-ns.c | 12 +++++++++--- 5 files changed, 69 insertions(+), 27 deletions(-) (limited to 'src/test') diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 4a68695348..249fcb0363 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1415,7 +1415,10 @@ kernels. It is recomended to turn this on for most services that do not need special file systems or extra kernel modules to work. Default to off. Enabling this option removes CAP_SYS_MODULE from the capability bounding set for - the unit, and installs a system call filter to block module system calls. + the unit, and installs a system call filter to block module system calls, + also /usr/lib/modules is made inaccessible. For this + setting the same restrictions regarding mount propagation and privileges + apply as for ReadOnlyPaths= and related calls, see above. Note that limited automatic module loading due to user configuration or kernel mapping tables might still happen as side effect of requested user operations, both privileged and unprivileged. To disable module auto-load feature please see diff --git a/src/core/execute.c b/src/core/execute.c index 7a278b7d31..dc078d96f0 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1766,6 +1766,7 @@ static bool exec_needs_mount_namespace( context->protect_system != PROTECT_SYSTEM_NO || context->protect_home != PROTECT_HOME_NO || context->protect_kernel_tunables || + context->protect_kernel_modules || context->protect_control_groups) return true; @@ -2493,6 +2494,12 @@ static int exec_child( if (needs_mount_namespace) { _cleanup_free_ char **rw = NULL; char *tmp = NULL, *var = NULL; + NameSpaceInfo ns_info = { + .private_dev = context->private_devices, + .protect_control_groups = context->protect_control_groups, + .protect_kernel_tunables = context->protect_kernel_tunables, + .protect_kernel_modules = context->protect_kernel_modules, + }; /* The runtime struct only contains the parent * of the private /tmp, which is @@ -2515,14 +2522,12 @@ static int exec_child( r = setup_namespace( (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL, + &ns_info, rw, context->read_only_paths, context->inaccessible_paths, tmp, var, - context->private_devices, - context->protect_kernel_tunables, - context->protect_control_groups, context->protect_home, context->protect_system, context->mount_flags); diff --git a/src/core/namespace.c b/src/core/namespace.c index 43a2f4ba6e..1195e9a854 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -97,6 +97,14 @@ static const TargetMount protect_kernel_tunables_table[] = { { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */ }; +/* ProtectKernelModules= option */ +static const TargetMount protect_kernel_modules_table[] = { +#ifdef HAVE_SPLIT_USR + { "/lib/modules", INACCESSIBLE, true }, +#endif + { "/usr/lib/modules", INACCESSIBLE, true }, +}; + /* * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of * system should be protected by ProtectSystem= @@ -207,6 +215,13 @@ static int append_protect_kernel_tunables(BindMount **p, const char *root_direct ELEMENTSOF(protect_kernel_tunables_table)); } +static int append_protect_kernel_modules(BindMount **p, const char *root_directory) { + assert(p); + + return append_target_mounts(p, root_directory, protect_kernel_modules_table, + ELEMENTSOF(protect_kernel_modules_table)); +} + static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) { int r = 0; @@ -660,14 +675,12 @@ static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned } static unsigned namespace_calculate_mounts( + const NameSpaceInfo *ns_info, char** read_write_paths, char** read_only_paths, char** inaccessible_paths, const char* tmp_dir, const char* var_tmp_dir, - bool private_dev, - bool protect_sysctl, - bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system) { @@ -690,22 +703,21 @@ static unsigned namespace_calculate_mounts( strv_length(read_write_paths) + strv_length(read_only_paths) + strv_length(inaccessible_paths) + - private_dev + - (protect_sysctl ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + - (protect_cgroups ? 1 : 0) + + ns_info->private_dev + + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + + (ns_info->protect_control_groups ? 1 : 0) + + (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) + protect_home_cnt + protect_system_cnt; } int setup_namespace( const char* root_directory, + const NameSpaceInfo *ns_info, char** read_write_paths, char** read_only_paths, char** inaccessible_paths, const char* tmp_dir, const char* var_tmp_dir, - bool private_dev, - bool protect_sysctl, - bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags) { @@ -718,13 +730,12 @@ int setup_namespace( if (mount_flags == 0) mount_flags = MS_SHARED; - n = namespace_calculate_mounts(read_write_paths, + n = namespace_calculate_mounts(ns_info, + read_write_paths, read_only_paths, inaccessible_paths, tmp_dir, var_tmp_dir, - private_dev, protect_sysctl, - protect_cgroups, protect_home, - protect_system); + protect_home, protect_system); /* Set mount slave mode */ if (root_directory || n > 0) @@ -756,16 +767,25 @@ int setup_namespace( m++; } - if (private_dev) { + if (ns_info->private_dev) { m->path = prefix_roota(root_directory, "/dev"); m->mode = PRIVATE_DEV; m++; } - if (protect_sysctl) - append_protect_kernel_tunables(&m, root_directory); + if (ns_info->protect_kernel_tunables) { + r = append_protect_kernel_tunables(&m, root_directory); + if (r < 0) + return r; + } + + if (ns_info->protect_kernel_modules) { + r = append_protect_kernel_modules(&m, root_directory); + if (r < 0) + return r; + } - if (protect_cgroups) { + if (ns_info->protect_control_groups) { m->path = prefix_roota(root_directory, "/sys/fs/cgroup"); m->mode = READONLY; m++; diff --git a/src/core/namespace.h b/src/core/namespace.h index 6505bcc499..6310638e9a 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -4,6 +4,7 @@ This file is part of systemd. Copyright 2010 Lennart Poettering + Copyright 2016 Djalal Harouni systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -19,6 +20,8 @@ along with systemd; If not, see . ***/ +typedef struct NameSpaceInfo NameSpaceInfo; + #include #include "macro.h" @@ -40,15 +43,20 @@ typedef enum ProtectSystem { _PROTECT_SYSTEM_INVALID = -1 } ProtectSystem; +struct NameSpaceInfo { + bool private_dev:1; + bool protect_control_groups:1; + bool protect_kernel_tunables:1; + bool protect_kernel_modules:1; +}; + int setup_namespace(const char *chroot, + const NameSpaceInfo *ns_info, char **read_write_paths, char **read_only_paths, char **inaccessible_paths, const char *tmp_dir, const char *var_tmp_dir, - bool private_dev, - bool protect_sysctl, - bool protect_cgroups, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags); diff --git a/src/test/test-ns.c b/src/test/test-ns.c index c4d4da6d05..da7a8b0565 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -45,6 +45,14 @@ int main(int argc, char *argv[]) { "/home/lennart/projects", NULL }; + + static const NameSpaceInfo ns_info = { + .private_dev = true, + .protect_control_groups = true, + .protect_kernel_tunables = true, + .protect_kernel_modules = true, + }; + char *root_directory; char *projects_directory; int r; @@ -69,14 +77,12 @@ int main(int argc, char *argv[]) { log_info("Not chrooted"); r = setup_namespace(root_directory, + &ns_info, (char **) writable, (char **) readonly, (char **) inaccessible, tmp_dir, var_tmp_dir, - true, - true, - true, PROTECT_HOME_NO, PROTECT_SYSTEM_NO, 0); -- cgit v1.2.3-54-g00ecf From 4982dbcc300d4599aa6ac143e922d6fbee31a860 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Sun, 9 Oct 2016 12:38:45 +0200 Subject: test: add test to make sure that ProtectKernelModules=yes disconnect mount propagation --- Makefile.am | 1 + src/test/test-execute.c | 5 +++-- .../exec-protectkernelmodules-yes-mount-propagation.service | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 test/test-execute/exec-protectkernelmodules-yes-mount-propagation.service (limited to 'src/test') diff --git a/Makefile.am b/Makefile.am index 1ea25bb688..4a69236090 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1641,6 +1641,7 @@ EXTRA_DIST += \ test/test-execute/exec-privatedevices-yes-capability-mknod.service \ test/test-execute/exec-protectkernelmodules-no-capabilities.service \ test/test-execute/exec-protectkernelmodules-yes-capabilities.service \ + test/test-execute/exec-protectkernelmodules-yes-mount-propagation.service \ test/test-execute/exec-privatetmp-no.service \ test/test-execute/exec-privatetmp-yes.service \ test/test-execute/exec-readonlypaths.service \ diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 1eade98ed3..e8ff02adaf 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -144,7 +144,7 @@ static void test_exec_privatedevices_capabilities(Manager *m) { test(m, "exec-privatedevices-no-capability-sys-rawio.service", 0, CLD_EXITED); } -static void test_exec_protectkernelmodules_capabilities(Manager *m) { +static void test_exec_protectkernelmodules(Manager *m) { if (detect_container() > 0) { log_notice("testing in container, skipping protectkernelmodules tests"); return; @@ -152,6 +152,7 @@ static void test_exec_protectkernelmodules_capabilities(Manager *m) { test(m, "exec-protectkernelmodules-no-capabilities.service", 0, CLD_EXITED); test(m, "exec-protectkernelmodules-yes-capabilities.service", 0, CLD_EXITED); + test(m, "exec-protectkernelmodules-yes-mount-propagation.service", 0, CLD_EXITED); } static void test_exec_readonlypaths(Manager *m) { @@ -380,7 +381,7 @@ int main(int argc, char *argv[]) { test_exec_privatetmp, test_exec_privatedevices, test_exec_privatedevices_capabilities, - test_exec_protectkernelmodules_capabilities, + test_exec_protectkernelmodules, test_exec_readonlypaths, test_exec_readwritepaths, test_exec_inaccessiblepaths, diff --git a/test/test-execute/exec-protectkernelmodules-yes-mount-propagation.service b/test/test-execute/exec-protectkernelmodules-yes-mount-propagation.service new file mode 100644 index 0000000000..e438783df3 --- /dev/null +++ b/test/test-execute/exec-protectkernelmodules-yes-mount-propagation.service @@ -0,0 +1,7 @@ +[Unit] +Description=Test to make sure that passing ProtectKernelModules=yes disconnect mount propagation + +[Service] +ProtectKernelModules=yes +ExecStart=/bin/sh -x -c 'mkdir -p /TEST; mount -t tmpfs tmpfs /TEST; grep TEST /proc/self/mountinfo && ! grep TEST /proc/$${PPID}/mountinfo && ! grep TEST /proc/1/mountinfo' +Type=oneshot -- cgit v1.2.3-54-g00ecf