diff options
Diffstat (limited to 'src/shared')
-rw-r--r-- | src/shared/bus-unit-util.c | 25 | ||||
-rw-r--r-- | src/shared/nsflags.c | 126 | ||||
-rw-r--r-- | src/shared/nsflags.h | 49 | ||||
-rw-r--r-- | src/shared/seccomp-util.c | 89 | ||||
-rw-r--r-- | src/shared/seccomp-util.h | 2 |
5 files changed, 291 insertions, 0 deletions
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index f639e0e832..35e2c8f18e 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -27,6 +27,7 @@ #include "hashmap.h" #include "list.h" #include "locale-util.h" +#include "nsflags.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -553,6 +554,30 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_close_container(m); + } else if (streq(field, "RestrictNamespaces")) { + bool invert = false; + uint64_t flags = 0; + + if (eq[0] == '~') { + invert = true; + eq++; + } + + r = parse_boolean(eq); + if (r > 0) + flags = 0; + else if (r == 0) + flags = NAMESPACE_FLAGS_ALL; + else { + r = namespace_flag_from_string_many(eq, &flags); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s.", field, eq); + } + + if (invert) + flags = (~flags) & NAMESPACE_FLAGS_ALL; + + r = sd_bus_message_append(m, "v", "t", flags); } else { log_error("Unknown assignment %s.", assignment); return -EINVAL; diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c new file mode 100644 index 0000000000..8fcbe97ba7 --- /dev/null +++ b/src/shared/nsflags.c @@ -0,0 +1,126 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sched.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "nsflags.h" +#include "seccomp-util.h" +#include "string-util.h" + +const struct namespace_flag_map namespace_flag_map[] = { + { CLONE_NEWCGROUP, "cgroup" }, + { CLONE_NEWIPC, "ipc" }, + { CLONE_NEWNET, "net" }, + /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more + * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ + { CLONE_NEWNS, "mnt" }, + { CLONE_NEWPID, "pid" }, + { CLONE_NEWUSER, "user" }, + { CLONE_NEWUTS, "uts" }, + {} +}; + +const char* namespace_flag_to_string(unsigned long flag) { + unsigned i; + + flag &= NAMESPACE_FLAGS_ALL; + + for (i = 0; namespace_flag_map[i].name; i++) + if (flag == namespace_flag_map[i].flag) + return namespace_flag_map[i].name; + + return NULL; /* either unknown namespace flag, or a combination of many. This call supports neither. */ +} + +unsigned long namespace_flag_from_string(const char *name) { + unsigned i; + + if (isempty(name)) + return 0; + + for (i = 0; namespace_flag_map[i].name; i++) + if (streq(name, namespace_flag_map[i].name)) + return namespace_flag_map[i].flag; + + return 0; +} + +int namespace_flag_from_string_many(const char *name, unsigned long *ret) { + unsigned long flags = 0; + int r; + + assert_se(ret); + + if (!name) { + *ret = 0; + return 0; + } + + for (;;) { + _cleanup_free_ char *word = NULL; + unsigned long f; + + r = extract_first_word(&name, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + f = namespace_flag_from_string(word); + if (f == 0) + return -EINVAL; + + flags |= f; + } + + *ret = flags; + return 0; +} + +int namespace_flag_to_string_many(unsigned long flags, char **ret) { + _cleanup_free_ char *s = NULL; + unsigned i; + + for (i = 0; namespace_flag_map[i].name; i++) { + if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag) + continue; + + if (!s) { + s = strdup(namespace_flag_map[i].name); + if (!s) + return -ENOMEM; + } else { + if (!strextend(&s, " ", namespace_flag_map[i].name, NULL)) + return -ENOMEM; + } + } + + if (!s) { + s = strdup(""); + if (!s) + return -ENOMEM; + } + + *ret = s; + s = NULL; + + return 0; +} diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h new file mode 100644 index 0000000000..152ab8b936 --- /dev/null +++ b/src/shared/nsflags.h @@ -0,0 +1,49 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sched.h> + +#include "missing.h" + +/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and + * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter + * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that + * here. */ +#define NAMESPACE_FLAGS_ALL \ + ((unsigned long) (CLONE_NEWCGROUP| \ + CLONE_NEWIPC| \ + CLONE_NEWNET| \ + CLONE_NEWNS| \ + CLONE_NEWPID| \ + CLONE_NEWUSER| \ + CLONE_NEWUTS)) + +const char* namespace_flag_to_string(unsigned long flag); +unsigned long namespace_flag_from_string(const char *name); +int namespace_flag_from_string_many(const char *name, unsigned long *ret); +int namespace_flag_to_string_many(unsigned long flags, char **ret); + +struct namespace_flag_map { + unsigned long flag; + const char *name; +}; + +extern const struct namespace_flag_map namespace_flag_map[]; diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index fc1f6b68f2..4e4b2faca9 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -23,7 +23,9 @@ #include <sys/prctl.h> #include <linux/seccomp.h> +#include "alloc-util.h" #include "macro.h" +#include "nsflags.h" #include "seccomp-util.h" #include "string-util.h" #include "util.h" @@ -576,5 +578,92 @@ int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set finish: seccomp_release(seccomp); return r; +} + +int seccomp_restrict_namespaces(unsigned long retain) { + scmp_filter_ctx seccomp; + unsigned i; + int r; + + if (log_get_max_level() >= LOG_DEBUG) { + _cleanup_free_ char *s = NULL; + + (void) namespace_flag_to_string_many(retain, &s); + log_debug("Restricting namespace to: %s.", strna(s)); + } + + /* NOOP? */ + if ((retain & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL) + return 0; + + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + if ((retain & NAMESPACE_FLAGS_ALL) == 0) + /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall + * altogether. */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 0); + else + /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the + * special invocation with a zero flags argument, right here. */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 1, + SCMP_A1(SCMP_CMP_EQ, 0)); + if (r < 0) + goto finish; + + for (i = 0; namespace_flag_map[i].name; i++) { + unsigned long f; + + f = namespace_flag_map[i].flag; + if ((retain & f) == f) { + log_debug("Permitting %s.", namespace_flag_map[i].name); + continue; + } + log_debug("Blocking %s.", namespace_flag_map[i].name); + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(unshare), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) + goto finish; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) + goto finish; + + if ((retain & NAMESPACE_FLAGS_ALL) != 0) { + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) + goto finish; + } + } + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; } diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index f0b9f455ab..438a6671bc 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -66,3 +66,5 @@ const SyscallFilterSet *syscall_filter_set_find(const char *name); int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action); int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action); + +int seccomp_restrict_namespaces(unsigned long retain); |