diff options
author | Lennart Poettering <lennart@poettering.net> | 2016-11-01 20:25:19 -0600 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2016-11-04 07:40:13 -0600 |
commit | add005357d535681c7075ced8eec2b6e61b43728 (patch) | |
tree | b780280f06df0b09c738173602cb90c599597996 /src/core | |
parent | 9156493171cf2d78e1ac1a3746c385b0e281acf1 (diff) |
core: add new RestrictNamespaces= unit file setting
This new setting permits restricting whether namespaces may be created and
managed by processes started by a unit. It installs a seccomp filter blocking
certain invocations of unshare(), clone() and setns().
RestrictNamespaces=no is the default, and does not restrict namespaces in any
way. RestrictNamespaces=yes takes away the ability to create or manage any kind
of namspace. "RestrictNamespaces=mnt ipc" restricts the creation of namespaces
so that only mount and IPC namespaces may be created/managed, but no other
kind of namespaces.
This setting should be improve security quite a bit as in particular user
namespacing was a major source of CVEs in the kernel in the past, and is
accessible to unprivileged processes. With this setting the entire attack
surface may be removed for system services that do not make use of namespaces.
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/dbus-execute.c | 21 | ||||
-rw-r--r-- | src/core/execute.c | 30 | ||||
-rw-r--r-- | src/core/execute.h | 9 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 49 | ||||
-rw-r--r-- | src/core/load-fragment.h | 1 |
6 files changed, 112 insertions, 0 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 03f23780c1..d7bb0496a0 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -781,6 +781,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RestrictNamespace", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; @@ -1591,7 +1592,27 @@ int bus_exec_context_set_transient_property( } return 1; + } else if (streq(name, "RestrictNamespaces")) { + uint64_t flags; + r = sd_bus_message_read(message, "t", &flags); + if (r < 0) + return r; + if ((flags & NAMESPACE_FLAGS_ALL) != flags) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown namespace types"); + + if (mode != UNIT_CHECK) { + _cleanup_free_ char *s = NULL; + + r = namespace_flag_to_string_many(flags, &s); + if (r < 0) + return r; + + c->restrict_namespaces = flags; + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s); + } + + return 1; } ri = rlimit_from_string(name); diff --git a/src/core/execute.c b/src/core/execute.c index 5bb23e2e4a..b8a0246173 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1534,6 +1534,18 @@ static int apply_private_devices(const Unit *u, const ExecContext *c) { return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); } +static int apply_restrict_namespaces(Unit *u, const ExecContext *c) { + assert(c); + + if (!exec_context_restrict_namespaces_set(c)) + return 0; + + if (skip_seccomp_unavailable(u, "RestrictNamespaces=")) + return 0; + + return seccomp_restrict_namespaces(c->restrict_namespaces); +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { @@ -2183,6 +2195,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) { return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */ c->memory_deny_write_execute || c->restrict_realtime || + exec_context_restrict_namespaces_set(c) || c->protect_kernel_tunables || c->protect_kernel_modules || c->private_devices || @@ -2764,6 +2777,12 @@ static int exec_child( } } + r = apply_restrict_namespaces(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + if (context->protect_kernel_tunables) { r = apply_protect_sysctl(unit, context); if (r < 0) { @@ -2947,6 +2966,7 @@ void exec_context_init(ExecContext *c) { c->personality = PERSONALITY_INVALID; c->runtime_directory_mode = 0755; c->capability_bounding_set = CAP_ALL; + c->restrict_namespaces = NAMESPACE_FLAGS_ALL; } void exec_context_done(ExecContext *c) { @@ -3244,6 +3264,7 @@ static void strv_fprintf(FILE *f, char **l) { void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { char **e, **d; unsigned i; + int r; assert(c); assert(f); @@ -3524,6 +3545,15 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { fputc('\n', f); } + if (exec_context_restrict_namespaces_set(c)) { + _cleanup_free_ char *s = NULL; + + r = namespace_flag_to_string_many(c->restrict_namespaces, &s); + if (r >= 0) + fprintf(f, "%sRestrictNamespaces: %s\n", + prefix, s); + } + if (c->syscall_errno > 0) fprintf(f, "%sSystemCallErrorNumber: %s\n", diff --git a/src/core/execute.h b/src/core/execute.h index c7d0f7761e..56f880cffe 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -35,6 +35,7 @@ typedef struct ExecParameters ExecParameters; #include "list.h" #include "missing.h" #include "namespace.h" +#include "nsflags.h" typedef enum ExecUtmpMode { EXEC_UTMP_INIT, @@ -195,6 +196,8 @@ struct ExecContext { unsigned long personality; + unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */ + Set *syscall_filter; Set *syscall_archs; int syscall_errno; @@ -216,6 +219,12 @@ struct ExecContext { bool no_new_privileges_set:1; }; +static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) { + assert(c); + + return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL; +} + typedef enum ExecFlags { EXEC_CONFIRM_SPAWN = 1U << 0, EXEC_APPLY_PERMISSIONS = 1U << 1, diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index af2f9d960b..cb2f384f47 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -57,12 +57,14 @@ m4_ifdef(`HAVE_SECCOMP', $1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs) $1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context) $1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute) +$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context.restrict_namespaces) $1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime) $1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)', `$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index cbc826809e..e0fa484c1e 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2907,6 +2907,54 @@ int config_parse_address_families( return 0; } + +int config_parse_restrict_namespaces( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + bool invert = false; + int r; + + if (isempty(rvalue)) { + /* Reset to the default. */ + c->restrict_namespaces = NAMESPACE_FLAGS_ALL; + return 0; + } + + if (rvalue[0] == '~') { + invert = true; + rvalue++; + } + + r = parse_boolean(rvalue); + if (r > 0) + c->restrict_namespaces = 0; + else if (r == 0) + c->restrict_namespaces = NAMESPACE_FLAGS_ALL; + else { + /* Not a boolean argument, in this case it's a list of namespace types. */ + + r = namespace_flag_from_string_many(rvalue, &c->restrict_namespaces); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse namespace type string, ignoring: %s", rvalue); + return 0; + } + } + + if (invert) + c->restrict_namespaces = (~c->restrict_namespaces) & NAMESPACE_FLAGS_ALL; + + return 0; +} #endif int config_parse_unit_slice( @@ -4327,6 +4375,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_syscall_archs, "ARCHS" }, { config_parse_syscall_errno, "ERRNO" }, { config_parse_address_families, "FAMILIES" }, + { config_parse_restrict_namespaces, "NAMESPACES" }, #endif { config_parse_cpu_shares, "SHARES" }, { config_parse_cpu_weight, "WEIGHT" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index c05f205c37..1cff815a50 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -116,6 +116,7 @@ int config_parse_fdname(const char *unit, const char *filename, unsigned line, c int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_restrict_namespaces(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); |