summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2016-11-01 20:25:19 -0600
committerLennart Poettering <lennart@poettering.net>2016-11-04 07:40:13 -0600
commitadd005357d535681c7075ced8eec2b6e61b43728 (patch)
treeb780280f06df0b09c738173602cb90c599597996 /src/core
parent9156493171cf2d78e1ac1a3746c385b0e281acf1 (diff)
core: add new RestrictNamespaces= unit file setting
This new setting permits restricting whether namespaces may be created and managed by processes started by a unit. It installs a seccomp filter blocking certain invocations of unshare(), clone() and setns(). RestrictNamespaces=no is the default, and does not restrict namespaces in any way. RestrictNamespaces=yes takes away the ability to create or manage any kind of namspace. "RestrictNamespaces=mnt ipc" restricts the creation of namespaces so that only mount and IPC namespaces may be created/managed, but no other kind of namespaces. This setting should be improve security quite a bit as in particular user namespacing was a major source of CVEs in the kernel in the past, and is accessible to unprivileged processes. With this setting the entire attack surface may be removed for system services that do not make use of namespaces.
Diffstat (limited to 'src/core')
-rw-r--r--src/core/dbus-execute.c21
-rw-r--r--src/core/execute.c30
-rw-r--r--src/core/execute.h9
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c49
-rw-r--r--src/core/load-fragment.h1
6 files changed, 112 insertions, 0 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 03f23780c1..d7bb0496a0 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -781,6 +781,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictNamespace", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
@@ -1591,7 +1592,27 @@ int bus_exec_context_set_transient_property(
}
return 1;
+ } else if (streq(name, "RestrictNamespaces")) {
+ uint64_t flags;
+ r = sd_bus_message_read(message, "t", &flags);
+ if (r < 0)
+ return r;
+ if ((flags & NAMESPACE_FLAGS_ALL) != flags)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown namespace types");
+
+ if (mode != UNIT_CHECK) {
+ _cleanup_free_ char *s = NULL;
+
+ r = namespace_flag_to_string_many(flags, &s);
+ if (r < 0)
+ return r;
+
+ c->restrict_namespaces = flags;
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s);
+ }
+
+ return 1;
}
ri = rlimit_from_string(name);
diff --git a/src/core/execute.c b/src/core/execute.c
index 5bb23e2e4a..b8a0246173 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1534,6 +1534,18 @@ static int apply_private_devices(const Unit *u, const ExecContext *c) {
return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
}
+static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
+ assert(c);
+
+ if (!exec_context_restrict_namespaces_set(c))
+ return 0;
+
+ if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
+ return 0;
+
+ return seccomp_restrict_namespaces(c->restrict_namespaces);
+}
+
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -2183,6 +2195,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
c->memory_deny_write_execute ||
c->restrict_realtime ||
+ exec_context_restrict_namespaces_set(c) ||
c->protect_kernel_tunables ||
c->protect_kernel_modules ||
c->private_devices ||
@@ -2764,6 +2777,12 @@ static int exec_child(
}
}
+ r = apply_restrict_namespaces(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+
if (context->protect_kernel_tunables) {
r = apply_protect_sysctl(unit, context);
if (r < 0) {
@@ -2947,6 +2966,7 @@ void exec_context_init(ExecContext *c) {
c->personality = PERSONALITY_INVALID;
c->runtime_directory_mode = 0755;
c->capability_bounding_set = CAP_ALL;
+ c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
}
void exec_context_done(ExecContext *c) {
@@ -3244,6 +3264,7 @@ static void strv_fprintf(FILE *f, char **l) {
void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
char **e, **d;
unsigned i;
+ int r;
assert(c);
assert(f);
@@ -3524,6 +3545,15 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
fputc('\n', f);
}
+ if (exec_context_restrict_namespaces_set(c)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
+ if (r >= 0)
+ fprintf(f, "%sRestrictNamespaces: %s\n",
+ prefix, s);
+ }
+
if (c->syscall_errno > 0)
fprintf(f,
"%sSystemCallErrorNumber: %s\n",
diff --git a/src/core/execute.h b/src/core/execute.h
index c7d0f7761e..56f880cffe 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -35,6 +35,7 @@ typedef struct ExecParameters ExecParameters;
#include "list.h"
#include "missing.h"
#include "namespace.h"
+#include "nsflags.h"
typedef enum ExecUtmpMode {
EXEC_UTMP_INIT,
@@ -195,6 +196,8 @@ struct ExecContext {
unsigned long personality;
+ unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
+
Set *syscall_filter;
Set *syscall_archs;
int syscall_errno;
@@ -216,6 +219,12 @@ struct ExecContext {
bool no_new_privileges_set:1;
};
+static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
+ assert(c);
+
+ return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
+}
+
typedef enum ExecFlags {
EXEC_CONFIRM_SPAWN = 1U << 0,
EXEC_APPLY_PERMISSIONS = 1U << 1,
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index af2f9d960b..cb2f384f47 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -57,12 +57,14 @@ m4_ifdef(`HAVE_SECCOMP',
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
+$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context.restrict_namespaces)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index cbc826809e..e0fa484c1e 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2907,6 +2907,54 @@ int config_parse_address_families(
return 0;
}
+
+int config_parse_restrict_namespaces(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ bool invert = false;
+ int r;
+
+ if (isempty(rvalue)) {
+ /* Reset to the default. */
+ c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ r = parse_boolean(rvalue);
+ if (r > 0)
+ c->restrict_namespaces = 0;
+ else if (r == 0)
+ c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
+ else {
+ /* Not a boolean argument, in this case it's a list of namespace types. */
+
+ r = namespace_flag_from_string_many(rvalue, &c->restrict_namespaces);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse namespace type string, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (invert)
+ c->restrict_namespaces = (~c->restrict_namespaces) & NAMESPACE_FLAGS_ALL;
+
+ return 0;
+}
#endif
int config_parse_unit_slice(
@@ -4327,6 +4375,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_syscall_archs, "ARCHS" },
{ config_parse_syscall_errno, "ERRNO" },
{ config_parse_address_families, "FAMILIES" },
+ { config_parse_restrict_namespaces, "NAMESPACES" },
#endif
{ config_parse_cpu_shares, "SHARES" },
{ config_parse_cpu_weight, "WEIGHT" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index c05f205c37..1cff815a50 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -116,6 +116,7 @@ int config_parse_fdname(const char *unit, const char *filename, unsigned line, c
int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_restrict_namespaces(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);