From 8351ceaea9480d9c2979aa2ff0f4982cfdfef58d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 17 Jul 2012 04:17:53 +0200 Subject: execute: support syscall filtering using seccomp filters --- src/core/.gitignore | 4 ++ src/core/dbus-execute.c | 31 ++++++++++++- src/core/dbus-execute.h | 5 ++- src/core/execute.c | 70 +++++++++++++++++++++++++++++ src/core/execute.h | 4 ++ src/core/load-fragment-gperf.gperf.m4 | 2 + src/core/load-fragment.c | 85 ++++++++++++++++++++++++++++++++++- src/core/load-fragment.h | 1 + src/core/syscall-list.c | 55 +++++++++++++++++++++++ src/core/syscall-list.h | 30 +++++++++++++ 10 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 src/core/syscall-list.c create mode 100644 src/core/syscall-list.h (limited to 'src/core') diff --git a/src/core/.gitignore b/src/core/.gitignore index f293bbdc93..a763f72507 100644 --- a/src/core/.gitignore +++ b/src/core/.gitignore @@ -1,2 +1,6 @@ +/syscall-from-name.gperf +/syscall-from-name.h +/syscall-list.txt +/syscall-to-name.h /macros.systemd /systemd.pc diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 9322cdfd86..a00ad50795 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -28,6 +28,7 @@ #include "ioprio.h" #include "strv.h" #include "dbus-common.h" +#include "syscall-list.h" DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_execute_append_kill_mode, kill_mode, KillMode); @@ -348,6 +349,32 @@ int bus_execute_append_command(DBusMessageIter *i, const char *property, void *d return 0; } +int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data) { + ExecContext *c = data; + dbus_bool_t b; + DBusMessageIter sub; + + assert(i); + assert(property); + assert(c); + + if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "u", &sub)) + return -ENOMEM; + + if (c->syscall_filter) + b = dbus_message_iter_append_fixed_array(&sub, DBUS_TYPE_UINT32, &c->syscall_filter, (syscall_max() + 31) >> 4); + else + b = dbus_message_iter_append_fixed_array(&sub, DBUS_TYPE_UINT32, &c->syscall_filter, 0); + + if (!b) + return -ENOMEM; + + if (!dbus_message_iter_close_container(i, &sub)) + return -ENOMEM; + + return 0; +} + const BusProperty bus_exec_context_properties[] = { { "Environment", bus_property_append_strv, "as", offsetof(ExecContext, environment), true }, { "EnvironmentFiles", bus_execute_append_env_files, "a(sb)", offsetof(ExecContext, environment_files), true }, @@ -409,6 +436,8 @@ const BusProperty bus_exec_context_properties[] = { { "UtmpIdentifier", bus_property_append_string, "s", offsetof(ExecContext, utmp_id), true }, { "ControlGroupModify", bus_property_append_bool, "b", offsetof(ExecContext, control_group_modify) }, { "ControlGroupPersistent", bus_property_append_tristate_false, "b", offsetof(ExecContext, control_group_persistent) }, - { "IgnoreSIGPIPE", bus_property_append_bool, "b", offsetof(ExecContext, ignore_sigpipe ) }, + { "IgnoreSIGPIPE", bus_property_append_bool, "b", offsetof(ExecContext, ignore_sigpipe) }, + { "NoNewPrivileges", bus_property_append_bool, "b", offsetof(ExecContext, no_new_privileges) }, + { "SystemCallFilter", bus_execute_append_syscall_filter, "au", 0 }, { NULL, } }; diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h index b8bbe1c9f2..dc267e6ccc 100644 --- a/src/core/dbus-execute.h +++ b/src/core/dbus-execute.h @@ -96,7 +96,9 @@ " \n" \ " \n" \ " \n" \ - " \n" + " \n" \ + " \n" \ + " \n" #define BUS_EXEC_COMMAND_INTERFACE(name) \ " \n" @@ -121,5 +123,6 @@ int bus_execute_append_rlimits(DBusMessageIter *i, const char *property, void *d int bus_execute_append_command(DBusMessageIter *u, const char *property, void *data); int bus_execute_append_kill_mode(DBusMessageIter *i, const char *property, void *data); int bus_execute_append_env_files(DBusMessageIter *i, const char *property, void *data); +int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data); #endif diff --git a/src/core/execute.c b/src/core/execute.c index daba1a3846..7a72aa486c 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef HAVE_PAM #include @@ -60,6 +61,7 @@ #include "def.h" #include "loopback-setup.h" #include "path-util.h" +#include "syscall-list.h" #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC) @@ -924,6 +926,59 @@ static void rename_process_from_path(const char *path) { rename_process(process_name); } +static int apply_seccomp(uint32_t *syscall_filter) { + static const struct sock_filter header[] = { + VALIDATE_ARCHITECTURE, + EXAMINE_SYSCALL + }; + static const struct sock_filter footer[] = { + _KILL_PROCESS + }; + + int i; + unsigned n; + struct sock_filter *f; + struct sock_fprog prog; + + assert(syscall_filter); + + /* First: count the syscalls to check for */ + for (i = 0, n = 0; i < syscall_max(); i++) + if (syscall_filter[i >> 4] & (1 << (i & 31))) + n++; + + /* Second: build the filter program from a header the syscall + * matches and the footer */ + f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer))); + memcpy(f, header, sizeof(header)); + + for (i = 0, n = 0; i < syscall_max(); i++) + if (syscall_filter[i >> 4] & (1 << (i & 31))) { + struct sock_filter item[] = { + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) + }; + + assert_cc(ELEMENTSOF(item) == 2); + + f[ELEMENTSOF(header) + 2*n] = item[0]; + f[ELEMENTSOF(header) + 2*n+1] = item[1]; + + n++; + } + + memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer)); + + /* Third: install the filter */ + zero(prog); + prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n; + prog.filter = f; + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0) + return -errno; + + return 0; +} + int exec_spawn(ExecCommand *command, char **argv, const ExecContext *context, @@ -1355,6 +1410,21 @@ int exec_spawn(ExecCommand *command, r = EXIT_CAPABILITIES; goto fail_child; } + + if (context->no_new_privileges) + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { + err = -errno; + r = EXIT_NO_NEW_PRIVILEGES; + goto fail_child; + } + + if (context->syscall_filter) { + err = apply_seccomp(context->syscall_filter); + if (err < 0) { + r = EXIT_SECCOMP; + goto fail_child; + } + } } if (!(our_env = new0(char*, 7))) { diff --git a/src/core/execute.h b/src/core/execute.h index 2083c2971b..187165cdc2 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -164,6 +164,8 @@ struct ExecContext { bool private_tmp; bool private_network; + bool no_new_privileges; + bool control_group_modify; int control_group_persistent; @@ -174,6 +176,8 @@ struct ExecContext { * don't enter a trigger loop. */ bool same_pgrp; + uint32_t *syscall_filter; + bool oom_score_adjust_set:1; bool nice_set:1; bool ioprio_set:1; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 192c2b2780..140cb9c0a3 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -48,6 +48,8 @@ $1.Capabilities, config_parse_exec_capabilities, 0, $1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context) $1.CapabilityBoundingSet, config_parse_bounding_set, 0, offsetof($1, exec_context.capability_bounding_set_drop) $1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec) +$1.NoNewPrivileges config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges) +$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) $1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 748ab55d54..7fcd63a17a 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -45,6 +45,7 @@ #include "bus-errors.h" #include "utf8.h" #include "path-util.h" +#include "syscall-list.h" #ifndef HAVE_SYSV_COMPAT int config_parse_warn_compat( @@ -879,7 +880,7 @@ int config_parse_bounding_set( if (r < 0) { log_error("[%s:%u] Failed to parse capability bounding set, ignoring: %s", filename, line, rvalue); - return 0; + continue; } sum |= ((uint64_t) 1ULL) << (uint64_t) cap; @@ -2001,6 +2002,88 @@ int config_parse_documentation( return r; } +static void syscall_set(uint32_t *p, int nr) { + p[nr >> 4] |= 1 << (nr & 31); +} + +static void syscall_unset(uint32_t *p, int nr) { + p[nr >> 4] &= ~(1 << (nr & 31)); +} + +int config_parse_syscall_filter( + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + Unit *u = userdata; + bool invert; + char *w; + size_t l; + char *state; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (rvalue[0] == '~') { + invert = true; + rvalue++; + } + + if (!c->syscall_filter) { + size_t n; + + n = (syscall_max() + 31) >> 4; + c->syscall_filter = new(uint32_t, n); + if (!c->syscall_filter) + return -ENOMEM; + + memset(c->syscall_filter, invert ? 0xFF : 0, n * sizeof(uint32_t)); + + /* Add these by default */ + syscall_set(c->syscall_filter, __NR_execve); + syscall_set(c->syscall_filter, __NR_rt_sigreturn); +#ifdef __NR_sigreturn + syscall_set(c->syscall_filter, __NR_sigreturn); +#endif + syscall_set(c->syscall_filter, __NR_exit_group); + syscall_set(c->syscall_filter, __NR_exit); + } + + FOREACH_WORD_QUOTED(w, l, rvalue, state) { + int id; + char *t; + + t = strndup(w, l); + if (!t) + return -ENOMEM; + + id = syscall_from_name(t); + free(t); + + if (id < 0) { + log_error("[%s:%u] Failed to parse syscall, ignoring: %s", filename, line, rvalue); + continue; + } + + if (invert) + syscall_unset(c->syscall_filter, id); + else + syscall_set(c->syscall_filter, id); + } + + c->no_new_privileges = true; + + return 0; +} + #define FOLLOW_MAX 8 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) { diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 501ea4ad47..543e32968f 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -82,6 +82,7 @@ int config_parse_unit_device_allow(const char *filename, unsigned line, const ch int config_parse_unit_blkio_weight(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_blkio_bandwidth(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_requires_mounts_for(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_syscall_filter(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/syscall-list.c b/src/core/syscall-list.c new file mode 100644 index 0000000000..05fad3e158 --- /dev/null +++ b/src/core/syscall-list.c @@ -0,0 +1,55 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2012 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include + +#include "util.h" + +#include "syscall-list.h" + +const struct syscall_name *lookup_syscall(register const char *str, register unsigned int len); + +#include "syscall-to-name.h" +#include "syscall-from-name.h" + +const char *syscall_to_name(int id) { + if (id < 0 || id >= (int) ELEMENTSOF(syscall_names)) + return NULL; + + return syscall_names[id]; +} + +int syscall_from_name(const char *name) { + const struct syscall_name *sc; + + assert(name); + + sc = lookup_syscall(name, strlen(name)); + if (!sc) + return -1; + + return sc->id; +} + +int syscall_max(void) { + return ELEMENTSOF(syscall_names); +} diff --git a/src/core/syscall-list.h b/src/core/syscall-list.h new file mode 100644 index 0000000000..0fc6859605 --- /dev/null +++ b/src/core/syscall-list.h @@ -0,0 +1,30 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foosyscalllisthfoo +#define foosyscalllisthfoo + +/*** + This file is part of systemd. + + Copyright 2012 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +const char *syscall_to_name(int id); +int syscall_from_name(const char *name); + +int syscall_max(void); + +#endif -- cgit v1.2.3-54-g00ecf