summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2012-07-17 04:17:53 +0200
committerLennart Poettering <lennart@poettering.net>2012-07-17 04:17:53 +0200
commit8351ceaea9480d9c2979aa2ff0f4982cfdfef58d (patch)
treefc1f94e5a17679960774da386a54d145255e4ef1 /src
parentcd96b3b86abb4a88cac2722bdfb6e5d4413f6831 (diff)
execute: support syscall filtering using seccomp filters
Diffstat (limited to 'src')
-rw-r--r--src/core/.gitignore4
-rw-r--r--src/core/dbus-execute.c31
-rw-r--r--src/core/dbus-execute.h5
-rw-r--r--src/core/execute.c70
-rw-r--r--src/core/execute.h4
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c85
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/syscall-list.c55
-rw-r--r--src/core/syscall-list.h30
-rw-r--r--src/shared/dbus-common.c23
-rw-r--r--src/shared/exit-status.c6
-rw-r--r--src/shared/exit-status.h5
-rw-r--r--src/shared/linux/seccomp-bpf.h76
-rw-r--r--src/shared/linux/seccomp.h47
-rw-r--r--src/shared/missing.h4
16 files changed, 443 insertions, 5 deletions
diff --git a/src/core/.gitignore b/src/core/.gitignore
index f293bbdc93..a763f72507 100644
--- a/src/core/.gitignore
+++ b/src/core/.gitignore
@@ -1,2 +1,6 @@
+/syscall-from-name.gperf
+/syscall-from-name.h
+/syscall-list.txt
+/syscall-to-name.h
/macros.systemd
/systemd.pc
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 9322cdfd86..a00ad50795 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -28,6 +28,7 @@
#include "ioprio.h"
#include "strv.h"
#include "dbus-common.h"
+#include "syscall-list.h"
DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_execute_append_kill_mode, kill_mode, KillMode);
@@ -348,6 +349,32 @@ int bus_execute_append_command(DBusMessageIter *i, const char *property, void *d
return 0;
}
+int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data) {
+ ExecContext *c = data;
+ dbus_bool_t b;
+ DBusMessageIter sub;
+
+ assert(i);
+ assert(property);
+ assert(c);
+
+ if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "u", &sub))
+ return -ENOMEM;
+
+ if (c->syscall_filter)
+ b = dbus_message_iter_append_fixed_array(&sub, DBUS_TYPE_UINT32, &c->syscall_filter, (syscall_max() + 31) >> 4);
+ else
+ b = dbus_message_iter_append_fixed_array(&sub, DBUS_TYPE_UINT32, &c->syscall_filter, 0);
+
+ if (!b)
+ return -ENOMEM;
+
+ if (!dbus_message_iter_close_container(i, &sub))
+ return -ENOMEM;
+
+ return 0;
+}
+
const BusProperty bus_exec_context_properties[] = {
{ "Environment", bus_property_append_strv, "as", offsetof(ExecContext, environment), true },
{ "EnvironmentFiles", bus_execute_append_env_files, "a(sb)", offsetof(ExecContext, environment_files), true },
@@ -409,6 +436,8 @@ const BusProperty bus_exec_context_properties[] = {
{ "UtmpIdentifier", bus_property_append_string, "s", offsetof(ExecContext, utmp_id), true },
{ "ControlGroupModify", bus_property_append_bool, "b", offsetof(ExecContext, control_group_modify) },
{ "ControlGroupPersistent", bus_property_append_tristate_false, "b", offsetof(ExecContext, control_group_persistent) },
- { "IgnoreSIGPIPE", bus_property_append_bool, "b", offsetof(ExecContext, ignore_sigpipe ) },
+ { "IgnoreSIGPIPE", bus_property_append_bool, "b", offsetof(ExecContext, ignore_sigpipe) },
+ { "NoNewPrivileges", bus_property_append_bool, "b", offsetof(ExecContext, no_new_privileges) },
+ { "SystemCallFilter", bus_execute_append_syscall_filter, "au", 0 },
{ NULL, }
};
diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h
index b8bbe1c9f2..dc267e6ccc 100644
--- a/src/core/dbus-execute.h
+++ b/src/core/dbus-execute.h
@@ -96,7 +96,9 @@
" <property name=\"ControlGroupModify\" type=\"b\" access=\"read\"/>\n" \
" <property name=\"ControlGroupPersistent\" type=\"b\" access=\"read\"/>\n" \
" <property name=\"PrivateNetwork\" type=\"b\" access=\"read\"/>\n" \
- " <property name=\"IgnoreSIGPIPE\" type=\"b\" access=\"read\"/>\n"
+ " <property name=\"IgnoreSIGPIPE\" type=\"b\" access=\"read\"/>\n" \
+ " <property name=\"NoNewPrivileges\" type=\"b\" access=\"read\"/>\n" \
+ " <property name=\"SystemCallFilter\" type=\"au\" access=\"read\"/>\n"
#define BUS_EXEC_COMMAND_INTERFACE(name) \
" <property name=\"" name "\" type=\"a(sasbttuii)\" access=\"read\"/>\n"
@@ -121,5 +123,6 @@ int bus_execute_append_rlimits(DBusMessageIter *i, const char *property, void *d
int bus_execute_append_command(DBusMessageIter *u, const char *property, void *data);
int bus_execute_append_kill_mode(DBusMessageIter *i, const char *property, void *data);
int bus_execute_append_env_files(DBusMessageIter *i, const char *property, void *data);
+int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data);
#endif
diff --git a/src/core/execute.c b/src/core/execute.c
index daba1a3846..7a72aa486c 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -38,6 +38,7 @@
#include <linux/fs.h>
#include <linux/oom.h>
#include <sys/poll.h>
+#include <linux/seccomp-bpf.h>
#ifdef HAVE_PAM
#include <security/pam_appl.h>
@@ -60,6 +61,7 @@
#include "def.h"
#include "loopback-setup.h"
#include "path-util.h"
+#include "syscall-list.h"
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
@@ -924,6 +926,59 @@ static void rename_process_from_path(const char *path) {
rename_process(process_name);
}
+static int apply_seccomp(uint32_t *syscall_filter) {
+ static const struct sock_filter header[] = {
+ VALIDATE_ARCHITECTURE,
+ EXAMINE_SYSCALL
+ };
+ static const struct sock_filter footer[] = {
+ _KILL_PROCESS
+ };
+
+ int i;
+ unsigned n;
+ struct sock_filter *f;
+ struct sock_fprog prog;
+
+ assert(syscall_filter);
+
+ /* First: count the syscalls to check for */
+ for (i = 0, n = 0; i < syscall_max(); i++)
+ if (syscall_filter[i >> 4] & (1 << (i & 31)))
+ n++;
+
+ /* Second: build the filter program from a header the syscall
+ * matches and the footer */
+ f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
+ memcpy(f, header, sizeof(header));
+
+ for (i = 0, n = 0; i < syscall_max(); i++)
+ if (syscall_filter[i >> 4] & (1 << (i & 31))) {
+ struct sock_filter item[] = {
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+ };
+
+ assert_cc(ELEMENTSOF(item) == 2);
+
+ f[ELEMENTSOF(header) + 2*n] = item[0];
+ f[ELEMENTSOF(header) + 2*n+1] = item[1];
+
+ n++;
+ }
+
+ memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
+
+ /* Third: install the filter */
+ zero(prog);
+ prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
+ prog.filter = f;
+ if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
+ return -errno;
+
+ return 0;
+}
+
int exec_spawn(ExecCommand *command,
char **argv,
const ExecContext *context,
@@ -1355,6 +1410,21 @@ int exec_spawn(ExecCommand *command,
r = EXIT_CAPABILITIES;
goto fail_child;
}
+
+ if (context->no_new_privileges)
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ err = -errno;
+ r = EXIT_NO_NEW_PRIVILEGES;
+ goto fail_child;
+ }
+
+ if (context->syscall_filter) {
+ err = apply_seccomp(context->syscall_filter);
+ if (err < 0) {
+ r = EXIT_SECCOMP;
+ goto fail_child;
+ }
+ }
}
if (!(our_env = new0(char*, 7))) {
diff --git a/src/core/execute.h b/src/core/execute.h
index 2083c2971b..187165cdc2 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -164,6 +164,8 @@ struct ExecContext {
bool private_tmp;
bool private_network;
+ bool no_new_privileges;
+
bool control_group_modify;
int control_group_persistent;
@@ -174,6 +176,8 @@ struct ExecContext {
* don't enter a trigger loop. */
bool same_pgrp;
+ uint32_t *syscall_filter;
+
bool oom_score_adjust_set:1;
bool nice_set:1;
bool ioprio_set:1;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 192c2b2780..140cb9c0a3 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -48,6 +48,8 @@ $1.Capabilities, config_parse_exec_capabilities, 0,
$1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context)
$1.CapabilityBoundingSet, config_parse_bounding_set, 0, offsetof($1, exec_context.capability_bounding_set_drop)
$1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec)
+$1.NoNewPrivileges config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges)
+$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 748ab55d54..7fcd63a17a 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -45,6 +45,7 @@
#include "bus-errors.h"
#include "utf8.h"
#include "path-util.h"
+#include "syscall-list.h"
#ifndef HAVE_SYSV_COMPAT
int config_parse_warn_compat(
@@ -879,7 +880,7 @@ int config_parse_bounding_set(
if (r < 0) {
log_error("[%s:%u] Failed to parse capability bounding set, ignoring: %s", filename, line, rvalue);
- return 0;
+ continue;
}
sum |= ((uint64_t) 1ULL) << (uint64_t) cap;
@@ -2001,6 +2002,88 @@ int config_parse_documentation(
return r;
}
+static void syscall_set(uint32_t *p, int nr) {
+ p[nr >> 4] |= 1 << (nr & 31);
+}
+
+static void syscall_unset(uint32_t *p, int nr) {
+ p[nr >> 4] &= ~(1 << (nr & 31));
+}
+
+int config_parse_syscall_filter(
+ const char *filename,
+ unsigned line,
+ const char *section,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ Unit *u = userdata;
+ bool invert;
+ char *w;
+ size_t l;
+ char *state;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->syscall_filter) {
+ size_t n;
+
+ n = (syscall_max() + 31) >> 4;
+ c->syscall_filter = new(uint32_t, n);
+ if (!c->syscall_filter)
+ return -ENOMEM;
+
+ memset(c->syscall_filter, invert ? 0xFF : 0, n * sizeof(uint32_t));
+
+ /* Add these by default */
+ syscall_set(c->syscall_filter, __NR_execve);
+ syscall_set(c->syscall_filter, __NR_rt_sigreturn);
+#ifdef __NR_sigreturn
+ syscall_set(c->syscall_filter, __NR_sigreturn);
+#endif
+ syscall_set(c->syscall_filter, __NR_exit_group);
+ syscall_set(c->syscall_filter, __NR_exit);
+ }
+
+ FOREACH_WORD_QUOTED(w, l, rvalue, state) {
+ int id;
+ char *t;
+
+ t = strndup(w, l);
+ if (!t)
+ return -ENOMEM;
+
+ id = syscall_from_name(t);
+ free(t);
+
+ if (id < 0) {
+ log_error("[%s:%u] Failed to parse syscall, ignoring: %s", filename, line, rvalue);
+ continue;
+ }
+
+ if (invert)
+ syscall_unset(c->syscall_filter, id);
+ else
+ syscall_set(c->syscall_filter, id);
+ }
+
+ c->no_new_privileges = true;
+
+ return 0;
+}
+
#define FOLLOW_MAX 8
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 501ea4ad47..543e32968f 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -82,6 +82,7 @@ int config_parse_unit_device_allow(const char *filename, unsigned line, const ch
int config_parse_unit_blkio_weight(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_blkio_bandwidth(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_requires_mounts_for(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_syscall_filter(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/core/syscall-list.c b/src/core/syscall-list.c
new file mode 100644
index 0000000000..05fad3e158
--- /dev/null
+++ b/src/core/syscall-list.c
@@ -0,0 +1,55 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/syscall.h>
+#include <string.h>
+
+#include "util.h"
+
+#include "syscall-list.h"
+
+const struct syscall_name *lookup_syscall(register const char *str, register unsigned int len);
+
+#include "syscall-to-name.h"
+#include "syscall-from-name.h"
+
+const char *syscall_to_name(int id) {
+ if (id < 0 || id >= (int) ELEMENTSOF(syscall_names))
+ return NULL;
+
+ return syscall_names[id];
+}
+
+int syscall_from_name(const char *name) {
+ const struct syscall_name *sc;
+
+ assert(name);
+
+ sc = lookup_syscall(name, strlen(name));
+ if (!sc)
+ return -1;
+
+ return sc->id;
+}
+
+int syscall_max(void) {
+ return ELEMENTSOF(syscall_names);
+}
diff --git a/src/core/syscall-list.h b/src/core/syscall-list.h
new file mode 100644
index 0000000000..0fc6859605
--- /dev/null
+++ b/src/core/syscall-list.h
@@ -0,0 +1,30 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#ifndef foosyscalllisthfoo
+#define foosyscalllisthfoo
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+const char *syscall_to_name(int id);
+int syscall_from_name(const char *name);
+
+int syscall_max(void);
+
+#endif
diff --git a/src/shared/dbus-common.c b/src/shared/dbus-common.c
index 3f5ce97680..7d57680cfc 100644
--- a/src/shared/dbus-common.c
+++ b/src/shared/dbus-common.c
@@ -1083,6 +1083,29 @@ int generic_print_property(const char *name, DBusMessageIter *iter, bool all) {
}
return 1;
+
+ } else if (dbus_message_iter_get_element_type(iter) == DBUS_TYPE_UINT32) {
+ DBusMessageIter sub;
+
+ dbus_message_iter_recurse(iter, &sub);
+ if (all ||
+ dbus_message_iter_get_arg_type(&sub) != DBUS_TYPE_INVALID) {
+ printf("%s=", name);
+
+ while (dbus_message_iter_get_arg_type(&sub) != DBUS_TYPE_INVALID) {
+ uint32_t u;
+
+ assert(dbus_message_iter_get_arg_type(&sub) == DBUS_TYPE_UINT32);
+ dbus_message_iter_get_basic(&sub, &u);
+ printf("%08x", u);
+
+ dbus_message_iter_next(&sub);
+ }
+
+ puts("");
+ }
+
+ return 1;
}
break;
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
index b07a66a3e2..0dc82b2e13 100644
--- a/src/shared/exit-status.c
+++ b/src/shared/exit-status.c
@@ -122,6 +122,12 @@ const char* exit_status_to_string(ExitStatus status, ExitStatusLevel level) {
case EXIT_NAMESPACE:
return "NAMESPACE";
+
+ case EXIT_NO_NEW_PRIVILEGES:
+ return "NO_NEW_PRIVILEGES";
+
+ case EXIT_SECCOMP:
+ return "SECCOMP";
}
}
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
index 349e24fbf2..813f1ce1b4 100644
--- a/src/shared/exit-status.h
+++ b/src/shared/exit-status.h
@@ -66,8 +66,9 @@ typedef enum ExitStatus {
EXIT_TCPWRAP,
EXIT_PAM,
EXIT_NETWORK,
- EXIT_NAMESPACE
-
+ EXIT_NAMESPACE,
+ EXIT_NO_NEW_PRIVILEGES,
+ EXIT_SECCOMP
} ExitStatus;
typedef enum ExitStatusLevel {
diff --git a/src/shared/linux/seccomp-bpf.h b/src/shared/linux/seccomp-bpf.h
new file mode 100644
index 0000000000..1e3d136739
--- /dev/null
+++ b/src/shared/linux/seccomp-bpf.h
@@ -0,0 +1,76 @@
+/*
+ * seccomp example for x86 (32-bit and 64-bit) with BPF macros
+ *
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Authors:
+ * Will Drewry <wad@chromium.org>
+ * Kees Cook <keescook@chromium.org>
+ *
+ * The code may be used by anyone for any purpose, and can serve as a
+ * starting point for developing applications using mode 2 seccomp.
+ */
+#ifndef _SECCOMP_BPF_H_
+#define _SECCOMP_BPF_H_
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/prctl.h>
+
+#include <linux/unistd.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+#ifndef SECCOMP_MODE_FILTER
+# define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
+# define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
+# define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+# define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+#endif
+#ifndef SYS_SECCOMP
+# define SYS_SECCOMP 1
+#endif
+
+#define syscall_nr (offsetof(struct seccomp_data, nr))
+#define arch_nr (offsetof(struct seccomp_data, arch))
+
+#if defined(__i386__)
+# define REG_SYSCALL REG_EAX
+# define ARCH_NR AUDIT_ARCH_I386
+#elif defined(__x86_64__)
+# define REG_SYSCALL REG_RAX
+# define ARCH_NR AUDIT_ARCH_X86_64
+#else
+# warning "Platform does not support seccomp filter yet"
+# define REG_SYSCALL 0
+# define ARCH_NR 0
+#endif
+
+#define VALIDATE_ARCHITECTURE \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, arch_nr), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+
+#define EXAMINE_SYSCALL \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr)
+
+#define ALLOW_SYSCALL(name) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+
+#define _KILL_PROCESS \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+
+#endif /* _SECCOMP_BPF_H_ */
diff --git a/src/shared/linux/seccomp.h b/src/shared/linux/seccomp.h
new file mode 100644
index 0000000000..9c03683fa0
--- /dev/null
+++ b/src/shared/linux/seccomp.h
@@ -0,0 +1,47 @@
+#ifndef _LINUX_SECCOMP_H
+#define _LINUX_SECCOMP_H
+
+
+#include <linux/types.h>
+
+
+/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */
+#define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */
+#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
+#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
+
+/*
+ * All BPF programs must return a 32-bit value.
+ * The bottom 16-bits are for optional return data.
+ * The upper 16-bits are ordered from least permissive values to most.
+ *
+ * The ordering ensures that a min_t() over composed return values always
+ * selects the least permissive choice.
+ */
+#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+
+/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION 0x7fff0000U
+#define SECCOMP_RET_DATA 0x0000ffffU
+
+/**
+ * struct seccomp_data - the format the BPF program executes over.
+ * @nr: the system call number
+ * @arch: indicates system call convention as an AUDIT_ARCH_* value
+ * as defined in <linux/audit.h>.
+ * @instruction_pointer: at the time of the system call.
+ * @args: up to 6 system call arguments always stored as 64-bit values
+ * regardless of the architecture.
+ */
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+
+#endif /* _LINUX_SECCOMP_H */
diff --git a/src/shared/missing.h b/src/shared/missing.h
index 0cf7949d2c..d918c4e9a7 100644
--- a/src/shared/missing.h
+++ b/src/shared/missing.h
@@ -188,4 +188,8 @@ static inline pid_t gettid(void) {
#define MS_STRICTATIME (1<<24)
#endif
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
#endif