summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/execute.c176
-rw-r--r--src/core/load-fragment.c54
-rw-r--r--src/core/scope.c44
-rw-r--r--src/core/slice.c38
-rw-r--r--src/network/networkd-netdev-bond.c6
-rw-r--r--src/nspawn/nspawn-seccomp.c18
-rw-r--r--src/shared/condition.c9
-rw-r--r--src/shared/seccomp-util.c185
-rw-r--r--src/shared/seccomp-util.h37
-rw-r--r--src/test/test-seccomp.c103
-rw-r--r--src/update-done/update-done.c15
11 files changed, 419 insertions, 266 deletions
diff --git a/src/core/execute.c b/src/core/execute.c
index 53356c3c06..5e7d7c25d7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1185,18 +1185,19 @@ static void rename_process_from_path(const char *path) {
#ifdef HAVE_SECCOMP
static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
- if (!is_seccomp_available()) {
- log_open();
- log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
- log_close();
- return true;
- }
- return false;
+
+ if (is_seccomp_available())
+ return false;
+
+ log_open();
+ log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+ log_close();
+ return true;
}
static int apply_seccomp(const Unit* u, const ExecContext *c) {
uint32_t negative_action, action;
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
Iterator i;
void *id;
int r;
@@ -1247,7 +1248,7 @@ finish:
}
static int apply_address_families(const Unit* u, const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
Iterator i;
int r;
@@ -1256,13 +1257,9 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
return 0;
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
-
- r = seccomp_add_secondary_archs(seccomp);
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
if (r < 0)
- goto finish;
+ return r;
if (c->address_families_whitelist) {
int af, first = 0, last = 0;
@@ -1359,10 +1356,6 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
}
}
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
r = seccomp_load(seccomp);
finish:
@@ -1371,7 +1364,7 @@ finish:
}
static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
int r;
assert(c);
@@ -1379,13 +1372,9 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
return 0;
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
-
- r = seccomp_add_secondary_archs(seccomp);
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
if (r < 0)
- goto finish;
+ return r;
r = seccomp_rule_add(
seccomp,
@@ -1405,10 +1394,6 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
r = seccomp_load(seccomp);
finish:
@@ -1423,7 +1408,7 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
SCHED_IDLE,
};
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
unsigned i;
int r, p, max_policy = 0;
@@ -1432,13 +1417,9 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
if (skip_seccomp_unavailable(u, "RestrictRealtime="))
return 0;
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
-
- r = seccomp_add_secondary_archs(seccomp);
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
if (r < 0)
- goto finish;
+ return r;
/* Determine the highest policy constant we want to allow */
for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
@@ -1482,10 +1463,6 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
r = seccomp_load(seccomp);
finish:
@@ -1494,7 +1471,7 @@ finish:
}
static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
int r;
assert(c);
@@ -1505,13 +1482,9 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
return 0;
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
-
- r = seccomp_add_secondary_archs(seccomp);
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
if (r < 0)
- goto finish;
+ return r;
r = seccomp_rule_add(
seccomp,
@@ -1521,10 +1494,6 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
r = seccomp_load(seccomp);
finish:
@@ -1533,56 +1502,17 @@ finish:
}
static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
- static const int module_syscalls[] = {
- SCMP_SYS(delete_module),
- SCMP_SYS(finit_module),
- SCMP_SYS(init_module),
- };
-
- scmp_filter_ctx *seccomp;
- unsigned i;
- int r;
-
assert(c);
- /* Turn of module syscalls on ProtectKernelModules=yes */
+ /* Turn off module syscalls on ProtectKernelModules=yes */
if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
return 0;
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
-
- r = seccomp_add_secondary_archs(seccomp);
- if (r < 0)
- goto finish;
-
- for (i = 0; i < ELEMENTSOF(module_syscalls); i++) {
- r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM),
- module_syscalls[i], 0);
- if (r < 0)
- goto finish;
- }
-
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
- r = seccomp_load(seccomp);
-
-finish:
- seccomp_release(seccomp);
- return r;
+ return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
}
static int apply_private_devices(Unit *u, const ExecContext *c) {
- const SystemCallFilterSet *set;
- scmp_filter_ctx *seccomp;
- const char *sys;
- bool syscalls_found = false;
- int r;
-
assert(c);
/* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
@@ -1590,61 +1520,7 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
if (skip_seccomp_unavailable(u, "PrivateDevices="))
return 0;
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
-
- r = seccomp_add_secondary_archs(seccomp);
- if (r < 0)
- goto finish;
-
- for (set = syscall_filter_sets; set->set_name; set++)
- if (streq(set->set_name, "@raw-io")) {
- syscalls_found = true;
- break;
- }
-
- /* We should never fail here */
- if (!syscalls_found) {
- r = -EOPNOTSUPP;
- goto finish;
- }
-
- NULSTR_FOREACH(sys, set->value) {
- int id;
- bool add = true;
-
-#ifndef __NR_s390_pci_mmio_read
- if (streq(sys, "s390_pci_mmio_read"))
- add = false;
-#endif
-#ifndef __NR_s390_pci_mmio_write
- if (streq(sys, "s390_pci_mmio_write"))
- add = false;
-#endif
-
- if (!add)
- continue;
-
- id = seccomp_syscall_resolve_name(sys);
-
- r = seccomp_rule_add(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- id, 0);
- if (r < 0)
- goto finish;
- }
-
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
- r = seccomp_load(seccomp);
-
-finish:
- seccomp_release(seccomp);
- return r;
+ return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
}
#endif
@@ -1890,9 +1766,9 @@ static int setup_private_users(uid_t uid, gid_t gid) {
asprintf(&uid_map,
"0 0 1\n" /* Map root → root */
UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
- uid, uid); /* The case where the above is the same */
+ uid, uid);
else
- uid_map = strdup("0 0 1\n");
+ uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
if (!uid_map)
return -ENOMEM;
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 6f68e23340..118b39c1cf 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2618,6 +2618,7 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
+
static int syscall_filter_parse_one(
const char *unit,
const char *filename,
@@ -2628,27 +2629,29 @@ static int syscall_filter_parse_one(
bool warn) {
int r;
- if (*t == '@') {
- const SystemCallFilterSet *set;
+ if (t[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
- for (set = syscall_filter_sets; set->set_name; set++)
- if (streq(set->set_name, t)) {
- const char *sys;
+ set = syscall_filter_set_find(t);
+ if (!set) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
+ return 0;
+ }
- NULSTR_FOREACH(sys, set->value) {
- r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
- if (r < 0)
- return r;
- }
- break;
- }
+ NULSTR_FOREACH(i, set->value) {
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
+ if (r < 0)
+ return r;
+ }
} else {
int id;
id = seccomp_syscall_resolve_name(t);
if (id == __NR_SCMP_ERROR) {
if (warn)
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
return 0;
}
@@ -2662,8 +2665,9 @@ static int syscall_filter_parse_one(
if (r < 0)
return log_oom();
} else
- set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
}
+
return 0;
}
@@ -2682,8 +2686,7 @@ int config_parse_syscall_filter(
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
- const char *word, *state;
- size_t l;
+ const char *p;
int r;
assert(filename);
@@ -2722,19 +2725,24 @@ int config_parse_syscall_filter(
}
}
- FOREACH_WORD_QUOTED(word, l, rvalue, state) {
- _cleanup_free_ char *t = NULL;
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
- t = strndup(word, l);
- if (!t)
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
- r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
if (r < 0)
return r;
}
- if (!isempty(state))
- log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
/* Turn on NNP, but only if it wasn't configured explicitly
* before, and only if we are in user mode. */
diff --git a/src/core/scope.c b/src/core/scope.c
index e7583f6d89..af0c43c7da 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -147,6 +147,34 @@ static int scope_verify(Scope *s) {
return 0;
}
+static int scope_load_init_scope(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return 0;
+
+ u->transient = true;
+ u->no_gc = true;
+
+ /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+ * synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+ u->ignore_on_isolate = true;
+ u->refuse_manual_start = true;
+ u->refuse_manual_stop = true;
+
+ SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
+
+ /* Prettify things, if we can. */
+ if (!u->description)
+ u->description = strdup("System and Service Manager");
+ if (!u->documentation)
+ (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+ return 1;
+}
+
static int scope_load(Unit *u) {
Scope *s = SCOPE(u);
int r;
@@ -158,6 +186,9 @@ static int scope_load(Unit *u) {
/* Refuse to load non-transient scope units, but allow them while reloading. */
return -ENOENT;
+ r = scope_load_init_scope(u);
+ if (r < 0)
+ return r;
r = unit_load_fragment_and_dropin_optional(u);
if (r < 0)
return r;
@@ -543,25 +574,14 @@ static void scope_enumerate(Manager *m) {
r = unit_add_name(u, SPECIAL_INIT_SCOPE);
if (r < 0) {
unit_free(u);
- log_error_errno(r, "Failed to add init.scope name");
+ log_error_errno(r, "Failed to add the " SPECIAL_INIT_SCOPE " name: %m");
return;
}
}
u->transient = true;
- u->default_dependencies = false;
u->no_gc = true;
- u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
SCOPE(u)->deserialized_state = SCOPE_RUNNING;
- SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
-
- /* Prettify things, if we can. */
- if (!u->description)
- u->description = strdup("System and Service Manager");
- if (!u->documentation)
- (void) strv_extend(&u->documentation, "man:systemd(1)");
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
diff --git a/src/core/slice.c b/src/core/slice.c
index 03fe797f27..0fef29661f 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -130,6 +130,30 @@ static int slice_verify(Slice *s) {
return 0;
}
+static int slice_load_root_slice(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ u->no_gc = true;
+
+ /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+ u->ignore_on_isolate = true;
+ u->refuse_manual_start = true;
+ u->refuse_manual_stop = true;
+
+ if (!u->description)
+ u->description = strdup("Root Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)", NULL);
+
+ return 1;
+}
+
static int slice_load(Unit *u) {
Slice *s = SLICE(u);
int r;
@@ -137,6 +161,9 @@ static int slice_load(Unit *u) {
assert(s);
assert(u->load_state == UNIT_STUB);
+ r = slice_load_root_slice(u);
+ if (r < 0)
+ return r;
r = unit_load_fragment_and_dropin_optional(u);
if (r < 0)
return r;
@@ -283,23 +310,14 @@ static void slice_enumerate(Manager *m) {
r = unit_add_name(u, SPECIAL_ROOT_SLICE);
if (r < 0) {
unit_free(u);
- log_error_errno(r, "Failed to add -.slice name");
+ log_error_errno(r, "Failed to add the "SPECIAL_ROOT_SLICE " name: %m");
return;
}
}
- u->default_dependencies = false;
u->no_gc = true;
- u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
SLICE(u)->deserialized_state = SLICE_ACTIVE;
- if (!u->description)
- u->description = strdup("Root Slice");
- if (!u->documentation)
- (void) strv_extend(&u->documentation, "man:systemd.special(7)");
-
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
}
diff --git a/src/network/networkd-netdev-bond.c b/src/network/networkd-netdev-bond.c
index 7913b0088e..46d1669337 100644
--- a/src/network/networkd-netdev-bond.c
+++ b/src/network/networkd-netdev-bond.c
@@ -268,13 +268,13 @@ static int netdev_bond_fill_message_create(NetDev *netdev, Link *link, sd_netlin
if (b->arp_all_targets != _NETDEV_BOND_ARP_ALL_TARGETS_INVALID) {
r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_ALL_TARGETS, b->arp_all_targets);
if (r < 0)
- return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_VALIDATE attribute: %m");
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_ALL_TARGETS attribute: %m");
}
if (b->primary_reselect != _NETDEV_BOND_PRIMARY_RESELECT_INVALID) {
- r = sd_netlink_message_append_u32(m, IFLA_BOND_ARP_ALL_TARGETS, b->primary_reselect);
+ r = sd_netlink_message_append_u8(m, IFLA_BOND_PRIMARY_RESELECT, b->primary_reselect);
if (r < 0)
- return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_ARP_ALL_TARGETS attribute: %m");
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BOND_PRIMARY_RESELECT attribute: %m");
}
if (b->resend_igmp <= RESEND_IGMP_MAX) {
diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c
index 44a0b397ab..03a397d30c 100644
--- a/src/nspawn/nspawn-seccomp.c
+++ b/src/nspawn/nspawn-seccomp.c
@@ -135,15 +135,9 @@ int setup_seccomp(uint64_t cap_list_retain) {
return 0;
}
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return log_oom();
-
- r = seccomp_add_secondary_archs(seccomp);
- if (r < 0) {
- log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m");
- goto finish;
- }
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate seccomp object: %m");
r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain);
if (r < 0)
@@ -171,12 +165,6 @@ int setup_seccomp(uint64_t cap_list_retain) {
goto finish;
}
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0) {
- log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m");
- goto finish;
- }
-
r = seccomp_load(seccomp);
if (r < 0) {
log_error_errno(r, "Failed to install seccomp audit filter: %m");
diff --git a/src/shared/condition.c b/src/shared/condition.c
index f13fa6a9fd..69b4837e1f 100644
--- a/src/shared/condition.c
+++ b/src/shared/condition.c
@@ -329,9 +329,9 @@ static int condition_test_needs_update(Condition *c) {
uint64_t timestamp;
int r;
- r = parse_env_file(p, NULL, "TimestampNSec", &timestamp_str, NULL);
+ r = parse_env_file(p, NULL, "TIMESTAMP_NSEC", &timestamp_str, NULL);
if (r < 0) {
- log_error_errno(-r, "Failed to parse timestamp file '%s', using mtime: %m", p);
+ log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p);
return true;
} else if (r == 0) {
log_debug("No data in timestamp file '%s', using mtime", p);
@@ -340,12 +340,11 @@ static int condition_test_needs_update(Condition *c) {
r = safe_atou64(timestamp_str, &timestamp);
if (r < 0) {
- log_error_errno(-r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m",
- timestamp_str, p);
+ log_error_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p);
return true;
}
- other.st_mtim.tv_nsec = timestamp % NSEC_PER_SEC;
+ timespec_store(&other.st_mtim, timestamp);
}
return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 8116c7671f..6252cd16a6 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -26,6 +26,7 @@
#include "macro.h"
#include "seccomp-util.h"
#include "string-util.h"
+#include "util.h"
const char* seccomp_arch_to_string(uint32_t c) {
@@ -73,7 +74,34 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) {
return 0;
}
-int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
+int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) {
+ scmp_filter_ctx seccomp;
+ int r;
+
+ /* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are
+ * added by default, and NNP is turned off. */
+
+ seccomp = seccomp_init(default_action);
+ if (!seccomp)
+ return -ENOMEM;
+
+ r = seccomp_add_secondary_archs(seccomp);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ *ret = seccomp;
+ return 0;
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
+int seccomp_add_secondary_archs(scmp_filter_ctx c) {
#if defined(__i386__) || defined(__x86_64__)
int r;
@@ -110,7 +138,6 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
#endif
return 0;
-
}
static bool is_basic_seccomp_available(void) {
@@ -132,28 +159,30 @@ bool is_seccomp_available(void) {
return cached_enabled;
}
-const SystemCallFilterSet syscall_filter_sets[] = {
- {
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_CLOCK] = {
/* Clock */
- .set_name = "@clock",
+ .name = "@clock",
.value =
"adjtimex\0"
"clock_adjtime\0"
"clock_settime\0"
"settimeofday\0"
"stime\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_CPU_EMULATION] = {
/* CPU emulation calls */
- .set_name = "@cpu-emulation",
+ .name = "@cpu-emulation",
.value =
"modify_ldt\0"
"subpage_prot\0"
"switch_endian\0"
"vm86\0"
"vm86old\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_DEBUG] = {
/* Debugging/Performance Monitoring/Tracing */
- .set_name = "@debug",
+ .name = "@debug",
.value =
"lookup_dcookie\0"
"perf_event_open\0"
@@ -161,11 +190,14 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"process_vm_writev\0"
"ptrace\0"
"rtas\0"
+#ifdef __NR_s390_runtime_instr
"s390_runtime_instr\0"
+#endif
"sys_debug_setcontext\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_DEFAULT] = {
/* Default list */
- .set_name = "@default",
+ .name = "@default",
.value =
"execve\0"
"exit\0"
@@ -173,9 +205,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"getrlimit\0" /* make sure processes can query stack size and such */
"rt_sigreturn\0"
"sigreturn\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_IO_EVENT] = {
/* Event loop use */
- .set_name = "@io-event",
+ .name = "@io-event",
.value =
"_newselect\0"
"epoll_create1\0"
@@ -191,9 +224,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"ppoll\0"
"pselect6\0"
"select\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_IPC] = {
/* Message queues, SYSV IPC or other IPC: unusual */
- .set_name = "@ipc",
+ .name = "@ipc",
.value = "ipc\0"
"mq_getsetattr\0"
"mq_notify\0"
@@ -215,33 +249,36 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"shmctl\0"
"shmdt\0"
"shmget\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_KEYRING] = {
/* Keyring */
- .set_name = "@keyring",
+ .name = "@keyring",
.value =
"add_key\0"
"keyctl\0"
"request_key\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_MODULE] = {
/* Kernel module control */
- .set_name = "@module",
+ .name = "@module",
.value =
"delete_module\0"
"finit_module\0"
"init_module\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_MOUNT] = {
/* Mounting */
- .set_name = "@mount",
+ .name = "@mount",
.value =
"chroot\0"
"mount\0"
- "oldumount\0"
"pivot_root\0"
"umount2\0"
"umount\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_NETWORK_IO] = {
/* Network or Unix socket IO, should not be needed if not network facing */
- .set_name = "@network-io",
+ .name = "@network-io",
.value =
"accept4\0"
"accept\0"
@@ -264,9 +301,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"socket\0"
"socketcall\0"
"socketpair\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_OBSOLETE] = {
/* Unusual, obsolete or unimplemented, some unknown even to libseccomp */
- .set_name = "@obsolete",
+ .name = "@obsolete",
.value =
"_sysctl\0"
"afs_syscall\0"
@@ -292,9 +330,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"uselib\0"
"ustat\0"
"vserver\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_PRIVILEGED] = {
/* Nice grab-bag of all system calls which need superuser capabilities */
- .set_name = "@privileged",
+ .name = "@privileged",
.value =
"@clock\0"
"@module\0"
@@ -331,11 +370,12 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"setuid\0"
"swapoff\0"
"swapon\0"
- "sysctl\0"
+ "_sysctl\0"
"vhangup\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_PROCESS] = {
/* Process control, execution, namespaces */
- .set_name = "@process",
+ .name = "@process",
.value =
"arch_prctl\0"
"clone\0"
@@ -349,19 +389,90 @@ const SystemCallFilterSet syscall_filter_sets[] = {
"tkill\0"
"unshare\0"
"vfork\0"
- }, {
+ },
+ [SYSCALL_FILTER_SET_RAW_IO] = {
/* Raw I/O ports */
- .set_name = "@raw-io",
+ .name = "@raw-io",
.value =
"ioperm\0"
"iopl\0"
"pciconfig_iobase\0"
"pciconfig_read\0"
"pciconfig_write\0"
+#ifdef __NR_s390_pci_mmio_read
"s390_pci_mmio_read\0"
+#endif
+#ifdef __NR_s390_pci_mmio_write
"s390_pci_mmio_write\0"
- }, {
- .set_name = NULL,
- .value = NULL
- }
+#endif
+ },
};
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+ unsigned i;
+
+ if (isempty(name) || name[0] != '@')
+ return NULL;
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
+ if (streq(syscall_filter_sets[i].name, name))
+ return syscall_filter_sets + i;
+
+ return NULL;
+}
+
+int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) {
+ const char *sys;
+ int r;
+
+ assert(seccomp);
+ assert(set);
+
+ NULSTR_FOREACH(sys, set->value) {
+ int id;
+
+ if (sys[0] == '@') {
+ const SyscallFilterSet *other;
+
+ other = syscall_filter_set_find(sys);
+ if (!other)
+ return -EINVAL;
+
+ r = seccomp_add_syscall_filter_set(seccomp, other, action);
+ } else {
+ id = seccomp_syscall_resolve_name(sys);
+ if (id == __NR_SCMP_ERROR)
+ return -EINVAL;
+
+ r = seccomp_rule_add(seccomp, action, id, 0);
+ }
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) {
+ scmp_filter_ctx seccomp;
+ int r;
+
+ assert(set);
+
+ /* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */
+
+ r = seccomp_init_conservative(&seccomp, default_action);
+ if (r < 0)
+ return r;
+
+ r = seccomp_add_syscall_filter_set(seccomp, set, action);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index cca7c17912..8050fc6fbf 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -20,18 +20,45 @@
***/
#include <seccomp.h>
+#include <stdbool.h>
#include <stdint.h>
const char* seccomp_arch_to_string(uint32_t c);
int seccomp_arch_from_string(const char *n, uint32_t *ret);
-int seccomp_add_secondary_archs(scmp_filter_ctx *c);
+int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action);
+
+int seccomp_add_secondary_archs(scmp_filter_ctx c);
bool is_seccomp_available(void);
-typedef struct SystemCallFilterSet {
- const char *set_name;
+typedef struct SyscallFilterSet {
+ const char *name;
const char *value;
-} SystemCallFilterSet;
+} SyscallFilterSet;
+
+enum {
+ SYSCALL_FILTER_SET_CLOCK,
+ SYSCALL_FILTER_SET_CPU_EMULATION,
+ SYSCALL_FILTER_SET_DEBUG,
+ SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_IO_EVENT,
+ SYSCALL_FILTER_SET_IPC,
+ SYSCALL_FILTER_SET_KEYRING,
+ SYSCALL_FILTER_SET_MODULE,
+ SYSCALL_FILTER_SET_MOUNT,
+ SYSCALL_FILTER_SET_NETWORK_IO,
+ SYSCALL_FILTER_SET_OBSOLETE,
+ SYSCALL_FILTER_SET_PRIVILEGED,
+ SYSCALL_FILTER_SET_PROCESS,
+ SYSCALL_FILTER_SET_RAW_IO,
+ _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action);
-extern const SystemCallFilterSet syscall_filter_sets[];
+int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action);
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
new file mode 100644
index 0000000000..0060ecdf02
--- /dev/null
+++ b/src/test/test-seccomp.c
@@ -0,0 +1,103 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "macro.h"
+#include "process-util.h"
+#include "seccomp-util.h"
+
+static void test_seccomp_arch_to_string(void) {
+ uint32_t a, b;
+ const char *name;
+
+ a = seccomp_arch_native();
+ assert_se(a > 0);
+ name = seccomp_arch_to_string(a);
+ assert_se(name);
+ assert_se(seccomp_arch_from_string(name, &b) >= 0);
+ assert_se(a == b);
+}
+
+static void test_syscall_filter_set_find(void) {
+ assert_se(!syscall_filter_set_find(NULL));
+ assert_se(!syscall_filter_set_find(""));
+ assert_se(!syscall_filter_set_find("quux"));
+ assert_se(!syscall_filter_set_find("@quux"));
+
+ assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK);
+ assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT);
+ assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO);
+}
+
+static void test_filter_sets(void) {
+ unsigned i;
+ int r;
+
+ if (!is_seccomp_available())
+ return;
+
+ if (geteuid() != 0)
+ return;
+
+ for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
+ pid_t pid;
+
+ log_info("Testing %s", syscall_filter_sets[i].name);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) { /* Child? */
+ int fd;
+
+ if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */
+ r = seccomp_load_filter_set(SCMP_ACT_ERRNO(EPERM), syscall_filter_sets + i, SCMP_ACT_ALLOW);
+ else
+ r = seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EPERM));
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ /* Test the sycall filter with one random system call */
+ fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
+ if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT))
+ assert_se(fd < 0 && errno == EPERM);
+ else {
+ assert_se(fd >= 0);
+ safe_close(fd);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(wait_for_terminate_and_warn(syscall_filter_sets[i].name, pid, true) == EXIT_SUCCESS);
+ }
+}
+
+int main(int argc, char *argv[]) {
+
+ test_seccomp_arch_to_string();
+ test_syscall_filter_set_find();
+ test_filter_sets();
+
+ return 0;
+}
diff --git a/src/update-done/update-done.c b/src/update-done/update-done.c
index 5cc5abfddf..48c2a3fff4 100644
--- a/src/update-done/update-done.c
+++ b/src/update-done/update-done.c
@@ -18,6 +18,7 @@
***/
#include "fd-util.h"
+#include "fileio.h"
#include "io-util.h"
#include "selinux-util.h"
#include "util.h"
@@ -32,8 +33,8 @@ static int apply_timestamp(const char *path, struct timespec *ts) {
*ts,
*ts
};
- int fd = -1;
_cleanup_fclose_ FILE *f = NULL;
+ int fd = -1;
int r;
assert(path);
@@ -59,18 +60,20 @@ static int apply_timestamp(const char *path, struct timespec *ts) {
return log_error_errno(errno, "Failed to create/open timestamp file %s: %m", path);
}
- f = fdopen(fd, "w");
+ f = fdopen(fd, "we");
if (!f) {
safe_close(fd);
return log_error_errno(errno, "Failed to fdopen() timestamp file %s: %m", path);
}
(void) fprintf(f,
- "%s"
- "TimestampNSec=" NSEC_FMT "\n",
- MESSAGE, timespec_load_nsec(ts));
+ MESSAGE
+ "TIMESTAMP_NSEC=" NSEC_FMT "\n",
+ timespec_load_nsec(ts));
- fflush(f);
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write timestamp file: %m");
if (futimens(fd, twice) < 0)
return log_error_errno(errno, "Failed to update timestamp on %s: %m", path);