From 74dd6b515fa968c5710b396a7664cac335e25ca8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 2 Dec 2016 01:54:41 +0100 Subject: core: run each system service with a fresh session keyring This patch ensures that each system service gets its own session kernel keyring automatically, and implicitly. Without this a keyring is allocated for it on-demand, but is then linked with the user's kernel keyring, which is OK behaviour for logged in users, but not so much for system services. With this change each service gets a session keyring that is specific to the service and ceases to exist when the service is shut down. The session keyring is not linked up with the user keyring and keys hence only search within the session boundaries by default. (This is useful in a later commit to store per-service material in the keyring, for example the invocation ID) (With input from David Howells) --- src/basic/exit-status.c | 3 +++ src/basic/exit-status.h | 1 + src/basic/missing.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ src/core/execute.c | 44 +++++++++++++++++++++++++++++++++++++++++++ src/core/execute.h | 9 +++++---- src/core/service.c | 1 + 6 files changed, 104 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/basic/exit-status.c b/src/basic/exit-status.c index 59557f8afe..1e23c32c3f 100644 --- a/src/basic/exit-status.c +++ b/src/basic/exit-status.c @@ -148,6 +148,9 @@ const char* exit_status_to_string(int status, ExitStatusLevel level) { case EXIT_SMACK_PROCESS_LABEL: return "SMACK_PROCESS_LABEL"; + + case EXIT_KEYRING: + return "KEYRING"; } } diff --git a/src/basic/exit-status.h b/src/basic/exit-status.h index 0cfdfd7891..d22b2c00e4 100644 --- a/src/basic/exit-status.h +++ b/src/basic/exit-status.h @@ -82,6 +82,7 @@ enum { EXIT_MAKE_STARTER, EXIT_CHOWN, EXIT_SMACK_PROCESS_LABEL, + EXIT_KEYRING, }; typedef enum ExitStatusLevel { diff --git a/src/basic/missing.h b/src/basic/missing.h index 1502b3f4f4..dd4425697f 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -1026,6 +1026,22 @@ struct btrfs_ioctl_quota_ctl_args { typedef int32_t key_serial_t; #endif +#ifndef KEYCTL_JOIN_SESSION_KEYRING +#define KEYCTL_JOIN_SESSION_KEYRING 1 +#endif + +#ifndef KEYCTL_CHOWN +#define KEYCTL_CHOWN 4 +#endif + +#ifndef KEYCTL_SETPERM +#define KEYCTL_SETPERM 5 +#endif + +#ifndef KEYCTL_DESCRIBE +#define KEYCTL_DESCRIBE 6 +#endif + #ifndef KEYCTL_READ #define KEYCTL_READ 11 #endif @@ -1034,10 +1050,44 @@ typedef int32_t key_serial_t; #define KEYCTL_SET_TIMEOUT 15 #endif +#ifndef KEY_POS_VIEW +#define KEY_POS_VIEW 0x01000000 +#define KEY_POS_READ 0x02000000 +#define KEY_POS_WRITE 0x04000000 +#define KEY_POS_SEARCH 0x08000000 +#define KEY_POS_LINK 0x10000000 +#define KEY_POS_SETATTR 0x20000000 + +#define KEY_USR_VIEW 0x00010000 +#define KEY_USR_READ 0x00020000 +#define KEY_USR_WRITE 0x00040000 +#define KEY_USR_SEARCH 0x00080000 +#define KEY_USR_LINK 0x00100000 +#define KEY_USR_SETATTR 0x00200000 + +#define KEY_GRP_VIEW 0x00000100 +#define KEY_GRP_READ 0x00000200 +#define KEY_GRP_WRITE 0x00000400 +#define KEY_GRP_SEARCH 0x00000800 +#define KEY_GRP_LINK 0x00001000 +#define KEY_GRP_SETATTR 0x00002000 + +#define KEY_OTH_VIEW 0x00000001 +#define KEY_OTH_READ 0x00000002 +#define KEY_OTH_WRITE 0x00000004 +#define KEY_OTH_SEARCH 0x00000008 +#define KEY_OTH_LINK 0x00000010 +#define KEY_OTH_SETATTR 0x00000020 +#endif + #ifndef KEY_SPEC_USER_KEYRING #define KEY_SPEC_USER_KEYRING -4 #endif +#ifndef KEY_SPEC_SESSION_KEYRING +#define KEY_SPEC_SESSION_KEYRING -3 +#endif + #ifndef PR_CAP_AMBIENT #define PR_CAP_AMBIENT 47 #endif diff --git a/src/core/execute.c b/src/core/execute.c index 07ab067c05..5ac270aa12 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2196,6 +2196,44 @@ static int apply_working_directory(const ExecContext *context, return 0; } +static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) { + key_serial_t keyring; + + assert(u); + assert(p); + + /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that + * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond + * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be + * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in + * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where + * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */ + + if (!(p->flags & EXEC_NEW_KEYRING)) + return 0; + + keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0); + if (keyring == -1) { + if (errno == ENOSYS) + log_debug_errno(errno, "Kernel keyring not supported, ignoring."); + else if (IN_SET(errno, EACCES, EPERM)) + log_debug_errno(errno, "Kernel keyring access prohibited, ignoring."); + else if (errno == EDQUOT) + log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring."); + else + return log_error_errno(errno, "Setting up kernel keyring failed: %m"); + + return 0; + } + + /* And now, make the keyring owned by the service's user */ + if (uid_is_valid(uid) || gid_is_valid(gid)) + if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0) + return log_error_errno(errno, "Failed to change ownership of session keyring: %m"); + + return 0; +} + static void append_socket_pair(int *array, unsigned *n, int pair[2]) { assert(array); assert(n); @@ -2638,6 +2676,12 @@ static int exec_child( (void) umask(context->umask); + r = setup_keyring(unit, params, uid, gid); + if (r < 0) { + *exit_status = EXIT_KEYRING; + return r; + } + if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { if (context->pam_name && username) { r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds); diff --git a/src/core/execute.h b/src/core/execute.h index 951c8f4da3..b376a6db55 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -228,12 +228,13 @@ typedef enum ExecFlags { EXEC_APPLY_PERMISSIONS = 1U << 0, EXEC_APPLY_CHROOT = 1U << 1, EXEC_APPLY_TTY_STDIN = 1U << 2, + EXEC_NEW_KEYRING = 1U << 3, /* The following are not used by execute.c, but by consumers internally */ - EXEC_PASS_FDS = 1U << 3, - EXEC_IS_CONTROL = 1U << 4, - EXEC_SETENV_RESULT = 1U << 5, - EXEC_SET_WATCHDOG = 1U << 6, + EXEC_PASS_FDS = 1U << 4, + EXEC_IS_CONTROL = 1U << 5, + EXEC_SETENV_RESULT = 1U << 6, + EXEC_SET_WATCHDOG = 1U << 7, } ExecFlags; struct ExecParameters { diff --git a/src/core/service.c b/src/core/service.c index 576416ad29..73a8104d17 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1344,6 +1344,7 @@ static int service_spawn( } else path = UNIT(s)->cgroup_path; + exec_params.flags |= MANAGER_IS_SYSTEM(UNIT(s)->manager) ? EXEC_NEW_KEYRING : 0; exec_params.argv = c->argv; exec_params.environment = final_env; exec_params.fds = fds; -- cgit v1.2.3-54-g00ecf From b3415f5daef49642be3d5f417b8880c078420ff7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 2 Dec 2016 15:05:55 +0100 Subject: core: store the invocation ID in the per-service keyring Let's store the invocation ID in the per-service keyring as a root-owned key, with strict access rights. This has the advantage over the environment-based ID passing that it also works from SUID binaries (as they key cannot be overidden by unprivileged code starting them), in contrast to the secure_getenv() based mode. The invocation ID is now passed in three different ways to a service: - As environment variable $INVOCATION_ID. This is easy to use, but may be overriden by unprivileged code (which might be a bad or a good thing), which means it's incompatible with SUID code (see above). - As extended attribute on the service cgroup. This cannot be overriden by unprivileged code, and may be queried safely from "outside" of a service. However, it is incompatible with containers right now, as unprivileged containers generally cannot set xattrs on cgroupfs. - As "invocation_id" key in the kernel keyring. This has the benefit that the key cannot be changed by unprivileged service code, and thus is safe to access from SUID code (see above). But do note that service code can replace the session keyring with a fresh one that lacks the key. However in that case the key will not be owned by root, which is easily detectable. The keyring is also incompatible with containers right now, as it is not properly namespace aware (but this is being worked on), and thus most container managers mask the keyring-related system calls. Ideally we'd only have one way to pass the invocation ID, but the different ways all have limitations. The invocation ID hookup in journald is currently only available on the host but not in containers, due to the mentioned limitations. How to verify the new invocation ID in the keyring: # systemd-run -t /bin/sh Running as unit: run-rd917366c04f847b480d486017f7239d6.service Press ^] three times within 1s to disconnect TTY. # keyctl show Session Keyring 680208392 --alswrv 0 0 keyring: _ses 250926536 ----s-rv 0 0 \_ user: invocation_id # keyctl request user invocation_id 250926536 # keyctl read 250926536 16 bytes of data in key: 9c96317c ac64495a a42b9cd7 4f3ff96b # echo $INVOCATION_ID 9c96317cac64495aa42b9cd74f3ff96b # ^D This creates a new transient service runnint a shell. Then verifies the contents of the keyring, requests the invocation ID key, and reads its payload. For comparison the invocation ID as passed via the environment variable is also displayed. --- src/core/execute.c | 15 +++++ src/libsystemd/sd-id128/sd-id128.c | 128 +++++++++++++++++++++++++++++++++++-- src/test/test-id128.c | 8 +++ 3 files changed, 146 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/core/execute.c b/src/core/execute.c index 5ac270aa12..4262f9433b 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2226,6 +2226,21 @@ static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) return 0; } + /* Populate they keyring with the invocation ID by default. */ + if (!sd_id128_is_null(u->invocation_id)) { + key_serial_t key; + + key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING); + if (key == -1) + log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m"); + else { + if (keyctl(KEYCTL_SETPERM, key, + KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH| + KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0) + return log_error_errno(errno, "Failed to restrict invocation ID permission: %m"); + } + } + /* And now, make the keyring owned by the service's user */ if (uid_is_valid(uid) || gid_is_valid(gid)) if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0) diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c index 0d673ba655..cc89f2de2e 100644 --- a/src/libsystemd/sd-id128/sd-id128.c +++ b/src/libsystemd/sd-id128/sd-id128.c @@ -23,13 +23,16 @@ #include "sd-id128.h" +#include "alloc-util.h" #include "fd-util.h" #include "hexdecoct.h" #include "id128-util.h" #include "io-util.h" #include "khash.h" #include "macro.h" +#include "missing.h" #include "random-util.h" +#include "user-util.h" #include "util.h" _public_ char *sd_id128_to_string(sd_id128_t id, char s[SD_ID128_STRING_MAX]) { @@ -130,6 +133,105 @@ _public_ int sd_id128_get_boot(sd_id128_t *ret) { return 0; } +static int get_invocation_from_keyring(sd_id128_t *ret) { + + _cleanup_free_ char *description = NULL; + char *d, *p, *g, *u, *e; + unsigned long perms; + key_serial_t key; + size_t sz = 256; + uid_t uid; + gid_t gid; + int r, c; + +#define MAX_PERMS ((unsigned long) (KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH| \ + KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH)) + + assert(ret); + + key = request_key("user", "invocation_id", NULL, 0); + if (key == -1) { + /* Keyring support not available? No invocation key stored? */ + if (IN_SET(errno, ENOSYS, ENOKEY)) + return 0; + + return -errno; + } + + for (;;) { + description = new(char, sz); + if (!description) + return -ENOMEM; + + c = keyctl(KEYCTL_DESCRIBE, key, (unsigned long) description, sz, 0); + if (c < 0) + return -errno; + + if ((size_t) c <= sz) + break; + + sz = c; + free(description); + } + + /* The kernel returns a final NUL in the string, verify that. */ + assert(description[c-1] == 0); + + /* Chop off the final description string */ + d = strrchr(description, ';'); + if (!d) + return -EIO; + *d = 0; + + /* Look for the permissions */ + p = strrchr(description, ';'); + if (!p) + return -EIO; + + errno = 0; + perms = strtoul(p + 1, &e, 16); + if (errno > 0) + return -errno; + if (e == p + 1) /* Read at least one character */ + return -EIO; + if (e != d) /* Must reached the end */ + return -EIO; + + if ((perms & ~MAX_PERMS) != 0) + return -EPERM; + + *p = 0; + + /* Look for the group ID */ + g = strrchr(description, ';'); + if (!g) + return -EIO; + r = parse_gid(g + 1, &gid); + if (r < 0) + return r; + if (gid != 0) + return -EPERM; + *g = 0; + + /* Look for the user ID */ + u = strrchr(description, ';'); + if (!u) + return -EIO; + r = parse_uid(u + 1, &uid); + if (r < 0) + return r; + if (uid != 0) + return -EPERM; + + c = keyctl(KEYCTL_READ, key, (unsigned long) ret, sizeof(sd_id128_t), 0); + if (c < 0) + return -errno; + if (c != sizeof(sd_id128_t)) + return -EIO; + + return 1; +} + _public_ int sd_id128_get_invocation(sd_id128_t *ret) { static thread_local sd_id128_t saved_invocation_id = {}; int r; @@ -137,15 +239,31 @@ _public_ int sd_id128_get_invocation(sd_id128_t *ret) { assert_return(ret, -EINVAL); if (sd_id128_is_null(saved_invocation_id)) { - const char *e; - e = secure_getenv("INVOCATION_ID"); - if (!e) - return -ENXIO; + /* We first try to read the invocation ID from the kernel keyring. This has the benefit that it is not + * fakeable by unprivileged code. If the information is not available in the keyring, we use + * $INVOCATION_ID but ignore the data if our process was called by less privileged code + * (i.e. secure_getenv() instead of getenv()). + * + * The kernel keyring is only relevant for system services (as for user services we don't store the + * invocation ID in the keyring, as there'd be no trust benefit in that). The environment variable is + * primarily relevant for user services, and sufficiently safe as no privilege boundary is involved. */ - r = sd_id128_from_string(e, &saved_invocation_id); + r = get_invocation_from_keyring(&saved_invocation_id); if (r < 0) return r; + + if (r == 0) { + const char *e; + + e = secure_getenv("INVOCATION_ID"); + if (!e) + return -ENXIO; + + r = sd_id128_from_string(e, &saved_invocation_id); + if (r < 0) + return r; + } } *ret = saved_invocation_id; diff --git a/src/test/test-id128.c b/src/test/test-id128.c index ab5a111ba9..e8c4c3e550 100644 --- a/src/test/test-id128.c +++ b/src/test/test-id128.c @@ -39,6 +39,7 @@ int main(int argc, char *argv[]) { char t[33], q[37]; _cleanup_free_ char *b = NULL; _cleanup_close_ int fd = -1; + int r; assert_se(sd_id128_randomize(&id) == 0); printf("random: %s\n", sd_id128_to_string(id, t)); @@ -159,5 +160,12 @@ int main(int argc, char *argv[]) { assert_se(sd_id128_get_machine_app_specific(SD_ID128_MAKE(51,df,0b,4b,c3,b0,4c,97,80,e2,99,b9,8c,a3,73,b8), &id2) >= 0); assert_se(!sd_id128_equal(id, id2)); + /* Query the invocation ID */ + r = sd_id128_get_invocation(&id); + if (r < 0) + log_warning_errno(r, "Failed to get invocation ID, ignoring: %m"); + else + log_info("Invocation ID: " SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id)); + return 0; } -- cgit v1.2.3-54-g00ecf From ab79099d1684457d040ee7c28b2012e8c1ea9a4f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 7 Dec 2016 20:14:43 +0100 Subject: pam: include pam_keyinit.so in our PAM fragments We want that systemd --user gets its own keyring as usual, even if the barebones PAM snippet we ship upstream is used. If we don't do this we get the basic keyring systemd --system sets up for us. --- src/login/systemd-user.m4 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/login/systemd-user.m4 b/src/login/systemd-user.m4 index e33963b125..4f85b4b7fe 100644 --- a/src/login/systemd-user.m4 +++ b/src/login/systemd-user.m4 @@ -3,10 +3,10 @@ # Used by systemd --user instances. account required pam_unix.so - m4_ifdef(`HAVE_SELINUX', -session required pam_selinux.so close -session required pam_selinux.so nottys open +session required pam_selinux.so close +session required pam_selinux.so nottys open )m4_dnl -session required pam_loginuid.so +session required pam_loginuid.so +session optional pam_keyinit.so force revoke session optional pam_systemd.so -- cgit v1.2.3-54-g00ecf