diff options
Diffstat (limited to 'src/core/execute.c')
-rw-r--r-- | src/core/execute.c | 575 |
1 files changed, 277 insertions, 298 deletions
diff --git a/src/core/execute.c b/src/core/execute.c index 1b7b4a928d..7f343c4902 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -730,74 +730,157 @@ static int ask_for_confirmation(char *response, char **argv) { return r; } -static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) { - bool keep_groups = false; +static int get_fixed_user(const ExecContext *c, const char **user, + uid_t *uid, gid_t *gid, + const char **home, const char **shell) { int r; + const char *name; - assert(context); + assert(c); - /* Lookup and set GID and supplementary group list. Here too - * we avoid NSS lookups for gid=0. */ + if (!c->user) + return 0; - if (context->group || username) { - /* First step, initialize groups from /etc/groups */ - if (username && gid != 0) { - if (initgroups(username, gid) < 0) - return -errno; + /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway + * (i.e. are "/" or "/bin/nologin"). */ - keep_groups = true; - } + name = c->user; + r = get_user_creds_clean(&name, uid, gid, home, shell); + if (r < 0) + return r; - /* Second step, set our gids */ - if (setresgid(gid, gid, gid) < 0) + *user = name; + return 0; +} + +static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) { + int r; + const char *name; + + assert(c); + + if (!c->group) + return 0; + + name = c->group; + r = get_group_creds(&name, gid); + if (r < 0) + return r; + + *group = name; + return 0; +} + +static int get_fixed_supplementary_groups(const ExecContext *c, + const char *user, + const char *group, + gid_t gid, + gid_t **supplementary_gids, int *ngids) { + char **i; + int r, k = 0; + int ngroups_max; + bool keep_groups = false; + gid_t *groups = NULL; + _cleanup_free_ gid_t *l_gids = NULL; + + assert(c); + + if (!c->supplementary_groups) + return 0; + + /* + * If SupplementaryGroups= was passed then NGROUPS_MAX has to + * be positive, otherwise fail. + */ + errno = 0; + ngroups_max = (int) sysconf(_SC_NGROUPS_MAX); + if (ngroups_max <= 0) { + if (errno > 0) return -errno; + else + return -EOPNOTSUPP; /* For all other values */ } - if (context->supplementary_groups) { - int ngroups_max, k; - gid_t *gids; - char **i; + /* + * If user is given, then lookup GID and supplementary group list. + * We avoid NSS lookups for gid=0. + */ + if (user && gid_is_valid(gid) && gid != 0) { + /* First step, initialize groups from /etc/groups */ + if (initgroups(user, gid) < 0) + return -errno; - /* Final step, initialize any manually set supplementary groups */ - assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0); + keep_groups = true; + } - if (!(gids = new(gid_t, ngroups_max))) - return -ENOMEM; + l_gids = new(gid_t, ngroups_max); + if (!l_gids) + return -ENOMEM; - if (keep_groups) { - k = getgroups(ngroups_max, gids); - if (k < 0) { - free(gids); - return -errno; - } - } else - k = 0; + if (keep_groups) { + /* + * Lookup the list of groups that the user belongs to, we + * avoid NSS lookups here too for gid=0. + */ + k = ngroups_max; + if (getgrouplist(user, gid, l_gids, &k) < 0) + return -EINVAL; + } else + k = 0; - STRV_FOREACH(i, context->supplementary_groups) { - const char *g; + STRV_FOREACH(i, c->supplementary_groups) { + const char *g; - if (k >= ngroups_max) { - free(gids); - return -E2BIG; - } + if (k >= ngroups_max) + return -E2BIG; - g = *i; - r = get_group_creds(&g, gids+k); - if (r < 0) { - free(gids); - return r; - } + g = *i; + r = get_group_creds(&g, l_gids+k); + if (r < 0) + return r; - k++; - } + k++; + } - r = maybe_setgroups(k, gids); - if (r < 0) { - free(gids); + /* + * Sets ngids to zero to drop all supplementary groups, happens + * when we are under root and SupplementaryGroups= is empty. + */ + if (k == 0) { + *ngids = 0; + return 0; + } + + /* Otherwise get the final list of supplementary groups */ + groups = memdup(l_gids, sizeof(gid_t) * k); + if (!groups) + return -ENOMEM; + + *supplementary_gids = groups; + *ngids = k; + + groups = NULL; + + return 0; +} + +static int enforce_groups(const ExecContext *context, gid_t gid, + gid_t *supplementary_gids, int ngids) { + int r; + + assert(context); + + /* Handle SupplementaryGroups= even if it is empty */ + if (context->supplementary_groups) { + r = maybe_setgroups(ngids, supplementary_gids); + if (r < 0) return r; - } + } - free(gids); + if (gid_is_valid(gid)) { + /* Then set our gids */ + if (setresgid(gid, gid, gid) < 0) + return -errno; } return 0; @@ -806,6 +889,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ static int enforce_user(const ExecContext *context, uid_t uid) { assert(context); + if (!uid_is_valid(uid)) + return 0; + /* Sets (but doesn't look up) the uid and make sure we keep the * capabilities while doing so. */ @@ -1099,18 +1185,19 @@ static void rename_process_from_path(const char *path) { #ifdef HAVE_SECCOMP static bool skip_seccomp_unavailable(const Unit* u, const char* msg) { - if (!is_seccomp_available()) { - log_open(); - log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); - log_close(); - return true; - } - return false; + + if (is_seccomp_available()) + return false; + + log_open(); + log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg); + log_close(); + return true; } static int apply_seccomp(const Unit* u, const ExecContext *c) { uint32_t negative_action, action; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; void *id; int r; @@ -1161,7 +1248,7 @@ finish: } static int apply_address_families(const Unit* u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; Iterator i; int r; @@ -1170,13 +1257,9 @@ static int apply_address_families(const Unit* u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "RestrictAddressFamilies=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; if (c->address_families_whitelist) { int af, first = 0, last = 0; @@ -1273,10 +1356,6 @@ static int apply_address_families(const Unit* u, const ExecContext *c) { } } - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1285,7 +1364,7 @@ finish: } static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) { - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; int r; assert(c); @@ -1293,13 +1372,9 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_rule_add( seccomp, @@ -1319,10 +1394,6 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1337,7 +1408,7 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { SCHED_IDLE, }; - scmp_filter_ctx *seccomp; + scmp_filter_ctx seccomp; unsigned i; int r, p, max_policy = 0; @@ -1346,13 +1417,9 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "RestrictRealtime=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; /* Determine the highest policy constant we want to allow */ for (i = 0; i < ELEMENTSOF(permitted_policies); i++) @@ -1396,10 +1463,6 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1407,8 +1470,8 @@ finish: return r; } -static int apply_protect_sysctl(Unit *u, const ExecContext *c) { - scmp_filter_ctx *seccomp; +static int apply_protect_sysctl(const Unit *u, const ExecContext *c) { + scmp_filter_ctx seccomp; int r; assert(c); @@ -1419,13 +1482,9 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "ProtectKernelTunables=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); + r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW); if (r < 0) - goto finish; + return r; r = seccomp_rule_add( seccomp, @@ -1435,10 +1494,6 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) { if (r < 0) goto finish; - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - r = seccomp_load(seccomp); finish: @@ -1446,57 +1501,18 @@ finish: return r; } -static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { - static const int module_syscalls[] = { - SCMP_SYS(delete_module), - SCMP_SYS(finit_module), - SCMP_SYS(init_module), - }; - - scmp_filter_ctx *seccomp; - unsigned i; - int r; - +static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) { assert(c); - /* Turn of module syscalls on ProtectKernelModules=yes */ + /* Turn off module syscalls on ProtectKernelModules=yes */ if (skip_seccomp_unavailable(u, "ProtectKernelModules=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); - if (r < 0) - goto finish; - - for (i = 0; i < ELEMENTSOF(module_syscalls); i++) { - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), - module_syscalls[i], 0); - if (r < 0) - goto finish; - } - - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - - r = seccomp_load(seccomp); - -finish: - seccomp_release(seccomp); - return r; + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); } -static int apply_private_devices(Unit *u, const ExecContext *c) { - const SystemCallFilterSet *set; - scmp_filter_ctx *seccomp; - const char *sys; - bool syscalls_found = false; - int r; - +static int apply_private_devices(const Unit *u, const ExecContext *c) { assert(c); /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ @@ -1504,61 +1520,7 @@ static int apply_private_devices(Unit *u, const ExecContext *c) { if (skip_seccomp_unavailable(u, "PrivateDevices=")) return 0; - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return -ENOMEM; - - r = seccomp_add_secondary_archs(seccomp); - if (r < 0) - goto finish; - - for (set = syscall_filter_sets; set->set_name; set++) - if (streq(set->set_name, "@raw-io")) { - syscalls_found = true; - break; - } - - /* We should never fail here */ - if (!syscalls_found) { - r = -EOPNOTSUPP; - goto finish; - } - - NULSTR_FOREACH(sys, set->value) { - int id; - bool add = true; - -#ifndef __NR_s390_pci_mmio_read - if (streq(sys, "s390_pci_mmio_read")) - add = false; -#endif -#ifndef __NR_s390_pci_mmio_write - if (streq(sys, "s390_pci_mmio_write")) - add = false; -#endif - - if (!add) - continue; - - id = seccomp_syscall_resolve_name(sys); - - r = seccomp_rule_add( - seccomp, - SCMP_ACT_ERRNO(EPERM), - id, 0); - if (r < 0) - goto finish; - } - - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) - goto finish; - - r = seccomp_load(seccomp); - -finish: - seccomp_release(seccomp); - return r; + return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM)); } #endif @@ -1804,9 +1766,9 @@ static int setup_private_users(uid_t uid, gid_t gid) { asprintf(&uid_map, "0 0 1\n" /* Map root → root */ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */ - uid, uid); /* The case where the above is the same */ + uid, uid); else - uid_map = strdup("0 0 1\n"); + uid_map = strdup("0 0 1\n"); /* The case where the above is the same */ if (!uid_map) return -ENOMEM; @@ -2041,6 +2003,92 @@ static int compile_read_write_paths( return 0; } +static int apply_mount_namespace(Unit *u, const ExecContext *context, + const ExecParameters *params, + ExecRuntime *runtime) { + int r; + _cleanup_free_ char **rw = NULL; + char *tmp = NULL, *var = NULL; + const char *root_dir = NULL; + NameSpaceInfo ns_info = { + .private_dev = context->private_devices, + .protect_control_groups = context->protect_control_groups, + .protect_kernel_tunables = context->protect_kernel_tunables, + .protect_kernel_modules = context->protect_kernel_modules, + }; + + assert(context); + + /* The runtime struct only contains the parent of the private /tmp, + * which is non-accessible to world users. Inside of it there's a /tmp + * that is sticky, and that's the one we want to use here. */ + + if (context->private_tmp && runtime) { + if (runtime->tmp_dir) + tmp = strjoina(runtime->tmp_dir, "/tmp"); + if (runtime->var_tmp_dir) + var = strjoina(runtime->var_tmp_dir, "/tmp"); + } + + r = compile_read_write_paths(context, params, &rw); + if (r < 0) + return r; + + if (params->flags & EXEC_APPLY_CHROOT) + root_dir = context->root_directory; + + r = setup_namespace(root_dir, &ns_info, rw, + context->read_only_paths, + context->inaccessible_paths, + tmp, + var, + context->protect_home, + context->protect_system, + context->mount_flags); + + /* If we couldn't set up the namespace this is probably due to a + * missing capability. In this case, silently proceeed. */ + if (IN_SET(r, -EPERM, -EACCES)) { + log_open(); + log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); + log_close(); + r = 0; + } + + return r; +} + +static int apply_working_directory(const ExecContext *context, + const ExecParameters *params, + const char *home, + const bool needs_mount_ns) { + const char *d; + const char *wd; + + assert(context); + + if (context->working_directory_home) + wd = home; + else if (context->working_directory) + wd = context->working_directory; + else + wd = "/"; + + if (params->flags & EXEC_APPLY_CHROOT) { + if (!needs_mount_ns && context->root_directory) + if (chroot(context->root_directory) < 0) + return -errno; + + d = wd; + } else + d = strjoina(strempty(context->root_directory), "/", strempty(wd)); + + if (chdir(d) < 0 && !context->working_directory_missing_ok) + return -errno; + + return 0; +} + static void append_socket_pair(int *array, unsigned *n, int pair[2]) { assert(array); assert(n); @@ -2175,13 +2223,15 @@ static int exec_child( _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL; _cleanup_free_ char *mac_selinux_context_net = NULL; - const char *username = NULL, *home = NULL, *shell = NULL, *wd; + _cleanup_free_ gid_t *supplementary_gids = NULL; + const char *username = NULL, *groupname = NULL; + const char *home = NULL, *shell = NULL; dev_t journal_stream_dev = 0; ino_t journal_stream_ino = 0; bool needs_mount_namespace; uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; - int i, r; + int i, r, ngids = 0; assert(unit); assert(command); @@ -2273,26 +2323,23 @@ static int exec_child( username = dcreds->user->name; } else { - if (context->user) { - username = context->user; - r = get_user_creds_clean(&username, &uid, &gid, &home, &shell); - if (r < 0) { - *exit_status = EXIT_USER; - return r; - } - - /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway - * (i.e. are "/" or "/bin/nologin"). */ + r = get_fixed_user(context, &username, &uid, &gid, &home, &shell); + if (r < 0) { + *exit_status = EXIT_USER; + return r; } - if (context->group) { - const char *g = context->group; + r = get_fixed_group(context, &groupname, &gid); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; + } - r = get_group_creds(&g, &gid); - if (r < 0) { - *exit_status = EXIT_GROUP; - return r; - } + r = get_fixed_supplementary_groups(context, username, groupname, + gid, &supplementary_gids, &ngids); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; } } @@ -2505,97 +2552,29 @@ static int exec_child( needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); if (needs_mount_namespace) { - _cleanup_free_ char **rw = NULL; - char *tmp = NULL, *var = NULL; - NameSpaceInfo ns_info = { - .private_dev = context->private_devices, - .protect_control_groups = context->protect_control_groups, - .protect_kernel_tunables = context->protect_kernel_tunables, - .protect_kernel_modules = context->protect_kernel_modules, - }; - - /* The runtime struct only contains the parent - * of the private /tmp, which is - * non-accessible to world users. Inside of it - * there's a /tmp that is sticky, and that's - * the one we want to use here. */ - - if (context->private_tmp && runtime) { - if (runtime->tmp_dir) - tmp = strjoina(runtime->tmp_dir, "/tmp"); - if (runtime->var_tmp_dir) - var = strjoina(runtime->var_tmp_dir, "/tmp"); - } - - r = compile_read_write_paths(context, params, &rw); + r = apply_mount_namespace(unit, context, params, runtime); if (r < 0) { *exit_status = EXIT_NAMESPACE; return r; } + } - r = setup_namespace( - (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL, - &ns_info, - rw, - context->read_only_paths, - context->inaccessible_paths, - tmp, - var, - context->protect_home, - context->protect_system, - context->mount_flags); - - /* If we couldn't set up the namespace this is - * probably due to a missing capability. In this case, - * silently proceeed. */ - if (r == -EPERM || r == -EACCES) { - log_open(); - log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); - log_close(); - } else if (r < 0) { - *exit_status = EXIT_NAMESPACE; - return r; - } + /* Apply just after mount namespace setup */ + r = apply_working_directory(context, params, home, needs_mount_namespace); + if (r < 0) { + *exit_status = EXIT_CHROOT; + return r; } + /* Drop group as early as possbile */ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { - r = enforce_groups(context, username, gid); + r = enforce_groups(context, gid, supplementary_gids, ngids); if (r < 0) { *exit_status = EXIT_GROUP; return r; } } - if (context->working_directory_home) - wd = home; - else if (context->working_directory) - wd = context->working_directory; - else - wd = "/"; - - if (params->flags & EXEC_APPLY_CHROOT) { - if (!needs_mount_namespace && context->root_directory) - if (chroot(context->root_directory) < 0) { - *exit_status = EXIT_CHROOT; - return -errno; - } - - if (chdir(wd) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; - } - } else { - const char *d; - - d = strjoina(strempty(context->root_directory), "/", strempty(wd)); - if (chdir(d) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; - } - } - #ifdef HAVE_SELINUX if ((params->flags & EXEC_APPLY_PERMISSIONS) && mac_selinux_use() && |