diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2016-10-27 09:17:14 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-10-27 09:17:14 -0400 |
commit | ed06fa62035b96a5e93463e7b1a7b0203bee6fbb (patch) | |
tree | e75664a13dfb6fd11229567beb775637d54c51ab /src/core | |
parent | 492466c1b5fd8c168cb2a09b9330019f60074195 (diff) | |
parent | 59e856c7d33e8260d0ac3a8e21318d6f0768d75b (diff) |
Merge pull request #4485 from endocode/djalal/portable-branch-v1
core: improve mount namespace and working directory setup
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/execute.c | 177 |
1 files changed, 97 insertions, 80 deletions
diff --git a/src/core/execute.c b/src/core/execute.c index 5e7d7c25d7..7f343c4902 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1470,7 +1470,7 @@ finish: return r; } -static int apply_protect_sysctl(Unit *u, const ExecContext *c) { +static int apply_protect_sysctl(const Unit *u, const ExecContext *c) { scmp_filter_ctx seccomp; int r; @@ -1501,7 +1501,7 @@ finish: return r; } -static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { +static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) { assert(c); /* Turn off module syscalls on ProtectKernelModules=yes */ @@ -1512,7 +1512,7 @@ static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) { return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM)); } -static int apply_private_devices(Unit *u, const ExecContext *c) { +static int apply_private_devices(const Unit *u, const ExecContext *c) { assert(c); /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */ @@ -2003,6 +2003,92 @@ static int compile_read_write_paths( return 0; } +static int apply_mount_namespace(Unit *u, const ExecContext *context, + const ExecParameters *params, + ExecRuntime *runtime) { + int r; + _cleanup_free_ char **rw = NULL; + char *tmp = NULL, *var = NULL; + const char *root_dir = NULL; + NameSpaceInfo ns_info = { + .private_dev = context->private_devices, + .protect_control_groups = context->protect_control_groups, + .protect_kernel_tunables = context->protect_kernel_tunables, + .protect_kernel_modules = context->protect_kernel_modules, + }; + + assert(context); + + /* The runtime struct only contains the parent of the private /tmp, + * which is non-accessible to world users. Inside of it there's a /tmp + * that is sticky, and that's the one we want to use here. */ + + if (context->private_tmp && runtime) { + if (runtime->tmp_dir) + tmp = strjoina(runtime->tmp_dir, "/tmp"); + if (runtime->var_tmp_dir) + var = strjoina(runtime->var_tmp_dir, "/tmp"); + } + + r = compile_read_write_paths(context, params, &rw); + if (r < 0) + return r; + + if (params->flags & EXEC_APPLY_CHROOT) + root_dir = context->root_directory; + + r = setup_namespace(root_dir, &ns_info, rw, + context->read_only_paths, + context->inaccessible_paths, + tmp, + var, + context->protect_home, + context->protect_system, + context->mount_flags); + + /* If we couldn't set up the namespace this is probably due to a + * missing capability. In this case, silently proceeed. */ + if (IN_SET(r, -EPERM, -EACCES)) { + log_open(); + log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); + log_close(); + r = 0; + } + + return r; +} + +static int apply_working_directory(const ExecContext *context, + const ExecParameters *params, + const char *home, + const bool needs_mount_ns) { + const char *d; + const char *wd; + + assert(context); + + if (context->working_directory_home) + wd = home; + else if (context->working_directory) + wd = context->working_directory; + else + wd = "/"; + + if (params->flags & EXEC_APPLY_CHROOT) { + if (!needs_mount_ns && context->root_directory) + if (chroot(context->root_directory) < 0) + return -errno; + + d = wd; + } else + d = strjoina(strempty(context->root_directory), "/", strempty(wd)); + + if (chdir(d) < 0 && !context->working_directory_missing_ok) + return -errno; + + return 0; +} + static void append_socket_pair(int *array, unsigned *n, int pair[2]) { assert(array); assert(n); @@ -2139,7 +2225,7 @@ static int exec_child( _cleanup_free_ char *mac_selinux_context_net = NULL; _cleanup_free_ gid_t *supplementary_gids = NULL; const char *username = NULL, *groupname = NULL; - const char *home = NULL, *shell = NULL, *wd; + const char *home = NULL, *shell = NULL; dev_t journal_stream_dev = 0; ino_t journal_stream_ino = 0; bool needs_mount_namespace; @@ -2466,65 +2552,19 @@ static int exec_child( needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime); if (needs_mount_namespace) { - _cleanup_free_ char **rw = NULL; - char *tmp = NULL, *var = NULL; - NameSpaceInfo ns_info = { - .private_dev = context->private_devices, - .protect_control_groups = context->protect_control_groups, - .protect_kernel_tunables = context->protect_kernel_tunables, - .protect_kernel_modules = context->protect_kernel_modules, - }; - - /* The runtime struct only contains the parent - * of the private /tmp, which is - * non-accessible to world users. Inside of it - * there's a /tmp that is sticky, and that's - * the one we want to use here. */ - - if (context->private_tmp && runtime) { - if (runtime->tmp_dir) - tmp = strjoina(runtime->tmp_dir, "/tmp"); - if (runtime->var_tmp_dir) - var = strjoina(runtime->var_tmp_dir, "/tmp"); - } - - r = compile_read_write_paths(context, params, &rw); + r = apply_mount_namespace(unit, context, params, runtime); if (r < 0) { *exit_status = EXIT_NAMESPACE; return r; } - - r = setup_namespace( - (params->flags & EXEC_APPLY_CHROOT) ? context->root_directory : NULL, - &ns_info, - rw, - context->read_only_paths, - context->inaccessible_paths, - tmp, - var, - context->protect_home, - context->protect_system, - context->mount_flags); - - /* If we couldn't set up the namespace this is - * probably due to a missing capability. In this case, - * silently proceeed. */ - if (r == -EPERM || r == -EACCES) { - log_open(); - log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m"); - log_close(); - } else if (r < 0) { - *exit_status = EXIT_NAMESPACE; - return r; - } } - if (context->working_directory_home) - wd = home; - else if (context->working_directory) - wd = context->working_directory; - else - wd = "/"; + /* Apply just after mount namespace setup */ + r = apply_working_directory(context, params, home, needs_mount_namespace); + if (r < 0) { + *exit_status = EXIT_CHROOT; + return r; + } /* Drop group as early as possbile */ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) { @@ -2535,29 +2575,6 @@ static int exec_child( } } - if (params->flags & EXEC_APPLY_CHROOT) { - if (!needs_mount_namespace && context->root_directory) - if (chroot(context->root_directory) < 0) { - *exit_status = EXIT_CHROOT; - return -errno; - } - - if (chdir(wd) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; - } - } else { - const char *d; - - d = strjoina(strempty(context->root_directory), "/", strempty(wd)); - if (chdir(d) < 0 && - !context->working_directory_missing_ok) { - *exit_status = EXIT_CHDIR; - return -errno; - } - } - #ifdef HAVE_SELINUX if ((params->flags & EXEC_APPLY_PERMISSIONS) && mac_selinux_use() && |