diff options
author | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-09-13 21:17:38 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-09-13 21:17:38 -0400 |
commit | c73c7c774cbd1f0e778254d51da819490a333ab4 (patch) | |
tree | 2db5fa808791e0b4fc671537690264caee56e6da /src/core/execute.c | |
parent | 5496aef5f410ca665c76fb1bbfb584c7925fd49e (diff) | |
parent | b3ec0a0674f4e499bcb6d2469acdf9d2d574c3d6 (diff) |
Merge branch 'systemd/parabola' into notsystemd/premove
# Conflicts:
# Makefile.am
# tmpfiles.d/etc.conf.m4
Diffstat (limited to 'src/core/execute.c')
-rw-r--r-- | src/core/execute.c | 348 |
1 files changed, 270 insertions, 78 deletions
diff --git a/src/core/execute.c b/src/core/execute.c index 5eb3f13695..7c178b97c3 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -25,6 +25,7 @@ #include <signal.h> #include <string.h> #include <sys/capability.h> +#include <sys/mman.h> #include <sys/personality.h> #include <sys/prctl.h> #include <sys/socket.h> @@ -288,7 +289,15 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) { return r; } -static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) { +static int connect_logger_as( + Unit *unit, + const ExecContext *context, + ExecOutput output, + const char *ident, + int nfd, + uid_t uid, + gid_t gid) { + int fd, r; assert(context); @@ -309,7 +318,7 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons return -errno; } - fd_inc_sndbuf(fd, SNDBUF_SIZE); + (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); dprintf(fd, "%s\n" @@ -320,18 +329,18 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons "%i\n" "%i\n", context->syslog_identifier ? context->syslog_identifier : ident, - unit_id, + unit->id, context->syslog_priority, !!context->syslog_level_prefix, output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE, output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE, is_terminal_output(output)); - if (fd != nfd) { - r = dup2(fd, nfd) < 0 ? -errno : nfd; - safe_close(fd); - } else - r = nfd; + if (fd == nfd) + return nfd; + + r = dup2(fd, nfd) < 0 ? -errno : nfd; + safe_close(fd); return r; } @@ -445,7 +454,10 @@ static int setup_output( int fileno, int socket_fd, const char *ident, - uid_t uid, gid_t gid) { + uid_t uid, + gid_t gid, + dev_t *journal_stream_dev, + ino_t *journal_stream_ino) { ExecOutput o; ExecInput i; @@ -455,6 +467,8 @@ static int setup_output( assert(context); assert(params); assert(ident); + assert(journal_stream_dev); + assert(journal_stream_ino); if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) { @@ -530,10 +544,21 @@ static int setup_output( case EXEC_OUTPUT_KMSG_AND_CONSOLE: case EXEC_OUTPUT_JOURNAL: case EXEC_OUTPUT_JOURNAL_AND_CONSOLE: - r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid); + r = connect_logger_as(unit, context, o, ident, fileno, uid, gid); if (r < 0) { log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr"); r = open_null_as(O_WRONLY, fileno); + } else { + struct stat st; + + /* If we connected this fd to the journal via a stream, patch the device/inode into the passed + * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits + * services to detect whether they are connected to the journal or not. */ + + if (fstat(fileno, &st) >= 0) { + *journal_stream_dev = st.st_dev; + *journal_stream_ino = st.st_ino; + } } return r; @@ -551,6 +576,10 @@ static int chown_terminal(int fd, uid_t uid) { assert(fd >= 0); + /* Before we chown/chmod the TTY, let's ensure this is actually a tty */ + if (isatty(fd) < 1) + return 0; + /* This might fail. What matters are the results. */ (void) fchown(fd, uid, -1); (void) fchmod(fd, TTY_MODE); @@ -794,7 +823,7 @@ static int setup_pam( const char *user, uid_t uid, const char *tty, - char ***pam_env, + char ***env, int fds[], unsigned n_fds) { static const struct pam_conv conv = { @@ -806,14 +835,14 @@ static int setup_pam( pam_handle_t *handle = NULL; sigset_t old_ss; int pam_code = PAM_SUCCESS, r; - char **e = NULL; + char **nv, **e = NULL; bool close_session = false; pid_t pam_pid = 0, parent_pid; int flags = 0; assert(name); assert(user); - assert(pam_env); + assert(env); /* We set up PAM in the parent process, then fork. The child * will then stay around until killed via PR_GET_PDEATHSIG or @@ -841,6 +870,12 @@ static int setup_pam( goto fail; } + STRV_FOREACH(nv, *env) { + pam_code = pam_putenv(handle, *nv); + if (pam_code != PAM_SUCCESS) + goto fail; + } + pam_code = pam_acct_mgmt(handle, flags); if (pam_code != PAM_SUCCESS) goto fail; @@ -961,8 +996,8 @@ static int setup_pam( if (!barrier_place_and_sync(&barrier)) log_error("PAM initialization failed"); - *pam_env = e; - e = NULL; + strv_free(*env); + *env = e; return 0; @@ -1190,6 +1225,115 @@ finish: return r; } +static int apply_memory_deny_write_execute(const ExecContext *c) { + scmp_filter_ctx *seccomp; + int r; + + assert(c); + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mmap), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) + goto finish; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + +static int apply_restrict_realtime(const ExecContext *c) { + static const int permitted_policies[] = { + SCHED_OTHER, + SCHED_BATCH, + SCHED_IDLE, + }; + + scmp_filter_ctx *seccomp; + unsigned i; + int r, p, max_policy = 0; + + assert(c); + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + /* Determine the highest policy constant we want to allow */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] > max_policy) + max_policy = permitted_policies[i]; + + /* Go through all policies with lower values than that, and block them -- unless they appear in the + * whitelist. */ + for (p = 0; p < max_policy; p++) { + bool good = false; + + /* Check if this is in the whitelist. */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] == p) { + good = true; + break; + } + + if (good) + continue; + + /* Deny this policy */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_EQ, p)); + if (r < 0) + goto finish; + } + + /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are unsigned here, + * hence no need no check for < 0 values. */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_GT, max_policy)); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); + return r; +} + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { @@ -1228,6 +1372,8 @@ static int build_environment( const char *home, const char *username, const char *shell, + dev_t journal_stream_dev, + ino_t journal_stream_ino, char ***ret) { _cleanup_strv_free_ char **our_env = NULL; @@ -1237,7 +1383,7 @@ static int build_environment( assert(c); assert(ret); - our_env = new0(char*, 11); + our_env = new0(char*, 12); if (!our_env) return -ENOMEM; @@ -1309,8 +1455,15 @@ static int build_environment( our_env[n_env++] = x; } + if (journal_stream_dev != 0 && journal_stream_ino != 0) { + if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0) + return -ENOMEM; + + our_env[n_env++] = x; + } + our_env[n_env++] = NULL; - assert(n_env <= 11); + assert(n_env <= 12); *ret = our_env; our_env = NULL; @@ -1354,9 +1507,9 @@ static bool exec_needs_mount_namespace( assert(context); assert(params); - if (!strv_isempty(context->read_write_dirs) || - !strv_isempty(context->read_only_dirs) || - !strv_isempty(context->inaccessible_dirs)) + if (!strv_isempty(context->read_write_paths) || + !strv_isempty(context->read_only_paths) || + !strv_isempty(context->inaccessible_paths)) return true; if (context->mount_flags != 0) @@ -1420,13 +1573,15 @@ static int exec_child( char **files_env, int *exit_status) { - _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL; + _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL; _cleanup_free_ char *mac_selinux_context_net = NULL; const char *username = NULL, *home = NULL, *shell = NULL, *wd; + dev_t journal_stream_dev = 0; + ino_t journal_stream_ino = 0; + bool needs_mount_namespace; uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; int i, r; - bool needs_mount_namespace; assert(unit); assert(command); @@ -1526,13 +1681,13 @@ static int exec_child( return r; } - r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid); + r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDOUT; return r; } - r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid); + r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino); if (r < 0) { *exit_status = EXIT_STDERR; return r; @@ -1671,9 +1826,43 @@ static int exec_child( } } + r = build_environment( + context, + params, + n_fds, + home, + username, + shell, + journal_stream_dev, + journal_stream_ino, + &our_env); + if (r < 0) { + *exit_status = EXIT_MEMORY; + return r; + } + + r = build_pass_environment(context, &pass_env); + if (r < 0) { + *exit_status = EXIT_MEMORY; + return r; + } + + accum_env = strv_env_merge(5, + params->environment, + our_env, + pass_env, + context->environment, + files_env, + NULL); + if (!accum_env) { + *exit_status = EXIT_MEMORY; + return -ENOMEM; + } + accum_env = strv_env_clean(accum_env); + umask(context->umask); - if (params->apply_permissions) { + if (params->apply_permissions && !command->privileged) { r = enforce_groups(context, username, gid); if (r < 0) { *exit_status = EXIT_GROUP; @@ -1707,7 +1896,7 @@ static int exec_child( #endif #ifdef HAVE_PAM if (context->pam_name && username) { - r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds); + r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds); if (r < 0) { *exit_status = EXIT_PAM; return r; @@ -1744,9 +1933,9 @@ static int exec_child( r = setup_namespace( params->apply_chroot ? context->root_directory : NULL, - context->read_write_dirs, - context->read_only_dirs, - context->inaccessible_dirs, + context->read_write_paths, + context->read_only_paths, + context->inaccessible_paths, tmp, var, context->private_devices, @@ -1798,7 +1987,7 @@ static int exec_child( } #ifdef HAVE_SELINUX - if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) { + if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) { r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net); if (r < 0) { *exit_status = EXIT_SELINUX_CONTEXT; @@ -1823,7 +2012,7 @@ static int exec_child( return r; } - if (params->apply_permissions) { + if (params->apply_permissions && !command->privileged) { bool use_address_families = context->address_families_whitelist || !set_isempty(context->address_families); @@ -1833,10 +2022,20 @@ static int exec_child( int secure_bits = context->secure_bits; for (i = 0; i < _RLIMIT_MAX; i++) { + if (!context->rlimit[i]) continue; - if (setrlimit_closest(i, context->rlimit[i]) < 0) { + r = setrlimit_closest(i, context->rlimit[i]); + if (r < 0) { + *exit_status = EXIT_LIMITS; + return r; + } + } + + /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */ + if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) { + if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) { *exit_status = EXIT_LIMITS; return -errno; } @@ -1897,7 +2096,7 @@ static int exec_child( } if (context->no_new_privileges || - (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter))) + (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter))) if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) { *exit_status = EXIT_NO_NEW_PRIVILEGES; return -errno; @@ -1912,6 +2111,22 @@ static int exec_child( } } + if (context->memory_deny_write_execute) { + r = apply_memory_deny_write_execute(context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + + if (context->restrict_realtime) { + r = apply_restrict_realtime(context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return r; + } + } + if (use_syscall_filter) { r = apply_seccomp(context); if (r < 0) { @@ -1946,39 +2161,12 @@ static int exec_child( #endif } - r = build_environment(context, params, n_fds, home, username, shell, &our_env); - if (r < 0) { - *exit_status = EXIT_MEMORY; - return r; - } - - r = build_pass_environment(context, &pass_env); - if (r < 0) { - *exit_status = EXIT_MEMORY; - return r; - } - - final_env = strv_env_merge(6, - params->environment, - our_env, - pass_env, - context->environment, - files_env, - pam_env, - NULL); - if (!final_env) { - *exit_status = EXIT_MEMORY; - return -ENOMEM; - } - - final_argv = replace_env_argv(argv, final_env); + final_argv = replace_env_argv(argv, accum_env); if (!final_argv) { *exit_status = EXIT_MEMORY; return -ENOMEM; } - final_env = strv_env_clean(final_env); - if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) { _cleanup_free_ char *line; @@ -1994,7 +2182,7 @@ static int exec_child( } } - execve(command->path, final_argv, final_env); + execve(command->path, final_argv, accum_env); *exit_status = EXIT_EXEC; return -errno; } @@ -2136,9 +2324,9 @@ void exec_context_done(ExecContext *c) { c->pam_name = mfree(c->pam_name); - c->read_only_dirs = strv_free(c->read_only_dirs); - c->read_write_dirs = strv_free(c->read_write_dirs); - c->inaccessible_dirs = strv_free(c->inaccessible_dirs); + c->read_only_paths = strv_free(c->read_only_paths); + c->read_write_paths = strv_free(c->read_write_paths); + c->inaccessible_paths = strv_free(c->inaccessible_paths); if (c->cpuset) CPU_FREE(c->cpuset); @@ -2371,7 +2559,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sPrivateDevices: %s\n" "%sProtectHome: %s\n" "%sProtectSystem: %s\n" - "%sIgnoreSIGPIPE: %s\n", + "%sIgnoreSIGPIPE: %s\n" + "%sMemoryDenyWriteExecute: %s\n" + "%sRestrictRealtime: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", prefix, c->root_directory ? c->root_directory : "/", @@ -2381,7 +2571,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->private_devices), prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), - prefix, yes_no(c->ignore_sigpipe)); + prefix, yes_no(c->ignore_sigpipe), + prefix, yes_no(c->memory_deny_write_execute), + prefix, yes_no(c->restrict_realtime)); STRV_FOREACH(e, c->environment) fprintf(f, "%sEnvironment: %s\n", prefix, *e); @@ -2540,21 +2732,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->pam_name) fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name); - if (strv_length(c->read_write_dirs) > 0) { - fprintf(f, "%sReadWriteDirs:", prefix); - strv_fprintf(f, c->read_write_dirs); + if (strv_length(c->read_write_paths) > 0) { + fprintf(f, "%sReadWritePaths:", prefix); + strv_fprintf(f, c->read_write_paths); fputs("\n", f); } - if (strv_length(c->read_only_dirs) > 0) { - fprintf(f, "%sReadOnlyDirs:", prefix); - strv_fprintf(f, c->read_only_dirs); + if (strv_length(c->read_only_paths) > 0) { + fprintf(f, "%sReadOnlyPaths:", prefix); + strv_fprintf(f, c->read_only_paths); fputs("\n", f); } - if (strv_length(c->inaccessible_dirs) > 0) { - fprintf(f, "%sInaccessibleDirs:", prefix); - strv_fprintf(f, c->inaccessible_dirs); + if (strv_length(c->inaccessible_paths) > 0) { + fprintf(f, "%sInaccessiblePaths:", prefix); + strv_fprintf(f, c->inaccessible_paths); fputs("\n", f); } @@ -2635,7 +2827,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { bool exec_context_maintains_privileges(ExecContext *c) { assert(c); - /* Returns true if the process forked off would run run under + /* Returns true if the process forked off would run under * an unchanged UID or as root. */ if (!c->user) @@ -2870,7 +3062,7 @@ int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) { return r; if (c->private_network && (*rt)->netns_storage_socket[0] < 0) { - if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0) + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0) return -errno; } |