From 7732f92bad5f24a4bd03bb357af46da56b0ac94d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 3 Feb 2016 20:32:06 +0100 Subject: nspawn: optionally run a stub init process as PID 1 This adds a new switch --as-pid2, which allows running commands as PID 2, while a stub init process is run as PID 1. This is useful in order to run arbitrary commands in a container, as PID1's semantics are different from all other processes regarding reaping of unknown children or signal handling. --- src/nspawn/nspawn-gperf.gperf | 3 +- src/nspawn/nspawn-settings.c | 93 ++++++++++++++++++++++- src/nspawn/nspawn-settings.h | 38 ++++++---- src/nspawn/nspawn-stub-pid1.c | 170 ++++++++++++++++++++++++++++++++++++++++++ src/nspawn/nspawn-stub-pid1.h | 22 ++++++ src/nspawn/nspawn.c | 52 ++++++++++--- 6 files changed, 350 insertions(+), 28 deletions(-) create mode 100644 src/nspawn/nspawn-stub-pid1.c create mode 100644 src/nspawn/nspawn-stub-pid1.h (limited to 'src/nspawn') diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf index 08020d510c..116655cdd2 100644 --- a/src/nspawn/nspawn-gperf.gperf +++ b/src/nspawn/nspawn-gperf.gperf @@ -15,7 +15,8 @@ struct ConfigPerfItem; %struct-type %includes %% -Exec.Boot, config_parse_tristate, 0, offsetof(Settings, boot) +Exec.Boot, config_parse_boot, 0, 0 +Exec.ProcessTwo, config_parse_pid2, 0, 0, Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters) Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment) Exec.User, config_parse_string, 0, offsetof(Settings, user) diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c index 3c552e0734..12524d3b89 100644 --- a/src/nspawn/nspawn-settings.c +++ b/src/nspawn/nspawn-settings.c @@ -24,6 +24,7 @@ #include "conf-parser.h" #include "nspawn-network.h" #include "nspawn-settings.h" +#include "parse-util.h" #include "process-util.h" #include "strv.h" #include "util.h" @@ -39,7 +40,7 @@ int settings_load(FILE *f, const char *path, Settings **ret) { if (!s) return -ENOMEM; - s->boot = -1; + s->start_mode = _START_MODE_INVALID; s->personality = PERSONALITY_INVALID; s->read_only = -1; @@ -303,3 +304,93 @@ int config_parse_veth_extra( return 0; } + +int config_parse_boot( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = parse_boolean(rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Boot= parameter %s, ignoring: %m", rvalue); + return 0; + } + + if (r > 0) { + if (settings->start_mode == START_PID2) + goto conflict; + + settings->start_mode = START_BOOT; + } else { + if (settings->start_mode == START_BOOT) + goto conflict; + + if (settings->start_mode < 0) + settings->start_mode = START_PID1; + } + + return 0; + +conflict: + log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring."); + return 0; +} + +int config_parse_pid2( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = parse_boolean(rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse ProcessTwo= parameter %s, ignoring: %m", rvalue); + return 0; + } + + if (r > 0) { + if (settings->start_mode == START_BOOT) + goto conflict; + + settings->start_mode = START_PID2; + } else { + if (settings->start_mode == START_PID2) + goto conflict; + + if (settings->start_mode < 0) + settings->start_mode = START_PID1; + } + + return 0; + +conflict: + log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring."); + return 0; +} diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h index 236a1baa4f..fdb07486da 100644 --- a/src/nspawn/nspawn-settings.h +++ b/src/nspawn/nspawn-settings.h @@ -27,26 +27,34 @@ #include "nspawn-expose-ports.h" #include "nspawn-mount.h" +typedef enum StartMode { + START_PID1, /* Run parameters as command line as process 1 */ + START_PID2, /* Use stub init process as PID 1, run parameters as command line as process 2 */ + START_BOOT, /* Search for init system, pass arguments as parameters */ + _START_MODE_MAX, + _START_MODE_INVALID = -1 +} StartMode; + typedef enum SettingsMask { - SETTING_BOOT = 1 << 0, - SETTING_ENVIRONMENT = 1 << 1, - SETTING_USER = 1 << 2, - SETTING_CAPABILITY = 1 << 3, - SETTING_KILL_SIGNAL = 1 << 4, - SETTING_PERSONALITY = 1 << 5, - SETTING_MACHINE_ID = 1 << 6, - SETTING_NETWORK = 1 << 7, - SETTING_EXPOSE_PORTS = 1 << 8, - SETTING_READ_ONLY = 1 << 9, - SETTING_VOLATILE_MODE = 1 << 10, - SETTING_CUSTOM_MOUNTS = 1 << 11, + SETTING_START_MODE = 1 << 0, + SETTING_ENVIRONMENT = 1 << 1, + SETTING_USER = 1 << 2, + SETTING_CAPABILITY = 1 << 3, + SETTING_KILL_SIGNAL = 1 << 4, + SETTING_PERSONALITY = 1 << 5, + SETTING_MACHINE_ID = 1 << 6, + SETTING_NETWORK = 1 << 7, + SETTING_EXPOSE_PORTS = 1 << 8, + SETTING_READ_ONLY = 1 << 9, + SETTING_VOLATILE_MODE = 1 << 10, + SETTING_CUSTOM_MOUNTS = 1 << 11, SETTING_WORKING_DIRECTORY = 1 << 12, - _SETTINGS_MASK_ALL = (1 << 13) -1 + _SETTINGS_MASK_ALL = (1 << 13) -1 } SettingsMask; typedef struct Settings { /* [Run] */ - int boot; + StartMode start_mode; char **parameters; char **environment; char *user; @@ -91,3 +99,5 @@ int config_parse_volatile_mode(const char *unit, const char *filename, unsigned int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_veth_extra(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_boot(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_pid2(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c new file mode 100644 index 0000000000..2de87e3c63 --- /dev/null +++ b/src/nspawn/nspawn-stub-pid1.c @@ -0,0 +1,170 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include + +#include "fd-util.h" +#include "log.h" +#include "nspawn-stub-pid1.h" +#include "process-util.h" +#include "signal-util.h" +#include "time-util.h" +#include "def.h" + +int stub_pid1(void) { + enum { + STATE_RUNNING, + STATE_REBOOT, + STATE_POWEROFF, + } state = STATE_RUNNING; + + sigset_t fullmask, oldmask, waitmask; + usec_t quit_usec = USEC_INFINITY; + pid_t pid; + int r; + + /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful + * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ + + assert_se(sigfillset(&fullmask) >= 0); + assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0); + + pid = fork(); + if (pid < 0) + return log_error_errno(errno, "Failed to fork child pid: %m"); + + if (pid == 0) { + /* Return in the child */ + assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0); + setsid(); + return 0; + } + + reset_all_signal_handlers(); + + log_close(); + close_all_fds(NULL, 0); + log_open(); + + rename_process("STUBINIT"); + + assert_se(sigemptyset(&waitmask) >= 0); + assert_se(sigset_add_many(&waitmask, + SIGCHLD, /* posix: process died */ + SIGINT, /* sysv: ctrl-alt-del */ + SIGRTMIN+3, /* systemd: halt */ + SIGRTMIN+4, /* systemd: poweroff */ + SIGRTMIN+5, /* systemd: reboot */ + SIGRTMIN+6, /* systemd: kexec */ + SIGRTMIN+13, /* systemd: halt */ + SIGRTMIN+14, /* systemd: poweroff */ + SIGRTMIN+15, /* systemd: reboot */ + SIGRTMIN+16, /* systemd: kexec */ + -1) >= 0); + + /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't + * support reexec/reloading in this stub process. */ + + for (;;) { + siginfo_t si; + usec_t current_usec; + + si.si_pid = 0; + r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG); + if (r < 0) { + r = log_error_errno(errno, "Failed to reap children: %m"); + goto finish; + } + + current_usec = now(CLOCK_MONOTONIC); + + if (si.si_pid == pid || current_usec >= quit_usec) { + + /* The child we started ourselves died or we reached a timeout. */ + + if (state == STATE_REBOOT) { /* dispatch a queued reboot */ + (void) reboot(RB_AUTOBOOT); + r = log_error_errno(errno, "Failed to reboot: %m"); + goto finish; + + } else if (state == STATE_POWEROFF) + (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */ + + if (si.si_pid == pid && si.si_code == CLD_EXITED) + r = si.si_status; /* pass on exit code */ + else + r = 255; /* signal, coredump, timeout, … */ + + goto finish; + } + if (si.si_pid != 0) + /* We reaped something. Retry until there's nothing more to reap. */ + continue; + + if (quit_usec == USEC_INFINITY) + r = sigwaitinfo(&waitmask, &si); + else { + struct timespec ts; + r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec)); + } + if (r < 0) { + if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */ + continue; + if (errno == EAGAIN) /* timeout reached */ + continue; + + r = log_error_errno(errno, "Failed to wait for signal: %m"); + goto finish; + } + + if (si.si_signo == SIGCHLD) + continue; /* Let's reap this */ + + if (state != STATE_RUNNING) + continue; + + /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a + * constant… */ + + if (si.si_signo == SIGRTMIN+3 || + si.si_signo == SIGRTMIN+4 || + si.si_signo == SIGRTMIN+13 || + si.si_signo == SIGRTMIN+14) + + state = STATE_POWEROFF; + + else if (si.si_signo == SIGINT || + si.si_signo == SIGRTMIN+5 || + si.si_signo == SIGRTMIN+6 || + si.si_signo == SIGRTMIN+15 || + si.si_signo == SIGRTMIN+16) + + state = STATE_REBOOT; + else + assert_not_reached("Got unexpected signal"); + + /* (void) kill_and_sigcont(pid, SIGTERM); */ + quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; + } + +finish: + _exit(r < 0 ? EXIT_FAILURE : r); +} diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h new file mode 100644 index 0000000000..36c1aaf5dd --- /dev/null +++ b/src/nspawn/nspawn-stub-pid1.h @@ -0,0 +1,22 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +int stub_pid1(void); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 0dae9984c9..e7314ac29c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -79,6 +79,7 @@ #include "nspawn-register.h" #include "nspawn-settings.h" #include "nspawn-setuid.h" +#include "nspawn-stub-pid1.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -123,7 +124,7 @@ static const char *arg_selinux_apifs_context = NULL; static const char *arg_slice = NULL; static bool arg_private_network = false; static bool arg_read_only = false; -static bool arg_boot = false; +static StartMode arg_start_mode = START_PID1; static bool arg_ephemeral = false; static LinkJournal arg_link_journal = LINK_AUTO; static bool arg_link_journal_try = false; @@ -193,6 +194,7 @@ static void help(void) { " -x --ephemeral Run container with snapshot of root directory, and\n" " remove it after exit\n" " -i --image=PATH File system device or disk image for the container\n" + " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n" " -b --boot Boot up full system (i.e. invoke init)\n" " --chdir=PATH Set working directory in the container\n" " -u --user=USER Run the command under specified user or uid\n" @@ -358,6 +360,7 @@ static int parse_argv(int argc, char *argv[]) { { "ephemeral", no_argument, NULL, 'x' }, { "user", required_argument, NULL, 'u' }, { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, + { "as-pid2", no_argument, NULL, 'a' }, { "boot", no_argument, NULL, 'b' }, { "uuid", required_argument, NULL, ARG_UUID }, { "read-only", no_argument, NULL, ARG_READ_ONLY }, @@ -404,7 +407,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:xp:n", options, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:n", options, NULL)) >= 0) switch (c) { @@ -495,8 +498,23 @@ static int parse_argv(int argc, char *argv[]) { break; case 'b': - arg_boot = true; - arg_settings_mask |= SETTING_BOOT; + if (arg_start_mode == START_PID2) { + log_error("--boot and --as-pid2 may not be combined."); + return -EINVAL; + } + + arg_start_mode = START_BOOT; + arg_settings_mask |= SETTING_START_MODE; + break; + + case 'a': + if (arg_start_mode == START_BOOT) { + log_error("--boot and --as-pid2 may not be combined."); + return -EINVAL; + } + + arg_start_mode = START_PID2; + arg_settings_mask |= SETTING_START_MODE; break; case ARG_UUID: @@ -876,7 +894,7 @@ static int parse_argv(int argc, char *argv[]) { if (arg_share_system) arg_register = false; - if (arg_boot && arg_share_system) { + if (arg_start_mode != START_PID1 && arg_share_system) { log_error("--boot and --share-system may not be combined."); return -EINVAL; } @@ -924,7 +942,7 @@ static int parse_argv(int argc, char *argv[]) { if (!arg_parameters) return log_oom(); - arg_settings_mask |= SETTING_BOOT; + arg_settings_mask |= SETTING_START_MODE; } /* Load all settings from .nspawn files */ @@ -960,7 +978,7 @@ static int verify_arguments(void) { return -EINVAL; } - if (arg_boot && arg_kill_signal <= 0) + if (arg_start_mode == START_BOOT && arg_kill_signal <= 0) arg_kill_signal = SIGRTMIN+3; return 0; @@ -2584,6 +2602,12 @@ static int inner_child( if (chdir(arg_chdir) < 0) return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir); + if (arg_start_mode == START_PID2) { + r = stub_pid1(); + if (r < 0) + return r; + } + /* Now, explicitly close the log, so that we * then can close all remaining fds. Closing * the log explicitly first has the benefit @@ -2595,7 +2619,7 @@ static int inner_child( log_close(); (void) fdset_close_others(fds); - if (arg_boot) { + if (arg_start_mode == START_BOOT) { char **a; size_t m; @@ -2917,9 +2941,9 @@ static int load_settings(void) { /* Copy over bits from the settings, unless they have been * explicitly masked by command line switches. */ - if ((arg_settings_mask & SETTING_BOOT) == 0 && - settings->boot >= 0) { - arg_boot = settings->boot; + if ((arg_settings_mask & SETTING_START_MODE) == 0 && + settings->start_mode >= 0) { + arg_start_mode = settings->start_mode; strv_free(arg_parameters); arg_parameters = settings->parameters; @@ -3074,6 +3098,10 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); + /* Make sure rename_process() in the stub init process can work */ + saved_argv = argv; + saved_argc = argc; + r = parse_argv(argc, argv); if (r <= 0) goto finish; @@ -3180,7 +3208,7 @@ int main(int argc, char *argv[]) { } } - if (arg_boot) { + if (arg_start_mode == START_BOOT) { if (path_is_os_tree(arg_directory) <= 0) { log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory); r = -EINVAL; -- cgit v1.2.3-54-g00ecf