diff options
Diffstat (limited to 'src/nspawn')
-rw-r--r-- | src/nspawn/nspawn-gperf.gperf | 4 | ||||
-rw-r--r-- | src/nspawn/nspawn-settings.c | 94 | ||||
-rw-r--r-- | src/nspawn/nspawn-settings.h | 40 | ||||
-rw-r--r-- | src/nspawn/nspawn-stub-pid1.c | 170 | ||||
-rw-r--r-- | src/nspawn/nspawn-stub-pid1.h | 22 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 89 |
6 files changed, 388 insertions, 31 deletions
diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf index 58f9f4c635..116655cdd2 100644 --- a/src/nspawn/nspawn-gperf.gperf +++ b/src/nspawn/nspawn-gperf.gperf @@ -15,7 +15,8 @@ struct ConfigPerfItem; %struct-type %includes %% -Exec.Boot, config_parse_tristate, 0, offsetof(Settings, boot) +Exec.Boot, config_parse_boot, 0, 0 +Exec.ProcessTwo, config_parse_pid2, 0, 0, Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters) Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment) Exec.User, config_parse_string, 0, offsetof(Settings, user) @@ -24,6 +25,7 @@ Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal) Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id) +Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory) Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode) Files.Bind, config_parse_bind, 0, 0 diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c index d6b64d8d5a..12524d3b89 100644 --- a/src/nspawn/nspawn-settings.c +++ b/src/nspawn/nspawn-settings.c @@ -24,6 +24,7 @@ #include "conf-parser.h" #include "nspawn-network.h" #include "nspawn-settings.h" +#include "parse-util.h" #include "process-util.h" #include "strv.h" #include "util.h" @@ -39,7 +40,7 @@ int settings_load(FILE *f, const char *path, Settings **ret) { if (!s) return -ENOMEM; - s->boot = -1; + s->start_mode = _START_MODE_INVALID; s->personality = PERSONALITY_INVALID; s->read_only = -1; @@ -74,6 +75,7 @@ Settings* settings_free(Settings *s) { strv_free(s->parameters); strv_free(s->environment); free(s->user); + free(s->working_directory); strv_free(s->network_interfaces); strv_free(s->network_macvlan); @@ -302,3 +304,93 @@ int config_parse_veth_extra( return 0; } + +int config_parse_boot( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = parse_boolean(rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Boot= parameter %s, ignoring: %m", rvalue); + return 0; + } + + if (r > 0) { + if (settings->start_mode == START_PID2) + goto conflict; + + settings->start_mode = START_BOOT; + } else { + if (settings->start_mode == START_BOOT) + goto conflict; + + if (settings->start_mode < 0) + settings->start_mode = START_PID1; + } + + return 0; + +conflict: + log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring."); + return 0; +} + +int config_parse_pid2( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = parse_boolean(rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse ProcessTwo= parameter %s, ignoring: %m", rvalue); + return 0; + } + + if (r > 0) { + if (settings->start_mode == START_BOOT) + goto conflict; + + settings->start_mode = START_PID2; + } else { + if (settings->start_mode == START_PID2) + goto conflict; + + if (settings->start_mode < 0) + settings->start_mode = START_PID1; + } + + return 0; + +conflict: + log_syntax(unit, LOG_ERR, filename, line, r, "Conflicting Boot= or ProcessTwo= setting found. Ignoring."); + return 0; +} diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h index 10230a5b83..fdb07486da 100644 --- a/src/nspawn/nspawn-settings.h +++ b/src/nspawn/nspawn-settings.h @@ -27,25 +27,34 @@ #include "nspawn-expose-ports.h" #include "nspawn-mount.h" +typedef enum StartMode { + START_PID1, /* Run parameters as command line as process 1 */ + START_PID2, /* Use stub init process as PID 1, run parameters as command line as process 2 */ + START_BOOT, /* Search for init system, pass arguments as parameters */ + _START_MODE_MAX, + _START_MODE_INVALID = -1 +} StartMode; + typedef enum SettingsMask { - SETTING_BOOT = 1 << 0, - SETTING_ENVIRONMENT = 1 << 1, - SETTING_USER = 1 << 2, - SETTING_CAPABILITY = 1 << 3, - SETTING_KILL_SIGNAL = 1 << 4, - SETTING_PERSONALITY = 1 << 5, - SETTING_MACHINE_ID = 1 << 6, - SETTING_NETWORK = 1 << 7, - SETTING_EXPOSE_PORTS = 1 << 8, - SETTING_READ_ONLY = 1 << 9, - SETTING_VOLATILE_MODE = 1 << 10, - SETTING_CUSTOM_MOUNTS = 1 << 11, - _SETTINGS_MASK_ALL = (1 << 12) -1 + SETTING_START_MODE = 1 << 0, + SETTING_ENVIRONMENT = 1 << 1, + SETTING_USER = 1 << 2, + SETTING_CAPABILITY = 1 << 3, + SETTING_KILL_SIGNAL = 1 << 4, + SETTING_PERSONALITY = 1 << 5, + SETTING_MACHINE_ID = 1 << 6, + SETTING_NETWORK = 1 << 7, + SETTING_EXPOSE_PORTS = 1 << 8, + SETTING_READ_ONLY = 1 << 9, + SETTING_VOLATILE_MODE = 1 << 10, + SETTING_CUSTOM_MOUNTS = 1 << 11, + SETTING_WORKING_DIRECTORY = 1 << 12, + _SETTINGS_MASK_ALL = (1 << 13) -1 } SettingsMask; typedef struct Settings { /* [Run] */ - int boot; + StartMode start_mode; char **parameters; char **environment; char *user; @@ -54,6 +63,7 @@ typedef struct Settings { int kill_signal; unsigned long personality; sd_id128_t machine_id; + char *working_directory; /* [Image] */ int read_only; @@ -89,3 +99,5 @@ int config_parse_volatile_mode(const char *unit, const char *filename, unsigned int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_veth_extra(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_boot(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_pid2(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c new file mode 100644 index 0000000000..2de87e3c63 --- /dev/null +++ b/src/nspawn/nspawn-stub-pid1.c @@ -0,0 +1,170 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sys/reboot.h> +#include <sys/unistd.h> +#include <sys/wait.h> + +#include "fd-util.h" +#include "log.h" +#include "nspawn-stub-pid1.h" +#include "process-util.h" +#include "signal-util.h" +#include "time-util.h" +#include "def.h" + +int stub_pid1(void) { + enum { + STATE_RUNNING, + STATE_REBOOT, + STATE_POWEROFF, + } state = STATE_RUNNING; + + sigset_t fullmask, oldmask, waitmask; + usec_t quit_usec = USEC_INFINITY; + pid_t pid; + int r; + + /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful + * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ + + assert_se(sigfillset(&fullmask) >= 0); + assert_se(sigprocmask(SIG_BLOCK, &fullmask, &oldmask) >= 0); + + pid = fork(); + if (pid < 0) + return log_error_errno(errno, "Failed to fork child pid: %m"); + + if (pid == 0) { + /* Return in the child */ + assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) >= 0); + setsid(); + return 0; + } + + reset_all_signal_handlers(); + + log_close(); + close_all_fds(NULL, 0); + log_open(); + + rename_process("STUBINIT"); + + assert_se(sigemptyset(&waitmask) >= 0); + assert_se(sigset_add_many(&waitmask, + SIGCHLD, /* posix: process died */ + SIGINT, /* sysv: ctrl-alt-del */ + SIGRTMIN+3, /* systemd: halt */ + SIGRTMIN+4, /* systemd: poweroff */ + SIGRTMIN+5, /* systemd: reboot */ + SIGRTMIN+6, /* systemd: kexec */ + SIGRTMIN+13, /* systemd: halt */ + SIGRTMIN+14, /* systemd: poweroff */ + SIGRTMIN+15, /* systemd: reboot */ + SIGRTMIN+16, /* systemd: kexec */ + -1) >= 0); + + /* Note that we ignore SIGTERM (sysv's reexec), SIGHUP (reload), and all other signals here, since we don't + * support reexec/reloading in this stub process. */ + + for (;;) { + siginfo_t si; + usec_t current_usec; + + si.si_pid = 0; + r = waitid(P_ALL, 0, &si, WEXITED|WNOHANG); + if (r < 0) { + r = log_error_errno(errno, "Failed to reap children: %m"); + goto finish; + } + + current_usec = now(CLOCK_MONOTONIC); + + if (si.si_pid == pid || current_usec >= quit_usec) { + + /* The child we started ourselves died or we reached a timeout. */ + + if (state == STATE_REBOOT) { /* dispatch a queued reboot */ + (void) reboot(RB_AUTOBOOT); + r = log_error_errno(errno, "Failed to reboot: %m"); + goto finish; + + } else if (state == STATE_POWEROFF) + (void) reboot(RB_POWER_OFF); /* if this fails, fall back to normal exit. */ + + if (si.si_pid == pid && si.si_code == CLD_EXITED) + r = si.si_status; /* pass on exit code */ + else + r = 255; /* signal, coredump, timeout, … */ + + goto finish; + } + if (si.si_pid != 0) + /* We reaped something. Retry until there's nothing more to reap. */ + continue; + + if (quit_usec == USEC_INFINITY) + r = sigwaitinfo(&waitmask, &si); + else { + struct timespec ts; + r = sigtimedwait(&waitmask, &si, timespec_store(&ts, quit_usec - current_usec)); + } + if (r < 0) { + if (errno == EINTR) /* strace -p attach can result in EINTR, let's handle this nicely. */ + continue; + if (errno == EAGAIN) /* timeout reached */ + continue; + + r = log_error_errno(errno, "Failed to wait for signal: %m"); + goto finish; + } + + if (si.si_signo == SIGCHLD) + continue; /* Let's reap this */ + + if (state != STATE_RUNNING) + continue; + + /* Would love to use a switch() statement here, but SIGRTMIN is actually a function call, not a + * constant… */ + + if (si.si_signo == SIGRTMIN+3 || + si.si_signo == SIGRTMIN+4 || + si.si_signo == SIGRTMIN+13 || + si.si_signo == SIGRTMIN+14) + + state = STATE_POWEROFF; + + else if (si.si_signo == SIGINT || + si.si_signo == SIGRTMIN+5 || + si.si_signo == SIGRTMIN+6 || + si.si_signo == SIGRTMIN+15 || + si.si_signo == SIGRTMIN+16) + + state = STATE_REBOOT; + else + assert_not_reached("Got unexpected signal"); + + /* (void) kill_and_sigcont(pid, SIGTERM); */ + quit_usec = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; + } + +finish: + _exit(r < 0 ? EXIT_FAILURE : r); +} diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h new file mode 100644 index 0000000000..36c1aaf5dd --- /dev/null +++ b/src/nspawn/nspawn-stub-pid1.h @@ -0,0 +1,22 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +int stub_pid1(void); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 9dd4c051b2..370161e9bf 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -79,6 +79,7 @@ #include "nspawn-register.h" #include "nspawn-settings.h" #include "nspawn-setuid.h" +#include "nspawn-stub-pid1.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -114,6 +115,7 @@ typedef enum LinkJournal { static char *arg_directory = NULL; static char *arg_template = NULL; +static char *arg_chdir = NULL; static char *arg_user = NULL; static sd_id128_t arg_uuid = {}; static char *arg_machine = NULL; @@ -122,7 +124,7 @@ static const char *arg_selinux_apifs_context = NULL; static const char *arg_slice = NULL; static bool arg_private_network = false; static bool arg_read_only = false; -static bool arg_boot = false; +static StartMode arg_start_mode = START_PID1; static bool arg_ephemeral = false; static LinkJournal arg_link_journal = LINK_AUTO; static bool arg_link_journal_try = false; @@ -192,7 +194,9 @@ static void help(void) { " -x --ephemeral Run container with snapshot of root directory, and\n" " remove it after exit\n" " -i --image=PATH File system device or disk image for the container\n" + " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n" " -b --boot Boot up full system (i.e. invoke init)\n" + " --chdir=PATH Set working directory in the container\n" " -u --user=USER Run the command under specified user or uid\n" " -M --machine=NAME Set the machine name for the container\n" " --uuid=UUID Set a specific machine UUID for the container\n" @@ -231,8 +235,8 @@ static void help(void) { " capability\n" " --drop-capability=CAP Drop the specified capability from the default set\n" " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n" - " --link-journal=MODE Link up guest journal, one of no, auto, guest, host,\n" - " try-guest, try-host\n" + " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n" + " host, try-guest, try-host\n" " -j Equivalent to --link-journal=try-guest\n" " --read-only Mount the root directory read-only\n" " --bind=PATH[:PATH[:OPTIONS]]\n" @@ -345,6 +349,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_PRIVATE_USERS, ARG_KILL_SIGNAL, ARG_SETTINGS, + ARG_CHDIR, }; static const struct option options[] = { @@ -355,6 +360,7 @@ static int parse_argv(int argc, char *argv[]) { { "ephemeral", no_argument, NULL, 'x' }, { "user", required_argument, NULL, 'u' }, { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, + { "as-pid2", no_argument, NULL, 'a' }, { "boot", no_argument, NULL, 'b' }, { "uuid", required_argument, NULL, ARG_UUID }, { "read-only", no_argument, NULL, ARG_READ_ONLY }, @@ -389,6 +395,7 @@ static int parse_argv(int argc, char *argv[]) { { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS }, { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL }, { "settings", required_argument, NULL, ARG_SETTINGS }, + { "chdir", required_argument, NULL, ARG_CHDIR }, {} }; @@ -400,7 +407,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:xp:n", options, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "+hD:u:abL:M:jS:Z:qi:xp:n", options, NULL)) >= 0) switch (c) { @@ -491,8 +498,23 @@ static int parse_argv(int argc, char *argv[]) { break; case 'b': - arg_boot = true; - arg_settings_mask |= SETTING_BOOT; + if (arg_start_mode == START_PID2) { + log_error("--boot and --as-pid2 may not be combined."); + return -EINVAL; + } + + arg_start_mode = START_BOOT; + arg_settings_mask |= SETTING_START_MODE; + break; + + case 'a': + if (arg_start_mode == START_BOOT) { + log_error("--boot and --as-pid2 may not be combined."); + return -EINVAL; + } + + arg_start_mode = START_PID2; + arg_settings_mask |= SETTING_START_MODE; break; case ARG_UUID: @@ -849,6 +871,19 @@ static int parse_argv(int argc, char *argv[]) { break; + case ARG_CHDIR: + if (!path_is_absolute(optarg)) { + log_error("Working directory %s is not an absolute path.", optarg); + return -EINVAL; + } + + r = free_and_strdup(&arg_chdir, optarg); + if (r < 0) + return log_oom(); + + arg_settings_mask |= SETTING_WORKING_DIRECTORY; + break; + case '?': return -EINVAL; @@ -859,7 +894,7 @@ static int parse_argv(int argc, char *argv[]) { if (arg_share_system) arg_register = false; - if (arg_boot && arg_share_system) { + if (arg_start_mode != START_PID1 && arg_share_system) { log_error("--boot and --share-system may not be combined."); return -EINVAL; } @@ -907,7 +942,7 @@ static int parse_argv(int argc, char *argv[]) { if (!arg_parameters) return log_oom(); - arg_settings_mask |= SETTING_BOOT; + arg_settings_mask |= SETTING_START_MODE; } /* Load all settings from .nspawn files */ @@ -943,7 +978,7 @@ static int verify_arguments(void) { return -EINVAL; } - if (arg_boot && arg_kill_signal <= 0) + if (arg_start_mode == START_BOOT && arg_kill_signal <= 0) arg_kill_signal = SIGRTMIN+3; return 0; @@ -2563,6 +2598,16 @@ static int inner_child( return -ESRCH; } + if (arg_chdir) + if (chdir(arg_chdir) < 0) + return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir); + + if (arg_start_mode == START_PID2) { + r = stub_pid1(); + if (r < 0) + return r; + } + /* Now, explicitly close the log, so that we * then can close all remaining fds. Closing * the log explicitly first has the benefit @@ -2574,7 +2619,7 @@ static int inner_child( log_close(); (void) fdset_close_others(fds); - if (arg_boot) { + if (arg_start_mode == START_BOOT) { char **a; size_t m; @@ -2598,7 +2643,9 @@ static int inner_child( } else if (!strv_isempty(arg_parameters)) execvpe(arg_parameters[0], arg_parameters, env_use); else { - chdir(home ?: "/root"); + if (!arg_chdir) + chdir(home ?: "/root"); + execle("/bin/bash", "-bash", NULL, env_use); execle("/bin/sh", "-sh", NULL, env_use); } @@ -2894,15 +2941,22 @@ static int load_settings(void) { /* Copy over bits from the settings, unless they have been * explicitly masked by command line switches. */ - if ((arg_settings_mask & SETTING_BOOT) == 0 && - settings->boot >= 0) { - arg_boot = settings->boot; + if ((arg_settings_mask & SETTING_START_MODE) == 0 && + settings->start_mode >= 0) { + arg_start_mode = settings->start_mode; strv_free(arg_parameters); arg_parameters = settings->parameters; settings->parameters = NULL; } + if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 && + settings->working_directory) { + free(arg_chdir); + arg_chdir = settings->working_directory; + settings->working_directory = NULL; + } + if ((arg_settings_mask & SETTING_ENVIRONMENT) == 0 && settings->environment) { strv_free(arg_setenv); @@ -3044,6 +3098,10 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); + /* Make sure rename_process() in the stub init process can work */ + saved_argv = argv; + saved_argc = argc; + r = parse_argv(argc, argv); if (r <= 0) goto finish; @@ -3150,7 +3208,7 @@ int main(int argc, char *argv[]) { } } - if (arg_boot) { + if (arg_start_mode == START_BOOT) { if (path_is_os_tree(arg_directory) <= 0) { log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory); r = -EINVAL; @@ -3629,6 +3687,7 @@ finish: free(arg_image); free(arg_machine); free(arg_user); + free(arg_chdir); strv_free(arg_setenv); free(arg_network_bridge); strv_free(arg_network_interfaces); |