summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lennart Poettering <lennart@poettering.net> 2012-09-05 16:23:41 -0700
committer: Lennart Poettering <lennart@poettering.net> 2012-09-05 16:23:41 -0700
commit: d87be9b0af81a6e07d4fb3028e45c4409100dc26 (patch)
tree: 3ee0cb8aa69b0919f009f388e934ee3aa2cc2172
parent: cb7ec5645e0edf154f0cc8414f5914cb433d0dfe (diff)
nspawn: handle poweroff/reboot nicely in containers
-rw-r--r--  TODO                    7
-rw-r--r--  man/systemd-nspawn.xml  2
-rw-r--r--  src/nspawn/nspawn.c     371
-rw-r--r--  src/shared/util.c       4
4 files changed, 210 insertions, 174 deletions
diff --git a/TODO b/TODO
index e683eeaebf..c7f789b9b6 100644
--- a/TODO
+++ b/TODO
@@ -49,15 +49,16 @@ Bugfixes:
Features:
+* Query Paul Moore about relabelling socket fds while they are open
+
* log fewer journal internal messages to the kernel kmsg
* move keymaps to /usr/lib/... rather than /usr/lib/udev/...
* journald: check whether it is OK if the client can still modify delivered journal entries
-* json: use yajl
-* json: don't add wrapping array, just put entries on one line each
-* json: add -o json-pretty in addition to -o json, make the latter output one line per entry
+* json: use jensson
+
* json: properly serialize multiple fields with the same name per entry
* journalctl: make -l the default
diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index 9f8b8e2ae4..1f7d74e273 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -232,7 +232,7 @@
CAP_SETUID, CAP_SYS_ADMIN,
CAP_SYS_CHROOT, CAP_SYS_NICE,
CAP_SYS_PTRACE, CAP_SYS_TTY_CONFIG,
- CAP_SYS_RESOURCE.</para></listitem>
+ CAP_SYS_RESOURCE, CAP_SYS_BOOT.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 7b1b5eab84..7f084ef2d0 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -92,7 +92,8 @@ static uint64_t arg_retain =
(1ULL << CAP_SYS_NICE) |
(1ULL << CAP_SYS_PTRACE) |
(1ULL << CAP_SYS_TTY_CONFIG) |
- (1ULL << CAP_SYS_RESOURCE);
+ (1ULL << CAP_SYS_RESOURCE) |
+ (1ULL << CAP_SYS_BOOT);
static int help(void) {
@@ -1167,11 +1168,6 @@ int main(int argc, char *argv[]) {
cfmakeraw(&raw_attr);
raw_attr.c_lflag &= ~ECHO;
- if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
- log_error("Failed to set terminal attributes: %m");
- goto finish;
- }
-
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
log_error("Failed to create kmsg socket pair");
goto finish;
@@ -1181,232 +1177,271 @@ int main(int argc, char *argv[]) {
sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
- pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
- if (pid < 0) {
- if (errno == EINVAL)
- log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
- else
- log_error("clone() failed: %m");
+ for (;;) {
+ siginfo_t status;
- goto finish;
- }
+ if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
+ log_error("Failed to set terminal attributes: %m");
+ goto finish;
+ }
- if (pid == 0) {
- /* child */
-
- const char *home = NULL;
- uid_t uid = (uid_t) -1;
- gid_t gid = (gid_t) -1;
- const char *envp[] = {
- "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
- "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
- NULL, /* TERM */
- NULL, /* HOME */
- NULL, /* USER */
- NULL, /* LOGNAME */
- NULL, /* container_uuid */
- NULL
- };
-
- envp[2] = strv_find_prefix(environ, "TERM=");
+ pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
+ if (pid < 0) {
+ if (errno == EINVAL)
+ log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
+ else
+ log_error("clone() failed: %m");
- close_nointr_nofail(master);
+ goto finish;
+ }
- close_nointr(STDIN_FILENO);
- close_nointr(STDOUT_FILENO);
- close_nointr(STDERR_FILENO);
+ if (pid == 0) {
+ /* child */
- close_all_fds(&kmsg_socket_pair[1], 1);
+ const char *home = NULL;
+ uid_t uid = (uid_t) -1;
+ gid_t gid = (gid_t) -1;
+ const char *envp[] = {
+ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+ "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
+ NULL, /* TERM */
+ NULL, /* HOME */
+ NULL, /* USER */
+ NULL, /* LOGNAME */
+ NULL, /* container_uuid */
+ NULL
+ };
- reset_all_signal_handlers();
+ envp[2] = strv_find_prefix(environ, "TERM=");
- assert_se(sigemptyset(&mask) == 0);
- assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
+ close_nointr_nofail(master);
- if (open_terminal(console, O_RDWR) != STDIN_FILENO ||
- dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
- dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
- goto child_fail;
+ close_nointr(STDIN_FILENO);
+ close_nointr(STDOUT_FILENO);
+ close_nointr(STDERR_FILENO);
- if (setsid() < 0) {
- log_error("setsid() failed: %m");
- goto child_fail;
- }
+ close_all_fds(&kmsg_socket_pair[1], 1);
- if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
- log_error("PR_SET_PDEATHSIG failed: %m");
- goto child_fail;
- }
+ reset_all_signal_handlers();
- /* Mark everything as slave, so that we still
- * receive mounts from the real root, but don't
- * propagate mounts to the real root. */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
- log_error("MS_SLAVE|MS_REC failed: %m");
- goto child_fail;
- }
+ assert_se(sigemptyset(&mask) == 0);
+ assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
- /* Turn directory into bind mount */
- if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
- log_error("Failed to make bind mount.");
- goto child_fail;
- }
+ if (open_terminal(console, O_RDWR) != STDIN_FILENO ||
+ dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
+ dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
+ goto child_fail;
- if (arg_read_only)
- if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
- log_error("Failed to make read-only.");
+ if (setsid() < 0) {
+ log_error("setsid() failed: %m");
goto child_fail;
}
- if (mount_all(arg_directory) < 0)
- goto child_fail;
-
- if (copy_devnodes(arg_directory) < 0)
- goto child_fail;
-
- dev_setup(arg_directory);
-
- if (setup_dev_console(arg_directory, console) < 0)
- goto child_fail;
-
- if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
- goto child_fail;
-
- close_nointr_nofail(kmsg_socket_pair[1]);
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
+ log_error("PR_SET_PDEATHSIG failed: %m");
+ goto child_fail;
+ }
- if (setup_boot_id(arg_directory) < 0)
- goto child_fail;
+ /* Mark everything as slave, so that we still
+ * receive mounts from the real root, but don't
+ * propagate mounts to the real root. */
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ log_error("MS_SLAVE|MS_REC failed: %m");
+ goto child_fail;
+ }
- if (setup_timezone(arg_directory) < 0)
- goto child_fail;
+ /* Turn directory into bind mount */
+ if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
+ log_error("Failed to make bind mount.");
+ goto child_fail;
+ }
- if (setup_resolv_conf(arg_directory) < 0)
- goto child_fail;
+ if (arg_read_only)
+ if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
+ log_error("Failed to make read-only.");
+ goto child_fail;
+ }
- if (setup_journal(arg_directory) < 0)
- goto child_fail;
+ if (mount_all(arg_directory) < 0)
+ goto child_fail;
- if (chdir(arg_directory) < 0) {
- log_error("chdir(%s) failed: %m", arg_directory);
- goto child_fail;
- }
+ if (copy_devnodes(arg_directory) < 0)
+ goto child_fail;
- if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
- log_error("mount(MS_MOVE) failed: %m");
- goto child_fail;
- }
+ dev_setup(arg_directory);
- if (chroot(".") < 0) {
- log_error("chroot() failed: %m");
- goto child_fail;
- }
+ if (setup_dev_console(arg_directory, console) < 0)
+ goto child_fail;
- if (chdir("/") < 0) {
- log_error("chdir() failed: %m");
- goto child_fail;
- }
+ if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
+ goto child_fail;
- umask(0022);
+ close_nointr_nofail(kmsg_socket_pair[1]);
- loopback_setup();
+ if (setup_boot_id(arg_directory) < 0)
+ goto child_fail;
- if (drop_capabilities() < 0) {
- log_error("drop_capabilities() failed: %m");
- goto child_fail;
- }
+ if (setup_timezone(arg_directory) < 0)
+ goto child_fail;
- if (arg_user) {
+ if (setup_resolv_conf(arg_directory) < 0)
+ goto child_fail;
- if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
- log_error("get_user_creds() failed: %m");
+ if (setup_journal(arg_directory) < 0)
goto child_fail;
- }
- if (mkdir_parents_label(home, 0775) < 0) {
- log_error("mkdir_parents_label() failed: %m");
+ if (chdir(arg_directory) < 0) {
+ log_error("chdir(%s) failed: %m", arg_directory);
goto child_fail;
}
- if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
- log_error("mkdir_safe_label() failed: %m");
+ if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
+ log_error("mount(MS_MOVE) failed: %m");
goto child_fail;
}
- if (initgroups((const char*)arg_user, gid) < 0) {
- log_error("initgroups() failed: %m");
+ if (chroot(".") < 0) {
+ log_error("chroot() failed: %m");
goto child_fail;
}
- if (setresgid(gid, gid, gid) < 0) {
- log_error("setregid() failed: %m");
+ if (chdir("/") < 0) {
+ log_error("chdir() failed: %m");
goto child_fail;
}
- if (setresuid(uid, uid, uid) < 0) {
- log_error("setreuid() failed: %m");
+ umask(0022);
+
+ loopback_setup();
+
+ if (drop_capabilities() < 0) {
+ log_error("drop_capabilities() failed: %m");
goto child_fail;
}
- }
- if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) ||
- (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) ||
- (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
- log_oom();
- goto child_fail;
- }
+ if (arg_user) {
+
+ if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
+ log_error("get_user_creds() failed: %m");
+ goto child_fail;
+ }
+
+ if (mkdir_parents_label(home, 0775) < 0) {
+ log_error("mkdir_parents_label() failed: %m");
+ goto child_fail;
+ }
+
+ if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
+ log_error("mkdir_safe_label() failed: %m");
+ goto child_fail;
+ }
+
+ if (initgroups((const char*)arg_user, gid) < 0) {
+ log_error("initgroups() failed: %m");
+ goto child_fail;
+ }
- if (arg_uuid) {
- if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) {
+ if (setresgid(gid, gid, gid) < 0) {
+ log_error("setregid() failed: %m");
+ goto child_fail;
+ }
+
+ if (setresuid(uid, uid, uid) < 0) {
+ log_error("setreuid() failed: %m");
+ goto child_fail;
+ }
+ }
+
+ if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) ||
+ (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) ||
+ (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
log_oom();
goto child_fail;
}
- }
- setup_hostname();
+ if (arg_uuid) {
+ if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) {
+ log_oom();
+ goto child_fail;
+ }
+ }
+
+ setup_hostname();
+
+ if (arg_boot) {
+ char **a;
+ size_t l;
- if (arg_boot) {
- char **a;
- size_t l;
+ /* Automatically search for the init system */
- /* Automatically search for the init system */
+ l = 1 + argc - optind;
+ a = newa(char*, l + 1);
+ memcpy(a + 1, argv + optind, l * sizeof(char*));
- l = 1 + argc - optind;
- a = newa(char*, l + 1);
- memcpy(a + 1, argv + optind, l * sizeof(char*));
+ a[0] = (char*) "/usr/lib/systemd/systemd";
+ execve(a[0], a, (char**) envp);
- a[0] = (char*) "/usr/lib/systemd/systemd";
- execve(a[0], a, (char**) envp);
+ a[0] = (char*) "/lib/systemd/systemd";
+ execve(a[0], a, (char**) envp);
- a[0] = (char*) "/lib/systemd/systemd";
- execve(a[0], a, (char**) envp);
+ a[0] = (char*) "/sbin/init";
+ execve(a[0], a, (char**) envp);
+ } else if (argc > optind)
+ execvpe(argv[optind], argv + optind, (char**) envp);
+ else {
+ chdir(home ? home : "/root");
+ execle("/bin/bash", "-bash", NULL, (char**) envp);
+ }
+
+ log_error("execv() failed: %m");
- a[0] = (char*) "/sbin/init";
- execve(a[0], a, (char**) envp);
- } else if (argc > optind)
- execvpe(argv[optind], argv + optind, (char**) envp);
- else {
- chdir(home ? home : "/root");
- execle("/bin/bash", "-bash", NULL, (char**) envp);
+ child_fail:
+ _exit(EXIT_FAILURE);
}
- log_error("execv() failed: %m");
+ if (process_pty(master, &mask) < 0)
+ goto finish;
- child_fail:
- _exit(EXIT_FAILURE);
- }
- if (process_pty(master, &mask) < 0)
- goto finish;
+ if (saved_attr_valid)
+ tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
- if (saved_attr_valid) {
- tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
- saved_attr_valid = false;
- }
+ r = wait_for_terminate(pid, &status);
+ if (r < 0) {
+ r = EXIT_FAILURE;
+ break;
+ }
- r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid);
+ if (status.si_code == CLD_EXITED) {
+ if (status.si_status != 0) {
+ log_error("Container failed with error code %i.", status.si_status);
+ r = status.si_status;
+ break;
+ }
+
+ log_debug("Container exited successfully.");
+ break;
+ } else if (status.si_code == CLD_KILLED &&
+ status.si_status == SIGINT) {
+ log_info("Container has been shut down.");
+ r = 0;
+ break;
+ } else if (status.si_code == CLD_KILLED &&
+ status.si_status == SIGHUP) {
+ log_info("Container is being rebooted.");
+ continue;
+ } else if (status.si_code == CLD_KILLED ||
+ status.si_code == CLD_DUMPED) {
- if (r < 0)
- r = EXIT_FAILURE;
+ log_error("Container terminated by signal %s.", signal_to_string(status.si_status));
+ r = EXIT_FAILURE;
+ break;
+ } else {
+ log_error("Container failed due to unknown reason.");
+ r = EXIT_FAILURE;
+ break;
+ }
+ }
finish:
if (saved_attr_valid)
diff --git a/src/shared/util.c b/src/shared/util.c
index 95b577be0e..4f5cb26e3c 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -4011,7 +4011,8 @@ int wait_for_terminate_and_warn(const char *name, pid_t pid) {
assert(name);
assert(pid > 1);
- if ((r = wait_for_terminate(pid, &status)) < 0) {
+ r = wait_for_terminate(pid, &status);
+ if (r < 0) {
log_warning("Failed to wait for %s: %s", name, strerror(-r));
return r;
}
@@ -4034,7 +4035,6 @@ int wait_for_terminate_and_warn(const char *name, pid_t pid) {
log_warning("%s failed due to unknown reason.", name);
return -EPROTO;
-
}
_noreturn_ void freeze(void) {