summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2013-04-21 20:25:01 -0400
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2013-04-21 21:43:43 -0400
commit a383724e4202ee9681bd31cb668c44bdbbf83638 (patch)
tree c770133a8de1794fd4fe6d43374e7a5770f3d378 /src
parent 9097fe299f22c46316979356d945ebb494e85814 (diff)
systemd,nspawn: use extended attributes to store metadata
All attributes are stored as text, since root_directory is already text, and it seems easier to have all of them in text format. Attributes are written in the trusted. namespace, because the kernel currently does not allow user. attributes on cgroups. This is a PITA, and CAP_SYS_ADMIN is required to *read* the attributes. Alas. A second pipe is opened for the child to signal the parent that the cgroup hierarchy has been set up.
Diffstat (limited to 'src')
-rw-r--r-- src/core/mount-setup.c 8
-rw-r--r-- src/nspawn/nspawn.c 62
2 files changed, 67 insertions, 3 deletions
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index e45a6bc1c8..56d358b094 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -68,6 +68,12 @@ typedef struct MountPoint {
* other ones we can delay until SELinux and IMA are loaded. */
#define N_EARLY_MOUNT 5
+#ifdef HAVE_XATTR
+# define FS_XATTR_OPT ",xattr"
+#else
+# define FS_XATTR_OPT ""
+#endif
+
static const MountPoint mount_table[] = {
{ "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_FATAL|MNT_IN_CONTAINER },
@@ -87,7 +93,7 @@ static const MountPoint mount_table[] = {
NULL, MNT_FATAL|MNT_IN_CONTAINER },
{ "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
NULL, MNT_IN_CONTAINER },
- { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd" FS_XATTR_OPT, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_IN_CONTAINER },
{ "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_NONE },
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 5a43d5ed12..b59b2673cd 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -22,6 +22,7 @@
#include <signal.h>
#include <sched.h>
#include <unistd.h>
+#include <attr/xattr.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/mount.h>
@@ -921,6 +922,46 @@ static int setup_cgroup(const char *path) {
return 0;
}
+/* Records container metadata as extended attributes on the container's
+ * cgroup directory below /sys/fs/cgroup/systemd.
+ *
+ * Attributes written (all as text):
+ *   trusted.init_pid        - pid, formatted as a decimal string
+ *   trusted.machine_id      - uuid, only when uuid is non-NULL
+ *   trusted.root_directory  - directory
+ *
+ * A failure to set one attribute is logged as a warning and does not stop
+ * the remaining attributes from being attempted.
+ *
+ * Returns 0 on success (and always when built without HAVE_XATTR), -EINVAL
+ * if the cgroup path does not fit into PATH_MAX, otherwise the return value
+ * (-1) of the first setxattr() call that failed. */
+static int save_attributes(const char *cgroup, pid_t pid, const char *uuid, const char *directory) {
+        char buf[DECIMAL_STR_MAX(pid_t)], path[PATH_MAX];
+        int r = 0, k;
+
+        assert(cgroup);
+        assert(pid >= 0);
+        /* Check the parameter that is dereferenced below, not the global. */
+        assert(directory);
+
+#ifdef HAVE_XATTR
+        assert_se(snprintf(buf, sizeof(buf), "%lu", (unsigned long) pid) < (int) sizeof(buf));
+
+        r = snprintf(path, sizeof(path), "/sys/fs/cgroup/systemd/%s", cgroup);
+        if (r >= (int) sizeof(path)) {
+                log_error("cgroup name too long");
+                return -EINVAL;
+        }
+
+        /* From here on r tracks the first setxattr() failure, if any. */
+        r = setxattr(path, "trusted.init_pid", buf, strlen(buf), XATTR_CREATE);
+        if (r < 0)
+                log_warning("Failed to set %s attribute on %s: %m", "trusted.init_pid", path);
+
+        if (uuid) {
+                k = setxattr(path, "trusted.machine_id", uuid, strlen(uuid), XATTR_CREATE);
+                if (k < 0) {
+                        log_warning("Failed to set %s attribute on %s: %m", "trusted.machine_id", path);
+                        if (r == 0)
+                                r = k;
+                }
+        }
+
+        k = setxattr(path, "trusted.root_directory", directory, strlen(directory), XATTR_CREATE);
+        if (k < 0) {
+                /* Name the attribute that actually failed. */
+                log_warning("Failed to set %s attribute on %s: %m", "trusted.root_directory", path);
+                if (r == 0)
+                        r = k;
+        }
+#endif
+        return r;
+}
+
+
static int drop_capabilities(void) {
return capability_bounding_set_drop(~arg_retain, false);
}
@@ -1198,7 +1239,7 @@ int main(int argc, char *argv[]) {
arg_directory = get_current_dir_name();
if (!arg_directory) {
- log_error("Failed to determine path");
+ log_error("Failed to determine path, please use -D.");
goto finish;
}
@@ -1313,13 +1354,19 @@ int main(int argc, char *argv[]) {
for (;;) {
siginfo_t status;
- int pipefd[2];
+ int pipefd[2], pipefd2[2];
if (pipe2(pipefd, O_NONBLOCK|O_CLOEXEC) < 0) {
log_error("pipe2(): %m");
goto finish;
}
+ if (pipe2(pipefd2, O_NONBLOCK|O_CLOEXEC) < 0) {
+ log_error("pipe2(): %m");
+ close_pipe(pipefd);
+ goto finish;
+ }
+
pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
if (pid < 0) {
if (errno == EINVAL)
@@ -1353,6 +1400,7 @@ int main(int argc, char *argv[]) {
if (envp[n_env])
n_env ++;
+ /* Wait for the parent process to log our PID */
close_nointr_nofail(pipefd[1]);
fd_wait_for_event(pipefd[0], POLLHUP, -1);
close_nointr_nofail(pipefd[0]);
@@ -1409,6 +1457,9 @@ int main(int argc, char *argv[]) {
if (setup_cgroup(newcg) < 0)
goto child_fail;
+ close_nointr_nofail(pipefd2[1]);
+ close_nointr_nofail(pipefd2[0]);
+
/* Mark everything as slave, so that we still
* receive mounts from the real root, but don't
* propagate mounts to the real root. */
@@ -1617,6 +1668,13 @@ int main(int argc, char *argv[]) {
close_nointr_nofail(pipefd[0]);
close_nointr_nofail(pipefd[1]);
+ /* Wait for the child process to establish cgroup hierarchy */
+ close_nointr_nofail(pipefd2[1]);
+ fd_wait_for_event(pipefd2[0], POLLHUP, -1);
+ close_nointr_nofail(pipefd2[0]);
+
+ save_attributes(newcg, pid, arg_uuid, arg_directory);
+
fdset_free(fds);
fds = NULL;