summaryrefslogtreecommitdiff
path: root/src/nspawn/nspawn.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nspawn/nspawn.c')
-rw-r--r--src/nspawn/nspawn.c100
1 files changed, 71 insertions, 29 deletions
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 5c4341e0ee..94c7eea9b7 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2648,7 +2648,7 @@ static int inner_child(
int kmsg_socket,
int rtnl_socket,
FDSet *fds,
- CGroupUnified outer_cgver) {
+ CGMounts cgmounts) {
_cleanup_free_ char *home = NULL;
char as_uuid[37];
@@ -2706,19 +2706,11 @@ static int inner_child(
r = unshare(CLONE_NEWCGROUP);
if (r < 0)
return log_error_errno(errno, "Failed to unshare cgroup namespace");
- r = mount_cgroups(
- "",
- outer_cgver,
- arg_unified_cgroup_hierarchy,
- arg_userns_mode != USER_NAMESPACE_NO,
- arg_uid_shift,
- arg_uid_range,
- arg_selinux_apifs_context,
- true);
- if (r < 0)
- return r;
- } else {
- r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
+ r = cgroup_mount_mounts(cgmounts,
+ NULL,
+ arg_userns_mode == USER_NAMESPACE_NO ? UID_INVALID : 0,
+ arg_selinux_apifs_context);
+ cgroup_free_mounts(&cgmounts);
if (r < 0)
return r;
}
@@ -2915,6 +2907,7 @@ static int outer_child(
int kmsg_socket,
int rtnl_socket,
int uid_shift_socket,
+ int cgroup_socket,
FDSet *fds,
CGroupUnified outer_cgver) {
@@ -2922,6 +2915,7 @@ static int outer_child(
ssize_t l;
int r;
_cleanup_close_ int fd = -1;
+ _cleanup_(cgroup_free_mounts) CGMounts cgmounts = {};
assert(barrier);
assert(directory);
@@ -3110,19 +3104,11 @@ static int outer_child(
if (r < 0)
return r;
- if (!arg_use_cgns) {
- r = mount_cgroups(
- directory,
- outer_cgver,
- arg_unified_cgroup_hierarchy,
- arg_userns_mode != USER_NAMESPACE_NO,
- arg_uid_shift,
- arg_uid_range,
- arg_selinux_apifs_context,
- false);
- if (r < 0)
- return r;
- }
+ r = cgroup_decide_mounts(&cgmounts,
+ outer_cgver, arg_unified_cgroup_hierarchy,
+ arg_use_cgns);
+ if (r < 0)
+ return r;
r = mount_move_root(directory);
if (r < 0)
@@ -3143,12 +3129,13 @@ static int outer_child(
uuid_socket = safe_close(uuid_socket);
notify_socket = safe_close(notify_socket);
uid_shift_socket = safe_close(uid_shift_socket);
+ cgroup_socket = safe_close(cgroup_socket);
/* The inner child has all namespaces that are
* requested, so that we all are owned by the user if
* user namespaces are turned on. */
- r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds, outer_cgver);
+ r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds, cgmounts);
if (r < 0)
_exit(EXIT_FAILURE);
@@ -3175,11 +3162,42 @@ static int outer_child(
if (r < 0)
return log_error_errno(r, "Failed to send notify fd: %m");
+ /* If !use_cgns, we have to mount the cgroups here: without a cgroup namespace, cgroups can't be mounted
+ * from inside a less privileged mountns (and using userns causes the mountns to be less privileged). */
+ if (!arg_use_cgns) {
+ /* If !use_cgns, then cgroup_mount_mounts() needs to look at /proc/pid/cgroup; but because we've
+ * already chroot()ed, we don't have access to /proc. So the parent opens the file and sends it to
+ * us. */
+ int cgfd;
+ _cleanup_fclose_ FILE *cgfile = NULL;
+
+ assert(cgroup_socket);
+
+ cgfd = receive_one_fd(cgroup_socket, 0);
+ if (cgfd < 0)
+ return log_error_errno(cgfd, "Failed to recv cgroup fd: %m");
+
+ cgfile = fdopen(cgfd, "re");
+ if (!cgfile) {
+ r = -errno; /* in case safe_close sets errno */
+ cgfd = safe_close(cgfd);
+ return log_error_errno(r, "Failed to create a stream object for cgroup fd: %m");
+ }
+
+ r = cgroup_mount_mounts(cgmounts,
+ cgfile,
+ arg_userns_mode == USER_NAMESPACE_NO ? UID_INVALID : arg_uid_shift,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+ }
+
pid_socket = safe_close(pid_socket);
uuid_socket = safe_close(uuid_socket);
notify_socket = safe_close(notify_socket);
kmsg_socket = safe_close(kmsg_socket);
rtnl_socket = safe_close(rtnl_socket);
+ cgroup_socket = safe_close(cgroup_socket);
return 0;
}
@@ -3611,7 +3629,8 @@ static int run(int master,
pid_socket_pair[2] = { -1, -1 },
uuid_socket_pair[2] = { -1, -1 },
notify_socket_pair[2] = { -1, -1 },
- uid_shift_socket_pair[2] = { -1, -1 };
+ uid_shift_socket_pair[2] = { -1, -1 },
+ cgroup_socket_pair[2] = {-1, -1 };
_cleanup_close_ int notify_socket= -1;
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
@@ -3662,6 +3681,10 @@ static int run(int master,
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0)
return log_error_errno(errno, "Failed to create uid shift socket pair: %m");
+ if (!arg_use_cgns)
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, cgroup_socket_pair) < 0)
+ return log_error_errno(errno, "Failed to create cgroup socket pair: %m");
+
/* Child can be killed before execv(), so handle SIGCHLD in order to interrupt
* parent's blocking calls and give it a chance to call wait() and terminate. */
r = sigprocmask(SIG_UNBLOCK, &mask_chld, NULL);
@@ -3690,6 +3713,7 @@ static int run(int master,
uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]);
notify_socket_pair[0] = safe_close(notify_socket_pair[0]);
uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]);
+ cgroup_socket_pair[0] = safe_close(cgroup_socket_pair[0]);
(void) reset_all_signal_handlers();
(void) reset_signal_mask();
@@ -3709,6 +3733,7 @@ static int run(int master,
kmsg_socket_pair[1],
rtnl_socket_pair[1],
uid_shift_socket_pair[1],
+ cgroup_socket_pair[1],
fds,
outer_cgver);
if (r < 0)
@@ -3727,6 +3752,7 @@ static int run(int master,
uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
notify_socket_pair[1] = safe_close(notify_socket_pair[1]);
uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
+ cgroup_socket_pair[1] = safe_close(cgroup_socket_pair[1]);
if (arg_userns_mode != USER_NAMESPACE_NO) {
/* The child just let us know the UID shift it might have read from the image. */
@@ -3847,6 +3873,8 @@ static int run(int master,
}
if (arg_register) {
+ /* If the child is to be placed into a different cgroup,
+ * this is what does it. */
r = register_machine(
arg_machine,
*main_pid,
@@ -3867,6 +3895,20 @@ static int run(int master,
if (r < 0)
return r;
+ if (!arg_use_cgns) {
+ /* The outer child (helper_pid) waits to receive this fd, so it won't exit until we have sent it */
+ const char *fs;
+ _cleanup_close_ int fd;
+
+ fs = procfs_file_alloca(*main_pid, "cgroup");
+ fd = open(fs, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open cgroups of child: %m");
+
+ r = send_one_fd(cgroup_socket_pair[0], fd, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send cgroup fd: %m");
+ }
/* Wait for the outer child. */
r = wait_for_terminate_and_warn("namespace helper", *helper_pid, NULL);