diff options
Diffstat (limited to 'src/nspawn/nspawn.c')
-rw-r--r-- | src/nspawn/nspawn.c | 100 |
1 file changed, 71 insertions, 29 deletions
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 5c4341e0ee..94c7eea9b7 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2648,7 +2648,7 @@ static int inner_child( int kmsg_socket, int rtnl_socket, FDSet *fds, - CGroupUnified outer_cgver) { + CGMounts cgmounts) { _cleanup_free_ char *home = NULL; char as_uuid[37]; @@ -2706,19 +2706,11 @@ static int inner_child( r = unshare(CLONE_NEWCGROUP); if (r < 0) return log_error_errno(errno, "Failed to unshare cgroup namespace"); - r = mount_cgroups( - "", - outer_cgver, - arg_unified_cgroup_hierarchy, - arg_userns_mode != USER_NAMESPACE_NO, - arg_uid_shift, - arg_uid_range, - arg_selinux_apifs_context, - true); - if (r < 0) - return r; - } else { - r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy); + r = cgroup_mount_mounts(cgmounts, + NULL, + arg_userns_mode == USER_NAMESPACE_NO ? UID_INVALID : 0, + arg_selinux_apifs_context); + cgroup_free_mounts(&cgmounts); if (r < 0) return r; } @@ -2915,6 +2907,7 @@ static int outer_child( int kmsg_socket, int rtnl_socket, int uid_shift_socket, + int cgroup_socket, FDSet *fds, CGroupUnified outer_cgver) { @@ -2922,6 +2915,7 @@ static int outer_child( ssize_t l; int r; _cleanup_close_ int fd = -1; + _cleanup_(cgroup_free_mounts) CGMounts cgmounts = {}; assert(barrier); assert(directory); @@ -3110,19 +3104,11 @@ static int outer_child( if (r < 0) return r; - if (!arg_use_cgns) { - r = mount_cgroups( - directory, - outer_cgver, - arg_unified_cgroup_hierarchy, - arg_userns_mode != USER_NAMESPACE_NO, - arg_uid_shift, - arg_uid_range, - arg_selinux_apifs_context, - false); - if (r < 0) - return r; - } + r = cgroup_decide_mounts(&cgmounts, + outer_cgver, arg_unified_cgroup_hierarchy, + arg_use_cgns); + if (r < 0) + return r; r = mount_move_root(directory); if (r < 0) @@ -3143,12 +3129,13 @@ static int outer_child( uuid_socket = safe_close(uuid_socket); notify_socket = safe_close(notify_socket); uid_shift_socket = safe_close(uid_shift_socket); + 
cgroup_socket = safe_close(cgroup_socket); /* The inner child has all namespaces that are * requested, so that we all are owned by the user if * user namespaces are turned on. */ - r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds, outer_cgver); + r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds, cgmounts); if (r < 0) _exit(EXIT_FAILURE); @@ -3175,11 +3162,42 @@ static int outer_child( if (r < 0) return log_error_errno(r, "Failed to send notify fd: %m"); + /* If !use_cgns, then we need to do this here because without cgns cgroups can't be mounted inside of a + * less privileged mountns (and using userns causes the mountns to be less privileged). */ + if (!arg_use_cgns) { + /* If !use_cgns, then cgroup_mount_mounts() needs to look at /proc/pid/cgroup; but because we've + * already chroot()ed, we don't have access to /proc. So the parent opens the file and sends it to + * us. */ + int cgfd; + _cleanup_fclose_ FILE *cgfile = NULL; + + assert(cgroup_socket); + + cgfd = receive_one_fd(cgroup_socket, 0); + if (cgfd < 0) + return log_error_errno(cgfd, "Failed to recv cgroup fd: %m"); + + cgfile = fdopen(cgfd, "re"); + if (!cgfile) { + r = -errno; /* in case safe_close sets errno */ + cgfd = safe_close(cgfd); + return log_error_errno(r, "Failed to create a stream object for cgroup fd: %m"); + } + + r = cgroup_mount_mounts(cgmounts, + cgfile, + arg_userns_mode == USER_NAMESPACE_NO ? 
UID_INVALID : arg_uid_shift, + arg_selinux_apifs_context); + if (r < 0) + return r; + } + pid_socket = safe_close(pid_socket); uuid_socket = safe_close(uuid_socket); notify_socket = safe_close(notify_socket); kmsg_socket = safe_close(kmsg_socket); rtnl_socket = safe_close(rtnl_socket); + cgroup_socket = safe_close(cgroup_socket); return 0; } @@ -3611,7 +3629,8 @@ static int run(int master, pid_socket_pair[2] = { -1, -1 }, uuid_socket_pair[2] = { -1, -1 }, notify_socket_pair[2] = { -1, -1 }, - uid_shift_socket_pair[2] = { -1, -1 }; + uid_shift_socket_pair[2] = { -1, -1 }, + cgroup_socket_pair[2] = {-1, -1 }; _cleanup_close_ int notify_socket= -1; _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL; _cleanup_(sd_event_unrefp) sd_event *event = NULL; @@ -3662,6 +3681,10 @@ static int run(int master, if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0) return log_error_errno(errno, "Failed to create uid shift socket pair: %m"); + if (!arg_use_cgns) + if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, cgroup_socket_pair) < 0) + return log_error_errno(errno, "Failed to create cgroup socket pair: %m"); + /* Child can be killed before execv(), so handle SIGCHLD in order to interrupt * parent's blocking calls and give it a chance to call wait() and terminate. 
*/ r = sigprocmask(SIG_UNBLOCK, &mask_chld, NULL); @@ -3690,6 +3713,7 @@ static int run(int master, uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]); notify_socket_pair[0] = safe_close(notify_socket_pair[0]); uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]); + cgroup_socket_pair[0] = safe_close(cgroup_socket_pair[0]); (void) reset_all_signal_handlers(); (void) reset_signal_mask(); @@ -3709,6 +3733,7 @@ static int run(int master, kmsg_socket_pair[1], rtnl_socket_pair[1], uid_shift_socket_pair[1], + cgroup_socket_pair[1], fds, outer_cgver); if (r < 0) @@ -3727,6 +3752,7 @@ static int run(int master, uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]); notify_socket_pair[1] = safe_close(notify_socket_pair[1]); uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]); + cgroup_socket_pair[1] = safe_close(cgroup_socket_pair[1]); if (arg_userns_mode != USER_NAMESPACE_NO) { /* The child just let us know the UID shift it might have read from the image. */ @@ -3847,6 +3873,8 @@ static int run(int master, } if (arg_register) { + /* If the child is to be placed into a different cgroup, + * this is what does it. */ r = register_machine( arg_machine, *main_pid, @@ -3867,6 +3895,20 @@ static int run(int master, if (r < 0) return r; + if (!arg_use_cgns) { + /* helper_pid won't exit until this happens */ + const char *fs; + _cleanup_close_ int fd; + + fs = procfs_file_alloca(*main_pid, "cgroup"); + fd = open(fs, O_RDONLY|O_CLOEXEC); + if (fd < 0) + return log_error_errno(errno, "Failed to open cgroups of child: %m"); + + r = send_one_fd(cgroup_socket_pair[0], fd, 0); + if (r < 0) + return log_error_errno(r, "Failed to send cgroup fd: %m"); + } /* Wait for the outer child. */ r = wait_for_terminate_and_warn("namespace helper", *helper_pid, NULL); |