summaryrefslogtreecommitdiff
path: root/src/nspawn
diff options
context:
space:
mode:
Diffstat (limited to 'src/nspawn')
-rw-r--r--src/nspawn/nspawn-mount.c211
-rw-r--r--src/nspawn/nspawn-register.c11
-rw-r--r--src/nspawn/nspawn-seccomp.c6
-rw-r--r--src/nspawn/nspawn-setuid.c7
-rw-r--r--src/nspawn/nspawn.c124
5 files changed, 258 insertions, 101 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 9f4903c842..ac93357ef4 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -23,6 +23,8 @@
#include "alloc-util.h"
#include "cgroup-util.h"
#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
#include "fs-util.h"
#include "label.h"
#include "mkdir.h"
@@ -181,13 +183,15 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
static int tmpfs_patch_options(
const char *options,
- bool userns, uid_t uid_shift, uid_t uid_range,
+ bool userns,
+ uid_t uid_shift, uid_t uid_range,
+ bool patch_ids,
const char *selinux_apifs_context,
char **ret) {
char *buf = NULL;
- if (userns && uid_shift != 0) {
+ if ((userns && uid_shift != 0) || patch_ids) {
assert(uid_shift != UID_INVALID);
if (options)
@@ -218,7 +222,13 @@ static int tmpfs_patch_options(
}
#endif
+ if (!buf && options) {
+ buf = strdup(options);
+ if (!buf)
+ return -ENOMEM;
+ }
*ret = buf;
+
return !!buf;
}
@@ -271,7 +281,15 @@ int mount_sysfs(const char *dest) {
return log_error_errno(errno, "Failed to remove %s: %m", full);
x = prefix_roota(top, "/fs/kdbus");
- (void) mkdir(x, 0755);
+ (void) mkdir_p(x, 0755);
+
+ /* Create mountpoint for cgroups. Otherwise we are not allowed since we
+ * remount /sys read-only.
+ */
+ if (cg_ns_supported()) {
+ x = prefix_roota(top, "/fs/cgroup");
+ (void) mkdir_p(x, 0755);
+ }
if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
return log_error_errno(errno, "Failed to make %s read-only: %m", top);
@@ -297,18 +315,19 @@ int mount_all(const char *dest,
} MountPoint;
static const MountPoint mount_table[] = {
- { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
- { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
- { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
- { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
- { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
- { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
- { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
- { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/
+ { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */
+ { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */
+ { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
+ { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
#ifdef HAVE_SELINUX
- { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
- { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
#endif
};
@@ -348,7 +367,7 @@ int mount_all(const char *dest,
o = mount_table[k].options;
if (streq_ptr(mount_table[k].type, "tmpfs")) {
- r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
+ r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, false, selinux_apifs_context, &options);
if (r < 0)
return log_oom();
if (r > 0)
@@ -485,7 +504,7 @@ static int mount_tmpfs(
if (r < 0 && r != -EEXIST)
return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
- r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
+ r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf);
if (r < 0)
return log_oom();
options = r > 0 ? buf : m->options;
@@ -600,6 +619,48 @@ int mount_custom(
return 0;
}
+/* Retrieve existing subsystems. This function is called in a new cgroup
+ * namespace.
+ */
+static int get_controllers(Set *subsystems) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char line[LINE_MAX];
+
+ assert(subsystems);
+
+ f = fopen("/proc/self/cgroup", "re");
+ if (!f)
+ return errno == ENOENT ? -ESRCH : -errno;
+
+ FOREACH_LINE(line, f, return -errno) {
+ int r;
+ char *e, *l, *p;
+
+ truncate_nl(line);
+
+ l = strchr(line, ':');
+ if (!l)
+ continue;
+
+ l++;
+ e = strchr(l, ':');
+ if (!e)
+ continue;
+
+ *e = 0;
+
+ if (streq(l, "") || streq(l, "name=systemd"))
+ continue;
+
+ p = strdup(l);
+ r = set_consume(subsystems, p);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
char *to;
int r;
@@ -628,11 +689,107 @@ static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controlle
return 1;
}
-static int mount_legacy_cgroups(
- const char *dest,
+/* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */
+static int mount_legacy_cgns_supported(
bool userns, uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ const char *cgroup_root = "/sys/fs/cgroup", *c;
+ int r;
+ (void) mkdir_p(cgroup_root, 0755);
+
+ /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+ r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+ if (r == 0) {
+ _cleanup_free_ char *options = NULL;
+
+ /* When cgroup namespaces are enabled and user namespaces are
+ * used then the mount of the cgroupfs is done *inside* the new
+ * user namespace. We're root in the new user namespace and the
+ * kernel will happily translate our uid/gid to the correct
+ * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply
+ * pass uid 0 and not uid_shift to tmpfs_patch_options().
+ */
+ r = tmpfs_patch_options("mode=755", userns, 0, uid_range, true, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+
+ if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
+ return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
+ }
+
+ if (cg_unified() > 0)
+ goto skip_controllers;
+
+ controllers = set_new(&string_hash_ops);
+ if (!controllers)
+ return log_oom();
+
+ r = get_controllers(controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ const char *controller = NULL;
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns);
+ if (r < 0)
+ return r;
+
+ /* When multiple hierarchies are co-mounted, make their
+ * constituting individual hierarchies a symlink to the
+ * co-mount.
+ */
+ c = controller;
+ for (;;) {
+ _cleanup_free_ char *target = NULL, *tok = NULL;
+
+ r = extract_first_word(&c, &tok, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract co-mounted cgroup controller: %m");
+ if (r == 0)
+ break;
+
+ target = prefix_root("/sys/fs/cgroup", tok);
+ if (!target)
+ return log_oom();
+
+ if (streq(controller, tok))
+ break;
+
+ r = symlink_idempotent(controller, target);
+ if (r == -EINVAL)
+ return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
+ }
+ }
+
+skip_controllers:
+ r = mount_legacy_cgroup_hierarchy("", "none,name=systemd,xattr", "systemd", false);
+ if (r < 0)
+ return r;
+
+ if (!userns) {
+ if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
+ return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
+ }
+
+ return 0;
+}
+
+/* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */
+static int mount_legacy_cgns_unsupported(
+ const char *dest,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
_cleanup_set_free_free_ Set *controllers = NULL;
const char *cgroup_root;
int r;
@@ -648,7 +805,7 @@ static int mount_legacy_cgroups(
if (r == 0) {
_cleanup_free_ char *options = NULL;
- r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
+ r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, false, selinux_apifs_context, &options);
if (r < 0)
return log_oom();
@@ -707,10 +864,8 @@ static int mount_legacy_cgroups(
return r;
r = symlink_idempotent(combined, target);
- if (r == -EINVAL) {
- log_error("Invalid existing symlink for combined hierarchy");
- return r;
- }
+ if (r == -EINVAL)
+ return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m");
if (r < 0)
return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
}
@@ -765,8 +920,10 @@ int mount_cgroups(
if (unified_requested)
return mount_unified_cgroups(dest);
- else
- return mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
+ else if (cg_ns_supported())
+ return mount_legacy_cgns_supported(userns, uid_shift, uid_range, selinux_apifs_context);
+
+ return mount_legacy_cgns_unsupported(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
int mount_systemd_cgroup_writable(
@@ -834,7 +991,7 @@ int setup_volatile_state(
return log_error_errno(errno, "Failed to create %s: %m", directory);
options = "mode=755";
- r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
+ r = tmpfs_patch_options(options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf);
if (r < 0)
return log_oom();
if (r > 0)
@@ -870,7 +1027,7 @@ int setup_volatile(
return log_error_errno(errno, "Failed to create temporary directory: %m");
options = "mode=755";
- r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
+ r = tmpfs_patch_options(options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf);
if (r < 0)
return log_oom();
if (r > 0)
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c
index 20103c5e88..e5b76a0c5d 100644
--- a/src/nspawn/nspawn-register.c
+++ b/src/nspawn/nspawn-register.c
@@ -104,7 +104,7 @@ int register_machine(
return bus_log_create_error(r);
}
- r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
+ r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
if (r < 0)
return bus_log_create_error(r);
@@ -112,26 +112,19 @@ int register_machine(
* systemd-nspawn@.service, to keep the device
* policies in sync regardless if we are run with or
* without the --keep-unit switch. */
- r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 9,
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
/* Allow the container to
* access and create the API
* device nodes, so that
* PrivateDevices= in the
* container can work
* fine */
- "/dev/null", "rwm",
- "/dev/zero", "rwm",
- "/dev/full", "rwm",
- "/dev/random", "rwm",
- "/dev/urandom", "rwm",
- "/dev/tty", "rwm",
"/dev/net/tun", "rwm",
/* Allow the container
* access to ptys. However,
* do not permit the
* container to ever create
* these device nodes. */
- "/dev/pts/ptmx", "rw",
"char-pts", "rw");
if (r < 0)
return bus_log_create_error(r);
diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c
index 54db1b47f8..3ab7160ebe 100644
--- a/src/nspawn/nspawn-seccomp.c
+++ b/src/nspawn/nspawn-seccomp.c
@@ -119,10 +119,8 @@ static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx,
r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0);
if (r == -EFAULT)
continue; /* unknown syscall */
- if (r < 0) {
- log_error_errno(r, "Failed to block syscall: %m");
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to block syscall: %m");
}
return 0;
diff --git a/src/nspawn/nspawn-setuid.c b/src/nspawn/nspawn-setuid.c
index ee15a47e93..b8e8e091c8 100644
--- a/src/nspawn/nspawn-setuid.c
+++ b/src/nspawn/nspawn-setuid.c
@@ -124,14 +124,12 @@ int change_uid_gid(const char *user, char **_home) {
fd = -1;
if (!fgets(line, sizeof(line), f)) {
-
if (!ferror(f)) {
log_error("Failed to resolve user %s.", user);
return -ESRCH;
}
- log_error_errno(errno, "Failed to read from getent: %m");
- return -errno;
+ return log_error_errno(errno, "Failed to read from getent: %m");
}
truncate_nl(line);
@@ -214,8 +212,7 @@ int change_uid_gid(const char *user, char **_home) {
return -ESRCH;
}
- log_error_errno(errno, "Failed to read from getent: %m");
- return -errno;
+ return log_error_errno(errno, "Failed to read from getent: %m");
}
truncate_nl(line);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index befe342b02..f8a43d89a2 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -61,9 +61,9 @@
#include "fs-util.h"
#include "gpt.h"
#include "hostname-util.h"
+#include "id128-util.h"
#include "log.h"
#include "loopback-setup.h"
-#include "machine-id-setup.h"
#include "machine-image.h"
#include "macro.h"
#include "missing.h"
@@ -76,10 +76,10 @@
#include "nspawn-network.h"
#include "nspawn-patch-uid.h"
#include "nspawn-register.h"
+#include "nspawn-seccomp.h"
#include "nspawn-settings.h"
#include "nspawn-setuid.h"
#include "nspawn-stub-pid1.h"
-#include "nspawn-seccomp.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
@@ -101,9 +101,11 @@
#include "util.h"
/* Note that devpts's gid= parameter parses GIDs as signed values, hence we stay away from the upper half of the 32bit
- * UID range here */
+ * UID range here. We leave a bit of room at the lower end and a lot of room at the upper end, so that other subsystems
+ * may have their own allocation ranges too. */
#define UID_SHIFT_PICK_MIN ((uid_t) UINT32_C(0x00080000))
#define UID_SHIFT_PICK_MAX ((uid_t) UINT32_C(0x6FFF0000))
+
/* nspawn is listening on the socket at the path in the constant nspawn_notify_socket_path
* nspawn_notify_socket_path is relative to the container
* the init process in the container pid can send messages to nspawn following the sd_notify(3) protocol */
@@ -277,7 +279,6 @@ static void help(void) {
, program_invocation_short_name);
}
-
static int custom_mounts_prepare(void) {
unsigned i;
int r;
@@ -593,9 +594,12 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_UUID:
r = sd_id128_from_string(optarg, &arg_uuid);
- if (r < 0) {
- log_error("Invalid UUID: %s", optarg);
- return r;
+ if (r < 0)
+ return log_error_errno(r, "Invalid UUID: %s", optarg);
+
+ if (sd_id128_is_null(arg_uuid)) {
+ log_error("Machine UUID may not be all zeroes.");
+ return -EINVAL;
}
arg_settings_mask |= SETTING_MACHINE_ID;
@@ -1265,20 +1269,9 @@ static int setup_resolv_conf(const char *dest) {
return 0;
}
-static char* id128_format_as_uuid(sd_id128_t id, char s[37]) {
- assert(s);
-
- snprintf(s, 37,
- "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
- SD_ID128_FORMAT_VAL(id));
-
- return s;
-}
-
static int setup_boot_id(const char *dest) {
+ sd_id128_t rnd = SD_ID128_NULL;
const char *from, *to;
- sd_id128_t rnd = {};
- char as_uuid[37];
int r;
if (arg_share_system)
@@ -1294,18 +1287,16 @@ static int setup_boot_id(const char *dest) {
if (r < 0)
return log_error_errno(r, "Failed to generate random boot id: %m");
- id128_format_as_uuid(rnd, as_uuid);
-
- r = write_string_file(from, as_uuid, WRITE_STRING_FILE_CREATE);
+ r = id128_write(from, ID128_UUID, rnd, false);
if (r < 0)
return log_error_errno(r, "Failed to write boot id: %m");
if (mount(from, to, NULL, MS_BIND, NULL) < 0)
r = log_error_errno(errno, "Failed to bind mount boot id: %m");
else if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NODEV, NULL) < 0)
- log_warning_errno(errno, "Failed to make boot id read-only: %m");
+ log_warning_errno(errno, "Failed to make boot id read-only, ignoring: %m");
- unlink(from);
+ (void) unlink(from);
return r;
}
@@ -2248,33 +2239,37 @@ static int mount_device(const char *what, const char *where, const char *directo
}
static int setup_machine_id(const char *directory) {
+ const char *etc_machine_id;
+ sd_id128_t id;
int r;
- const char *etc_machine_id, *t;
- _cleanup_free_ char *s = NULL;
- etc_machine_id = prefix_roota(directory, "/etc/machine-id");
+ /* If the UUID in the container is already set, then that's what counts, and we use. If it isn't set, and the
+ * caller passed --uuid=, then we'll pass it in the $container_uuid env var to PID 1 of the container. The
+ * assumption is that PID 1 will then write it to /etc/machine-id to make it persistent. If --uuid= is not
+ * passed we generate a random UUID, and pass it via $container_uuid. In effect this means that /etc/machine-id
+ * in the container and our idea of the container UUID will always be in sync (at least if PID 1 in the
+ * container behaves nicely). */
- r = read_one_line_file(etc_machine_id, &s);
- if (r < 0)
- return log_error_errno(r, "Failed to read machine ID from %s: %m", etc_machine_id);
+ etc_machine_id = prefix_roota(directory, "/etc/machine-id");
- t = strstrip(s);
+ r = id128_read(etc_machine_id, ID128_PLAIN, &id);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOENT, -ENOMEDIUM)) /* If the file is missing or empty, we don't mind */
+ return log_error_errno(r, "Failed to read machine ID from container image: %m");
- if (!isempty(t)) {
- r = sd_id128_from_string(t, &arg_uuid);
- if (r < 0)
- return log_error_errno(r, "Failed to parse machine ID from %s: %m", etc_machine_id);
- } else {
if (sd_id128_is_null(arg_uuid)) {
r = sd_id128_randomize(&arg_uuid);
if (r < 0)
- return log_error_errno(r, "Failed to generate random machine ID: %m");
+ return log_error_errno(r, "Failed to acquire randomized machine UUID: %m");
+ }
+ } else {
+ if (sd_id128_is_null(id)) {
+ log_error("Machine ID in container image is zero, refusing.");
+ return -EINVAL;
}
- }
- r = machine_id_setup(directory, arg_uuid);
- if (r < 0)
- return log_error_errno(r, "Failed to setup machine ID: %m");
+ arg_uuid = id;
+ }
return 0;
}
@@ -2633,9 +2628,24 @@ static int inner_child(
return -ESRCH;
}
- r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
- if (r < 0)
- return r;
+ if (cg_ns_supported()) {
+ r = unshare(CLONE_NEWCGROUP);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to unshare cgroup namespace");
+ r = mount_cgroups(
+ "",
+ arg_unified_cgroup_hierarchy,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+ } else {
+ r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
+ if (r < 0)
+ return r;
+ }
r = reset_uid_gid();
if (r < 0)
@@ -2701,9 +2711,9 @@ static int inner_child(
(asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0))
return log_oom();
- assert(!sd_id128_equal(arg_uuid, SD_ID128_NULL));
+ assert(!sd_id128_is_null(arg_uuid));
- if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_format_as_uuid(arg_uuid, as_uuid)) < 0)
+ if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_to_uuid_string(arg_uuid, as_uuid)) < 0)
return log_oom();
if (fdset_size(fds) > 0) {
@@ -3019,15 +3029,17 @@ static int outer_child(
if (r < 0)
return r;
- r = mount_cgroups(
- directory,
- arg_unified_cgroup_hierarchy,
- arg_userns_mode != USER_NAMESPACE_NO,
- arg_uid_shift,
- arg_uid_range,
- arg_selinux_apifs_context);
- if (r < 0)
- return r;
+ if (!cg_ns_supported()) {
+ r = mount_cgroups(
+ directory,
+ arg_unified_cgroup_hierarchy,
+ arg_userns_mode != USER_NAMESPACE_NO,
+ arg_uid_shift,
+ arg_uid_range,
+ arg_selinux_apifs_context);
+ if (r < 0)
+ return r;
+ }
r = mount_move_root(directory);
if (r < 0)
@@ -3599,7 +3611,7 @@ int main(int argc, char *argv[]) {
}
if (r < 0) {
log_error_errno(r, "Failed to lock %s: %m", arg_directory);
- return r;
+ goto finish;
}
if (arg_template) {