summaryrefslogtreecommitdiff
path: root/src/nspawn/nspawn-mount.c
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2015-09-30 13:47:28 +0200
committerLennart Poettering <lennart@poettering.net>2015-09-30 15:19:33 +0200
commitd8fc6a000fe21b0c1ba27fbfed8b42d00b349a4b (patch)
tree7b21fcfde83588969aebf609e0ad835566a4358a /src/nspawn/nspawn-mount.c
parent403af78c8049358496ec10920b3aaf741056daf9 (diff)
nspawn: mount /sys as tmpfs, and then mount only select subdirs of the real sysfs below it
This way we can hide things like /sys/firmware or /sys/hypervisor from the container, while keeping the device tree around. While this is a security benefit in itself it also allows us to fix issue #1277. Previously we'd mount /sys before creating the user namespace, in order to be able to mount /sys/fs/cgroup/* beneath it (which resides in it), which we can only mount outside of the user namespace. To ensure that the user namespace owns the network namespace we'd set up the network namespace at the same time as the user namespace. Thus, we'd still see the /sys/class/net/ from the originating network namespace, even though we are in our own network namespace now. With this patch, /sys is mounted before transitioning into the user namespace as tmpfs, so that we can also mount /sys/fs/cgroup/* into it this early. The directories such as /sys/class/ are then later added in from the real sysfs from inside the network and user namespace so that they actually show what is available in it. Fixes #1277
Diffstat (limited to 'src/nspawn/nspawn-mount.c')
-rw-r--r--src/nspawn/nspawn-mount.c50
1 files changed, 49 insertions, 1 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 85e81b43fe..3d302ef9ad 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -216,6 +216,52 @@ static int tmpfs_patch_options(
return !!buf;
}
+int mount_sysfs(const char *dest) { /* Fill the "/sys" directory below dest with read-only bind mounts of a
+                                     * fixed set of sysfs subdirectories, then remount that directory
+                                     * read-only. Returns 0 on success; on failure returns the result of
+                                     * log_error_errno()/log_oom() (presumably a negative errno-style
+                                     * code -- TODO confirm against their definitions). */
+ const char *full, *top, *x;
+
+ top = prefix_roota(dest, "/sys"); /* "/sys" below dest: where the selected subdirectories end up */
+ full = prefix_roota(top, "/full"); /* temporary mount point for a complete sysfs instance */
+
+ (void) mkdir(full, 0755); /* result deliberately ignored; a missing directory surfaces in mount() below */
+
+ if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
+
+ FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") { /* only these subdirectories are carried over */
+ _cleanup_free_ char *from = NULL, *to = NULL; /* presumably released automatically at scope exit -- confirm macro */
+
+ from = prefix_root(full, x); /* source: subdirectory inside the temporary full sysfs */
+ if (!from)
+ return log_oom();
+
+ to = prefix_root(top, x); /* target: same-named subdirectory directly below top */
+ if (!to)
+ return log_oom();
+
+ (void) mkdir(to, 0755); /* result deliberately ignored; the bind mount below reports a missing target */
+
+ if (mount(from, to, NULL, MS_BIND, NULL) < 0) /* first establish the bind mount ... */
+ return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
+
+ if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0) /* ... then remount it with the restrictive flags */
+ return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
+ }
+
+ if (umount(full) < 0) /* the temporary full sysfs is no longer needed once the bind mounts exist */
+ return log_error_errno(errno, "Failed to unmount %s: %m", full);
+
+ if (rmdir(full) < 0) /* remove the now-empty temporary mount point */
+ return log_error_errno(errno, "Failed to remove %s: %m", full);
+
+ x = prefix_roota(top, "/fs/kdbus");
+ (void) mkdir(x, 0755); /* created before top is remounted read-only below; result deliberately ignored */
+
+ if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0) /* finally make top itself read-only */
+ return log_error_errno(errno, "Failed to make %s read-only: %m", top);
+
+ return 0;
+}
+
int mount_all(const char *dest,
bool use_userns, bool in_userns,
uid_t uid_shift, uid_t uid_range,
@@ -235,7 +281,7 @@ int mount_all(const char *dest,
{ "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
{ "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
{ NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
- { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
+ { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
{ "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
{ "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
{ "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
@@ -570,6 +616,8 @@ static int mount_legacy_cgroups(
cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
+ (void) mkdir_p(cgroup_root, 0755);
+
/* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
if (r < 0)