From d167824896e583ffaca891b1c355ff852496ff66 Mon Sep 17 00:00:00 2001 From: Iago López Galeiras Date: Thu, 15 Oct 2015 12:13:13 +0200 Subject: nspawn: skip /sys-as-tmpfs if we don't use private-network Since v3.11/7dc5dbc ("sysfs: Restrict mounting sysfs"), the kernel doesn't allow mounting sysfs if you don't have CAP_SYS_ADMIN rights over the network namespace. So the mounting /sys as a tmpfs code introduced in d8fc6a000fe21b0c1ba27fbfed8b42d00b349a4b doesn't work with user namespaces if we don't use private-net. The reason is that we mount sysfs inside the container and we're in the network namespace of the host but we don't have CAP_SYS_ADMIN over that namespace. To fix that, we mount /sys as a sysfs (instead of tmpfs) if we don't use private network and ignore the /sys-as-a-tmpfs code if we find that /sys is already mounted as sysfs. Fixes #1555 --- src/basic/util.c | 30 +++++++++++++++++++++++++++--- src/basic/util.h | 6 ++++++ src/nspawn/nspawn-mount.c | 42 ++++++++++++++++++++++++++++++------------ src/nspawn/nspawn-mount.h | 2 +- src/nspawn/nspawn.c | 4 ++-- 5 files changed, 66 insertions(+), 18 deletions(-) diff --git a/src/basic/util.c b/src/basic/util.c index 2565b0f547..460e74061a 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -2500,11 +2500,35 @@ char *getusername_malloc(void) { return lookup_uid(getuid()); } -bool is_temporary_fs(const struct statfs *s) { +bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) { assert(s); + assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type)); + + return F_TYPE_EQUAL(s->f_type, magic_value); +} + +int fd_check_fstype(int fd, statfs_f_type_t magic_value) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_fs_type(&s, magic_value); +} + +int path_check_fstype(const char *path, statfs_f_type_t magic_value) { + _cleanup_close_ int fd = -1; - return F_TYPE_EQUAL(s->f_type, TMPFS_MAGIC) || - F_TYPE_EQUAL(s->f_type, RAMFS_MAGIC); + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + + return fd_check_fstype(fd, magic_value); +} + +bool is_temporary_fs(const struct statfs *s) { + return is_fs_type(s, TMPFS_MAGIC) || + is_fs_type(s, RAMFS_MAGIC); } int fd_is_temporary_fs(int fd) { diff --git a/src/basic/util.h b/src/basic/util.h index 6c63bc221f..ac74650ff6 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -365,6 +365,12 @@ char* getusername_malloc(void); int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid); int fchmod_and_fchown(int fd, mode_t mode, uid_t uid, gid_t gid); +typedef long statfs_f_type_t; + +bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_; +int fd_check_fstype(int fd, statfs_f_type_t magic_value); +int path_check_fstype(const char *path, statfs_f_type_t magic_value); + bool is_temporary_fs(const struct statfs *s) _pure_; int fd_is_temporary_fs(int fd); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 6c8b1d7a26..65bcb68242 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -20,6 +20,7 @@ ***/ #include +#include #include "util.h" #include "rm-rf.h" @@ -218,8 +219,19 @@ static int tmpfs_patch_options( int mount_sysfs(const char *dest) { const char *full, *top, *x; + int r; top = prefix_roota(dest, "/sys"); + r = path_check_fstype(top, SYSFS_MAGIC); + if (r < 0) + return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top); + /* /sys might already be mounted as sysfs by the outer child in the + * !netns case. In this case, it's all good. Don't touch it because we + * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555. + */ + if (r > 0) + return 0; + full = prefix_roota(top, "/full"); (void) mkdir(full, 0755); @@ -264,6 +276,7 @@ int mount_sysfs(const char *dest) { int mount_all(const char *dest, bool use_userns, bool in_userns, + bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { @@ -274,21 +287,23 @@ int mount_all(const char *dest, const char *options; unsigned long flags; bool fatal; - bool userns; + bool in_userns; + bool use_netns; } MountPoint; static const MountPoint mount_table[] = { - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */ - { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false }, - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false }, - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false }, + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */ + { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, + { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ #endif }; @@ -299,7 +314,10 @@ int mount_all(const char *dest, _cleanup_free_ char *where = NULL, *options = NULL; const char *o; - if (in_userns != mount_table[k].userns) + if (in_userns != mount_table[k].in_userns) + continue; + + if (!use_netns && mount_table[k].use_netns) continue; where = prefix_root(dest, mount_table[k].where); diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 54cab87665..bdab23bcca 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -57,7 +57,7 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s); int custom_mount_compare(const void *a, const void *b); -int mount_all(const char *dest, bool use_userns, bool in_userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); +int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int mount_sysfs(const char *dest); int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index ab93f98df4..fca2b72edd 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2450,7 +2450,7 @@ static int inner_child( } } - r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); + r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_private_network, arg_uid_range, arg_selinux_apifs_context); if (r < 0) return r; @@ -2705,7 +2705,7 @@ static int outer_child( return log_error_errno(r, "Failed to make tree read-only: %m"); } - r = mount_all(directory, arg_userns, false, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); + r = mount_all(directory, arg_userns, false, arg_private_network, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); if (r < 0) return r; -- cgit v1.2.3-54-g00ecf