summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIago López Galeiras <iaguis@gmail.com>2015-10-15 12:13:13 +0200
committerIago López Galeiras <iaguis@gmail.com>2015-10-20 10:19:23 +0200
commitd167824896e583ffaca891b1c355ff852496ff66 (patch)
tree7a5e78bc85ee51a3b1639e8ffa75133eaff55ae8
parentec566e4c7cee67ec2c39475ef08f18a9f1b80efd (diff)
nspawn: skip /sys-as-tmpfs if we don't use private-network
Since v3.11/7dc5dbc ("sysfs: Restrict mounting sysfs"), the kernel doesn't allow mounting sysfs if you don't have CAP_SYS_ADMIN rights over the network namespace. So the mounting /sys as a tmpfs code introduced in d8fc6a000fe21b0c1ba27fbfed8b42d00b349a4b doesn't work with user namespaces if we don't use private-net. The reason is that we mount sysfs inside the container and we're in the network namespace of the host but we don't have CAP_SYS_ADMIN over that namespace. To fix that, we mount /sys as a sysfs (instead of tmpfs) if we don't use private network and ignore the /sys-as-a-tmpfs code if we find that /sys is already mounted as sysfs. Fixes #1555
-rw-r--r--src/basic/util.c30
-rw-r--r--src/basic/util.h6
-rw-r--r--src/nspawn/nspawn-mount.c42
-rw-r--r--src/nspawn/nspawn-mount.h2
-rw-r--r--src/nspawn/nspawn.c4
5 files changed, 66 insertions, 18 deletions
diff --git a/src/basic/util.c b/src/basic/util.c
index 2565b0f547..460e74061a 100644
--- a/src/basic/util.c
+++ b/src/basic/util.c
@@ -2500,11 +2500,35 @@ char *getusername_malloc(void) {
return lookup_uid(getuid());
}
-bool is_temporary_fs(const struct statfs *s) {
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) {
assert(s);
+ assert_cc(sizeof(statfs_f_type_t) >= sizeof(s->f_type));
+
+ return F_TYPE_EQUAL(s->f_type, magic_value);
+}
+
+int fd_check_fstype(int fd, statfs_f_type_t magic_value) {
+ struct statfs s;
+
+ if (fstatfs(fd, &s) < 0)
+ return -errno;
+
+ return is_fs_type(&s, magic_value);
+}
+
+int path_check_fstype(const char *path, statfs_f_type_t magic_value) {
+ _cleanup_close_ int fd = -1;
- return F_TYPE_EQUAL(s->f_type, TMPFS_MAGIC) ||
- F_TYPE_EQUAL(s->f_type, RAMFS_MAGIC);
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return -errno;
+
+ return fd_check_fstype(fd, magic_value);
+}
+
+bool is_temporary_fs(const struct statfs *s) {
+ return is_fs_type(s, TMPFS_MAGIC) ||
+ is_fs_type(s, RAMFS_MAGIC);
}
int fd_is_temporary_fs(int fd) {
diff --git a/src/basic/util.h b/src/basic/util.h
index 6c63bc221f..ac74650ff6 100644
--- a/src/basic/util.h
+++ b/src/basic/util.h
@@ -365,6 +365,12 @@ char* getusername_malloc(void);
int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid);
int fchmod_and_fchown(int fd, mode_t mode, uid_t uid, gid_t gid);
+typedef long statfs_f_type_t;
+
+bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) _pure_;
+int fd_check_fstype(int fd, statfs_f_type_t magic_value);
+int path_check_fstype(const char *path, statfs_f_type_t magic_value);
+
bool is_temporary_fs(const struct statfs *s) _pure_;
int fd_is_temporary_fs(int fd);
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 6c8b1d7a26..65bcb68242 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -20,6 +20,7 @@
***/
#include <sys/mount.h>
+#include <linux/magic.h>
#include "util.h"
#include "rm-rf.h"
@@ -218,8 +219,19 @@ static int tmpfs_patch_options(
int mount_sysfs(const char *dest) {
const char *full, *top, *x;
+ int r;
top = prefix_roota(dest, "/sys");
+ r = path_check_fstype(top, SYSFS_MAGIC);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
+ /* /sys might already be mounted as sysfs by the outer child in the
+ * !netns case. In this case, it's all good. Don't touch it because we
+ * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
+ */
+ if (r > 0)
+ return 0;
+
full = prefix_roota(top, "/full");
(void) mkdir(full, 0755);
@@ -264,6 +276,7 @@ int mount_sysfs(const char *dest) {
int mount_all(const char *dest,
bool use_userns, bool in_userns,
+ bool use_netns,
uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
@@ -274,21 +287,23 @@ int mount_all(const char *dest,
const char *options;
unsigned long flags;
bool fatal;
- bool userns;
+ bool in_userns;
+ bool use_netns;
} MountPoint;
static const MountPoint mount_table[] = {
- { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
- { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
- { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
- { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
- { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
- { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
- { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
+ { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
+ { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
+ { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
#ifdef HAVE_SELINUX
- { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
- { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
#endif
};
@@ -299,7 +314,10 @@ int mount_all(const char *dest,
_cleanup_free_ char *where = NULL, *options = NULL;
const char *o;
- if (in_userns != mount_table[k].userns)
+ if (in_userns != mount_table[k].in_userns)
+ continue;
+
+ if (!use_netns && mount_table[k].use_netns)
continue;
where = prefix_root(dest, mount_table[k].where);
diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h
index 54cab87665..bdab23bcca 100644
--- a/src/nspawn/nspawn-mount.h
+++ b/src/nspawn/nspawn-mount.h
@@ -57,7 +57,7 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s);
int custom_mount_compare(const void *a, const void *b);
-int mount_all(const char *dest, bool use_userns, bool in_userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
int mount_sysfs(const char *dest);
int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index ab93f98df4..fca2b72edd 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2450,7 +2450,7 @@ static int inner_child(
}
}
- r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
+ r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_private_network, arg_uid_range, arg_selinux_apifs_context);
if (r < 0)
return r;
@@ -2705,7 +2705,7 @@ static int outer_child(
return log_error_errno(r, "Failed to make tree read-only: %m");
}
- r = mount_all(directory, arg_userns, false, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
+ r = mount_all(directory, arg_userns, false, arg_private_network, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
if (r < 0)
return r;