diff options
author | Sergiusz Urbaniak <sergiusz.urbaniak@gmail.com> | 2016-10-14 14:00:15 +0200 |
---|---|---|
committer | Sergiusz Urbaniak <sergiusz.urbaniak@gmail.com> | 2016-11-18 09:50:40 +0100 |
commit | 4f086aab52812472a24c9b8b627589880a38696e (patch) | |
tree | 22fd04bdbb8346cbfda4201627eabf57c5b9ce51 /src/nspawn/nspawn-mount.c | |
parent | 843d5baf6aad6c53fc00ea8d95d83209a4f92de1 (diff) |
nspawn: R/W support for /sys, and /proc/sys
This commit adds the possibility to leave /sys, and /proc/sys read-write.
It introduces a new (undocumented) env var SYSTEMD_NSPAWN_API_VFS_WRITABLE
to enable this feature.
If set to "yes", /sys, and /proc/sys will be read-write.
If set to "no", /sys, and /proc/sys will be read-only.
If set to "network" /proc/sys/net will be read-write. This is useful in
use-cases, where systemd-nspawn is used in an external network
namespace.
This adds the possibility to start privileged containers which need more
control over settings in the /proc, and /sys filesystem.
This is also a follow-up on the discussion from
https://github.com/systemd/systemd/pull/4018#r76971862 where an
introduction of a simple env var to enable R/W support for those
directories was already discussed.
Diffstat (limited to 'src/nspawn/nspawn-mount.c')
-rw-r--r-- | src/nspawn/nspawn-mount.c | 70 |
1 files changed, 42 insertions, 28 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 0c24b8e18a..95bb3c09b0 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -225,9 +225,10 @@ static int tmpfs_patch_options( return !!buf; } -int mount_sysfs(const char *dest) { +int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { const char *full, *top, *x; int r; + unsigned long extra_flags = 0; top = prefix_roota(dest, "/sys"); r = path_check_fstype(top, SYSFS_MAGIC); @@ -244,8 +245,11 @@ int mount_sysfs(const char *dest) { (void) mkdir(full, 0755); + if (mount_settings & MOUNT_APPLY_APIVFS_RO) + extra_flags |= MS_RDONLY; + r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs", - MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); + MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL); if (r < 0) return r; @@ -267,7 +271,7 @@ int mount_sysfs(const char *dest) { return r; r = mount_verbose(LOG_ERR, NULL, to, NULL, - MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL); + MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); if (r < 0) return r; } @@ -291,7 +295,7 @@ int mount_sysfs(const char *dest) { } return mount_verbose(LOG_ERR, NULL, top, NULL, - MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL); + MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); } static int mkdir_userns(const char *path, mode_t mode, bool in_userns, uid_t uid_shift) { @@ -348,8 +352,7 @@ static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, boo } int mount_all(const char *dest, - bool use_userns, bool in_userns, - bool use_netns, + MountSettingsMask mount_settings, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { @@ -359,41 +362,52 @@ int mount_all(const char *dest, const char *type; const char *options; unsigned long flags; - bool fatal; - bool in_userns; - bool use_netns; + MountSettingsMask mount_settings; } MountPoint; static const MountPoint mount_table[] = { - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/ - { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */ - { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, false, true, false }, /* Bind mount first ...*/ - { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, true, false }, /* ... then, make it r/o */ - { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, true, false }, + /* inner child mounts */ + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_IN_USERNS }, + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ...*/ + { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */ + { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ...*/ + { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */ + { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, MOUNT_FATAL|MOUNT_IN_USERNS }, + + /* outer child mounts */ + { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS }, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO }, /* skipped if above was mounted */ + { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL }, /* skipped if above was mounted */ + + { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL }, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, + { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, 0 }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, 0 }, /* Then, make it r/o */ #endif }; unsigned k; int r; + bool use_userns = (mount_settings & MOUNT_USE_USERNS); + bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS); + bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO); + bool in_userns = (mount_settings & MOUNT_IN_USERNS); for (k = 0; k < ELEMENTSOF(mount_table); k++) { _cleanup_free_ char *where = NULL, *options = NULL; const char *o; + bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL); + + if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS)) + continue; - if (in_userns != mount_table[k].in_userns) + if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS)) continue; - if (!use_netns && mount_table[k].use_netns) + if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO)) continue; where = prefix_root(dest, mount_table[k].where); @@ -410,7 +424,7 @@ int mount_all(const char *dest, r = mkdir_userns_p(dest, where, 0755, in_userns, uid_shift); if (r < 0 && r != -EEXIST) { - if (mount_table[k].fatal) + if (fatal) return log_error_errno(r, "Failed to create directory %s: %m", where); log_debug_errno(r, "Failed to create directory %s: %m", where); @@ -429,13 +443,13 @@ int mount_all(const char *dest, o = options; } - r = mount_verbose(mount_table[k].fatal ? LOG_ERR : LOG_DEBUG, + r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG, mount_table[k].what, where, mount_table[k].type, mount_table[k].flags, o); - if (r < 0 && mount_table[k].fatal) + if (r < 0 && fatal) return r; } |