summaryrefslogtreecommitdiff
path: root/src/nspawn/nspawn-mount.c
diff options
context:
space:
mode:
authorSergiusz Urbaniak <sergiusz.urbaniak@gmail.com>2016-10-14 14:00:15 +0200
committerSergiusz Urbaniak <sergiusz.urbaniak@gmail.com>2016-11-18 09:50:40 +0100
commit4f086aab52812472a24c9b8b627589880a38696e (patch)
tree22fd04bdbb8346cbfda4201627eabf57c5b9ce51 /src/nspawn/nspawn-mount.c
parent843d5baf6aad6c53fc00ea8d95d83209a4f92de1 (diff)
nspawn: R/W support for /sys, and /proc/sys
This commit adds the possibility to leave /sys, and /proc/sys read-write. It introduces a new (undocumented) env var SYSTEMD_NSPAWN_API_VFS_WRITABLE to enable this feature. If set to "yes", /sys, and /proc/sys will be read-write. If set to "no", /sys, and /proc/sys will be read-only. If set to "network" /proc/sys/net will be read-write. This is useful in use-cases, where systemd-nspawn is used in an external network namespace. This adds the possibility to start privileged containers which need more control over settings in the /proc, and /sys filesystem. This is also a follow-up on the discussion from https://github.com/systemd/systemd/pull/4018#r76971862 where an introduction of a simple env var to enable R/W support for those directories was already discussed.
Diffstat (limited to 'src/nspawn/nspawn-mount.c')
-rw-r--r--src/nspawn/nspawn-mount.c70
1 files changed, 42 insertions, 28 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index 0c24b8e18a..95bb3c09b0 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -225,9 +225,10 @@ static int tmpfs_patch_options(
return !!buf;
}
-int mount_sysfs(const char *dest) {
+int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
const char *full, *top, *x;
int r;
+ unsigned long extra_flags = 0;
top = prefix_roota(dest, "/sys");
r = path_check_fstype(top, SYSFS_MAGIC);
@@ -244,8 +245,11 @@ int mount_sysfs(const char *dest) {
(void) mkdir(full, 0755);
+ if (mount_settings & MOUNT_APPLY_APIVFS_RO)
+ extra_flags |= MS_RDONLY;
+
r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs",
- MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
if (r < 0)
return r;
@@ -267,7 +271,7 @@ int mount_sysfs(const char *dest) {
return r;
r = mount_verbose(LOG_ERR, NULL, to, NULL,
- MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL);
+ MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
if (r < 0)
return r;
}
@@ -291,7 +295,7 @@ int mount_sysfs(const char *dest) {
}
return mount_verbose(LOG_ERR, NULL, top, NULL,
- MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL);
+ MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
}
static int mkdir_userns(const char *path, mode_t mode, bool in_userns, uid_t uid_shift) {
@@ -348,8 +352,7 @@ static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, boo
}
int mount_all(const char *dest,
- bool use_userns, bool in_userns,
- bool use_netns,
+ MountSettingsMask mount_settings,
uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
@@ -359,41 +362,52 @@ int mount_all(const char *dest,
const char *type;
const char *options;
unsigned long flags;
- bool fatal;
- bool in_userns;
- bool use_netns;
+ MountSettingsMask mount_settings;
} MountPoint;
static const MountPoint mount_table[] = {
- { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
- { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/
- { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */
- { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */
- { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, false, true, false }, /* Bind mount first ...*/
- { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, true, false }, /* ... then, make it r/o */
- { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
- { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
- { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
- { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
- { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, true, false },
+ /* inner child mounts */
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_IN_USERNS },
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ...*/
+ { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
+ { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */
+ { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ...*/
+ { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, MOUNT_FATAL|MOUNT_IN_USERNS },
+
+ /* outer child mounts */
+ { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS },
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO }, /* skipped if above was mounted */
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL }, /* skipped if above was mounted */
+
+ { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL },
#ifdef HAVE_SELINUX
- { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
- { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, 0 }, /* Bind mount first */
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, 0 }, /* Then, make it r/o */
#endif
};
unsigned k;
int r;
+ bool use_userns = (mount_settings & MOUNT_USE_USERNS);
+ bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
+ bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
+ bool in_userns = (mount_settings & MOUNT_IN_USERNS);
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
_cleanup_free_ char *where = NULL, *options = NULL;
const char *o;
+ bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
+
+ if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
+ continue;
- if (in_userns != mount_table[k].in_userns)
+ if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS))
continue;
- if (!use_netns && mount_table[k].use_netns)
+ if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO))
continue;
where = prefix_root(dest, mount_table[k].where);
@@ -410,7 +424,7 @@ int mount_all(const char *dest,
r = mkdir_userns_p(dest, where, 0755, in_userns, uid_shift);
if (r < 0 && r != -EEXIST) {
- if (mount_table[k].fatal)
+ if (fatal)
return log_error_errno(r, "Failed to create directory %s: %m", where);
log_debug_errno(r, "Failed to create directory %s: %m", where);
@@ -429,13 +443,13 @@ int mount_all(const char *dest,
o = options;
}
- r = mount_verbose(mount_table[k].fatal ? LOG_ERR : LOG_DEBUG,
+ r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
mount_table[k].what,
where,
mount_table[k].type,
mount_table[k].flags,
o);
- if (r < 0 && mount_table[k].fatal)
+ if (r < 0 && fatal)
return r;
}