diff options
-rw-r--r-- | src/nspawn/nspawn-mount.c | 70 | ||||
-rw-r--r-- | src/nspawn/nspawn-mount.h | 13 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 53 | ||||
-rwxr-xr-x | test/TEST-13-NSPAWN-SMOKE/test.sh | 37 |
4 files changed, 126 insertions, 47 deletions
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 0c24b8e18a..95bb3c09b0 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -225,9 +225,10 @@ static int tmpfs_patch_options( return !!buf; } -int mount_sysfs(const char *dest) { +int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { const char *full, *top, *x; int r; + unsigned long extra_flags = 0; top = prefix_roota(dest, "/sys"); r = path_check_fstype(top, SYSFS_MAGIC); @@ -244,8 +245,11 @@ int mount_sysfs(const char *dest) { (void) mkdir(full, 0755); + if (mount_settings & MOUNT_APPLY_APIVFS_RO) + extra_flags |= MS_RDONLY; + r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs", - MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); + MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL); if (r < 0) return r; @@ -267,7 +271,7 @@ int mount_sysfs(const char *dest) { return r; r = mount_verbose(LOG_ERR, NULL, to, NULL, - MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL); + MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); if (r < 0) return r; } @@ -291,7 +295,7 @@ int mount_sysfs(const char *dest) { } return mount_verbose(LOG_ERR, NULL, top, NULL, - MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL); + MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL); } static int mkdir_userns(const char *path, mode_t mode, bool in_userns, uid_t uid_shift) { @@ -348,8 +352,7 @@ static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, boo } int mount_all(const char *dest, - bool use_userns, bool in_userns, - bool use_netns, + MountSettingsMask mount_settings, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { @@ -359,41 +362,52 @@ int mount_all(const char *dest, const char *type; const char *options; unsigned long flags; - bool fatal; - bool in_userns; - bool use_netns; + MountSettingsMask mount_settings; } MountPoint; static const MountPoint mount_table[] = { - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/ - { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */ - { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, false, true, false }, /* Bind mount first ...*/ - { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, true, false }, /* ... then, make it r/o */ - { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, true, false }, + /* inner child mounts */ + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_IN_USERNS }, + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ...*/ + { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */ + { "/proc/sysrq-trigger", "/proc/sysrq-trigger", NULL, NULL, MS_BIND, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ...*/ + { NULL, "/proc/sysrq-trigger", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */ + { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, MOUNT_FATAL|MOUNT_IN_USERNS }, + + /* outer child mounts */ + { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS }, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO }, /* skipped if above was mounted */ + { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_FATAL }, /* skipped if above was mounted */ + + { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL }, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, + { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, 0 }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, 0 }, /* Then, make it r/o */ #endif }; unsigned k; int r; + bool use_userns = (mount_settings & MOUNT_USE_USERNS); + bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS); + bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO); + bool in_userns = (mount_settings & MOUNT_IN_USERNS); for (k = 0; k < ELEMENTSOF(mount_table); k++) { _cleanup_free_ char *where = NULL, *options = NULL; const char *o; + bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL); + + if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS)) + continue; - if (in_userns != mount_table[k].in_userns) + if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS)) continue; - if (!use_netns && mount_table[k].use_netns) + if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO)) continue; where = prefix_root(dest, mount_table[k].where); @@ -410,7 +424,7 @@ int mount_all(const char *dest, r = mkdir_userns_p(dest, where, 0755, in_userns, uid_shift); if (r < 0 && r != -EEXIST) { - if (mount_table[k].fatal) + if (fatal) return log_error_errno(r, "Failed to create directory %s: %m", where); log_debug_errno(r, "Failed to create directory %s: %m", where); @@ -429,13 +443,13 @@ int mount_all(const char *dest, o = options; } - r = mount_verbose(mount_table[k].fatal ? LOG_ERR : LOG_DEBUG, + r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG, mount_table[k].what, where, mount_table[k].type, mount_table[k].flags, o); - if (r < 0 && mount_table[k].fatal) + if (r < 0 && fatal) return r; } diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 7307a838a5..74aee7ee7f 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -23,6 +23,15 @@ #include "cgroup-util.h" +typedef enum MountSettingsMask { + MOUNT_FATAL = 1 << 0, /* if set, a mount error is considered fatal */ + MOUNT_USE_USERNS = 1 << 1, /* if set, mounts are patched considering uid/gid shifts in a user namespace */ + MOUNT_IN_USERNS = 1 << 2, /* if set, the mount is executed in the inner child, otherwise in the outer child */ + MOUNT_APPLY_APIVFS_RO = 1 << 3, /* if set, /proc/sys, and /sysfs will be mounted read-only, otherwise read-write. */ + MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write. + Works only if MOUNT_APPLY_APIVFS_RO is also set. */ +} MountSettingsMask; + typedef enum VolatileMode { VOLATILE_NO, VOLATILE_YES, @@ -57,8 +66,8 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s); int custom_mount_compare(const void *a, const void *b); -int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); -int mount_sysfs(const char *dest); +int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); +int mount_sysfs(const char *dest, MountSettingsMask mount_settings); int mount_cgroups(const char *dest, CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool use_cgns); int mount_systemd_cgroup_writable(const char *dest, CGroupUnified unified_requested); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 50d8aa049c..a6adbbe879 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -197,6 +197,7 @@ static const char *arg_container_service_name = "systemd-nspawn"; static bool arg_notify_ready = false; static bool arg_use_cgns = true; static unsigned long arg_clone_ns_flags = CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS; +static MountSettingsMask arg_mount_settings = MOUNT_APPLY_APIVFS_RO; static void help(void) { printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n" @@ -380,6 +381,31 @@ static void parse_share_ns_env(const char *name, unsigned long ns_flag) { arg_clone_ns_flags = (arg_clone_ns_flags & ~ns_flag) | (r > 0 ? 0 : ns_flag); } +static void parse_mount_settings_env(void) { + int r; + const char *e; + + e = getenv("SYSTEMD_NSPAWN_API_VFS_WRITABLE"); + if (!e) + return; + + if (streq(e, "network")) { + arg_mount_settings |= MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS; + return; + } + + r = parse_boolean(e); + if (r < 0) { + log_warning_errno(r, "Failed to parse SYSTEMD_NSPAWN_API_VFS_WRITABLE from environment, ignoring."); + return; + } else if (r > 0) + arg_mount_settings &= ~MOUNT_APPLY_APIVFS_RO; + else + arg_mount_settings |= MOUNT_APPLY_APIVFS_RO; + + arg_mount_settings &= ~MOUNT_APPLY_APIVFS_NETNS; +} + static int parse_argv(int argc, char *argv[]) { enum { @@ -1072,6 +1098,14 @@ static int parse_argv(int argc, char *argv[]) { parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS); parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_SYSTEM", CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS); + if (arg_userns_mode != USER_NAMESPACE_NO) + arg_mount_settings |= MOUNT_USE_USERNS; + + if (arg_private_network) + arg_mount_settings |= MOUNT_APPLY_APIVFS_NETNS; + + parse_mount_settings_env(); + if (!(arg_clone_ns_flags & CLONE_NEWPID) || !(arg_clone_ns_flags & CLONE_NEWUTS)) { arg_register = false; @@ -1166,6 +1200,15 @@ static int parse_argv(int argc, char *argv[]) { } static int verify_arguments(void) { + if (arg_userns_mode != USER_NAMESPACE_NO && (arg_mount_settings & MOUNT_APPLY_APIVFS_NETNS) && !arg_private_network) { + log_error("Invalid namespacing settings. Mounting sysfs with --private-users requires --private-network."); + return -EINVAL; + } + + if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO)) { + log_error("Cannot combine --private-users with read-write mounts."); + return -EINVAL; + } if (arg_volatile_mode != VOLATILE_NO && arg_read_only) { log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy."); @@ -2702,9 +2745,7 @@ static int inner_child( return log_error_errno(r, "Couldn't become new root: %m"); r = mount_all(NULL, - arg_userns_mode != USER_NAMESPACE_NO, - true, - arg_private_network, + arg_mount_settings | MOUNT_IN_USERNS, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); @@ -2712,7 +2753,7 @@ static int inner_child( if (r < 0) return r; - r = mount_sysfs(NULL); + r = mount_sysfs(NULL, arg_mount_settings); if (r < 0) return r; @@ -3079,9 +3120,7 @@ static int outer_child( } r = mount_all(directory, - arg_userns_mode != USER_NAMESPACE_NO, - false, - arg_private_network, + arg_mount_settings, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); diff --git a/test/TEST-13-NSPAWN-SMOKE/test.sh b/test/TEST-13-NSPAWN-SMOKE/test.sh index e6977a7f1c..305866ae38 100755 --- a/test/TEST-13-NSPAWN-SMOKE/test.sh +++ b/test/TEST-13-NSPAWN-SMOKE/test.sh @@ -39,7 +39,7 @@ test_setup() { eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) setup_basic_environment - dracut_install busybox chmod rmdir + dracut_install busybox chmod rmdir unshare cp create-busybox-container $initdir/ @@ -78,6 +78,11 @@ if [[ -f /proc/1/ns/cgroup ]]; then is_cgns_supported=yes fi +is_user_ns_supported=no +if unshare -U sh -c :; then + is_user_ns_supported=yes +fi + function run { if [[ "$1" = "yes" && "$is_v2_supported" = "no" ]]; then printf "Unified cgroup hierarchy is not supported. Skipping.\n" >&2 @@ -88,20 +93,32 @@ function run { return 0 fi - local _root="/var/lib/machines/unified-$1-cgns-$2" + local _root="/var/lib/machines/unified-$1-cgns-$2-api-vfs-writable-$3" /create-busybox-container "$_root" - UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" -b - UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" --private-network -b - UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" -U -b - UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" --private-network -U -b + UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" -b + UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" --private-network -b + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" -U -b; then + [[ "$is_user_ns_supported" = "yes" && "$3" = "network" ]] && return 1 + else + [[ "$is_user_ns_supported" = "no" && "$3" = "network" ]] && return 1 + fi + + if UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" SYSTEMD_NSPAWN_API_VFS_WRITABLE="$3" systemd-nspawn --register=no -D "$_root" --private-network -U -b; then + [[ "$is_user_ns_supported" = "yes" && "$3" = "yes" ]] && return 1 + else + [[ "$is_user_ns_supported" = "no" && "$3" = "yes" ]] && return 1 + fi return 0 } -run no no -run yes no -run no yes -run yes yes +for api_vfs_writable in yes no network; do + run no no $api_vfs_writable + run yes no $api_vfs_writable + run no yes $api_vfs_writable + run yes yes $api_vfs_writable +done touch /testok EOF |