From 548bd57376f7eb82cc792f0476688ccc9843962a Mon Sep 17 00:00:00 2001 From: Evgeny Vereshchagin Date: Thu, 20 Oct 2016 09:01:45 +0000 Subject: basic: fallback to the fstat if we don't have access to the /proc/self/fdinfo https://github.com/systemd/systemd/pull/4372#discussion_r83354107: I get `open("/proc/self/fdinfo/13", O_RDONLY|O_CLOEXEC) = -1 EACCES (Permission denied)` 327 mkdir("/proc", 0755 327 <... mkdir resumed> ) = -1 EEXIST (File exists) 327 stat("/proc", 327 <... stat resumed> {st_dev=makedev(8, 1), st_ino=28585, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=0, st_gid=0, st_blksize=1024, st_blocks=4, st_size=1024, st_atime=2016/10/14-02:55:32, st_mtime=2016/ 327 mount("proc", "/proc", "proc", MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL 327 <... mount resumed> ) = 0 327 lstat("/proc", 327 <... lstat resumed> {st_dev=makedev(0, 34), st_ino=1, st_mode=S_IFDIR|0555, st_nlink=75, st_uid=65534, st_gid=65534, st_blksize=1024, st_blocks=0, st_size=0, st_atime=2016/10/14-03:13:35.971031263, 327 lstat("/proc/sys", {st_dev=makedev(0, 34), st_ino=4026531855, st_mode=S_IFDIR|0555, st_nlink=1, st_uid=65534, st_gid=65534, st_blksize=1024, st_blocks=0, st_size=0, st_atime=2016/10/14-03:13:39.1630 327 openat(AT_FDCWD, "/proc", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_PATH) = 11 327 name_to_handle_at(11, "sys", {handle_bytes=128}, 0x7ffe3a238604, AT_SYMLINK_FOLLOW) = -1 EOPNOTSUPP (Operation not supported) 327 name_to_handle_at(11, "", {handle_bytes=128}, 0x7ffe3a238608, AT_EMPTY_PATH) = -1 EOPNOTSUPP (Operation not supported) 327 openat(11, "sys", O_RDONLY|O_CLOEXEC|O_PATH) = 13 327 open("/proc/self/fdinfo/13", O_RDONLY|O_CLOEXEC) = -1 EACCES (Permission denied) 327 close(13 327 <... close resumed> ) = 0 327 close(11 327 <... 
close resumed> ) = 0 -bash-4.3# ls -ld /proc/ dr-xr-xr-x 76 65534 65534 0 Oct 14 02:57 /proc/ -bash-4.3# ls -ld /proc/1 dr-xr-xr-x 9 root root 0 Oct 14 02:57 /proc/1 -bash-4.3# ls -ld /proc/1/fdinfo dr-x------ 2 65534 65534 0 Oct 14 03:00 /proc/1/fdinfo --- src/basic/mount-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index 0ef00676ef..2985cc475a 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -162,7 +162,7 @@ int fd_is_mount_point(int fd, const char *filename, int flags) { fallback_fdinfo: r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id); - if (r == -EOPNOTSUPP) + if (IN_SET(r, -EOPNOTSUPP, -EACCES)) goto fallback_fstat; if (r < 0) return r; -- cgit v1.2.3-54-g00ecf From 63eae72312b6b8df4c7186233994a65d747229a7 Mon Sep 17 00:00:00 2001 From: Evgeny Vereshchagin Date: Thu, 20 Oct 2016 09:03:40 +0000 Subject: nspawn: really lchown(uid/gid) https://github.com/systemd/systemd/pull/4372#issuecomment-253723849: * `mount_all (outer_child)` creates `container_dir/sys/fs/selinux` * `mount_all (outer_child)` doesn't patch `container_dir/sys/fs` and so on. 
* `mount_sysfs (inner_child)` tries to create `/sys/fs/cgroup` * This fails 370 stat("/sys/fs", {st_dev=makedev(0, 28), st_ino=13880, st_mode=S_IFDIR|0755, st_nlink=3, st_uid=65534, st_gid=65534, st_blksize=4096, st_blocks=0, st_size=60, st_atime=2016/10/14-05:16:43.398665943, st_mtime=2016/10/14-05:16:43.399665943, st_ctime=2016/10/14-05:16:43.399665943}) = 0 370 mkdir("/sys/fs/cgroup", 0755) = -1 EACCES (Permission denied) * `mount_sysfs (inner_child)` ignores that error and mount(NULL, "/sys", NULL, MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND, NULL) = 0 * `mount_cgroups` finally fails --- src/nspawn/nspawn-mount.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 44dc9bfcf4..115de64cf9 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -300,6 +300,59 @@ int mount_sysfs(const char *dest) { MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL); } +static int mkdir_userns(const char *path, mode_t mode, bool in_userns, uid_t uid_shift) { + int r; + + assert(path); + + r = mkdir(path, mode); + if (r < 0 && errno != EEXIST) + return -errno; + + if (!in_userns) { + r = lchown(path, uid_shift, uid_shift); + if (r < 0) + return -errno; + } + + return 0; +} + +static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, bool in_userns, uid_t uid_shift) { + const char *p, *e; + int r; + + assert(path); + + if (prefix && !path_startswith(path, prefix)) + return -ENOTDIR; + + /* create every parent directory in the path, except the last component */ + p = path + strspn(path, "/"); + for (;;) { + char t[strlen(path) + 1]; + + e = p + strcspn(p, "/"); + p = e + strspn(e, "/"); + + /* Is this the last component? 
If so, then we're done */ + if (*p == 0) + break; + + memcpy(t, path, e - path); + t[e-path] = 0; + + if (prefix && path_startswith(prefix, t)) + continue; + + r = mkdir_userns(t, mode, in_userns, uid_shift); + if (r < 0) + return r; + } + + return mkdir_userns(path, mode, in_userns, uid_shift); +} + int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, @@ -361,7 +414,7 @@ int mount_all(const char *dest, if (mount_table[k].what && r > 0) continue; - r = mkdir_p(where, 0755); + r = mkdir_userns_p(dest, where, 0755, in_userns, uid_shift); if (r < 0 && r != -EEXIST) { if (mount_table[k].fatal) return log_error_errno(r, "Failed to create directory %s: %m", where); -- cgit v1.2.3-54-g00ecf From 6d66bd3b2a7ebe99aa7fcd06df9bc05b178a142a Mon Sep 17 00:00:00 2001 From: Evgeny Vereshchagin Date: Thu, 20 Oct 2016 09:05:46 +0000 Subject: nspawn: become a new root early https://github.com/torvalds/linux/commit/036d523641c66bef713042894a17f4335f199e49 > vfs: Don't create inodes with a uid or gid unknown to the vfs It is expected that filesystems can not represent uids and gids from outside of their user namespace. Keep things simple by not even trying to create filesystem nodes with non-sense uids and gids. So, we actually should `reset_uid_gid` early to prevent https://github.com/systemd/systemd/pull/4223#issuecomment-252522955 $ sudo UNIFIED_CGROUP_HIERARCHY=no LD_LIBRARY_PATH=.libs .libs/systemd-nspawn -D /var/lib/machines/fedora-rawhide -U -b systemd.unit=multi-user.target Spawning container fedora-rawhide on /var/lib/machines/fedora-rawhide. Press ^] three times within 1s to kill container. Child died too early. Selected user namespace base 1073283072 and range 65536. 
Failed to mount to /sys/fs/cgroup/systemd: No such file or directory Details: https://github.com/systemd/systemd/pull/4223#issuecomment-253046519 Fixes: #4352 --- src/nspawn/nspawn.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 2cbe563953..295293858e 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2684,6 +2684,10 @@ static int inner_child( } } + r = reset_uid_gid(); + if (r < 0) + return log_error_errno(r, "Couldn't become new root: %m"); + r = mount_all(NULL, arg_userns_mode != USER_NAMESPACE_NO, true, @@ -2726,10 +2730,6 @@ static int inner_child( return r; } - r = reset_uid_gid(); - if (r < 0) - return log_error_errno(r, "Couldn't become new root: %m"); - r = setup_boot_id(NULL); if (r < 0) return r; -- cgit v1.2.3-54-g00ecf From 844da987ef8b8c98f837d3328eeb3ed481f43835 Mon Sep 17 00:00:00 2001 From: Evgeny Vereshchagin Date: Thu, 20 Oct 2016 09:16:23 +0000 Subject: tests/TEST-13-NSPAWN-SMOKE: remove an expected failure check https://github.com/systemd/systemd/issues/4352 has been fixed So, we don't need this workaround anymore --- Makefile.am | 1 - test/TEST-13-NSPAWN-SMOKE/Makefile | 4 +- test/TEST-13-NSPAWN-SMOKE/has-overflow.c | 143 ------------------------------- test/TEST-13-NSPAWN-SMOKE/test.sh | 19 +--- 4 files changed, 4 insertions(+), 163 deletions(-) delete mode 100644 test/TEST-13-NSPAWN-SMOKE/has-overflow.c diff --git a/Makefile.am b/Makefile.am index 00124a29f8..b68f380e49 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6031,7 +6031,6 @@ EXTRA_DIST += \ test/TEST-12-ISSUE-3171/Makefile \ test/TEST-12-ISSUE-3171/test.sh \ test/TEST-13-NSPAWN-SMOKE/Makefile \ - test/TEST-13-NSPAWN-SMOKE/has-overflow.c \ test/TEST-13-NSPAWN-SMOKE/create-busybox-container \ test/TEST-13-NSPAWN-SMOKE/test.sh \ test/test-functions diff --git a/test/TEST-13-NSPAWN-SMOKE/Makefile b/test/TEST-13-NSPAWN-SMOKE/Makefile index 2ca5b12cf3..ff1470f852 100644 --- 
a/test/TEST-13-NSPAWN-SMOKE/Makefile +++ b/test/TEST-13-NSPAWN-SMOKE/Makefile @@ -1,7 +1,7 @@ -all: has-overflow +all: @make -s --no-print-directory -C ../.. all @basedir=../.. TEST_BASE_DIR=../ ./test.sh --all -setup: has-overflow +setup: @make --no-print-directory -C ../.. all @basedir=../.. TEST_BASE_DIR=../ ./test.sh --setup clean: diff --git a/test/TEST-13-NSPAWN-SMOKE/has-overflow.c b/test/TEST-13-NSPAWN-SMOKE/has-overflow.c deleted file mode 100644 index 1b3331fad7..0000000000 --- a/test/TEST-13-NSPAWN-SMOKE/has-overflow.c +++ /dev/null @@ -1,143 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) - -struct child_args { - int pipe_fd[2]; /* Pipe used to synchronize parent and child */ -}; - -static void usage(char *pname) { - fprintf(stderr, "Options can be:\n"); - fprintf(stderr, "\t-M uid_map Specify UID map for user namespace\n"); - fprintf(stderr, "\t-G gid_map Specify GID map for user namespace\n"); - - exit(EXIT_FAILURE); -} - -static void update_map(char *mapping, char *map_file) { - int fd, j; - size_t map_len; - - map_len = strlen(mapping); - - fd = open(map_file, O_RDWR); - if (fd == -1) { - fprintf(stderr, "ERROR: open %s: %s\n", map_file, strerror(errno)); - exit(EXIT_FAILURE); - } - - if (write(fd, mapping, map_len) != map_len) { - fprintf(stderr, "ERROR: write %s: %s\n", map_file, strerror(errno)); - exit(EXIT_FAILURE); - } - - close(fd); -} - -static void proc_setgroups_write(pid_t child_pid, char *str) { - char setgroups_path[PATH_MAX]; - int fd; - - snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups", (long) child_pid); - - fd = open(setgroups_path, O_RDWR); - if (fd == -1) { - if (errno != ENOENT) - fprintf(stderr, "ERROR: open %s: %s\n", setgroups_path, strerror(errno)); - return; - } - - if (write(fd, str, strlen(str)) == -1) - fprintf(stderr, 
"ERROR: write %s: %s\n", setgroups_path, strerror(errno)); - - close(fd); -} - -static int child_func(void *arg) { - struct child_args *args = (struct child_args *) arg; - char ch; - - close(args->pipe_fd[1]); - if (read(args->pipe_fd[0], &ch, 1) != 0) { - fprintf(stderr, "Failure in child: read from pipe returned != 0\n"); - exit(EXIT_FAILURE); - } - - mount("tmpfs", "/tmp", "tmpfs", MS_MGC_VAL, "mode=777,uid=0,gid=0"); - if (mkdir("/tmp/hey", 0777) < 0) - exit(EXIT_FAILURE); - - exit(EXIT_SUCCESS); -} - -#define STACK_SIZE (1024 * 1024) - -static char child_stack[STACK_SIZE]; - -int main(int argc, char *argv[]) { - int flags, opt; - pid_t child_pid; - struct child_args args; - char *uid_map, *gid_map; - const int MAP_BUF_SIZE = 100; - char map_buf[MAP_BUF_SIZE]; - char map_path[PATH_MAX]; - int status; - - flags = 0; - gid_map = NULL; - uid_map = NULL; - while ((opt = getopt(argc, argv, "+M:G:")) != -1) { - switch (opt) { - case 'M': - uid_map = optarg; - break; - case 'G': - gid_map = optarg; - break; - default: - usage(argv[0]); - } - } - - if (!uid_map || !gid_map) - usage(argv[0]); - - flags |= CLONE_NEWNS; - flags |= CLONE_NEWUSER; - - if (pipe(args.pipe_fd) == -1) - errExit("pipe"); - - child_pid = clone(child_func, child_stack + STACK_SIZE, flags | SIGCHLD, &args); - if (child_pid == -1) - errExit("clone"); - - snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map", (long) child_pid); - update_map(uid_map, map_path); - - proc_setgroups_write(child_pid, "allow"); - snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map", (long) child_pid); - update_map(gid_map, map_path); - - close(args.pipe_fd[1]); - - if (waitpid(child_pid, &status, 0) == -1) - errExit("waitpid"); - - exit(WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SUCCESS ? 
EXIT_FAILURE : EXIT_SUCCESS); -} diff --git a/test/TEST-13-NSPAWN-SMOKE/test.sh b/test/TEST-13-NSPAWN-SMOKE/test.sh index dfc437c0ee..e6977a7f1c 100755 --- a/test/TEST-13-NSPAWN-SMOKE/test.sh +++ b/test/TEST-13-NSPAWN-SMOKE/test.sh @@ -40,7 +40,6 @@ test_setup() { setup_basic_environment dracut_install busybox chmod rmdir - dracut_install ./has-overflow cp create-busybox-container $initdir/ @@ -93,22 +92,8 @@ function run { /create-busybox-container "$_root" UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" -b UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" --private-network -b - - if ! UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" -U -b; then - if [[ "$1" = "no" && "$2" = "yes" ]] && /has-overflow -M '0 1073283072 65536' -G '0 1073283072 65536'; then - printf "Failure expected, ignoring (see https://github.com/systemd/systemd/issues/4352)\n" >&2 - else - return 1 - fi - fi - - if ! UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" --private-network -U -b; then - if [[ "$1" = "no" && "$2" = "yes" ]] && /has-overflow -M '0 1073283072 65536' -G '0 1073283072 65536'; then - printf "Failure expected, ignoring (see https://github.com/systemd/systemd/issues/4352)\n" >&2 - else - return 1 - fi - fi + UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" -U -b + UNIFIED_CGROUP_HIERARCHY="$1" SYSTEMD_NSPAWN_USE_CGNS="$2" systemd-nspawn --register=no -D "$_root" --private-network -U -b return 0 } -- cgit v1.2.3-54-g00ecf