From be39ccf3a0d4d15324af1de4d8552a1d65f40808 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 25 Aug 2016 10:24:10 +0200 Subject: execute: move suppression of HOME=/ and SHELL=/bin/nologin into user-util.c This adds a new call get_user_creds_clean(), which is just like get_user_creds() but returns NULL in the home/shell parameters if they contain no useful information. This code previously lived in execute.c, but by generalizing this we can reuse it in run.c. --- src/basic/user-util.c | 32 +++++++++++++++++++++++++++++++- src/basic/user-util.h | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'src/basic') diff --git a/src/basic/user-util.c b/src/basic/user-util.c index 122d9a0c7c..0522bce1d1 100644 --- a/src/basic/user-util.c +++ b/src/basic/user-util.c @@ -31,14 +31,15 @@ #include #include -#include "missing.h" #include "alloc-util.h" #include "fd-util.h" #include "formats-util.h" #include "macro.h" +#include "missing.h" #include "parse-util.h" #include "path-util.h" #include "string-util.h" +#include "strv.h" #include "user-util.h" #include "utf8.h" @@ -175,6 +176,35 @@ int get_user_creds( return 0; } +int get_user_creds_clean( + const char **username, + uid_t *uid, gid_t *gid, + const char **home, + const char **shell) { + + int r; + + /* Like get_user_creds(), but resets home/shell to NULL if they don't contain anything relevant. */ + + r = get_user_creds(username, uid, gid, home, shell); + if (r < 0) + return r; + + if (shell && + (isempty(*shell) || PATH_IN_SET(*shell, + "/bin/nologin", + "/sbin/nologin", + "/usr/bin/nologin", + "/usr/sbin/nologin"))) + *shell = NULL; + + if (home && + (isempty(*home) || path_equal(*home, "/"))) + *home = NULL; + + return 0; +} + int get_group_creds(const char **groupname, gid_t *gid) { struct group *g; gid_t id; diff --git a/src/basic/user-util.h b/src/basic/user-util.h index f569363811..6c61f63cae 100644 --- a/src/basic/user-util.h +++ b/src/basic/user-util.h @@ -40,6 +40,7 @@ char* getlogname_malloc(void); char* getusername_malloc(void); int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell); +int get_user_creds_clean(const char **username, uid_t *uid, gid_t *gid, const char **home, const char **shell); int get_group_creds(const char **groupname, gid_t *gid); char* uid_to_name(uid_t uid); -- cgit v1.2.3-54-g00ecf From 6b7c9f8bce4679c89f3b89cacfd4932c0aeadad4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 25 Sep 2016 10:40:51 +0200 Subject: namespace: rework how ReadWritePaths= is applied Previously, if ReadWritePaths= was nested inside a ReadOnlyPaths= specification, then we'd first recursively apply the ReadOnlyPaths= paths, and make everything below read-only, only in order to then flip the read-only bit again for the subdirs listed in ReadWritePaths= below it. This is not only ugly (as for the dirs in question we first turn on the RO bit, only to turn it off again immediately after), but also problematic in containers, where a container manager might have marked a set of dirs read-only and this code will undo this is ReadWritePaths= is set for any. With this patch behaviour in this regard is altered: ReadOnlyPaths= will not be applied to the children listed in ReadWritePaths= in the first place, so that we do not need to turn off the RO bit for those after all. This means that ReadWritePaths=/ReadOnlyPaths= may only be used to turn on the RO bit, but never to turn it off again. Or to say this differently: if some dirs are marked read-only via some external tool, then ReadWritePaths= will not undo it. This is not only the safer option, but also more in-line with what the man page currently claims: "Entries (files or directories) listed in ReadWritePaths= are accessible from within the namespace with the same access rights as from outside." To implement this change bind_remount_recursive() gained a new "blacklist" string list parameter, which when passed may contain subdirs that shall be excluded from the read-only mounting. A number of functions are updated to add more debug logging to make this more digestable. --- src/basic/mount-util.c | 71 ++++++++++++++++++++++++++++++++--------------- src/basic/mount-util.h | 2 +- src/core/namespace.c | 66 ++++++++++++++++++++++++++++--------------- src/nspawn/nspawn-mount.c | 6 ++-- src/nspawn/nspawn.c | 2 +- 5 files changed, 96 insertions(+), 51 deletions(-) (limited to 'src/basic') diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index bfa04394fe..b9affb4e70 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -36,6 +36,7 @@ #include "set.h" #include "stdio-util.h" #include "string-util.h" +#include "strv.h" static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; @@ -287,10 +288,12 @@ int umount_recursive(const char *prefix, int flags) { continue; if (umount2(p, flags) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to umount %s: %m", p); continue; } + log_debug("Successfully unmounted %s", p); + again = true; n++; @@ -311,24 +314,21 @@ static int get_mount_flags(const char *path, unsigned long *flags) { return 0; } -int bind_remount_recursive(const char *prefix, bool ro) { +int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) { _cleanup_set_free_free_ Set *done = NULL; _cleanup_free_ char *cleaned = NULL; int r; - /* Recursively remount a directory (and all its submounts) - * read-only or read-write. If the directory is already - * mounted, we reuse the mount and simply mark it - * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write - * operation). If it isn't we first make it one. Afterwards we - * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all - * submounts we can access, too. When mounts are stacked on - * the same mount point we only care for each individual - * "top-level" mount on each point, as we cannot - * influence/access the underlying mounts anyway. We do not - * have any effect on future submounts that might get - * propagated, they migt be writable. This includes future - * submounts that have been triggered via autofs. */ + /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already + * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write + * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to + * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each + * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We + * do not have any effect on future submounts that might get propagated, they migt be writable. This includes + * future submounts that have been triggered via autofs. + * + * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the + * remount operation. Note that we'll ignore the blacklist for the top-level path. */ cleaned = strdup(prefix); if (!cleaned) @@ -385,6 +385,33 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (r < 0) return r; + if (!path_startswith(p, cleaned)) + continue; + + /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall + * operate on. */ + if (!path_equal(cleaned, p)) { + bool blacklisted = false; + char **i; + + STRV_FOREACH(i, blacklist) { + + if (path_equal(*i, cleaned)) + continue; + + if (!path_startswith(*i, cleaned)) + continue; + + if (path_startswith(p, *i)) { + blacklisted = true; + log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned); + break; + } + } + if (blacklisted) + continue; + } + /* Let's ignore autofs mounts. If they aren't * triggered yet, we want to avoid triggering * them, as we don't make any guarantees for @@ -396,12 +423,9 @@ int bind_remount_recursive(const char *prefix, bool ro) { continue; } - if (path_startswith(p, cleaned) && - !set_contains(done, p)) { - + if (!set_contains(done, p)) { r = set_consume(todo, p); p = NULL; - if (r == -EEXIST) continue; if (r < 0) @@ -418,8 +442,7 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (!set_contains(done, cleaned) && !set_contains(todo, cleaned)) { - /* The prefix directory itself is not yet a - * mount, make it one. */ + /* The prefix directory itself is not yet a mount, make it one. */ if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0) return -errno; @@ -430,6 +453,8 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) return -errno; + log_debug("Made top-level directory %s a mount point.", prefix); + x = strdup(cleaned); if (!x) return -ENOMEM; @@ -447,8 +472,7 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (r < 0) return r; - /* Deal with mount points that are obstructed by a - * later mount */ + /* Deal with mount points that are obstructed by a later mount */ r = path_is_mount_point(x, 0); if (r == -ENOENT || r == 0) continue; @@ -463,6 +487,7 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) return -errno; + log_debug("Remounted %s read-only.", x); } } } diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h index f46989ebb3..74730de663 100644 --- a/src/basic/mount-util.h +++ b/src/basic/mount-util.h @@ -35,7 +35,7 @@ int path_is_mount_point(const char *path, int flags); int repeat_unmount(const char *path, int flags); int umount_recursive(const char *target, int flags); -int bind_remount_recursive(const char *prefix, bool ro); +int bind_remount_recursive(const char *prefix, bool ro, char **blacklist); int mount_move_root(const char *path); diff --git a/src/core/namespace.c b/src/core/namespace.c index 72f850b2f2..b0dab9459e 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -375,9 +375,19 @@ static int apply_mount( case READONLY: case READWRITE: - /* Nothing to mount here, we just later toggle the - * MS_RDONLY bit for the mount point */ - return 0; + + r = path_is_mount_point(m->path, 0); + if (r < 0) { + if (m->ignore && errno == ENOENT) + return 0; + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); + } + if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ + return 0; + + /* This isn't a mount point yet, let's make it one. */ + what = m->path; + break; case PRIVATE_TMP: what = tmp_dir; @@ -396,31 +406,33 @@ static int apply_mount( assert(what); - r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) { - log_debug("Successfully mounted %s to %s", what, m->path); - return r; - } else { + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) { if (m->ignore && errno == ENOENT) return 0; + return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); } + + log_debug("Successfully mounted %s to %s", what, m->path); + return 0; } -static int make_read_only(BindMount *m) { - int r; +static int make_read_only(BindMount *m, char **blacklist) { + int r = 0; assert(m); if (IN_SET(m->mode, INACCESSIBLE, READONLY)) - r = bind_remount_recursive(m->path, true); - else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { - r = bind_remount_recursive(m->path, false); - if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + r = bind_remount_recursive(m->path, true, blacklist); + else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else - r = 0; + return 0; + + /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only + * already stays this way. This improves compatibility with container managers, where we won't attempt to undo + * read-only mounts already applied. */ if (m->ignore && r == -ENOENT) return 0; @@ -570,14 +582,25 @@ int setup_namespace( } if (n > 0) { + char **blacklist; + unsigned j; + + /* First round, add in all special mounts we need */ for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) goto fail; } + /* Create a blacklist we can pass to bind_mount_recursive() */ + blacklist = newa(char*, n+1); + for (j = 0; j < n; j++) + blacklist[j] = (char*) mounts[j].path; + blacklist[j] = NULL; + + /* Second round, flip the ro bits if necessary. */ for (m = mounts; m < mounts + n; ++m) { - r = make_read_only(m); + r = make_read_only(m, blacklist); if (r < 0) goto fail; } @@ -586,9 +609,7 @@ int setup_namespace( if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - - /* at this point, we cannot rollback */ - if (r < 0) + if (r < 0) /* at this point, we cannot rollback */ return r; } @@ -596,8 +617,7 @@ int setup_namespace( * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - /* at this point, we cannot rollback */ - return -errno; + return -errno; /* at this point, we cannot rollback */ return 0; diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 295b75341f..8457357003 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -476,7 +476,7 @@ static int mount_bind(const char *dest, CustomMount *m) { return log_error_errno(errno, "mount(%s) failed: %m", where); if (m->read_only) { - r = bind_remount_recursive(where, true); + r = bind_remount_recursive(where, true, NULL); if (r < 0) return log_error_errno(r, "Read-only bind mount failed: %m"); } @@ -990,7 +990,7 @@ int setup_volatile_state( /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */ - r = bind_remount_recursive(directory, true); + r = bind_remount_recursive(directory, true, NULL); if (r < 0) return log_error_errno(r, "Failed to remount %s read-only: %m", directory); @@ -1065,7 +1065,7 @@ int setup_volatile( bind_mounted = true; - r = bind_remount_recursive(t, true); + r = bind_remount_recursive(t, true, NULL); if (r < 0) { log_error_errno(r, "Failed to remount %s read-only: %m", t); goto fail; diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 0d61d34ebf..1f3e1f2dac 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3019,7 +3019,7 @@ static int outer_child( return r; if (arg_read_only) { - r = bind_remount_recursive(directory, true); + r = bind_remount_recursive(directory, true, NULL); if (r < 0) return log_error_errno(r, "Failed to make tree read-only: %m"); } -- cgit v1.2.3-54-g00ecf From d944dc9553009822deaddec76814f5642a6a8176 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 24 Sep 2016 12:41:30 +0200 Subject: namespace: chase symlinks for mounts to set up in userspace This adds logic to chase symlinks for all mount points that shall be created in a namespace environment in userspace, instead of leaving this to the kernel. This has the advantage that we can correctly handle absolute symlinks that shall be taken relative to a specific root directory. Moreover, we can properly handle mounts created on symlinked files or directories as we can merge their mounts as necessary. (This also drops the "done" flag in the namespace logic, which was never actually working, but was supposed to permit a partial rollback of the namespace logic, which however is only mildly useful as it wasn't clear in which case it would or would not be able to roll back.) Fixes: #3867 --- src/basic/fs-util.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++ src/basic/fs-util.h | 2 + src/core/namespace.c | 118 +++++++++++++++++++----------- src/test/test-fs-util.c | 96 ++++++++++++++++++++++++- src/test/test-ns.c | 10 ++- 5 files changed, 367 insertions(+), 46 deletions(-) (limited to 'src/basic') diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index ce87257bc1..86d9ad7e36 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -597,3 +597,190 @@ int inotify_add_watch_fd(int fd, int what, uint32_t mask) { return r; } + +int chase_symlinks(const char *path, const char *_root, char **ret) { + _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL; + _cleanup_close_ int fd = -1; + unsigned max_follow = 32; /* how many symlinks to follow before giving up and returning ELOOP */ + char *todo; + int r; + + assert(path); + + /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following + * symlinks relative to a root directory, instead of the root of the host. + * + * Note that "root" matters only if we encounter an absolute symlink, it's unused otherwise. Most importantly + * this means the path parameter passed in is not prefixed by it. + * + * Algorithmically this operates on two path buffers: "done" are the components of the path we already + * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to + * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning + * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no + * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races + * at a minimum. */ + + r = path_make_absolute_cwd(path, &buffer); + if (r < 0) + return r; + + if (_root) { + r = path_make_absolute_cwd(_root, &root); + if (r < 0) + return r; + } + + fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + todo = buffer; + for (;;) { + _cleanup_free_ char *first = NULL; + _cleanup_close_ int child = -1; + struct stat st; + size_t n, m; + + /* Determine length of first component in the path */ + n = strspn(todo, "/"); /* The slashes */ + m = n + strcspn(todo + n, "/"); /* The entire length of the component */ + + /* Extract the first component. */ + first = strndup(todo, m); + if (!first) + return -ENOMEM; + + todo += m; + + /* Just a single slash? Then we reached the end. */ + if (isempty(first) || path_equal(first, "/")) + break; + + /* Just a dot? Then let's eat this up. */ + if (path_equal(first, "/.")) + continue; + + /* Two dots? Then chop off the last bit of what we already found out. */ + if (path_equal(first, "/..")) { + _cleanup_free_ char *parent = NULL; + int fd_parent = -1; + + if (isempty(done) || path_equal(done, "/")) + return -EINVAL; + + parent = dirname_malloc(done); + if (!parent) + return -ENOMEM; + + /* Don't allow this to leave the root dir */ + if (root && + path_startswith(done, root) && + !path_startswith(parent, root)) + return -EINVAL; + + free(done); + done = parent; + parent = NULL; + + fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd_parent < 0) + return -errno; + + safe_close(fd); + fd = fd_parent; + + continue; + } + + /* Otherwise let's see what this is. */ + child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (child < 0) + return -errno; + + if (fstat(child, &st) < 0) + return -errno; + + if (S_ISLNK(st.st_mode)) { + _cleanup_free_ char *destination = NULL; + + /* This is a symlink, in this case read the destination. But let's make sure we don't follow + * symlinks without bounds. */ + if (--max_follow <= 0) + return -ELOOP; + + r = readlinkat_malloc(fd, first + n, &destination); + if (r < 0) + return r; + if (isempty(destination)) + return -EINVAL; + + if (path_is_absolute(destination)) { + + /* An absolute destination. Start the loop from the beginning, but use the root + * directory as base. */ + + safe_close(fd); + fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + free(buffer); + buffer = destination; + destination = NULL; + + todo = buffer; + free(done); + + /* Note that we do not revalidate the root, we take it as is. */ + if (isempty(root)) + done = NULL; + else { + done = strdup(root); + if (!done) + return -ENOMEM; + } + + } else { + char *joined; + + /* A relative destination. If so, this is what we'll prefix what's left to do with what + * we just read, and start the loop again, but remain in the current directory. */ + + joined = strjoin("/", destination, todo, NULL); + if (!joined) + return -ENOMEM; + + free(buffer); + todo = buffer = joined; + } + + continue; + } + + /* If this is not a symlink, then let's just add the name we read to what we already verified. */ + if (!done) { + done = first; + first = NULL; + } else { + if (!strextend(&done, first, NULL)) + return -ENOMEM; + } + + /* And iterate again, but go one directory further down. */ + safe_close(fd); + fd = child; + child = -1; + } + + if (!done) { + /* Special case, turn the empty string into "/", to indicate the root directory. */ + done = strdup("/"); + if (!done) + return -ENOMEM; + } + + *ret = done; + done = NULL; + + return 0; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index 2c3b9a1c74..31df47cf1e 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -77,3 +77,5 @@ union inotify_event_buffer { }; int inotify_add_watch_fd(int fd, int what, uint32_t mask); + +int chase_symlinks(const char *path, const char *_root, char **ret); diff --git a/src/core/namespace.c b/src/core/namespace.c index 356d3c8121..d3ab2e8e3e 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -29,6 +29,7 @@ #include "alloc-util.h" #include "dev-setup.h" #include "fd-util.h" +#include "fs-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -57,9 +58,9 @@ typedef enum MountMode { } MountMode; typedef struct BindMount { - const char *path; + const char *path; /* stack memory, doesn't need to be freed explicitly */ + char *chased; /* malloc()ed memory, needs to be freed */ MountMode mode; - bool done; bool ignore; } BindMount; @@ -71,7 +72,6 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) { STRV_FOREACH(i, strv) { (*p)->ignore = false; - (*p)->done = false; if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') { (*p)->ignore = true; @@ -360,11 +360,8 @@ static int apply_mount( * inaccessible path. */ (void) umount_recursive(m->path, 0); - if (lstat(m->path, &target) < 0) { - if (m->ignore && errno == ENOENT) - return 0; + if (lstat(m->path, &target) < 0) return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path); - } what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -378,11 +375,8 @@ static int apply_mount( case READWRITE: r = path_is_mount_point(m->path, 0); - if (r < 0) { - if (m->ignore && errno == ENOENT) - return 0; + if (r < 0) return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path); - } if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ return 0; @@ -407,12 +401,8 @@ static int apply_mount( assert(what); - if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) { - if (m->ignore && errno == ENOENT) - return 0; - + if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); - } log_debug("Successfully mounted %s to %s", what, m->path); return 0; @@ -435,12 +425,43 @@ static int make_read_only(BindMount *m, char **blacklist) { * already stays this way. This improves compatibility with container managers, where we won't attempt to undo * read-only mounts already applied. */ - if (m->ignore && r == -ENOENT) - return 0; - return r; } +static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { + BindMount *f, *t; + int r; + + assert(m); + assert(n); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we + * can't resolve the path, and which have been marked for such removal. */ + + for (f = m, t = m; f < m+*n; f++) { + + r = chase_symlinks(f->path, root_directory, &f->chased); + if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */ + continue; + if (r < 0) + return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path); + + if (path_equal(f->path, f->chased)) + f->chased = mfree(f->chased); + else { + log_debug("Chased %s → %s", f->path, f->chased); + f->path = f->chased; + } + + *t = *f; + t++; + } + + *n = t - m; + return 0; +} + int setup_namespace( const char* root_directory, char** read_write_paths, @@ -456,6 +477,7 @@ int setup_namespace( unsigned long mount_flags) { BindMount *m, *mounts = NULL; + bool make_slave = false; unsigned n; int r = 0; @@ -475,6 +497,9 @@ int setup_namespace( ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) + (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0))); + if (root_directory || n > 0) + make_slave = true; + if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); r = append_mounts(&m, read_write_paths, READWRITE); @@ -596,6 +621,13 @@ int setup_namespace( assert(mounts + n == m); + /* Resolve symlinks manually first, as mount() will always follow them relative to the host's + * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit + * racy. */ + r = chase_all_symlinks(root_directory, mounts, &n); + if (r < 0) + goto finish; + qsort(mounts, n, sizeof(BindMount), mount_path_compare); drop_duplicates(mounts, &n); @@ -603,20 +635,26 @@ int setup_namespace( drop_nop(mounts, &n); } - if (unshare(CLONE_NEWNS) < 0) - return -errno; + if (unshare(CLONE_NEWNS) < 0) { + r = -errno; + goto finish; + } - if (n > 0 || root_directory) { + if (make_slave) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ - if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - return -errno; + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (root_directory) { /* Turn directory into bind mount */ - if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) - return -errno; + if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } } if (n > 0) { @@ -627,7 +665,7 @@ int setup_namespace( for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) - goto fail; + goto finish; } /* Create a blacklist we can pass to bind_mount_recursive() */ @@ -640,34 +678,30 @@ int setup_namespace( for (m = mounts; m < mounts + n; ++m) { r = make_read_only(m, blacklist); if (r < 0) - goto fail; + goto finish; } } if (root_directory) { /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root_directory); - if (r < 0) /* at this point, we cannot rollback */ - return r; + if (r < 0) + goto finish; } /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ - if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) - return -errno; /* at this point, we cannot rollback */ + if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { + r = -errno; + goto finish; + } - return 0; + r = 0; -fail: - if (n > 0) { - for (m = mounts; m < mounts + n; ++m) { - if (!m->done) - continue; - - (void) umount2(m->path, MNT_DETACH); - } - } +finish: + for (m = mounts; m < mounts + n; m++) + free(m->chased); return r; } diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c index b35a2ea2c8..53a3cdc663 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -20,16 +20,109 @@ #include #include "alloc-util.h" -#include "fileio.h" #include "fd-util.h" +#include "fileio.h" #include "fs-util.h" #include "macro.h" #include "mkdir.h" +#include "path-util.h" #include "rm-rf.h" #include "string-util.h" #include "strv.h" #include "util.h" +static void test_chase_symlinks(void) { + _cleanup_free_ char *result = NULL; + char temp[] = "/tmp/test-chase.XXXXXX"; + const char *top, *p, *q; + int r; + + assert_se(mkdtemp(temp)); + + top = strjoina(temp, "/top"); + assert_se(mkdir(top, 0700) >= 0); + + p = strjoina(top, "/dot"); + assert_se(symlink(".", p) >= 0); + + p = strjoina(top, "/dotdot"); + assert_se(symlink("..", p) >= 0); + + p = strjoina(top, "/dotdota"); + assert_se(symlink("../a", p) >= 0); + + p = strjoina(temp, "/a"); + assert_se(symlink("b", p) >= 0); + + p = strjoina(temp, "/b"); + assert_se(symlink("/usr", p) >= 0); + + p = strjoina(temp, "/start"); + assert_se(symlink("top/dot/dotdota", p) >= 0); + + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/usr")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r == -ENOENT); + + q = strjoina(temp, "/usr"); + assert_se(mkdir(q, 0700) >= 0); + + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, q)); + + p = strjoina(temp, "/slash"); + assert_se(symlink("/", p) >= 0); + + result = mfree(result); + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, temp)); + + p = strjoina(temp, "/slashslash"); + assert_se(symlink("///usr///", p) >= 0); + + result = mfree(result); + r = chase_symlinks(p, NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/usr")); + + result = mfree(result); + r = chase_symlinks(p, temp, &result); + assert_se(r >= 0); + assert_se(path_equal(result, q)); + + result = mfree(result); + r = chase_symlinks("/etc/./.././", NULL, &result); + assert_se(r >= 0); + assert_se(path_equal(result, "/")); + + result = mfree(result); + r = chase_symlinks("/etc/./.././", "/etc", &result); + assert_se(r == -EINVAL); + + result = mfree(result); + r = chase_symlinks("/etc/machine-id/foo", NULL, &result); + assert_se(r == -ENOTDIR); + + result = mfree(result); + p = strjoina(temp, "/recursive-symlink"); + assert_se(symlink("recursive-symlink", p) >= 0); + r = chase_symlinks(p, NULL, &result); + assert_se(r == -ELOOP); + + assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); +} + static void test_unlink_noerrno(void) { char name[] = "/tmp/test-close_nointr.XXXXXX"; int fd; @@ -144,6 +237,7 @@ int main(int argc, char *argv[]) { test_readlink_and_make_absolute(); test_get_files_in_directory(); test_var_tmp(); + test_chase_symlinks(); return 0; } diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 03a24620af..c4d4da6d05 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -26,14 +26,18 @@ int main(int argc, char *argv[]) { const char * const writable[] = { "/home", - "/home/lennart/projects/foobar", /* this should be masked automatically */ + "-/home/lennart/projects/foobar", /* this should be masked automatically */ NULL }; const char * const readonly[] = { - "/", - "/usr", + /* "/", */ + /* "/usr", */ "/boot", + "/lib", + "/usr/lib", + "-/lib64", + "-/usr/lib64", NULL }; -- cgit v1.2.3-54-g00ecf