summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/basic/fs-util.c187
-rw-r--r--src/basic/fs-util.h2
-rw-r--r--src/core/namespace.c118
-rw-r--r--src/test/test-fs-util.c96
-rw-r--r--src/test/test-ns.c10
5 files changed, 367 insertions, 46 deletions
diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c
index ce87257bc1..86d9ad7e36 100644
--- a/src/basic/fs-util.c
+++ b/src/basic/fs-util.c
@@ -597,3 +597,190 @@ int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
return r;
}
+
+int chase_symlinks(const char *path, const char *_root, char **ret) {
+ _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned max_follow = 32; /* how many symlinks to follow before giving up and returning ELOOP */
+ char *todo;
+ int r;
+
+ assert(path);
+
+ /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
+ * symlinks relative to a root directory, instead of the root of the host.
+ *
+ * Note that "root" matters only if we encounter an absolute symlink, it's unused otherwise. Most importantly
+ * this means the path parameter passed in is not prefixed by it.
+ *
+ * Algorithmically this operates on two path buffers: "done" are the components of the path we already
+ * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
+ * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
+ * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
+ * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
+ * at a minimum. */
+
+ r = path_make_absolute_cwd(path, &buffer);
+ if (r < 0)
+ return r;
+
+ if (_root) {
+ r = path_make_absolute_cwd(_root, &root);
+ if (r < 0)
+ return r;
+ }
+
+ fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ todo = buffer;
+ for (;;) {
+ _cleanup_free_ char *first = NULL;
+ _cleanup_close_ int child = -1;
+ struct stat st;
+ size_t n, m;
+
+ /* Determine length of first component in the path */
+ n = strspn(todo, "/"); /* The slashes */
+ m = n + strcspn(todo + n, "/"); /* The entire length of the component */
+
+ /* Extract the first component. */
+ first = strndup(todo, m);
+ if (!first)
+ return -ENOMEM;
+
+ todo += m;
+
+ /* Just a single slash? Then we reached the end. */
+ if (isempty(first) || path_equal(first, "/"))
+ break;
+
+ /* Just a dot? Then let's eat this up. */
+ if (path_equal(first, "/."))
+ continue;
+
+ /* Two dots? Then chop off the last bit of what we already found out. */
+ if (path_equal(first, "/..")) {
+ _cleanup_free_ char *parent = NULL;
+ int fd_parent = -1;
+
+ if (isempty(done) || path_equal(done, "/"))
+ return -EINVAL;
+
+ parent = dirname_malloc(done);
+ if (!parent)
+ return -ENOMEM;
+
+ /* Don't allow this to leave the root dir */
+ if (root &&
+ path_startswith(done, root) &&
+ !path_startswith(parent, root))
+ return -EINVAL;
+
+ free(done);
+ done = parent;
+ parent = NULL;
+
+ fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd_parent < 0)
+ return -errno;
+
+ safe_close(fd);
+ fd = fd_parent;
+
+ continue;
+ }
+
+ /* Otherwise let's see what this is. */
+ child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (child < 0)
+ return -errno;
+
+ if (fstat(child, &st) < 0)
+ return -errno;
+
+ if (S_ISLNK(st.st_mode)) {
+ _cleanup_free_ char *destination = NULL;
+
+ /* This is a symlink, in this case read the destination. But let's make sure we don't follow
+ * symlinks without bounds. */
+ if (--max_follow <= 0)
+ return -ELOOP;
+
+ r = readlinkat_malloc(fd, first + n, &destination);
+ if (r < 0)
+ return r;
+ if (isempty(destination))
+ return -EINVAL;
+
+ if (path_is_absolute(destination)) {
+
+ /* An absolute destination. Start the loop from the beginning, but use the root
+ * directory as base. */
+
+ safe_close(fd);
+ fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ free(buffer);
+ buffer = destination;
+ destination = NULL;
+
+ todo = buffer;
+ free(done);
+
+ /* Note that we do not revalidate the root, we take it as is. */
+ if (isempty(root))
+ done = NULL;
+ else {
+ done = strdup(root);
+ if (!done)
+ return -ENOMEM;
+ }
+
+ } else {
+ char *joined;
+
+ /* A relative destination. If so, this is what we'll prefix what's left to do with what
+ * we just read, and start the loop again, but remain in the current directory. */
+
+ joined = strjoin("/", destination, todo, NULL);
+ if (!joined)
+ return -ENOMEM;
+
+ free(buffer);
+ todo = buffer = joined;
+ }
+
+ continue;
+ }
+
+ /* If this is not a symlink, then let's just add the name we read to what we already verified. */
+ if (!done) {
+ done = first;
+ first = NULL;
+ } else {
+ if (!strextend(&done, first, NULL))
+ return -ENOMEM;
+ }
+
+ /* And iterate again, but go one directory further down. */
+ safe_close(fd);
+ fd = child;
+ child = -1;
+ }
+
+ if (!done) {
+ /* Special case, turn the empty string into "/", to indicate the root directory. */
+ done = strdup("/");
+ if (!done)
+ return -ENOMEM;
+ }
+
+ *ret = done;
+ done = NULL;
+
+ return 0;
+}
diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h
index 2c3b9a1c74..31df47cf1e 100644
--- a/src/basic/fs-util.h
+++ b/src/basic/fs-util.h
@@ -77,3 +77,5 @@ union inotify_event_buffer {
};
int inotify_add_watch_fd(int fd, int what, uint32_t mask);
+
+int chase_symlinks(const char *path, const char *_root, char **ret);
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 356d3c8121..d3ab2e8e3e 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -29,6 +29,7 @@
#include "alloc-util.h"
#include "dev-setup.h"
#include "fd-util.h"
+#include "fs-util.h"
#include "loopback-setup.h"
#include "missing.h"
#include "mkdir.h"
@@ -57,9 +58,9 @@ typedef enum MountMode {
} MountMode;
typedef struct BindMount {
- const char *path;
+ const char *path; /* stack memory, doesn't need to be freed explicitly */
+ char *chased; /* malloc()ed memory, needs to be freed */
MountMode mode;
- bool done;
bool ignore;
} BindMount;
@@ -71,7 +72,6 @@ static int append_mounts(BindMount **p, char **strv, MountMode mode) {
STRV_FOREACH(i, strv) {
(*p)->ignore = false;
- (*p)->done = false;
if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
(*p)->ignore = true;
@@ -360,11 +360,8 @@ static int apply_mount(
* inaccessible path. */
(void) umount_recursive(m->path, 0);
- if (lstat(m->path, &target) < 0) {
- if (m->ignore && errno == ENOENT)
- return 0;
+ if (lstat(m->path, &target) < 0)
return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
- }
what = mode_to_inaccessible_node(target.st_mode);
if (!what) {
@@ -378,11 +375,8 @@ static int apply_mount(
case READWRITE:
r = path_is_mount_point(m->path, 0);
- if (r < 0) {
- if (m->ignore && errno == ENOENT)
- return 0;
+ if (r < 0)
return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
- }
if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
return 0;
@@ -407,12 +401,8 @@ static int apply_mount(
assert(what);
- if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0) {
- if (m->ignore && errno == ENOENT)
- return 0;
-
+ if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0)
return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
- }
log_debug("Successfully mounted %s to %s", what, m->path);
return 0;
@@ -435,12 +425,43 @@ static int make_read_only(BindMount *m, char **blacklist) {
* already stays this way. This improves compatibility with container managers, where we won't attempt to undo
* read-only mounts already applied. */
- if (m->ignore && r == -ENOENT)
- return 0;
-
return r;
}
+static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+ int r;
+
+ assert(m);
+ assert(n);
+
+ /* Since mount() will always follow symlinks and we need to take the different root directory into account we
+ * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we
+ * can't resolve the path, and which have been marked for such removal. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ r = chase_symlinks(f->path, root_directory, &f->chased);
+ if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */
+ continue;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path);
+
+ if (path_equal(f->path, f->chased))
+ f->chased = mfree(f->chased);
+ else {
+ log_debug("Chased %s → %s", f->path, f->chased);
+ f->path = f->chased;
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+ return 0;
+}
+
int setup_namespace(
const char* root_directory,
char** read_write_paths,
@@ -456,6 +477,7 @@ int setup_namespace(
unsigned long mount_flags) {
BindMount *m, *mounts = NULL;
+ bool make_slave = false;
unsigned n;
int r = 0;
@@ -475,6 +497,9 @@ int setup_namespace(
((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
(protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
+ if (root_directory || n > 0)
+ make_slave = true;
+
if (n > 0) {
m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
r = append_mounts(&m, read_write_paths, READWRITE);
@@ -596,6 +621,13 @@ int setup_namespace(
assert(mounts + n == m);
+ /* Resolve symlinks manually first, as mount() will always follow them relative to the host's
+ * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit
+ * racy. */
+ r = chase_all_symlinks(root_directory, mounts, &n);
+ if (r < 0)
+ goto finish;
+
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
drop_duplicates(mounts, &n);
@@ -603,20 +635,26 @@ int setup_namespace(
drop_nop(mounts, &n);
}
- if (unshare(CLONE_NEWNS) < 0)
- return -errno;
+ if (unshare(CLONE_NEWNS) < 0) {
+ r = -errno;
+ goto finish;
+ }
- if (n > 0 || root_directory) {
+ if (make_slave) {
/* Remount / as SLAVE so that nothing now mounted in the namespace
shows up in the parent */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
- return -errno;
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
}
if (root_directory) {
/* Turn directory into bind mount */
- if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0)
- return -errno;
+ if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
}
if (n > 0) {
@@ -627,7 +665,7 @@ int setup_namespace(
for (m = mounts; m < mounts + n; ++m) {
r = apply_mount(m, tmp_dir, var_tmp_dir);
if (r < 0)
- goto fail;
+ goto finish;
}
/* Create a blacklist we can pass to bind_mount_recursive() */
@@ -640,34 +678,30 @@ int setup_namespace(
for (m = mounts; m < mounts + n; ++m) {
r = make_read_only(m, blacklist);
if (r < 0)
- goto fail;
+ goto finish;
}
}
if (root_directory) {
/* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
r = mount_move_root(root_directory);
- if (r < 0) /* at this point, we cannot rollback */
- return r;
+ if (r < 0)
+ goto finish;
}
/* Remount / as the desired mode. Not that this will not
* reestablish propagation from our side to the host, since
* what's disconnected is disconnected. */
- if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
- return -errno; /* at this point, we cannot rollback */
+ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
- return 0;
+ r = 0;
-fail:
- if (n > 0) {
- for (m = mounts; m < mounts + n; ++m) {
- if (!m->done)
- continue;
-
- (void) umount2(m->path, MNT_DETACH);
- }
- }
+finish:
+ for (m = mounts; m < mounts + n; m++)
+ free(m->chased);
return r;
}
diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c
index b35a2ea2c8..53a3cdc663 100644
--- a/src/test/test-fs-util.c
+++ b/src/test/test-fs-util.c
@@ -20,16 +20,109 @@
#include <unistd.h>
#include "alloc-util.h"
-#include "fileio.h"
#include "fd-util.h"
+#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "mkdir.h"
+#include "path-util.h"
#include "rm-rf.h"
#include "string-util.h"
#include "strv.h"
#include "util.h"
+static void test_chase_symlinks(void) {
+ _cleanup_free_ char *result = NULL;
+ char temp[] = "/tmp/test-chase.XXXXXX";
+ const char *top, *p, *q;
+ int r;
+
+ assert_se(mkdtemp(temp));
+
+ top = strjoina(temp, "/top");
+ assert_se(mkdir(top, 0700) >= 0);
+
+ p = strjoina(top, "/dot");
+ assert_se(symlink(".", p) >= 0);
+
+ p = strjoina(top, "/dotdot");
+ assert_se(symlink("..", p) >= 0);
+
+ p = strjoina(top, "/dotdota");
+ assert_se(symlink("../a", p) >= 0);
+
+ p = strjoina(temp, "/a");
+ assert_se(symlink("b", p) >= 0);
+
+ p = strjoina(temp, "/b");
+ assert_se(symlink("/usr", p) >= 0);
+
+ p = strjoina(temp, "/start");
+ assert_se(symlink("top/dot/dotdota", p) >= 0);
+
+ r = chase_symlinks(p, NULL, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, "/usr"));
+
+ result = mfree(result);
+ r = chase_symlinks(p, temp, &result);
+ assert_se(r == -ENOENT);
+
+ q = strjoina(temp, "/usr");
+ assert_se(mkdir(q, 0700) >= 0);
+
+ r = chase_symlinks(p, temp, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, q));
+
+ p = strjoina(temp, "/slash");
+ assert_se(symlink("/", p) >= 0);
+
+ result = mfree(result);
+ r = chase_symlinks(p, NULL, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, "/"));
+
+ result = mfree(result);
+ r = chase_symlinks(p, temp, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, temp));
+
+ p = strjoina(temp, "/slashslash");
+ assert_se(symlink("///usr///", p) >= 0);
+
+ result = mfree(result);
+ r = chase_symlinks(p, NULL, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, "/usr"));
+
+ result = mfree(result);
+ r = chase_symlinks(p, temp, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, q));
+
+ result = mfree(result);
+ r = chase_symlinks("/etc/./.././", NULL, &result);
+ assert_se(r >= 0);
+ assert_se(path_equal(result, "/"));
+
+ result = mfree(result);
+ r = chase_symlinks("/etc/./.././", "/etc", &result);
+ assert_se(r == -EINVAL);
+
+ result = mfree(result);
+ r = chase_symlinks("/etc/machine-id/foo", NULL, &result);
+ assert_se(r == -ENOTDIR);
+
+ result = mfree(result);
+ p = strjoina(temp, "/recursive-symlink");
+ assert_se(symlink("recursive-symlink", p) >= 0);
+ r = chase_symlinks(p, NULL, &result);
+ assert_se(r == -ELOOP);
+
+ assert_se(rm_rf(temp, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
static void test_unlink_noerrno(void) {
char name[] = "/tmp/test-close_nointr.XXXXXX";
int fd;
@@ -144,6 +237,7 @@ int main(int argc, char *argv[]) {
test_readlink_and_make_absolute();
test_get_files_in_directory();
test_var_tmp();
+ test_chase_symlinks();
return 0;
}
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 03a24620af..c4d4da6d05 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -26,14 +26,18 @@
int main(int argc, char *argv[]) {
const char * const writable[] = {
"/home",
- "/home/lennart/projects/foobar", /* this should be masked automatically */
+ "-/home/lennart/projects/foobar", /* this should be masked automatically */
NULL
};
const char * const readonly[] = {
- "/",
- "/usr",
+ /* "/", */
+ /* "/usr", */
"/boot",
+ "/lib",
+ "/usr/lib",
+ "-/lib64",
+ "-/usr/lib64",
NULL
};