diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2017-02-08 23:05:05 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-08 23:05:05 -0500 |
commit | fc6149a6ce7a5560ae239a317b7f43039a3f80fd (patch) | |
tree | d6411b7b1fe425234524e8baea3c297fdd22202f /src | |
parent | 52a4aafb4dd178afae5ce8ceadd852233cac10f3 (diff) | |
parent | ef3116b5d4b9f12ae9f0fc25c8b40a04712c6d56 (diff) |
Merge pull request #4962 from poettering/root-directory-2
Add new MountAPIVFS= boolean unit file setting + RootImage=
Diffstat (limited to 'src')
-rw-r--r-- | src/core/dbus-execute.c | 22 | ||||
-rw-r--r-- | src/core/execute.c | 39 | ||||
-rw-r--r-- | src/core/execute.h | 3 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 19 | ||||
-rw-r--r-- | src/core/namespace.c | 187 | ||||
-rw-r--r-- | src/core/namespace.h | 6 | ||||
-rw-r--r-- | src/core/unit.c | 6 | ||||
-rw-r--r-- | src/dissect/dissect.c | 8 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 57 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 8 | ||||
-rw-r--r-- | src/shared/dissect-image.c | 98 | ||||
-rw-r--r-- | src/shared/dissect-image.h | 2 | ||||
-rw-r--r-- | src/test/test-ns.c | 2 |
14 files changed, 350 insertions, 109 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index cc10e2d8e7..7df4cab3f6 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -758,6 +758,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("LimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(ExecContext, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("WorkingDirectory", "s", property_get_working_directory, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootDirectory", "s", NULL, offsetof(ExecContext, root_directory), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RootImage", "s", NULL, offsetof(ExecContext, root_image), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Nice", "i", property_get_nice, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("IOScheduling", "i", property_get_ioprio, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -828,6 +829,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; @@ -1047,7 +1049,7 @@ int bus_exec_context_set_transient_property( return 1; - } else if (STR_IN_SET(name, "TTYPath", "RootDirectory")) { + } else if (STR_IN_SET(name, "TTYPath", "RootDirectory", "RootImage")) { const char *s; r = sd_bus_message_read(message, "s", &s); @@ -1060,6 +1062,8 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { if (streq(name, "TTYPath")) r = free_and_strdup(&c->tty_path, s); + else if (streq(name, "RootImage")) + r = free_and_strdup(&c->root_image, s); else { assert(streq(name, "RootDirectory")); r = free_and_strdup(&c->root_directory, s); @@ -1207,7 +1211,7 @@ int bus_exec_context_set_transient_property( "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", - "ProtectKernelModules", "ProtectControlGroups")) { + "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1247,6 +1251,8 @@ int bus_exec_context_set_transient_property( c->protect_kernel_modules = b; else if (streq(name, "ProtectControlGroups")) c->protect_control_groups = b; + else if (streq(name, "MountAPIVFS")) + c->mount_apivfs = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } @@ -1495,12 +1501,15 @@ int bus_exec_context_set_transient_property( return r; STRV_FOREACH(p, l) { - int offset; - if (!utf8_is_valid(*p)) + const char *i = *p; + size_t offset; + + if (!utf8_is_valid(i)) return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s", name); - offset = **p == '-'; - if (!path_is_absolute(*p + offset)) + offset = i[0] == '-'; + offset += i[offset] == '+'; + if (!path_is_absolute(i + offset)) return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s", name); } @@ -1519,7 +1528,6 @@ int bus_exec_context_set_transient_property( unit_write_drop_in_private_format(u, mode, name, "%s=", name); } else { r = strv_extend_strv(dirs, l, true); - if (r < 0) return -ENOMEM; diff --git a/src/core/execute.c b/src/core/execute.c index aa0ddb564e..f57eb26388 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1640,6 +1640,9 @@ static bool exec_needs_mount_namespace( assert(context); assert(params); + if (context->root_image) + return true; + if (!strv_isempty(context->read_write_paths) || !strv_isempty(context->read_only_paths) || !strv_isempty(context->inaccessible_paths)) @@ -1662,6 +1665,9 @@ static bool exec_needs_mount_namespace( context->protect_control_groups) return true; + if (context->mount_apivfs) + return true; + return false; } @@ -1935,13 +1941,14 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context, int r; _cleanup_strv_free_ char **rw = NULL; char *tmp = NULL, *var = NULL; - const char *root_dir = NULL; + const char *root_dir = NULL, *root_image = NULL; NameSpaceInfo ns_info = { .ignore_protect_paths = false, .private_dev = context->private_devices, .protect_control_groups = context->protect_control_groups, .protect_kernel_tunables = context->protect_kernel_tunables, .protect_kernel_modules = context->protect_kernel_modules, + .mount_apivfs = context->mount_apivfs, }; assert(context); @@ -1961,8 +1968,12 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context, if (r < 0) return r; - if (params->flags & EXEC_APPLY_CHROOT) - root_dir = context->root_directory; + if (params->flags & EXEC_APPLY_CHROOT) { + root_image = context->root_image; + + if (!root_image) + root_dir = context->root_directory; + } /* * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed @@ -1972,7 +1983,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context, if (!context->dynamic_user && root_dir) ns_info.ignore_protect_paths = true; - r = setup_namespace(root_dir, &ns_info, rw, + r = setup_namespace(root_dir, root_image, + &ns_info, rw, context->read_only_paths, context->inaccessible_paths, context->bind_mounts, @@ -1981,7 +1993,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context, var, context->protect_home, context->protect_system, - context->mount_flags); + context->mount_flags, + DISSECT_IMAGE_DISCARD_ON_LOOP); /* If we couldn't set up the namespace this is probably due to a * missing capability. In this case, silently proceeed. */ @@ -1995,10 +2008,12 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context, return r; } -static int apply_working_directory(const ExecContext *context, - const ExecParameters *params, - const char *home, - const bool needs_mount_ns) { +static int apply_working_directory( + const ExecContext *context, + const ExecParameters *params, + const char *home, + const bool needs_mount_ns) { + const char *d; const char *wd; @@ -2979,6 +2994,7 @@ void exec_context_done(ExecContext *c) { c->working_directory = mfree(c->working_directory); c->root_directory = mfree(c->root_directory); + c->root_image = mfree(c->root_image); c->tty_path = mfree(c->tty_path); c->syslog_identifier = mfree(c->syslog_identifier); c->user = mfree(c->user); @@ -3294,6 +3310,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sPrivateUsers: %s\n" "%sProtectHome: %s\n" "%sProtectSystem: %s\n" + "%sMountAPIVFS: %s\n" "%sIgnoreSIGPIPE: %s\n" "%sMemoryDenyWriteExecute: %s\n" "%sRestrictRealtime: %s\n", @@ -3310,10 +3327,14 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->private_users), prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), + prefix, yes_no(c->mount_apivfs), prefix, yes_no(c->ignore_sigpipe), prefix, yes_no(c->memory_deny_write_execute), prefix, yes_no(c->restrict_realtime)); + if (c->root_image) + fprintf(f, "%sRootImage: %s\n", prefix, c->root_image); + STRV_FOREACH(e, c->environment) fprintf(f, "%sEnvironment: %s\n", prefix, *e); diff --git a/src/core/execute.h b/src/core/execute.h index f8694ef520..9f2b6fd39e 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -106,7 +106,7 @@ struct ExecContext { char **pass_environment; struct rlimit *rlimit[_RLIMIT_MAX]; - char *working_directory, *root_directory; + char *working_directory, *root_directory, *root_image; bool working_directory_missing_ok; bool working_directory_home; @@ -183,6 +183,7 @@ struct ExecContext { bool protect_kernel_tunables; bool protect_kernel_modules; bool protect_control_groups; + bool mount_apivfs; bool no_new_privileges; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 15f22a2681..cb9e6fea27 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -19,6 +19,7 @@ m4_dnl Define the context options only once m4_define(`EXEC_CONTEXT_CONFIG_ITEMS', `$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context) $1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory) +$1.RootImage, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_image) $1.User, config_parse_user_group, 0, offsetof($1, exec_context.user) $1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group) $1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups) @@ -101,6 +102,7 @@ $1.PrivateUsers, config_parse_bool, 0, $1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context) $1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context) $1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context) +$1.MountAPIVFS, config_parse_bool, 0, offsetof($1, exec_context.mount_apivfs) $1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality) $1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.runtime_directory_mode) $1.RuntimeDirectory, config_parse_runtime_directory, 0, offsetof($1, exec_context.runtime_directory) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 243c288885..5b7471c0d0 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3839,7 +3839,8 @@ int config_parse_namespace_path_strv( cur = rvalue; for (;;) { _cleanup_free_ char *word = NULL, *resolved = NULL, *joined = NULL; - bool ignore_enoent; + const char *w; + bool ignore_enoent = false, shall_prefix = false; r = extract_first_word(&cur, &word, NULL, EXTRACT_QUOTES); if (r == 0) @@ -3856,9 +3857,17 @@ int config_parse_namespace_path_strv( continue; } - ignore_enoent = word[0] == '-'; + w = word; + if (startswith(w, "-")) { + ignore_enoent = true; + w++; + } + if (startswith(w, "+")) { + shall_prefix = true; + w++; + } - r = unit_full_printf(u, word + ignore_enoent, &resolved); + r = unit_full_printf(u, w, &resolved); if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve specifiers in %s: %m", word); continue; @@ -3871,7 +3880,9 @@ int config_parse_namespace_path_strv( path_kill_slashes(resolved); - joined = strjoin(ignore_enoent ? "-" : "", resolved); + joined = strjoin(ignore_enoent ? "-" : "", + shall_prefix ? "+" : "", + resolved); r = strv_push(sv, joined); if (r < 0) diff --git a/src/core/namespace.c b/src/core/namespace.c index 834883267c..75dca5b791 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -30,6 +30,7 @@ #include "dev-setup.h" #include "fd-util.h" #include "fs-util.h" +#include "loop-util.h" #include "loopback-setup.h" #include "missing.h" #include "mkdir.h" @@ -52,10 +53,13 @@ typedef enum MountMode { INACCESSIBLE, BIND_MOUNT, BIND_MOUNT_RECURSIVE, - READONLY, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV, + BIND_DEV, + SYSFS, + PROCFS, + READONLY, READWRITE, } MountMode; @@ -70,13 +74,13 @@ typedef struct MountEntry { char *source_malloc; } MountEntry; -/* - * The following Protect tables are to protect paths and mark some of them - * READONLY, in case a path is covered by an option from another table, then - * it is marked READWRITE in the current one, and the more restrictive mode is - * applied from that other table. This way all options can be combined in a - * safe and comprehensible way for users. - */ +/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted + * something there already. These mounts are hence overriden by any other explicitly configured mounts. */ +static const MountEntry apivfs_table[] = { + { "/proc", PROCFS, false }, + { "/dev", BIND_DEV, false }, + { "/sys", SYSFS, false }, +}; /* ProtectKernelTunables= option and the related filesystem APIs */ static const MountEntry protect_kernel_tunables_table[] = { @@ -176,6 +180,13 @@ static const char *mount_entry_source(const MountEntry *p) { return p->source_malloc ?: p->source_const; } +static void mount_entry_done(MountEntry *p) { + assert(p); + + p->path_malloc = mfree(p->path_malloc); + p->source_malloc = mfree(p->source_malloc); +} + static int append_access_mounts(MountEntry **p, char **strv, MountMode mode) { char **i; @@ -351,7 +362,7 @@ static void drop_duplicates(MountEntry *m, unsigned *n) { if (previous && path_equal(mount_entry_path(f), mount_entry_path(previous))) { log_debug("%s is duplicate.", mount_entry_path(f)); previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */ - f->path_malloc = mfree(f->path_malloc); + mount_entry_done(f); continue; } @@ -379,7 +390,7 @@ static void drop_inaccessible(MountEntry *m, unsigned *n) { * it, as inaccessible paths really should drop the entire subtree. */ if (clear && path_startswith(mount_entry_path(f), clear)) { log_debug("%s is masked by %s.", mount_entry_path(f), clear); - f->path_malloc = mfree(f->path_malloc); + mount_entry_done(f); continue; } @@ -419,7 +430,7 @@ static void drop_nop(MountEntry *m, unsigned *n) { /* We found it, let's see if it's the same mode, if so, we can drop this entry */ if (found && p->mode == f->mode) { log_debug("%s is redundant by %s", mount_entry_path(f), mount_entry_path(p)); - f->path_malloc = mfree(f->path_malloc); + mount_entry_done(f); continue; } } @@ -447,7 +458,7 @@ static void drop_outside_root(const char *root_directory, MountEntry *m, unsigne if (!path_startswith(mount_entry_path(f), root_directory)) { log_debug("%s is outside of root directory.", mount_entry_path(f)); - f->path_malloc = mfree(f->path_malloc); + mount_entry_done(f); continue; } @@ -458,7 +469,7 @@ static void drop_outside_root(const char *root_directory, MountEntry *m, unsigne *n = t - m; } -static int mount_dev(MountEntry *m) { +static int mount_private_dev(MountEntry *m) { static const char devnodes[] = "/dev/null\0" "/dev/zero\0" @@ -597,6 +608,62 @@ fail: return r; } +static int mount_bind_dev(MountEntry *m) { + int r; + + assert(m); + + /* Implements the little brother of mount_private_dev(): simply bind mounts the host's /dev into the service's + * /dev. This is only used when RootDirectory= is set. */ + + r = path_is_mount_point(mount_entry_path(m), NULL, 0); + if (r < 0) + return log_debug_errno(r, "Unable to determine whether /dev is already mounted: %m"); + if (r > 0) /* make this a NOP if /dev is already a mount point */ + return 0; + + if (mount("/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL) < 0) + return log_debug_errno(errno, "Failed to bind mount %s: %m", mount_entry_path(m)); + + return 1; +} + +static int mount_sysfs(MountEntry *m) { + int r; + + assert(m); + + r = path_is_mount_point(mount_entry_path(m), NULL, 0); + if (r < 0) + return log_debug_errno(r, "Unable to determine whether /sys is already mounted: %m"); + if (r > 0) /* make this a NOP if /sys is already a mount point */ + return 0; + + /* Bind mount the host's version so that we get all child mounts of it, too. */ + if (mount("/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL) < 0) + return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m)); + + return 1; +} + +static int mount_procfs(MountEntry *m) { + int r; + + assert(m); + + r = path_is_mount_point(mount_entry_path(m), NULL, 0); + if (r < 0) + return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m"); + if (r > 0) /* make this a NOP if /proc is already a mount point */ + return 0; + + /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in one */ + if (mount("proc", mount_entry_path(m), "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0) + return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m)); + + return 1; +} + static int mount_entry_chase( const char *root_directory, MountEntry *m, @@ -684,6 +751,7 @@ static int apply_mount( case BIND_MOUNT_RECURSIVE: /* Also chase the source mount */ + r = mount_entry_chase(root_directory, m, mount_entry_source(m), &m->source_malloc); if (r <= 0) return r; @@ -700,7 +768,16 @@ static int apply_mount( break; case PRIVATE_DEV: - return mount_dev(m); + return mount_private_dev(m); + + case BIND_DEV: + return mount_bind_dev(m); + + case SYSFS: + return mount_sysfs(m); + + case PROCFS: + return mount_procfs(m); default: assert_not_reached("Unknown mode"); @@ -722,7 +799,7 @@ static int make_read_only(MountEntry *m, char **blacklist) { if (mount_entry_read_only(m)) r = bind_remount_recursive(mount_entry_path(m), true, blacklist); - else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ + else if (m->mode == PRIVATE_DEV) { /* Superblock can be readonly but the submounts can't*/ if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) r = -errno; } else @@ -738,6 +815,17 @@ static int make_read_only(MountEntry *m, char **blacklist) { return r; } +static bool namespace_info_mount_apivfs(const NameSpaceInfo *ns_info) { + assert(ns_info); + + /* ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=, since to protect the API VFS mounts, + * they need to be around in the first place... */ + + return ns_info->mount_apivfs || + ns_info->protect_control_groups || + ns_info->protect_kernel_tunables; +} + static unsigned namespace_calculate_mounts( const NameSpaceInfo *ns_info, char** read_write_paths, @@ -774,11 +862,13 @@ static unsigned namespace_calculate_mounts( (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + (ns_info->protect_control_groups ? 1 : 0) + (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) + - protect_home_cnt + protect_system_cnt; + protect_home_cnt + protect_system_cnt + + (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0); } int setup_namespace( const char* root_directory, + const char* root_image, const NameSpaceInfo *ns_info, char** read_write_paths, char** read_only_paths, @@ -789,16 +879,57 @@ int setup_namespace( const char* var_tmp_dir, ProtectHome protect_home, ProtectSystem protect_system, - unsigned long mount_flags) { + unsigned long mount_flags, + DissectImageFlags dissect_image_flags) { + _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; + _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; + _cleanup_free_ void *root_hash = NULL; MountEntry *m, *mounts = NULL; + size_t root_hash_size = 0; bool make_slave = false; unsigned n_mounts; int r = 0; + assert(ns_info); + if (mount_flags == 0) mount_flags = MS_SHARED; + if (root_image) { + dissect_image_flags |= DISSECT_IMAGE_REQUIRE_ROOT; + + if (protect_system == PROTECT_SYSTEM_STRICT && strv_isempty(read_write_paths)) + dissect_image_flags |= DISSECT_IMAGE_READ_ONLY; + + r = loop_device_make_by_path(root_image, + dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY : O_RDWR, + &loop_device); + if (r < 0) + return r; + + r = root_hash_load(root_image, &root_hash, &root_hash_size); + if (r < 0) + return r; + + r = dissect_image(loop_device->fd, root_hash, root_hash_size, dissect_image_flags, &dissected_image); + if (r < 0) + return r; + + r = dissected_image_decrypt(dissected_image, NULL, root_hash, root_hash_size, dissect_image_flags, &decrypted_image); + if (r < 0) + return r; + + if (!root_directory) { + /* Create a mount point for the image, if it's still missing. We use the same mount point for + * all images, which is safe, since they all live in their own namespaces after all, and hence + * won't see each other. */ + root_directory = "/run/systemd/unit-root"; + (void) mkdir(root_directory, 0700); + } + } + n_mounts = namespace_calculate_mounts( ns_info, read_write_paths, @@ -878,6 +1009,12 @@ int setup_namespace( if (r < 0) goto finish; + if (namespace_info_mount_apivfs(ns_info)) { + r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table), ns_info->ignore_protect_paths); + if (r < 0) + goto finish; + } + assert(mounts + n_mounts == m); /* Prepend the root directory where that's necessary */ @@ -907,7 +1044,19 @@ int setup_namespace( } } - if (root_directory) { + if (root_image) { + r = dissected_image_mount(dissected_image, root_directory, dissect_image_flags); + if (r < 0) + goto finish; + + r = decrypted_image_relinquish(decrypted_image); + if (r < 0) + goto finish; + + loop_device_relinquish(loop_device); + + } else if (root_directory) { + /* Turn directory into bind mount, if it isn't one yet */ r = path_is_mount_point(root_directory, NULL, AT_SYMLINK_FOLLOW); if (r < 0) @@ -964,7 +1113,7 @@ int setup_namespace( finish: for (m = mounts; m < mounts + n_mounts; m++) - free(m->path_malloc); + mount_entry_done(m); return r; } diff --git a/src/core/namespace.h b/src/core/namespace.h index de3edc419c..f54954bd86 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -25,6 +25,7 @@ typedef struct BindMount BindMount; #include <stdbool.h> +#include "dissect-image.h" #include "macro.h" typedef enum ProtectHome { @@ -50,6 +51,7 @@ struct NameSpaceInfo { bool protect_control_groups:1; bool protect_kernel_tunables:1; bool protect_kernel_modules:1; + bool mount_apivfs:1; }; struct BindMount { @@ -62,6 +64,7 @@ struct BindMount { int setup_namespace( const char *root_directory, + const char *root_image, const NameSpaceInfo *ns_info, char **read_write_paths, char **read_only_paths, @@ -72,7 +75,8 @@ int setup_namespace( const char *var_tmp_dir, ProtectHome protect_home, ProtectSystem protect_system, - unsigned long mount_flags); + unsigned long mount_flags, + DissectImageFlags dissected_image_flags); int setup_tmp_dirs( const char *id, diff --git a/src/core/unit.c b/src/core/unit.c index 44f1d5e206..90d7eea956 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -862,6 +862,12 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) { return r; } + if (c->root_image) { + r = unit_require_mounts_for(u, c->root_image); + if (r < 0) + return r; + } + if (!MANAGER_IS_SYSTEM(u->manager)) return 0; diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index fd9db5ba87..59bd7d9e84 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -191,6 +191,14 @@ int main(int argc, char *argv[]) { goto finish; } + if (!arg_root_hash) { + r = root_hash_load(arg_image, &arg_root_hash, &arg_root_hash_size); + if (r < 0) { + log_error_errno(r, "Failed to read root hash file for %s: %m", arg_image); + goto finish; + } + } + r = dissect_image(d->fd, arg_root_hash, arg_root_hash_size, arg_flags, &m); if (r == -ENOPKG) { log_error_errno(r, "Couldn't identify a suitable partition table or file system in %s.", arg_image); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index a8d33ad907..b172b44933 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3507,53 +3507,6 @@ static int run(int master, return 1; /* loop again */ } -static int load_root_hash(const char *image) { - _cleanup_free_ char *text = NULL, *fn = NULL; - char *n, *e; - void *k; - size_t l; - int r; - - assert_se(image); - - /* Try to load the root hash from a file next to the image file if it exists. */ - - if (arg_root_hash) - return 0; - - fn = new(char, strlen(image) + strlen(".roothash") + 1); - if (!fn) - return log_oom(); - - n = stpcpy(fn, image); - e = endswith(fn, ".raw"); - if (e) - n = e; - - strcpy(n, ".roothash"); - - r = read_one_line_file(fn, &text); - if (r == -ENOENT) - return 0; - if (r < 0) { - log_warning_errno(r, "Failed to read %s, ignoring: %m", fn); - return 0; - } - - r = unhexmem(text, strlen(text), &k, &l); - if (r < 0) - return log_error_errno(r, "Invalid root hash: %s", text); - if (l < sizeof(sd_id128_t)) { - free(k); - return log_error_errno(r, "Root hash too short: %s", text); - } - - arg_root_hash = k; - arg_root_hash_size = l; - - return 0; -} - int main(int argc, char *argv[]) { _cleanup_free_ char *console = NULL; @@ -3769,9 +3722,13 @@ int main(int argc, char *argv[]) { goto finish; } - r = load_root_hash(arg_image); - if (r < 0) - goto finish; + if (!arg_root_hash) { + r = root_hash_load(arg_image, &arg_root_hash, &arg_root_hash_size); + if (r < 0) { + log_error_errno(r, "Failed to load root hash file for %s: %m", arg_image); + goto finish; + } + } } if (!mkdtemp(tmprootdir)) { diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 829be2c6da..20c1085697 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -208,7 +208,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", - "ProtectKernelModules", "ProtectControlGroups")) { + "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS")) { r = parse_boolean(eq); if (r < 0) @@ -266,7 +266,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen "StandardInput", "StandardOutput", "StandardError", "Description", "Slice", "Type", "WorkingDirectory", "RootDirectory", "SyslogIdentifier", "ProtectSystem", - "ProtectHome", "SELinuxContext", "Restart")) + "ProtectHome", "SELinuxContext", "Restart", "RootImage")) r = sd_bus_message_append(m, "v", "s", eq); else if (streq(field, "SyslogLevel")) { @@ -484,7 +484,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen for (p = eq;;) { _cleanup_free_ char *word = NULL; - int offset; + size_t offset; r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES); if (r < 0) { @@ -500,6 +500,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen } offset = word[0] == '-'; + offset += word[offset] == '+'; + if (!path_is_absolute(word + offset)) { log_error("Failed to parse %s value %s", field, eq); return -EINVAL; diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index c17486cba2..410a7764ed 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -28,14 +28,19 @@ #include "blkid-util.h" #include "dissect-image.h" #include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" #include "gpt.h" +#include "hexdecoct.h" #include "mount-util.h" #include "path-util.h" #include "stat-util.h" #include "stdio-util.h" #include "string-table.h" #include "string-util.h" +#include "strv.h" #include "udev-util.h" +#include "xattr-util.h" static int probe_filesystem(const char *node, char **ret_fstype) { #ifdef HAVE_BLKID @@ -657,7 +662,9 @@ static int mount_partition( DissectImageFlags flags) { const char *p, *options = NULL, *node, *fstype; + _cleanup_free_ char *chased = NULL; bool rw; + int r; assert(m); assert(where); @@ -674,9 +681,13 @@ static int mount_partition( rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY); - if (directory) - p = strjoina(where, directory); - else + if (directory) { + r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased); + if (r < 0) + return r; + + p = chased; + } else p = where; /* If requested, turn on discard support. */ @@ -710,22 +721,23 @@ int dissected_image_mount(DissectedImage *m, const char *where, DissectImageFlag return r; if (m->partitions[PARTITION_ESP].found) { - const char *mp, *x; + const char *mp; /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */ - mp = "/efi"; - x = strjoina(where, mp); - r = dir_is_empty(x); - if (r == -ENOENT) { - mp = "/boot"; - x = strjoina(where, mp); - r = dir_is_empty(x); - } - if (r > 0) { - r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags); + FOREACH_STRING(mp, "/efi", "/boot") { + _cleanup_free_ char *p = NULL; + + r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p); if (r < 0) - return r; + continue; + + r = dir_is_empty(p); + if (r > 0) { + r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags); + if (r < 0) + return r; + } } } @@ -1111,6 +1123,62 @@ int decrypted_image_relinquish(DecryptedImage *d) { return 0; } +int root_hash_load(const char *image, void **ret, size_t *ret_size) { + _cleanup_free_ char *text = NULL; + _cleanup_free_ void *k = NULL; + size_t l; + int r; + + assert(image); + assert(ret); + assert(ret_size); + + if (is_device_path(image)) { + /* If we are asked to load the root hash for a device node, exit early */ + *ret = NULL; + *ret_size = 0; + return 0; + } + + r = getxattr_malloc(image, "user.verity.roothash", &text, true); + if (r < 0) { + char *fn, *e, *n; + + if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT)) + return r; + + fn = newa(char, strlen(image) + strlen(".roothash") + 1); + n = stpcpy(fn, image); + e = endswith(fn, ".raw"); + if (e) + n = e; + + strcpy(n, ".roothash"); + + r = read_one_line_file(fn, &text); + if (r == -ENOENT) { + *ret = NULL; + *ret_size = 0; + return 0; + } + if (r < 0) + return r; + } + + r = unhexmem(text, strlen(text), &k, &l); + if (r < 0) + return r; + if (l < sizeof(sd_id128_t)) + return -EINVAL; + + *ret = k; + *ret_size = l; + + k = NULL; + + return 1; +} + static const char *const partition_designator_table[] = { [PARTITION_ROOT] = "root", [PARTITION_ROOT_SECONDARY] = "root-secondary", diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 26319bd8e7..cdb083be6f 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -94,3 +94,5 @@ int decrypted_image_relinquish(DecryptedImage *d); const char* partition_designator_to_string(int i) _const_; int partition_designator_from_string(const char *name) _pure_; + +int root_hash_load(const char *image, void **ret, size_t *ret_size); diff --git a/src/test/test-ns.c b/src/test/test-ns.c index c99bcb371b..0125d905a6 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -77,6 +77,7 @@ int main(int argc, char *argv[]) { log_info("Not chrooted"); r = setup_namespace(root_directory, + NULL, &ns_info, (char **) writable, (char **) readonly, @@ -86,6 +87,7 @@ int main(int argc, char *argv[]) { var_tmp_dir, PROTECT_HOME_NO, PROTECT_SYSTEM_NO, + 0, 0); if (r < 0) { log_error_errno(r, "Failed to setup namespace: %m"); |