From a4c35b6b4d8457e15caa3fe18ce8050918932d00 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 8 Dec 2016 00:28:05 +0100 Subject: nspawn: split out VolatileMode definitions This moves the VolatileMode enum and its helper functions to src/shared/. This is useful to then reuse them to implement systemd.volatile= in a later commit. --- src/shared/volatile-util.c | 41 +++++++++++++++++++++++++++++++++++++++++ src/shared/volatile-util.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 src/shared/volatile-util.c create mode 100644 src/shared/volatile-util.h (limited to 'src/shared') diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c new file mode 100644 index 0000000000..1329b51f4e --- /dev/null +++ b/src/shared/volatile-util.c @@ -0,0 +1,41 @@ +/*** + This file is part of systemd. + + Copyright 2015 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . 
+***/ + +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" +#include "volatile-util.h" + +VolatileMode volatile_mode_from_string(const char *s) { + int b; + + if (isempty(s)) + return _VOLATILE_MODE_INVALID; + + b = parse_boolean(s); + if (b > 0) + return VOLATILE_YES; + if (b == 0) + return VOLATILE_NO; + + if (streq(s, "state")) + return VOLATILE_STATE; + + return _VOLATILE_MODE_INVALID; +} diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h new file mode 100644 index 0000000000..d012940c76 --- /dev/null +++ b/src/shared/volatile-util.h @@ -0,0 +1,30 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . 
+***/ + +typedef enum VolatileMode { + VOLATILE_NO, + VOLATILE_YES, + VOLATILE_STATE, + _VOLATILE_MODE_MAX, + _VOLATILE_MODE_INVALID = -1 +} VolatileMode; + +VolatileMode volatile_mode_from_string(const char *s); -- cgit v1.2.3-54-g00ecf From 289cb4d5cd7462a074ffb5018efa385e82220318 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 8 Dec 2016 10:50:29 +0100 Subject: shared: use uid_is_valid() for checking validity of UIDs --- src/shared/base-filesystem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shared') diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c index f1fbce9dca..127cbe44e3 100644 --- a/src/shared/base-filesystem.c +++ b/src/shared/base-filesystem.c @@ -101,7 +101,7 @@ int base_filesystem_create(const char *root, uid_t uid, gid_t gid) { if (r < 0 && errno != EEXIST) return log_error_errno(errno, "Failed to create symlink at %s/%s: %m", root, table[i].dir); - if (uid != UID_INVALID || gid != UID_INVALID) { + if (uid_is_valid(uid) || gid_is_valid(gid)) { if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0) return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir); } -- cgit v1.2.3-54-g00ecf From 9ffcff0e0a3ede3f4c227327cb1a96a436386f4f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 8 Dec 2016 19:36:46 +0100 Subject: tree-wide: always invoke setmntent() with "re" mode Let's make sure O_CLOEXEC is set for the file descriptor. 
--- src/cryptsetup/cryptsetup.c | 2 +- src/remount-fs/remount-fs.c | 2 +- src/shared/fstab-util.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/shared') diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c index c7fec609df..b3abbb8b2f 100644 --- a/src/cryptsetup/cryptsetup.c +++ b/src/cryptsetup/cryptsetup.c @@ -310,7 +310,7 @@ static char *disk_mount_point(const char *label) { if (asprintf(&device, "/dev/mapper/%s", label) < 0) return NULL; - f = setmntent("/etc/fstab", "r"); + f = setmntent("/etc/fstab", "re"); if (!f) return NULL; diff --git a/src/remount-fs/remount-fs.c b/src/remount-fs/remount-fs.c index c3bdcaf1da..0cb9bd9261 100644 --- a/src/remount-fs/remount-fs.c +++ b/src/remount-fs/remount-fs.c @@ -56,7 +56,7 @@ int main(int argc, char *argv[]) { umask(0022); - f = setmntent("/etc/fstab", "r"); + f = setmntent("/etc/fstab", "re"); if (!f) { if (errno == ENOENT) { r = 0; diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c index f0bfb30bb5..87b520b540 100644 --- a/src/shared/fstab-util.c +++ b/src/shared/fstab-util.c @@ -38,7 +38,7 @@ bool fstab_is_mount_point(const char *mount) { _cleanup_endmntent_ FILE *f = NULL; struct mntent *m; - f = setmntent("/etc/fstab", "r"); + f = setmntent("/etc/fstab", "re"); if (!f) return false; -- cgit v1.2.3-54-g00ecf From 9b6deb03fcb358d6987ce86fcad08e2e290ee5d0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 9 Dec 2016 18:39:15 +0100 Subject: dissect: optionally, only look for GPT partition tables, nothing else This is useful for reusing the dissector logic in the gpt-auto-discovery logic: there we really don't want to use MBR or naked file systems as root device. 
--- src/dissect/dissect.c | 2 +- src/machine/image-dbus.c | 2 +- src/nspawn/nspawn.c | 2 +- src/shared/dissect-image.c | 89 +++++++++++++++++++++++-------------------- src/shared/dissect-image.h | 3 +- src/test/test-dissect-image.c | 2 +- 6 files changed, 54 insertions(+), 46 deletions(-) (limited to 'src/shared') diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index f2f1e135ec..aa06894037 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -191,7 +191,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissect_image(d->fd, arg_root_hash, arg_root_hash_size, &m); + r = dissect_image(d->fd, arg_root_hash, arg_root_hash_size, 0, &m); if (r == -ENOPKG) { log_error_errno(r, "Couldn't identify a suitable partition table or file system in %s.", arg_image); goto finish; diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index e2fb882393..2b168b267b 100644 --- a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -336,7 +336,7 @@ static int raw_image_get_os_release(Image *image, char ***ret, sd_bus_error *err if (r < 0) return sd_bus_error_set_errnof(error, r, "Failed to set up loop block device for %s: %m", image->path); - r = dissect_image(d->fd, NULL, 0, &m); + r = dissect_image(d->fd, NULL, 0, 0, &m); if (r == -ENOPKG) return sd_bus_error_set_errnof(error, r, "Disk image %s not understood: %m", image->path); if (r < 0) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index dcc639f15c..2e879d7d7f 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3743,7 +3743,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissect_image(loop->fd, arg_root_hash, arg_root_hash_size, &dissected_image); + r = dissect_image(loop->fd, arg_root_hash, arg_root_hash_size, 0, &dissected_image); if (r == -ENOPKG) { log_error_errno(r, "Could not find a suitable file system or partition table in image: %s", arg_image); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index d3ba9b9dde..10d53eab45 
100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -84,7 +84,7 @@ not_found: #endif } -int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectedImage **ret) { +int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret) { #ifdef HAVE_BLKID sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL; @@ -95,7 +95,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte _cleanup_blkid_free_probe_ blkid_probe b = NULL; _cleanup_udev_unref_ struct udev *udev = NULL; _cleanup_free_ char *generic_node = NULL; - const char *pttype = NULL, *usage = NULL; + const char *pttype = NULL; struct udev_list_entry *first, *item; blkid_partlist pl; int r, generic_nr; @@ -147,8 +147,12 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte return -errno; } - blkid_probe_enable_superblocks(b, 1); - blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE); + if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) { + /* Look for file system superblocks, unless we only shall look for GPT partition tables */ + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE); + } + blkid_probe_enable_partitions(b, 1); blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); @@ -169,40 +173,44 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte if (!m) return -ENOMEM; - (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); - if (STRPTR_IN_SET(usage, "filesystem", "crypto")) { - _cleanup_free_ char *t = NULL, *n = NULL; - const char *fstype = NULL; + if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) { + const char *usage = NULL; - /* OK, we have found a file system, that's our root partition then. 
*/ - (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); + if (STRPTR_IN_SET(usage, "filesystem", "crypto")) { + _cleanup_free_ char *t = NULL, *n = NULL; + const char *fstype = NULL; - if (fstype) { - t = strdup(fstype); - if (!t) - return -ENOMEM; - } + /* OK, we have found a file system, that's our root partition then. */ + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); - if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0) - return -ENOMEM; + if (fstype) { + t = strdup(fstype); + if (!t) + return -ENOMEM; + } - m->partitions[PARTITION_ROOT] = (DissectedPartition) { - .found = true, - .rw = true, - .partno = -1, - .architecture = _ARCHITECTURE_INVALID, - .fstype = t, - .node = n, - }; + if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0) + return -ENOMEM; - t = n = NULL; + m->partitions[PARTITION_ROOT] = (DissectedPartition) { + .found = true, + .rw = true, + .partno = -1, + .architecture = _ARCHITECTURE_INVALID, + .fstype = t, + .node = n, + }; - m->encrypted = streq(fstype, "crypto_LUKS"); + t = n = NULL; - *ret = m; - m = NULL; + m->encrypted = streq(fstype, "crypto_LUKS"); - return 0; + *ret = m; + m = NULL; + + return 0; + } } (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); @@ -212,7 +220,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte is_gpt = streq_ptr(pttype, "gpt"); is_mbr = streq_ptr(pttype, "dos"); - if (!is_gpt && !is_mbr) + if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr)) return -ENOPKG; errno = 0; @@ -300,7 +308,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte first = udev_enumerate_get_list_entry(e); udev_list_entry_foreach(item, first) { _cleanup_udev_device_unref_ struct udev_device *q; - unsigned long long flags; + unsigned long long pflags; blkid_partition pp; const char *node; dev_t qn; @@ -325,7 +333,7 @@ int 
dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte if (!pp) continue; - flags = blkid_partition_get_flags(pp); + pflags = blkid_partition_get_flags(pp); nr = blkid_partition_get_partno(pp); if (nr < 0) @@ -337,7 +345,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte sd_id128_t type_id, id; bool rw = true; - if (flags & GPT_FLAG_NO_AUTO) + if (pflags & GPT_FLAG_NO_AUTO) continue; sid = blkid_partition_get_uuid(pp); @@ -354,10 +362,10 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte if (sd_id128_equal(type_id, GPT_HOME)) { designator = PARTITION_HOME; - rw = !(flags & GPT_FLAG_READ_ONLY); + rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_SRV)) { designator = PARTITION_SRV; - rw = !(flags & GPT_FLAG_READ_ONLY); + rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_ESP)) { designator = PARTITION_ESP; fstype = "vfat"; @@ -371,7 +379,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte designator = PARTITION_ROOT; architecture = native_architecture(); - rw = !(flags & GPT_FLAG_READ_ONLY); + rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) { m->can_verity = true; @@ -395,9 +403,8 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte designator = PARTITION_ROOT_SECONDARY; architecture = SECONDARY_ARCHITECTURE; - rw = !(flags & GPT_FLAG_READ_ONLY); + rw = !(pflags & GPT_FLAG_READ_ONLY); } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) { - m->can_verity = true; /* Ignore verity unless root has is specified */ @@ -419,7 +426,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte multiple_generic = true; else { generic_nr = nr; - generic_rw = !(flags & GPT_FLAG_READ_ONLY); + generic_rw = !(pflags & GPT_FLAG_READ_ONLY); generic_node = strdup(node); if (!generic_node) return 
-ENOMEM; @@ -457,7 +464,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, Dissecte } else if (is_mbr) { - if (flags != 0x80) /* Bootable flag */ + if (pflags != 0x80) /* Bootable flag */ continue; if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */ diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 175ddd8ea0..b424dac665 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -67,6 +67,7 @@ typedef enum DissectImageFlags { DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP | DISSECT_IMAGE_DISCARD | DISSECT_IMAGE_DISCARD_ON_CRYPTO, + DISSECT_IMAGE_GPT_ONLY = 16, /* Only recognize images with GPT partition tables */ } DissectImageFlags; struct DissectedImage { @@ -76,7 +77,7 @@ struct DissectedImage { DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX]; }; -int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectedImage **ret); +int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret); DissectedImage* dissected_image_unref(DissectedImage *m); DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref); diff --git a/src/test/test-dissect-image.c b/src/test/test-dissect-image.c index 0512a15e88..ddaf3a0d8b 100644 --- a/src/test/test-dissect-image.c +++ b/src/test/test-dissect-image.c @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - r = dissect_image(d->fd, NULL, 0, &m); + r = dissect_image(d->fd, NULL, 0, 0, &m); if (r < 0) { log_error_errno(r, "Failed to dissect image: %m"); return EXIT_FAILURE; -- cgit v1.2.3-54-g00ecf From e5b422034160937712fe7a546f175b883e39e763 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 12 Dec 2016 21:03:34 +0100 Subject: shared: rework switch_root() code Let's follow symlinks before invoking mount() on arbitrary paths, so that we won't get confused if directories are prepared with absolute symlinks. 
Use FOREACH_STRING() instead of NULSTR_FOREACH() as it is more readable. Don't use snprintf() for concatenating strings, let chase_symlinks() do that. Replace homegrown mount check with path_is_mount_point(). Also, change the behaviour when we encounter this: instead of unmounting the old mount point, simply leave it around and don't replace it, so that initrds can mount stuff there with different settings than we would apply. This is in-line with how we handle automatic mounts in nspawn for example. Use umount_recursive() instead of a simple umount2() for unmounting the old root, so that we actually cover really all mounts, not just the top-level one. --- src/shared/switch-root.c | 139 ++++++++++++++++++++--------------------------- 1 file changed, 59 insertions(+), 80 deletions(-) (limited to 'src/shared') diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c index 4eff4f692e..afdf1ab5ad 100644 --- a/src/shared/switch-root.c +++ b/src/shared/switch-root.c @@ -28,123 +28,102 @@ #include "base-filesystem.h" #include "fd-util.h" +#include "fs-util.h" #include "log.h" #include "missing.h" #include "mkdir.h" +#include "mount-util.h" #include "path-util.h" #include "rm-rf.h" #include "stdio-util.h" #include "string-util.h" +#include "strv.h" #include "switch-root.h" #include "user-util.h" #include "util.h" -int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags) { - - /* Don't try to unmount/move the old "/", there's no way to do it. 
*/ - static const char move_mounts[] = - "/dev\0" - "/proc\0" - "/sys\0" - "/run\0"; +int switch_root(const char *new_root, + const char *old_root_after, /* path below the new root, where to place the old root after the transition */ + bool unmount_old_root, + unsigned long mount_flags) { /* MS_MOVE or MS_BIND */ + _cleanup_free_ char *resolved_old_root_after = NULL; _cleanup_close_ int old_root_fd = -1; - struct stat new_root_stat; bool old_root_remove; - const char *i, *temporary_old_root; + const char *i; + int r; + + assert(new_root); + assert(old_root_after); if (path_equal(new_root, "/")) return 0; - temporary_old_root = strjoina(new_root, oldroot); - mkdir_p_label(temporary_old_root, 0755); - + /* Check if we shall remove the contents of the old root */ old_root_remove = in_initrd(); + if (old_root_remove) { + old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY); + if (old_root_fd < 0) + return log_error_errno(errno, "Failed to open root directory: %m"); + } - if (stat(new_root, &new_root_stat) < 0) - return log_error_errno(errno, "Failed to stat directory %s: %m", new_root); + /* Determine where we shall place the old root after the transition */ + r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after); + if (r == 0) /* Doesn't exist yet. Let's create it */ + (void) mkdir_p_label(resolved_old_root_after, 0755); - /* Work-around for kernel design: the kernel refuses switching - * root if any file systems are mounted MS_SHARED. Hence + /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence * remount them MS_PRIVATE here as a work-around. 
* * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */ if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) - log_warning_errno(errno, "Failed to make \"/\" private mount: %m"); - - NULSTR_FOREACH(i, move_mounts) { - char new_mount[PATH_MAX]; - struct stat sb; - size_t n; - - n = snprintf(new_mount, sizeof new_mount, "%s%s", new_root, i); - if (n >= sizeof new_mount) { - bool move = mountflags & MS_MOVE; - - log_warning("New path is too long, %s: %s%s", - move ? "forcing unmount instead" : "ignoring", - new_root, i); - - if (move) - if (umount2(i, MNT_FORCE) < 0) - log_warning_errno(errno, "Failed to unmount %s: %m", i); - continue; - } - - mkdir_p_label(new_mount, 0755); - - if (stat(new_mount, &sb) < 0 || - sb.st_dev != new_root_stat.st_dev) { - - /* Mount point seems to be mounted already or - * stat failed. Unmount the old mount point. */ - if (umount2(i, MNT_DETACH) < 0) - log_warning_errno(errno, "Failed to unmount %s: %m", i); - continue; - } - - if (mount(i, new_mount, NULL, mountflags, NULL) < 0) { - if (mountflags & MS_MOVE) { - log_error_errno(errno, "Failed to move mount %s to %s, forcing unmount: %m", i, new_mount); - - if (umount2(i, MNT_FORCE) < 0) - log_warning_errno(errno, "Failed to unmount %s: %m", i); - - } else if (mountflags & MS_BIND) - log_error_errno(errno, "Failed to bind mount %s to %s: %m", i, new_mount); - } + return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m"); + + FOREACH_STRING(i, "/sys", "/dev", "/run", "/proc") { + _cleanup_free_ char *chased = NULL; + + r = chase_symlinks(i, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, i); + if (r > 0) { + /* Already exists. Let's see if it is a mount point already. 
*/ + r = path_is_mount_point(chased, NULL, 0); + if (r < 0) + return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased); + if (r > 0) /* If it is already mounted, then do nothing */ + continue; + } else + /* Doesn't exist yet? */ + (void) mkdir_p_label(chased, 0755); + + if (mount(i, chased, NULL, mount_flags, NULL) < 0) + return log_error_errno(r, "Failed to mount %s to %s: %m", i, chased); } - /* Do not fail, if base_filesystem_create() fails. Not all - * switch roots are like base_filesystem_create() wants them - * to look like. They might even boot, if they are RO and - * don't have the FS layout. Just ignore the error and - * switch_root() nevertheless. */ + /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants + * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error + * and switch_root() nevertheless. */ (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID); if (chdir(new_root) < 0) return log_error_errno(errno, "Failed to change directory to %s: %m", new_root); - if (old_root_remove) { - old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY); - if (old_root_fd < 0) - log_warning_errno(errno, "Failed to open root directory: %m"); - } - - /* We first try a pivot_root() so that we can umount the old - * root dir. In many cases (i.e. where rootfs is /), that's - * not possible however, and hence we simply overmount root */ - if (pivot_root(new_root, temporary_old_root) >= 0) { + /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /), + * that's not possible however, and hence we simply overmount root */ + if (pivot_root(new_root, resolved_old_root_after) >= 0) { /* Immediately get rid of the old root, if detach_oldroot is set. * Since we are running off it we need to do this lazily. 
*/ - if (detach_oldroot && umount2(oldroot, MNT_DETACH) < 0) - log_error_errno(errno, "Failed to lazily umount old root dir %s, %s: %m", - oldroot, - errno == ENOENT ? "ignoring" : "leaving it around"); + if (unmount_old_root) { + r = umount_recursive(old_root_after, MNT_DETACH); + if (r < 0) + log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m"); + } } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0) - return log_error_errno(errno, "Failed to mount moving %s to /: %m", new_root); + return log_error_errno(errno, "Failed to move %s to /: %m", new_root); if (chroot(".") < 0) return log_error_errno(errno, "Failed to change root: %m"); -- cgit v1.2.3-54-g00ecf From 91214a37ef4eb8042d2598aa89bae52b410d11a7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 13 Dec 2016 12:45:19 +0100 Subject: fstab-generator: add support for volatile boots This adds support for a new kernel command line option "systemd.volatile=" that provides the same functionality that systemd-nspawn's --volatile= switch provides, but for host systems (i.e. systems booting with a kernel). It takes the same parameter and has the same effect. In order to implement systemd.volatile=yes a new service systemd-volatile-root.service is introduced that only runs in the initrd and rearranges the root directory as needed to become a tmpfs instance. Note that systemd.volatile=state is implemented differently: it simply generates a var.mount unit file that is part of the normal boot and has no effect on the initrd execution. The way this is implemented ensures that other explicit configuration for /var can always override the effect of these options. Specifically, the var.mount unit is generated in the "late" generator directory, so that it only is in effect if nothing else overrides it. 
--- .gitignore | 1 + Makefile-man.am | 7 ++ Makefile.am | 10 +++ man/kernel-command-line.xml | 23 +++++ man/systemd-fstab-generator.xml | 46 +++++++++- man/systemd-nspawn.xml | 15 ++-- man/systemd-volatile-root.service.xml | 79 +++++++++++++++++ src/fstab-generator/fstab-generator.c | 85 +++++++++++++++--- src/shared/volatile-util.c | 27 ++++++ src/shared/volatile-util.h | 2 + src/volatile-root/Makefile | 1 + src/volatile-root/volatile-root.c | 157 +++++++++++++++++++++++++++++++++ units/.gitignore | 1 + units/systemd-volatile-root.service.in | 21 +++++ 14 files changed, 456 insertions(+), 19 deletions(-) create mode 100644 man/systemd-volatile-root.service.xml create mode 120000 src/volatile-root/Makefile create mode 100644 src/volatile-root/volatile-root.c create mode 100644 units/systemd-volatile-root.service.in (limited to 'src/shared') diff --git a/.gitignore b/.gitignore index ec4b7bd672..f246d3e6d5 100644 --- a/.gitignore +++ b/.gitignore @@ -126,6 +126,7 @@ /systemd-update-utmp /systemd-user-sessions /systemd-vconsole-setup +/systemd-volatile-root /tags /test-acd /test-acl-util diff --git a/Makefile-man.am b/Makefile-man.am index 5e6eee5e32..27660ef1c2 100644 --- a/Makefile-man.am +++ b/Makefile-man.am @@ -142,6 +142,7 @@ MANPAGES += \ man/systemd-tty-ask-password-agent.1 \ man/systemd-udevd.service.8 \ man/systemd-update-done.service.8 \ + man/systemd-volatile-root.service.8 \ man/systemd.1 \ man/systemd.automount.5 \ man/systemd.device.5 \ @@ -482,6 +483,7 @@ MANPAGES_ALIAS += \ man/systemd-udevd.8 \ man/systemd-update-done.8 \ man/systemd-user.conf.5 \ + man/systemd-volatile-root.8 \ man/udev_device_get_action.3 \ man/udev_device_get_devlinks_list_entry.3 \ man/udev_device_get_devnode.3 \ @@ -837,6 +839,7 @@ man/systemd-udevd-kernel.socket.8: man/systemd-udevd.service.8 man/systemd-udevd.8: man/systemd-udevd.service.8 man/systemd-update-done.8: man/systemd-update-done.service.8 man/systemd-user.conf.5: man/systemd-system.conf.5 
+man/systemd-volatile-root.8: man/systemd-volatile-root.service.8 man/udev_device_get_action.3: man/udev_device_get_syspath.3 man/udev_device_get_devlinks_list_entry.3: man/udev_device_has_tag.3 man/udev_device_get_devnode.3: man/udev_device_get_syspath.3 @@ -1790,6 +1793,9 @@ man/systemd-update-done.html: man/systemd-update-done.service.html man/systemd-user.conf.html: man/systemd-system.conf.html $(html-alias) +man/systemd-volatile-root.html: man/systemd-volatile-root.service.html + $(html-alias) + man/udev_device_get_action.html: man/udev_device_get_syspath.html $(html-alias) @@ -2804,6 +2810,7 @@ EXTRA_DIST += \ man/systemd-update-utmp.service.xml \ man/systemd-user-sessions.service.xml \ man/systemd-vconsole-setup.service.xml \ + man/systemd-volatile-root.service.xml \ man/systemd.automount.xml \ man/systemd.device.xml \ man/systemd.exec.xml \ diff --git a/Makefile.am b/Makefile.am index 56b8aa3fe8..92a3680461 100644 --- a/Makefile.am +++ b/Makefile.am @@ -397,6 +397,7 @@ rootlibexec_PROGRAMS = \ systemd-initctl \ systemd-shutdown \ systemd-remount-fs \ + systemd-volatile-root \ systemd-reply-password \ systemd-fsck \ systemd-ac-power \ @@ -538,6 +539,7 @@ nodist_systemunit_DATA = \ units/system-update-cleanup.service \ units/systemd-initctl.service \ units/systemd-remount-fs.service \ + units/systemd-volatile-root.service \ units/systemd-ask-password-wall.service \ units/systemd-ask-password-console.service \ units/systemd-sysctl.service \ @@ -602,6 +604,7 @@ EXTRA_DIST += \ units/system-update-cleanup.service.in \ units/systemd-initctl.service.in \ units/systemd-remount-fs.service.in \ + units/systemd-volatile-root.service.in \ units/systemd-update-utmp.service.in \ units/systemd-update-utmp-runlevel.service.in \ units/systemd-ask-password-wall.service.in \ @@ -3067,6 +3070,13 @@ systemd_remount_fs_SOURCES = \ systemd_remount_fs_LDADD = \ libsystemd-shared.la +# ------------------------------------------------------------------------------ 
+systemd_volatile_root_SOURCES = \ + src/volatile-root/volatile-root.c + +systemd_volatile_root_LDADD = \ + libsystemd-shared.la + # ------------------------------------------------------------------------------ systemd_cgroups_agent_SOURCES = \ src/cgroups-agent/cgroups-agent.c diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml index 78e45e66a9..7e1d408ded 100644 --- a/man/kernel-command-line.xml +++ b/man/kernel-command-line.xml @@ -124,6 +124,28 @@ + + systemd.volatile= + + This parameter controls whether the system shall boot up in volatile mode. Takes a boolean argument, or + the special value state. If false (the default), normal boot mode is selected, the root + directory and /var are mounted as specified on the kernel command line or + /etc/fstab, or otherwise configured. If true, full state-less boot mode is selected. In + this case the root directory is mounted as a volatile memory file system (tmpfs), and only + /usr is mounted from the file system configured as root device, in read-only mode. This + enables fully state-less boots where the vendor-supplied OS is used as shipped, with only default + configuration and no stored state in effect, as /etc and /var (as + well as all other resources shipped in the root file system) are reset at boot and lost on shutdown. If this + setting is set to state the root file system is mounted as usual, however + /var is mounted as a volatile memory file system (tmpfs), so that the + system boots up with the normal configuration applied, but all state reset at boot and lost at shutdown. For details, + see + systemd-volatile-root.service8 + and + systemd-fstab-generator8. 
+ + + quiet @@ -382,6 +404,7 @@ systemd-cryptsetup-generator8, systemd-fstab-generator8, systemd-gpt-auto-generator8, + systemd-volatile-root.service8, systemd-modules-load.service8, systemd-backlight@.service8, systemd-rfkill.service8, diff --git a/man/systemd-fstab-generator.xml b/man/systemd-fstab-generator.xml index a971cb3675..5f37e9193e 100644 --- a/man/systemd-fstab-generator.xml +++ b/man/systemd-fstab-generator.xml @@ -89,12 +89,13 @@ Takes a boolean argument. Defaults to yes. If no, causes the - generator to ignore any mounts or swaps configured in + generator to ignore any mounts or swap devices configured in /etc/fstab. rd.fstab= - is honored only by initial RAM disk (initrd) while + is honored only by the initial RAM disk (initrd) while fstab= is honored by both the main system and the initrd. + root= @@ -102,6 +103,7 @@ initrd. root= is honored by the initrd. + rootfstype= @@ -109,6 +111,7 @@ passed to the mount command. rootfstype= is honored by the initrd. + rootflags= @@ -116,6 +119,7 @@ use. rootflags= is honored by the initrd. + mount.usr= @@ -133,6 +137,7 @@ mount.usr= is honored by the initrd. + mount.usrfstype= @@ -150,6 +155,7 @@ mount.usrfstype= is honored by the initrd. + mount.usrflags= @@ -166,6 +172,39 @@ mount.usrflags= is honored by the initrd. + + + systemd.volatile= + + Controls whether the system shall boot up in volatile mode. Takes a boolean argument or the + special value state. + + If false (the default), this generator makes no changes to the mount tree and the system is booted up in + normal mode. + + If true the generator ensures + systemd-volatile-root.service8 + is run as part of the initial RAM disk ("initrd"). This service changes the mount table before transitioning to + the host system, so that a volatile memory file system (tmpfs) is used as root directory, + with only /usr mounted into it from the configured root file system, in read-only + mode. 
This way the system operates in fully stateless mode, with all configuration and state reset at boot and + lost at shutdown, as /etc and /var will be served from the (initially + unpopulated) volatile memory file system. + + If set to state the generator will leave the root + directory mount point unaltered, however will mount a tmpfs file system to + /var. In this mode the normal system configuration (i.e. the contents of + /etc) is in effect (and may be modified during system runtime), however the system state + (i.e. the contents of /var) is reset at boot and lost at shutdown. + + Note that in none of these modes the root directory, /etc, /var + or any other resources stored in the root file system are physically removed. It's thus safe to boot a system + that is normally operated in non-volatile mode temporarily into volatile mode, without losing data. + + Note that enabling this setting will only work correctly on operating systems that can boot up with only + /usr mounted, and are able to automatically populate /var, and also + /etc in case of systemd.volatile=yes. + @@ -176,7 +215,8 @@ fstab5, systemd.mount5, systemd.swap5, - systemd-cryptsetup-generator8 + systemd-cryptsetup-generator8, + kernel-command-line7 diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index 2bc81ea1aa..f6b3f57fc7 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -939,12 +939,15 @@ (the default), the whole OS tree is made available writable. - Note that setting this to or - will only work correctly with - operating systems in the container that can boot up with only - /usr mounted, and are able to populate - /var automatically, as - needed. + This option provides similar functionality for containers as the systemd.volatile= + kernel command line switch provides for host systems. See + kernel-command-line7 for + details.
+ + Note that enabling this setting will only work correctly with operating systems in the container that can + boot up with only /usr mounted, and are able to automatically populate + /var, and also /etc in case of + --volatile=yes. diff --git a/man/systemd-volatile-root.service.xml b/man/systemd-volatile-root.service.xml new file mode 100644 index 0000000000..b90a3261fa --- /dev/null +++ b/man/systemd-volatile-root.service.xml @@ -0,0 +1,79 @@ + + + + + + + + systemd-volatile-root.service + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + systemd-volatile-root.service + 8 + + + + systemd-volatile-root.service + systemd-volatile-root + Make the root file system volatile + + + + systemd-volatile-root.service + /usr/lib/systemd/systemd-volatile-root + + + + Description + + systemd-volatile-root.service is a service that replaces the root directory with a + volatile memory file system (tmpfs), mounting the original (non-volatile) + /usr inside it read-only. This way, vendor data from /usr is available as + usual, but all configuration data in /etc, all state data in /var and all + other resources stored directly under the root directory are reset on boot and lost at shutdown, enabling fully + stateless systems. + + This service is only enabled if full volatile mode is selected, for example by specifying + systemd.volatile=yes on the kernel command line. This service runs only in the initial RAM disk + ("initrd"), before the system transitions to the host's root directory. Note that this service is not used if + systemd.volatile=state is used, as in that mode the root directory is non-volatile. 
+ + + + See Also + + systemd1, + systemd-fstab-generator8, + kernel-command-line7 + + + + diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c index 84163abbc5..f58aa27df2 100644 --- a/src/fstab-generator/fstab-generator.c +++ b/src/fstab-generator/fstab-generator.c @@ -42,8 +42,10 @@ #include "unit-name.h" #include "util.h" #include "virt.h" +#include "volatile-util.h" static const char *arg_dest = "/tmp"; +static const char *arg_dest_late = "/tmp"; static bool arg_fstab_enabled = true; static char *arg_root_what = NULL; static char *arg_root_fstype = NULL; @@ -52,6 +54,7 @@ static int arg_root_rw = -1; static char *arg_usr_what = NULL; static char *arg_usr_fstype = NULL; static char *arg_usr_options = NULL; +static VolatileMode arg_volatile_mode = _VOLATILE_MODE_INVALID; static int add_swap( const char *what, @@ -235,6 +238,7 @@ static int write_requires_mounts_for(FILE *f, const char *opts) { } static int add_mount( + const char *dest, const char *what, const char *where, const char *fstype, @@ -286,7 +290,7 @@ static int add_mount( if (r < 0) return log_error_errno(r, "Failed to generate unit name: %m"); - unit = strjoin(arg_dest, "/", name); + unit = strjoin(dest, "/", name); if (!unit) return log_oom(); @@ -318,7 +322,7 @@ static int add_mount( } if (passno != 0) { - r = generator_write_fsck_deps(f, arg_dest, what, where, fstype); + r = generator_write_fsck_deps(f, dest, what, where, fstype); if (r < 0) return r; } @@ -334,7 +338,7 @@ static int add_mount( if (!isempty(fstype) && !streq(fstype, "auto")) fprintf(f, "Type=%s\n", fstype); - r = generator_write_timeouts(arg_dest, what, where, opts, &filtered); + r = generator_write_timeouts(dest, what, where, opts, &filtered); if (r < 0) return r; @@ -350,7 +354,7 @@ static int add_mount( return log_error_errno(r, "Failed to write unit file %s: %m", unit); if (!noauto && !automount) { - lnk = strjoin(arg_dest, "/", post, nofail ? 
".wants/" : ".requires/", name); + lnk = strjoin(dest, "/", post, nofail ? ".wants/" : ".requires/", name); if (!lnk) return log_oom(); @@ -364,7 +368,7 @@ static int add_mount( if (r < 0) return log_error_errno(r, "Failed to generate unit name: %m"); - automount_unit = strjoin(arg_dest, "/", automount_name); + automount_unit = strjoin(dest, "/", automount_name); if (!automount_unit) return log_oom(); @@ -406,7 +410,7 @@ static int add_mount( return log_error_errno(r, "Failed to write unit file %s: %m", automount_unit); free(lnk); - lnk = strjoin(arg_dest, "/", post, nofail ? ".wants/" : ".requires/", automount_name); + lnk = strjoin(dest, "/", post, nofail ? ".wants/" : ".requires/", automount_name); if (!lnk) return log_oom(); @@ -479,7 +483,8 @@ static int parse_fstab(bool initrd) { else post = SPECIAL_LOCAL_FS_TARGET; - k = add_mount(what, + k = add_mount(arg_dest, + what, where, me->mnt_type, me->mnt_opts, @@ -540,7 +545,8 @@ static int add_sysroot_mount(void) { return r; } - return add_mount(what, + return add_mount(arg_dest, + what, "/sysroot", arg_root_fstype, opts, @@ -593,7 +599,8 @@ static int add_sysroot_usr_mount(void) { opts = arg_usr_options; log_debug("Found entry what=%s where=/sysroot/usr type=%s", what, strna(arg_usr_fstype)); - return add_mount(what, + return add_mount(arg_dest, + what, "/sysroot/usr", arg_usr_fstype, opts, @@ -605,6 +612,46 @@ static int add_sysroot_usr_mount(void) { "/proc/cmdline"); } +static int add_volatile_root(void) { + const char *from, *to; + + if (arg_volatile_mode != VOLATILE_YES) + return 0; + + /* Let's add in systemd-remount-volatile.service which will remount the root device to tmpfs if this is + * requested, leaving only /usr from the root mount inside. 
*/ + + from = strjoina(SYSTEM_DATA_UNIT_PATH "/systemd-volatile-root.service"); + to = strjoina(arg_dest, "/" SPECIAL_INITRD_ROOT_FS_TARGET, ".requires/systemd-volatile-root.service"); + + (void) mkdir_parents(to, 0755); + + if (symlink(from, to) < 0) + return log_error_errno(errno, "Failed to hook in volatile remount service: %m"); + + return 0; +} + +static int add_volatile_var(void) { + + if (arg_volatile_mode != VOLATILE_STATE) + return 0; + + /* If requested, mount /var as tmpfs, but do so only if there's nothing else defined for this. */ + + return add_mount(arg_dest_late, + "tmpfs", + "/var", + "tmpfs", + "mode=0755", + 0, + false, + false, + false, + SPECIAL_LOCAL_FS_TARGET, + "/proc/cmdline"); +} + static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { int r; @@ -686,6 +733,18 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat arg_root_rw = true; else if (streq(key, "ro") && !value) arg_root_rw = false; + else if (streq(key, "systemd.volatile")) { + VolatileMode m; + + if (value) { + m = volatile_mode_from_string(value); + if (m < 0) + log_warning("Failed to parse systemd.volatile= argument: %s", value); + else + arg_volatile_mode = m; + } else + arg_volatile_mode = VOLATILE_YES; + } return 0; } @@ -700,6 +759,8 @@ int main(int argc, char *argv[]) { if (argc > 1) arg_dest = argv[1]; + if (argc > 3) + arg_dest_late = argv[3]; log_set_target(LOG_TARGET_SAFE); log_parse_environment(); @@ -720,8 +781,12 @@ int main(int argc, char *argv[]) { k = add_sysroot_usr_mount(); if (k < 0) r = k; + + k = add_volatile_root(); + if (k < 0) + r = k; } else - r = 0; + r = add_volatile_var(); /* Honour /etc/fstab only when that's enabled */ if (arg_fstab_enabled) { diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c index 1329b51f4e..e7e9721411 100644 --- a/src/shared/volatile-util.c +++ b/src/shared/volatile-util.c @@ -17,8 +17,10 @@ along with systemd; If not, see . 
***/ +#include "alloc-util.h" #include "macro.h" #include "parse-util.h" +#include "proc-cmdline.h" #include "string-util.h" #include "volatile-util.h" @@ -39,3 +41,28 @@ VolatileMode volatile_mode_from_string(const char *s) { return _VOLATILE_MODE_INVALID; } + +int query_volatile_mode(VolatileMode *ret) { + _cleanup_free_ char *mode = NULL; + VolatileMode m = VOLATILE_NO; + int r; + + r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode); + if (r < 0) + return r; + if (r == 0) + goto finish; + + if (mode) { + m = volatile_mode_from_string(mode); + if (m < 0) + return -EINVAL; + } else + m = VOLATILE_YES; + + r = 1; + +finish: + *ret = m; + return r; +} diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h index d012940c76..17930ba6ae 100644 --- a/src/shared/volatile-util.h +++ b/src/shared/volatile-util.h @@ -28,3 +28,5 @@ typedef enum VolatileMode { } VolatileMode; VolatileMode volatile_mode_from_string(const char *s); + +int query_volatile_mode(VolatileMode *ret); diff --git a/src/volatile-root/Makefile b/src/volatile-root/Makefile new file mode 120000 index 0000000000..d0b0e8e008 --- /dev/null +++ b/src/volatile-root/Makefile @@ -0,0 +1 @@ +../Makefile \ No newline at end of file diff --git a/src/volatile-root/volatile-root.c b/src/volatile-root/volatile-root.c new file mode 100644 index 0000000000..3c0b6fa1de --- /dev/null +++ b/src/volatile-root/volatile-root.c @@ -0,0 +1,157 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#include + +#include "alloc-util.h" +#include "fs-util.h" +#include "mkdir.h" +#include "mount-util.h" +#include "stat-util.h" +#include "volatile-util.h" +#include "string-util.h" +#include "path-util.h" + +static int make_volatile(const char *path) { + _cleanup_free_ char *old_usr = NULL; + int r; + + r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW); + if (r < 0) + return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path); + if (r == 0) { + log_error("%s is not a mount point.", path); + return -EINVAL; + } + + r = path_is_temporary_fs(path); + if (r < 0) + return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path); + if (r > 0) { + log_info("%s already is a temporary file system.", path); + return 0; + } + + r = chase_symlinks("/usr", path, CHASE_PREFIX_ROOT, &old_usr); + if (r < 0) + return log_error_errno(r, "/usr not available in old root: %m"); + + r = mkdir_p("/run/systemd/volatile-sysroot", 0700); + if (r < 0) + return log_error_errno(r, "Couldn't generate volatile sysroot directory: %m"); + + r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/volatile-sysroot", "tmpfs", MS_STRICTATIME, "mode=755"); + if (r < 0) + goto finish_rmdir; + + if (mkdir("/run/systemd/volatile-sysroot/usr", 0755) < 0) { + r = -errno; + goto finish_umount; + } + + r = mount_verbose(LOG_ERR, old_usr, "/run/systemd/volatile-sysroot/usr", NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + goto finish_umount; + + r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", true, NULL); + if (r < 0) + goto finish_umount; + + r = umount_recursive(path, 0); + if (r < 0) { + log_error_errno(r, "Failed to unmount %s: %m", path); + goto finish_umount; + } + + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) + log_warning_errno(errno, "Failed 
to remount %s MS_SLAVE|MS_REC: %m", path); + + r = mount_verbose(LOG_ERR, "/run/systemd/volatile-sysroot", path, NULL, MS_MOVE, NULL); + +finish_umount: + (void) umount_recursive("/run/systemd/volatile-sysroot", 0); + +finish_rmdir: + (void) rmdir("/run/systemd/volatile-sysroot"); + + return r; +} + +int main(int argc, char *argv[]) { + VolatileMode m = _VOLATILE_MODE_INVALID; + const char *path; + int r; + + log_set_target(LOG_TARGET_AUTO); + log_parse_environment(); + log_open(); + + if (argc > 3) { + log_error("Too many arguments. Expected directory and mode."); + r = -EINVAL; + goto finish; + } + + r = query_volatile_mode(&m); + if (r < 0) { + log_error_errno(r, "Failed to determine volatile mode from kernel command line."); + goto finish; + } + if (r == 0 && argc >= 2) { + /* The kernel command line always wins. However if nothing was set there, the argument passed here wins instead. */ + m = volatile_mode_from_string(argv[1]); + if (m < 0) { + log_error("Couldn't parse volatile mode: %s", argv[1]); + r = -EINVAL; + goto finish; + } + } + + if (argc < 3) + path = "/sysroot"; + else { + path = argv[2]; + + if (isempty(path)) { + log_error("Directory name cannot be empty."); + r = -EINVAL; + goto finish; + } + if (!path_is_absolute(path)) { + log_error("Directory must be specified as absolute path."); + r = -EINVAL; + goto finish; + } + if (path_equal(path, "/")) { + log_error("Directory cannot be the root directory."); + r = -EINVAL; + goto finish; + } + } + + if (m != VOLATILE_YES) { + r = 0; + goto finish; + } + + r = make_volatile(path); + +finish: + return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/units/.gitignore b/units/.gitignore index 8fdb6e9ab5..4398a59f91 100644 --- a/units/.gitignore +++ b/units/.gitignore @@ -75,5 +75,6 @@ /systemd-update-utmp.service /systemd-user-sessions.service /systemd-vconsole-setup.service +/systemd-volatile-root.service /tmp.mount /user@.service diff --git a/units/systemd-volatile-root.service.in b/units/systemd-volatile-root.service.in new file mode 100644 index 0000000000..cc4e604e4c --- /dev/null +++ b/units/systemd-volatile-root.service.in @@ -0,0 +1,21 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Enforce Volatile Root File Systems +Documentation=man:systemd-volatile-root.service(8) +DefaultDependencies=no +Conflicts=shutdown.target +After=sysroot.mount +Before=initrd-root-fs.target shutdown.target +Conflicts=shutdown.target +AssertPathExists=/etc/initrd-release + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=@rootlibexecdir@/systemd-volatile-root yes /sysroot -- cgit v1.2.3-54-g00ecf From be30ad41ffd796ba40a5026b4963c17f04cc334d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Dec 2016 17:17:43 +0100 Subject: dissect: return the GPT partition UUID, too This is useful as we can match up the EFI UUID with the one the firmware supposedly used. --- src/dissect/dissect.c | 3 +++ src/shared/dissect-image.c | 4 ++++ src/shared/dissect-image.h | 1 + 3 files changed, 8 insertions(+) (limited to 'src/shared') diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index aa06894037..78ec88fa35 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -221,6 +221,9 @@ int main(int argc, char *argv[]) { p->rw ? 
"writable" : "read-only", partition_designator_to_string(i)); + if (!sd_id128_is_null(p->uuid)) + printf(" (UUID " SD_ID128_FORMAT_STR ")", SD_ID128_FORMAT_VAL(p->uuid)); + if (p->fstype) printf(" of type %s", p->fstype); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 10d53eab45..5b6e78dd3d 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -95,6 +95,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI _cleanup_blkid_free_probe_ blkid_probe b = NULL; _cleanup_udev_unref_ struct udev *udev = NULL; _cleanup_free_ char *generic_node = NULL; + sd_id128_t generic_uuid = SD_ID128_NULL; const char *pttype = NULL; struct udev_list_entry *first, *item; blkid_partlist pl; @@ -427,6 +428,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI else { generic_nr = nr; generic_rw = !(pflags & GPT_FLAG_READ_ONLY); + generic_uuid = id; generic_node = strdup(node); if (!generic_node) return -ENOMEM; @@ -457,6 +459,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI .architecture = architecture, .node = n, .fstype = t, + .uuid = id, }; n = t = NULL; @@ -507,6 +510,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI .partno = generic_nr, .architecture = _ARCHITECTURE_INVALID, .node = generic_node, + .uuid = generic_uuid, }; generic_node = NULL; diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index b424dac665..76104e5780 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -32,6 +32,7 @@ struct DissectedPartition { bool rw:1; int partno; /* -1 if there was no partition and the images contains a file system directly */ int architecture; /* Intended architecture: either native, secondary or unset (-1). 
*/ + sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */ char *fstype; char *node; char *decrypted_node; -- cgit v1.2.3-54-g00ecf From e0f9e7bd03eebeb8fdfce7f766c4e254e3586dc8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 15 Dec 2016 17:38:11 +0100 Subject: dissect: make using a generic partition as root partition optional In preparation for reusing the image dissector in the GPT auto-discovery logic, only optionally fail the dissection when we can't identify a root partition. In the GPT auto-discovery we are completely fine with any kind of root, given that we run when it is already mounted and all we do is find some additional auxiliary partitions on the same disk. --- src/dissect/dissect.c | 4 ++-- src/machine/image-dbus.c | 2 +- src/nspawn/nspawn.c | 6 +++++- src/shared/dissect-image.c | 25 +++++++++++++++++-------- src/shared/dissect-image.h | 1 + src/test/test-dissect-image.c | 2 +- 6 files changed, 27 insertions(+), 13 deletions(-) (limited to 'src/shared') diff --git a/src/dissect/dissect.c b/src/dissect/dissect.c index 78ec88fa35..fd9db5ba87 100644 --- a/src/dissect/dissect.c +++ b/src/dissect/dissect.c @@ -35,7 +35,7 @@ static enum { } arg_action = ACTION_DISSECT; static const char *arg_image = NULL; static const char *arg_path = NULL; -static DissectImageFlags arg_flags = DISSECT_IMAGE_DISCARD_ON_LOOP; +static DissectImageFlags arg_flags = DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_DISCARD_ON_LOOP; static void *arg_root_hash = NULL; static size_t arg_root_hash_size = 0; @@ -191,7 +191,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissect_image(d->fd, arg_root_hash, arg_root_hash_size, 0, &m); + r = dissect_image(d->fd, arg_root_hash, arg_root_hash_size, arg_flags, &m); if (r == -ENOPKG) { log_error_errno(r, "Couldn't identify a suitable partition table or file system in %s.", arg_image); goto finish; diff --git a/src/machine/image-dbus.c b/src/machine/image-dbus.c index 2b168b267b..1891f07586 100644 --- 
a/src/machine/image-dbus.c +++ b/src/machine/image-dbus.c @@ -336,7 +336,7 @@ static int raw_image_get_os_release(Image *image, char ***ret, sd_bus_error *err if (r < 0) return sd_bus_error_set_errnof(error, r, "Failed to set up loop block device for %s: %m", image->path); - r = dissect_image(d->fd, NULL, 0, 0, &m); + r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m); if (r == -ENOPKG) return sd_bus_error_set_errnof(error, r, "Disk image %s not understood: %m", image->path); if (r < 0) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 01d89df1a4..224d30fca6 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3743,7 +3743,11 @@ int main(int argc, char *argv[]) { goto finish; } - r = dissect_image(loop->fd, arg_root_hash, arg_root_hash_size, 0, &dissected_image); + r = dissect_image( + loop->fd, + arg_root_hash, arg_root_hash_size, + DISSECT_IMAGE_REQUIRE_ROOT, + &dissected_image); if (r == -ENOPKG) { log_error_errno(r, "Could not find a suitable file system or partition table in image: %s", arg_image); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 5b6e78dd3d..878cb008aa 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -174,7 +174,8 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI if (!m) return -ENOMEM; - if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) { + if (!(flags & DISSECT_IMAGE_GPT_ONLY) && + (flags & DISSECT_IMAGE_REQUIRE_ROOT)) { const char *usage = NULL; (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); @@ -490,7 +491,7 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI * either, then check if there's a single generic one, and use that. 
*/ if (m->partitions[PARTITION_ROOT_VERITY].found) - return -ENXIO; + return -EADDRNOTAVAIL; if (m->partitions[PARTITION_ROOT_SECONDARY].found) { m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY]; @@ -499,8 +500,19 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY]; zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]); - } else if (generic_node && !root_hash) { + } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) { + + /* If the root hash was set, then we won't fall back to a generic node, because the root hash + * decides */ + if (root_hash) + return -EADDRNOTAVAIL; + /* If we didn't find a generic node, then we can't fix this up either */ + if (!generic_node) + return -ENXIO; + + /* If we didn't find a properly marked root partition, but we did find a single suitable + * generic Linux partition, then use this as root partition, if the caller asked for it.
*/ if (multiple_generic) return -ENOTUNIQ; @@ -514,14 +526,11 @@ int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectI }; generic_node = NULL; - } else - return -ENXIO; + } } - assert(m->partitions[PARTITION_ROOT].found); - if (root_hash) { - if (!m->partitions[PARTITION_ROOT_VERITY].found) + if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found) return -EADDRNOTAVAIL; /* If we found the primary root with the hash, then we definitely want to suppress any secondary root diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 76104e5780..26319bd8e7 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -69,6 +69,7 @@ typedef enum DissectImageFlags { DISSECT_IMAGE_DISCARD | DISSECT_IMAGE_DISCARD_ON_CRYPTO, DISSECT_IMAGE_GPT_ONLY = 16, /* Only recognize images with GPT partition tables */ + DISSECT_IMAGE_REQUIRE_ROOT = 32, /* Don't accept disks without root partition */ } DissectImageFlags; struct DissectedImage { diff --git a/src/test/test-dissect-image.c b/src/test/test-dissect-image.c index ddaf3a0d8b..2bb68be0db 100644 --- a/src/test/test-dissect-image.c +++ b/src/test/test-dissect-image.c @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - r = dissect_image(d->fd, NULL, 0, 0, &m); + r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m); if (r < 0) { log_error_errno(r, "Failed to dissect image: %m"); return EXIT_FAILURE; -- cgit v1.2.3-54-g00ecf From bafbac4e85a5eefd4b57a5cd0eb61885fb60edc9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Dec 2016 12:24:11 +0100 Subject: machined: when renaming/removing/cloning images, always take care of .roothash file too Since nspawn looks for them, importd now downloads them, and mkosi generates them, let's make sure they are also processed correctly on all machined operations.
--- src/shared/machine-image.c | 49 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) (limited to 'src/shared') diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index 712aff65b9..7bc5c0a128 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -99,6 +99,16 @@ static char **image_settings_path(Image *image) { return ret; } +static char *image_roothash_path(Image *image) { + const char *fn; + + assert(image); + + fn = strjoina(image->name, ".roothash"); + + return file_in_same_dir(image->path, fn); +} + static int image_new( ImageType t, const char *pretty, @@ -397,6 +407,7 @@ void image_hashmap_free(Hashmap *map) { int image_remove(Image *i) { _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; _cleanup_strv_free_ char **settings = NULL; + _cleanup_free_ char *roothash = NULL; char **j; int r; @@ -409,6 +420,10 @@ int image_remove(Image *i) { if (!settings) return -ENOMEM; + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + /* Make sure we don't interfere with a running nspawn */ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); if (r < 0) @@ -445,14 +460,17 @@ int image_remove(Image *i) { log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j); } + if (unlink(roothash) < 0 && errno != ENOENT) + log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash); + return 0; } -static int rename_settings_file(const char *path, const char *new_name) { +static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) { _cleanup_free_ char *rs = NULL; const char *fn; - fn = strjoina(new_name, ".nspawn"); + fn = strjoina(new_name, suffix); rs = file_in_same_dir(path, fn); if (!rs) @@ -463,7 +481,7 @@ static int rename_settings_file(const char *path, const char *new_name) { int image_rename(Image *i, const char *new_name) { _cleanup_release_lock_file_ 
LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT; - _cleanup_free_ char *new_path = NULL, *nn = NULL; + _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL; _cleanup_strv_free_ char **settings = NULL; unsigned file_attr = 0; char **j; @@ -481,6 +499,10 @@ int image_rename(Image *i, const char *new_name) { if (!settings) return -ENOMEM; + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + /* Make sure we don't interfere with a running nspawn */ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); if (r < 0) @@ -550,19 +572,23 @@ int image_rename(Image *i, const char *new_name) { nn = NULL; STRV_FOREACH(j, settings) { - r = rename_settings_file(*j, new_name); + r = rename_auxiliary_file(*j, new_name, ".nspawn"); if (r < 0 && r != -ENOENT) log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j); } + r = rename_auxiliary_file(roothash, new_name, ".roothash"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash); + return 0; } -static int clone_settings_file(const char *path, const char *new_name) { +static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) { _cleanup_free_ char *rs = NULL; const char *fn; - fn = strjoina(new_name, ".nspawn"); + fn = strjoina(new_name, suffix); rs = file_in_same_dir(path, fn); if (!rs) @@ -574,6 +600,7 @@ static int clone_settings_file(const char *path, const char *new_name) { int image_clone(Image *i, const char *new_name, bool read_only) { _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT; _cleanup_strv_free_ char **settings = NULL; + _cleanup_free_ char *roothash = NULL; const char *new_path; char **j; int r; @@ -587,6 +614,10 @@ int image_clone(Image *i, const char *new_name, bool read_only) { if (!settings) return -ENOMEM; + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + /* Make sure 
nobody takes the new name, between the time we * checked it is currently unused in all search paths, and the * time we take possession of it */ @@ -636,11 +667,15 @@ int image_clone(Image *i, const char *new_name, bool read_only) { return r; STRV_FOREACH(j, settings) { - r = clone_settings_file(*j, new_name); + r = clone_auxiliary_file(*j, new_name, ".nspawn"); if (r < 0 && r != -ENOENT) log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j); } + r = clone_auxiliary_file(roothash, new_name, ".roothash"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash); + return 0; } -- cgit v1.2.3-54-g00ecf