summaryrefslogtreecommitdiff
path: root/src/shared/dissect-image.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/shared/dissect-image.c')
-rw-r--r--src/shared/dissect-image.c548
1 files changed, 548 insertions, 0 deletions
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
new file mode 100644
index 0000000000..7b65daa0eb
--- /dev/null
+++ b/src/shared/dissect-image.c
@@ -0,0 +1,548 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mount.h>
+
+#include "architecture.h"
+#include "blkid-util.h"
+#include "dissect-image.h"
+#include "gpt.h"
+#include "mount-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+
+int dissect_image(int fd, DissectedImage **ret) {
+
+#ifdef HAVE_BLKID
+ _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
+ bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
+ _cleanup_udev_device_unref_ struct udev_device *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_blkid_free_probe_ blkid_probe b = NULL;
+ _cleanup_udev_unref_ struct udev *udev = NULL;
+ _cleanup_free_ char *generic_node = NULL;
+ const char *pttype = NULL, *usage = NULL;
+ struct udev_list_entry *first, *item;
+ blkid_partlist pl;
+ int r, generic_nr;
+ struct stat st;
+ unsigned i;
+
+ assert(fd >= 0);
+ assert(ret);
+
+ /* Probes a disk image, and returns information about what it found in *ret.
+ *
+ * Returns -ENOPKG if no suitable partition table or file system could be found. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0) {
+ if (errno == 0)
+ return -ENOMEM;
+
+ return -errno;
+ }
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2 || r == 1) {
+ log_debug("Failed to identify any partition table.");
+ return -ENOPKG;
+ }
+ if (r != 0) {
+ if (errno == 0)
+ return -EIO;
+
+ return -errno;
+ }
+
+ m = new0(DissectedImage, 1);
+ if (!m)
+ return -ENOMEM;
+
+ (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
+ if (streq_ptr(usage, "filesystem")) {
+ _cleanup_free_ char *t = NULL, *n = NULL;
+ const char *fstype = NULL;
+
+ /* OK, we have found a file system, that's our root partition then. */
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0)
+ return -ENOMEM;
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = true,
+ .partno = -1,
+ .architecture = _ARCHITECTURE_INVALID,
+ .fstype = t,
+ .node = n,
+ };
+
+ t = n = NULL;
+
+ *ret = m;
+ m = NULL;
+
+ return 0;
+ }
+
+ (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+ if (!pttype)
+ return -ENOPKG;
+
+ is_gpt = streq_ptr(pttype, "gpt");
+ is_mbr = streq_ptr(pttype, "dos");
+
+ if (!is_gpt && !is_mbr)
+ return -ENOPKG;
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(b);
+ if (!pl) {
+ if (errno == 0)
+ return -ENOMEM;
+
+ return -errno;
+ }
+
+ udev = udev_new();
+ if (!udev)
+ return -errno;
+
+ d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
+ if (!d)
+ return -ENOMEM;
+
+ for (i = 0;; i++) {
+ int n, z;
+
+ if (i >= 10) {
+ log_debug("Kernel partitions never appeared.");
+ return -ENXIO;
+ }
+
+ e = udev_enumerate_new(udev);
+ if (!e)
+ return -errno;
+
+ r = udev_enumerate_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+
+ r = udev_enumerate_scan_devices(e);
+ if (r < 0)
+ return r;
+
+ /* Count the partitions enumerated by the kernel */
+ n = 0;
+ first = udev_enumerate_get_list_entry(e);
+ udev_list_entry_foreach(item, first)
+ n++;
+
+ /* Count the partitions enumerated by blkid */
+ z = blkid_partlist_numof_partitions(pl);
+ if (n == z + 1)
+ break;
+ if (n > z + 1) {
+ log_debug("blkid and kernel partition list do not match.");
+ return -EIO;
+ }
+ if (n < z + 1) {
+ unsigned j;
+
+ /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running
+ * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a
+ * synchronous call that waits until probing is complete. */
+
+ for (j = 0; j < 20; j++) {
+
+ r = ioctl(fd, BLKRRPART, 0);
+ if (r < 0)
+ r = -errno;
+ if (r >= 0 || r != -EBUSY)
+ break;
+
+ /* If something else has the device open, such as an udev rule, the ioctl will return
+ * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a
+ * bit, and try again.
+ *
+ * This is really something they should fix in the kernel! */
+
+ usleep(50 * USEC_PER_MSEC);
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ e = udev_enumerate_unref(e);
+ }
+
+ first = udev_enumerate_get_list_entry(e);
+ udev_list_entry_foreach(item, first) {
+ _cleanup_udev_device_unref_ struct udev_device *q;
+ unsigned long long flags;
+ blkid_partition pp;
+ const char *node;
+ dev_t qn;
+ int nr;
+
+ q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
+ if (!q)
+ return -errno;
+
+ qn = udev_device_get_devnum(q);
+ if (major(qn) == 0)
+ continue;
+
+ if (st.st_rdev == qn)
+ continue;
+
+ node = udev_device_get_devnode(q);
+ if (!node)
+ continue;
+
+ pp = blkid_partlist_devno_to_partition(pl, qn);
+ if (!pp)
+ continue;
+
+ flags = blkid_partition_get_flags(pp);
+
+ nr = blkid_partition_get_partno(pp);
+ if (nr < 0)
+ continue;
+
+ if (is_gpt) {
+ int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
+ const char *stype, *fstype = NULL;
+ sd_id128_t type_id;
+ bool rw = true;
+
+ if (flags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+
+ if (sd_id128_from_string(stype, &type_id) < 0)
+ continue;
+
+ if (sd_id128_equal(type_id, GPT_HOME)) {
+ designator = PARTITION_HOME;
+ rw = !(flags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_SRV)) {
+ designator = PARTITION_SRV;
+ rw = !(flags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ESP)) {
+ designator = PARTITION_ESP;
+ fstype = "vfat";
+ }
+#ifdef GPT_ROOT_NATIVE
+ else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+ designator = PARTITION_ROOT;
+ architecture = native_architecture();
+ rw = !(flags & GPT_FLAG_READ_ONLY);
+ }
+#endif
+#ifdef GPT_ROOT_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+ designator = PARTITION_ROOT_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(flags & GPT_FLAG_READ_ONLY);
+ }
+#endif
+ else if (sd_id128_equal(type_id, GPT_SWAP)) {
+ designator = PARTITION_SWAP;
+ fstype = "swap";
+ } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = !(flags & GPT_FLAG_READ_ONLY);
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+ }
+
+ if (designator != _PARTITION_DESIGNATOR_INVALID) {
+ _cleanup_free_ char *t = NULL, *n = NULL;
+
+ /* First one wins */
+ if (m->partitions[designator].found)
+ continue;
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ m->partitions[designator] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = rw,
+ .architecture = architecture,
+ .node = n,
+ .fstype = t,
+ };
+
+ n = t = NULL;
+ }
+
+ } else if (is_mbr) {
+
+ if (flags != 0x80) /* Bootable flag */
+ continue;
+
+ if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = true;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (!m->partitions[PARTITION_ROOT].found) {
+ /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
+ * either, then check if there's a single generic one, and use that. */
+
+ if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
+ m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY]);
+ } else if (generic_node) {
+
+ if (multiple_generic)
+ return -ENOTUNIQ;
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = generic_rw,
+ .partno = generic_nr,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = generic_node,
+ };
+
+ generic_node = NULL;
+ } else
+ return -ENXIO;
+ }
+
+ /* Fill in file system types if we don't know them yet. */
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ const char *fstype;
+
+ if (!m->partitions[i].found) /* not found? */
+ continue;
+
+ if (m->partitions[i].fstype) /* already know the type? */
+ continue;
+
+ if (!m->partitions[i].node) /* have no device node for? */
+ continue;
+
+ if (b)
+ blkid_free_probe(b);
+
+ b = blkid_new_probe_from_filename(m->partitions[i].node);
+ if (!b)
+ return -ENOMEM;
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2 || r == 1) {
+ log_debug("Failed to identify any partition type on partition %i", m->partitions[i].partno);
+ continue;
+ }
+ if (r != 0) {
+ if (errno == 0)
+ return -EIO;
+
+ return -errno;
+ }
+
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+ if (fstype) {
+ char *t;
+
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+
+ m->partitions[i].fstype = t;
+ }
+ }
+
+ *ret = m;
+ m = NULL;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+DissectedImage* dissected_image_unref(DissectedImage *m) {
+ unsigned i;
+
+ if (!m)
+ return NULL;
+
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ free(m->partitions[i].fstype);
+ free(m->partitions[i].node);
+ }
+
+ free(m);
+ return NULL;
+}
+
+static int mount_partition(DissectedPartition *m, const char *where, const char *directory, DissectedImageMountFlags flags) {
+ const char *p, *options = NULL;
+ bool rw;
+
+ assert(m);
+ assert(where);
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+
+ rw = m->rw && !(flags & DISSECTED_IMAGE_READ_ONLY);
+
+ if (directory)
+ p = strjoina(where, directory);
+ else
+ p = where;
+
+ /* Not supported for now. */
+ if (streq(m->fstype, "crypto_LUKS"))
+ return -EOPNOTSUPP;
+
+ /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains
+ * sparse when possible. */
+ if ((flags & DISSECTED_IMAGE_DISCARD_ON_LOOP) &&
+ STR_IN_SET(m->fstype, "btrfs", "ext4", "vfat", "xfs")) {
+ const char *l;
+
+ l = path_startswith(m->node, "/dev");
+ if (l && startswith(l, "loop"))
+ options = "discard";
+ }
+
+ return mount_verbose(LOG_DEBUG, m->node, p, m->fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+}
+
+int dissected_image_mount(DissectedImage *m, const char *where, DissectedImageMountFlags flags) {
+ int r;
+
+ assert(m);
+ assert(where);
+
+ if (!m->partitions[PARTITION_ROOT].found)
+ return -ENXIO;
+
+ r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags);
+ if (r < 0)
+ return r;
+
+ if (m->partitions[PARTITION_ESP].found) {
+ const char *mp, *x;
+
+ /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
+
+ mp = "/efi";
+ x = strjoina(where, mp);
+ r = dir_is_empty(x);
+ if (r == -ENOENT) {
+ mp = "/boot";
+ x = strjoina(where, mp);
+ r = dir_is_empty(x);
+ }
+ if (r > 0) {
+ r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static const char *const partition_designator_table[] = {
+ [PARTITION_ROOT] = "root",
+ [PARTITION_ROOT_SECONDARY] = "root-secondary",
+ [PARTITION_HOME] = "home",
+ [PARTITION_SRV] = "srv",
+ [PARTITION_ESP] = "esp",
+ [PARTITION_SWAP] = "swap",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);