summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/nspawn/nspawn.c694
1 files changed, 38 insertions, 656 deletions
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index ddd6a64ec6..6ad20f7457 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -53,6 +53,7 @@
#include "cgroup-util.h"
#include "copy.h"
#include "dev-setup.h"
+#include "dissect-image.h"
#include "env-util.h"
#include "fd-util.h"
#include "fdset.h"
@@ -63,6 +64,7 @@
#include "hostname-util.h"
#include "id128-util.h"
#include "log.h"
+#include "loop-util.h"
#include "loopback-setup.h"
#include "machine-image.h"
#include "macro.h"
@@ -1769,546 +1771,6 @@ static int setup_propagate(const char *root) {
return mount_verbose(LOG_ERR, NULL, q, NULL, MS_SLAVE, NULL);
}
-static int setup_image(char **device_path, int *loop_nr) {
- struct loop_info64 info = {
- .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN
- };
- _cleanup_close_ int fd = -1, control = -1, loop = -1;
- _cleanup_free_ char* loopdev = NULL;
- struct stat st;
- int r, nr;
-
- assert(device_path);
- assert(loop_nr);
- assert(arg_image);
-
- fd = open(arg_image, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
- if (fd < 0)
- return log_error_errno(errno, "Failed to open %s: %m", arg_image);
-
- if (fstat(fd, &st) < 0)
- return log_error_errno(errno, "Failed to stat %s: %m", arg_image);
-
- if (S_ISBLK(st.st_mode)) {
- char *p;
-
- p = strdup(arg_image);
- if (!p)
- return log_oom();
-
- *device_path = p;
-
- *loop_nr = -1;
-
- r = fd;
- fd = -1;
-
- return r;
- }
-
- if (!S_ISREG(st.st_mode)) {
- log_error("%s is not a regular file or block device.", arg_image);
- return -EINVAL;
- }
-
- control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
- if (control < 0)
- return log_error_errno(errno, "Failed to open /dev/loop-control: %m");
-
- nr = ioctl(control, LOOP_CTL_GET_FREE);
- if (nr < 0)
- return log_error_errno(errno, "Failed to allocate loop device: %m");
-
- if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
- return log_oom();
-
- loop = open(loopdev, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
- if (loop < 0)
- return log_error_errno(errno, "Failed to open loop device %s: %m", loopdev);
-
- if (ioctl(loop, LOOP_SET_FD, fd) < 0)
- return log_error_errno(errno, "Failed to set loopback file descriptor on %s: %m", loopdev);
-
- if (arg_read_only)
- info.lo_flags |= LO_FLAGS_READ_ONLY;
-
- if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0)
- return log_error_errno(errno, "Failed to set loopback settings on %s: %m", loopdev);
-
- *device_path = loopdev;
- loopdev = NULL;
-
- *loop_nr = nr;
-
- r = loop;
- loop = -1;
-
- return r;
-}
-
-#define PARTITION_TABLE_BLURB \
- "Note that the disk image needs to either contain only a single MBR partition of\n" \
- "type 0x83 that is marked bootable, or a single GPT partition of type " \
- "0FC63DAF-8483-4772-8E79-3D69D8477DE4 or follow\n" \
- " http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" \
- "to be bootable with systemd-nspawn."
-
-static int dissect_image(
- int fd,
- char **root_device, bool *root_device_rw,
- char **home_device, bool *home_device_rw,
- char **srv_device, bool *srv_device_rw,
- char **esp_device,
- bool *secondary) {
-
-#ifdef HAVE_BLKID
- int home_nr = -1, srv_nr = -1, esp_nr = -1;
-#ifdef GPT_ROOT_NATIVE
- int root_nr = -1;
-#endif
-#ifdef GPT_ROOT_SECONDARY
- int secondary_root_nr = -1;
-#endif
- _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL, *esp = NULL, *generic = NULL;
- _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
- _cleanup_udev_device_unref_ struct udev_device *d = NULL;
- _cleanup_blkid_free_probe_ blkid_probe b = NULL;
- _cleanup_udev_unref_ struct udev *udev = NULL;
- struct udev_list_entry *first, *item;
- bool home_rw = true, root_rw = true, secondary_root_rw = true, srv_rw = true, generic_rw = true;
- bool is_gpt, is_mbr, multiple_generic = false;
- const char *pttype = NULL;
- blkid_partlist pl;
- struct stat st;
- unsigned i;
- int r;
-
- assert(fd >= 0);
- assert(root_device);
- assert(home_device);
- assert(srv_device);
- assert(esp_device);
- assert(secondary);
- assert(arg_image);
-
- b = blkid_new_probe();
- if (!b)
- return log_oom();
-
- errno = 0;
- r = blkid_probe_set_device(b, fd, 0, 0);
- if (r != 0) {
- if (errno == 0)
- return log_oom();
-
- return log_error_errno(errno, "Failed to set device on blkid probe: %m");
- }
-
- blkid_probe_enable_partitions(b, 1);
- blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
-
- errno = 0;
- r = blkid_do_safeprobe(b);
- if (r == -2 || r == 1) {
- log_error("Failed to identify any partition table on\n"
- " %s\n"
- PARTITION_TABLE_BLURB, arg_image);
- return -EINVAL;
- } else if (r != 0) {
- if (errno == 0)
- errno = EIO;
- return log_error_errno(errno, "Failed to probe: %m");
- }
-
- (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
-
- is_gpt = streq_ptr(pttype, "gpt");
- is_mbr = streq_ptr(pttype, "dos");
-
- if (!is_gpt && !is_mbr) {
- log_error("No GPT or MBR partition table discovered on\n"
- " %s\n"
- PARTITION_TABLE_BLURB, arg_image);
- return -EINVAL;
- }
-
- errno = 0;
- pl = blkid_probe_get_partitions(b);
- if (!pl) {
- if (errno == 0)
- return log_oom();
-
- log_error("Failed to list partitions of %s", arg_image);
- return -errno;
- }
-
- udev = udev_new();
- if (!udev)
- return log_oom();
-
- if (fstat(fd, &st) < 0)
- return log_error_errno(errno, "Failed to stat block device: %m");
-
- d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
- if (!d)
- return log_oom();
-
- for (i = 0;; i++) {
- int n, m;
-
- if (i >= 10) {
- log_error("Kernel partitions never appeared.");
- return -ENXIO;
- }
-
- e = udev_enumerate_new(udev);
- if (!e)
- return log_oom();
-
- r = udev_enumerate_add_match_parent(e, d);
- if (r < 0)
- return log_oom();
-
- r = udev_enumerate_scan_devices(e);
- if (r < 0)
- return log_error_errno(r, "Failed to scan for partition devices of %s: %m", arg_image);
-
- /* Count the partitions enumerated by the kernel */
- n = 0;
- first = udev_enumerate_get_list_entry(e);
- udev_list_entry_foreach(item, first)
- n++;
-
- /* Count the partitions enumerated by blkid */
- m = blkid_partlist_numof_partitions(pl);
- if (n == m + 1)
- break;
- if (n > m + 1) {
- log_error("blkid and kernel partition list do not match.");
- return -EIO;
- }
- if (n < m + 1) {
- unsigned j;
-
- /* The kernel has probed fewer partitions than
- * blkid? Maybe the kernel prober is still
- * running or it got EBUSY because udev
- * already opened the device. Let's reprobe
- * the device, which is a synchronous call
- * that waits until probing is complete. */
-
- for (j = 0; j < 20; j++) {
-
- r = ioctl(fd, BLKRRPART, 0);
- if (r < 0)
- r = -errno;
- if (r >= 0 || r != -EBUSY)
- break;
-
- /* If something else has the device
- * open, such as an udev rule, the
- * ioctl will return EBUSY. Since
- * there's no way to wait until it
- * isn't busy anymore, let's just wait
- * a bit, and try again.
- *
- * This is really something they
- * should fix in the kernel! */
-
- usleep(50 * USEC_PER_MSEC);
- }
-
- if (r < 0)
- return log_error_errno(r, "Failed to reread partition table: %m");
- }
-
- e = udev_enumerate_unref(e);
- }
-
- first = udev_enumerate_get_list_entry(e);
- udev_list_entry_foreach(item, first) {
- _cleanup_udev_device_unref_ struct udev_device *q;
- const char *node;
- unsigned long long flags;
- blkid_partition pp;
- dev_t qn;
- int nr;
-
- errno = 0;
- q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
- if (!q) {
- if (!errno)
- errno = ENOMEM;
-
- return log_error_errno(errno, "Failed to get partition device of %s: %m", arg_image);
- }
-
- qn = udev_device_get_devnum(q);
- if (major(qn) == 0)
- continue;
-
- if (st.st_rdev == qn)
- continue;
-
- node = udev_device_get_devnode(q);
- if (!node)
- continue;
-
- pp = blkid_partlist_devno_to_partition(pl, qn);
- if (!pp)
- continue;
-
- flags = blkid_partition_get_flags(pp);
-
- nr = blkid_partition_get_partno(pp);
- if (nr < 0)
- continue;
-
- if (is_gpt) {
- sd_id128_t type_id;
- const char *stype;
-
- if (flags & GPT_FLAG_NO_AUTO)
- continue;
-
- stype = blkid_partition_get_type_string(pp);
- if (!stype)
- continue;
-
- if (sd_id128_from_string(stype, &type_id) < 0)
- continue;
-
- if (sd_id128_equal(type_id, GPT_HOME)) {
-
- if (home && nr >= home_nr)
- continue;
-
- home_nr = nr;
- home_rw = !(flags & GPT_FLAG_READ_ONLY);
-
- r = free_and_strdup(&home, node);
- if (r < 0)
- return log_oom();
-
- } else if (sd_id128_equal(type_id, GPT_SRV)) {
-
- if (srv && nr >= srv_nr)
- continue;
-
- srv_nr = nr;
- srv_rw = !(flags & GPT_FLAG_READ_ONLY);
-
- r = free_and_strdup(&srv, node);
- if (r < 0)
- return log_oom();
- } else if (sd_id128_equal(type_id, GPT_ESP)) {
-
- if (esp && nr >= esp_nr)
- continue;
-
- esp_nr = nr;
-
- r = free_and_strdup(&esp, node);
- if (r < 0)
- return log_oom();
- }
-#ifdef GPT_ROOT_NATIVE
- else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
-
- if (root && nr >= root_nr)
- continue;
-
- root_nr = nr;
- root_rw = !(flags & GPT_FLAG_READ_ONLY);
-
- r = free_and_strdup(&root, node);
- if (r < 0)
- return log_oom();
- }
-#endif
-#ifdef GPT_ROOT_SECONDARY
- else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
-
- if (secondary_root && nr >= secondary_root_nr)
- continue;
-
- secondary_root_nr = nr;
- secondary_root_rw = !(flags & GPT_FLAG_READ_ONLY);
-
- r = free_and_strdup(&secondary_root, node);
- if (r < 0)
- return log_oom();
- }
-#endif
- else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
-
- if (generic)
- multiple_generic = true;
- else {
- generic_rw = !(flags & GPT_FLAG_READ_ONLY);
-
- r = free_and_strdup(&generic, node);
- if (r < 0)
- return log_oom();
- }
- }
-
- } else if (is_mbr) {
- int type;
-
- if (flags != 0x80) /* Bootable flag */
- continue;
-
- type = blkid_partition_get_type(pp);
- if (type != 0x83) /* Linux partition */
- continue;
-
- if (generic)
- multiple_generic = true;
- else {
- generic_rw = true;
-
- r = free_and_strdup(&root, node);
- if (r < 0)
- return log_oom();
- }
- }
- }
-
- if (root) {
- *root_device = root;
- root = NULL;
-
- *root_device_rw = root_rw;
- *secondary = false;
- } else if (secondary_root) {
- *root_device = secondary_root;
- secondary_root = NULL;
-
- *root_device_rw = secondary_root_rw;
- *secondary = true;
- } else if (generic) {
-
- /* There were no partitions with precise meanings
- * around, but we found generic partitions. In this
- * case, if there's only one, we can go ahead and boot
- * it, otherwise we bail out, because we really cannot
- * make any sense of it. */
-
- if (multiple_generic) {
- log_error("Identified multiple bootable Linux partitions on\n"
- " %s\n"
- PARTITION_TABLE_BLURB, arg_image);
- return -EINVAL;
- }
-
- *root_device = generic;
- generic = NULL;
-
- *root_device_rw = generic_rw;
- *secondary = false;
- } else {
- log_error("Failed to identify root partition in disk image\n"
- " %s\n"
- PARTITION_TABLE_BLURB, arg_image);
- return -EINVAL;
- }
-
- if (home) {
- *home_device = home;
- home = NULL;
-
- *home_device_rw = home_rw;
- }
-
- if (srv) {
- *srv_device = srv;
- srv = NULL;
-
- *srv_device_rw = srv_rw;
- }
-
- if (esp) {
- *esp_device = esp;
- esp = NULL;
- }
-
- return 0;
-#else
- log_error("--image= is not supported, compiled without blkid support.");
- return -EOPNOTSUPP;
-#endif
-}
-
-static int mount_device(const char *what, const char *where, const char *directory, bool rw) {
-#ifdef HAVE_BLKID
- _cleanup_blkid_free_probe_ blkid_probe b = NULL;
- const char *fstype, *p, *options;
- int r;
-
- assert(what);
- assert(where);
-
- if (arg_read_only)
- rw = false;
-
- if (directory)
- p = strjoina(where, directory);
- else
- p = where;
-
- errno = 0;
- b = blkid_new_probe_from_filename(what);
- if (!b) {
- if (errno == 0)
- return log_oom();
- return log_error_errno(errno, "Failed to allocate prober for %s: %m", what);
- }
-
- blkid_probe_enable_superblocks(b, 1);
- blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
-
- errno = 0;
- r = blkid_do_safeprobe(b);
- if (r == -1 || r == 1) {
- log_error("Cannot determine file system type of %s", what);
- return -EINVAL;
- } else if (r != 0) {
- if (errno == 0)
- errno = EIO;
- return log_error_errno(errno, "Failed to probe %s: %m", what);
- }
-
- errno = 0;
- if (blkid_probe_lookup_value(b, "TYPE", &fstype, NULL) < 0) {
- if (errno == 0)
- errno = EINVAL;
- log_error("Failed to determine file system type of %s", what);
- return -errno;
- }
-
- if (streq(fstype, "crypto_LUKS")) {
- log_error("nspawn currently does not support LUKS disk images.");
- return -EOPNOTSUPP;
- }
-
- /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains
- * sparse when possible. */
- if (STR_IN_SET(fstype, "btrfs", "ext4", "vfat", "xfs")) {
- const char *l;
-
- l = path_startswith(what, "/dev");
- if (l && startswith(l, "loop"))
- options = "discard";
- }
-
- return mount_verbose(LOG_ERR, what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
-#else
- log_error("--image= is not supported, compiled without blkid support.");
- return -EOPNOTSUPP;
-#endif
-}
-
static int setup_machine_id(const char *directory) {
const char *etc_machine_id;
sd_id128_t id;
@@ -2368,83 +1830,6 @@ static int recursive_chown(const char *directory, uid_t shift, uid_t range) {
return r;
}
-static int mount_devices(
- const char *where,
- const char *root_device, bool root_device_rw,
- const char *home_device, bool home_device_rw,
- const char *srv_device, bool srv_device_rw,
- const char *esp_device) {
- int r;
-
- assert(where);
-
- if (root_device) {
- r = mount_device(root_device, arg_directory, NULL, root_device_rw);
- if (r < 0)
- return log_error_errno(r, "Failed to mount root directory: %m");
- }
-
- if (home_device) {
- r = mount_device(home_device, arg_directory, "/home", home_device_rw);
- if (r < 0)
- return log_error_errno(r, "Failed to mount home directory: %m");
- }
-
- if (srv_device) {
- r = mount_device(srv_device, arg_directory, "/srv", srv_device_rw);
- if (r < 0)
- return log_error_errno(r, "Failed to mount server data directory: %m");
- }
-
- if (esp_device) {
- const char *mp, *x;
-
- /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
-
- mp = "/efi";
- x = strjoina(arg_directory, mp);
- r = dir_is_empty(x);
- if (r == -ENOENT) {
- mp = "/boot";
- x = strjoina(arg_directory, mp);
- r = dir_is_empty(x);
- }
-
- if (r > 0) {
- r = mount_device(esp_device, arg_directory, mp, true);
- if (r < 0)
- return log_error_errno(r, "Failed to mount ESP: %m");
- }
- }
-
- return 0;
-}
-
-static void loop_remove(int nr, int *image_fd) {
- _cleanup_close_ int control = -1;
- int r;
-
- if (nr < 0)
- return;
-
- if (image_fd && *image_fd >= 0) {
- r = ioctl(*image_fd, LOOP_CLR_FD);
- if (r < 0)
- log_debug_errno(errno, "Failed to close loop image: %m");
- *image_fd = safe_close(*image_fd);
- }
-
- control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
- if (control < 0) {
- log_warning_errno(errno, "Failed to open /dev/loop-control: %m");
- return;
- }
-
- r = ioctl(control, LOOP_CTL_REMOVE, nr);
- if (r < 0)
- log_debug_errno(errno, "Failed to remove loop %d: %m", nr);
-}
-
/*
* Return values:
* < 0 : wait_for_terminate() failed to get the state of the
@@ -2919,10 +2304,7 @@ static int outer_child(
Barrier *barrier,
const char *directory,
const char *console,
- const char *root_device, bool root_device_rw,
- const char *home_device, bool home_device_rw,
- const char *srv_device, bool srv_device_rw,
- const char *esp_device,
+ DissectedImage *dissected_image,
bool interactive,
bool secondary,
int pid_socket,
@@ -2982,13 +2364,11 @@ static int outer_child(
if (r < 0)
return r;
- r = mount_devices(directory,
- root_device, root_device_rw,
- home_device, home_device_rw,
- srv_device, srv_device_rw,
- esp_device);
- if (r < 0)
- return r;
+ if (dissected_image) {
+ r = dissected_image_mount(dissected_image, directory, DISSECTED_IMAGE_DISCARD_ON_LOOP|(arg_read_only ? DISSECTED_IMAGE_READ_ONLY : 0));
+ if (r < 0)
+ return r;
+ }
r = determine_uid_shift(directory);
if (r < 0)
@@ -3605,10 +2985,7 @@ static int load_settings(void) {
static int run(int master,
const char* console,
- const char *root_device, bool root_device_rw,
- const char *home_device, bool home_device_rw,
- const char *srv_device, bool srv_device_rw,
- const char *esp_device,
+ DissectedImage *dissected_image,
bool interactive,
bool secondary,
FDSet *fds,
@@ -3715,10 +3092,7 @@ static int run(int master,
r = outer_child(&barrier,
arg_directory,
console,
- root_device, root_device_rw,
- home_device, home_device_rw,
- srv_device, srv_device_rw,
- esp_device,
+ dissected_image,
interactive,
secondary,
pid_socket_pair[1],
@@ -4025,11 +3399,10 @@ static int run(int master,
int main(int argc, char *argv[]) {
- _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL, *esp_device = NULL, *console = NULL;
- bool root_device_rw = true, home_device_rw = true, srv_device_rw = true;
- _cleanup_close_ int master = -1, image_fd = -1;
+ _cleanup_free_ char *console = NULL;
+ _cleanup_close_ int master = -1;
_cleanup_fdset_free_ FDSet *fds = NULL;
- int r, n_fd_passed, loop_nr = -1, ret = EXIT_SUCCESS;
+ int r, n_fd_passed, ret = EXIT_SUCCESS;
char veth_name[IFNAMSIZ] = "";
bool secondary = false, remove_directory = false, remove_image = false;
pid_t pid = 0;
@@ -4037,6 +3410,8 @@ int main(int argc, char *argv[]) {
_cleanup_release_lock_file_ LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
bool interactive, veth_created = false, remove_tmprootdir = false;
char tmprootdir[] = "/tmp/nspawn-root-XXXXXX";
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
log_parse_environment();
log_open();
@@ -4250,20 +3625,32 @@ int main(int argc, char *argv[]) {
goto finish;
}
- image_fd = setup_image(&device_path, &loop_nr);
- if (image_fd < 0) {
- r = image_fd;
+ r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, &loop);
+ if (r < 0) {
+ log_error_errno(r, "Failed to set up loopback block device: %m");
goto finish;
}
- r = dissect_image(image_fd,
- &root_device, &root_device_rw,
- &home_device, &home_device_rw,
- &srv_device, &srv_device_rw,
- &esp_device,
- &secondary);
- if (r < 0)
+ r = dissect_image(loop->fd, &dissected_image);
+ if (r == -ENOPKG) {
+ log_error_errno(r, "Could not find a suitable file system or partition table in image: %s", arg_image);
+
+ log_notice("Note that the disk image needs to\n"
+ " a) either contain only a single MBR partition of type 0x83 that is marked bootable\n"
+ " b) or contain a single GPT partition of type 0FC63DAF-8483-4772-8E79-3D69D8477DE4\n"
+ " c) or follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n"
+ " d) or contain a file system without a partition table\n"
+ "in order to be bootable with systemd-nspawn.");
goto finish;
+ }
+ if (r == -EOPNOTSUPP) {
+ log_error_errno(r, "--image= is not supported, compiled without blkid support.");
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to dissect image: %m");
+ goto finish;
+ }
/* Now that we mounted the image, let's try to remove it again, if it is ephemeral */
if (remove_image && unlink(arg_image) >= 0)
@@ -4315,10 +3702,7 @@ int main(int argc, char *argv[]) {
for (;;) {
r = run(master,
console,
- root_device, root_device_rw,
- home_device, home_device_rw,
- srv_device, srv_device_rw,
- esp_device,
+ dissected_image,
interactive, secondary,
fds,
veth_name, &veth_created,
@@ -4345,8 +3729,6 @@ finish:
if (pid > 0)
(void) wait_for_terminate(pid, NULL);
- loop_remove(loop_nr, &image_fd);
-
if (remove_directory && arg_directory) {
int k;