diff options
Diffstat (limited to 'src/nspawn/nspawn.c')
-rw-r--r-- | src/nspawn/nspawn.c | 259 |
1 files changed, 126 insertions, 133 deletions
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 7451c2bf64..4c48681f17 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -46,21 +46,23 @@ #include "sd-daemon.h" #include "sd-id128.h" +#include "alloc-util.h" #include "barrier.h" #include "base-filesystem.h" #include "blkid-util.h" #include "btrfs-util.h" -#include "build.h" #include "cap-list.h" -#include "capability.h" +#include "capability-util.h" #include "cgroup-util.h" #include "copy.h" #include "dev-setup.h" #include "env-util.h" #include "event-util.h" +#include "fd-util.h" #include "fdset.h" #include "fileio.h" #include "formats-util.h" +#include "fs-util.h" #include "gpt.h" #include "hostname-util.h" #include "log.h" @@ -69,7 +71,16 @@ #include "macro.h" #include "missing.h" #include "mkdir.h" +#include "mount-util.h" #include "netlink-util.h" +#include "nspawn-cgroup.h" +#include "nspawn-expose-ports.h" +#include "nspawn-mount.h" +#include "nspawn-network.h" +#include "nspawn-register.h" +#include "nspawn-settings.h" +#include "nspawn-setuid.h" +#include "parse-util.h" #include "path-util.h" #include "process-util.h" #include "ptyfwd.h" @@ -79,19 +90,17 @@ #include "seccomp-util.h" #endif #include "signal-util.h" +#include "socket-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" #include "strv.h" #include "terminal-util.h" #include "udev-util.h" +#include "umask-util.h" +#include "user-util.h" #include "util.h" -#include "nspawn-settings.h" -#include "nspawn-mount.h" -#include "nspawn-network.h" -#include "nspawn-expose-ports.h" -#include "nspawn-cgroup.h" -#include "nspawn-register.h" -#include "nspawn-setuid.h" - typedef enum ContainerStatus { CONTAINER_TERMINATED, CONTAINER_REBOOTED @@ -169,6 +178,7 @@ static bool arg_unified_cgroup_hierarchy = false; static SettingsMask arg_settings_mask = 0; static int arg_settings_trusted = -1; static char **arg_parameters = NULL; +static const char *arg_container_service_name = "systemd-nspawn"; static void help(void) { printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n" @@ -200,10 +210,10 @@ static void help(void) { " --network-ipvlan=INTERFACE\n" " Create a ipvlan network interface based on an\n" " existing network interface to the container\n" - " -n --network-veth Add a virtual ethernet connection between host\n" + " -n --network-veth Add a virtual Ethernet connection between host\n" " and container\n" " --network-bridge=INTERFACE\n" - " Add a virtual ethernet connection between host\n" + " Add a virtual Ethernet connection between host\n" " and container and add it to an existing bridge on\n" " the host\n" " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n" @@ -277,27 +287,6 @@ static int custom_mounts_prepare(void) { return 0; } -static int set_sanitized_path(char **b, const char *path) { - char *p; - - assert(b); - assert(path); - - p = canonicalize_file_name(path); - if (!p) { - if (errno != ENOENT) - return -errno; - - p = path_make_absolute_cwd(path); - if (!p) - return -ENOMEM; - } - - free(*b); - *b = path_kill_slashes(p); - return 0; -} - static int detect_unified_cgroup_hierarchy(void) { const char *e; int r; @@ -399,6 +388,7 @@ static int parse_argv(int argc, char *argv[]) { }; int c, r; + const char *p, *e; uint64_t plus = 0, minus = 0; bool mask_all_settings = false, mask_no_settings = false; @@ -414,29 +404,24 @@ static int parse_argv(int argc, char *argv[]) { return 0; case ARG_VERSION: - puts(PACKAGE_STRING); - puts(SYSTEMD_FEATURES); - return 0; + return version(); case 'D': - r = set_sanitized_path(&arg_directory, optarg); + r = parse_path_argument_and_warn(optarg, false, &arg_directory); if (r < 0) - return log_error_errno(r, "Invalid root directory: %m"); - + return r; break; case ARG_TEMPLATE: - r = set_sanitized_path(&arg_template, optarg); + r = parse_path_argument_and_warn(optarg, false, &arg_template); if (r < 0) - return log_error_errno(r, "Invalid template directory: %m"); - + return r; break; case 'i': - r = set_sanitized_path(&arg_image, optarg); + r = parse_path_argument_and_warn(optarg, false, &arg_image); if (r < 0) - return log_error_errno(r, "Invalid image path: %m"); - + return r; break; case 'x': @@ -541,15 +526,16 @@ static int parse_argv(int argc, char *argv[]) { case ARG_CAPABILITY: case ARG_DROP_CAPABILITY: { - const char *state, *word; - size_t length; + p = optarg; + for(;;) { + _cleanup_free_ char *t = NULL; - FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) { - _cleanup_free_ char *t; + r = extract_first_word(&p, &t, ",", 0); + if (r < 0) + return log_error_errno(r, "Failed to parse capability %s.", t); - t = strndup(word, length); - if (!t) - return log_oom(); + if (r == 0) + break; if (streq(t, "all")) { if (c == ARG_CAPABILITY) @@ -924,6 +910,10 @@ static int parse_argv(int argc, char *argv[]) { if (r < 0) return r; + e = getenv("SYSTEMD_NSPAWN_CONTAINER_SERVICE"); + if (e) + arg_container_service_name = e; + return 1; } @@ -1192,6 +1182,7 @@ static int copy_devnodes(const char *dest) { static int setup_pts(const char *dest) { _cleanup_free_ char *options = NULL; const char *p; + int r; #ifdef HAVE_SELINUX if (arg_selinux_apifs_context) @@ -1214,20 +1205,23 @@ static int setup_pts(const char *dest) { return log_error_errno(errno, "Failed to create /dev/pts: %m"); if (mount("devpts", p, "devpts", MS_NOSUID|MS_NOEXEC, options) < 0) return log_error_errno(errno, "Failed to mount /dev/pts: %m"); - if (userns_lchown(p, 0, 0) < 0) - return log_error_errno(errno, "Failed to chown /dev/pts: %m"); + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown /dev/pts: %m"); /* Create /dev/ptmx symlink */ p = prefix_roota(dest, "/dev/ptmx"); if (symlink("pts/ptmx", p) < 0) return log_error_errno(errno, "Failed to create /dev/ptmx symlink: %m"); - if (userns_lchown(p, 0, 0) < 0) - return log_error_errno(errno, "Failed to chown /dev/ptmx: %m"); + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown /dev/ptmx: %m"); /* And fix /dev/pts/ptmx ownership */ p = prefix_roota(dest, "/dev/pts/ptmx"); - if (userns_lchown(p, 0, 0) < 0) - return log_error_errno(errno, "Failed to chown /dev/pts/ptmx: %m"); + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown /dev/pts/ptmx: %m"); return 0; } @@ -1291,7 +1285,7 @@ static int setup_kmsg(const char *dest, int kmsg_socket) { /* Store away the fd in the socket, so that it stays open as * long as we run the child */ - r = send_one_fd(kmsg_socket, fd); + r = send_one_fd(kmsg_socket, fd, 0); safe_close(fd); if (r < 0) @@ -1409,7 +1403,7 @@ static int setup_journal(const char *directory) { r = userns_mkdir(directory, p, 0755, 0, 0); if (r < 0) - log_warning_errno(errno, "Failed to create directory %s: %m", q); + log_warning_errno(r, "Failed to create directory %s: %m", q); return 0; } @@ -1423,15 +1417,11 @@ static int setup_journal(const char *directory) { if (errno == ENOTDIR) { log_error("%s already exists and is neither a symlink nor a directory", p); return r; - } else { - log_error_errno(errno, "Failed to remove %s: %m", p); - return -errno; - } + } else + return log_error_errno(errno, "Failed to remove %s: %m", p); } - } else if (r != -ENOENT) { - log_error_errno(errno, "readlink(%s) failed: %m", p); - return r; - } + } else if (r != -ENOENT) + return log_error_errno(r, "readlink(%s) failed: %m", p); if (arg_link_journal == LINK_GUEST) { @@ -1439,15 +1429,13 @@ static int setup_journal(const char *directory) { if (arg_link_journal_try) { log_debug_errno(errno, "Failed to symlink %s to %s, skipping journal setup: %m", q, p); return 0; - } else { - log_error_errno(errno, "Failed to symlink %s to %s: %m", q, p); - return -errno; - } + } else + return log_error_errno(errno, "Failed to symlink %s to %s: %m", q, p); } r = userns_mkdir(directory, p, 0755, 0, 0); if (r < 0) - log_warning_errno(errno, "Failed to create directory %s: %m", q); + log_warning_errno(r, "Failed to create directory %s: %m", q); return 0; } @@ -1459,10 +1447,8 @@ static int setup_journal(const char *directory) { if (arg_link_journal_try) { log_debug_errno(errno, "Failed to create %s, skipping journal setup: %m", p); return 0; - } else { - log_error_errno(errno, "Failed to create %s: %m", p); - return r; - } + } else + return log_error_errno(errno, "Failed to create %s: %m", p); } } else if (access(p, F_OK) < 0) @@ -1472,10 +1458,8 @@ static int setup_journal(const char *directory) { log_warning("%s is not empty, proceeding anyway.", q); r = userns_mkdir(directory, p, 0755, 0, 0); - if (r < 0) { - log_error_errno(errno, "Failed to create %s: %m", q); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to create %s: %m", q); if (mount(p, q, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "Failed to bind mount journal from host into guest: %m"); @@ -1616,20 +1600,24 @@ finish: static int setup_propagate(const char *root) { const char *p, *q; + int r; (void) mkdir_p("/run/systemd/nspawn/", 0755); (void) mkdir_p("/run/systemd/nspawn/propagate", 0600); p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); (void) mkdir_p(p, 0600); - if (userns_mkdir(root, "/run/systemd", 0755, 0, 0) < 0) - return log_error_errno(errno, "Failed to create /run/systemd: %m"); + r = userns_mkdir(root, "/run/systemd", 0755, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to create /run/systemd: %m"); - if (userns_mkdir(root, "/run/systemd/nspawn", 0755, 0, 0) < 0) - return log_error_errno(errno, "Failed to create /run/systemd/nspawn: %m"); + r = userns_mkdir(root, "/run/systemd/nspawn", 0755, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to create /run/systemd/nspawn: %m"); - if (userns_mkdir(root, "/run/systemd/nspawn/incoming", 0600, 0, 0) < 0) - return log_error_errno(errno, "Failed to create /run/systemd/nspawn/incoming: %m"); + r = userns_mkdir(root, "/run/systemd/nspawn/incoming", 0600, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to create /run/systemd/nspawn/incoming: %m"); q = prefix_roota(root, "/run/systemd/nspawn/incoming"); if (mount(p, q, NULL, MS_BIND, NULL) < 0) @@ -1679,7 +1667,7 @@ static int setup_image(char **device_path, int *loop_nr) { } if (!S_ISREG(st.st_mode)) { - log_error_errno(errno, "%s is not a regular file or block device: %m", arg_image); + log_error("%s is not a regular file or block device.", arg_image); return -EINVAL; } @@ -1771,8 +1759,7 @@ static int dissect_image( if (errno == 0) return log_oom(); - log_error_errno(errno, "Failed to set device on blkid probe: %m"); - return -errno; + return log_error_errno(errno, "Failed to set device on blkid probe: %m"); } blkid_probe_enable_partitions(b, 1); @@ -1788,8 +1775,7 @@ static int dissect_image( } else if (r != 0) { if (errno == 0) errno = EIO; - log_error_errno(errno, "Failed to probe: %m"); - return -errno; + return log_error_errno(errno, "Failed to probe: %m"); } (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); @@ -1912,8 +1898,7 @@ static int dissect_image( if (!errno) errno = ENOMEM; - log_error_errno(errno, "Failed to get partition device of %s: %m", arg_image); - return -errno; + return log_error_errno(errno, "Failed to get partition device of %s: %m", arg_image); } qn = udev_device_get_devnum(q); @@ -2120,8 +2105,7 @@ static int mount_device(const char *what, const char *where, const char *directo if (!b) { if (errno == 0) return log_oom(); - log_error_errno(errno, "Failed to allocate prober for %s: %m", what); - return -errno; + return log_error_errno(errno, "Failed to allocate prober for %s: %m", what); } blkid_probe_enable_superblocks(b, 1); @@ -2135,8 +2119,7 @@ static int mount_device(const char *what, const char *where, const char *directo } else if (r != 0) { if (errno == 0) errno = EIO; - log_error_errno(errno, "Failed to probe %s: %m", what); - return -errno; + return log_error_errno(errno, "Failed to probe %s: %m", what); } errno = 0; @@ -2282,8 +2265,6 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) { return r; } -static void nop_handler(int sig) {} - static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { pid_t pid; @@ -2327,9 +2308,9 @@ static int determine_names(void) { } if (i->type == IMAGE_RAW) - r = set_sanitized_path(&arg_image, i->path); + r = free_and_strdup(&arg_image, i->path); else - r = set_sanitized_path(&arg_directory, i->path); + r = free_and_strdup(&arg_directory, i->path); if (r < 0) return log_error_errno(r, "Invalid image directory: %m"); @@ -2421,10 +2402,10 @@ static int inner_child( FDSet *fds) { _cleanup_free_ char *home = NULL; - unsigned n_env = 2; + unsigned n_env = 1; const char *envp[] = { "PATH=" DEFAULT_PATH_SPLIT_USR, - "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */ + NULL, /* container */ NULL, /* TERM */ NULL, /* HOME */ NULL, /* USER */ @@ -2455,7 +2436,11 @@ static int inner_child( } } - r = mount_all(NULL, true, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); + r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_private_network, arg_uid_range, arg_selinux_apifs_context); + if (r < 0) + return r; + + r = mount_sysfs(NULL); if (r < 0) return r; @@ -2498,8 +2483,9 @@ static int inner_child( rtnl_socket = safe_close(rtnl_socket); } - if (drop_capabilities() < 0) - return log_error_errno(errno, "drop_capabilities() failed: %m"); + r = drop_capabilities(); + if (r < 0) + return log_error_errno(r, "drop_capabilities() failed: %m"); setup_hostname(); @@ -2521,6 +2507,9 @@ static int inner_child( if (r < 0) return r; + /* LXC sets container=lxc, so follow the scheme here */ + envp[n_env++] = strjoina("container=", arg_container_service_name); + envp[n_env] = strv_find_prefix(environ, "TERM="); if (envp[n_env]) n_env ++; @@ -2599,8 +2588,9 @@ static int inner_child( execle("/bin/sh", "-sh", NULL, env_use); } + r = -errno; (void) log_open(); - return log_error_errno(errno, "execv() failed: %m"); + return log_error_errno(r, "execv() failed: %m"); } static int outer_child( @@ -2706,7 +2696,7 @@ static int outer_child( return log_error_errno(r, "Failed to make tree read-only: %m"); } - r = mount_all(directory, false, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); + r = mount_all(directory, arg_userns, false, arg_private_network, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); if (r < 0) return r; @@ -2841,7 +2831,7 @@ static int load_settings(void) { p = j; j = NULL; - /* By default we trust configuration from /etc and /run */ + /* By default, we trust configuration from /etc and /run */ if (arg_settings_trusted < 0) arg_settings_trusted = true; @@ -2871,7 +2861,7 @@ static int load_settings(void) { if (!f && errno != ENOENT) return log_error_errno(errno, "Failed to open %s: %m", p); - /* By default we do not trust configuration from /var/lib/machines */ + /* By default, we do not trust configuration from /var/lib/machines */ if (arg_settings_trusted < 0) arg_settings_trusted = false; } @@ -2913,11 +2903,17 @@ static int load_settings(void) { } if ((arg_settings_mask & SETTING_CAPABILITY) == 0) { + uint64_t plus; - if (!arg_settings_trusted && settings->capability != 0) - log_warning("Ignoring Capability= setting, file %s is not trusted.", p); - else - arg_retain |= settings->capability; + plus = settings->capability; + if (settings_private_network(settings)) + plus |= (1ULL << CAP_NET_ADMIN); + + if (!arg_settings_trusted && plus != 0) { + if (settings->capability != 0) + log_warning("Ignoring Capability= setting, file %s is not trusted.", p); + } else + arg_retain |= plus; arg_retain &= ~settings->drop_capability; } @@ -2973,6 +2969,9 @@ static int load_settings(void) { if (!arg_settings_trusted) log_warning("Ignoring network settings, file %s is not trusted.", p); else { + arg_network_veth = settings_private_network(settings); + arg_private_network = settings_private_network(settings); + strv_free(arg_network_interfaces); arg_network_interfaces = settings->network_interfaces; settings->network_interfaces = NULL; @@ -2988,10 +2987,6 @@ static int load_settings(void) { free(arg_network_bridge); arg_network_bridge = settings->network_bridge; settings->network_bridge = NULL; - - arg_network_veth = settings->network_veth > 0 || settings->network_bridge; - - arg_private_network = true; /* all these settings imply private networking */ } } @@ -3097,7 +3092,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); if (r < 0) { log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory); goto finish; @@ -3121,7 +3116,7 @@ int main(int argc, char *argv[]) { } if (arg_template) { - r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); if (r == -EEXIST) { if (!arg_quiet) log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template); @@ -3144,10 +3139,9 @@ int main(int argc, char *argv[]) { } else { const char *p; - p = strjoina(arg_directory, - argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/"); - if (access(p, F_OK) < 0) { - log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory); + p = strjoina(arg_directory, "/usr/"); + if (laccess(p, F_OK) < 0) { + log_error("Directory %s doesn't look like it has an OS tree. Refusing.", arg_directory); r = -EINVAL; goto finish; } @@ -3236,12 +3230,11 @@ int main(int argc, char *argv[]) { } for (;;) { - _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 }, rtnl_socket_pair[2] = { -1, -1 }, pid_socket_pair[2] = { -1, -1 }, - uid_shift_socket_pair[2] = { -1, -1 }; + _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 }, rtnl_socket_pair[2] = { -1, -1 }, pid_socket_pair[2] = { -1, -1 }, uid_shift_socket_pair[2] = { -1, -1 }; ContainerStatus container_status; _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL; static const struct sigaction sa = { - .sa_handler = nop_handler, + .sa_handler = nop_signal_handler, .sa_flags = SA_NOCLDSTOP, }; int ifi = 0; @@ -3338,8 +3331,7 @@ int main(int argc, char *argv[]) { barrier_set_role(&barrier, BARRIER_PARENT); - fdset_free(fds); - fds = NULL; + fds = fdset_free(fds); kmsg_socket_pair[1] = safe_close(kmsg_socket_pair[1]); rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]); @@ -3437,7 +3429,8 @@ int main(int argc, char *argv[]) { arg_custom_mounts, arg_n_custom_mounts, arg_kill_signal, arg_property, - arg_keep_unit); + arg_keep_unit, + arg_container_service_name); if (r < 0) goto finish; } @@ -3514,7 +3507,7 @@ int main(int argc, char *argv[]) { rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]); - r = pty_forward_new(event, master, true, !interactive, &forward); + r = pty_forward_new(event, master, PTY_FORWARD_IGNORE_VHANGUP | (interactive ? 0 : PTY_FORWARD_READ_ONLY), &forward); if (r < 0) { log_error_errno(r, "Failed to create PTY forwarder: %m"); goto finish; @@ -3593,7 +3586,7 @@ finish: if (remove_subvol && arg_directory) { int k; - k = btrfs_subvol_remove(arg_directory, true); + k = btrfs_subvol_remove(arg_directory, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (k < 0) log_warning_errno(k, "Cannot remove subvolume '%s', ignoring: %m", arg_directory); } |