diff options
Diffstat (limited to 'src/nspawn')
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 13 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.h | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn-expose-ports.c | 60 | ||||
-rw-r--r-- | src/nspawn/nspawn-expose-ports.h | 3 | ||||
-rw-r--r-- | src/nspawn/nspawn-gperf.gperf | 43 | ||||
-rw-r--r-- | src/nspawn/nspawn-mount.c | 129 | ||||
-rw-r--r-- | src/nspawn/nspawn-mount.h | 3 | ||||
-rw-r--r-- | src/nspawn/nspawn-network.c | 192 | ||||
-rw-r--r-- | src/nspawn/nspawn-network.h | 6 | ||||
-rw-r--r-- | src/nspawn/nspawn-register.c | 27 | ||||
-rw-r--r-- | src/nspawn/nspawn-register.h | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn-settings.c | 68 | ||||
-rw-r--r-- | src/nspawn/nspawn-settings.h | 8 | ||||
-rw-r--r-- | src/nspawn/nspawn-setuid.c | 17 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 328 |
15 files changed, 554 insertions, 347 deletions
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index c0e9ccd7a4..3c0e26ea5a 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -21,13 +21,15 @@ #include <sys/mount.h> -#include "util.h" -#include "strv.h" -#include "mkdir.h" -#include "fileio.h" +#include "alloc-util.h" #include "cgroup-util.h" - +#include "fd-util.h" +#include "fileio.h" +#include "mkdir.h" #include "nspawn-cgroup.h" +#include "string-util.h" +#include "strv.h" +#include "util.h" int chown_cgroup(pid_t pid, uid_t uid_shift) { _cleanup_free_ char *path = NULL, *fs = NULL; @@ -52,6 +54,7 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) { "tasks", "notify_on_release", "cgroup.procs", + "cgroup.events", "cgroup.clone_children", "cgroup.controllers", "cgroup.subtree_control", diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h index 985fdfaad5..4e8db63750 100644 --- a/src/nspawn/nspawn-cgroup.h +++ b/src/nspawn/nspawn-cgroup.h @@ -21,8 +21,8 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. 
***/ -#include <sys/types.h> #include <stdbool.h> +#include <sys/types.h> int chown_cgroup(pid_t pid, uid_t uid_shift); int sync_cgroup(pid_t pid, bool unified_requested); diff --git a/src/nspawn/nspawn-expose-ports.c b/src/nspawn/nspawn-expose-ports.c index 38250b6e02..89e5c57db3 100644 --- a/src/nspawn/nspawn-expose-ports.c +++ b/src/nspawn/nspawn-expose-ports.c @@ -21,13 +21,17 @@ #include "sd-netlink.h" -#include "util.h" -#include "in-addr-util.h" +#include "alloc-util.h" +#include "fd-util.h" #include "firewall-util.h" +#include "in-addr-util.h" #include "local-addresses.h" #include "netlink-util.h" - #include "nspawn-expose-ports.h" +#include "parse-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "util.h" int expose_port_parse(ExposePort **l, const char *s) { @@ -183,17 +187,8 @@ int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *ex } int expose_port_send_rtnl(int send_fd) { - union { - struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(int))]; - } control = {}; - struct msghdr mh = { - .msg_control = &control, - .msg_controllen = sizeof(control), - }; - struct cmsghdr *cmsg; _cleanup_close_ int fd = -1; - ssize_t k; + int r; assert(send_fd >= 0); @@ -201,19 +196,11 @@ int expose_port_send_rtnl(int send_fd) { if (fd < 0) return log_error_errno(errno, "Failed to allocate container netlink: %m"); - cmsg = CMSG_FIRSTHDR(&mh); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - memcpy(CMSG_DATA(cmsg), &fd, sizeof(int)); - - mh.msg_controllen = cmsg->cmsg_len; - /* Store away the fd in the socket, so that it stays open as * long as we run the child */ - k = sendmsg(send_fd, &mh, MSG_NOSIGNAL); - if (k < 0) - return log_error_errno(errno, "Failed to send netlink fd: %m"); + r = send_one_fd(send_fd, fd, 0); + if (r < 0) + return log_error_errno(r, "Failed to send netlink fd: %m"); return 0; } @@ -224,33 +211,16 @@ int expose_port_watch_rtnl( 
sd_netlink_message_handler_t handler, union in_addr_union *exposed, sd_netlink **ret) { - - union { - struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(int))]; - } control = {}; - struct msghdr mh = { - .msg_control = &control, - .msg_controllen = sizeof(control), - }; - struct cmsghdr *cmsg; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; int fd, r; - ssize_t k; assert(event); assert(recv_fd >= 0); assert(ret); - k = recvmsg(recv_fd, &mh, MSG_NOSIGNAL); - if (k < 0) - return log_error_errno(errno, "Failed to recv netlink fd: %m"); - - cmsg = CMSG_FIRSTHDR(&mh); - assert(cmsg->cmsg_level == SOL_SOCKET); - assert(cmsg->cmsg_type == SCM_RIGHTS); - assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int))); - memcpy(&fd, CMSG_DATA(cmsg), sizeof(int)); + fd = receive_one_fd(recv_fd, 0); + if (fd < 0) + return log_error_errno(fd, "Failed to recv netlink fd: %m"); r = sd_netlink_open_fd(&rtnl, fd); if (r < 0) { diff --git a/src/nspawn/nspawn-expose-ports.h b/src/nspawn/nspawn-expose-ports.h index 39cec28695..cb7340bad7 100644 --- a/src/nspawn/nspawn-expose-ports.h +++ b/src/nspawn/nspawn-expose-ports.h @@ -25,8 +25,9 @@ #include "sd-event.h" #include "sd-netlink.h" -#include "list.h" + #include "in-addr-util.h" +#include "list.h" typedef struct ExposePort { int protocol; diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf index b5127a387c..58f9f4c635 100644 --- a/src/nspawn/nspawn-gperf.gperf +++ b/src/nspawn/nspawn-gperf.gperf @@ -15,24 +15,25 @@ struct ConfigPerfItem; %struct-type %includes %% -Exec.Boot, config_parse_tristate, 0, offsetof(Settings, boot) -Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters) -Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment) -Exec.User, config_parse_string, 0, offsetof(Settings, user) -Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability) -Exec.DropCapability, config_parse_capability, 0, 
offsetof(Settings, drop_capability) -Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal) -Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) -Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id) -Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) -Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode) -Files.Bind, config_parse_bind, 0, 0 -Files.BindReadOnly, config_parse_bind, 1, 0 -Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0 -Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network) -Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces) -Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan) -Network.IPVLAN, config_parse_strv, 0, offsetof(Settings, network_ipvlan) -Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth) -Network.Bridge, config_parse_string, 0, offsetof(Settings, network_bridge) -Network.Port, config_parse_expose_port, 0, 0 +Exec.Boot, config_parse_tristate, 0, offsetof(Settings, boot) +Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters) +Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment) +Exec.User, config_parse_string, 0, offsetof(Settings, user) +Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability) +Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, drop_capability) +Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal) +Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) +Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id) +Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) +Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode) +Files.Bind, config_parse_bind, 0, 0 +Files.BindReadOnly, config_parse_bind, 1, 0 +Files.TemporaryFileSystem, 
config_parse_tmpfs, 0, 0 +Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network) +Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces) +Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan) +Network.IPVLAN, config_parse_strv, 0, offsetof(Settings, network_ipvlan) +Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth) +Network.VirtualEthernetExtra, config_parse_veth_extra, 0, 0 +Network.Bridge, config_parse_string, 0, offsetof(Settings, network_bridge) +Network.Port, config_parse_expose_port, 0, 0 diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 2bca39f45d..c8e627ac78 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -20,17 +20,25 @@ ***/ #include <sys/mount.h> +#include <linux/magic.h> -#include "util.h" -#include "rm-rf.h" -#include "strv.h" -#include "path-util.h" -#include "mkdir.h" -#include "label.h" -#include "set.h" +#include "alloc-util.h" #include "cgroup-util.h" - +#include "escape.h" +#include "fs-util.h" +#include "label.h" +#include "mkdir.h" +#include "mount-util.h" #include "nspawn-mount.h" +#include "parse-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "set.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "util.h" CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) { CustomMount *c, *ret; @@ -216,8 +224,67 @@ static int tmpfs_patch_options( return !!buf; } +int mount_sysfs(const char *dest) { + const char *full, *top, *x; + int r; + + top = prefix_roota(dest, "/sys"); + r = path_check_fstype(top, SYSFS_MAGIC); + if (r < 0) + return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top); + /* /sys might already be mounted as sysfs by the outer child in the + * !netns case. In this case, it's all good. 
Don't touch it because we + * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555. + */ + if (r > 0) + return 0; + + full = prefix_roota(top, "/full"); + + (void) mkdir(full, 0755); + + if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0) + return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full); + + FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") { + _cleanup_free_ char *from = NULL, *to = NULL; + + from = prefix_root(full, x); + if (!from) + return log_oom(); + + to = prefix_root(top, x); + if (!to) + return log_oom(); + + (void) mkdir(to, 0755); + + if (mount(from, to, NULL, MS_BIND, NULL) < 0) + return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x); + + if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0) + return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x); + } + + if (umount(full) < 0) + return log_error_errno(errno, "Failed to unmount %s: %m", full); + + if (rmdir(full) < 0) + return log_error_errno(errno, "Failed to remove %s: %m", full); + + x = prefix_roota(top, "/fs/kdbus"); + (void) mkdir(x, 0755); + + if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0) + return log_error_errno(errno, "Failed to make %s read-only: %m", top); + + return 0; +} + int mount_all(const char *dest, - bool userns, uid_t uid_shift, uid_t uid_range, + bool use_userns, bool in_userns, + bool use_netns, + uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { typedef struct MountPoint { @@ -227,21 +294,23 @@ int mount_all(const char *dest, const char *options; unsigned long flags; bool fatal; - bool userns; + bool in_userns; + bool use_netns; } MountPoint; static const MountPoint mount_table[] = { - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, 
/* Bind mount first */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */ - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false }, - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false }, - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false }, + { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */ + { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, + { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ 
#endif }; @@ -252,7 +321,10 @@ int mount_all(const char *dest, _cleanup_free_ char *where = NULL, *options = NULL; const char *o; - if (userns != mount_table[k].userns) + if (in_userns != mount_table[k].in_userns) + continue; + + if (!use_netns && mount_table[k].use_netns) continue; where = prefix_root(dest, mount_table[k].where); @@ -278,7 +350,7 @@ int mount_all(const char *dest, o = mount_table[k].options; if (streq_ptr(mount_table[k].type, "tmpfs")) { - r = tmpfs_patch_options(o, userns, uid_shift, uid_range, selinux_apifs_context, &options); + r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options); if (r < 0) return log_oom(); if (r > 0) @@ -369,8 +441,7 @@ static int mount_bind(const char *dest, CustomMount *m) { if (r < 0) return log_error_errno(r, "Failed to make parents of %s: %m", where); } else { - log_error_errno(errno, "Failed to stat %s: %m", where); - return -errno; + return log_error_errno(errno, "Failed to stat %s: %m", where); } /* Create the mount point. Any non-directory file can be @@ -534,7 +605,7 @@ static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controlle char *to; int r; - to = strjoina(dest, "/sys/fs/cgroup/", hierarchy); + to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy); r = path_is_mount_point(to, 0); if (r < 0 && r != -ENOENT) @@ -569,6 +640,8 @@ static int mount_legacy_cgroups( cgroup_root = prefix_roota(dest, "/sys/fs/cgroup"); + (void) mkdir_p(cgroup_root, 0755); + /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. 
*/ r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW); if (r < 0) @@ -661,13 +734,15 @@ static int mount_unified_cgroups(const char *dest) { assert(dest); - p = strjoina(dest, "/sys/fs/cgroup"); + p = prefix_roota(dest, "/sys/fs/cgroup"); + + (void) mkdir_p(p, 0755); r = path_is_mount_point(p, AT_SYMLINK_FOLLOW); if (r < 0) return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p); if (r > 0) { - p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs"); + p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs"); if (access(p, F_OK) >= 0) return 0; if (errno != ENOENT) diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 5abd44cc4b..bdab23bcca 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -57,7 +57,8 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s); int custom_mount_compare(const void *a, const void *b); -int mount_all(const char *dest, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); +int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); +int mount_sysfs(const char *dest); int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int mount_systemd_cgroup_writable(const char *dest, bool unified_requested); diff --git a/src/nspawn/nspawn-network.c b/src/nspawn/nspawn-network.c index 74abe5379a..3104c8e953 100644 --- a/src/nspawn/nspawn-network.c +++ b/src/nspawn/nspawn-network.c @@ -22,20 +22,23 @@ #include <linux/veth.h> #include <net/if.h> +#include "libudev.h" #include "sd-id128.h" #include "sd-netlink.h" -#include "libudev.h" -#include "util.h" +#include "alloc-util.h" #include "ether-addr-util.h" -#include "siphash24.h" #include "netlink-util.h" -#include "udev-util.h" - #include "nspawn-network.h" +#include "siphash24.h" +#include "string-util.h" +#include 
"udev-util.h" +#include "util.h" #define HOST_HASH_KEY SD_ID128_MAKE(1a,37,6f,c7,46,ec,45,0b,ad,a3,d5,31,06,60,5d,b1) #define CONTAINER_HASH_KEY SD_ID128_MAKE(c3,c4,f9,19,b5,57,b2,1c,e6,cf,14,27,03,9c,ee,a2) +#define VETH_EXTRA_HOST_HASH_KEY SD_ID128_MAKE(48,c7,f6,b7,ea,9d,4c,9e,b7,28,d4,de,91,d5,bf,66) +#define VETH_EXTRA_CONTAINER_HASH_KEY SD_ID128_MAKE(af,50,17,61,ce,f9,4d,35,84,0d,2b,20,54,be,ce,59) #define MACVLAN_HASH_KEY SD_ID128_MAKE(00,13,6d,bc,66,83,44,81,bb,0c,f9,51,1f,24,a6,6f) static int generate_mac( @@ -44,7 +47,7 @@ static int generate_mac( sd_id128_t hash_key, uint64_t idx) { - uint8_t result[8]; + uint64_t result; size_t l, sz; uint8_t *v, *i; int r; @@ -71,10 +74,10 @@ static int generate_mac( /* Let's hash the host machine ID plus the container name. We * use a fixed, but originally randomly created hash key here. */ - siphash24(result, v, sz, hash_key.bytes); + result = htole64(siphash24(v, sz, hash_key.bytes)); assert_cc(ETH_ALEN <= sizeof(result)); - memcpy(mac->ether_addr_octet, result, ETH_ALEN); + memcpy(mac->ether_addr_octet, &result, ETH_ALEN); /* see eth_random_addr in the kernel */ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */ @@ -83,42 +86,32 @@ static int generate_mac( return 0; } -int setup_veth(const char *machine_name, - pid_t pid, - char iface_name[IFNAMSIZ], - bool bridge) { - - _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; - struct ether_addr mac_host, mac_container; - int r, i; +static int add_veth( + sd_netlink *rtnl, + pid_t pid, + const char *ifname_host, + const struct ether_addr *mac_host, + const char *ifname_container, + const struct ether_addr *mac_container) { - /* Use two different interface name prefixes depending whether - * we are in bridge mode or not. */ - snprintf(iface_name, IFNAMSIZ - 1, "%s-%s", - bridge ? 
"vb" : "ve", machine_name); - - r = generate_mac(machine_name, &mac_container, CONTAINER_HASH_KEY, 0); - if (r < 0) - return log_error_errno(r, "Failed to generate predictable MAC address for container side: %m"); - - r = generate_mac(machine_name, &mac_host, HOST_HASH_KEY, 0); - if (r < 0) - return log_error_errno(r, "Failed to generate predictable MAC address for host side: %m"); + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; - r = sd_netlink_open(&rtnl); - if (r < 0) - return log_error_errno(r, "Failed to connect to netlink: %m"); + assert(rtnl); + assert(ifname_host); + assert(mac_host); + assert(ifname_container); + assert(mac_container); r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0); if (r < 0) return log_error_errno(r, "Failed to allocate netlink message: %m"); - r = sd_netlink_message_append_string(m, IFLA_IFNAME, iface_name); + r = sd_netlink_message_append_string(m, IFLA_IFNAME, ifname_host); if (r < 0) return log_error_errno(r, "Failed to add netlink interface name: %m"); - r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac_host); + r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, mac_host); if (r < 0) return log_error_errno(r, "Failed to add netlink MAC address: %m"); @@ -134,11 +127,11 @@ int setup_veth(const char *machine_name, if (r < 0) return log_error_errno(r, "Failed to open netlink container: %m"); - r = sd_netlink_message_append_string(m, IFLA_IFNAME, "host0"); + r = sd_netlink_message_append_string(m, IFLA_IFNAME, ifname_container); if (r < 0) return log_error_errno(r, "Failed to add netlink interface name: %m"); - r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac_container); + r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, mac_container); if (r < 0) return log_error_errno(r, "Failed to add netlink MAC address: %m"); @@ -160,7 +153,44 @@ int setup_veth(const char *machine_name, r = sd_netlink_call(rtnl, m, 0, NULL); if (r < 0) - return log_error_errno(r, 
"Failed to add new veth interfaces (host0, %s): %m", iface_name); + return log_error_errno(r, "Failed to add new veth interfaces (%s:%s): %m", ifname_host, ifname_container); + + return 0; +} + +int setup_veth(const char *machine_name, + pid_t pid, + char iface_name[IFNAMSIZ], + bool bridge) { + + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + struct ether_addr mac_host, mac_container; + int r, i; + + assert(machine_name); + assert(pid > 0); + assert(iface_name); + + /* Use two different interface name prefixes depending whether + * we are in bridge mode or not. */ + snprintf(iface_name, IFNAMSIZ - 1, "%s-%s", + bridge ? "vb" : "ve", machine_name); + + r = generate_mac(machine_name, &mac_container, CONTAINER_HASH_KEY, 0); + if (r < 0) + return log_error_errno(r, "Failed to generate predictable MAC address for container side: %m"); + + r = generate_mac(machine_name, &mac_host, HOST_HASH_KEY, 0); + if (r < 0) + return log_error_errno(r, "Failed to generate predictable MAC address for host side: %m"); + + r = sd_netlink_open(&rtnl); + if (r < 0) + return log_error_errno(r, "Failed to connect to netlink: %m"); + + r = add_veth(rtnl, pid, iface_name, &mac_host, "host0", &mac_container); + if (r < 0) + return r; i = (int) if_nametoindex(iface_name); if (i <= 0) @@ -169,9 +199,50 @@ int setup_veth(const char *machine_name, return i; } +int setup_veth_extra( + const char *machine_name, + pid_t pid, + char **pairs) { + + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + uint64_t idx = 0; + char **a, **b; + int r; + + assert(machine_name); + assert(pid > 0); + + if (strv_isempty(pairs)) + return 0; + + r = sd_netlink_open(&rtnl); + if (r < 0) + return log_error_errno(r, "Failed to connect to netlink: %m"); + + STRV_FOREACH_PAIR(a, b, pairs) { + struct ether_addr mac_host, mac_container; + + r = generate_mac(machine_name, &mac_container, VETH_EXTRA_CONTAINER_HASH_KEY, idx); + if (r < 0) + return log_error_errno(r, "Failed to generate predictable MAC address for 
container side of extra veth link: %m"); + + r = generate_mac(machine_name, &mac_host, VETH_EXTRA_HOST_HASH_KEY, idx); + if (r < 0) + return log_error_errno(r, "Failed to generate predictable MAC address for host side of extra veth link: %m"); + + r = add_veth(rtnl, pid, *a, &mac_host, *b, &mac_container); + if (r < 0) + return r; + + idx ++; /* idx is passed to generate_mac() for the next host:container pair */ + } + + return 0; +} + int setup_bridge(const char *veth_name, const char *bridge_name) { - _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; int r, bridge_ifi; assert(veth_name); @@ -232,7 +303,7 @@ static int parse_interface(struct udev *udev, const char *name) { int move_network_interfaces(pid_t pid, char **ifaces) { _cleanup_udev_unref_ struct udev *udev = NULL; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; char **i; int r; @@ -250,7 +321,7 @@ int move_network_interfaces(pid_t pid, char **ifaces) { } STRV_FOREACH(i, ifaces) { - _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; int ifi; ifi = parse_interface(udev, *i); @@ -275,7 +346,7 @@ int move_network_interfaces(pid_t pid, char **ifaces) { int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces) { _cleanup_udev_unref_ struct udev *udev = NULL; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; unsigned idx = 0; char **i; int r; @@ -294,7 +365,7 @@ int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces) { } STRV_FOREACH(i, ifaces) { - _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; _cleanup_free_ char *n = NULL; struct ether_addr mac; int ifi; @@ -363,7 +434,7 @@ int
setup_macvlan(const char *machine_name, pid_t pid, char **ifaces) { int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces) { _cleanup_udev_unref_ struct udev *udev = NULL; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; char **i; int r; @@ -381,7 +452,7 @@ int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces) { } STRV_FOREACH(i, ifaces) { - _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; _cleanup_free_ char *n = NULL; int ifi; @@ -438,3 +509,34 @@ int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces) { return 0; } + +int veth_extra_parse(char ***l, const char *p) { + _cleanup_free_ char *a = NULL, *b = NULL; + int r; + + r = extract_first_word(&p, &a, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0 || isempty(a)) + return -EINVAL; + + r = extract_first_word(&p, &b, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0 || isempty(b)) { + free(b); + b = strdup(a); + if (!b) + return -ENOMEM; + } + + if (p) + return -EINVAL; + + r = strv_push_pair(l, a, b); + if (r < 0) + return -ENOMEM; + + a = b = NULL; + return 0; +} diff --git a/src/nspawn/nspawn-network.h b/src/nspawn/nspawn-network.h index 311e6d06cb..c91fc79c42 100644 --- a/src/nspawn/nspawn-network.h +++ b/src/nspawn/nspawn-network.h @@ -22,11 +22,11 @@ ***/ #include <net/if.h> - -#include <sys/types.h> #include <stdbool.h> +#include <sys/types.h> int setup_veth(const char *machine_name, pid_t pid, char iface_name[IFNAMSIZ], bool bridge); +int setup_veth_extra(const char *machine_name, pid_t pid, char **pairs); int setup_bridge(const char *veth_name, const char *bridge_name); @@ -34,3 +34,5 @@ int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces); int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces); int move_network_interfaces(pid_t pid, char 
**ifaces); + +int veth_extra_parse(char ***l, const char *p); diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c index b2776a61c2..65ca9c762b 100644 --- a/src/nspawn/nspawn-register.c +++ b/src/nspawn/nspawn-register.c @@ -21,12 +21,12 @@ #include "sd-bus.h" -#include "util.h" -#include "strv.h" -#include "bus-util.h" #include "bus-error.h" - +#include "bus-util.h" #include "nspawn-register.h" +#include "stat-util.h" +#include "strv.h" +#include "util.h" int register_machine( const char *machine_name, @@ -39,10 +39,11 @@ int register_machine( unsigned n_mounts, int kill_signal, char **properties, - bool keep_unit) { + bool keep_unit, + const char *service) { - _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_bus_flush_close_unref_ sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; int r; r = sd_bus_default_system(&bus); @@ -61,13 +62,13 @@ int register_machine( "sayssusai", machine_name, SD_BUS_MESSAGE_APPEND_ID128(uuid), - "nspawn", + service, "container", (uint32_t) pid, strempty(directory), local_ifindex > 0 ? 
1 : 0, local_ifindex); } else { - _cleanup_bus_message_unref_ sd_bus_message *m = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; char **i; unsigned j; @@ -86,7 +87,7 @@ int register_machine( "sayssusai", machine_name, SD_BUS_MESSAGE_APPEND_ID128(uuid), - "nspawn", + service, "container", (uint32_t) pid, strempty(directory), @@ -194,9 +195,9 @@ int register_machine( } int terminate_machine(pid_t pid) { - _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL; - _cleanup_bus_message_unref_ sd_bus_message *reply = NULL; - _cleanup_bus_flush_close_unref_ sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; const char *path; int r; diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h index b27841ff59..d3bfd84e5e 100644 --- a/src/nspawn/nspawn-register.h +++ b/src/nspawn/nspawn-register.h @@ -27,5 +27,5 @@ #include "nspawn-mount.h" -int register_machine(const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit); +int register_machine(const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service); int terminate_machine(pid_t pid); diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c index 419f5d1c40..d6b64d8d5a 100644 --- a/src/nspawn/nspawn-settings.c +++ b/src/nspawn/nspawn-settings.c @@ -19,12 +19,14 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. 
***/ -#include "util.h" -#include "conf-parser.h" -#include "strv.h" +#include "alloc-util.h" #include "cap-list.h" - +#include "conf-parser.h" +#include "nspawn-network.h" #include "nspawn-settings.h" +#include "process-util.h" +#include "strv.h" +#include "util.h" int settings_load(FILE *f, const char *path, Settings **ret) { _cleanup_(settings_freep) Settings *s = NULL; @@ -76,6 +78,7 @@ Settings* settings_free(Settings *s) { strv_free(s->network_interfaces); strv_free(s->network_macvlan); strv_free(s->network_ipvlan); + strv_free(s->network_veth_extra); free(s->network_bridge); expose_port_free_all(s->expose_ports); @@ -85,6 +88,27 @@ Settings* settings_free(Settings *s) { return NULL; } +bool settings_private_network(Settings *s) { + assert(s); + + return + s->private_network > 0 || + s->network_veth > 0 || + s->network_bridge || + s->network_interfaces || + s->network_macvlan || + s->network_ipvlan || + s->network_veth_extra; +} + +bool settings_network_veth(Settings *s) { + assert(s); + + return + s->network_veth > 0 || + s->network_bridge; +} + DEFINE_CONFIG_PARSE_ENUM(config_parse_volatile_mode, volatile_mode, VolatileMode, "Failed to parse volatile mode"); int config_parse_expose_port( @@ -152,7 +176,7 @@ int config_parse_capability( cap = capability_from_name(word); if (cap < 0) { - log_syntax(unit, LOG_ERR, filename, line, cap, "Failed to parse capability, ignoring: %s", word); + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse capability, ignoring: %s", word); continue; } @@ -248,15 +272,33 @@ int config_parse_tmpfs( return 0; } - if (settings->network_bridge) - settings->network_veth = true; + return 0; +} - if (settings->network_interfaces || - settings->network_macvlan || - settings->network_ipvlan || - settings->network_bridge || - settings->network_veth) - settings->private_network = true; +int config_parse_veth_extra( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const 
char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Settings *settings = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = veth_extra_parse(&settings->network_veth_extra, rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid extra virtual Ethernet link specification %s: %m", rvalue); + return 0; + } return 0; } diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h index 4cec40c1b7..10230a5b83 100644 --- a/src/nspawn/nspawn-settings.h +++ b/src/nspawn/nspawn-settings.h @@ -24,9 +24,8 @@ #include <stdio.h> #include "macro.h" - -#include "nspawn-mount.h" #include "nspawn-expose-ports.h" +#include "nspawn-mount.h" typedef enum SettingsMask { SETTING_BOOT = 1 << 0, @@ -69,12 +68,16 @@ typedef struct Settings { char **network_interfaces; char **network_macvlan; char **network_ipvlan; + char **network_veth_extra; ExposePort *expose_ports; } Settings; int settings_load(FILE *f, const char *path, Settings **ret); Settings* settings_free(Settings *s); +bool settings_network_veth(Settings *s); +bool settings_private_network(Settings *s); + DEFINE_TRIVIAL_CLEANUP_FUNC(Settings*, settings_free); const struct ConfigPerfItem* nspawn_gperf_lookup(const char *key, unsigned length); @@ -85,3 +88,4 @@ int config_parse_expose_port(const char *unit, const char *filename, unsigned li int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void 
*userdata); +int config_parse_veth_extra(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/nspawn/nspawn-setuid.c b/src/nspawn/nspawn-setuid.c index eda7f62900..014a40b243 100644 --- a/src/nspawn/nspawn-setuid.c +++ b/src/nspawn/nspawn-setuid.c @@ -19,16 +19,19 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ +#include <grp.h> #include <sys/types.h> #include <unistd.h> -#include <grp.h> -#include "util.h" -#include "signal-util.h" +#include "alloc-util.h" +#include "fd-util.h" #include "mkdir.h" -#include "process-util.h" - #include "nspawn-setuid.h" +#include "process-util.h" +#include "signal-util.h" +#include "string-util.h" +#include "user-util.h" +#include "util.h" static int spawn_getent(const char *database, const char *key, pid_t *rpid) { int pipe_fds[2]; @@ -258,10 +261,10 @@ int change_uid_gid(const char *user, char **_home) { return log_error_errno(errno, "Failed to set auxiliary groups: %m"); if (setresgid(gid, gid, gid) < 0) - return log_error_errno(errno, "setregid() failed: %m"); + return log_error_errno(errno, "setresgid() failed: %m"); if (setresuid(uid, uid, uid) < 0) - return log_error_errno(errno, "setreuid() failed: %m"); + return log_error_errno(errno, "setresuid() failed: %m"); if (_home) { *_home = home; diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 5702df8ab4..d619206dd6 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -46,21 +46,22 @@ #include "sd-daemon.h" #include "sd-id128.h" +#include "alloc-util.h" #include "barrier.h" #include "base-filesystem.h" #include "blkid-util.h" #include "btrfs-util.h" -#include "build.h" #include "cap-list.h" -#include "capability.h" +#include "capability-util.h" #include "cgroup-util.h" #include "copy.h" #include "dev-setup.h" #include "env-util.h" -#include "event-util.h" +#include "fd-util.h" #include "fdset.h" 
#include "fileio.h" #include "formats-util.h" +#include "fs-util.h" #include "gpt.h" #include "hostname-util.h" #include "log.h" @@ -69,7 +70,16 @@ #include "macro.h" #include "missing.h" #include "mkdir.h" +#include "mount-util.h" #include "netlink-util.h" +#include "nspawn-cgroup.h" +#include "nspawn-expose-ports.h" +#include "nspawn-mount.h" +#include "nspawn-network.h" +#include "nspawn-register.h" +#include "nspawn-settings.h" +#include "nspawn-setuid.h" +#include "parse-util.h" #include "path-util.h" #include "process-util.h" #include "ptyfwd.h" @@ -79,19 +89,17 @@ #include "seccomp-util.h" #endif #include "signal-util.h" +#include "socket-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" #include "strv.h" #include "terminal-util.h" #include "udev-util.h" +#include "umask-util.h" +#include "user-util.h" #include "util.h" -#include "nspawn-settings.h" -#include "nspawn-mount.h" -#include "nspawn-network.h" -#include "nspawn-expose-ports.h" -#include "nspawn-cgroup.h" -#include "nspawn-register.h" -#include "nspawn-setuid.h" - typedef enum ContainerStatus { CONTAINER_TERMINATED, CONTAINER_REBOOTED @@ -156,6 +164,7 @@ static char **arg_network_interfaces = NULL; static char **arg_network_macvlan = NULL; static char **arg_network_ipvlan = NULL; static bool arg_network_veth = false; +static char **arg_network_veth_extra = NULL; static char *arg_network_bridge = NULL; static unsigned long arg_personality = PERSONALITY_INVALID; static char *arg_image = NULL; @@ -169,6 +178,7 @@ static bool arg_unified_cgroup_hierarchy = false; static SettingsMask arg_settings_mask = 0; static int arg_settings_trusted = -1; static char **arg_parameters = NULL; +static const char *arg_container_service_name = "systemd-nspawn"; static void help(void) { printf("%s [OPTIONS...] 
[PATH] [ARGUMENTS...]\n\n" @@ -200,10 +210,13 @@ static void help(void) { " --network-ipvlan=INTERFACE\n" " Create a ipvlan network interface based on an\n" " existing network interface to the container\n" - " -n --network-veth Add a virtual ethernet connection between host\n" + " -n --network-veth Add a virtual Ethernet connection between host\n" " and container\n" + " --network-veth-extra=HOSTIF[:CONTAINERIF]\n" + " Add an additional virtual Ethernet link between\n" + " host and container\n" " --network-bridge=INTERFACE\n" - " Add a virtual ethernet connection between host\n" + " Add a virtual Ethernet connection between host\n" " and container and add it to an existing bridge on\n" " the host\n" " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n" @@ -277,27 +290,6 @@ static int custom_mounts_prepare(void) { return 0; } -static int set_sanitized_path(char **b, const char *path) { - char *p; - - assert(b); - assert(path); - - p = canonicalize_file_name(path); - if (!p) { - if (errno != ENOENT) - return -errno; - - p = path_make_absolute_cwd(path); - if (!p) - return -ENOMEM; - } - - free(*b); - *b = path_kill_slashes(p); - return 0; -} - static int detect_unified_cgroup_hierarchy(void) { const char *e; int r; @@ -345,6 +337,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_NETWORK_MACVLAN, ARG_NETWORK_IPVLAN, ARG_NETWORK_BRIDGE, + ARG_NETWORK_VETH_EXTRA, ARG_PERSONALITY, ARG_VOLATILE, ARG_TEMPLATE, @@ -386,6 +379,7 @@ static int parse_argv(int argc, char *argv[]) { { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN }, { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN }, { "network-veth", no_argument, NULL, 'n' }, + { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA}, { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE }, { "personality", required_argument, NULL, ARG_PERSONALITY }, { "image", required_argument, NULL, 'i' }, @@ -399,6 +393,7 @@ static int parse_argv(int argc, char *argv[]) { }; int 
c, r; + const char *p, *e; uint64_t plus = 0, minus = 0; bool mask_all_settings = false, mask_no_settings = false; @@ -414,29 +409,24 @@ static int parse_argv(int argc, char *argv[]) { return 0; case ARG_VERSION: - puts(PACKAGE_STRING); - puts(SYSTEMD_FEATURES); - return 0; + return version(); case 'D': - r = set_sanitized_path(&arg_directory, optarg); + r = parse_path_argument_and_warn(optarg, false, &arg_directory); if (r < 0) - return log_error_errno(r, "Invalid root directory: %m"); - + return r; break; case ARG_TEMPLATE: - r = set_sanitized_path(&arg_template, optarg); + r = parse_path_argument_and_warn(optarg, false, &arg_template); if (r < 0) - return log_error_errno(r, "Invalid template directory: %m"); - + return r; break; case 'i': - r = set_sanitized_path(&arg_image, optarg); + r = parse_path_argument_and_warn(optarg, false, &arg_image); if (r < 0) - return log_error_errno(r, "Invalid image path: %m"); - + return r; break; case 'x': @@ -464,6 +454,15 @@ static int parse_argv(int argc, char *argv[]) { arg_settings_mask |= SETTING_NETWORK; break; + case ARG_NETWORK_VETH_EXTRA: + r = veth_extra_parse(&arg_network_veth_extra, optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --network-veth-extra= parameter: %s", optarg); + + arg_private_network = true; + arg_settings_mask |= SETTING_NETWORK; + break; + case ARG_NETWORK_INTERFACE: if (strv_extend(&arg_network_interfaces, optarg) < 0) return log_oom(); @@ -541,15 +540,16 @@ static int parse_argv(int argc, char *argv[]) { case ARG_CAPABILITY: case ARG_DROP_CAPABILITY: { - const char *state, *word; - size_t length; + p = optarg; + for(;;) { + _cleanup_free_ char *t = NULL; - FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) { - _cleanup_free_ char *t; + r = extract_first_word(&p, &t, ",", 0); + if (r < 0) + return log_error_errno(r, "Failed to parse capability %s.", t); - t = strndup(word, length); - if (!t) - return log_oom(); + if (r == 0) + break; if (streq(t, "all")) { if (c == 
ARG_CAPABILITY) @@ -924,6 +924,10 @@ static int parse_argv(int argc, char *argv[]) { if (r < 0) return r; + e = getenv("SYSTEMD_NSPAWN_CONTAINER_SERVICE"); + if (e) + arg_container_service_name = e; + return 1; } @@ -1192,6 +1196,7 @@ static int copy_devnodes(const char *dest) { static int setup_pts(const char *dest) { _cleanup_free_ char *options = NULL; const char *p; + int r; #ifdef HAVE_SELINUX if (arg_selinux_apifs_context) @@ -1214,20 +1219,23 @@ static int setup_pts(const char *dest) { return log_error_errno(errno, "Failed to create /dev/pts: %m"); if (mount("devpts", p, "devpts", MS_NOSUID|MS_NOEXEC, options) < 0) return log_error_errno(errno, "Failed to mount /dev/pts: %m"); - if (userns_lchown(p, 0, 0) < 0) - return log_error_errno(errno, "Failed to chown /dev/pts: %m"); + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown /dev/pts: %m"); /* Create /dev/ptmx symlink */ p = prefix_roota(dest, "/dev/ptmx"); if (symlink("pts/ptmx", p) < 0) return log_error_errno(errno, "Failed to create /dev/ptmx symlink: %m"); - if (userns_lchown(p, 0, 0) < 0) - return log_error_errno(errno, "Failed to chown /dev/ptmx: %m"); + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown /dev/ptmx: %m"); /* And fix /dev/pts/ptmx ownership */ p = prefix_roota(dest, "/dev/pts/ptmx"); - if (userns_lchown(p, 0, 0) < 0) - return log_error_errno(errno, "Failed to chown /dev/pts/ptmx: %m"); + r = userns_lchown(p, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to chown /dev/pts/ptmx: %m"); return 0; } @@ -1264,16 +1272,7 @@ static int setup_dev_console(const char *dest, const char *console) { static int setup_kmsg(const char *dest, int kmsg_socket) { const char *from, *to; _cleanup_umask_ mode_t u; - int fd, k; - union { - struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(int))]; - } control = {}; - struct msghdr mh = { - .msg_control = &control, - .msg_controllen = sizeof(control), - }; - struct cmsghdr 
*cmsg; + int fd, r; assert(kmsg_socket >= 0); @@ -1298,21 +1297,13 @@ static int setup_kmsg(const char *dest, int kmsg_socket) { if (fd < 0) return log_error_errno(errno, "Failed to open fifo: %m"); - cmsg = CMSG_FIRSTHDR(&mh); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - memcpy(CMSG_DATA(cmsg), &fd, sizeof(int)); - - mh.msg_controllen = cmsg->cmsg_len; - /* Store away the fd in the socket, so that it stays open as * long as we run the child */ - k = sendmsg(kmsg_socket, &mh, MSG_NOSIGNAL); + r = send_one_fd(kmsg_socket, fd, 0); safe_close(fd); - if (k < 0) - return log_error_errno(errno, "Failed to send FIFO fd: %m"); + if (r < 0) + return log_error_errno(r, "Failed to send FIFO fd: %m"); /* And now make the FIFO unavailable as /run/kmsg... */ (void) unlink(from); @@ -1426,7 +1417,7 @@ static int setup_journal(const char *directory) { r = userns_mkdir(directory, p, 0755, 0, 0); if (r < 0) - log_warning_errno(errno, "Failed to create directory %s: %m", q); + log_warning_errno(r, "Failed to create directory %s: %m", q); return 0; } @@ -1440,15 +1431,11 @@ static int setup_journal(const char *directory) { if (errno == ENOTDIR) { log_error("%s already exists and is neither a symlink nor a directory", p); return r; - } else { - log_error_errno(errno, "Failed to remove %s: %m", p); - return -errno; - } + } else + return log_error_errno(errno, "Failed to remove %s: %m", p); } - } else if (r != -ENOENT) { - log_error_errno(errno, "readlink(%s) failed: %m", p); - return r; - } + } else if (r != -ENOENT) + return log_error_errno(r, "readlink(%s) failed: %m", p); if (arg_link_journal == LINK_GUEST) { @@ -1456,15 +1443,13 @@ static int setup_journal(const char *directory) { if (arg_link_journal_try) { log_debug_errno(errno, "Failed to symlink %s to %s, skipping journal setup: %m", q, p); return 0; - } else { - log_error_errno(errno, "Failed to symlink %s to %s: %m", q, p); - return -errno; - } + } else + return 
log_error_errno(errno, "Failed to symlink %s to %s: %m", q, p); } r = userns_mkdir(directory, p, 0755, 0, 0); if (r < 0) - log_warning_errno(errno, "Failed to create directory %s: %m", q); + log_warning_errno(r, "Failed to create directory %s: %m", q); return 0; } @@ -1476,10 +1461,8 @@ static int setup_journal(const char *directory) { if (arg_link_journal_try) { log_debug_errno(errno, "Failed to create %s, skipping journal setup: %m", p); return 0; - } else { - log_error_errno(errno, "Failed to create %s: %m", p); - return r; - } + } else + return log_error_errno(errno, "Failed to create %s: %m", p); } } else if (access(p, F_OK) < 0) @@ -1489,10 +1472,8 @@ static int setup_journal(const char *directory) { log_warning("%s is not empty, proceeding anyway.", q); r = userns_mkdir(directory, p, 0755, 0, 0); - if (r < 0) { - log_error_errno(errno, "Failed to create %s: %m", q); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to create %s: %m", q); if (mount(p, q, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "Failed to bind mount journal from host into guest: %m"); @@ -1501,7 +1482,7 @@ static int setup_journal(const char *directory) { } static int drop_capabilities(void) { - return capability_bounding_set_drop(~arg_retain, false); + return capability_bounding_set_drop(arg_retain, false); } static int reset_audit_loginuid(void) { @@ -1633,20 +1614,24 @@ finish: static int setup_propagate(const char *root) { const char *p, *q; + int r; (void) mkdir_p("/run/systemd/nspawn/", 0755); (void) mkdir_p("/run/systemd/nspawn/propagate", 0600); p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); (void) mkdir_p(p, 0600); - if (userns_mkdir(root, "/run/systemd", 0755, 0, 0) < 0) - return log_error_errno(errno, "Failed to create /run/systemd: %m"); + r = userns_mkdir(root, "/run/systemd", 0755, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to create /run/systemd: %m"); - if (userns_mkdir(root, "/run/systemd/nspawn", 0755, 0, 0) < 0) - 
return log_error_errno(errno, "Failed to create /run/systemd/nspawn: %m"); + r = userns_mkdir(root, "/run/systemd/nspawn", 0755, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to create /run/systemd/nspawn: %m"); - if (userns_mkdir(root, "/run/systemd/nspawn/incoming", 0600, 0, 0) < 0) - return log_error_errno(errno, "Failed to create /run/systemd/nspawn/incoming: %m"); + r = userns_mkdir(root, "/run/systemd/nspawn/incoming", 0600, 0, 0); + if (r < 0) + return log_error_errno(r, "Failed to create /run/systemd/nspawn/incoming: %m"); q = prefix_roota(root, "/run/systemd/nspawn/incoming"); if (mount(p, q, NULL, MS_BIND, NULL) < 0) @@ -1696,7 +1681,7 @@ static int setup_image(char **device_path, int *loop_nr) { } if (!S_ISREG(st.st_mode)) { - log_error_errno(errno, "%s is not a regular file or block device: %m", arg_image); + log_error("%s is not a regular file or block device.", arg_image); return -EINVAL; } @@ -1788,8 +1773,7 @@ static int dissect_image( if (errno == 0) return log_oom(); - log_error_errno(errno, "Failed to set device on blkid probe: %m"); - return -errno; + return log_error_errno(errno, "Failed to set device on blkid probe: %m"); } blkid_probe_enable_partitions(b, 1); @@ -1805,8 +1789,7 @@ static int dissect_image( } else if (r != 0) { if (errno == 0) errno = EIO; - log_error_errno(errno, "Failed to probe: %m"); - return -errno; + return log_error_errno(errno, "Failed to probe: %m"); } (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); @@ -1929,8 +1912,7 @@ static int dissect_image( if (!errno) errno = ENOMEM; - log_error_errno(errno, "Failed to get partition device of %s: %m", arg_image); - return -errno; + return log_error_errno(errno, "Failed to get partition device of %s: %m", arg_image); } qn = udev_device_get_devnum(q); @@ -2137,8 +2119,7 @@ static int mount_device(const char *what, const char *where, const char *directo if (!b) { if (errno == 0) return log_oom(); - log_error_errno(errno, "Failed to allocate prober for %s: 
%m", what); - return -errno; + return log_error_errno(errno, "Failed to allocate prober for %s: %m", what); } blkid_probe_enable_superblocks(b, 1); @@ -2152,8 +2133,7 @@ static int mount_device(const char *what, const char *where, const char *directo } else if (r != 0) { if (errno == 0) errno = EIO; - log_error_errno(errno, "Failed to probe %s: %m", what); - return -errno; + return log_error_errno(errno, "Failed to probe %s: %m", what); } errno = 0; @@ -2299,12 +2279,10 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) { return r; } -static void nop_handler(int sig) {} - static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { pid_t pid; - pid = PTR_TO_UINT32(userdata); + pid = PTR_TO_PID(userdata); if (pid > 0) { if (kill(pid, arg_kill_signal) >= 0) { log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination."); @@ -2344,9 +2322,9 @@ static int determine_names(void) { } if (i->type == IMAGE_RAW) - r = set_sanitized_path(&arg_image, i->path); + r = free_and_strdup(&arg_image, i->path); else - r = set_sanitized_path(&arg_directory, i->path); + r = free_and_strdup(&arg_directory, i->path); if (r < 0) return log_error_errno(r, "Invalid image directory: %m"); @@ -2438,10 +2416,10 @@ static int inner_child( FDSet *fds) { _cleanup_free_ char *home = NULL; - unsigned n_env = 2; + unsigned n_env = 1; const char *envp[] = { "PATH=" DEFAULT_PATH_SPLIT_USR, - "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */ + NULL, /* container */ NULL, /* TERM */ NULL, /* HOME */ NULL, /* USER */ @@ -2472,7 +2450,11 @@ static int inner_child( } } - r = mount_all(NULL, true, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); + r = mount_all(NULL, arg_userns, true, arg_uid_shift, arg_private_network, arg_uid_range, arg_selinux_apifs_context); + if (r < 0) + return r; + + r = mount_sysfs(NULL); if (r < 0) return r; @@ -2515,8 +2497,9 @@ static int 
inner_child( rtnl_socket = safe_close(rtnl_socket); } - if (drop_capabilities() < 0) - return log_error_errno(errno, "drop_capabilities() failed: %m"); + r = drop_capabilities(); + if (r < 0) + return log_error_errno(r, "drop_capabilities() failed: %m"); setup_hostname(); @@ -2538,6 +2521,9 @@ static int inner_child( if (r < 0) return r; + /* LXC sets container=lxc, so follow the scheme here */ + envp[n_env++] = strjoina("container=", arg_container_service_name); + envp[n_env] = strv_find_prefix(environ, "TERM="); if (envp[n_env]) n_env ++; @@ -2616,8 +2602,9 @@ static int inner_child( execle("/bin/sh", "-sh", NULL, env_use); } + r = -errno; (void) log_open(); - return log_error_errno(errno, "execv() failed: %m"); + return log_error_errno(r, "execv() failed: %m"); } static int outer_child( @@ -2723,7 +2710,7 @@ static int outer_child( return log_error_errno(r, "Failed to make tree read-only: %m"); } - r = mount_all(directory, false, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); + r = mount_all(directory, arg_userns, false, arg_private_network, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context); if (r < 0) return r; @@ -2804,6 +2791,8 @@ static int outer_child( } pid_socket = safe_close(pid_socket); + kmsg_socket = safe_close(kmsg_socket); + rtnl_socket = safe_close(rtnl_socket); return 0; } @@ -2856,7 +2845,7 @@ static int load_settings(void) { p = j; j = NULL; - /* By default we trust configuration from /etc and /run */ + /* By default, we trust configuration from /etc and /run */ if (arg_settings_trusted < 0) arg_settings_trusted = true; @@ -2886,7 +2875,7 @@ static int load_settings(void) { if (!f && errno != ENOENT) return log_error_errno(errno, "Failed to open %s: %m", p); - /* By default we do not trust configuration from /var/lib/machines */ + /* By default, we do not trust configuration from /var/lib/machines */ if (arg_settings_trusted < 0) arg_settings_trusted = false; } @@ -2928,11 +2917,17 @@ static int load_settings(void) { } if 
((arg_settings_mask & SETTING_CAPABILITY) == 0) { + uint64_t plus; - if (!arg_settings_trusted && settings->capability != 0) - log_warning("Ignoring Capability= setting, file %s is not trusted.", p); - else - arg_retain |= settings->capability; + plus = settings->capability; + if (settings_private_network(settings)) + plus |= (1ULL << CAP_NET_ADMIN); + + if (!arg_settings_trusted && plus != 0) { + if (settings->capability != 0) + log_warning("Ignoring Capability= setting, file %s is not trusted.", p); + } else + arg_retain |= plus; arg_retain &= ~settings->drop_capability; } @@ -2983,11 +2978,15 @@ static int load_settings(void) { settings->network_bridge || settings->network_interfaces || settings->network_macvlan || - settings->network_ipvlan)) { + settings->network_ipvlan || + settings->network_veth_extra)) { if (!arg_settings_trusted) log_warning("Ignoring network settings, file %s is not trusted.", p); else { + arg_network_veth = settings_network_veth(settings); + arg_private_network = settings_private_network(settings); + strv_free(arg_network_interfaces); arg_network_interfaces = settings->network_interfaces; settings->network_interfaces = NULL; @@ -3000,13 +2999,13 @@ static int load_settings(void) { arg_network_ipvlan = settings->network_ipvlan; settings->network_ipvlan = NULL; + strv_free(arg_network_veth_extra); + arg_network_veth_extra = settings->network_veth_extra; + settings->network_veth_extra = NULL; + free(arg_network_bridge); arg_network_bridge = settings->network_bridge; settings->network_bridge = NULL; - - arg_network_veth = settings->network_veth > 0 || settings->network_bridge; - - arg_private_network = true; /* all these settings imply private networking */ } } @@ -3112,7 +3111,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? 
BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); if (r < 0) { log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory); goto finish; @@ -3136,7 +3135,7 @@ int main(int argc, char *argv[]) { } if (arg_template) { - r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); if (r == -EEXIST) { if (!arg_quiet) log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template); @@ -3159,10 +3158,9 @@ int main(int argc, char *argv[]) { } else { const char *p; - p = strjoina(arg_directory, - argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/"); - if (access(p, F_OK) < 0) { - log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory); + p = strjoina(arg_directory, "/usr/"); + if (laccess(p, F_OK) < 0) { + log_error("Directory %s doesn't look like it has an OS tree. 
Refusing.", arg_directory); r = -EINVAL; goto finish; } @@ -3251,19 +3249,18 @@ int main(int argc, char *argv[]) { } for (;;) { - _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 }, rtnl_socket_pair[2] = { -1, -1 }, pid_socket_pair[2] = { -1, -1 }, - uid_shift_socket_pair[2] = { -1, -1 }; + _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 }, rtnl_socket_pair[2] = { -1, -1 }, pid_socket_pair[2] = { -1, -1 }, uid_shift_socket_pair[2] = { -1, -1 }; ContainerStatus container_status; _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL; static const struct sigaction sa = { - .sa_handler = nop_handler, + .sa_handler = nop_signal_handler, .sa_flags = SA_NOCLDSTOP, }; int ifi = 0; ssize_t l; - _cleanup_event_unref_ sd_event *event = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; _cleanup_(pty_forward_freep) PTYForward *forward = NULL; - _cleanup_netlink_unref_ sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; char last_char = 0; r = barrier_create(&barrier); @@ -3353,8 +3350,7 @@ int main(int argc, char *argv[]) { barrier_set_role(&barrier, BARRIER_PARENT); - fdset_free(fds); - fds = NULL; + fds = fdset_free(fds); kmsg_socket_pair[1] = safe_close(kmsg_socket_pair[1]); rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]); @@ -3432,6 +3428,10 @@ int main(int argc, char *argv[]) { } } + r = setup_veth_extra(arg_machine, pid, arg_network_veth_extra); + if (r < 0) + goto finish; + r = setup_macvlan(arg_machine, pid, arg_network_macvlan); if (r < 0) goto finish; @@ -3452,7 +3452,8 @@ int main(int argc, char *argv[]) { arg_custom_mounts, arg_n_custom_mounts, arg_kill_signal, arg_property, - arg_keep_unit); + arg_keep_unit, + arg_container_service_name); if (r < 0) goto finish; } @@ -3489,8 +3490,8 @@ int main(int argc, char *argv[]) { } /* Let the child know that we are ready and wait that the child is completely ready now. 
*/ - if (!barrier_place_and_sync(&barrier)) { /* #5 */ - log_error("Client died too early."); + if (!barrier_place_and_sync(&barrier)) { /* #4 */ + log_error("Child died too early."); r = -ESRCH; goto finish; } @@ -3508,8 +3509,8 @@ int main(int argc, char *argv[]) { if (arg_kill_signal > 0) { /* Try to kill the init system on SIGINT or SIGTERM */ - sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid)); - sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid)); + sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, PID_TO_PTR(pid)); + sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, PID_TO_PTR(pid)); } else { /* Immediately exit */ sd_event_add_signal(event, NULL, SIGINT, NULL, NULL); @@ -3529,7 +3530,7 @@ int main(int argc, char *argv[]) { rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]); - r = pty_forward_new(event, master, true, !interactive, &forward); + r = pty_forward_new(event, master, PTY_FORWARD_IGNORE_VHANGUP | (interactive ? 0 : PTY_FORWARD_READ_ONLY), &forward); if (r < 0) { log_error_errno(r, "Failed to create PTY forwarder: %m"); goto finish; @@ -3608,7 +3609,7 @@ finish: if (remove_subvol && arg_directory) { int k; - k = btrfs_subvol_remove(arg_directory, true); + k = btrfs_subvol_remove(arg_directory, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (k < 0) log_warning_errno(k, "Cannot remove subvolume '%s', ignoring: %m", arg_directory); } @@ -3632,6 +3633,7 @@ finish: strv_free(arg_network_interfaces); strv_free(arg_network_macvlan); strv_free(arg_network_ipvlan); + strv_free(arg_network_veth_extra); strv_free(arg_parameters); custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts); expose_port_free_all(arg_expose_ports); |