summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile-man.am2
-rw-r--r--Makefile.am30
-rw-r--r--NEWS28
-rw-r--r--TODO4
-rw-r--r--configure.ac2
-rw-r--r--man/os-release.xml7
-rw-r--r--man/systemctl.xml1
-rw-r--r--man/systemd-gpt-auto-generator.xml2
-rw-r--r--man/systemd-nspawn.xml67
-rw-r--r--man/systemd.nspawn.xml383
-rw-r--r--man/systemd.unit.xml2
-rw-r--r--shell-completion/bash/systemd-analyze6
-rw-r--r--src/basic/cgroup-util.c2
-rw-r--r--src/basic/conf-files.h7
-rw-r--r--src/basic/util.c2
-rw-r--r--src/basic/virt.c395
-rw-r--r--src/basic/virt.h46
-rw-r--r--src/core/dbus-manager.c6
-rw-r--r--src/core/job.c2
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c65
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/locale-setup.c2
-rw-r--r--src/core/machine-id-setup.c30
-rw-r--r--src/core/main.c14
-rw-r--r--src/core/manager.c6
-rw-r--r--src/core/mount-setup.c4
-rw-r--r--src/core/shutdown.c2
-rw-r--r--src/core/swap.c8
-rw-r--r--src/core/umount.c2
-rw-r--r--src/core/unit.c21
-rw-r--r--src/detect-virt/detect-virt.c31
-rw-r--r--src/fstab-generator/fstab-generator.c2
-rw-r--r--src/getty-generator/getty-generator.c2
-rw-r--r--src/gpt-auto-generator/gpt-auto-generator.c100
-rw-r--r--src/hostname/hostnamed.c6
-rw-r--r--src/journal/journald-kmsg.c2
-rw-r--r--src/libsystemd-network/dhcp-identifier.c2
-rw-r--r--src/libsystemd-network/test-dhcp6-client.c12
-rw-r--r--src/libsystemd/sd-bus/bus-container.c50
-rw-r--r--src/libsystemd/sd-event/sd-event.c2
-rw-r--r--src/libsystemd/sd-login/sd-login.c4
-rw-r--r--src/locale/localectl.c2
-rw-r--r--src/login/logind-dbus.c22
-rw-r--r--src/login/pam_systemd.c2
-rw-r--r--src/machine/machine-dbus.c8
-rw-r--r--src/machine/machine.c14
-rw-r--r--src/network/networkd-link.c2
-rw-r--r--src/network/networkd-manager.c2
-rw-r--r--src/nspawn/.gitignore1
-rw-r--r--src/nspawn/nspawn-cgroup.c162
-rw-r--r--src/nspawn/nspawn-cgroup.h29
-rw-r--r--src/nspawn/nspawn-expose-ports.c277
-rw-r--r--src/nspawn/nspawn-expose-ports.h45
-rw-r--r--src/nspawn/nspawn-gperf.gperf38
-rw-r--r--src/nspawn/nspawn-mount.c869
-rw-r--r--src/nspawn/nspawn-mount.h70
-rw-r--r--src/nspawn/nspawn-network.c440
-rw-r--r--src/nspawn/nspawn-network.h36
-rw-r--r--src/nspawn/nspawn-register.c244
-rw-r--r--src/nspawn/nspawn-register.h31
-rw-r--r--src/nspawn/nspawn-settings.c262
-rw-r--r--src/nspawn/nspawn-settings.h87
-rw-r--r--src/nspawn/nspawn-setuid.c272
-rw-r--r--src/nspawn/nspawn-setuid.h24
-rw-r--r--src/nspawn/nspawn.c2589
-rw-r--r--src/resolve/resolved-dns-packet.h2
-rw-r--r--src/shared/condition.c9
-rw-r--r--src/shared/conf-parser.c96
-rw-r--r--src/shared/conf-parser.h3
-rw-r--r--src/shared/efivars.c2
-rw-r--r--src/shared/machine-image.c2
-rw-r--r--src/systemctl/systemctl.c28
-rw-r--r--src/test/test-architecture.c8
-rw-r--r--src/test/test-cgroup-util.c2
-rw-r--r--src/test/test-process-util.c2
-rw-r--r--src/udev/udev-builtin-path_id.c46
-rw-r--r--src/udev/udev-builtin-uaccess.c2
-rw-r--r--src/vconsole/vconsole-setup.c2
-rw-r--r--units/systemd-nspawn@.service.in2
80 files changed, 4465 insertions, 2633 deletions
diff --git a/Makefile-man.am b/Makefile-man.am
index ab68c81b1d..3b8038611b 100644
--- a/Makefile-man.am
+++ b/Makefile-man.am
@@ -130,6 +130,7 @@ MANPAGES += \
man/systemd.kill.5 \
man/systemd.link.5 \
man/systemd.mount.5 \
+ man/systemd.nspawn.5 \
man/systemd.path.5 \
man/systemd.preset.5 \
man/systemd.resource-control.5 \
@@ -2382,6 +2383,7 @@ EXTRA_DIST += \
man/systemd.mount.xml \
man/systemd.netdev.xml \
man/systemd.network.xml \
+ man/systemd.nspawn.xml \
man/systemd.path.xml \
man/systemd.preset.xml \
man/systemd.resource-control.xml \
diff --git a/Makefile.am b/Makefile.am
index 003308e580..8e0448f433 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -42,9 +42,9 @@ LIBUDEV_CURRENT=7
LIBUDEV_REVISION=4
LIBUDEV_AGE=6
-LIBSYSTEMD_CURRENT=10
-LIBSYSTEMD_REVISION=2
-LIBSYSTEMD_AGE=10
+LIBSYSTEMD_CURRENT=11
+LIBSYSTEMD_REVISION=0
+LIBSYSTEMD_AGE=11
# The following four libraries only exist for compatibility reasons,
# their version info should not be bumped anymore
@@ -155,6 +155,7 @@ noinst_PROGRAMS =
TESTS =
endif
udevlibexec_PROGRAMS =
+gperf_gperf_sources =
in_files = $(filter %.in,$(EXTRA_DIST))
in_in_files = $(filter %.in.in, $(in_files))
@@ -220,6 +221,7 @@ AM_CPPFLAGS = \
-I $(top_builddir)/src/journal \
-I $(top_srcdir)/src/timedate \
-I $(top_srcdir)/src/timesync \
+ -I $(top_srcdir)/src/nspawn \
-I $(top_srcdir)/src/resolve \
-I $(top_builddir)/src/resolve \
-I $(top_srcdir)/src/systemd \
@@ -2776,11 +2778,31 @@ systemd_cgtop_LDADD = \
# ------------------------------------------------------------------------------
systemd_nspawn_SOURCES = \
src/nspawn/nspawn.c \
+ src/nspawn/nspawn-settings.c \
+ src/nspawn/nspawn-settings.h \
+ src/nspawn/nspawn-mount.c \
+ src/nspawn/nspawn-mount.h \
+ src/nspawn/nspawn-network.c \
+ src/nspawn/nspawn-network.h \
+ src/nspawn/nspawn-expose-ports.c \
+ src/nspawn/nspawn-expose-ports.h \
+ src/nspawn/nspawn-cgroup.c \
+ src/nspawn/nspawn-cgroup.h \
+ src/nspawn/nspawn-register.c \
+ src/nspawn/nspawn-register.h \
+ src/nspawn/nspawn-setuid.c \
+ src/nspawn/nspawn-setuid.h \
src/core/mount-setup.c \
src/core/mount-setup.h \
src/core/loopback-setup.c \
src/core/loopback-setup.h
+nodist_systemd_nspawn_SOURCES = \
+ src/nspawn/nspawn-gperf.c
+
+gperf_gperf_sources += \
+ src/nspawn/nspawn-gperf.gperf
+
systemd_nspawn_CFLAGS = \
$(AM_CFLAGS) \
$(BLKID_CFLAGS) \
@@ -3486,7 +3508,7 @@ nodist_libudev_core_la_SOURCES = \
src/udev/keyboard-keys-to-name.h \
src/udev/net/link-config-gperf.c
-gperf_gperf_sources = \
+gperf_gperf_sources += \
src/udev/net/link-config-gperf.gperf
libudev_core_la_CFLAGS = \
diff --git a/NEWS b/NEWS
index af831a949d..6803c6588f 100644
--- a/NEWS
+++ b/NEWS
@@ -87,8 +87,8 @@ CHANGES WITH 226:
* systemd-nspawn's --bind= and --bind-ro= options have been
extended to allow creation of non-recursive bind mounts.
- * libsystemd gained two new calls sd_pid_get_cgroup() an
- sd_peer_get_cgroup() which returns the control group path of
+ * libsystemd gained two new calls sd_pid_get_cgroup() and
+ sd_peer_get_cgroup() which return the control group path of
a process or peer of a connected AF_UNIX socket. This
function call is particularly useful when implementing
delegated subtrees support in the control group hierarchy.
@@ -103,14 +103,22 @@ CHANGES WITH 226:
powerful PolicyKit policies, that make decisions depending
on these parameters.
- Contributions from: Cristian Rodríguez, Daniel Mack, David Herrmann,
- Eugene Yakubovich, Evgeny Vereshchagin, Filipe Brandenburger, Jan
- Alexander Steffens (heftig), Jan Synacek, Kay Sievers, Lennart
- Poettering, Mangix, Marcel Holtmann, Martin Pitt, Michal Sekletar, Peter
- Hutterer, Piotr Drąg, reverendhomer, Robin Hack, Susant Sahani, Sylvain
- Pasche, Thomas Hindoe Paaboel Andersen, Tom Gundersen
-
- -- Berlin, 2015-09-XX
+ * nspawn learnt support for .nspawn settings files, that may
+ accompany the image files or directories of containers, and
+ may contain additional settings for the container. This is
+ an alternative to configuring container parameters via the
+ nspawn command line.
+
+ Contributions from: Cristian Rodríguez, Daniel Mack, David
+ Herrmann, Eugene Yakubovich, Evgeny Vereshchagin, Filipe
+ Brandenburger, Hans de Goede, Jan Alexander Steffens, Jan
+ Synacek, Kay Sievers, Lennart Poettering, Mangix, Marcel
+ Holtmann, Martin Pitt, Michael Biebl, Michael Chapman, Michal
+ Sekletar, Peter Hutterer, Piotr Drąg, reverendhomer, Robin
+ Hack, Susant Sahani, Sylvain Pasche, Thomas Hindoe Paaboel
+ Andersen, Tom Gundersen, Torstein Husebø
+
+ -- Berlin, 2015-09-08
CHANGES WITH 225:
diff --git a/TODO b/TODO
index 587f17bc5e..4fdecebd0f 100644
--- a/TODO
+++ b/TODO
@@ -26,8 +26,6 @@ External:
Features:
-* sd-event: somehow handle multiple queued incoming signals
-
* sd-event: maybe add support for inotify events
* PID 1 should send out sd_notify("WATCHDOG=1") messages (for usage in the --user mode, and when run via nspawn)
@@ -205,8 +203,6 @@ Features:
* "machinectl list-images" should show os-release data, as well as machine-info data (including deployment level)
-* nspawn: when start a container "foobar" look for its configuration in a file "foobar.nspawn" in /etc/systemd/nspawn/ as well as next to the actualy directory or image to boot
-
* Port various tools to make use of verbs.[ch], where applicable
* "machinectl history"
diff --git a/configure.ac b/configure.ac
index a7070916fe..2024939ad0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@
AC_PREREQ([2.64])
AC_INIT([systemd],
- [225],
+ [226],
[http://github.com/systemd/systemd/issues],
[systemd],
[http://www.freedesktop.org/wiki/Software/systemd])
diff --git a/man/os-release.xml b/man/os-release.xml
index 4ca2e59706..d2e2598204 100644
--- a/man/os-release.xml
+++ b/man/os-release.xml
@@ -214,10 +214,11 @@
<varlistentry>
<term><varname>CPE_NAME=</varname></term>
- <listitem><para>A CPE name for the operating system, following
- the <ulink url="https://cpe.mitre.org/specification/">Common
+ <listitem><para>A CPE name for the operating system, in URI
+ binding syntax, following the
+ <ulink url="http://scap.nist.gov/specifications/cpe/">Common
Platform Enumeration Specification</ulink> as proposed by the
- MITRE Corporation. This field is optional. Example:
+ NIST. This field is optional. Example:
<literal>CPE_NAME="cpe:/o:fedoraproject:fedora:17"</literal>
</para></listitem>
</varlistentry>
diff --git a/man/systemctl.xml b/man/systemctl.xml
index 20d143741b..37ba4ab6de 100644
--- a/man/systemctl.xml
+++ b/man/systemctl.xml
@@ -918,6 +918,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<varname>RequiresOverridable=</varname>,
<varname>Requisite=</varname>,
<varname>RequisiteOverridable=</varname>,
+ <varname>ConsistsOf=</varname>,
<varname>Wants=</varname>, <varname>BindsTo=</varname>
dependencies. If no unit is specified,
<filename>default.target</filename> is implied.</para>
diff --git a/man/systemd-gpt-auto-generator.xml b/man/systemd-gpt-auto-generator.xml
index 27ec72c986..f569ea3cde 100644
--- a/man/systemd-gpt-auto-generator.xml
+++ b/man/systemd-gpt-auto-generator.xml
@@ -69,7 +69,7 @@
units are explicitly configured (for example, listed in
<citerefentry
project='man-pages'><refentrytitle>fstab</refentrytitle><manvolnum>5</manvolnum></citerefentry>),
- the units this generator creates are overriden, but additional
+ the units this generator creates are overridden, but additional
automatic dependencies might be created.</para>
<para>This generator will only look for root partitions on the
diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index 6165fe1357..bc5dacd98f 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0'?> <!--*-nxml-*-->
+<?xml version='1.0'?> <!--*- Mode: nxml; nxml-child-indent: 2; indent-tabs-mode: nil -*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
@@ -748,34 +748,86 @@
</varlistentry>
<varlistentry>
- <term><option>--volatile</option><replaceable>=MODE</replaceable></term>
+ <term><option>--volatile</option></term>
+ <term><option>--volatile=</option><replaceable>MODE</replaceable></term>
<listitem><para>Boots the container in volatile mode. When no
mode parameter is passed or when mode is specified as
- <literal>yes</literal> full volatile mode is enabled. This
+ <option>yes</option> full volatile mode is enabled. This
means the root directory is mounted as mostly unpopulated
<literal>tmpfs</literal> instance, and
<filename>/usr</filename> from the OS tree is mounted into it,
read-only (the system thus starts up with read-only OS
resources, but pristine state and configuration, any changes
to the either are lost on shutdown). When the mode parameter
- is specified as <literal>state</literal> the OS tree is
+ is specified as <option>state</option> the OS tree is
mounted read-only, but <filename>/var</filename> is mounted as
<literal>tmpfs</literal> instance into it (the system thus
starts up with read-only OS resources and configuration, but
pristine state, any changes to the latter are lost on
shutdown). When the mode parameter is specified as
- <literal>no</literal> (the default) the whole OS tree is made
+ <option>no</option> (the default) the whole OS tree is made
available writable.</para>
- <para>Note that setting this to <literal>yes</literal> or
- <literal>state</literal> will only work correctly with
+ <para>Note that setting this to <option>yes</option> or
+ <option>state</option> will only work correctly with
operating systems in the container that can boot up with only
<filename>/usr</filename> mounted, and are able to populate
<filename>/var</filename> automatically, as
needed.</para></listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>--settings=</option><replaceable>MODE</replaceable></term>
+
+ <listitem><para>Controls whether
+ <command>systemd-nspawn</command> shall search for and use
+ additional per-container settings from
+ <filename>.nspawn</filename> files. Takes a boolean or the
+ special values <option>override</option> or
+ <option>trusted</option>.</para>
+
+ <para>If enabled (the default) a settings file named after the
+ machine (as specified with the <option>--machine=</option>
+ setting, or derived from the directory or image file name)
+ with the suffix <filename>.nspawn</filename> is searched in
+ <filename>/etc/systemd/nspawn/</filename> and
+ <filename>/run/systemd/nspawn/</filename>. If it is found
+ there, its settings are read and used. If it is not found
+ there it is subsequently searched in the same directory as the
+ image file or in the immediate parent of the root directory of
+ the container. In this case, if the file is found its settings
+ will be also read and used, but potentially unsafe settings
+ are ignored. Note that in both these cases settings on the
+ command line take precedence over the corresponding settings
+ from loaded <filename>.nspawn</filename> files, if both are
+ specified. Unsafe settings are considered all settings that
+ elevate the container's privileges or grant access to
+ additional resources such as files or directories of the
+ host. For details about the format and contents of
+ <filename>.nspawn</filename> files consult
+ <citerefentry><refentrytitle>systemd.nspawn</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+ <para>If this option is set to <option>override</option> the
+ file is searched, read and used the same way, however the order of
+ precedence is reversed: settings read from the
+ <filename>.nspawn</filename> file will take precedence over
+ the corresponding command line options, if both are
+ specified.</para>
+
+ <para>If this option is set to <option>trusted</option> the
+ file is searched, read and used the same way, but regardless
+ if found in <filename>/etc/systemd/nspawn/</filename>,
+ <filename>/run/systemd/nspawn/</filename> or next to the image
+ file or container root directory, all settings will take
+ effect, however command line arguments still take precedence
+ over corresponding settings.</para>
+
+ <para>If disabled no <filename>.nspawn</filename> file is read
+ and no settings except the ones on the command line are in
+ effect.</para></listitem>
+ </varlistentry>
+
<xi:include href="standard-options.xml" xpointer="help" />
<xi:include href="standard-options.xml" xpointer="version" />
</variablelist>
@@ -859,6 +911,7 @@
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>systemd.nspawn</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
<citerefentry project='man-pages'><refentrytitle>chroot</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry project='mankier'><refentrytitle>dnf</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
<citerefentry project='die-net'><refentrytitle>yum</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
diff --git a/man/systemd.nspawn.xml b/man/systemd.nspawn.xml
new file mode 100644
index 0000000000..7bfafb424f
--- /dev/null
+++ b/man/systemd.nspawn.xml
@@ -0,0 +1,383 @@
+<?xml version='1.0'?> <!--*- Mode: nxml; nxml-child-indent: 2; indent-tabs-mode: nil -*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
+<!ENTITY % entities SYSTEM "custom-entities.ent" >
+%entities;
+]>
+
+<!--
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<refentry id="systemd.nspawn">
+
+ <refentryinfo>
+ <title>systemd.nspawn</title>
+ <productname>systemd</productname>
+
+ <authorgroup>
+ <author>
+ <contrib>Developer</contrib>
+ <firstname>Lennart</firstname>
+ <surname>Poettering</surname>
+ <email>lennart@poettering.net</email>
+ </author>
+ </authorgroup>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>systemd.nspawn</refentrytitle>
+ <manvolnum>5</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>systemd.nspawn</refname>
+ <refpurpose>Container settings</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <para><filename>/etc/systemd/nspawn/<replaceable>machine</replaceable>.nspawn</filename></para>
+ <para><filename>/run/systemd/nspawn/<replaceable>machine</replaceable>.nspawn</filename></para>
+ <para><filename>/var/lib/machines/<replaceable>machine</replaceable>.nspawn</filename></para>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>An nspawn container settings file (suffix
+ <filename>.nspawn</filename>) encodes additional runtime
+ information about a local container, and is searched, read and
+ used by
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ when starting a container. Files of this type are named after the
+ containers they define settings for. They are optional, and only
+ required for containers whose execution environment shall differ
+ from the defaults. Files of this type mostly contain settings that
+ may also be set on the <command>systemd-nspawn</command> command
+ line, and make it easier to persistently attach specific settings
+ to specific containers. The syntax of these files is inspired by
+ <filename>.desktop</filename> files following the <ulink
+ url="http://standards.freedesktop.org/desktop-entry-spec/latest/">XDG
+ Desktop Entry Specification</ulink>, which are in turn inspired by
+ Microsoft Windows <filename>.ini</filename> files.</para>
+
+ <para>Boolean arguments used in these settings files can be
+ written in various formats. For positive settings the strings
+ <option>1</option>, <option>yes</option>, <option>true</option>
+ and <option>on</option> are equivalent. For negative settings, the
+ strings <option>0</option>, <option>no</option>,
+ <option>false</option> and <option>off</option> are
+ equivalent.</para>
+
+ <para>Empty lines and lines starting with # or ; are
+ ignored. This may be used for commenting. Lines ending
+ in a backslash are concatenated with the following
+ line while reading and the backslash is replaced by a
+ space character. This may be used to wrap long lines.</para>
+
+ </refsect1>
+
+ <refsect1>
+ <title><filename>.nspawn</filename> File Discovery</title>
+
+ <para>Files are searched by appending the
+ <filename>.nspawn</filename> suffix to the machine name of the
+ container, as specified with the <option>--machine=</option>
+ switch of <command>systemd-nspawn</command>, or derived from the
+ directory or image file name. This file is first searched in
+ <filename>/etc/systemd/nspawn/</filename> and
+ <filename>/run/systemd/nspawn/</filename>. If found in these
+ directories its settings are read and all of them take full effect
+ (but are possibly overridden by corresponding command line
+ arguments). If not found the file will then be searched next to
+ the image file or in the immediate parent of the root directory of
+ the container. If the file is found there only a subset of the
+ settings will take effect however. All settings that possibly
+ elevate privileges or grant additional access to resources of the
+ host (such as files or directories) are ignored. To which options
+ this applies is documented below.</para>
+
+ <para>Persistent settings file created and maintained by the
+ administrator (and thus trusted) should be placed in
+ <filename>/etc/systemd/nspawn/</filename>, while automatically
+ downloaded (and thus potentially untrusted) settings files are
+ placed in <filename>/var/lib/machines/</filename> instead (next to
+ the container images), where their security impact is limited. In
+ order to add privileged settings to <filename>.nspawn</filename>
+ files acquired from the image vendor it is recommended to copy the
+ settings files into <filename>/etc/systemd/nspawn/</filename> and
+ edit them there, so that the privileged options become
+ available. The precise algorithm how the files are searched and
+ interpreted may be configured with
+ <command>systemd-nspawn</command>'s <option>--settings=</option>
+ switch, see
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ for details.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>[Exec] Section Options</title>
+
+ <para>Settings files may include an <literal>[Exec]</literal>
+ section, which carries various execution parameters:</para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term><varname>Boot=</varname></term>
+
+ <listitem><para>Takes a boolean argument, defaults to off. If
+ enabled <command>systemd-nspawn</command> will automatically
+ search for an <filename>init</filename> executable and invoke
+ it. In this case the specified parameters using
+ <varname>Parameters=</varname> are passed as additional
+ arguments to the <filename>init</filename> process. This
+ setting corresponds to the <option>--boot</option> switch on
+ the <command>systemd-nspawn</command> command
+ line. </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Parameters=</varname></term>
+
+ <listitem><para>Takes a space separated list of
+ arguments. This is either a command line, beginning with the
+ binary name to execute, or – if <varname>Boot=</varname> is
+ enabled – the list of arguments to pass to the init
+ process. This setting corresponds to the command line
+ parameters passed on the <command>systemd-nspawn</command>
+ command line.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Environment=</varname></term>
+
+ <listitem><para>Takes an environment variable assignment
+ consisting of key and value, separated by
+ <literal>=</literal>. Sets an environment variable for the
+ main process invoked in the container. This setting may be
+ used multiple times to set multiple environment variables. It
+ corresponds to the <option>--setenv=</option> command line
+ switch.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>User=</varname></term>
+
+ <listitem><para>Takes a UNIX user name. Specifies the user
+ name to invoke the main process of the container as. This user
+ must be known in the container's user database. This
+ corresponds to the <option>--user=</option> command line
+ switch.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Capability=</varname></term>
+ <term><varname>DropCapability=</varname></term>
+
+ <listitem><para>Takes a space separated list of Linux process
+ capabilities (see
+ <citerefentry><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+ for details). The <varname>Capability=</varname> setting
+ specifies additional capabilities to pass on top of the
+ default set of capabilities. The
+ <varname>DropCapability=</varname> setting specifies
+ capabilities to drop from the default set. These settings
+ correspond to the <option>--capability=</option> and
+ <option>--drop-capability=</option> command line
+ switches. Note that <varname>Capability=</varname> is a
+ privileged setting, and only takes effect in
+ <filename>.nspawn</filename> files in
+ <filename>/etc/systemd/nspawn/</filename> and
+ <filename>/run/system/nspawn/</filename> (see above). On the
+ other hand <varname>DropCapability=</varname> takes effect in
+ all cases.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Personality=</varname></term>
+
+ <listitem><para>Configures the kernel personality for the
+ container. This is equivalent to the
+ <option>--personality=</option> switch.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>MachineID=</varname></term>
+
+ <listitem><para>Configures the 128bit machine ID (UUID) to pass to
+ the container. This is equivalent to the
+ <option>--uuid=</option> command line switch. This option is
+ privileged (see above). </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>[Files] Section Options</title>
+
+ <para>Settings files may include a <literal>[Files]</literal>
+ section, which carries various parameters configuring the file
+ system of the container:</para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term><varname>ReadOnly=</varname></term>
+
+ <listitem><para>Takes a boolean argument, defaults to off. If
+ specified the container will be run with a read-only file
+ system. This setting corresponds to the
+ <option>--read-only</option> command line
+ switch.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Volatile=</varname></term>
+
+ <listitem><para>Takes a boolean argument, or the special value
+ <literal>state</literal>. This configures whether to run the
+ container with volatile state and/or configuration. This
+ option is equivalent to <option>--volatile=</option>, see
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ for details about the specific options
+ supported.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Bind=</varname></term>
+ <term><varname>BindReadOnly=</varname></term>
+
+ <listitem><para>Adds a bind mount from the host into the
+ container. Takes a single path, a pair of two paths separated
+ by a colon, or a triplet of two paths plus an option string
+ separated by colons. This option may be used multiple times to
+ configure multiple bind mounts. This option is equivalent to
+ the command line switches <option>--bind=</option> and
+ <option>--bind-ro=</option>, see
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ for details about the specific options supported. This setting
+ is privileged (see above).</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>TemporaryFileSystem=</varname></term>
+
+ <listitem><para>Adds a <literal>tmpfs</literal> mount to the
+ container. Takes a path or a pair of path and option string,
+ separated by a colon. This option may be used multiple times to
+ configure multiple <literal>tmpfs</literal> mounts. This
+ option is equivalent to the command line switch
+ <option>--tmpfs=</option>, see
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ for details about the specific options supported. This setting
+ is privileged (see above).</para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>[Network] Section Options</title>
+
+ <para>Settings files may include a <literal>[Network]</literal>
+ section, which carries various parameters configuring the network
+ connectivity of the container:</para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term><varname>Private=</varname></term>
+
+ <listitem><para>Takes a boolean argument, defaults to off. If
+ enabled the container will run in its own network namespace
+ and not share network interfaces and configuration with the
+ host. This setting corresponds to the
+ <option>--private-network</option> command line
+ switch.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>VirtualEthernet=</varname></term>
+
+ <listitem><para>Takes a boolean argument. Configures whether
+ to create a virtual ethernet connection
+ (<literal>veth</literal>) between host and the container. This
+ setting implies <varname>Private=yes</varname>. This setting
+ corresponds to the <option>--network-veth</option> command
+ line switch. This option is privileged (see
+ above).</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Interface=</varname></term>
+
+ <listitem><para>Takes a space separated list of interfaces to
+ add to the container. This option corresponds to the
+ <option>--network-interface=</option> command line switch and
+ implies <varname>Private=yes</varname>. This option is
+ privileged (see above).</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>MACVLAN=</varname></term>
+ <term><varname>IPVLAN=</varname></term>
+
+ <listitem><para>Takes a space separated list of interfaces to
+ add MACLVAN or IPVLAN interfaces to, which are then added to
+ the container. These options correspond to the
+ <option>--network-macvlan=</option> and
+ <option>--network-ipvlan=</option> command line switches and
+ imply <varname>Private=yes</varname>. These options are
+ privileged (see above).</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Bridge=</varname></term>
+
+ <listitem><para>Takes an interface name. This setting implies
+ <varname>VirtualEthernet=yes</varname> and
+ <varname>Private=yes</varname> and has the effect that the
+ host side of the created virtual Ethernet link is connected to
+ the specified bridge interface. This option corresponds to the
+ <option>--network-bridge=</option> command line switch. This
+ option is privileged (see above).</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>Port=</varname></term>
+
+ <listitem><para>Exposes a TCP or UDP port of the container on
+ the host. This option corresponds to the
+ <option>--port=</option> command line switch, see
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ for the precise syntax of the argument this option takes. This
+ option is privileged (see above).</para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <para>
+ <citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>systemd.directives</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+ </para>
+ </refsect1>
+
+</refentry>
diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml
index 407f6d32eb..ea58580bba 100644
--- a/man/systemd.unit.xml
+++ b/man/systemd.unit.xml
@@ -256,7 +256,7 @@
</refsect1>
<refsect1>
- <title>Unit Load Path</title>
+ <title>Unit File Load Path</title>
<para>Unit files are loaded from a set of paths determined during
compilation, described in the two tables below. Unit files found
diff --git a/shell-completion/bash/systemd-analyze b/shell-completion/bash/systemd-analyze
index 00947029c6..7a5f46ba1d 100644
--- a/shell-completion/bash/systemd-analyze
+++ b/shell-completion/bash/systemd-analyze
@@ -35,8 +35,8 @@ _systemd_analyze() {
local cur=${COMP_WORDS[COMP_CWORD]} prev=${COMP_WORDS[COMP_CWORD-1]}
local -A OPTS=(
- [STANDALONE]='--help --version --system --user --from-pattern --to-pattern --order --require --no-pager'
- [ARG]='-H --host -M --machine --fuzz --man'
+ [STANDALONE]='--help --version --system --user --order --require --no-pager --man'
+ [ARG]='-H --host -M --machine --fuzz --from-pattern --to-pattern '
)
local -A VERBS=(
@@ -102,7 +102,7 @@ _systemd_analyze() {
elif __contains_word "$verb" ${VERBS[VERIFY]}; then
if [[ $cur = -* ]]; then
- comps='--help --version --system --user --no-man'
+ comps='--help --version --system --user --man'
else
comps=$( compgen -A file -- "$cur" )
compopt -o filenames
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index a298b29382..f661a54d99 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -2211,7 +2211,7 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
[CGROUP_CONTROLLER_BLKIO] = "blkio",
[CGROUP_CONTROLLER_MEMORY] = "memory",
- [CGROUP_CONTROLLER_DEVICE] = "device",
+ [CGROUP_CONTROLLER_DEVICE] = "devices",
};
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
diff --git a/src/basic/conf-files.h b/src/basic/conf-files.h
index 3169a907f1..d8aebc5e5b 100644
--- a/src/basic/conf-files.h
+++ b/src/basic/conf-files.h
@@ -22,7 +22,6 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-
-int conf_files_list(char ***strv, const char *suffix, const char *root, const char *dir, ...);
-int conf_files_list_strv(char ***strv, const char *suffix, const char *root, const char* const* dirs);
-int conf_files_list_nulstr(char ***strv, const char *suffix, const char *root, const char *dirs);
+int conf_files_list(char ***ret, const char *suffix, const char *root, const char *dir, ...);
+int conf_files_list_strv(char ***ret, const char *suffix, const char *root, const char* const* dirs);
+int conf_files_list_nulstr(char ***ret, const char *suffix, const char *root, const char *dirs);
diff --git a/src/basic/util.c b/src/basic/util.c
index 86aacad307..3e41e6ad41 100644
--- a/src/basic/util.c
+++ b/src/basic/util.c
@@ -4817,7 +4817,7 @@ int shall_restore_state(void) {
int proc_cmdline(char **ret) {
assert(ret);
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return get_process_cmdline(1, 0, false, ret);
else
return read_one_line_file("/proc/cmdline", ret);
diff --git a/src/basic/virt.c b/src/basic/virt.c
index 4a4bebd528..1fc6c1baba 100644
--- a/src/basic/virt.c
+++ b/src/basic/virt.c
@@ -28,25 +28,24 @@
#include "virt.h"
#include "fileio.h"
-static int detect_vm_cpuid(const char **_id) {
+static int detect_vm_cpuid(void) {
/* Both CPUID and DMI are x86 specific interfaces... */
#if defined(__i386__) || defined(__x86_64__)
- static const char cpuid_vendor_table[] =
- "XenVMMXenVMM\0" "xen\0"
- "KVMKVMKVM\0" "kvm\0"
+ static const struct {
+ const char *cpuid;
+ int id;
+ } cpuid_vendor_table[] = {
+ { "XenVMMXenVMM", VIRTUALIZATION_XEN },
+ { "KVMKVMKVM", VIRTUALIZATION_KVM },
/* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
- "VMwareVMware\0" "vmware\0"
+ { "VMwareVMware", VIRTUALIZATION_VMWARE },
/* http://msdn.microsoft.com/en-us/library/ff542428.aspx */
- "Microsoft Hv\0" "microsoft\0";
+ { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
+ };
uint32_t eax, ecx;
- union {
- uint32_t sig32[3];
- char text[13];
- } sig = {};
- const char *j, *k;
bool hypervisor;
/* http://lwn.net/Articles/301888/ */
@@ -74,6 +73,11 @@ static int detect_vm_cpuid(const char **_id) {
hypervisor = !!(ecx & 0x80000000U);
if (hypervisor) {
+ union {
+ uint32_t sig32[3];
+ char text[13];
+ } sig = {};
+ unsigned j;
/* There is a hypervisor, see what it is */
eax = 0x40000000U;
@@ -88,57 +92,54 @@ static int detect_vm_cpuid(const char **_id) {
: "0" (eax)
);
- NULSTR_FOREACH_PAIR(j, k, cpuid_vendor_table)
- if (streq(sig.text, j)) {
- *_id = k;
- return 1;
- }
+ for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
+ if (streq(sig.text, cpuid_vendor_table[j].cpuid))
+ return cpuid_vendor_table[j].id;
- *_id = "other";
- return 0;
+ return VIRTUALIZATION_VM_OTHER;
}
#endif
- return 0;
+ return VIRTUALIZATION_NONE;
}
-static int detect_vm_devicetree(const char **_id) {
+static int detect_vm_device_tree(void) {
#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
_cleanup_free_ char *hvtype = NULL;
int r;
r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
- if (r >= 0) {
- if (streq(hvtype, "linux,kvm")) {
- *_id = "kvm";
- return 1;
- } else if (strstr(hvtype, "xen")) {
- *_id = "xen";
- return 1;
- }
- } else if (r == -ENOENT) {
+ if (r == -ENOENT) {
_cleanup_closedir_ DIR *dir = NULL;
struct dirent *dent;
dir = opendir("/proc/device-tree");
if (!dir) {
if (errno == ENOENT)
- return 0;
+ return VIRTUALIZATION_NONE;
return -errno;
}
- FOREACH_DIRENT(dent, dir, return -errno) {
- if (strstr(dent->d_name, "fw-cfg")) {
- *_id = "qemu";
- return 1;
- }
- }
- }
+ FOREACH_DIRENT(dent, dir, return -errno)
+ if (strstr(dent->d_name, "fw-cfg"))
+ return VIRTUALIZATION_QEMU;
+
+ return VIRTUALIZATION_NONE;
+ } else if (r < 0)
+ return r;
+
+ if (streq(hvtype, "linux,kvm"))
+ return VIRTUALIZATION_KVM;
+ else if (strstr(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+#else
+ return VIRTUALIZATION_NONE;
#endif
- return 0;
}
-static int detect_vm_dmi(const char **_id) {
+static int detect_vm_dmi(void) {
/* Both CPUID and DMI are x86 specific interfaces... */
#if defined(__i386__) || defined(__x86_64__)
@@ -149,188 +150,195 @@ static int detect_vm_dmi(const char **_id) {
"/sys/class/dmi/id/bios_vendor"
};
- static const char dmi_vendor_table[] =
- "QEMU\0" "qemu\0"
+ static const struct {
+ const char *vendor;
+ int id;
+ } dmi_vendor_table[] = {
+ { "QEMU", VIRTUALIZATION_QEMU },
/* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
- "VMware\0" "vmware\0"
- "VMW\0" "vmware\0"
- "innotek GmbH\0" "oracle\0"
- "Xen\0" "xen\0"
- "Bochs\0" "bochs\0"
- "Parallels\0" "parallels\0";
+ { "VMware", VIRTUALIZATION_VMWARE },
+ { "VMW", VIRTUALIZATION_VMWARE },
+ { "innotek GmbH", VIRTUALIZATION_ORACLE },
+ { "Xen", VIRTUALIZATION_XEN },
+ { "Bochs", VIRTUALIZATION_BOCHS },
+ { "Parallels", VIRTUALIZATION_PARALLELS },
+ };
unsigned i;
+ int r;
for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
_cleanup_free_ char *s = NULL;
- const char *j, *k;
- int r;
+ unsigned j;
r = read_one_line_file(dmi_vendors[i], &s);
if (r < 0) {
- if (r != -ENOENT)
- return r;
+ if (r == -ENOENT)
+ continue;
- continue;
+ return r;
}
- NULSTR_FOREACH_PAIR(j, k, dmi_vendor_table)
- if (startswith(s, j)) {
- *_id = k;
- return 1;
- }
+ for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
+ if (startswith(s, dmi_vendor_table[j].vendor))
+ return dmi_vendor_table[j].id;
}
#endif
- return 0;
+ return VIRTUALIZATION_NONE;
}
-/* Returns a short identifier for the various VM implementations */
-int detect_vm(const char **id) {
- _cleanup_free_ char *domcap = NULL, *cpuinfo_contents = NULL;
- static thread_local int cached_found = -1;
- static thread_local const char *cached_id = NULL;
- const char *_id = NULL, *_id_cpuid = NULL;
+static int detect_vm_xen(void) {
+ _cleanup_free_ char *domcap = NULL;
+ char *cap, *i;
int r;
- if (_likely_(cached_found >= 0)) {
+ r = read_one_line_file("/proc/xen/capabilities", &domcap);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
- if (id)
- *id = cached_id;
+ i = domcap;
+ while ((cap = strsep(&i, ",")))
+ if (streq(cap, "control_d"))
+ break;
- return cached_found;
- }
-
- /* Try xen capabilities file first, if not found try high-level hypervisor sysfs file:
- *
- * https://bugs.freedesktop.org/show_bug.cgi?id=77271 */
- r = read_one_line_file("/proc/xen/capabilities", &domcap);
- if (r >= 0) {
- char *cap, *i = domcap;
+ return cap ? VIRTUALIZATION_NONE : VIRTUALIZATION_XEN;
+}
- while ((cap = strsep(&i, ",")))
- if (streq(cap, "control_d"))
- break;
+static int detect_vm_hypervisor(void) {
+ _cleanup_free_ char *hvtype = NULL;
+ int r;
- if (!cap) {
- _id = "xen";
- r = 1;
- }
+ r = read_one_line_file("/sys/hypervisor/type", &hvtype);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
- goto finish;
+ if (streq(hvtype, "xen"))
+ return VIRTUALIZATION_XEN;
+ else
+ return VIRTUALIZATION_VM_OTHER;
+}
- } else if (r == -ENOENT) {
- _cleanup_free_ char *hvtype = NULL;
+static int detect_vm_uml(void) {
+ _cleanup_free_ char *cpuinfo_contents = NULL;
+ int r;
- r = read_one_line_file("/sys/hypervisor/type", &hvtype);
- if (r >= 0) {
- if (streq(hvtype, "xen")) {
- _id = "xen";
- r = 1;
- goto finish;
- }
- } else if (r != -ENOENT)
- return r;
- } else
+ /* Detect User-Mode Linux by reading /proc/cpuinfo */
+ r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
+ if (r < 0)
return r;
+ if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n"))
+ return VIRTUALIZATION_UML;
- /* this will set _id to "other" and return 0 for unknown hypervisors */
- r = detect_vm_cpuid(&_id);
+ return VIRTUALIZATION_NONE;
+}
- /* finish when found a known hypervisor other than kvm */
- if (r < 0 || (r > 0 && !streq(_id, "kvm")))
- goto finish;
+static int detect_vm_zvm(void) {
- _id_cpuid = _id;
+#if defined(__s390__)
+ _cleanup_free_ char *t = NULL;
+ int r;
- r = detect_vm_dmi(&_id);
+ r = get_status_field("/proc/sysinfo", "VM00 Control Program:", &t);
+ if (r == -ENOENT)
+ return VIRTUALIZATION_NONE;
+ if (r < 0)
+ return r;
- /* kvm with and without Virtualbox */
- /* Parallels exports KVMKVMKVM leaf */
- if (streq_ptr(_id_cpuid, "kvm")) {
- if (r > 0 && (streq(_id, "oracle") || streq(_id, "parallels")))
- goto finish;
+ if (streq(t, "z/VM"))
+ return VIRTUALIZATION_ZVM;
+ else
+ return VIRTUALIZATION_KVM;
+#else
+ return VIRTUALIZATION_NONE;
+#endif
+}
- _id = _id_cpuid;
- r = 1;
- goto finish;
- }
+/* Returns a short identifier for the various VM implementations */
+int detect_vm(void) {
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
+ int r;
- /* information from dmi */
- if (r != 0)
- goto finish;
+ if (cached_found >= 0)
+ return cached_found;
- r = detect_vm_devicetree(&_id);
- if (r != 0)
+ /* Try xen capabilities file first, if not found try
+ * high-level hypervisor sysfs file:
+ *
+ * https://bugs.freedesktop.org/show_bug.cgi?id=77271 */
+
+ r = detect_vm_xen();
+ if (r < 0)
+ return r;
+ if (r != VIRTUALIZATION_NONE)
goto finish;
- if (_id) {
- /* "other" */
- r = 1;
+ r = detect_vm_dmi();
+ if (r < 0)
+ return r;
+ if (r != VIRTUALIZATION_NONE)
goto finish;
- }
- /* Detect User-Mode Linux by reading /proc/cpuinfo */
- r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
+ r = detect_vm_cpuid();
if (r < 0)
return r;
- if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
- _id = "uml";
- r = 1;
+ if (r != VIRTUALIZATION_NONE)
goto finish;
- }
-#if defined(__s390__)
- {
- _cleanup_free_ char *t = NULL;
+ r = detect_vm_hypervisor();
+ if (r < 0)
+ return r;
+ if (r != VIRTUALIZATION_NONE)
+ goto finish;
- r = get_status_field("/proc/sysinfo", "VM00 Control Program:", &t);
- if (r >= 0) {
- if (streq(t, "z/VM"))
- _id = "zvm";
- else
- _id = "kvm";
- r = 1;
+ r = detect_vm_device_tree();
+ if (r < 0)
+ return r;
+ if (r != VIRTUALIZATION_NONE)
+ goto finish;
- goto finish;
- }
- }
-#endif
+ r = detect_vm_uml();
+ if (r < 0)
+ return r;
+ if (r != VIRTUALIZATION_NONE)
+ goto finish;
- r = 0;
+ r = detect_vm_zvm();
+ if (r < 0)
+ return r;
finish:
cached_found = r;
-
- cached_id = _id;
- if (id)
- *id = _id;
-
return r;
}
-int detect_container(const char **id) {
+int detect_container(void) {
- static thread_local int cached_found = -1;
- static thread_local const char *cached_id = NULL;
+ static const struct {
+ const char *value;
+ int id;
+ } value_table[] = {
+ { "lxc", VIRTUALIZATION_LXC },
+ { "lxc-libvirt", VIRTUALIZATION_LXC_LIBVIRT },
+ { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
+ { "docker", VIRTUALIZATION_DOCKER },
+ };
+ static thread_local int cached_found = _VIRTUALIZATION_INVALID;
_cleanup_free_ char *m = NULL;
- const char *_id = NULL, *e = NULL;
+ const char *e = NULL;
+ unsigned j;
int r;
- if (_likely_(cached_found >= 0)) {
-
- if (id)
- *id = cached_id;
-
+ if (cached_found >= 0)
return cached_found;
- }
/* /proc/vz exists in container and outside of the container,
* /proc/bc only outside of the container. */
if (access("/proc/vz", F_OK) >= 0 &&
access("/proc/bc", F_OK) < 0) {
- _id = "openvz";
- r = 1;
+ r = VIRTUALIZATION_OPENVZ;
goto finish;
}
@@ -340,7 +348,7 @@ int detect_container(const char **id) {
e = getenv("container");
if (isempty(e)) {
- r = 0;
+ r = VIRTUALIZATION_NONE;
goto finish;
}
} else {
@@ -369,7 +377,7 @@ int detect_container(const char **id) {
* as /proc/1/environ is only readable
* with privileges. */
- r = 0;
+ r = VIRTUALIZATION_NONE;
goto finish;
}
}
@@ -379,46 +387,49 @@ int detect_container(const char **id) {
e = m;
}
- /* We only recognize a selected few here, since we want to
- * enforce a redacted namespace */
- if (streq(e, "lxc"))
- _id ="lxc";
- else if (streq(e, "lxc-libvirt"))
- _id = "lxc-libvirt";
- else if (streq(e, "systemd-nspawn"))
- _id = "systemd-nspawn";
- else if (streq(e, "docker"))
- _id = "docker";
- else
- _id = "other";
+ for (j = 0; j < ELEMENTSOF(value_table); j++)
+ if (streq(e, value_table[j].value)) {
+ r = value_table[j].id;
+ goto finish;
+ }
- r = 1;
+ r = VIRTUALIZATION_NONE;
finish:
cached_found = r;
-
- cached_id = _id;
- if (id)
- *id = _id;
-
return r;
}
-/* Returns a short identifier for the various VM/container implementations */
-int detect_virtualization(const char **id) {
+int detect_virtualization(void) {
int r;
- r = detect_container(id);
- if (r < 0)
- return r;
- if (r > 0)
- return VIRTUALIZATION_CONTAINER;
-
- r = detect_vm(id);
- if (r < 0)
+ r = detect_container();
+ if (r != 0)
return r;
- if (r > 0)
- return VIRTUALIZATION_VM;
- return VIRTUALIZATION_NONE;
+ return detect_vm();
}
+
+static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
+ [VIRTUALIZATION_NONE] = "none",
+ [VIRTUALIZATION_KVM] = "kvm",
+ [VIRTUALIZATION_QEMU] = "qemu",
+ [VIRTUALIZATION_BOCHS] = "bochs",
+ [VIRTUALIZATION_XEN] = "xen",
+ [VIRTUALIZATION_UML] = "uml",
+ [VIRTUALIZATION_VMWARE] = "vmware",
+ [VIRTUALIZATION_ORACLE] = "oracle",
+ [VIRTUALIZATION_MICROSOFT] = "microsoft",
+ [VIRTUALIZATION_ZVM] = "zvm",
+ [VIRTUALIZATION_PARALLELS] = "parallels",
+ [VIRTUALIZATION_VM_OTHER] = "vm-other",
+
+ [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
+ [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
+ [VIRTUALIZATION_LXC] = "lxc",
+ [VIRTUALIZATION_OPENVZ] = "openvz",
+ [VIRTUALIZATION_DOCKER] = "docker",
+ [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(virtualization, int);
diff --git a/src/basic/virt.h b/src/basic/virt.h
index 7194ab2bf7..449e069901 100644
--- a/src/basic/virt.h
+++ b/src/basic/virt.h
@@ -21,15 +21,51 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-int detect_vm(const char **id);
-int detect_container(const char **id);
+#include <stdbool.h>
+
+#include "macro.h"
enum {
VIRTUALIZATION_NONE = 0,
- VIRTUALIZATION_VM,
- VIRTUALIZATION_CONTAINER,
+
+ VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_KVM = VIRTUALIZATION_VM_FIRST,
+ VIRTUALIZATION_QEMU,
+ VIRTUALIZATION_BOCHS,
+ VIRTUALIZATION_XEN,
+ VIRTUALIZATION_UML,
+ VIRTUALIZATION_VMWARE,
+ VIRTUALIZATION_ORACLE,
+ VIRTUALIZATION_MICROSOFT,
+ VIRTUALIZATION_ZVM,
+ VIRTUALIZATION_PARALLELS,
+ VIRTUALIZATION_VM_OTHER,
+ VIRTUALIZATION_VM_LAST = VIRTUALIZATION_VM_OTHER,
+
+ VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_SYSTEMD_NSPAWN = VIRTUALIZATION_CONTAINER_FIRST,
+ VIRTUALIZATION_LXC_LIBVIRT,
+ VIRTUALIZATION_LXC,
+ VIRTUALIZATION_OPENVZ,
+ VIRTUALIZATION_DOCKER,
+ VIRTUALIZATION_CONTAINER_OTHER,
+ VIRTUALIZATION_CONTAINER_LAST = VIRTUALIZATION_CONTAINER_OTHER,
+
_VIRTUALIZATION_MAX,
_VIRTUALIZATION_INVALID = -1
};
-int detect_virtualization(const char **id);
+static inline bool VIRTUALIZATION_IS_VM(int x) {
+ return x >= VIRTUALIZATION_VM_FIRST && x <= VIRTUALIZATION_VM_LAST;
+}
+
+static inline bool VIRTUALIZATION_IS_CONTAINER(int x) {
+ return x >= VIRTUALIZATION_CONTAINER_FIRST && x <= VIRTUALIZATION_CONTAINER_LAST;
+}
+
+int detect_vm(void);
+int detect_container(void);
+int detect_virtualization(void);
+
+const char *virtualization_to_string(int v) _const_;
+int virtualization_from_string(const char *s) _pure_;
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index 0b365391ec..4e5d67fc19 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -81,14 +81,10 @@ static int property_get_virtualization(
void *userdata,
sd_bus_error *error) {
- const char *id = NULL;
-
assert(bus);
assert(reply);
- detect_virtualization(&id);
-
- return sd_bus_message_append(reply, "s", id);
+ return sd_bus_message_append(reply, "s", virtualization_to_string(detect_virtualization()));
}
static int property_get_architecture(
diff --git a/src/core/job.c b/src/core/job.c
index 15f5cc0cc9..2a35d1e2de 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -1137,7 +1137,7 @@ void job_shutdown_magic(Job *j) {
/* In case messages on console has been disabled on boot */
j->unit->manager->no_console_output = false;
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return;
asynchronous_sync();
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 60b97722db..edd55b9e45 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -106,7 +106,7 @@ m4_define(`KILL_CONTEXT_CONFIG_ITEMS',
`$1.SendSIGKILL, config_parse_bool, 0, offsetof($1, kill_context.send_sigkill)
$1.SendSIGHUP, config_parse_bool, 0, offsetof($1, kill_context.send_sighup)
$1.KillMode, config_parse_kill_mode, 0, offsetof($1, kill_context.kill_mode)
-$1.KillSignal, config_parse_kill_signal, 0, offsetof($1, kill_context.kill_signal)'
+$1.KillSignal, config_parse_signal, 0, offsetof($1, kill_context.kill_signal)'
)m4_dnl
m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 745291c5c6..542bf8eb26 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -1048,7 +1048,7 @@ int config_parse_bounding_set(const char *unit,
cap = capability_from_name(t);
if (cap < 0) {
- log_syntax(unit, LOG_ERR, filename, line, errno, "Failed to parse capability in bounding set, ignoring: %s", t);
+ log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse capability in bounding set, ignoring: %s", t);
continue;
}
@@ -1143,39 +1143,8 @@ int config_parse_sysv_priority(const char *unit,
#endif
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");
-
DEFINE_CONFIG_PARSE_ENUM(config_parse_kill_mode, kill_mode, KillMode, "Failed to parse kill mode");
-int config_parse_kill_signal(const char *unit,
- const char *filename,
- unsigned line,
- const char *section,
- unsigned section_line,
- const char *lvalue,
- int ltype,
- const char *rvalue,
- void *data,
- void *userdata) {
-
- int *sig = data;
- int r;
-
- assert(filename);
- assert(lvalue);
- assert(rvalue);
- assert(sig);
-
- r = signal_from_string_try_harder(rvalue);
- if (r <= 0) {
- log_syntax(unit, LOG_ERR, filename, line, -r,
- "Failed to parse kill signal, ignoring: %s", rvalue);
- return 0;
- }
-
- *sig = r;
- return 0;
-}
-
int config_parse_exec_mount_flags(const char *unit,
const char *filename,
unsigned line,
@@ -3021,36 +2990,6 @@ int config_parse_job_mode_isolate(
return 0;
}
-int config_parse_personality(
- const char *unit,
- const char *filename,
- unsigned line,
- const char *section,
- unsigned section_line,
- const char *lvalue,
- int ltype,
- const char *rvalue,
- void *data,
- void *userdata) {
-
- unsigned long *personality = data, p;
-
- assert(filename);
- assert(lvalue);
- assert(rvalue);
- assert(personality);
-
- p = personality_from_string(rvalue);
- if (p == PERSONALITY_INVALID) {
- log_syntax(unit, LOG_ERR, filename, line, EINVAL,
- "Failed to parse personality, ignoring: %s", rvalue);
- return 0;
- }
-
- *personality = p;
- return 0;
-}
-
int config_parse_runtime_directory(
const char *unit,
const char *filename,
@@ -3720,7 +3659,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_sysv_priority, "SYSVPRIORITY" },
#endif
{ config_parse_kill_mode, "KILLMODE" },
- { config_parse_kill_signal, "SIGNAL" },
+ { config_parse_signal, "SIGNAL" },
{ config_parse_socket_listen, "SOCKET [...]" },
{ config_parse_socket_bind, "SOCKETBIND" },
{ config_parse_socket_bindtodevice, "NETWORKINTERFACE" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index fcca2b0221..1d128716c4 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -92,7 +92,6 @@ int config_parse_blockio_bandwidth(const char *unit, const char *filename, unsig
int config_parse_job_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_job_mode_isolate(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_selinux_context(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_personality(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_apparmor_profile(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_smack_process_label(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_address_families(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/locale-setup.c b/src/core/locale-setup.c
index 108072ca9f..6961c26674 100644
--- a/src/core/locale-setup.c
+++ b/src/core/locale-setup.c
@@ -35,7 +35,7 @@ int locale_setup(char ***environment) {
char *variables[_VARIABLE_LC_MAX] = {};
int r = 0, i;
- if (detect_container(NULL) <= 0) {
+ if (detect_container() <= 0) {
r = parse_env_file("/proc/cmdline", WHITESPACE,
"locale.LANG", &variables[VARIABLE_LANG],
"locale.LANGUAGE", &variables[VARIABLE_LANGUAGE],
diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c
index 2d5ae3b3b9..8f682c6d10 100644
--- a/src/core/machine-id-setup.c
+++ b/src/core/machine-id-setup.c
@@ -108,7 +108,7 @@ static int generate_machine_id(char id[34], const char *root) {
unsigned char *p;
sd_id128_t buf;
char *q;
- const char *vm_id, *dbus_machine_id;
+ const char *dbus_machine_id;
assert(id);
@@ -133,8 +133,8 @@ static int generate_machine_id(char id[34], const char *root) {
/* If that didn't work, see if we are running in a container,
* and a machine ID was passed in via $container_uuid the way
* libvirt/LXC does it */
- r = detect_container(NULL);
- if (r > 0) {
+
+ if (detect_container() > 0) {
_cleanup_free_ char *e = NULL;
r = getenv_for_pid(1, "container_uuid", &e);
@@ -146,26 +146,24 @@ static int generate_machine_id(char id[34], const char *root) {
}
}
- } else {
+ } else if (detect_vm() == VIRTUALIZATION_KVM) {
+
/* If we are not running in a container, see if we are
* running in qemu/kvm and a machine ID was passed in
* via -uuid on the qemu/kvm command line */
- r = detect_vm(&vm_id);
- if (r > 0 && streq(vm_id, "kvm")) {
- char uuid[36];
+ char uuid[36];
- fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
- if (fd >= 0) {
- r = loop_read_exact(fd, uuid, 36, false);
- safe_close(fd);
+ fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd >= 0) {
+ r = loop_read_exact(fd, uuid, 36, false);
+ safe_close(fd);
+ if (r >= 0) {
+ r = shorten_uuid(id, uuid);
if (r >= 0) {
- r = shorten_uuid(id, uuid);
- if (r >= 0) {
- log_info("Initializing machine ID from KVM UUID.");
- return 0;
- }
+ log_info("Initializing machine ID from KVM UUID.");
+ return 0;
}
}
}
diff --git a/src/core/main.c b/src/core/main.c
index 4cd2b08c38..fe8f1924bd 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -374,7 +374,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value) {
/* Note that log_parse_environment() handles 'debug'
* too, and sets the log level to LOG_DEBUG. */
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
log_set_target(LOG_TARGET_CONSOLE);
} else if (!in_initrd() && !value) {
@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) {
if (getpid() == 1)
umask(0);
- if (getpid() == 1 && detect_container(NULL) <= 0) {
+ if (getpid() == 1 && detect_container() <= 0) {
/* Running outside of a container as PID 1 */
arg_running_as = MANAGER_SYSTEM;
@@ -1551,14 +1551,14 @@ int main(int argc, char *argv[]) {
}
if (arg_running_as == MANAGER_SYSTEM) {
- const char *virtualization = NULL;
+ int v;
log_info(PACKAGE_STRING " running in %ssystem mode. (" SYSTEMD_FEATURES ")",
arg_action == ACTION_TEST ? "test " : "" );
- detect_virtualization(&virtualization);
- if (virtualization)
- log_info("Detected virtualization %s.", virtualization);
+ v = detect_virtualization();
+ if (v > 0)
+ log_info("Detected virtualization %s.", virtualization_to_string(v));
write_container_id();
@@ -2046,7 +2046,7 @@ finish:
/* Avoid the creation of new processes forked by the
* kernel; at this point, we will not listen to the
* signals anyway */
- if (detect_container(NULL) <= 0)
+ if (detect_container() <= 0)
(void) cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
diff --git a/src/core/manager.c b/src/core/manager.c
index fc10ddb5d9..56f2c92feb 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -554,7 +554,7 @@ int manager_new(ManagerRunningAs running_as, bool test_run, Manager **_m) {
return -ENOMEM;
#ifdef ENABLE_EFI
- if (running_as == MANAGER_SYSTEM && detect_container(NULL) <= 0)
+ if (running_as == MANAGER_SYSTEM && detect_container() <= 0)
boot_timestamps(&m->userspace_timestamp, &m->firmware_timestamp, &m->loader_timestamp);
#endif
@@ -2156,7 +2156,7 @@ void manager_send_unit_plymouth(Manager *m, Unit *u) {
if (m->running_as != MANAGER_SYSTEM)
return;
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return;
if (u->type != UNIT_SERVICE &&
@@ -2613,7 +2613,7 @@ static void manager_notify_finished(Manager *m) {
if (m->test_run)
return;
- if (m->running_as == MANAGER_SYSTEM && detect_container(NULL) <= 0) {
+ if (m->running_as == MANAGER_SYSTEM && detect_container() <= 0) {
/* Note that m->kernel_usec.monotonic is always at 0,
* and m->firmware_usec.monotonic and
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index c6f3569915..e84f80b61b 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -164,7 +164,7 @@ static int mount_one(const MountPoint *p, bool relabel) {
return 0;
/* Skip securityfs in a container */
- if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
+ if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
return 0;
/* The access mode here doesn't really matter too much, since
@@ -385,7 +385,7 @@ int mount_setup(bool loaded_policy) {
* nspawn and the container tools work out of the box. If
* specific setups need other settings they can reset the
* propagation mode to private if needed. */
- if (detect_container(NULL) <= 0)
+ if (detect_container() <= 0)
if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
diff --git a/src/core/shutdown.c b/src/core/shutdown.c
index aba16b4689..8a6fd25f31 100644
--- a/src/core/shutdown.c
+++ b/src/core/shutdown.c
@@ -202,7 +202,7 @@ int main(int argc, char *argv[]) {
log_info("Sending SIGKILL to remaining processes...");
broadcast_signal(SIGKILL, true, false);
- in_container = detect_container(NULL) > 0;
+ in_container = detect_container() > 0;
need_umount = !in_container;
need_swapoff = !in_container;
diff --git a/src/core/swap.c b/src/core/swap.c
index 4f3ddc9f04..2f462e339d 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -215,7 +215,7 @@ static int swap_add_default_dependencies(Swap *s) {
if (UNIT(s)->manager->running_as != MANAGER_SYSTEM)
return 0;
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return 0;
return unit_add_two_dependencies_by_name(UNIT(s), UNIT_BEFORE, UNIT_CONFLICTS, SPECIAL_UMOUNT_TARGET, NULL, true);
@@ -824,7 +824,7 @@ static int swap_start(Unit *u) {
assert(s->state == SWAP_DEAD || s->state == SWAP_FAILED);
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return -EPERM;
/* If there's a job for another swap unit for the same node
@@ -857,7 +857,7 @@ static int swap_stop(Unit *u) {
s->state == SWAP_ACTIVATING_DONE ||
s->state == SWAP_ACTIVE);
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return -EPERM;
swap_enter_deactivating(s);
@@ -1404,7 +1404,7 @@ static bool swap_supported(void) {
if (supported < 0)
supported =
access("/proc/swaps", F_OK) >= 0 &&
- detect_container(NULL) <= 0;
+ detect_container() <= 0;
return supported;
}
diff --git a/src/core/umount.c b/src/core/umount.c
index d59b5d0ffb..22dbe67259 100644
--- a/src/core/umount.c
+++ b/src/core/umount.c
@@ -368,7 +368,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
read-only mount anything as that brings no real
benefits, but might confuse the host, as we remount
the superblock here, not the bind mound. */
- if (detect_container(NULL) <= 0) {
+ if (detect_container() <= 0) {
/* We always try to remount directories
* read-only first, before we go on and umount
* them.
diff --git a/src/core/unit.c b/src/core/unit.c
index a5714adf38..24a6747b10 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -1416,6 +1416,7 @@ int unit_start(Unit *u) {
assert(u);
+ /* Units that aren't loaded cannot be started */
if (u->load_state != UNIT_LOADED)
return -EINVAL;
@@ -1444,6 +1445,15 @@ int unit_start(Unit *u) {
return -EPROTO;
}
+ /* Units of types that aren't supported cannot be
+ * started. Note that we do this test only after the condition
+ * checks, so that we rather return condition check errors
+ * (which are usually not considered a true failure) than "not
+ * supported" errors (which are considered a failure).
+ */
+ if (!unit_supported(u))
+ return -EOPNOTSUPP;
+
/* Forward to the main object, if we aren't it. */
following = unit_following(u);
if (following) {
@@ -1451,9 +1461,6 @@ int unit_start(Unit *u) {
return unit_start(following);
}
- if (!unit_supported(u))
- return -EOPNOTSUPP;
-
/* If it is stopped, but we cannot start it, then fail */
if (!UNIT_VTABLE(u)->start)
return -EBADR;
@@ -1472,6 +1479,12 @@ int unit_start(Unit *u) {
bool unit_can_start(Unit *u) {
assert(u);
+ if (u->load_state != UNIT_LOADED)
+ return false;
+
+ if (!unit_supported(u))
+ return false;
+
return !!UNIT_VTABLE(u)->start;
}
@@ -3497,7 +3510,7 @@ int unit_kill_context(
* them.*/
if (cg_unified() > 0 ||
- (detect_container(NULL) == 0 && !unit_cgroup_delegate(u)))
+ (detect_container() == 0 && !unit_cgroup_delegate(u)))
wait_for_exit = true;
if (c->send_sighup && k != KILL_KILL) {
diff --git a/src/detect-virt/detect-virt.c b/src/detect-virt/detect-virt.c
index 606d073cbc..97ae569ca5 100644
--- a/src/detect-virt/detect-virt.c
+++ b/src/detect-virt/detect-virt.c
@@ -108,9 +108,7 @@ static int parse_argv(int argc, char *argv[]) {
}
int main(int argc, char *argv[]) {
- const char *id = NULL;
- int retval = EXIT_SUCCESS;
- int r;
+ int retval = EXIT_SUCCESS, r;
/* This is mostly intended to be used for scripts which want
* to detect whether we are being run in a virtualized
@@ -125,42 +123,39 @@ int main(int argc, char *argv[]) {
switch (arg_mode) {
- case ANY_VIRTUALIZATION: {
- int v;
-
- v = detect_virtualization(&id);
- if (v < 0) {
- log_error_errno(v, "Failed to check for virtualization: %m");
+ case ONLY_VM:
+ r = detect_vm();
+ if (r < 0) {
+ log_error_errno(r, "Failed to check for vm: %m");
return EXIT_FAILURE;
}
- retval = v != VIRTUALIZATION_NONE ? EXIT_SUCCESS : EXIT_FAILURE;
break;
- }
case ONLY_CONTAINER:
- r = detect_container(&id);
+ r = detect_container();
if (r < 0) {
log_error_errno(r, "Failed to check for container: %m");
return EXIT_FAILURE;
}
- retval = r > 0 ? EXIT_SUCCESS : EXIT_FAILURE;
break;
- case ONLY_VM:
- r = detect_vm(&id);
+ case ANY_VIRTUALIZATION:
+ default:
+ r = detect_virtualization();
if (r < 0) {
- log_error_errno(r, "Failed to check for vm: %m");
+ log_error_errno(r, "Failed to check for virtualization: %m");
return EXIT_FAILURE;
}
- retval = r > 0 ? EXIT_SUCCESS : EXIT_FAILURE;
break;
}
if (!arg_quiet)
- puts(id ? id : "none");
+ puts(virtualization_to_string(r));
+
+ retval = r != VIRTUALIZATION_NONE ? EXIT_SUCCESS : EXIT_FAILURE;
return retval;
}
diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c
index a88b68e2c0..3f8ea5647c 100644
--- a/src/fstab-generator/fstab-generator.c
+++ b/src/fstab-generator/fstab-generator.c
@@ -65,7 +65,7 @@ static int add_swap(
return 0;
}
- if (detect_container(NULL) > 0) {
+ if (detect_container() > 0) {
log_info("Running in a container, ignoring fstab swap entry for %s.", what);
return 0;
}
diff --git a/src/getty-generator/getty-generator.c b/src/getty-generator/getty-generator.c
index d23caab44a..9a4b038ef3 100644
--- a/src/getty-generator/getty-generator.c
+++ b/src/getty-generator/getty-generator.c
@@ -142,7 +142,7 @@ int main(int argc, char *argv[]) {
umask(0022);
- if (detect_container(NULL) > 0) {
+ if (detect_container() > 0) {
_cleanup_free_ char *container_ttys = NULL;
log_debug("Automatically adding console shell.");
diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c
index 0a34f86be7..bb821797f1 100644
--- a/src/gpt-auto-generator/gpt-auto-generator.c
+++ b/src/gpt-auto-generator/gpt-auto-generator.c
@@ -460,7 +460,7 @@ static int add_boot(const char *what) {
return 0;
}
- if (detect_container(NULL) > 0) {
+ if (detect_container() > 0) {
log_debug("In a container, ignoring /boot.");
return 0;
}
@@ -526,9 +526,9 @@ static int add_boot(const char *what) {
what,
"/boot",
"vfat",
- "EFI System Partition Automount",
- false,
+ true,
"umask=0077",
+ "EFI System Partition Automount",
120 * USEC_PER_SEC);
return r;
@@ -666,6 +666,7 @@ static int enumerate_partitions(dev_t devnum) {
first = udev_enumerate_get_list_entry(e);
udev_list_entry_foreach(item, first) {
_cleanup_udev_device_unref_ struct udev_device *q;
+ unsigned long long flags;
const char *stype, *subnode;
sd_id128_t type_id;
blkid_partition pp;
@@ -705,10 +706,10 @@ static int enumerate_partitions(dev_t devnum) {
if (sd_id128_from_string(stype, &type_id) < 0)
continue;
+ flags = blkid_partition_get_flags(pp);
+
if (sd_id128_equal(type_id, GPT_SWAP)) {
- unsigned long long flags;
- flags = blkid_partition_get_flags(pp);
if (flags & GPT_FLAG_NO_AUTO)
continue;
@@ -727,6 +728,10 @@ static int enumerate_partitions(dev_t devnum) {
if (boot && nr >= boot_nr)
continue;
+ /* Note that we do not honour the "no-auto"
+ * flag for the ESP, as it is often unset, to
+ * hide it from Windows. */
+
boot_nr = nr;
r = free_and_strdup(&boot, subnode);
@@ -734,9 +739,7 @@ static int enumerate_partitions(dev_t devnum) {
return log_oom();
} else if (sd_id128_equal(type_id, GPT_HOME)) {
- unsigned long long flags;
- flags = blkid_partition_get_flags(pp);
if (flags & GPT_FLAG_NO_AUTO)
continue;
@@ -752,9 +755,7 @@ static int enumerate_partitions(dev_t devnum) {
return log_oom();
} else if (sd_id128_equal(type_id, GPT_SRV)) {
- unsigned long long flags;
- flags = blkid_partition_get_flags(pp);
if (flags & GPT_FLAG_NO_AUTO)
continue;
@@ -799,6 +800,10 @@ static int get_block_device(const char *path, dev_t *dev) {
assert(path);
assert(dev);
+ /* Get's the block device directly backing a file system. If
+ * the block device is encrypted, returns the device mapper
+ * block device. */
+
if (lstat(path, &st))
return -errno;
@@ -816,6 +821,77 @@ static int get_block_device(const char *path, dev_t *dev) {
return 0;
}
+static int get_block_device_harder(const char *path, dev_t *dev) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *p = NULL, *t = NULL;
+ struct dirent *de, *found = NULL;
+ const char *q;
+ unsigned maj, min;
+ dev_t dt;
+ int r;
+
+ assert(path);
+ assert(dev);
+
+ /* Gets the backing block device for a file system, and
+ * handles LUKS encrypted file systems, looking for its
+ * immediate parent, if there is one. */
+
+ r = get_block_device(path, &dt);
+ if (r <= 0)
+ return r;
+
+ if (asprintf(&p, "/sys/dev/block/%u:%u/slaves", major(dt), minor(dt)) < 0)
+ return -ENOMEM;
+
+ d = opendir(p);
+ if (!d) {
+ if (errno == ENOENT)
+ goto fallback;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+
+ if (STR_IN_SET(de->d_name, ".", ".."))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
+ continue;
+
+ if (found) /* Don't try to support multiple backing block devices */
+ goto fallback;
+
+ found = de;
+ break;
+ }
+
+ if (!found)
+ goto fallback;
+
+ q = strjoina(p, "/", found->d_name, "/dev");
+
+ r = read_one_line_file(q, &t);
+ if (r == -ENOENT)
+ goto fallback;
+ if (r < 0)
+ return r;
+
+ if (sscanf(t, "%u:%u", &maj, &min) != 2)
+ return -EINVAL;
+
+ if (maj == 0)
+ goto fallback;
+
+ *dev = makedev(maj, min);
+ return 1;
+
+fallback:
+ *dev = dt;
+ return 1;
+}
+
static int parse_proc_cmdline_item(const char *key, const char *value) {
int r;
@@ -883,11 +959,11 @@ static int add_mounts(void) {
dev_t devno;
int r;
- r = get_block_device("/", &devno);
+ r = get_block_device_harder("/", &devno);
if (r < 0)
return log_error_errno(r, "Failed to determine block device of root file system: %m");
else if (r == 0) {
- r = get_block_device("/usr", &devno);
+ r = get_block_device_harder("/usr", &devno);
if (r < 0)
return log_error_errno(r, "Failed to determine block device of /usr file system: %m");
else if (r == 0) {
@@ -916,7 +992,7 @@ int main(int argc, char *argv[]) {
umask(0022);
- if (detect_container(NULL) > 0) {
+ if (detect_container() > 0) {
log_debug("In a container, exiting.");
return EXIT_SUCCESS;
}
diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c
index c423be3767..e90ae54156 100644
--- a/src/hostname/hostnamed.c
+++ b/src/hostname/hostnamed.c
@@ -155,11 +155,11 @@ static const char* fallback_chassis(void) {
unsigned t;
int v;
- v = detect_virtualization(NULL);
+ v = detect_virtualization();
- if (v == VIRTUALIZATION_VM)
+ if (VIRTUALIZATION_IS_VM(v))
return "vm";
- if (v == VIRTUALIZATION_CONTAINER)
+ if (VIRTUALIZATION_IS_CONTAINER(v))
return "container";
r = read_one_line_file("/sys/firmware/acpi/pm_profile", &type);
diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c
index e5be7f7766..51fe3aa50a 100644
--- a/src/journal/journald-kmsg.c
+++ b/src/journal/journald-kmsg.c
@@ -190,7 +190,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
char *m;
- /* Meta data fields attached */
+ /* Metadata fields attached */
if (*k != ' ')
break;
diff --git a/src/libsystemd-network/dhcp-identifier.c b/src/libsystemd-network/dhcp-identifier.c
index 70c68ad131..7d9cad2a70 100644
--- a/src/libsystemd-network/dhcp-identifier.c
+++ b/src/libsystemd-network/dhcp-identifier.c
@@ -66,7 +66,7 @@ int dhcp_identifier_set_iaid(int ifindex, uint8_t *mac, size_t mac_len, void *_i
const char *name = NULL;
uint64_t id;
- if (detect_container(NULL) <= 0) {
+ if (detect_container() <= 0) {
/* not in a container, udev will be around */
_cleanup_udev_unref_ struct udev *udev;
char ifindex_str[2 + DECIMAL_STR_MAX(int)];
diff --git a/src/libsystemd-network/test-dhcp6-client.c b/src/libsystemd-network/test-dhcp6-client.c
index 12daac3211..9c7f9ffb1b 100644
--- a/src/libsystemd-network/test-dhcp6-client.c
+++ b/src/libsystemd-network/test-dhcp6-client.c
@@ -357,18 +357,6 @@ static int test_hangcheck(sd_event_source *s, uint64_t usec, void *userdata) {
return 0;
}
-int detect_vm(const char **id) {
- return 1;
-}
-
-int detect_container(const char **id) {
- return 1;
-}
-
-int detect_virtualization(const char **id) {
- return 1;
-}
-
static void test_client_solicit_cb(sd_dhcp6_client *client, int event,
void *userdata) {
sd_event *e = userdata;
diff --git a/src/libsystemd/sd-bus/bus-container.c b/src/libsystemd/sd-bus/bus-container.c
index 56dc086ae2..5c607f49b1 100644
--- a/src/libsystemd/sd-bus/bus-container.c
+++ b/src/libsystemd/sd-bus/bus-container.c
@@ -29,10 +29,12 @@
#include "bus-container.h"
int bus_container_connect_socket(sd_bus *b) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
_cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
pid_t child;
siginfo_t si;
- int r;
+ int r, error_buf = 0;
+ ssize_t n;
assert(b);
assert(b->input_fd < 0);
@@ -57,6 +59,9 @@ int bus_container_connect_socket(sd_bus *b) {
bus_socket_setup(b);
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
+ return -errno;
+
child = fork();
if (child < 0)
return -errno;
@@ -64,9 +69,11 @@ int bus_container_connect_socket(sd_bus *b) {
if (child == 0) {
pid_t grandchild;
+ pair[0] = safe_close(pair[0]);
+
r = namespace_enter(pidnsfd, mntnsfd, -1, usernsfd, rootfd);
if (r < 0)
- _exit(255);
+ _exit(EXIT_FAILURE);
/* We just changed PID namespace, however it will only
* take effect on the children we now fork. Hence,
@@ -77,16 +84,16 @@ int bus_container_connect_socket(sd_bus *b) {
grandchild = fork();
if (grandchild < 0)
- _exit(255);
+ _exit(EXIT_FAILURE);
if (grandchild == 0) {
r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
if (r < 0) {
- if (errno == EINPROGRESS)
- _exit(1);
-
- _exit(255);
+ /* Try to send error up */
+ error_buf = errno;
+ (void) write(pair[1], &error_buf, sizeof(error_buf));
+ _exit(EXIT_FAILURE);
}
_exit(EXIT_SUCCESS);
@@ -94,24 +101,41 @@ int bus_container_connect_socket(sd_bus *b) {
r = wait_for_terminate(grandchild, &si);
if (r < 0)
- _exit(255);
+ _exit(EXIT_FAILURE);
if (si.si_code != CLD_EXITED)
- _exit(255);
+ _exit(EXIT_FAILURE);
_exit(si.si_status);
}
+ pair[1] = safe_close(pair[1]);
+
r = wait_for_terminate(child, &si);
if (r < 0)
return r;
+ n = read(pair[0], &error_buf, sizeof(error_buf));
+ if (n < 0)
+ return -errno;
+
+ if (n > 0) {
+ if (n != sizeof(error_buf))
+ return -EIO;
+
+ if (error_buf < 0)
+ return -EIO;
+
+ if (error_buf == EINPROGRESS)
+ return 1;
+
+ if (error_buf > 0)
+ return -error_buf;
+ }
+
if (si.si_code != CLD_EXITED)
return -EIO;
- if (si.si_status == 1)
- return 1;
-
if (si.si_status != EXIT_SUCCESS)
return -EIO;
@@ -157,7 +181,7 @@ int bus_container_connect_kernel(sd_bus *b) {
if (r < 0)
return r;
- if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, pair) < 0)
return -errno;
child = fork();
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index 62b63ec3d9..fd39a56225 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -1708,7 +1708,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
s->enabled = m;
- r = event_make_signal_data(s->event, s->signal.sig, NULL);
+ r = event_make_signal_data(s->event, SIGCHLD, NULL);
if (r < 0) {
s->enabled = SD_EVENT_OFF;
s->event->n_enabled_child_sources--;
diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c
index 55da26e9d9..265c7c7db2 100644
--- a/src/libsystemd/sd-login/sd-login.c
+++ b/src/libsystemd/sd-login/sd-login.c
@@ -645,10 +645,10 @@ _public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) {
return r;
if (session && !s)
- return -ENOENT;
+ return -ENODATA;
if (uid && !t)
- return -ENOENT;
+ return -ENODATA;
if (uid && t) {
r = parse_uid(t, uid);
diff --git a/src/locale/localectl.c b/src/locale/localectl.c
index 1e06f2251c..4a91c7420a 100644
--- a/src/locale/localectl.c
+++ b/src/locale/localectl.c
@@ -96,7 +96,7 @@ static void print_overridden_variables(void) {
LocaleVariable j;
bool print_warning = true;
- if (detect_container(NULL) > 0 || arg_host)
+ if (detect_container() > 0 || arg_host)
return;
r = parse_env_file("/proc/cmdline", WHITESPACE,
diff --git a/src/login/logind-dbus.c b/src/login/logind-dbus.c
index 14b6e0ddad..b6fa50aa52 100644
--- a/src/login/logind-dbus.c
+++ b/src/login/logind-dbus.c
@@ -1789,10 +1789,9 @@ static int nologin_timeout_handler(
}
static int update_schedule_file(Manager *m) {
-
- int r;
+ _cleanup_free_ char *temp_path = NULL;
_cleanup_fclose_ FILE *f = NULL;
- _cleanup_free_ char *t = NULL, *temp_path = NULL;
+ int r;
assert(m);
@@ -1800,12 +1799,6 @@ static int update_schedule_file(Manager *m) {
if (r < 0)
return log_error_errno(r, "Failed to create shutdown subdirectory: %m");
- if (!isempty(m->wall_message)) {
- t = cescape(m->wall_message);
- if (!t)
- return log_oom();
- }
-
r = fopen_temporary("/run/systemd/shutdown/scheduled", &f, &temp_path);
if (r < 0)
return log_error_errno(r, "Failed to save information about scheduled shutdowns: %m");
@@ -1820,8 +1813,17 @@ static int update_schedule_file(Manager *m) {
m->enable_wall_messages,
m->scheduled_shutdown_type);
- if (t)
+ if (!isempty(m->wall_message)) {
+ _cleanup_free_ char *t;
+
+ t = cescape(m->wall_message);
+ if (!t) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
fprintf(f, "WALL_MESSAGE=%s\n", t);
+ }
r = fflush_and_check(f);
if (r < 0)
diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c
index 71c84d8e0a..f66f1ce842 100644
--- a/src/login/pam_systemd.c
+++ b/src/login/pam_systemd.c
@@ -185,7 +185,7 @@ static int export_legacy_dbus_address(
if (asprintf(&s, KERNEL_USER_BUS_ADDRESS_FMT ";" UNIX_USER_BUS_ADDRESS_FMT, uid, runtime) < 0)
goto error;
} else {
- /* FIXME: We *realy* should move the access() check into the
+ /* FIXME: We *really* should move the access() check into the
* daemons that spawn dbus-daemon, instead of forcing
* DBUS_SESSION_BUS_ADDRESS= here. */
diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c
index fbeace0ed6..cc38116704 100644
--- a/src/machine/machine-dbus.c
+++ b/src/machine/machine-dbus.c
@@ -519,7 +519,7 @@ int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_
return sd_bus_send(NULL, reply, NULL);
}
-static int container_bus_new(Machine *m, sd_bus **ret) {
+static int container_bus_new(Machine *m, sd_bus_error *error, sd_bus **ret) {
int r;
assert(m);
@@ -548,6 +548,8 @@ static int container_bus_new(Machine *m, sd_bus **ret) {
bus->is_system = true;
r = sd_bus_start(bus);
+ if (r == -ENOENT)
+ return sd_bus_error_set_errnof(error, r, "There is no system bus in container %s.", m->name);
if (r < 0)
return r;
@@ -602,7 +604,7 @@ int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bu
if (!p)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PTS name %s is invalid", pty_name);
- r = container_bus_new(m, &allocated_bus);
+ r = container_bus_new(m, error, &allocated_bus);
if (r < 0)
return r;
@@ -704,7 +706,7 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
utmp_id = path_startswith(pty_name, "/dev/");
assert(utmp_id);
- r = container_bus_new(m, &allocated_bus);
+ r = container_bus_new(m, error, &allocated_bus);
if (r < 0)
return r;
diff --git a/src/machine/machine.c b/src/machine/machine.c
index 0d1b119dc1..a056ec3b08 100644
--- a/src/machine/machine.c
+++ b/src/machine/machine.c
@@ -547,8 +547,18 @@ int machine_openpt(Machine *m, int flags) {
switch (m->class) {
- case MACHINE_HOST:
- return posix_openpt(flags);
+ case MACHINE_HOST: {
+ int fd;
+
+ fd = posix_openpt(flags);
+ if (fd < 0)
+ return -errno;
+
+ if (unlockpt(fd) < 0)
+ return -errno;
+
+ return fd;
+ }
case MACHINE_CONTAINER:
if (m->leader <= 0)
diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c
index 1dc9db0fca..9d4a69b0db 100644
--- a/src/network/networkd-link.c
+++ b/src/network/networkd-link.c
@@ -2152,7 +2152,7 @@ int link_add(Manager *m, sd_netlink_message *message, Link **ret) {
log_link_debug(link, "Link %d added", link->ifindex);
- if (detect_container(NULL) <= 0) {
+ if (detect_container() <= 0) {
/* not in a container, udev will be around */
sprintf(ifindex_str, "n%d", link->ifindex);
device = udev_device_new_from_device_id(m->udev, ifindex_str);
diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c
index 92b607297d..b4259cafef 100644
--- a/src/network/networkd-manager.c
+++ b/src/network/networkd-manager.c
@@ -241,7 +241,7 @@ static int manager_connect_udev(Manager *m) {
/* udev does not initialize devices inside containers,
* so we rely on them being already initialized before
* entering the container */
- if (detect_container(NULL) > 0)
+ if (detect_container() > 0)
return 0;
m->udev = udev_new();
diff --git a/src/nspawn/.gitignore b/src/nspawn/.gitignore
new file mode 100644
index 0000000000..85c81fff24
--- /dev/null
+++ b/src/nspawn/.gitignore
@@ -0,0 +1 @@
+/nspawn-gperf.c
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
new file mode 100644
index 0000000000..c0e9ccd7a4
--- /dev/null
+++ b/src/nspawn/nspawn-cgroup.c
@@ -0,0 +1,162 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mount.h>
+
+#include "util.h"
+#include "strv.h"
+#include "mkdir.h"
+#include "fileio.h"
+#include "cgroup-util.h"
+
+#include "nspawn-cgroup.h"
+
+int chown_cgroup(pid_t pid, uid_t uid_shift) {
+ _cleanup_free_ char *path = NULL, *fs = NULL;
+ _cleanup_close_ int fd = -1;
+ const char *fn;
+ int r;
+
+ r = cg_pid_get_path(NULL, pid, &path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get container cgroup path: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+
+ fd = open(fs, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open %s: %m", fs);
+
+ FOREACH_STRING(fn,
+ ".",
+ "tasks",
+ "notify_on_release",
+ "cgroup.procs",
+ "cgroup.clone_children",
+ "cgroup.controllers",
+ "cgroup.subtree_control",
+ "cgroup.populated")
+ if (fchownat(fd, fn, uid_shift, uid_shift, 0) < 0)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to chown() cgroup file %s, ignoring: %m", fn);
+
+ return 0;
+}
+
+int sync_cgroup(pid_t pid, bool unified_requested) {
+ _cleanup_free_ char *cgroup = NULL;
+ char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
+ bool undo_mount = false;
+ const char *fn;
+ int unified, r;
+
+ unified = cg_unified();
+ if (unified < 0)
+ return log_error_errno(unified, "Failed to determine whether the unified hierachy is used: %m");
+
+ if ((unified > 0) == unified_requested)
+ return 0;
+
+ /* When the host uses the legacy cgroup setup, but the
+ * container shall use the unified hierarchy, let's make sure
+ * we copy the path from the name=systemd hierarchy into the
+ * unified hierarchy. Similar for the reverse situation. */
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid);
+
+ /* In order to access the unified hierarchy we need to mount it */
+ if (!mkdtemp(tree))
+ return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
+
+ if (unified)
+ r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
+ else
+ r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior");
+ if (r < 0) {
+ r = log_error_errno(errno, "Failed to mount unified hierarchy: %m");
+ goto finish;
+ }
+
+ undo_mount = true;
+
+ fn = strjoina(tree, cgroup, "/cgroup.procs");
+ (void) mkdir_parents(fn, 0755);
+
+ sprintf(pid_string, PID_FMT, pid);
+ r = write_string_file(fn, pid_string, 0);
+ if (r < 0)
+ log_error_errno(r, "Failed to move process: %m");
+
+finish:
+ if (undo_mount)
+ (void) umount(tree);
+
+ (void) rmdir(tree);
+ return r;
+}
+
+int create_subcgroup(pid_t pid, bool unified_requested) {
+ _cleanup_free_ char *cgroup = NULL;
+ const char *child;
+ int unified, r;
+ CGroupMask supported;
+
+ /* In the unified hierarchy inner nodes may only only contain
+ * subgroups, but not processes. Hence, if we running in the
+ * unified hierarchy and the container does the same, and we
+ * did not create a scope unit for the container move us and
+ * the container into two separate subcgroups. */
+
+ if (!unified_requested)
+ return 0;
+
+ unified = cg_unified();
+ if (unified < 0)
+ return log_error_errno(unified, "Failed to determine whether the unified hierachy is used: %m");
+ if (unified == 0)
+ return 0;
+
+ r = cg_mask_supported(&supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get our control group: %m");
+
+ child = strjoina(cgroup, "/payload");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+
+ child = strjoina(cgroup, "/supervisor");
+ r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+
+ /* Try to enable as many controllers as possible for the new payload. */
+ (void) cg_enable_everywhere(supported, supported, cgroup);
+ return 0;
+}
diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h
new file mode 100644
index 0000000000..985fdfaad5
--- /dev/null
+++ b/src/nspawn/nspawn-cgroup.h
@@ -0,0 +1,29 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <stdbool.h>
+
+int chown_cgroup(pid_t pid, uid_t uid_shift);
+int sync_cgroup(pid_t pid, bool unified_requested);
+int create_subcgroup(pid_t pid, bool unified_requested);
diff --git a/src/nspawn/nspawn-expose-ports.c b/src/nspawn/nspawn-expose-ports.c
new file mode 100644
index 0000000000..38250b6e02
--- /dev/null
+++ b/src/nspawn/nspawn-expose-ports.c
@@ -0,0 +1,277 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "sd-netlink.h"
+
+#include "util.h"
+#include "in-addr-util.h"
+#include "firewall-util.h"
+#include "local-addresses.h"
+#include "netlink-util.h"
+
+#include "nspawn-expose-ports.h"
+
+int expose_port_parse(ExposePort **l, const char *s) {
+
+ const char *split, *e;
+ uint16_t container_port, host_port;
+ int protocol;
+ ExposePort *p;
+ int r;
+
+ assert(l);
+ assert(s);
+
+ if ((e = startswith(s, "tcp:")))
+ protocol = IPPROTO_TCP;
+ else if ((e = startswith(s, "udp:")))
+ protocol = IPPROTO_UDP;
+ else {
+ e = s;
+ protocol = IPPROTO_TCP;
+ }
+
+ split = strchr(e, ':');
+ if (split) {
+ char v[split - e + 1];
+
+ memcpy(v, e, split - e);
+ v[split - e] = 0;
+
+ r = safe_atou16(v, &host_port);
+ if (r < 0 || host_port <= 0)
+ return -EINVAL;
+
+ r = safe_atou16(split + 1, &container_port);
+ } else {
+ r = safe_atou16(e, &container_port);
+ host_port = container_port;
+ }
+
+ if (r < 0 || container_port <= 0)
+ return -EINVAL;
+
+ LIST_FOREACH(ports, p, *l)
+ if (p->protocol == protocol && p->host_port == host_port)
+ return -EEXIST;
+
+ p = new(ExposePort, 1);
+ if (!p)
+ return -ENOMEM;
+
+ p->protocol = protocol;
+ p->host_port = host_port;
+ p->container_port = container_port;
+
+ LIST_PREPEND(ports, *l, p);
+
+ return 0;
+}
+
+void expose_port_free_all(ExposePort *p) {
+
+ while (p) {
+ ExposePort *q = p;
+ LIST_REMOVE(ports, p, q);
+ free(q);
+ }
+}
+
+int expose_port_flush(ExposePort* l, union in_addr_union *exposed) {
+ ExposePort *p;
+ int r, af = AF_INET;
+
+ assert(exposed);
+
+ if (!l)
+ return 0;
+
+ if (in_addr_is_null(af, exposed))
+ return 0;
+
+ log_debug("Lost IP address.");
+
+ LIST_FOREACH(ports, p, l) {
+ r = fw_add_local_dnat(false,
+ af,
+ p->protocol,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ p->host_port,
+ exposed,
+ p->container_port,
+ NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to modify firewall: %m");
+ }
+
+ *exposed = IN_ADDR_NULL;
+ return 0;
+}
+
+int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed) {
+ _cleanup_free_ struct local_address *addresses = NULL;
+ _cleanup_free_ char *pretty = NULL;
+ union in_addr_union new_exposed;
+ ExposePort *p;
+ bool add;
+ int af = AF_INET, r;
+
+ assert(exposed);
+
+ /* Invoked each time an address is added or removed inside the
+ * container */
+
+ if (!l)
+ return 0;
+
+ r = local_addresses(rtnl, 0, af, &addresses);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate local addresses: %m");
+
+ add = r > 0 &&
+ addresses[0].family == af &&
+ addresses[0].scope < RT_SCOPE_LINK;
+
+ if (!add)
+ return expose_port_flush(l, exposed);
+
+ new_exposed = addresses[0].address;
+ if (in_addr_equal(af, exposed, &new_exposed))
+ return 0;
+
+ in_addr_to_string(af, &new_exposed, &pretty);
+ log_debug("New container IP is %s.", strna(pretty));
+
+ LIST_FOREACH(ports, p, l) {
+
+ r = fw_add_local_dnat(true,
+ af,
+ p->protocol,
+ NULL,
+ NULL, 0,
+ NULL, 0,
+ p->host_port,
+ &new_exposed,
+ p->container_port,
+ in_addr_is_null(af, exposed) ? NULL : exposed);
+ if (r < 0)
+ log_warning_errno(r, "Failed to modify firewall: %m");
+ }
+
+ *exposed = new_exposed;
+ return 0;
+}
+
+int expose_port_send_rtnl(int send_fd) {
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ _cleanup_close_ int fd = -1;
+ ssize_t k;
+
+ assert(send_fd >= 0);
+
+ fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_ROUTE);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate container netlink: %m");
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+
+ mh.msg_controllen = cmsg->cmsg_len;
+
+ /* Store away the fd in the socket, so that it stays open as
+ * long as we run the child */
+ k = sendmsg(send_fd, &mh, MSG_NOSIGNAL);
+ if (k < 0)
+ return log_error_errno(errno, "Failed to send netlink fd: %m");
+
+ return 0;
+}
+
+int expose_port_watch_rtnl(
+ sd_event *event,
+ int recv_fd,
+ sd_netlink_message_handler_t handler,
+ union in_addr_union *exposed,
+ sd_netlink **ret) {
+
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
+ int fd, r;
+ ssize_t k;
+
+ assert(event);
+ assert(recv_fd >= 0);
+ assert(ret);
+
+ k = recvmsg(recv_fd, &mh, MSG_NOSIGNAL);
+ if (k < 0)
+ return log_error_errno(errno, "Failed to recv netlink fd: %m");
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ assert(cmsg->cmsg_level == SOL_SOCKET);
+ assert(cmsg->cmsg_type == SCM_RIGHTS);
+ assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
+ memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
+
+ r = sd_netlink_open_fd(&rtnl, fd);
+ if (r < 0) {
+ safe_close(fd);
+ return log_error_errno(r, "Failed to create rtnl object: %m");
+ }
+
+ r = sd_netlink_add_match(rtnl, RTM_NEWADDR, handler, exposed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR messages: %m");
+
+ r = sd_netlink_add_match(rtnl, RTM_DELADDR, handler, exposed);
+ if (r < 0)
+ return log_error_errno(r, "Failed to subscribe to RTM_DELADDR messages: %m");
+
+ r = sd_netlink_attach_event(rtnl, event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add to even loop: %m");
+
+ *ret = rtnl;
+ rtnl = NULL;
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-expose-ports.h b/src/nspawn/nspawn-expose-ports.h
new file mode 100644
index 0000000000..39cec28695
--- /dev/null
+++ b/src/nspawn/nspawn-expose-ports.h
@@ -0,0 +1,45 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+#include "sd-event.h"
+#include "sd-netlink.h"
+#include "list.h"
+#include "in-addr-util.h"
+
+typedef struct ExposePort {
+ int protocol;
+ uint16_t host_port;
+ uint16_t container_port;
+ LIST_FIELDS(struct ExposePort, ports);
+} ExposePort;
+
+void expose_port_free_all(ExposePort *p);
+int expose_port_parse(ExposePort **l, const char *s);
+
+int expose_port_watch_rtnl(sd_event *event, int recv_fd, sd_netlink_message_handler_t handler, union in_addr_union *exposed, sd_netlink **ret);
+int expose_port_send_rtnl(int send_fd);
+
+int expose_port_execute(sd_netlink *rtnl, ExposePort *l, union in_addr_union *exposed);
+int expose_port_flush(ExposePort* l, union in_addr_union *exposed);
diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf
new file mode 100644
index 0000000000..074b380306
--- /dev/null
+++ b/src/nspawn/nspawn-gperf.gperf
@@ -0,0 +1,38 @@
+%{
+#include <stddef.h>
+#include "conf-parser.h"
+#include "nspawn-settings.h"
+#include "nspawn-expose-ports.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name nspawn_gperf_hash
+%define lookup-function-name nspawn_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Exec.Boot, config_parse_tristate, 0, offsetof(Settings, boot)
+Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters)
+Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment)
+Exec.User, config_parse_string, 0, offsetof(Settings, user)
+Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability)
+Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, drop_capability)
+Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal)
+Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
+Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
+Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
+Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
+Files.Bind, config_parse_bind, 0, 0
+Files.BindReadOnly, config_parse_bind, 1, 0
+Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
+Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network)
+Network.Interface, config_parse_strv, 0, offsetof(Settings, network_interfaces)
+Network.MACVLAN, config_parse_strv, 0, offsetof(Settings, network_macvlan)
+Network.IPVLAN, config_parse_strv, 0, offsetof(Settings, network_ipvlan)
+Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth)
+Network.Bridge config_parse_string, 0, offsetof(Settings, network_bridge)
+Network.Port, config_parse_expose_port, 0, 0
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
new file mode 100644
index 0000000000..2bca39f45d
--- /dev/null
+++ b/src/nspawn/nspawn-mount.c
@@ -0,0 +1,869 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mount.h>
+
+#include "util.h"
+#include "rm-rf.h"
+#include "strv.h"
+#include "path-util.h"
+#include "mkdir.h"
+#include "label.h"
+#include "set.h"
+#include "cgroup-util.h"
+
+#include "nspawn-mount.h"
+
+CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
+ CustomMount *c, *ret;
+
+ assert(l);
+ assert(n);
+ assert(t >= 0);
+ assert(t < _CUSTOM_MOUNT_TYPE_MAX);
+
+ c = realloc(*l, (*n + 1) * sizeof(CustomMount));
+ if (!c)
+ return NULL;
+
+ *l = c;
+ ret = *l + *n;
+ (*n)++;
+
+ *ret = (CustomMount) { .type = t };
+
+ return ret;
+}
+
+void custom_mount_free_all(CustomMount *l, unsigned n) {
+ unsigned i;
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = l + i;
+
+ free(m->source);
+ free(m->destination);
+ free(m->options);
+
+ if (m->work_dir) {
+ (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
+ free(m->work_dir);
+ }
+
+ strv_free(m->lower);
+ }
+
+ free(l);
+}
+
+int custom_mount_compare(const void *a, const void *b) {
+ const CustomMount *x = a, *y = b;
+ int r;
+
+ r = path_compare(x->destination, y->destination);
+ if (r != 0)
+ return r;
+
+ if (x->type < y->type)
+ return -1;
+ if (x->type > y->type)
+ return 1;
+
+ return 0;
+}
+
+int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
+ _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
+ const char *p = s;
+ CustomMount *m;
+ int r;
+
+ assert(l);
+ assert(n);
+
+ r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (r == 1) {
+ destination = strdup(source);
+ if (!destination)
+ return -ENOMEM;
+ }
+
+ if (r == 2 && !isempty(p)) {
+ opts = strdup(p);
+ if (!opts)
+ return -ENOMEM;
+ }
+
+ if (!path_is_absolute(source))
+ return -EINVAL;
+
+ if (!path_is_absolute(destination))
+ return -EINVAL;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
+ if (!m)
+ return log_oom();
+
+ m->source = source;
+ m->destination = destination;
+ m->read_only = read_only;
+ m->options = opts;
+
+ source = destination = opts = NULL;
+ return 0;
+}
+
+int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
+ _cleanup_free_ char *path = NULL, *opts = NULL;
+ const char *p = s;
+ CustomMount *m;
+ int r;
+
+ assert(l);
+ assert(n);
+ assert(s);
+
+ r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ if (isempty(p))
+ opts = strdup("mode=0755");
+ else
+ opts = strdup(p);
+ if (!opts)
+ return -ENOMEM;
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
+ if (!m)
+ return -ENOMEM;
+
+ m->destination = path;
+ m->options = opts;
+
+ path = opts = NULL;
+ return 0;
+}
+
+static int tmpfs_patch_options(
+ const char *options,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context,
+ char **ret) {
+
+ char *buf = NULL;
+
+ if (userns && uid_shift != 0) {
+ assert(uid_shift != UID_INVALID);
+
+ if (options)
+ (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
+ else
+ (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
+ if (!buf)
+ return -ENOMEM;
+
+ options = buf;
+ }
+
+#ifdef HAVE_SELINUX
+ if (selinux_apifs_context) {
+ char *t;
+
+ if (options)
+ t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
+ else
+ t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
+ if (!t) {
+ free(buf);
+ return -ENOMEM;
+ }
+
+ free(buf);
+ buf = t;
+ }
+#endif
+
+ *ret = buf;
+ return !!buf;
+}
+
+int mount_all(const char *dest,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ typedef struct MountPoint {
+ const char *what;
+ const char *where;
+ const char *type;
+ const char *options;
+ unsigned long flags;
+ bool fatal;
+ bool userns;
+ } MountPoint;
+
+ static const MountPoint mount_table[] = {
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
+ { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
+ { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
+ { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
+ { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
+ { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
+#ifdef HAVE_SELINUX
+ { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
+ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
+#endif
+ };
+
+ unsigned k;
+ int r;
+
+ for (k = 0; k < ELEMENTSOF(mount_table); k++) {
+ _cleanup_free_ char *where = NULL, *options = NULL;
+ const char *o;
+
+ if (userns != mount_table[k].userns)
+ continue;
+
+ where = prefix_root(dest, mount_table[k].where);
+ if (!where)
+ return log_oom();
+
+ r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
+
+ /* Skip this entry if it is not a remount. */
+ if (mount_table[k].what && r > 0)
+ continue;
+
+ r = mkdir_p(where, 0755);
+ if (r < 0) {
+ if (mount_table[k].fatal)
+ return log_error_errno(r, "Failed to create directory %s: %m", where);
+
+ log_warning_errno(r, "Failed to create directory %s: %m", where);
+ continue;
+ }
+
+ o = mount_table[k].options;
+ if (streq_ptr(mount_table[k].type, "tmpfs")) {
+ r = tmpfs_patch_options(o, userns, uid_shift, uid_range, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ o = options;
+ }
+
+ if (mount(mount_table[k].what,
+ where,
+ mount_table[k].type,
+ mount_table[k].flags,
+ o) < 0) {
+
+ if (mount_table[k].fatal)
+ return log_error_errno(errno, "mount(%s) failed: %m", where);
+
+ log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
+ }
+ }
+
+ return 0;
+}
+
+static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
+ const char *p = options;
+ unsigned long flags = *mount_flags;
+ char *opts = NULL;
+
+ assert(options);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ int r = extract_first_word(&p, &word, ",", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract mount option: %m");
+ if (r == 0)
+ break;
+
+ if (streq(word, "rbind"))
+ flags |= MS_REC;
+ else if (streq(word, "norbind"))
+ flags &= ~MS_REC;
+ else {
+ log_error("Invalid bind mount option: %s", word);
+ return -EINVAL;
+ }
+ }
+
+ *mount_flags = flags;
+ /* in the future mount_opts will hold string options for mount(2) */
+ *mount_opts = opts;
+
+ return 0;
+}
+
+static int mount_bind(const char *dest, CustomMount *m) {
+ struct stat source_st, dest_st;
+ const char *where;
+ unsigned long mount_flags = MS_BIND | MS_REC;
+ _cleanup_free_ char *mount_opts = NULL;
+ int r;
+
+ assert(m);
+
+ if (m->options) {
+ r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
+ if (r < 0)
+ return r;
+ }
+
+ if (stat(m->source, &source_st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", m->source);
+
+ where = prefix_roota(dest, m->destination);
+
+ if (stat(where, &dest_st) >= 0) {
+ if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
+ log_error("Cannot bind mount directory %s on file %s.", m->source, where);
+ return -EINVAL;
+ }
+
+ if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
+ log_error("Cannot bind mount file %s on directory %s.", m->source, where);
+ return -EINVAL;
+ }
+
+ } else if (errno == ENOENT) {
+ r = mkdir_parents_label(where, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make parents of %s: %m", where);
+ } else {
+ log_error_errno(errno, "Failed to stat %s: %m", where);
+ return -errno;
+ }
+
+ /* Create the mount point. Any non-directory file can be
+ * mounted on any non-directory file (regular, fifo, socket,
+ * char, block).
+ */
+ if (S_ISDIR(source_st.st_mode))
+ r = mkdir_label(where, 0755);
+ else
+ r = touch(where);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Failed to create mount point %s: %m", where);
+
+ if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
+ return log_error_errno(errno, "mount(%s) failed: %m", where);
+
+ if (m->read_only) {
+ r = bind_remount_recursive(where, true);
+ if (r < 0)
+ return log_error_errno(r, "Read-only bind mount failed: %m");
+ }
+
+ return 0;
+}
+
+static int mount_tmpfs(
+ const char *dest,
+ CustomMount *m,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ const char *where, *options;
+ _cleanup_free_ char *buf = NULL;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ where = prefix_roota(dest, m->destination);
+
+ r = mkdir_p_label(where, 0755);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
+
+ r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ options = r > 0 ? buf : m->options;
+
+ if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
+ return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
+
+ return 0;
+}
+
+static char *joined_and_escaped_lower_dirs(char * const *lower) {
+ _cleanup_strv_free_ char **sv = NULL;
+
+ sv = strv_copy(lower);
+ if (!sv)
+ return NULL;
+
+ strv_reverse(sv);
+
+ if (!strv_shell_escape(sv, ",:"))
+ return NULL;
+
+ return strv_join(sv, ":");
+}
+
+static int mount_overlay(const char *dest, CustomMount *m) {
+ _cleanup_free_ char *lower = NULL;
+ const char *where, *options;
+ int r;
+
+ assert(dest);
+ assert(m);
+
+ where = prefix_roota(dest, m->destination);
+
+ r = mkdir_label(where, 0755);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
+
+ (void) mkdir_p_label(m->source, 0755);
+
+ lower = joined_and_escaped_lower_dirs(m->lower);
+ if (!lower)
+ return log_oom();
+
+ if (m->read_only) {
+ _cleanup_free_ char *escaped_source = NULL;
+
+ escaped_source = shell_escape(m->source, ",:");
+ if (!escaped_source)
+ return log_oom();
+
+ options = strjoina("lowerdir=", escaped_source, ":", lower);
+ } else {
+ _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
+
+ assert(m->work_dir);
+ (void) mkdir_label(m->work_dir, 0700);
+
+ escaped_source = shell_escape(m->source, ",:");
+ if (!escaped_source)
+ return log_oom();
+ escaped_work_dir = shell_escape(m->work_dir, ",:");
+ if (!escaped_work_dir)
+ return log_oom();
+
+ options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
+ }
+
+ if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
+ return log_error_errno(errno, "overlay mount to %s failed: %m", where);
+
+ return 0;
+}
+
+int mount_custom(
+ const char *dest,
+ CustomMount *mounts, unsigned n,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ unsigned i;
+ int r;
+
+ assert(dest);
+
+ for (i = 0; i < n; i++) {
+ CustomMount *m = mounts + i;
+
+ switch (m->type) {
+
+ case CUSTOM_MOUNT_BIND:
+ r = mount_bind(dest, m);
+ break;
+
+ case CUSTOM_MOUNT_TMPFS:
+ r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
+ break;
+
+ case CUSTOM_MOUNT_OVERLAY:
+ r = mount_overlay(dest, m);
+ break;
+
+ default:
+ assert_not_reached("Unknown custom mount type");
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
+ char *to;
+ int r;
+
+ to = strjoina(dest, "/sys/fs/cgroup/", hierarchy);
+
+ r = path_is_mount_point(to, 0);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
+ if (r > 0)
+ return 0;
+
+ mkdir_p(to, 0755);
+
+ /* The superblock mount options of the mount point need to be
+ * identical to the hosts', and hence writable... */
+ if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
+ return log_error_errno(errno, "Failed to mount to %s: %m", to);
+
+ /* ... hence let's only make the bind mount read-only, not the
+ * superblock. */
+ if (read_only) {
+ if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
+ return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
+ }
+ return 1;
+}
+
+static int mount_legacy_cgroups(
+ const char *dest,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_set_free_free_ Set *controllers = NULL;
+ const char *cgroup_root;
+ int r;
+
+ cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
+
+ /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+ r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+ if (r == 0) {
+ _cleanup_free_ char *options = NULL;
+
+ r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
+ if (r < 0)
+ return log_oom();
+
+ if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
+ return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
+ }
+
+ if (cg_unified() > 0)
+ goto skip_controllers;
+
+ controllers = set_new(&string_hash_ops);
+ if (!controllers)
+ return log_oom();
+
+ r = cg_kernel_controllers(controllers);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup controllers: %m");
+
+ for (;;) {
+ _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
+
+ controller = set_steal_first(controllers);
+ if (!controller)
+ break;
+
+ origin = prefix_root("/sys/fs/cgroup/", controller);
+ if (!origin)
+ return log_oom();
+
+ r = readlink_malloc(origin, &combined);
+ if (r == -EINVAL) {
+ /* Not a symbolic link, but directly a single cgroup hierarchy */
+
+ r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
+ if (r < 0)
+ return r;
+
+ } else if (r < 0)
+ return log_error_errno(r, "Failed to read link %s: %m", origin);
+ else {
+ _cleanup_free_ char *target = NULL;
+
+ target = prefix_root(dest, origin);
+ if (!target)
+ return log_oom();
+
+ /* A symbolic link, a combination of controllers in one hierarchy */
+
+ if (!filename_is_valid(combined)) {
+ log_warning("Ignoring invalid combined hierarchy %s.", combined);
+ continue;
+ }
+
+ r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
+ if (r < 0)
+ return r;
+
+ r = symlink_idempotent(combined, target);
+ if (r == -EINVAL) {
+ log_error("Invalid existing symlink for combined hierarchy");
+ return r;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
+ }
+ }
+
+skip_controllers:
+ r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
+ if (r < 0)
+ return r;
+
+ if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
+ return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
+
+ return 0;
+}
+
+static int mount_unified_cgroups(const char *dest) {
+ const char *p;
+ int r;
+
+ assert(dest);
+
+ p = strjoina(dest, "/sys/fs/cgroup");
+
+ r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
+ if (r > 0) {
+ p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs");
+ if (access(p, F_OK) >= 0)
+ return 0;
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
+
+ log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
+ return -EINVAL;
+ }
+
+ if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
+ return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
+
+ return 0;
+}
+
+int mount_cgroups(
+ const char *dest,
+ bool unified_requested,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ if (unified_requested)
+ return mount_unified_cgroups(dest);
+ else
+ return mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
+}
+
+int mount_systemd_cgroup_writable(
+ const char *dest,
+ bool unified_requested) {
+
+ _cleanup_free_ char *own_cgroup_path = NULL;
+ const char *systemd_root, *systemd_own;
+ int r;
+
+ assert(dest);
+
+ r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine our own cgroup path: %m");
+
+ /* If we are living in the top-level, then there's nothing to do... */
+ if (path_equal(own_cgroup_path, "/"))
+ return 0;
+
+ if (unified_requested) {
+ systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
+ systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
+ } else {
+ systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
+ systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
+ }
+
+ /* Make our own cgroup a (writable) bind mount */
+ if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
+ return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
+
+ /* And then remount the systemd cgroup root read-only */
+ if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
+
+ return 0;
+}
+
+int setup_volatile_state(
+ const char *directory,
+ VolatileMode mode,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *p, *options;
+ int r;
+
+ assert(directory);
+
+ if (mode != VOLATILE_STATE)
+ return 0;
+
+ /* --volatile=state means we simply overmount /var
+ with a tmpfs, and the rest read-only. */
+
+ r = bind_remount_recursive(directory, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
+
+ p = prefix_roota(directory, "/var");
+ r = mkdir(p, 0755);
+ if (r < 0 && errno != EEXIST)
+ return log_error_errno(errno, "Failed to create %s: %m", directory);
+
+ options = "mode=755";
+ r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ options = buf;
+
+ if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
+ return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
+
+ return 0;
+}
+
+int setup_volatile(
+ const char *directory,
+ VolatileMode mode,
+ bool userns, uid_t uid_shift, uid_t uid_range,
+ const char *selinux_apifs_context) {
+
+ bool tmpfs_mounted = false, bind_mounted = false;
+ char template[] = "/tmp/nspawn-volatile-XXXXXX";
+ _cleanup_free_ char *buf = NULL;
+ const char *f, *t, *options;
+ int r;
+
+ assert(directory);
+
+ if (mode != VOLATILE_YES)
+ return 0;
+
+ /* --volatile=yes means we mount a tmpfs to the root dir, and
+ the original /usr to use inside it, and that read-only. */
+
+ if (!mkdtemp(template))
+ return log_error_errno(errno, "Failed to create temporary directory: %m");
+
+ options = "mode=755";
+ r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
+ if (r < 0)
+ return log_oom();
+ if (r > 0)
+ options = buf;
+
+ if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
+ r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
+ goto fail;
+ }
+
+ tmpfs_mounted = true;
+
+ f = prefix_roota(directory, "/usr");
+ t = prefix_roota(template, "/usr");
+
+ r = mkdir(t, 0755);
+ if (r < 0 && errno != EEXIST) {
+ r = log_error_errno(errno, "Failed to create %s: %m", t);
+ goto fail;
+ }
+
+ if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
+ goto fail;
+ }
+
+ bind_mounted = true;
+
+ r = bind_remount_recursive(t, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to remount %s read-only: %m", t);
+ goto fail;
+ }
+
+ if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to move root mount: %m");
+ goto fail;
+ }
+
+ (void) rmdir(template);
+
+ return 0;
+
+fail:
+ if (bind_mounted)
+ (void) umount(t);
+
+ if (tmpfs_mounted)
+ (void) umount(template);
+ (void) rmdir(template);
+ return r;
+}
+
+VolatileMode volatile_mode_from_string(const char *s) {
+ int b;
+
+ if (isempty(s))
+ return _VOLATILE_MODE_INVALID;
+
+ b = parse_boolean(s);
+ if (b > 0)
+ return VOLATILE_YES;
+ if (b == 0)
+ return VOLATILE_NO;
+
+ if (streq(s, "state"))
+ return VOLATILE_STATE;
+
+ return _VOLATILE_MODE_INVALID;
+}
diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h
new file mode 100644
index 0000000000..5abd44cc4b
--- /dev/null
+++ b/src/nspawn/nspawn-mount.h
@@ -0,0 +1,70 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+
+typedef enum VolatileMode {
+ VOLATILE_NO,
+ VOLATILE_YES,
+ VOLATILE_STATE,
+ _VOLATILE_MODE_MAX,
+ _VOLATILE_MODE_INVALID = -1
+} VolatileMode;
+
+typedef enum CustomMountType {
+ CUSTOM_MOUNT_BIND,
+ CUSTOM_MOUNT_TMPFS,
+ CUSTOM_MOUNT_OVERLAY,
+ _CUSTOM_MOUNT_TYPE_MAX,
+ _CUSTOM_MOUNT_TYPE_INVALID = -1
+} CustomMountType;
+
+typedef struct CustomMount {
+ CustomMountType type;
+ bool read_only;
+ char *source; /* for overlayfs this is the upper directory */
+ char *destination;
+ char *options;
+ char *work_dir;
+ char **lower;
+} CustomMount;
+
+CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t);
+
+void custom_mount_free_all(CustomMount *l, unsigned n);
+int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only);
+int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s);
+
+int custom_mount_compare(const void *a, const void *b);
+
+int mount_all(const char *dest, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+
+int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+int mount_systemd_cgroup_writable(const char *dest, bool unified_requested);
+
+int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+
+int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
+
+VolatileMode volatile_mode_from_string(const char *s);
diff --git a/src/nspawn/nspawn-network.c b/src/nspawn/nspawn-network.c
new file mode 100644
index 0000000000..74abe5379a
--- /dev/null
+++ b/src/nspawn/nspawn-network.c
@@ -0,0 +1,440 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <linux/veth.h>
+#include <net/if.h>
+
+#include "sd-id128.h"
+#include "sd-netlink.h"
+#include "libudev.h"
+
+#include "util.h"
+#include "ether-addr-util.h"
+#include "siphash24.h"
+#include "netlink-util.h"
+#include "udev-util.h"
+
+#include "nspawn-network.h"
+
+#define HOST_HASH_KEY SD_ID128_MAKE(1a,37,6f,c7,46,ec,45,0b,ad,a3,d5,31,06,60,5d,b1)
+#define CONTAINER_HASH_KEY SD_ID128_MAKE(c3,c4,f9,19,b5,57,b2,1c,e6,cf,14,27,03,9c,ee,a2)
+#define MACVLAN_HASH_KEY SD_ID128_MAKE(00,13,6d,bc,66,83,44,81,bb,0c,f9,51,1f,24,a6,6f)
+
+static int generate_mac(
+ const char *machine_name,
+ struct ether_addr *mac,
+ sd_id128_t hash_key,
+ uint64_t idx) {
+
+ uint8_t result[8];
+ size_t l, sz;
+ uint8_t *v, *i;
+ int r;
+
+ l = strlen(machine_name);
+ sz = sizeof(sd_id128_t) + l;
+ if (idx > 0)
+ sz += sizeof(idx);
+
+ v = alloca(sz);
+
+ /* fetch some persistent data unique to the host */
+ r = sd_id128_get_machine((sd_id128_t*) v);
+ if (r < 0)
+ return r;
+
+ /* combine with some data unique (on this host) to this
+ * container instance */
+ i = mempcpy(v + sizeof(sd_id128_t), machine_name, l);
+ if (idx > 0) {
+ idx = htole64(idx);
+ memcpy(i, &idx, sizeof(idx));
+ }
+
+ /* Let's hash the host machine ID plus the container name. We
+ * use a fixed, but originally randomly created hash key here. */
+ siphash24(result, v, sz, hash_key.bytes);
+
+ assert_cc(ETH_ALEN <= sizeof(result));
+ memcpy(mac->ether_addr_octet, result, ETH_ALEN);
+
+ /* see eth_random_addr in the kernel */
+ mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
+ mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
+
+ return 0;
+}
+
+int setup_veth(const char *machine_name,
+ pid_t pid,
+ char iface_name[IFNAMSIZ],
+ bool bridge) {
+
+ _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
+ _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
+ struct ether_addr mac_host, mac_container;
+ int r, i;
+
+ /* Use two different interface name prefixes depending whether
+ * we are in bridge mode or not. */
+ snprintf(iface_name, IFNAMSIZ - 1, "%s-%s",
+ bridge ? "vb" : "ve", machine_name);
+
+ r = generate_mac(machine_name, &mac_container, CONTAINER_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for container side: %m");
+
+ r = generate_mac(machine_name, &mac_host, HOST_HASH_KEY, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate predictable MAC address for host side: %m");
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, iface_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac_host);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "veth");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container(m, VETH_INFO_PEER);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, "host0");
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac_container);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new veth interfaces (host0, %s): %m", iface_name);
+
+ i = (int) if_nametoindex(iface_name);
+ if (i <= 0)
+ return log_error_errno(errno, "Failed to resolve interface %s: %m", iface_name);
+
+ return i;
+}
+
+int setup_bridge(const char *veth_name, const char *bridge_name) {
+ _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
+ _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
+ int r, bridge_ifi;
+
+ assert(veth_name);
+ assert(bridge_name);
+
+ bridge_ifi = (int) if_nametoindex(bridge_name);
+ if (bridge_ifi <= 0)
+ return log_error_errno(errno, "Failed to resolve interface %s: %m", bridge_name);
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set IFF_UP flag: %m");
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, veth_name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name field: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_MASTER, bridge_ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink master field: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add veth interface to bridge: %m");
+
+ return bridge_ifi;
+}
+
+static int parse_interface(struct udev *udev, const char *name) {
+ _cleanup_udev_device_unref_ struct udev_device *d = NULL;
+ char ifi_str[2 + DECIMAL_STR_MAX(int)];
+ int ifi;
+
+ ifi = (int) if_nametoindex(name);
+ if (ifi <= 0)
+ return log_error_errno(errno, "Failed to resolve interface %s: %m", name);
+
+ sprintf(ifi_str, "n%i", ifi);
+ d = udev_device_new_from_device_id(udev, ifi_str);
+ if (!d)
+ return log_error_errno(errno, "Failed to get udev device for interface %s: %m", name);
+
+ if (udev_device_get_is_initialized(d) <= 0) {
+ log_error("Network interface %s is not initialized yet.", name);
+ return -EBUSY;
+ }
+
+ return ifi;
+}
+
+int move_network_interfaces(pid_t pid, char **ifaces) {
+ _cleanup_udev_unref_ struct udev *udev = NULL;
+ _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ udev = udev_new();
+ if (!udev) {
+ log_error("Failed to connect to udev.");
+ return -ENOMEM;
+ }
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
+ int ifi;
+
+ ifi = parse_interface(udev, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append namespace PID to netlink message: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to move interface %s to namespace: %m", *i);
+ }
+
+ return 0;
+}
+
+int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces) {
+ _cleanup_udev_unref_ struct udev *udev = NULL;
+ _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
+ unsigned idx = 0;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ udev = udev_new();
+ if (!udev) {
+ log_error("Failed to connect to udev.");
+ return -ENOMEM;
+ }
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
+ _cleanup_free_ char *n = NULL;
+ struct ether_addr mac;
+ int ifi;
+
+ ifi = parse_interface(udev, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = generate_mac(machine_name, &mac, MACVLAN_HASH_KEY, idx++);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create MACVLAN MAC address: %m");
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strappend("mv-", *i);
+ if (!n)
+ return log_oom();
+
+ strshorten(n, IFNAMSIZ-1);
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink MAC address: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "macvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append macvlan mode: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new macvlan interfaces: %m");
+ }
+
+ return 0;
+}
+
+int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces) {
+ _cleanup_udev_unref_ struct udev *udev = NULL;
+ _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (strv_isempty(ifaces))
+ return 0;
+
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ udev = udev_new();
+ if (!udev) {
+ log_error("Failed to connect to udev.");
+ return -ENOMEM;
+ }
+
+ STRV_FOREACH(i, ifaces) {
+ _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
+ _cleanup_free_ char *n = NULL;
+ int ifi;
+
+ ifi = parse_interface(udev, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strappend("iv-", *i);
+ if (!n)
+ return log_oom();
+
+ strshorten(n, IFNAMSIZ-1);
+
+ r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "ipvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_netlink_message_append_u16(m, IFLA_IPVLAN_MODE, IPVLAN_MODE_L2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add ipvlan mode: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_netlink_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new ipvlan interfaces: %m");
+ }
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-network.h b/src/nspawn/nspawn-network.h
new file mode 100644
index 0000000000..311e6d06cb
--- /dev/null
+++ b/src/nspawn/nspawn-network.h
@@ -0,0 +1,36 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <net/if.h>
+
+#include <sys/types.h>
+#include <stdbool.h>
+
+int setup_veth(const char *machine_name, pid_t pid, char iface_name[IFNAMSIZ], bool bridge);
+
+int setup_bridge(const char *veth_name, const char *bridge_name);
+
+int setup_macvlan(const char *machine_name, pid_t pid, char **ifaces);
+int setup_ipvlan(const char *machine_name, pid_t pid, char **ifaces);
+
+int move_network_interfaces(pid_t pid, char **ifaces);
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c
new file mode 100644
index 0000000000..b2776a61c2
--- /dev/null
+++ b/src/nspawn/nspawn-register.c
@@ -0,0 +1,244 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "sd-bus.h"
+
+#include "util.h"
+#include "strv.h"
+#include "bus-util.h"
+#include "bus-error.h"
+
+#include "nspawn-register.h"
+
+int register_machine(
+ const char *machine_name,
+ pid_t pid,
+ const char *directory,
+ sd_id128_t uuid,
+ int local_ifindex,
+ const char *slice,
+ CustomMount *mounts,
+ unsigned n_mounts,
+ int kill_signal,
+ char **properties,
+ bool keep_unit) {
+
+ _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_bus_flush_close_unref_ sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open system bus: %m");
+
+ if (keep_unit) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "RegisterMachineWithNetwork",
+ &error,
+ NULL,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ "nspawn",
+ "container",
+ (uint32_t) pid,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ } else {
+ _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
+ char **i;
+ unsigned j;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "CreateMachineWithNetwork");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(
+ m,
+ "sayssusai",
+ machine_name,
+ SD_BUS_MESSAGE_APPEND_ID128(uuid),
+ "nspawn",
+ "container",
+ (uint32_t) pid,
+ strempty(directory),
+ local_ifindex > 0 ? 1 : 0, local_ifindex);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!isempty(slice)) {
+ r = sd_bus_message_append(m, "(sv)", "Slice", "s", slice);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* If you make changes here, also make sure to update
+ * systemd-nspawn@.service, to keep the device
+ * policies in sync regardless if we are run with or
+ * without the --keep-unit switch. */
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 9,
+ /* Allow the container to
+ * access and create the API
+ * device nodes, so that
+ * PrivateDevices= in the
+ * container can work
+ * fine */
+ "/dev/null", "rwm",
+ "/dev/zero", "rwm",
+ "/dev/full", "rwm",
+ "/dev/random", "rwm",
+ "/dev/urandom", "rwm",
+ "/dev/tty", "rwm",
+ "/dev/net/tun", "rwm",
+ /* Allow the container
+ * access to ptys. However,
+ * do not permit the
+ * container to ever create
+ * these device nodes. */
+ "/dev/pts/ptmx", "rw",
+ "char-pts", "rw");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (j = 0; j < n_mounts; j++) {
+ CustomMount *cm = mounts + j;
+
+ if (cm->type != CUSTOM_MOUNT_BIND)
+ continue;
+
+ r = is_device_node(cm->source);
+ if (r < 0)
+ return log_error_errno(r, "Failed to stat %s: %m", cm->source);
+
+ if (r) {
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
+ cm->source, cm->read_only ? "r" : "rw");
+ if (r < 0)
+ return log_error_errno(r, "Failed to append message arguments: %m");
+ }
+ }
+
+ if (kill_signal != 0) {
+ r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", kill_signal);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ STRV_FOREACH(i, properties) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_unit_property_assignment(m, *i);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, NULL);
+ }
+
+ if (r < 0) {
+ log_error("Failed to register machine: %s", bus_error_message(&error, r));
+ return r;
+ }
+
+ return 0;
+}
+
+int terminate_machine(pid_t pid) {
+ _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
+ _cleanup_bus_flush_close_unref_ sd_bus *bus = NULL;
+ const char *path;
+ int r;
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open system bus: %m");
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachineByPID",
+ &error,
+ &reply,
+ "u",
+ (uint32_t) pid);
+ if (r < 0) {
+ /* Note that the machine might already have been
+ * cleaned up automatically, hence don't consider it a
+ * failure if we cannot get the machine object. */
+ log_debug("Failed to get machine: %s", bus_error_message(&error, r));
+ return 0;
+ }
+
+ r = sd_bus_message_read(reply, "o", &path);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ path,
+ "org.freedesktop.machine1.Machine",
+ "Terminate",
+ &error,
+ NULL,
+ NULL);
+ if (r < 0) {
+ log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h
new file mode 100644
index 0000000000..b27841ff59
--- /dev/null
+++ b/src/nspawn/nspawn-register.h
@@ -0,0 +1,31 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "nspawn-mount.h"
+
+int register_machine(const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit);
+int terminate_machine(pid_t pid);
diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
new file mode 100644
index 0000000000..419f5d1c40
--- /dev/null
+++ b/src/nspawn/nspawn-settings.c
@@ -0,0 +1,262 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "util.h"
+#include "conf-parser.h"
+#include "strv.h"
+#include "cap-list.h"
+
+#include "nspawn-settings.h"
+
+int settings_load(FILE *f, const char *path, Settings **ret) {
+ _cleanup_(settings_freep) Settings *s = NULL;
+ int r;
+
+ assert(path);
+ assert(ret);
+
+ s = new0(Settings, 1);
+ if (!s)
+ return -ENOMEM;
+
+ s->boot = -1;
+ s->personality = PERSONALITY_INVALID;
+
+ s->read_only = -1;
+ s->volatile_mode = _VOLATILE_MODE_INVALID;
+
+ s->private_network = -1;
+ s->network_veth = -1;
+
+ r = config_parse(NULL, path, f,
+ "Exec\0"
+ "Network\0"
+ "Files\0",
+ config_item_perf_lookup, nspawn_gperf_lookup,
+ false,
+ false,
+ true,
+ s);
+ if (r < 0)
+ return r;
+
+ *ret = s;
+ s = NULL;
+
+ return 0;
+}
+
+Settings* settings_free(Settings *s) {
+
+ if (!s)
+ return NULL;
+
+ strv_free(s->parameters);
+ strv_free(s->environment);
+ free(s->user);
+
+ strv_free(s->network_interfaces);
+ strv_free(s->network_macvlan);
+ strv_free(s->network_ipvlan);
+ free(s->network_bridge);
+ expose_port_free_all(s->expose_ports);
+
+ custom_mount_free_all(s->custom_mounts, s->n_custom_mounts);
+ free(s);
+
+ return NULL;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_volatile_mode, volatile_mode, VolatileMode, "Failed to parse volatile mode");
+
+int config_parse_expose_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = expose_port_parse(&s->expose_ports, rvalue);
+ if (r == -EEXIST) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Duplicate port specification, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse host port %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_capability(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t u = 0, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ int cap;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to extract capability string, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ cap = capability_from_name(word);
+ if (cap < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, cap, "Failed to parse capability, ignoring: %s", word);
+ continue;
+ }
+
+ u |= 1 << ((uint64_t) cap);
+ }
+
+ if (u == 0)
+ return 0;
+
+ *result |= u;
+ return 0;
+}
+
+int config_parse_id128(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ sd_id128_t t, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = sd_id128_from_string(rvalue, &t);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse 128bit ID/UUID, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *result = t;
+ return 0;
+}
+
+int config_parse_bind(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = bind_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue, ltype);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid bind mount specification %s: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_tmpfs(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Settings *settings = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = tmpfs_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid temporary file system specification %s: %m", rvalue);
+ return 0;
+ }
+
+ if (settings->network_bridge)
+ settings->network_veth = true;
+
+ if (settings->network_interfaces ||
+ settings->network_macvlan ||
+ settings->network_ipvlan ||
+ settings->network_bridge ||
+ settings->network_veth)
+ settings->private_network = true;
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
new file mode 100644
index 0000000000..4cec40c1b7
--- /dev/null
+++ b/src/nspawn/nspawn-settings.h
@@ -0,0 +1,87 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdio.h>
+
+#include "macro.h"
+
+#include "nspawn-mount.h"
+#include "nspawn-expose-ports.h"
+
+typedef enum SettingsMask {
+ SETTING_BOOT = 1 << 0,
+ SETTING_ENVIRONMENT = 1 << 1,
+ SETTING_USER = 1 << 2,
+ SETTING_CAPABILITY = 1 << 3,
+ SETTING_KILL_SIGNAL = 1 << 4,
+ SETTING_PERSONALITY = 1 << 5,
+ SETTING_MACHINE_ID = 1 << 6,
+ SETTING_NETWORK = 1 << 7,
+ SETTING_EXPOSE_PORTS = 1 << 8,
+ SETTING_READ_ONLY = 1 << 9,
+ SETTING_VOLATILE_MODE = 1 << 10,
+ SETTING_CUSTOM_MOUNTS = 1 << 11,
+ _SETTINGS_MASK_ALL = (1 << 12) -1
+} SettingsMask;
+
+typedef struct Settings {
+ /* [Run] */
+ int boot;
+ char **parameters;
+ char **environment;
+ char *user;
+ uint64_t capability;
+ uint64_t drop_capability;
+ int kill_signal;
+ unsigned long personality;
+ sd_id128_t machine_id;
+
+ /* [Image] */
+ int read_only;
+ VolatileMode volatile_mode;
+ CustomMount *custom_mounts;
+ unsigned n_custom_mounts;
+
+ /* [Network] */
+ int private_network;
+ int network_veth;
+ char *network_bridge;
+ char **network_interfaces;
+ char **network_macvlan;
+ char **network_ipvlan;
+ ExposePort *expose_ports;
+} Settings;
+
+int settings_load(FILE *f, const char *path, Settings **ret);
+Settings* settings_free(Settings *s);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Settings*, settings_free);
+
+const struct ConfigPerfItem* nspawn_gperf_lookup(const char *key, unsigned length);
+
+int config_parse_capability(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_id128(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_expose_port(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/nspawn/nspawn-setuid.c b/src/nspawn/nspawn-setuid.c
new file mode 100644
index 0000000000..eda7f62900
--- /dev/null
+++ b/src/nspawn/nspawn-setuid.c
@@ -0,0 +1,272 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <grp.h>
+
+#include "util.h"
+#include "signal-util.h"
+#include "mkdir.h"
+#include "process-util.h"
+
+#include "nspawn-setuid.h"
+
+static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
+ int pipe_fds[2];
+ pid_t pid;
+
+ assert(database);
+ assert(key);
+ assert(rpid);
+
+ if (pipe2(pipe_fds, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to allocate pipe: %m");
+
+ pid = fork();
+ if (pid < 0)
+ return log_error_errno(errno, "Failed to fork getent child: %m");
+ else if (pid == 0) {
+ int nullfd;
+ char *empty_env = NULL;
+
+ if (dup3(pipe_fds[1], STDOUT_FILENO, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ if (pipe_fds[0] > 2)
+ safe_close(pipe_fds[0]);
+ if (pipe_fds[1] > 2)
+ safe_close(pipe_fds[1]);
+
+ nullfd = open("/dev/null", O_RDWR);
+ if (nullfd < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup3(nullfd, STDIN_FILENO, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup3(nullfd, STDERR_FILENO, 0) < 0)
+ _exit(EXIT_FAILURE);
+
+ if (nullfd > 2)
+ safe_close(nullfd);
+
+ (void) reset_all_signal_handlers();
+ (void) reset_signal_mask();
+ close_all_fds(NULL, 0);
+
+ execle("/usr/bin/getent", "getent", database, key, NULL, &empty_env);
+ execle("/bin/getent", "getent", database, key, NULL, &empty_env);
+ _exit(EXIT_FAILURE);
+ }
+
+ pipe_fds[1] = safe_close(pipe_fds[1]);
+
+ *rpid = pid;
+
+ return pipe_fds[0];
+}
+
+int change_uid_gid(const char *user, char **_home) {
+ char line[LINE_MAX], *x, *u, *g, *h;
+ const char *word, *state;
+ _cleanup_free_ uid_t *uids = NULL;
+ _cleanup_free_ char *home = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ unsigned n_uids = 0;
+ size_t sz = 0, l;
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ int r;
+
+ assert(_home);
+
+ if (!user || streq(user, "root") || streq(user, "0")) {
+ /* Reset everything fully to 0, just in case */
+
+ r = reset_uid_gid();
+ if (r < 0)
+ return log_error_errno(r, "Failed to become root: %m");
+
+ *_home = NULL;
+ return 0;
+ }
+
+ /* First, get user credentials */
+ fd = spawn_getent("passwd", user, &pid);
+ if (fd < 0)
+ return fd;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_oom();
+ fd = -1;
+
+ if (!fgets(line, sizeof(line), f)) {
+
+ if (!ferror(f)) {
+ log_error("Failed to resolve user %s.", user);
+ return -ESRCH;
+ }
+
+ log_error_errno(errno, "Failed to read from getent: %m");
+ return -errno;
+ }
+
+ truncate_nl(line);
+
+ wait_for_terminate_and_warn("getent passwd", pid, true);
+
+ x = strchr(line, ':');
+ if (!x) {
+ log_error("/etc/passwd entry has invalid user field.");
+ return -EIO;
+ }
+
+ u = strchr(x+1, ':');
+ if (!u) {
+ log_error("/etc/passwd entry has invalid password field.");
+ return -EIO;
+ }
+
+ u++;
+ g = strchr(u, ':');
+ if (!g) {
+ log_error("/etc/passwd entry has invalid UID field.");
+ return -EIO;
+ }
+
+ *g = 0;
+ g++;
+ x = strchr(g, ':');
+ if (!x) {
+ log_error("/etc/passwd entry has invalid GID field.");
+ return -EIO;
+ }
+
+ *x = 0;
+ h = strchr(x+1, ':');
+ if (!h) {
+ log_error("/etc/passwd entry has invalid GECOS field.");
+ return -EIO;
+ }
+
+ h++;
+ x = strchr(h, ':');
+ if (!x) {
+ log_error("/etc/passwd entry has invalid home directory field.");
+ return -EIO;
+ }
+
+ *x = 0;
+
+ r = parse_uid(u, &uid);
+ if (r < 0) {
+ log_error("Failed to parse UID of user.");
+ return -EIO;
+ }
+
+ r = parse_gid(g, &gid);
+ if (r < 0) {
+ log_error("Failed to parse GID of user.");
+ return -EIO;
+ }
+
+ home = strdup(h);
+ if (!home)
+ return log_oom();
+
+ /* Second, get group memberships */
+ fd = spawn_getent("initgroups", user, &pid);
+ if (fd < 0)
+ return fd;
+
+ fclose(f);
+ f = fdopen(fd, "r");
+ if (!f)
+ return log_oom();
+ fd = -1;
+
+ if (!fgets(line, sizeof(line), f)) {
+ if (!ferror(f)) {
+ log_error("Failed to resolve user %s.", user);
+ return -ESRCH;
+ }
+
+ log_error_errno(errno, "Failed to read from getent: %m");
+ return -errno;
+ }
+
+ truncate_nl(line);
+
+ wait_for_terminate_and_warn("getent initgroups", pid, true);
+
+ /* Skip over the username and subsequent separator whitespace */
+ x = line;
+ x += strcspn(x, WHITESPACE);
+ x += strspn(x, WHITESPACE);
+
+ FOREACH_WORD(word, l, x, state) {
+ char c[l+1];
+
+ memcpy(c, word, l);
+ c[l] = 0;
+
+ if (!GREEDY_REALLOC(uids, sz, n_uids+1))
+ return log_oom();
+
+ r = parse_uid(c, &uids[n_uids++]);
+ if (r < 0) {
+ log_error("Failed to parse group data from getent.");
+ return -EIO;
+ }
+ }
+
+ r = mkdir_parents(home, 0775);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make home root directory: %m");
+
+ r = mkdir_safe(home, 0755, uid, gid);
+ if (r < 0 && r != -EEXIST)
+ return log_error_errno(r, "Failed to make home directory: %m");
+
+ (void) fchown(STDIN_FILENO, uid, gid);
+ (void) fchown(STDOUT_FILENO, uid, gid);
+ (void) fchown(STDERR_FILENO, uid, gid);
+
+ if (setgroups(n_uids, uids) < 0)
+ return log_error_errno(errno, "Failed to set auxiliary groups: %m");
+
+ if (setresgid(gid, gid, gid) < 0)
+ return log_error_errno(errno, "setregid() failed: %m");
+
+ if (setresuid(uid, uid, uid) < 0)
+ return log_error_errno(errno, "setreuid() failed: %m");
+
+ if (_home) {
+ *_home = home;
+ home = NULL;
+ }
+
+ return 0;
+}
diff --git a/src/nspawn/nspawn-setuid.h b/src/nspawn/nspawn-setuid.h
new file mode 100644
index 0000000000..33be44a946
--- /dev/null
+++ b/src/nspawn/nspawn-setuid.h
@@ -0,0 +1,24 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2015 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+int change_uid_gid(const char *user, char **ret);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 1c64c3e771..33943a4b2f 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -19,95 +19,78 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-#include <signal.h>
-#include <sched.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/mount.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
+#ifdef HAVE_BLKID
+#include <blkid/blkid.h>
+#endif
#include <errno.h>
-#include <sys/prctl.h>
#include <getopt.h>
-#include <grp.h>
-#include <linux/fs.h>
-#include <sys/socket.h>
-#include <linux/netlink.h>
-#include <net/if.h>
-#include <linux/veth.h>
-#include <sys/personality.h>
#include <linux/loop.h>
-#include <sys/file.h>
-
-#ifdef HAVE_SELINUX
-#include <selinux/selinux.h>
-#endif
-
+#include <sched.h>
#ifdef HAVE_SECCOMP
#include <seccomp.h>
#endif
-
-#ifdef HAVE_BLKID
-#include <blkid/blkid.h>
+#ifdef HAVE_SELINUX
+#include <selinux/selinux.h>
#endif
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/mount.h>
+#include <sys/personality.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
#include "sd-daemon.h"
-#include "sd-bus.h"
#include "sd-id128.h"
-#include "sd-netlink.h"
-#include "random-util.h"
-#include "log.h"
-#include "util.h"
-#include "mkdir.h"
-#include "rm-rf.h"
-#include "macro.h"
-#include "missing.h"
+
+#include "barrier.h"
+#include "base-filesystem.h"
+#include "blkid-util.h"
+#include "btrfs-util.h"
+#include "build.h"
+#include "cap-list.h"
+#include "capability.h"
#include "cgroup-util.h"
-#include "strv.h"
-#include "path-util.h"
-#include "loopback-setup.h"
+#include "copy.h"
#include "dev-setup.h"
+#include "env-util.h"
+#include "event-util.h"
#include "fdset.h"
-#include "build.h"
#include "fileio.h"
-#include "bus-util.h"
-#include "bus-error.h"
-#include "ptyfwd.h"
-#include "env-util.h"
-#include "netlink-util.h"
-#include "udev-util.h"
-#include "blkid-util.h"
+#include "formats-util.h"
#include "gpt.h"
-#include "siphash24.h"
-#include "copy.h"
-#include "base-filesystem.h"
-#include "barrier.h"
-#include "event-util.h"
-#include "capability.h"
-#include "cap-list.h"
-#include "btrfs-util.h"
+#include "hostname-util.h"
+#include "log.h"
+#include "loopback-setup.h"
#include "machine-image.h"
-#include "list.h"
-#include "in-addr-util.h"
-#include "firewall-util.h"
-#include "local-addresses.h"
-#include "formats-util.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "netlink-util.h"
+#include "path-util.h"
#include "process-util.h"
-#include "terminal-util.h"
-#include "hostname-util.h"
-#include "signal-util.h"
-
+#include "ptyfwd.h"
+#include "random-util.h"
+#include "rm-rf.h"
#ifdef HAVE_SECCOMP
#include "seccomp-util.h"
#endif
+#include "signal-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "udev-util.h"
+#include "util.h"
-typedef struct ExposePort {
- int protocol;
- uint16_t host_port;
- uint16_t container_port;
- LIST_FIELDS(struct ExposePort, ports);
-} ExposePort;
+#include "nspawn-settings.h"
+#include "nspawn-mount.h"
+#include "nspawn-network.h"
+#include "nspawn-expose-ports.h"
+#include "nspawn-cgroup.h"
+#include "nspawn-register.h"
+#include "nspawn-setuid.h"
typedef enum ContainerStatus {
CONTAINER_TERMINATED,
@@ -121,28 +104,6 @@ typedef enum LinkJournal {
LINK_GUEST
} LinkJournal;
-typedef enum Volatile {
- VOLATILE_NO,
- VOLATILE_YES,
- VOLATILE_STATE,
-} Volatile;
-
-typedef enum CustomMountType {
- CUSTOM_MOUNT_BIND,
- CUSTOM_MOUNT_TMPFS,
- CUSTOM_MOUNT_OVERLAY,
-} CustomMountType;
-
-typedef struct CustomMount {
- CustomMountType type;
- bool read_only;
- char *source; /* for overlayfs this is the upper directory */
- char *destination;
- char *options;
- char *work_dir;
- char **lower;
-} CustomMount;
-
static char *arg_directory = NULL;
static char *arg_template = NULL;
static char *arg_user = NULL;
@@ -195,16 +156,19 @@ static char **arg_network_interfaces = NULL;
static char **arg_network_macvlan = NULL;
static char **arg_network_ipvlan = NULL;
static bool arg_network_veth = false;
-static const char *arg_network_bridge = NULL;
+static char *arg_network_bridge = NULL;
static unsigned long arg_personality = PERSONALITY_INVALID;
static char *arg_image = NULL;
-static Volatile arg_volatile = VOLATILE_NO;
+static VolatileMode arg_volatile_mode = VOLATILE_NO;
static ExposePort *arg_expose_ports = NULL;
static char **arg_property = NULL;
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
static bool arg_userns = false;
static int arg_kill_signal = 0;
static bool arg_unified_cgroup_hierarchy = false;
+static SettingsMask arg_settings_mask = 0;
+static int arg_settings_trusted = -1;
+static char **arg_parameters = NULL;
static void help(void) {
printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
@@ -275,62 +239,10 @@ static void help(void) {
" --keep-unit Do not register a scope for the machine, reuse\n"
" the service unit nspawn is running in\n"
" --volatile[=MODE] Run the system in volatile mode\n"
+ " --settings=BOOLEAN Load additional settings from .nspawn file\n"
, program_invocation_short_name);
}
-static CustomMount* custom_mount_add(CustomMountType t) {
- CustomMount *c, *ret;
-
- c = realloc(arg_custom_mounts, (arg_n_custom_mounts + 1) * sizeof(CustomMount));
- if (!c)
- return NULL;
-
- arg_custom_mounts = c;
- ret = arg_custom_mounts + arg_n_custom_mounts;
- arg_n_custom_mounts++;
-
- *ret = (CustomMount) { .type = t };
-
- return ret;
-}
-
-static void custom_mount_free_all(void) {
- unsigned i;
-
- for (i = 0; i < arg_n_custom_mounts; i++) {
- CustomMount *m = &arg_custom_mounts[i];
-
- free(m->source);
- free(m->destination);
- free(m->options);
-
- if (m->work_dir) {
- (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
- free(m->work_dir);
- }
-
- strv_free(m->lower);
- }
-
- arg_custom_mounts = mfree(arg_custom_mounts);
- arg_n_custom_mounts = 0;
-}
-
-static int custom_mount_compare(const void *a, const void *b) {
- const CustomMount *x = a, *y = b;
- int r;
-
- r = path_compare(x->destination, y->destination);
- if (r != 0)
- return r;
-
- if (x->type < y->type)
- return -1;
- if (x->type > y->type)
- return 1;
-
- return 0;
-}
static int custom_mounts_prepare(void) {
unsigned i;
@@ -439,6 +351,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_PROPERTY,
ARG_PRIVATE_USERS,
ARG_KILL_SIGNAL,
+ ARG_SETTINGS,
};
static const struct option options[] = {
@@ -481,11 +394,13 @@ static int parse_argv(int argc, char *argv[]) {
{ "property", required_argument, NULL, ARG_PROPERTY },
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
+ { "settings", required_argument, NULL, ARG_SETTINGS },
{}
};
int c, r;
uint64_t plus = 0, minus = 0;
+ bool mask_all_settings = false, mask_no_settings = false;
assert(argc >= 0);
assert(argv);
@@ -533,16 +448,20 @@ static int parse_argv(int argc, char *argv[]) {
if (r < 0)
return log_oom();
+ arg_settings_mask |= SETTING_USER;
break;
case ARG_NETWORK_BRIDGE:
- arg_network_bridge = optarg;
+ r = free_and_strdup(&arg_network_bridge, optarg);
+ if (r < 0)
+ return log_oom();
/* fall through */
case 'n':
arg_network_veth = true;
arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
break;
case ARG_NETWORK_INTERFACE:
@@ -550,6 +469,7 @@ static int parse_argv(int argc, char *argv[]) {
return log_oom();
arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
break;
case ARG_NETWORK_MACVLAN:
@@ -557,6 +477,7 @@ static int parse_argv(int argc, char *argv[]) {
return log_oom();
arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
break;
case ARG_NETWORK_IPVLAN:
@@ -567,10 +488,12 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_PRIVATE_NETWORK:
arg_private_network = true;
+ arg_settings_mask |= SETTING_NETWORK;
break;
case 'b':
arg_boot = true;
+ arg_settings_mask |= SETTING_BOOT;
break;
case ARG_UUID:
@@ -579,6 +502,8 @@ static int parse_argv(int argc, char *argv[]) {
log_error("Invalid UUID: %s", optarg);
return r;
}
+
+ arg_settings_mask |= SETTING_MACHINE_ID;
break;
case 'S':
@@ -611,6 +536,7 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_READ_ONLY:
arg_read_only = true;
+ arg_settings_mask |= SETTING_READ_ONLY;
break;
case ARG_CAPABILITY:
@@ -646,6 +572,7 @@ static int parse_argv(int argc, char *argv[]) {
}
}
+ arg_settings_mask |= SETTING_CAPABILITY;
break;
}
@@ -681,83 +608,21 @@ static int parse_argv(int argc, char *argv[]) {
break;
case ARG_BIND:
- case ARG_BIND_RO: {
- const char *current = optarg;
- _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
- CustomMount *m;
-
- r = extract_many_words(&current, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, &opts, NULL);
- switch (r) {
- case 1:
- destination = strdup(source);
- case 2:
- case 3:
- break;
- case -ENOMEM:
- return log_oom();
- default:
- log_error("Invalid bind mount specification: %s", optarg);
- return -EINVAL;
- }
-
- if (!source || !destination)
- return log_oom();
-
- if (!path_is_absolute(source) || !path_is_absolute(destination)) {
- log_error("Invalid bind mount specification: %s", optarg);
- return -EINVAL;
- }
-
- m = custom_mount_add(CUSTOM_MOUNT_BIND);
- if (!m)
- return log_oom();
-
- m->source = source;
- m->destination = destination;
- m->read_only = c == ARG_BIND_RO;
- m->options = opts;
-
- source = destination = opts = NULL;
+ case ARG_BIND_RO:
+ r = bind_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg, c == ARG_BIND_RO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
break;
- }
-
- case ARG_TMPFS: {
- const char *current = optarg;
- _cleanup_free_ char *path = NULL, *opts = NULL;
- CustomMount *m;
- r = extract_first_word(&current, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
- if (r == -ENOMEM)
- return log_oom();
- else if (r < 0) {
- log_error("Invalid tmpfs specification: %s", optarg);
- return r;
- }
- if (r)
- opts = strdup(current);
- else
- opts = strdup("mode=0755");
-
- if (!path || !opts)
- return log_oom();
-
- if (!path_is_absolute(path)) {
- log_error("Invalid tmpfs specification: %s", optarg);
- return -EINVAL;
- }
-
- m = custom_mount_add(CUSTOM_MOUNT_TMPFS);
- if (!m)
- return log_oom();
-
- m->destination = path;
- m->options = opts;
-
- path = opts = NULL;
+ case ARG_TMPFS:
+ r = tmpfs_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse --tmpfs= argument %s: %m", optarg);
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
break;
- }
case ARG_OVERLAY:
case ARG_OVERLAY_RO: {
@@ -808,7 +673,7 @@ static int parse_argv(int argc, char *argv[]) {
lower[n - 2] = NULL;
}
- m = custom_mount_add(CUSTOM_MOUNT_OVERLAY);
+ m = custom_mount_add(&arg_custom_mounts, &arg_n_custom_mounts, CUSTOM_MOUNT_OVERLAY);
if (!m)
return log_oom();
@@ -820,6 +685,7 @@ static int parse_argv(int argc, char *argv[]) {
upper = destination = NULL;
lower = NULL;
+ arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
break;
}
@@ -837,6 +703,8 @@ static int parse_argv(int argc, char *argv[]) {
strv_free(arg_setenv);
arg_setenv = n;
+
+ arg_settings_mask |= SETTING_ENVIRONMENT;
break;
}
@@ -870,85 +738,36 @@ static int parse_argv(int argc, char *argv[]) {
return -EINVAL;
}
+ arg_settings_mask |= SETTING_PERSONALITY;
break;
case ARG_VOLATILE:
if (!optarg)
- arg_volatile = VOLATILE_YES;
- else {
- r = parse_boolean(optarg);
- if (r < 0) {
- if (streq(optarg, "state"))
- arg_volatile = VOLATILE_STATE;
- else {
- log_error("Failed to parse --volatile= argument: %s", optarg);
- return r;
- }
- } else
- arg_volatile = r ? VOLATILE_YES : VOLATILE_NO;
- }
-
- break;
-
- case 'p': {
- const char *split, *e;
- uint16_t container_port, host_port;
- int protocol;
- ExposePort *p;
-
- if ((e = startswith(optarg, "tcp:")))
- protocol = IPPROTO_TCP;
- else if ((e = startswith(optarg, "udp:")))
- protocol = IPPROTO_UDP;
+ arg_volatile_mode = VOLATILE_YES;
else {
- e = optarg;
- protocol = IPPROTO_TCP;
- }
-
- split = strchr(e, ':');
- if (split) {
- char v[split - e + 1];
-
- memcpy(v, e, split - e);
- v[split - e] = 0;
-
- r = safe_atou16(v, &host_port);
- if (r < 0 || host_port <= 0) {
- log_error("Failed to parse host port: %s", optarg);
- return -EINVAL;
- }
-
- r = safe_atou16(split + 1, &container_port);
- } else {
- r = safe_atou16(e, &container_port);
- host_port = container_port;
- }
+ VolatileMode m;
- if (r < 0 || container_port <= 0) {
- log_error("Failed to parse host port: %s", optarg);
- return -EINVAL;
- }
-
- LIST_FOREACH(ports, p, arg_expose_ports) {
- if (p->protocol == protocol && p->host_port == host_port) {
- log_error("Duplicate port specification: %s", optarg);
+ m = volatile_mode_from_string(optarg);
+ if (m < 0) {
+ log_error("Failed to parse --volatile= argument: %s", optarg);
return -EINVAL;
- }
+ } else
+ arg_volatile_mode = m;
}
- p = new(ExposePort, 1);
- if (!p)
- return log_oom();
-
- p->protocol = protocol;
- p->host_port = host_port;
- p->container_port = container_port;
+ arg_settings_mask |= SETTING_VOLATILE_MODE;
+ break;
- LIST_PREPEND(ports, arg_expose_ports, p);
+ case 'p':
+ r = expose_port_parse(&arg_expose_ports, optarg);
+ if (r == -EEXIST)
+ return log_error_errno(r, "Duplicate port specification: %s", optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse host port %s: %m", optarg);
+ arg_settings_mask |= SETTING_EXPOSE_PORTS;
break;
- }
case ARG_PROPERTY:
if (strv_extend(&arg_property, optarg) < 0)
@@ -992,6 +811,42 @@ static int parse_argv(int argc, char *argv[]) {
return -EINVAL;
}
+ arg_settings_mask |= SETTING_KILL_SIGNAL;
+ break;
+
+ case ARG_SETTINGS:
+
+ /* no → do not read files
+ * yes → read files, do not override cmdline, trust only subset
+ * override → read files, override cmdline, trust only subset
+ * trusted → read files, do not override cmdline, trust all
+ */
+
+ r = parse_boolean(optarg);
+ if (r < 0) {
+ if (streq(optarg, "trusted")) {
+ mask_all_settings = false;
+ mask_no_settings = false;
+ arg_settings_trusted = true;
+
+ } else if (streq(optarg, "override")) {
+ mask_all_settings = false;
+ mask_no_settings = true;
+ arg_settings_trusted = -1;
+ } else
+ return log_error_errno(r, "Failed to parse --settings= argument: %s", optarg);
+ } else if (r > 0) {
+ /* yes */
+ mask_all_settings = false;
+ mask_no_settings = false;
+ arg_settings_trusted = -1;
+ } else {
+ /* no */
+ mask_all_settings = true;
+ mask_no_settings = false;
+ arg_settings_trusted = false;
+ }
+
break;
case '?':
@@ -1044,557 +899,48 @@ static int parse_argv(int argc, char *argv[]) {
return -EINVAL;
}
- if (arg_volatile != VOLATILE_NO && arg_read_only) {
- log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
- return -EINVAL;
- }
-
- if (arg_expose_ports && !arg_private_network) {
- log_error("Cannot use --port= without private networking.");
- return -EINVAL;
- }
-
if (arg_userns && access("/proc/self/uid_map", F_OK) < 0)
return log_error_errno(EOPNOTSUPP, "--private-users= is not supported, kernel compiled without user namespace support.");
- arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
-
- if (arg_boot && arg_kill_signal <= 0)
- arg_kill_signal = SIGRTMIN+3;
-
- r = detect_unified_cgroup_hierarchy();
- if (r < 0)
- return r;
-
- return 1;
-}
-
-static int tmpfs_patch_options(const char *options, char **ret) {
- char *buf = NULL;
-
- if (arg_userns && arg_uid_shift != 0) {
- assert(arg_uid_shift != UID_INVALID);
-
- if (options)
- (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, arg_uid_shift, arg_uid_shift);
- else
- (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, arg_uid_shift, arg_uid_shift);
- if (!buf)
- return -ENOMEM;
-
- options = buf;
- }
-
-#ifdef HAVE_SELINUX
- if (arg_selinux_apifs_context) {
- char *t;
-
- if (options)
- t = strjoin(options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
- else
- t = strjoin("context=\"", arg_selinux_apifs_context, "\"", NULL);
- if (!t) {
- free(buf);
- return -ENOMEM;
- }
-
- free(buf);
- buf = t;
- }
-#endif
-
- *ret = buf;
- return !!buf;
-}
-
-static int mount_all(const char *dest, bool userns) {
-
- typedef struct MountPoint {
- const char *what;
- const char *where;
- const char *type;
- const char *options;
- unsigned long flags;
- bool fatal;
- bool userns;
- } MountPoint;
-
- static const MountPoint mount_table[] = {
- { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
- { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
- { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
- { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
- { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
- { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
- { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
-#ifdef HAVE_SELINUX
- { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
- { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
-#endif
- };
-
- unsigned k;
- int r;
-
- for (k = 0; k < ELEMENTSOF(mount_table); k++) {
- _cleanup_free_ char *where = NULL, *options = NULL;
- const char *o;
-
- if (userns != mount_table[k].userns)
- continue;
-
- where = prefix_root(dest, mount_table[k].where);
- if (!where)
+ if (argc > optind) {
+ arg_parameters = strv_copy(argv + optind);
+ if (!arg_parameters)
return log_oom();
- r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
- if (r < 0 && r != -ENOENT)
- return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
-
- /* Skip this entry if it is not a remount. */
- if (mount_table[k].what && r > 0)
- continue;
-
- r = mkdir_p(where, 0755);
- if (r < 0) {
- if (mount_table[k].fatal)
- return log_error_errno(r, "Failed to create directory %s: %m", where);
-
- log_warning_errno(r, "Failed to create directory %s: %m", where);
- continue;
- }
-
- o = mount_table[k].options;
- if (streq_ptr(mount_table[k].type, "tmpfs")) {
- r = tmpfs_patch_options(o, &options);
- if (r < 0)
- return log_oom();
- if (r > 0)
- o = options;
- }
-
- if (mount(mount_table[k].what,
- where,
- mount_table[k].type,
- mount_table[k].flags,
- o) < 0) {
-
- if (mount_table[k].fatal)
- return log_error_errno(errno, "mount(%s) failed: %m", where);
-
- log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
- }
+ arg_settings_mask |= SETTING_BOOT;
}
- return 0;
-}
+ /* Load all settings from .nspawn files */
+ if (mask_no_settings)
+ arg_settings_mask = 0;
-static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
- const char *p = options;
- unsigned long flags = *mount_flags;
- char *opts = NULL;
+ /* Don't load any settings from .nspawn files */
+ if (mask_all_settings)
+ arg_settings_mask = _SETTINGS_MASK_ALL;
- assert(options);
-
- for (;;) {
- _cleanup_free_ char *word = NULL;
- int r = extract_first_word(&p, &word, ",", 0);
- if (r < 0)
- return log_error_errno(r, "Failed to extract mount option: %m");
- if (r == 0)
- break;
-
- if (streq(word, "rbind"))
- flags |= MS_REC;
- else if (streq(word, "norbind"))
- flags &= ~MS_REC;
- else {
- log_error("Invalid bind mount option: %s", word);
- return -EINVAL;
- }
- }
-
- *mount_flags = flags;
- /* in the future mount_opts will hold string options for mount(2) */
- *mount_opts = opts;
-
- return 0;
-}
-
-static int mount_bind(const char *dest, CustomMount *m) {
- struct stat source_st, dest_st;
- const char *where;
- unsigned long mount_flags = MS_BIND | MS_REC;
- _cleanup_free_ char *mount_opts = NULL;
- int r;
-
- assert(m);
-
- if (m->options) {
- r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
- if (r < 0)
- return r;
- }
-
- if (stat(m->source, &source_st) < 0)
- return log_error_errno(errno, "Failed to stat %s: %m", m->source);
-
- where = prefix_roota(dest, m->destination);
-
- if (stat(where, &dest_st) >= 0) {
- if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
- log_error("Cannot bind mount directory %s on file %s.", m->source, where);
- return -EINVAL;
- }
-
- if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
- log_error("Cannot bind mount file %s on directory %s.", m->source, where);
- return -EINVAL;
- }
-
- } else if (errno == ENOENT) {
- r = mkdir_parents_label(where, 0755);
- if (r < 0)
- return log_error_errno(r, "Failed to make parents of %s: %m", where);
- } else {
- log_error_errno(errno, "Failed to stat %s: %m", where);
- return -errno;
- }
-
- /* Create the mount point. Any non-directory file can be
- * mounted on any non-directory file (regular, fifo, socket,
- * char, block).
- */
- if (S_ISDIR(source_st.st_mode))
- r = mkdir_label(where, 0755);
- else
- r = touch(where);
- if (r < 0 && r != -EEXIST)
- return log_error_errno(r, "Failed to create mount point %s: %m", where);
-
- if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
- return log_error_errno(errno, "mount(%s) failed: %m", where);
-
- if (m->read_only) {
- r = bind_remount_recursive(where, true);
- if (r < 0)
- return log_error_errno(r, "Read-only bind mount failed: %m");
- }
-
- return 0;
-}
-
-static int mount_tmpfs(const char *dest, CustomMount *m) {
- const char *where, *options;
- _cleanup_free_ char *buf = NULL;
- int r;
-
- assert(dest);
- assert(m);
-
- where = prefix_roota(dest, m->destination);
-
- r = mkdir_p_label(where, 0755);
- if (r < 0 && r != -EEXIST)
- return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
-
- r = tmpfs_patch_options(m->options, &buf);
- if (r < 0)
- return log_oom();
- options = r > 0 ? buf : m->options;
-
- if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
- return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
-
- return 0;
-}
-
-static char *joined_and_escaped_lower_dirs(char * const *lower) {
- _cleanup_strv_free_ char **sv = NULL;
-
- sv = strv_copy(lower);
- if (!sv)
- return NULL;
-
- strv_reverse(sv);
-
- if (!strv_shell_escape(sv, ",:"))
- return NULL;
-
- return strv_join(sv, ":");
-}
-
-static int mount_overlay(const char *dest, CustomMount *m) {
- _cleanup_free_ char *lower = NULL;
- const char *where, *options;
- int r;
-
- assert(dest);
- assert(m);
-
- where = prefix_roota(dest, m->destination);
-
- r = mkdir_label(where, 0755);
- if (r < 0 && r != -EEXIST)
- return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
-
- (void) mkdir_p_label(m->source, 0755);
-
- lower = joined_and_escaped_lower_dirs(m->lower);
- if (!lower)
- return log_oom();
-
- if (m->read_only) {
- _cleanup_free_ char *escaped_source = NULL;
-
- escaped_source = shell_escape(m->source, ",:");
- if (!escaped_source)
- return log_oom();
-
- options = strjoina("lowerdir=", escaped_source, ":", lower);
- } else {
- _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
-
- assert(m->work_dir);
- (void) mkdir_label(m->work_dir, 0700);
-
- escaped_source = shell_escape(m->source, ",:");
- if (!escaped_source)
- return log_oom();
- escaped_work_dir = shell_escape(m->work_dir, ",:");
- if (!escaped_work_dir)
- return log_oom();
-
- options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
- }
-
- if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
- return log_error_errno(errno, "overlay mount to %s failed: %m", where);
-
- return 0;
-}
-
-static int mount_custom(const char *dest) {
- unsigned i;
- int r;
-
- assert(dest);
-
- for (i = 0; i < arg_n_custom_mounts; i++) {
- CustomMount *m = &arg_custom_mounts[i];
-
- switch (m->type) {
-
- case CUSTOM_MOUNT_BIND:
- r = mount_bind(dest, m);
- break;
-
- case CUSTOM_MOUNT_TMPFS:
- r = mount_tmpfs(dest, m);
- break;
-
- case CUSTOM_MOUNT_OVERLAY:
- r = mount_overlay(dest, m);
- break;
-
- default:
- assert_not_reached("Unknown custom mount type");
- }
-
- if (r < 0)
- return r;
- }
-
- return 0;
-}
-
-static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
- char *to;
- int r;
-
- to = strjoina(dest, "/sys/fs/cgroup/", hierarchy);
-
- r = path_is_mount_point(to, 0);
- if (r < 0 && r != -ENOENT)
- return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
- if (r > 0)
- return 0;
-
- mkdir_p(to, 0755);
-
- /* The superblock mount options of the mount point need to be
- * identical to the hosts', and hence writable... */
- if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
- return log_error_errno(errno, "Failed to mount to %s: %m", to);
-
- /* ... hence let's only make the bind mount read-only, not the
- * superblock. */
- if (read_only) {
- if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
- return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
- }
- return 1;
-}
-
-static int mount_legacy_cgroups(const char *dest) {
- _cleanup_set_free_free_ Set *controllers = NULL;
- const char *cgroup_root;
- int r;
-
- cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
-
- /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
- r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
- if (r < 0)
- return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
- if (r == 0) {
- _cleanup_free_ char *options = NULL;
-
- r = tmpfs_patch_options("mode=755", &options);
- if (r < 0)
- return log_oom();
-
- if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
- return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
- }
-
- if (cg_unified() > 0)
- goto skip_controllers;
-
- controllers = set_new(&string_hash_ops);
- if (!controllers)
- return log_oom();
-
- r = cg_kernel_controllers(controllers);
- if (r < 0)
- return log_error_errno(r, "Failed to determine cgroup controllers: %m");
-
- for (;;) {
- _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
-
- controller = set_steal_first(controllers);
- if (!controller)
- break;
-
- origin = prefix_root("/sys/fs/cgroup/", controller);
- if (!origin)
- return log_oom();
-
- r = readlink_malloc(origin, &combined);
- if (r == -EINVAL) {
- /* Not a symbolic link, but directly a single cgroup hierarchy */
-
- r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
- if (r < 0)
- return r;
-
- } else if (r < 0)
- return log_error_errno(r, "Failed to read link %s: %m", origin);
- else {
- _cleanup_free_ char *target = NULL;
-
- target = prefix_root(dest, origin);
- if (!target)
- return log_oom();
-
- /* A symbolic link, a combination of controllers in one hierarchy */
-
- if (!filename_is_valid(combined)) {
- log_warning("Ignoring invalid combined hierarchy %s.", combined);
- continue;
- }
-
- r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
- if (r < 0)
- return r;
-
- r = symlink_idempotent(combined, target);
- if (r == -EINVAL) {
- log_error("Invalid existing symlink for combined hierarchy");
- return r;
- }
- if (r < 0)
- return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
- }
- }
+ arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
-skip_controllers:
- r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
+ r = detect_unified_cgroup_hierarchy();
if (r < 0)
return r;
- if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
- return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
-
- return 0;
+ return 1;
}
-static int mount_unified_cgroups(const char *dest) {
- const char *p;
- int r;
+static int verify_arguments(void) {
- assert(dest);
-
- p = strjoina(dest, "/sys/fs/cgroup");
-
- r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
- if (r < 0)
- return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
- if (r > 0) {
- p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs");
- if (access(p, F_OK) >= 0)
- return 0;
- if (errno != ENOENT)
- return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
-
- log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
+ if (arg_volatile_mode != VOLATILE_NO && arg_read_only) {
+ log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
return -EINVAL;
}
- if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
- return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
-
- return 0;
-}
-
-static int mount_cgroups(const char *dest) {
- if (arg_unified_cgroup_hierarchy)
- return mount_unified_cgroups(dest);
- else
- return mount_legacy_cgroups(dest);
-}
-
-static int mount_systemd_cgroup_writable(const char *dest) {
- _cleanup_free_ char *own_cgroup_path = NULL;
- const char *systemd_root, *systemd_own;
- int r;
-
- assert(dest);
-
- r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
- if (r < 0)
- return log_error_errno(r, "Failed to determine our own cgroup path: %m");
-
- /* If we are living in the top-level, then there's nothing to do... */
- if (path_equal(own_cgroup_path, "/"))
- return 0;
-
- if (arg_unified_cgroup_hierarchy) {
- systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
- systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
- } else {
- systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
- systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
+ if (arg_expose_ports && !arg_private_network) {
+ log_error("Cannot use --port= without private networking.");
+ return -EINVAL;
}
- /* Make our own cgroup a (writable) bind mount */
- if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
- return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
-
- /* And then remount the systemd cgroup root read-only */
- if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
- return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
+ if (arg_boot && arg_kill_signal <= 0)
+ arg_kill_signal = SIGRTMIN+3;
return 0;
}
@@ -1736,114 +1082,6 @@ static int setup_resolv_conf(const char *dest) {
return 0;
}
-static int setup_volatile_state(const char *directory) {
- _cleanup_free_ char *buf = NULL;
- const char *p, *options;
- int r;
-
- assert(directory);
-
- if (arg_volatile != VOLATILE_STATE)
- return 0;
-
- /* --volatile=state means we simply overmount /var
- with a tmpfs, and the rest read-only. */
-
- r = bind_remount_recursive(directory, true);
- if (r < 0)
- return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
-
- p = prefix_roota(directory, "/var");
- r = mkdir(p, 0755);
- if (r < 0 && errno != EEXIST)
- return log_error_errno(errno, "Failed to create %s: %m", directory);
-
- options = "mode=755";
- r = tmpfs_patch_options(options, &buf);
- if (r < 0)
- return log_oom();
- if (r > 0)
- options = buf;
-
- if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
- return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
-
- return 0;
-}
-
-static int setup_volatile(const char *directory) {
- bool tmpfs_mounted = false, bind_mounted = false;
- char template[] = "/tmp/nspawn-volatile-XXXXXX";
- _cleanup_free_ char *buf = NULL;
- const char *f, *t, *options;
- int r;
-
- assert(directory);
-
- if (arg_volatile != VOLATILE_YES)
- return 0;
-
- /* --volatile=yes means we mount a tmpfs to the root dir, and
- the original /usr to use inside it, and that read-only. */
-
- if (!mkdtemp(template))
- return log_error_errno(errno, "Failed to create temporary directory: %m");
-
- options = "mode=755";
- r = tmpfs_patch_options(options, &buf);
- if (r < 0)
- return log_oom();
- if (r > 0)
- options = buf;
-
- if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
- r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
- goto fail;
- }
-
- tmpfs_mounted = true;
-
- f = prefix_roota(directory, "/usr");
- t = prefix_roota(template, "/usr");
-
- r = mkdir(t, 0755);
- if (r < 0 && errno != EEXIST) {
- r = log_error_errno(errno, "Failed to create %s: %m", t);
- goto fail;
- }
-
- if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
- r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
- goto fail;
- }
-
- bind_mounted = true;
-
- r = bind_remount_recursive(t, true);
- if (r < 0) {
- log_error_errno(r, "Failed to remount %s read-only: %m", t);
- goto fail;
- }
-
- if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
- r = log_error_errno(errno, "Failed to move root mount: %m");
- goto fail;
- }
-
- (void) rmdir(template);
-
- return 0;
-
-fail:
- if (bind_mounted)
- (void) umount(t);
-
- if (tmpfs_mounted)
- (void) umount(template);
- (void) rmdir(template);
- return r;
-}
-
static char* id128_format_as_uuid(sd_id128_t id, char s[37]) {
assert(s);
@@ -2082,132 +1320,6 @@ static int setup_kmsg(const char *dest, int kmsg_socket) {
return 0;
}
-static int send_rtnl(int send_fd) {
- union {
- struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(int))];
- } control = {};
- struct msghdr mh = {
- .msg_control = &control,
- .msg_controllen = sizeof(control),
- };
- struct cmsghdr *cmsg;
- _cleanup_close_ int fd = -1;
- ssize_t k;
-
- assert(send_fd >= 0);
-
- if (!arg_expose_ports)
- return 0;
-
- fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_ROUTE);
- if (fd < 0)
- return log_error_errno(errno, "Failed to allocate container netlink: %m");
-
- cmsg = CMSG_FIRSTHDR(&mh);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(sizeof(int));
- memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
-
- mh.msg_controllen = cmsg->cmsg_len;
-
- /* Store away the fd in the socket, so that it stays open as
- * long as we run the child */
- k = sendmsg(send_fd, &mh, MSG_NOSIGNAL);
- if (k < 0)
- return log_error_errno(errno, "Failed to send netlink fd: %m");
-
- return 0;
-}
-
-static int flush_ports(union in_addr_union *exposed) {
- ExposePort *p;
- int r, af = AF_INET;
-
- assert(exposed);
-
- if (!arg_expose_ports)
- return 0;
-
- if (in_addr_is_null(af, exposed))
- return 0;
-
- log_debug("Lost IP address.");
-
- LIST_FOREACH(ports, p, arg_expose_ports) {
- r = fw_add_local_dnat(false,
- af,
- p->protocol,
- NULL,
- NULL, 0,
- NULL, 0,
- p->host_port,
- exposed,
- p->container_port,
- NULL);
- if (r < 0)
- log_warning_errno(r, "Failed to modify firewall: %m");
- }
-
- *exposed = IN_ADDR_NULL;
- return 0;
-}
-
-static int expose_ports(sd_netlink *rtnl, union in_addr_union *exposed) {
- _cleanup_free_ struct local_address *addresses = NULL;
- _cleanup_free_ char *pretty = NULL;
- union in_addr_union new_exposed;
- ExposePort *p;
- bool add;
- int af = AF_INET, r;
-
- assert(exposed);
-
- /* Invoked each time an address is added or removed inside the
- * container */
-
- if (!arg_expose_ports)
- return 0;
-
- r = local_addresses(rtnl, 0, af, &addresses);
- if (r < 0)
- return log_error_errno(r, "Failed to enumerate local addresses: %m");
-
- add = r > 0 &&
- addresses[0].family == af &&
- addresses[0].scope < RT_SCOPE_LINK;
-
- if (!add)
- return flush_ports(exposed);
-
- new_exposed = addresses[0].address;
- if (in_addr_equal(af, exposed, &new_exposed))
- return 0;
-
- in_addr_to_string(af, &new_exposed, &pretty);
- log_debug("New container IP is %s.", strna(pretty));
-
- LIST_FOREACH(ports, p, arg_expose_ports) {
-
- r = fw_add_local_dnat(true,
- af,
- p->protocol,
- NULL,
- NULL, 0,
- NULL, 0,
- p->host_port,
- &new_exposed,
- p->container_port,
- in_addr_is_null(af, exposed) ? NULL : exposed);
- if (r < 0)
- log_warning_errno(r, "Failed to modify firewall: %m");
- }
-
- *exposed = new_exposed;
- return 0;
-}
-
static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
union in_addr_union *exposed = userdata;
@@ -2215,62 +1327,7 @@ static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *user
assert(m);
assert(exposed);
- expose_ports(rtnl, exposed);
- return 0;
-}
-
-static int watch_rtnl(sd_event *event, int recv_fd, union in_addr_union *exposed, sd_netlink **ret) {
- union {
- struct cmsghdr cmsghdr;
- uint8_t buf[CMSG_SPACE(sizeof(int))];
- } control = {};
- struct msghdr mh = {
- .msg_control = &control,
- .msg_controllen = sizeof(control),
- };
- struct cmsghdr *cmsg;
- _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
- int fd, r;
- ssize_t k;
-
- assert(event);
- assert(recv_fd >= 0);
- assert(ret);
-
- if (!arg_expose_ports)
- return 0;
-
- k = recvmsg(recv_fd, &mh, MSG_NOSIGNAL);
- if (k < 0)
- return log_error_errno(errno, "Failed to recv netlink fd: %m");
-
- cmsg = CMSG_FIRSTHDR(&mh);
- assert(cmsg->cmsg_level == SOL_SOCKET);
- assert(cmsg->cmsg_type == SCM_RIGHTS);
- assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
- memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
-
- r = sd_netlink_open_fd(&rtnl, fd);
- if (r < 0) {
- safe_close(fd);
- return log_error_errno(r, "Failed to create rtnl object: %m");
- }
-
- r = sd_netlink_add_match(rtnl, RTM_NEWADDR, on_address_change, exposed);
- if (r < 0)
- return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR messages: %m");
-
- r = sd_netlink_add_match(rtnl, RTM_DELADDR, on_address_change, exposed);
- if (r < 0)
- return log_error_errno(r, "Failed to subscribe to RTM_DELADDR messages: %m");
-
- r = sd_netlink_attach_event(rtnl, event, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to add to even loop: %m");
-
- *ret = rtnl;
- rtnl = NULL;
-
+ expose_port_execute(rtnl, arg_expose_ports, exposed);
return 0;
}
@@ -2447,220 +1504,6 @@ static int drop_capabilities(void) {
return capability_bounding_set_drop(~arg_retain, false);
}
-static int register_machine(pid_t pid, int local_ifindex) {
- _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
- _cleanup_bus_flush_close_unref_ sd_bus *bus = NULL;
- int r;
-
- if (!arg_register)
- return 0;
-
- r = sd_bus_default_system(&bus);
- if (r < 0)
- return log_error_errno(r, "Failed to open system bus: %m");
-
- if (arg_keep_unit) {
- r = sd_bus_call_method(
- bus,
- "org.freedesktop.machine1",
- "/org/freedesktop/machine1",
- "org.freedesktop.machine1.Manager",
- "RegisterMachineWithNetwork",
- &error,
- NULL,
- "sayssusai",
- arg_machine,
- SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
- "nspawn",
- "container",
- (uint32_t) pid,
- strempty(arg_directory),
- local_ifindex > 0 ? 1 : 0, local_ifindex);
- } else {
- _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
- char **i;
- unsigned j;
-
- r = sd_bus_message_new_method_call(
- bus,
- &m,
- "org.freedesktop.machine1",
- "/org/freedesktop/machine1",
- "org.freedesktop.machine1.Manager",
- "CreateMachineWithNetwork");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(
- m,
- "sayssusai",
- arg_machine,
- SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
- "nspawn",
- "container",
- (uint32_t) pid,
- strempty(arg_directory),
- local_ifindex > 0 ? 1 : 0, local_ifindex);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_open_container(m, 'a', "(sv)");
- if (r < 0)
- return bus_log_create_error(r);
-
- if (!isempty(arg_slice)) {
- r = sd_bus_message_append(m, "(sv)", "Slice", "s", arg_slice);
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
- if (r < 0)
- return bus_log_create_error(r);
-
- /* If you make changes here, also make sure to update
- * systemd-nspawn@.service, to keep the device
- * policies in sync regardless if we are run with or
- * without the --keep-unit switch. */
- r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 9,
- /* Allow the container to
- * access and create the API
- * device nodes, so that
- * PrivateDevices= in the
- * container can work
- * fine */
- "/dev/null", "rwm",
- "/dev/zero", "rwm",
- "/dev/full", "rwm",
- "/dev/random", "rwm",
- "/dev/urandom", "rwm",
- "/dev/tty", "rwm",
- "/dev/net/tun", "rwm",
- /* Allow the container
- * access to ptys. However,
- * do not permit the
- * container to ever create
- * these device nodes. */
- "/dev/pts/ptmx", "rw",
- "char-pts", "rw");
- if (r < 0)
- return bus_log_create_error(r);
-
- for (j = 0; j < arg_n_custom_mounts; j++) {
- CustomMount *cm = &arg_custom_mounts[j];
-
- if (cm->type != CUSTOM_MOUNT_BIND)
- continue;
-
- r = is_device_node(cm->source);
- if (r < 0)
- return log_error_errno(r, "Failed to stat %s: %m", cm->source);
-
- if (r) {
- r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1,
- cm->source, cm->read_only ? "r" : "rw");
- if (r < 0)
- return log_error_errno(r, "Failed to append message arguments: %m");
- }
- }
-
- if (arg_kill_signal != 0) {
- r = sd_bus_message_append(m, "(sv)", "KillSignal", "i", arg_kill_signal);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_message_append(m, "(sv)", "KillMode", "s", "mixed");
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- STRV_FOREACH(i, arg_property) {
- r = sd_bus_message_open_container(m, 'r', "sv");
- if (r < 0)
- return bus_log_create_error(r);
-
- r = bus_append_unit_property_assignment(m, *i);
- if (r < 0)
- return r;
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
- }
-
- r = sd_bus_message_close_container(m);
- if (r < 0)
- return bus_log_create_error(r);
-
- r = sd_bus_call(bus, m, 0, &error, NULL);
- }
-
- if (r < 0) {
- log_error("Failed to register machine: %s", bus_error_message(&error, r));
- return r;
- }
-
- return 0;
-}
-
-static int terminate_machine(pid_t pid) {
- _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
- _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
- _cleanup_bus_flush_close_unref_ sd_bus *bus = NULL;
- const char *path;
- int r;
-
- if (!arg_register)
- return 0;
-
- /* If we are reusing the unit, then just exit, systemd will do
- * the right thing when we exit. */
- if (arg_keep_unit)
- return 0;
-
- r = sd_bus_default_system(&bus);
- if (r < 0)
- return log_error_errno(r, "Failed to open system bus: %m");
-
- r = sd_bus_call_method(
- bus,
- "org.freedesktop.machine1",
- "/org/freedesktop/machine1",
- "org.freedesktop.machine1.Manager",
- "GetMachineByPID",
- &error,
- &reply,
- "u",
- (uint32_t) pid);
- if (r < 0) {
- /* Note that the machine might already have been
- * cleaned up automatically, hence don't consider it a
- * failure if we cannot get the machine object. */
- log_debug("Failed to get machine: %s", bus_error_message(&error, r));
- return 0;
- }
-
- r = sd_bus_message_read(reply, "o", &path);
- if (r < 0)
- return bus_log_parse_error(r);
-
- r = sd_bus_call_method(
- bus,
- "org.freedesktop.machine1",
- path,
- "org.freedesktop.machine1.Machine",
- "Terminate",
- &error,
- NULL,
- NULL);
- if (r < 0) {
- log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
- return 0;
- }
-
- return 0;
-}
-
static int reset_audit_loginuid(void) {
_cleanup_free_ char *p = NULL;
int r;
@@ -2693,427 +1536,6 @@ static int reset_audit_loginuid(void) {
return 0;
}
-#define HOST_HASH_KEY SD_ID128_MAKE(1a,37,6f,c7,46,ec,45,0b,ad,a3,d5,31,06,60,5d,b1)
-#define CONTAINER_HASH_KEY SD_ID128_MAKE(c3,c4,f9,19,b5,57,b2,1c,e6,cf,14,27,03,9c,ee,a2)
-#define MACVLAN_HASH_KEY SD_ID128_MAKE(00,13,6d,bc,66,83,44,81,bb,0c,f9,51,1f,24,a6,6f)
-
-static int generate_mac(struct ether_addr *mac, sd_id128_t hash_key, uint64_t idx) {
- uint8_t result[8];
- size_t l, sz;
- uint8_t *v, *i;
- int r;
-
- l = strlen(arg_machine);
- sz = sizeof(sd_id128_t) + l;
- if (idx > 0)
- sz += sizeof(idx);
-
- v = alloca(sz);
-
- /* fetch some persistent data unique to the host */
- r = sd_id128_get_machine((sd_id128_t*) v);
- if (r < 0)
- return r;
-
- /* combine with some data unique (on this host) to this
- * container instance */
- i = mempcpy(v + sizeof(sd_id128_t), arg_machine, l);
- if (idx > 0) {
- idx = htole64(idx);
- memcpy(i, &idx, sizeof(idx));
- }
-
- /* Let's hash the host machine ID plus the container name. We
- * use a fixed, but originally randomly created hash key here. */
- siphash24(result, v, sz, hash_key.bytes);
-
- assert_cc(ETH_ALEN <= sizeof(result));
- memcpy(mac->ether_addr_octet, result, ETH_ALEN);
-
- /* see eth_random_addr in the kernel */
- mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
- mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
-
- return 0;
-}
-
-static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ], int *ifi) {
- _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
- _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
- struct ether_addr mac_host, mac_container;
- int r, i;
-
- if (!arg_private_network)
- return 0;
-
- if (!arg_network_veth)
- return 0;
-
- /* Use two different interface name prefixes depending whether
- * we are in bridge mode or not. */
- snprintf(iface_name, IFNAMSIZ - 1, "%s-%s",
- arg_network_bridge ? "vb" : "ve", arg_machine);
-
- r = generate_mac(&mac_container, CONTAINER_HASH_KEY, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to generate predictable MAC address for container side: %m");
-
- r = generate_mac(&mac_host, HOST_HASH_KEY, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to generate predictable MAC address for host side: %m");
-
- r = sd_netlink_open(&rtnl);
- if (r < 0)
- return log_error_errno(r, "Failed to connect to netlink: %m");
-
- r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to allocate netlink message: %m");
-
- r = sd_netlink_message_append_string(m, IFLA_IFNAME, iface_name);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface name: %m");
-
- r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac_host);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink MAC address: %m");
-
- r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "veth");
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_open_container(m, VETH_INFO_PEER);
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_append_string(m, IFLA_IFNAME, "host0");
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface name: %m");
-
- r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac_container);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink MAC address: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink namespace field: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_call(rtnl, m, 0, NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to add new veth interfaces (host0, %s): %m", iface_name);
-
- i = (int) if_nametoindex(iface_name);
- if (i <= 0)
- return log_error_errno(errno, "Failed to resolve interface %s: %m", iface_name);
-
- *ifi = i;
-
- return 0;
-}
-
-static int setup_bridge(const char veth_name[], int *ifi) {
- _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
- _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
- int r, bridge;
-
- if (!arg_private_network)
- return 0;
-
- if (!arg_network_veth)
- return 0;
-
- if (!arg_network_bridge)
- return 0;
-
- bridge = (int) if_nametoindex(arg_network_bridge);
- if (bridge <= 0)
- return log_error_errno(errno, "Failed to resolve interface %s: %m", arg_network_bridge);
-
- *ifi = bridge;
-
- r = sd_netlink_open(&rtnl);
- if (r < 0)
- return log_error_errno(r, "Failed to connect to netlink: %m");
-
- r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to allocate netlink message: %m");
-
- r = sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP);
- if (r < 0)
- return log_error_errno(r, "Failed to set IFF_UP flag: %m");
-
- r = sd_netlink_message_append_string(m, IFLA_IFNAME, veth_name);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface name field: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_MASTER, bridge);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink master field: %m");
-
- r = sd_netlink_call(rtnl, m, 0, NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to add veth interface to bridge: %m");
-
- return 0;
-}
-
-static int parse_interface(struct udev *udev, const char *name) {
- _cleanup_udev_device_unref_ struct udev_device *d = NULL;
- char ifi_str[2 + DECIMAL_STR_MAX(int)];
- int ifi;
-
- ifi = (int) if_nametoindex(name);
- if (ifi <= 0)
- return log_error_errno(errno, "Failed to resolve interface %s: %m", name);
-
- sprintf(ifi_str, "n%i", ifi);
- d = udev_device_new_from_device_id(udev, ifi_str);
- if (!d)
- return log_error_errno(errno, "Failed to get udev device for interface %s: %m", name);
-
- if (udev_device_get_is_initialized(d) <= 0) {
- log_error("Network interface %s is not initialized yet.", name);
- return -EBUSY;
- }
-
- return ifi;
-}
-
-static int move_network_interfaces(pid_t pid) {
- _cleanup_udev_unref_ struct udev *udev = NULL;
- _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
- char **i;
- int r;
-
- if (!arg_private_network)
- return 0;
-
- if (strv_isempty(arg_network_interfaces))
- return 0;
-
- r = sd_netlink_open(&rtnl);
- if (r < 0)
- return log_error_errno(r, "Failed to connect to netlink: %m");
-
- udev = udev_new();
- if (!udev) {
- log_error("Failed to connect to udev.");
- return -ENOMEM;
- }
-
- STRV_FOREACH(i, arg_network_interfaces) {
- _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
- int ifi;
-
- ifi = parse_interface(udev, *i);
- if (ifi < 0)
- return ifi;
-
- r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, ifi);
- if (r < 0)
- return log_error_errno(r, "Failed to allocate netlink message: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
- if (r < 0)
- return log_error_errno(r, "Failed to append namespace PID to netlink message: %m");
-
- r = sd_netlink_call(rtnl, m, 0, NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to move interface %s to namespace: %m", *i);
- }
-
- return 0;
-}
-
-static int setup_macvlan(pid_t pid) {
- _cleanup_udev_unref_ struct udev *udev = NULL;
- _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
- unsigned idx = 0;
- char **i;
- int r;
-
- if (!arg_private_network)
- return 0;
-
- if (strv_isempty(arg_network_macvlan))
- return 0;
-
- r = sd_netlink_open(&rtnl);
- if (r < 0)
- return log_error_errno(r, "Failed to connect to netlink: %m");
-
- udev = udev_new();
- if (!udev) {
- log_error("Failed to connect to udev.");
- return -ENOMEM;
- }
-
- STRV_FOREACH(i, arg_network_macvlan) {
- _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
- _cleanup_free_ char *n = NULL;
- struct ether_addr mac;
- int ifi;
-
- ifi = parse_interface(udev, *i);
- if (ifi < 0)
- return ifi;
-
- r = generate_mac(&mac, MACVLAN_HASH_KEY, idx++);
- if (r < 0)
- return log_error_errno(r, "Failed to create MACVLAN MAC address: %m");
-
- r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to allocate netlink message: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface index: %m");
-
- n = strappend("mv-", *i);
- if (!n)
- return log_oom();
-
- strshorten(n, IFNAMSIZ-1);
-
- r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface name: %m");
-
- r = sd_netlink_message_append_ether_addr(m, IFLA_ADDRESS, &mac);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink MAC address: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink namespace field: %m");
-
- r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "macvlan");
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
- if (r < 0)
- return log_error_errno(r, "Failed to append macvlan mode: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_call(rtnl, m, 0, NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to add new macvlan interfaces: %m");
- }
-
- return 0;
-}
-
-static int setup_ipvlan(pid_t pid) {
- _cleanup_udev_unref_ struct udev *udev = NULL;
- _cleanup_netlink_unref_ sd_netlink *rtnl = NULL;
- char **i;
- int r;
-
- if (!arg_private_network)
- return 0;
-
- if (strv_isempty(arg_network_ipvlan))
- return 0;
-
- r = sd_netlink_open(&rtnl);
- if (r < 0)
- return log_error_errno(r, "Failed to connect to netlink: %m");
-
- udev = udev_new();
- if (!udev) {
- log_error("Failed to connect to udev.");
- return -ENOMEM;
- }
-
- STRV_FOREACH(i, arg_network_ipvlan) {
- _cleanup_netlink_message_unref_ sd_netlink_message *m = NULL;
- _cleanup_free_ char *n = NULL;
- int ifi;
-
- ifi = parse_interface(udev, *i);
- if (ifi < 0)
- return ifi;
-
- r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to allocate netlink message: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_LINK, ifi);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface index: %m");
-
- n = strappend("iv-", *i);
- if (!n)
- return log_oom();
-
- strshorten(n, IFNAMSIZ-1);
-
- r = sd_netlink_message_append_string(m, IFLA_IFNAME, n);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink interface name: %m");
-
- r = sd_netlink_message_append_u32(m, IFLA_NET_NS_PID, pid);
- if (r < 0)
- return log_error_errno(r, "Failed to add netlink namespace field: %m");
-
- r = sd_netlink_message_open_container(m, IFLA_LINKINFO);
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "ipvlan");
- if (r < 0)
- return log_error_errno(r, "Failed to open netlink container: %m");
-
- r = sd_netlink_message_append_u16(m, IFLA_IPVLAN_MODE, IPVLAN_MODE_L2);
- if (r < 0)
- return log_error_errno(r, "Failed to add ipvlan mode: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_message_close_container(m);
- if (r < 0)
- return log_error_errno(r, "Failed to close netlink container: %m");
-
- r = sd_netlink_call(rtnl, m, 0, NULL);
- if (r < 0)
- return log_error_errno(r, "Failed to add new ipvlan interfaces: %m");
- }
-
- return 0;
-}
-
static int setup_seccomp(void) {
#ifdef HAVE_SECCOMP
@@ -3812,247 +2234,6 @@ static void loop_remove(int nr, int *image_fd) {
log_debug_errno(errno, "Failed to remove loop %d: %m", nr);
}
-static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
- int pipe_fds[2];
- pid_t pid;
-
- assert(database);
- assert(key);
- assert(rpid);
-
- if (pipe2(pipe_fds, O_CLOEXEC) < 0)
- return log_error_errno(errno, "Failed to allocate pipe: %m");
-
- pid = fork();
- if (pid < 0)
- return log_error_errno(errno, "Failed to fork getent child: %m");
- else if (pid == 0) {
- int nullfd;
- char *empty_env = NULL;
-
- if (dup3(pipe_fds[1], STDOUT_FILENO, 0) < 0)
- _exit(EXIT_FAILURE);
-
- if (pipe_fds[0] > 2)
- safe_close(pipe_fds[0]);
- if (pipe_fds[1] > 2)
- safe_close(pipe_fds[1]);
-
- nullfd = open("/dev/null", O_RDWR);
- if (nullfd < 0)
- _exit(EXIT_FAILURE);
-
- if (dup3(nullfd, STDIN_FILENO, 0) < 0)
- _exit(EXIT_FAILURE);
-
- if (dup3(nullfd, STDERR_FILENO, 0) < 0)
- _exit(EXIT_FAILURE);
-
- if (nullfd > 2)
- safe_close(nullfd);
-
- (void) reset_all_signal_handlers();
- (void) reset_signal_mask();
- close_all_fds(NULL, 0);
-
- execle("/usr/bin/getent", "getent", database, key, NULL, &empty_env);
- execle("/bin/getent", "getent", database, key, NULL, &empty_env);
- _exit(EXIT_FAILURE);
- }
-
- pipe_fds[1] = safe_close(pipe_fds[1]);
-
- *rpid = pid;
-
- return pipe_fds[0];
-}
-
-static int change_uid_gid(char **_home) {
- char line[LINE_MAX], *x, *u, *g, *h;
- const char *word, *state;
- _cleanup_free_ uid_t *uids = NULL;
- _cleanup_free_ char *home = NULL;
- _cleanup_fclose_ FILE *f = NULL;
- _cleanup_close_ int fd = -1;
- unsigned n_uids = 0;
- size_t sz = 0, l;
- uid_t uid;
- gid_t gid;
- pid_t pid;
- int r;
-
- assert(_home);
-
- if (!arg_user || streq(arg_user, "root") || streq(arg_user, "0")) {
- /* Reset everything fully to 0, just in case */
-
- r = reset_uid_gid();
- if (r < 0)
- return log_error_errno(r, "Failed to become root: %m");
-
- *_home = NULL;
- return 0;
- }
-
- /* First, get user credentials */
- fd = spawn_getent("passwd", arg_user, &pid);
- if (fd < 0)
- return fd;
-
- f = fdopen(fd, "r");
- if (!f)
- return log_oom();
- fd = -1;
-
- if (!fgets(line, sizeof(line), f)) {
-
- if (!ferror(f)) {
- log_error("Failed to resolve user %s.", arg_user);
- return -ESRCH;
- }
-
- log_error_errno(errno, "Failed to read from getent: %m");
- return -errno;
- }
-
- truncate_nl(line);
-
- wait_for_terminate_and_warn("getent passwd", pid, true);
-
- x = strchr(line, ':');
- if (!x) {
- log_error("/etc/passwd entry has invalid user field.");
- return -EIO;
- }
-
- u = strchr(x+1, ':');
- if (!u) {
- log_error("/etc/passwd entry has invalid password field.");
- return -EIO;
- }
-
- u++;
- g = strchr(u, ':');
- if (!g) {
- log_error("/etc/passwd entry has invalid UID field.");
- return -EIO;
- }
-
- *g = 0;
- g++;
- x = strchr(g, ':');
- if (!x) {
- log_error("/etc/passwd entry has invalid GID field.");
- return -EIO;
- }
-
- *x = 0;
- h = strchr(x+1, ':');
- if (!h) {
- log_error("/etc/passwd entry has invalid GECOS field.");
- return -EIO;
- }
-
- h++;
- x = strchr(h, ':');
- if (!x) {
- log_error("/etc/passwd entry has invalid home directory field.");
- return -EIO;
- }
-
- *x = 0;
-
- r = parse_uid(u, &uid);
- if (r < 0) {
- log_error("Failed to parse UID of user.");
- return -EIO;
- }
-
- r = parse_gid(g, &gid);
- if (r < 0) {
- log_error("Failed to parse GID of user.");
- return -EIO;
- }
-
- home = strdup(h);
- if (!home)
- return log_oom();
-
- /* Second, get group memberships */
- fd = spawn_getent("initgroups", arg_user, &pid);
- if (fd < 0)
- return fd;
-
- fclose(f);
- f = fdopen(fd, "r");
- if (!f)
- return log_oom();
- fd = -1;
-
- if (!fgets(line, sizeof(line), f)) {
- if (!ferror(f)) {
- log_error("Failed to resolve user %s.", arg_user);
- return -ESRCH;
- }
-
- log_error_errno(errno, "Failed to read from getent: %m");
- return -errno;
- }
-
- truncate_nl(line);
-
- wait_for_terminate_and_warn("getent initgroups", pid, true);
-
- /* Skip over the username and subsequent separator whitespace */
- x = line;
- x += strcspn(x, WHITESPACE);
- x += strspn(x, WHITESPACE);
-
- FOREACH_WORD(word, l, x, state) {
- char c[l+1];
-
- memcpy(c, word, l);
- c[l] = 0;
-
- if (!GREEDY_REALLOC(uids, sz, n_uids+1))
- return log_oom();
-
- r = parse_uid(c, &uids[n_uids++]);
- if (r < 0) {
- log_error("Failed to parse group data from getent.");
- return -EIO;
- }
- }
-
- r = mkdir_parents(home, 0775);
- if (r < 0)
- return log_error_errno(r, "Failed to make home root directory: %m");
-
- r = mkdir_safe(home, 0755, uid, gid);
- if (r < 0 && r != -EEXIST)
- return log_error_errno(r, "Failed to make home directory: %m");
-
- (void) fchown(STDIN_FILENO, uid, gid);
- (void) fchown(STDOUT_FILENO, uid, gid);
- (void) fchown(STDERR_FILENO, uid, gid);
-
- if (setgroups(n_uids, uids) < 0)
- return log_error_errno(errno, "Failed to set auxiliary groups: %m");
-
- if (setresgid(gid, gid, gid) < 0)
- return log_error_errno(errno, "setregid() failed: %m");
-
- if (setresuid(uid, uid, uid) < 0)
- return log_error_errno(errno, "setreuid() failed: %m");
-
- if (_home) {
- *_home = home;
- home = NULL;
- }
-
- return 0;
-}
-
/*
* Return values:
* < 0 : wait_for_terminate() failed to get the state of the
@@ -4254,9 +2435,7 @@ static int inner_child(
bool secondary,
int kmsg_socket,
int rtnl_socket,
- FDSet *fds,
- int argc,
- char *argv[]) {
+ FDSet *fds) {
_cleanup_free_ char *home = NULL;
unsigned n_env = 2;
@@ -4293,7 +2472,7 @@ static int inner_child(
}
}
- r = mount_all(NULL, true);
+ r = mount_all(NULL, true, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
if (r < 0)
return r;
@@ -4304,7 +2483,7 @@ static int inner_child(
return -ESRCH;
}
- r = mount_systemd_cgroup_writable("");
+ r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy);
if (r < 0)
return r;
@@ -4329,10 +2508,12 @@ static int inner_child(
if (arg_private_network)
loopback_setup();
- r = send_rtnl(rtnl_socket);
- if (r < 0)
- return r;
- rtnl_socket = safe_close(rtnl_socket);
+ if (arg_expose_ports) {
+ r = expose_port_send_rtnl(rtnl_socket);
+ if (r < 0)
+ return r;
+ rtnl_socket = safe_close(rtnl_socket);
+ }
if (drop_capabilities() < 0)
return log_error_errno(errno, "drop_capabilities() failed: %m");
@@ -4353,7 +2534,7 @@ static int inner_child(
return log_error_errno(errno, "setexeccon(\"%s\") failed: %m", arg_selinux_context);
#endif
- r = change_uid_gid(&home);
+ r = change_uid_gid(arg_user, &home);
if (r < 0)
return r;
@@ -4412,9 +2593,12 @@ static int inner_child(
/* Automatically search for the init system */
- m = 1 + argc - optind;
+ m = 1 + strv_length(arg_parameters);
a = newa(char*, m + 1);
- memcpy(a + 1, argv + optind, m * sizeof(char*));
+ if (strv_isempty(arg_parameters))
+ a[1] = NULL;
+ else
+ memcpy(a + 1, arg_parameters, m * sizeof(char*));
a[0] = (char*) "/usr/lib/systemd/systemd";
execve(a[0], a, env_use);
@@ -4424,10 +2608,10 @@ static int inner_child(
a[0] = (char*) "/sbin/init";
execve(a[0], a, env_use);
- } else if (argc > optind)
- execvpe(argv[optind], argv + optind, env_use);
+ } else if (!strv_isempty(arg_parameters))
+ execvpe(arg_parameters[0], arg_parameters, env_use);
else {
- chdir(home ? home : "/root");
+ chdir(home ?: "/root");
execle("/bin/bash", "-bash", NULL, env_use);
execle("/bin/sh", "-sh", NULL, env_use);
}
@@ -4449,9 +2633,7 @@ static int outer_child(
int kmsg_socket,
int rtnl_socket,
int uid_shift_socket,
- FDSet *fds,
- int argc,
- char *argv[]) {
+ FDSet *fds) {
pid_t pid;
ssize_t l;
@@ -4523,11 +2705,11 @@ static int outer_child(
if (mount(directory, directory, NULL, MS_BIND|MS_REC, NULL) < 0)
return log_error_errno(errno, "Failed to make bind mount: %m");
- r = setup_volatile(directory);
+ r = setup_volatile(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context);
if (r < 0)
return r;
- r = setup_volatile_state(directory);
+ r = setup_volatile_state(directory, arg_volatile_mode, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_context);
if (r < 0)
return r;
@@ -4541,16 +2723,18 @@ static int outer_child(
return log_error_errno(r, "Failed to make tree read-only: %m");
}
- r = mount_all(directory, false);
+ r = mount_all(directory, false, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
if (r < 0)
return r;
- if (copy_devnodes(directory) < 0)
+ r = copy_devnodes(directory);
+ if (r < 0)
return r;
dev_setup(directory, arg_uid_shift, arg_uid_shift);
- if (setup_pts(directory) < 0)
+ r = setup_pts(directory);
+ if (r < 0)
return r;
r = setup_propagate(directory);
@@ -4577,11 +2761,11 @@ static int outer_child(
if (r < 0)
return r;
- r = mount_custom(directory);
+ r = mount_custom(directory, arg_custom_mounts, arg_n_custom_mounts, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
if (r < 0)
return r;
- r = mount_cgroups(directory);
+ r = mount_cgroups(directory, arg_unified_cgroup_hierarchy, arg_userns, arg_uid_shift, arg_uid_range, arg_selinux_apifs_context);
if (r < 0)
return r;
@@ -4604,7 +2788,7 @@ static int outer_child(
* requested, so that we all are owned by the user if
* user namespaces are turned on. */
- r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds, argc, argv);
+ r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
if (r < 0)
_exit(EXIT_FAILURE);
@@ -4645,138 +2829,199 @@ static int setup_uid_map(pid_t pid) {
return 0;
}
-static int chown_cgroup(pid_t pid) {
- _cleanup_free_ char *path = NULL, *fs = NULL;
- _cleanup_close_ int fd = -1;
- const char *fn;
+static int load_settings(void) {
+ _cleanup_(settings_freep) Settings *settings = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ const char *fn, *i;
int r;
- r = cg_pid_get_path(NULL, pid, &path);
- if (r < 0)
- return log_error_errno(r, "Failed to get container cgroup path: %m");
+ /* If all settings are masked, there's no point in looking for
+ * the settings file */
+ if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
+ return 0;
- r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
- if (r < 0)
- return log_error_errno(r, "Failed to get file system path for container cgroup: %m");
+ fn = strjoina(arg_machine, ".nspawn");
- fd = open(fs, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
- if (fd < 0)
- return log_error_errno(errno, "Failed to open %s: %m", fs);
-
- FOREACH_STRING(fn,
- ".",
- "tasks",
- "notify_on_release",
- "cgroup.procs",
- "cgroup.clone_children",
- "cgroup.controllers",
- "cgroup.subtree_control",
- "cgroup.populated")
- if (fchownat(fd, fn, arg_uid_shift, arg_uid_shift, 0) < 0)
- log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
- "Failed to chown() cgroup file %s, ignoring: %m", fn);
+ /* We first look in the admin's directories in /etc and /run */
+ FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+ _cleanup_free_ char *j = NULL;
- return 0;
-}
+ j = strjoin(i, "/", fn, NULL);
+ if (!j)
+ return log_oom();
+
+ f = fopen(j, "re");
+ if (f) {
+ p = j;
+ j = NULL;
+
+ /* By default we trust configuration from /etc and /run */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = true;
+
+ break;
+ }
-static int sync_cgroup(pid_t pid) {
- _cleanup_free_ char *cgroup = NULL;
- char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
- bool undo_mount = false;
- const char *fn;
- int unified, r;
+ if (errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", j);
+ }
+
+ if (!f) {
+ /* After that, let's look for a file next to the
+ * actual image we shall boot. */
+
+ if (arg_image) {
+ p = file_in_same_dir(arg_image, fn);
+ if (!p)
+ return log_oom();
+ } else if (arg_directory) {
+ p = file_in_same_dir(arg_directory, fn);
+ if (!p)
+ return log_oom();
+ }
- unified = cg_unified();
- if (unified < 0)
- return log_error_errno(unified, "Failed to determine whether the unified hierachy is used: %m");
+ if (p) {
+ f = fopen(p, "re");
+ if (!f && errno != ENOENT)
+ return log_error_errno(errno, "Failed to open %s: %m", p);
- if ((unified > 0) == arg_unified_cgroup_hierarchy)
+ /* By default we do not trust configuration from /var/lib/machines */
+ if (arg_settings_trusted < 0)
+ arg_settings_trusted = false;
+ }
+ }
+
+ if (!f)
return 0;
- /* When the host uses the legacy cgroup setup, but the
- * container shall use the unified hierarchy, let's make sure
- * we copy the path from the name=systemd hierarchy into the
- * unified hierarchy. Similar for the reverse situation. */
+ log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
- r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ r = settings_load(f, p, &settings);
if (r < 0)
- return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid);
+ return r;
- /* In order to access the unified hierarchy we need to mount it */
- if (!mkdtemp(tree))
- return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
+ /* Copy over bits from the settings, unless they have been
+ * explicitly masked by command line switches. */
- if (unified)
- r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
- else
- r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior");
- if (r < 0) {
- r = log_error_errno(errno, "Failed to mount unified hierarchy: %m");
- goto finish;
+ if ((arg_settings_mask & SETTING_BOOT) == 0 &&
+ settings->boot >= 0) {
+ arg_boot = settings->boot;
+
+ strv_free(arg_parameters);
+ arg_parameters = settings->parameters;
+ settings->parameters = NULL;
}
- undo_mount = true;
+ if ((arg_settings_mask & SETTING_ENVIRONMENT) == 0 &&
+ settings->environment) {
+ strv_free(arg_setenv);
+ arg_setenv = settings->environment;
+ settings->environment = NULL;
+ }
- fn = strjoina(tree, cgroup, "/cgroup.procs");
- (void) mkdir_parents(fn, 0755);
+ if ((arg_settings_mask & SETTING_USER) == 0 &&
+ settings->user) {
+ free(arg_user);
+ arg_user = settings->user;
+ settings->user = NULL;
+ }
- sprintf(pid_string, PID_FMT, pid);
- r = write_string_file(fn, pid_string, 0);
- if (r < 0)
- log_error_errno(r, "Failed to move process: %m");
+ if ((arg_settings_mask & SETTING_CAPABILITY) == 0) {
-finish:
- if (undo_mount)
- (void) umount(tree);
+ if (!arg_settings_trusted && settings->capability != 0)
+ log_warning("Ignoring Capability= setting, file %s is not trusted.", p);
+ else
+ arg_retain |= settings->capability;
- (void) rmdir(tree);
- return r;
-}
+ arg_retain &= ~settings->drop_capability;
+ }
-static int create_subcgroup(pid_t pid) {
- _cleanup_free_ char *cgroup = NULL;
- const char *child;
- int unified, r;
- CGroupMask supported;
+ if ((arg_settings_mask & SETTING_KILL_SIGNAL) == 0 &&
+ settings->kill_signal > 0)
+ arg_kill_signal = settings->kill_signal;
- /* In the unified hierarchy inner nodes may only only contain
- * subgroups, but not processes. Hence, if we running in the
- * unified hierarchy and the container does the same, and we
- * did not create a scope unit for the container move us and
- * the container into two separate subcgroups. */
+ if ((arg_settings_mask & SETTING_PERSONALITY) == 0 &&
+ settings->personality != PERSONALITY_INVALID)
+ arg_personality = settings->personality;
- if (!arg_keep_unit)
- return 0;
+ if ((arg_settings_mask & SETTING_MACHINE_ID) == 0 &&
+ !sd_id128_is_null(settings->machine_id)) {
- if (!arg_unified_cgroup_hierarchy)
- return 0;
+ if (!arg_settings_trusted)
+ log_warning("Ignoring MachineID= setting, file %s is not trusted.", p);
+ else
+ arg_uuid = settings->machine_id;
+ }
- unified = cg_unified();
- if (unified < 0)
- return log_error_errno(unified, "Failed to determine whether the unified hierachy is used: %m");
- if (unified == 0)
- return 0;
+ if ((arg_settings_mask & SETTING_READ_ONLY) == 0 &&
+ settings->read_only >= 0)
+ arg_read_only = settings->read_only;
- r = cg_mask_supported(&supported);
- if (r < 0)
- return log_error_errno(r, "Failed to determine supported controllers: %m");
+ if ((arg_settings_mask & SETTING_VOLATILE_MODE) == 0 &&
+ settings->volatile_mode != _VOLATILE_MODE_INVALID)
+ arg_volatile_mode = settings->volatile_mode;
- r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
- if (r < 0)
- return log_error_errno(r, "Failed to get our control group: %m");
+ if ((arg_settings_mask & SETTING_CUSTOM_MOUNTS) == 0 &&
+ settings->n_custom_mounts > 0) {
- child = strjoina(cgroup, "/payload");
- r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid);
- if (r < 0)
- return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+ if (!arg_settings_trusted)
+ log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", p);
+ else {
+ custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+ arg_custom_mounts = settings->custom_mounts;
+ arg_n_custom_mounts = settings->n_custom_mounts;
- child = strjoina(cgroup, "/supervisor");
- r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0);
- if (r < 0)
- return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+ settings->custom_mounts = NULL;
+ settings->n_custom_mounts = 0;
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_NETWORK) == 0 &&
+ (settings->private_network >= 0 ||
+ settings->network_veth >= 0 ||
+ settings->network_bridge ||
+ settings->network_interfaces ||
+ settings->network_macvlan ||
+ settings->network_ipvlan)) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring network settings, file %s is not trusted.", p);
+ else {
+ strv_free(arg_network_interfaces);
+ arg_network_interfaces = settings->network_interfaces;
+ settings->network_interfaces = NULL;
+
+ strv_free(arg_network_macvlan);
+ arg_network_macvlan = settings->network_macvlan;
+ settings->network_macvlan = NULL;
+
+ strv_free(arg_network_ipvlan);
+ arg_network_ipvlan = settings->network_ipvlan;
+ settings->network_ipvlan = NULL;
+
+ free(arg_network_bridge);
+ arg_network_bridge = settings->network_bridge;
+ settings->network_bridge = NULL;
+
+ arg_network_veth = settings->network_veth > 0 || settings->network_bridge;
+
+ arg_private_network = true; /* all these settings imply private networking */
+ }
+ }
+
+ if ((arg_settings_mask & SETTING_EXPOSE_PORTS) == 0 &&
+ settings->expose_ports) {
+
+ if (!arg_settings_trusted)
+ log_warning("Ignoring Port= setting, file %s is not trusted.", p);
+ else {
+ expose_port_free_all(arg_expose_ports);
+ arg_expose_ports = settings->expose_ports;
+ settings->expose_ports = NULL;
+ }
+ }
- /* Try to enable as many controllers as possible for the new payload. */
- (void) cg_enable_everywhere(supported, supported, cgroup);
return 0;
}
@@ -4803,15 +3048,22 @@ int main(int argc, char *argv[]) {
if (r <= 0)
goto finish;
- r = determine_names();
- if (r < 0)
- goto finish;
-
if (geteuid() != 0) {
log_error("Need to be root.");
r = -EPERM;
goto finish;
}
+ r = determine_names();
+ if (r < 0)
+ goto finish;
+
+ r = load_settings();
+ if (r < 0)
+ goto finish;
+
+ r = verify_arguments();
+ if (r < 0)
+ goto finish;
n_fd_passed = sd_listen_fds(false);
if (n_fd_passed > 0) {
@@ -5020,23 +3272,23 @@ int main(int argc, char *argv[]) {
goto finish;
}
- if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
r = log_error_errno(errno, "Failed to create kmsg socket pair: %m");
goto finish;
}
- if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, rtnl_socket_pair) < 0) {
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, rtnl_socket_pair) < 0) {
r = log_error_errno(errno, "Failed to create rtnl socket pair: %m");
goto finish;
}
- if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pid_socket_pair) < 0) {
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pid_socket_pair) < 0) {
r = log_error_errno(errno, "Failed to create pid socket pair: %m");
goto finish;
}
if (arg_userns)
- if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0) {
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0) {
r = log_error_errno(errno, "Failed to create uid shift socket pair: %m");
goto finish;
}
@@ -5092,8 +3344,7 @@ int main(int argc, char *argv[]) {
kmsg_socket_pair[1],
rtnl_socket_pair[1],
uid_shift_socket_pair[1],
- fds,
- argc, argv);
+ fds);
if (r < 0)
_exit(EXIT_FAILURE);
@@ -5108,6 +3359,7 @@ int main(int argc, char *argv[]) {
kmsg_socket_pair[1] = safe_close(kmsg_socket_pair[1]);
rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]);
pid_socket_pair[1] = safe_close(pid_socket_pair[1]);
+ uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
/* Wait for the outer child. */
r = wait_for_terminate_and_warn("namespace helper", pid, NULL);
@@ -5126,7 +3378,7 @@ int main(int argc, char *argv[]) {
goto finish;
}
if (l != sizeof(pid)) {
- log_error("Short read while reading inner child PID: %m");
+ log_error("Short read while reading inner child PID.");
r = EIO;
goto finish;
}
@@ -5146,7 +3398,7 @@ int main(int argc, char *argv[]) {
goto finish;
}
if (l != sizeof(arg_uid_shift)) {
- log_error("Short read while reading UID shift: %m");
+ log_error("Short read while reading UID shift.");
r = EIO;
goto finish;
}
@@ -5158,39 +3410,64 @@ int main(int argc, char *argv[]) {
(void) barrier_place(&barrier); /* #2 */
}
- r = move_network_interfaces(pid);
- if (r < 0)
- goto finish;
+ if (arg_private_network) {
- r = setup_veth(pid, veth_name, &ifi);
- if (r < 0)
- goto finish;
+ r = move_network_interfaces(pid, arg_network_interfaces);
+ if (r < 0)
+ goto finish;
- r = setup_bridge(veth_name, &ifi);
- if (r < 0)
- goto finish;
+ if (arg_network_veth) {
+ r = setup_veth(arg_machine, pid, veth_name, !!arg_network_bridge);
+ if (r < 0)
+ goto finish;
+ else if (r > 0)
+ ifi = r;
- r = setup_macvlan(pid);
- if (r < 0)
- goto finish;
+ if (arg_network_bridge) {
+ r = setup_bridge(veth_name, arg_network_bridge);
+ if (r < 0)
+ goto finish;
+ if (r > 0)
+ ifi = r;
+ }
+ }
- r = setup_ipvlan(pid);
- if (r < 0)
- goto finish;
+ r = setup_macvlan(arg_machine, pid, arg_network_macvlan);
+ if (r < 0)
+ goto finish;
- r = register_machine(pid, ifi);
- if (r < 0)
- goto finish;
+ r = setup_ipvlan(arg_machine, pid, arg_network_ipvlan);
+ if (r < 0)
+ goto finish;
+ }
- r = sync_cgroup(pid);
- if (r < 0)
- goto finish;
+ if (arg_register) {
+ r = register_machine(
+ arg_machine,
+ pid,
+ arg_directory,
+ arg_uuid,
+ ifi,
+ arg_slice,
+ arg_custom_mounts, arg_n_custom_mounts,
+ arg_kill_signal,
+ arg_property,
+ arg_keep_unit);
+ if (r < 0)
+ goto finish;
+ }
- r = create_subcgroup(pid);
+ r = sync_cgroup(pid, arg_unified_cgroup_hierarchy);
if (r < 0)
goto finish;
- r = chown_cgroup(pid);
+ if (arg_keep_unit) {
+ r = create_subcgroup(pid, arg_unified_cgroup_hierarchy);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = chown_cgroup(pid, arg_uid_shift);
if (r < 0)
goto finish;
@@ -5243,11 +3520,11 @@ int main(int argc, char *argv[]) {
sd_event_add_signal(event, NULL, SIGCHLD, NULL, NULL);
if (arg_expose_ports) {
- r = watch_rtnl(event, rtnl_socket_pair[0], &exposed, &rtnl);
+ r = expose_port_watch_rtnl(event, rtnl_socket_pair[0], on_address_change, &exposed, &rtnl);
if (r < 0)
goto finish;
- (void) expose_ports(rtnl, &exposed);
+ (void) expose_port_execute(rtnl, arg_expose_ports, &exposed);
}
rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
@@ -5272,7 +3549,8 @@ int main(int argc, char *argv[]) {
putc('\n', stdout);
/* Kill if it is not dead yet anyway */
- terminate_machine(pid);
+ if (arg_register && !arg_keep_unit)
+ terminate_machine(pid);
/* Normally redundant, but better safe than sorry */
kill(pid, SIGKILL);
@@ -5310,7 +3588,7 @@ int main(int argc, char *argv[]) {
break;
}
- flush_ports(&exposed);
+ expose_port_flush(arg_expose_ports, &exposed);
}
finish:
@@ -5342,24 +3620,21 @@ finish:
(void) rm_rf(p, REMOVE_ROOT);
}
+ expose_port_flush(arg_expose_ports, &exposed);
+
free(arg_directory);
free(arg_template);
free(arg_image);
free(arg_machine);
free(arg_user);
strv_free(arg_setenv);
+ free(arg_network_bridge);
strv_free(arg_network_interfaces);
strv_free(arg_network_macvlan);
strv_free(arg_network_ipvlan);
- custom_mount_free_all();
-
- flush_ports(&exposed);
-
- while (arg_expose_ports) {
- ExposePort *p = arg_expose_ports;
- LIST_REMOVE(ports, arg_expose_ports, p);
- free(p);
- }
+ strv_free(arg_parameters);
+ custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+ expose_port_free_all(arg_expose_ports);
return r < 0 ? EXIT_FAILURE : ret;
}
diff --git a/src/resolve/resolved-dns-packet.h b/src/resolve/resolved-dns-packet.h
index 5628e579ad..fbbabaf232 100644
--- a/src/resolve/resolved-dns-packet.h
+++ b/src/resolve/resolved-dns-packet.h
@@ -78,7 +78,7 @@ struct DnsPacket {
DnsQuestion *question;
DnsAnswer *answer;
- /* Packet reception meta data */
+ /* Packet reception metadata */
int ifindex;
int family, ipproto;
union in_addr_union sender, destination;
diff --git a/src/shared/condition.c b/src/shared/condition.c
index f58e84a3d0..1d7dd49e04 100644
--- a/src/shared/condition.c
+++ b/src/shared/condition.c
@@ -125,13 +125,12 @@ static int condition_test_kernel_command_line(Condition *c) {
static int condition_test_virtualization(Condition *c) {
int b, v;
- const char *id;
assert(c);
assert(c->parameter);
assert(c->type == CONDITION_VIRTUALIZATION);
- v = detect_virtualization(&id);
+ v = detect_virtualization();
if (v < 0)
return v;
@@ -145,14 +144,14 @@ static int condition_test_virtualization(Condition *c) {
return true;
/* Then, compare categorization */
- if (v == VIRTUALIZATION_VM && streq(c->parameter, "vm"))
+ if (VIRTUALIZATION_IS_VM(v) && streq(c->parameter, "vm"))
return true;
- if (v == VIRTUALIZATION_CONTAINER && streq(c->parameter, "container"))
+ if (VIRTUALIZATION_IS_CONTAINER(v) && streq(c->parameter, "container"))
return true;
/* Finally compare id */
- return v > 0 && streq(c->parameter, id);
+ return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v));
}
static int condition_test_architecture(Condition *c) {
diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
index 1f4aea6d6b..36150d8744 100644
--- a/src/shared/conf-parser.c
+++ b/src/shared/conf-parser.c
@@ -24,7 +24,7 @@
#include <errno.h>
#include <stdlib.h>
-#include "conf-parser.h"
+#include "sd-messages.h"
#include "conf-files.h"
#include "util.h"
#include "macro.h"
@@ -32,7 +32,8 @@
#include "log.h"
#include "utf8.h"
#include "path-util.h"
-#include "sd-messages.h"
+#include "signal-util.h"
+#include "conf-parser.h"
int config_item_table_lookup(
const void *table,
@@ -575,6 +576,39 @@ int config_parse_bool(const char* unit,
return 0;
}
+int config_parse_tristate(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k, *t = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* A tristate is pretty much a boolean, except that it can
+ * also take the special value -1, indicating "uninitialized",
+ * much like NULL is for a pointer type. */
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *t = !!k;
+ return 0;
+}
+
int config_parse_string(
const char *unit,
const char *filename,
@@ -802,3 +836,61 @@ int config_parse_log_level(
*o = (*o & LOG_FACMASK) | x;
return 0;
}
+
+int config_parse_signal(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *sig = data, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(sig);
+
+ r = signal_from_string_try_harder(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse signal name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *sig = r;
+ return 0;
+}
+
+int config_parse_personality(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned long *personality = data, p;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(personality);
+
+ p = personality_from_string(rvalue);
+ if (p == PERSONALITY_INVALID) {
+ log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse personality, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *personality = p;
+ return 0;
+}
diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h
index 66c80890d3..34e3815782 100644
--- a/src/shared/conf-parser.h
+++ b/src/shared/conf-parser.h
@@ -111,6 +111,7 @@ int config_parse_iec_size(const char *unit, const char *filename, unsigned line,
int config_parse_si_size(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_iec_off(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_bool(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_tristate(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_string(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_path(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -119,6 +120,8 @@ int config_parse_nsec(const char *unit, const char *filename, unsigned line, con
int config_parse_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_log_facility(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_log_level(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_signal(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_personality(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
#define log_invalid_utf8(unit, level, config_file, config_line, error, rvalue) \
do { \
diff --git a/src/shared/efivars.c b/src/shared/efivars.c
index 347cd30b09..f087c2a566 100644
--- a/src/shared/efivars.c
+++ b/src/shared/efivars.c
@@ -101,7 +101,7 @@ int efi_reboot_to_firmware_supported(void) {
uint64_t b;
_cleanup_free_ void *v = NULL;
- if (!is_efi_boot() || detect_container(NULL) > 0)
+ if (!is_efi_boot() || detect_container() > 0)
return -EOPNOTSUPP;
r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &v, &s);
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
index 273dacff1f..70220bdd14 100644
--- a/src/shared/machine-image.c
+++ b/src/shared/machine-image.c
@@ -33,7 +33,7 @@
static const char image_search_path[] =
"/var/lib/machines\0"
- "/var/lib/container\0"
+ "/var/lib/container\0" /* legacy */
"/usr/local/lib/machines\0"
"/usr/lib/machines\0";
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index 8d80aae182..96c39aa0a9 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -535,10 +535,8 @@ static int get_unit_list(
return bus_log_create_error(r);
r = sd_bus_call(bus, m, 0, &error, &reply);
- if (r < 0) {
- log_error("Failed to list units: %s", bus_error_message(&error, r));
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to list units: %s", bus_error_message(&error, r));
r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "(ssssssouso)");
if (r < 0)
@@ -605,7 +603,7 @@ static int get_unit_list_recursive(
r = set_put(replies, reply);
if (r < 0) {
sd_bus_message_unref(reply);
- return r;
+ return log_oom();
}
if (arg_recursive) {
@@ -614,7 +612,7 @@ static int get_unit_list_recursive(
r = sd_get_machine_names(&machines);
if (r < 0)
- return r;
+ return log_error_errno(r, "Failed to get machine names: %m");
STRV_FOREACH(i, machines) {
_cleanup_bus_flush_close_unref_ sd_bus *container = NULL;
@@ -622,7 +620,7 @@ static int get_unit_list_recursive(
r = sd_bus_open_system_machine(&container, *i);
if (r < 0) {
- log_error_errno(r, "Failed to connect to container %s: %m", *i);
+ log_warning_errno(r, "Failed to connect to container %s, ignoring: %m", *i);
continue;
}
@@ -635,7 +633,7 @@ static int get_unit_list_recursive(
r = set_put(replies, reply);
if (r < 0) {
sd_bus_message_unref(reply);
- return r;
+ return log_oom();
}
}
@@ -1474,6 +1472,7 @@ static int list_dependencies_get_dependencies(sd_bus *bus, const char *name, cha
"Requisite\0"
"RequisiteOverridable\0"
"Wants\0"
+ "ConsistsOf\0"
"BindsTo\0",
[DEPENDENCY_REVERSE] = "RequiredBy\0"
"RequiredByOverridable\0"
@@ -1743,7 +1742,7 @@ static int get_machine_list(
_cleanup_free_ char *hn = NULL;
size_t sz = 0;
char **i;
- int c = 0;
+ int c = 0, r;
hn = gethostname_malloc();
if (!hn)
@@ -1761,7 +1760,10 @@ static int get_machine_list(
c++;
}
- sd_get_machine_names(&m);
+ r = sd_get_machine_names(&m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get machine list: %m");
+
STRV_FOREACH(i, m) {
_cleanup_free_ char *class = NULL;
@@ -4267,10 +4269,8 @@ static int show_one(
&error,
&reply,
"s", "");
- if (r < 0) {
- log_error("Failed to get properties: %s", bus_error_message(&error, r));
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to get properties: %s", bus_error_message(&error, r));
r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sv}");
if (r < 0)
diff --git a/src/test/test-architecture.c b/src/test/test-architecture.c
index 30bdec45e5..a5b66a7d2f 100644
--- a/src/test/test-architecture.c
+++ b/src/test/test-architecture.c
@@ -25,18 +25,18 @@
#include "log.h"
int main(int argc, char *argv[]) {
- const char *id = NULL;
int a, v;
- v = detect_virtualization(&id);
+ v = detect_virtualization();
if (v == -EPERM || v == -EACCES)
return EXIT_TEST_SKIP;
assert_se(v >= 0);
log_info("virtualization=%s id=%s",
- v == VIRTUALIZATION_CONTAINER ? "container" : v == VIRTUALIZATION_VM ? "vm" : "n/a",
- strna(id));
+ VIRTUALIZATION_IS_CONTAINER(v) ? "container" :
+ VIRTUALIZATION_IS_VM(v) ? "vm" : "n/a",
+ virtualization_to_string(v));
a = uname_architecture();
assert_se(a >= 0);
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
index ff7e45901c..4ecf09a29e 100644
--- a/src/test/test-cgroup-util.c
+++ b/src/test/test-cgroup-util.c
@@ -320,7 +320,7 @@ int main(void) {
test_controller_is_valid();
test_slice_to_path();
test_shift_path();
- test_mask_supported();
+ TEST_REQ_RUNNING_SYSTEMD(test_mask_supported());
return 0;
}
diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c
index e4e2efecd5..eb0f443a43 100644
--- a/src/test/test-process-util.c
+++ b/src/test/test-process-util.c
@@ -85,7 +85,7 @@ static void test_get_process_comm(void) {
assert_se(r >= 0 || r == -EACCES);
log_info("self strlen(environ): '%zu'", strlen(env));
- if (!detect_container(NULL))
+ if (!detect_container())
assert_se(get_ctty_devnr(1, &h) == -ENXIO);
getenv_for_pid(1, "PATH", &i);
diff --git a/src/udev/udev-builtin-path_id.c b/src/udev/udev-builtin-path_id.c
index f529ffcf25..01e2c659ae 100644
--- a/src/udev/udev-builtin-path_id.c
+++ b/src/udev/udev-builtin-path_id.c
@@ -317,6 +317,39 @@ out:
return parent;
}
+static struct udev_device *handle_scsi_ata(struct udev_device *parent, char **path) {
+ struct udev *udev = udev_device_get_udev(parent);
+ struct udev_device *targetdev;
+ struct udev_device *target_parent;
+ struct udev_device *atadev;
+ const char *port_no;
+
+ assert(parent);
+ assert(path);
+
+ targetdev = udev_device_get_parent_with_subsystem_devtype(parent, "scsi", "scsi_host");
+ if (!targetdev)
+ return NULL;
+
+ target_parent = udev_device_get_parent(targetdev);
+ if (!target_parent)
+ return NULL;
+
+ atadev = udev_device_new_from_subsystem_sysname(udev, "ata_port", udev_device_get_sysname(target_parent));
+ if (!atadev)
+ return NULL;
+
+ port_no = udev_device_get_sysattr_value(atadev, "port_no");
+ if (!port_no) {
+ parent = NULL;
+ goto out;
+ }
+ path_prepend(path, "ata-%s", port_no);
+out:
+ udev_device_unref(atadev);
+ return parent;
+}
+
static struct udev_device *handle_scsi_default(struct udev_device *parent, char **path) {
struct udev_device *hostdev;
int host, bus, target, lun;
@@ -482,19 +515,8 @@ static struct udev_device *handle_scsi(struct udev_device *parent, char **path,
goto out;
}
- /*
- * We do not support the ATA transport class, it uses global counters
- * to name the ata devices which numbers spread across multiple
- * controllers.
- *
- * The real link numbers are not exported. Also, possible chains of ports
- * behind port multipliers cannot be composed that way.
- *
- * Until all that is solved at the kernel level, there are no by-path/
- * links for ATA devices.
- */
if (strstr(name, "/ata") != NULL) {
- parent = NULL;
+ parent = handle_scsi_ata(parent, path);
goto out;
}
diff --git a/src/udev/udev-builtin-uaccess.c b/src/udev/udev-builtin-uaccess.c
index 43bab8af63..7bf4e7f24d 100644
--- a/src/udev/udev-builtin-uaccess.c
+++ b/src/udev/udev-builtin-uaccess.c
@@ -45,7 +45,7 @@ static int builtin_uaccess(struct udev_device *dev, int argc, char *argv[], bool
seat = "seat0";
r = sd_seat_get_active(seat, NULL, &uid);
- if (r == -ENOENT) {
+ if (r == -ENXIO || r == -ENODATA) {
/* No active session on this seat */
r = 0;
goto finish;
diff --git a/src/vconsole/vconsole-setup.c b/src/vconsole/vconsole-setup.c
index 7bdc158ad7..6353579283 100644
--- a/src/vconsole/vconsole-setup.c
+++ b/src/vconsole/vconsole-setup.c
@@ -293,7 +293,7 @@ int main(int argc, char **argv) {
log_warning_errno(r, "Failed to read /etc/vconsole.conf: %m");
/* Let the kernel command line override /etc/vconsole.conf */
- if (detect_container(NULL) <= 0) {
+ if (detect_container() <= 0) {
r = parse_env_file("/proc/cmdline", WHITESPACE,
"vconsole.keymap", &vc_keymap,
"vconsole.keymap.toggle", &vc_keymap_toggle,
diff --git a/units/systemd-nspawn@.service.in b/units/systemd-nspawn@.service.in
index 074b916d38..6b86e0a7f7 100644
--- a/units/systemd-nspawn@.service.in
+++ b/units/systemd-nspawn@.service.in
@@ -13,7 +13,7 @@ Before=machines.target
After=network.target
[Service]
-ExecStart=@bindir@/systemd-nspawn --quiet --keep-unit --boot --link-journal=try-guest --network-veth --machine=%I
+ExecStart=@bindir@/systemd-nspawn --quiet --keep-unit --boot --link-journal=try-guest --network-veth --settings=override --machine=%I
KillMode=mixed
Type=notify
RestartForceExitStatus=133