summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2016-12-13 22:30:07 -0500
committerGitHub <noreply@github.com>2016-12-13 22:30:07 -0500
commit9cf314f34d9ca26bb8867effdf54fc2c78b06f31 (patch)
tree62f00b1788b78f3ab19c5b14a5f98fc530c1bc18
parent9ef4e1e5a2d0a9cc50406f1cae05f3918d6f0c2a (diff)
parentf59d1da8cd15d42b36fa5bab756cf4d144785e2b (diff)
Merge pull request #4727 from poettering/exec-bind
More namespace improvements
-rw-r--r--.gitignore1
-rw-r--r--Makefile.am7
-rw-r--r--TODO12
-rw-r--r--man/systemd.exec.xml25
-rw-r--r--src/basic/mount-util.c32
-rw-r--r--src/basic/mount-util.h3
-rw-r--r--src/core/dbus-execute.c131
-rw-r--r--src/core/dbus-service.c23
-rw-r--r--src/core/execute.c16
-rw-r--r--src/core/execute.h2
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c166
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/namespace.c360
-rw-r--r--src/core/namespace.h44
-rw-r--r--src/shared/bus-unit-util.c89
-rw-r--r--src/test/test-mount-util.c45
-rw-r--r--src/test/test-ns.c1
18 files changed, 784 insertions, 176 deletions
diff --git a/.gitignore b/.gitignore
index 016ba625e3..ec4b7bd672 100644
--- a/.gitignore
+++ b/.gitignore
@@ -239,6 +239,7 @@
/test-loopback
/test-machine-tables
/test-mmap-cache
+/test-mount-util
/test-namespace
/test-ndisc-rs
/test-netlink
diff --git a/Makefile.am b/Makefile.am
index 3bd8c29dd3..8f7b83f0ab 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1512,6 +1512,7 @@ tests += \
test-utf8 \
test-ellipsize \
test-util \
+ test-mount-util \
test-cpu-set-util \
test-hexdecoct \
test-escape \
@@ -1890,6 +1891,12 @@ test_util_SOURCES = \
test_util_LDADD = \
libsystemd-shared.la
+test_mount_util_SOURCES = \
+ src/test/test-mount-util.c
+
+test_mount_util_LDADD = \
+ libsystemd-shared.la
+
test_hexdecoct_SOURCES = \
src/test/test-hexdecoct.c
diff --git a/TODO b/TODO
index 9f6f13e2c2..f3f6cb18bb 100644
--- a/TODO
+++ b/TODO
@@ -27,6 +27,10 @@ Janitorial Clean-ups:
Features:
+* Add ExecMonitor= setting. May be used multiple times. Forks off a process in
+ the service cgroup, which is supposed to monitor the service, and when it
+ exits the service is considered failed by its monitor.
+
* replace all canonicalize_file_name() invocations by chase_symlinks(), in
particulr those where a rootdir is relevant.
@@ -78,8 +82,6 @@ Features:
* Add DataDirectory=, CacheDirectory= and LogDirectory= to match
RuntimeDirectory=, and create it as necessary when starting a service, owned by the right user.
-* Add BindDirectory= for allowing arbitrary, private bind mounts for services
-
* Add RootImage= for mounting a disk image or file as root directory
* make sure the ratelimit object can deal with USEC_INFINITY as way to turn off things
@@ -340,8 +342,6 @@ Features:
* refuse boot if /usr/lib/os-release is missing or /etc/machine-id cannot be set up
-* btrfs raid assembly: some .device jobs stay stuck in the queue
-
* man: the documentation of Restart= currently is very misleading and suggests the tools from ExecStartPre= might get restarted.
* load .d/*.conf dropins for device units
@@ -587,15 +587,13 @@ Features:
* on shutdown: move utmp, wall, audit logic all into PID 1 (or logind?), get rid of systemd-update-utmp-runlevel
-* make repeated alt-ctrl-del presses printing a dump, or even force a reboot without
- waiting for the timeout
+* make repeated alt-ctrl-del presses printing a dump
* hostnamed: before returning information from /etc/machine-info.conf check the modification data and reread. Similar for localed, ...
* currently x-systemd.timeout is lost in the initrd, since crypttab is copied into dracut, but fstab is not
* nspawn:
- - nspawn -x should support ephemeral instances of gpt images
- emulate /dev/kmsg using CUSE and turn off the syslog syscall
with seccomp. That should provide us with a useful log buffer that
systemd can log to during early boot, and disconnect container logs
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index f27e4a5c04..812e615530 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -968,6 +968,31 @@
</varlistentry>
<varlistentry>
+ <term><varname>BindPaths=</varname></term>
+ <term><varname>BindReadOnlyPaths=</varname></term>
+
+ <listitem><para>Configures unit-specific bind mounts. A bind mount makes a particular file or directory
+ available at an additional place in the unit's view of the file system. Any bind mounts created with this
+ option are specific to the unit, and are not visible in the host's mount table. This option expects a
+ whitespace separated list of bind mount definitions. Each definition consists of a colon-separated triple of
+ source path, destination path and option string, where the latter two are optional. If only a source path is
+ specified the source and destination are taken to be the same. The option string may be either
+ <literal>rbind</literal> or <literal>norbind</literal> for configuring a recursive or non-recursive bind
+ mount. If the destination path is omitted, the option string must be omitted too.</para>
+
+ <para><varname>BindPaths=</varname> creates regular writable bind mounts (unless the source file system mount
+ is already marked read-only), while <varname>BindReadOnlyPaths=</varname> creates read-only bind mounts. These
+ settings may be used more than once, each usage appends to the unit's list of bind mounts. If the empty string
+ is assigned to either of these two options the entire list of bind mounts defined prior to this is reset. Note
+ that in this case both read-only and regular bind mounts are reset, regardless of which of the two settings is
+ used.</para>
+
+ <para>This option is particularly useful when <varname>RootDirectory=</varname> is used. In this case the
+ source path refers to a path on the host file system, while the destination path refers to a path below the
+ root directory of the unit.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>PrivateTmp=</varname></term>
<listitem><para>Takes a boolean argument. If true, sets up a new file system namespace for the executed
diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c
index 352c3505fb..8970050408 100644
--- a/src/basic/mount-util.c
+++ b/src/basic/mount-util.c
@@ -689,3 +689,35 @@ int umount_verbose(const char *what) {
return log_error_errno(errno, "Failed to unmount %s: %m", what);
return 0;
}
+
+/* Translates the propagation bits of a mount flags word into their textual name. Every bit other than
+ * MS_SHARED, MS_SLAVE and MS_PRIVATE is masked out first. Returns "shared", "slave" or "private" when exactly
+ * that one bit is set, and NULL when none or more than one of the three bits is set. */
+const char *mount_propagation_flags_to_string(unsigned long flags) {
+        unsigned long propagation;
+
+        propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);
+
+        if (propagation == MS_SHARED)
+                return "shared";
+        if (propagation == MS_SLAVE)
+                return "slave";
+        if (propagation == MS_PRIVATE)
+                return "private";
+
+        return NULL;
+}
+
+/* Inverse of mount_propagation_flags_to_string(): maps "shared", "slave" and "private" to the matching MS_*
+ * flag. Returns 0 for a NULL/empty name and also for any name that is not recognized, i.e. the two cases
+ * cannot be told apart from the return value alone. */
+unsigned long mount_propagation_flags_from_string(const char *name) {
+        static const struct {
+                const char *name;
+                unsigned long flag;
+        } table[] = {
+                { "shared",  MS_SHARED  },
+                { "slave",   MS_SLAVE   },
+                { "private", MS_PRIVATE },
+        };
+        unsigned k;
+
+        if (isempty(name))
+                return 0;
+
+        for (k = 0; k < sizeof(table) / sizeof(table[0]); k++)
+                if (streq(name, table[k].name))
+                        return table[k].flag;
+
+        return 0;
+}
diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h
index b840956d63..c8049198d4 100644
--- a/src/basic/mount-util.h
+++ b/src/basic/mount-util.h
@@ -61,3 +61,6 @@ int mount_verbose(
unsigned long flags,
const char *options);
int umount_verbose(const char *where);
+
+const char *mount_propagation_flags_to_string(unsigned long flags);
+unsigned long mount_propagation_flags_from_string(const char *name);
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 23c1b44573..b3fc0ff5c3 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -34,6 +34,7 @@
#include "fileio.h"
#include "ioprio.h"
#include "missing.h"
+#include "mount-util.h"
#include "namespace.h"
#include "parse-util.h"
#include "path-util.h"
@@ -674,6 +675,49 @@ static int property_get_output_fdname(
return sd_bus_message_append(reply, "s", name);
}
+/* D-Bus property getter shared by the "BindPaths" and "BindReadOnlyPaths" properties. userdata is the
+ * ExecContext whose bind_mounts[] array is serialized into reply as an array of (ssbt) structs: source path,
+ * destination path, ignore_enoent flag and mount flags (MS_REC for recursive entries, 0 otherwise).
+ *
+ * Which of the two properties is being served is derived from the property name: if it contains "ReadOnly"
+ * only the read-only entries are emitted, otherwise only the writable ones.
+ *
+ * Returns the result of closing the array container on success, or the negative error returned by the first
+ * failing sd-bus call. */
+static int property_get_bind_paths(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        unsigned i;
+        bool ro;
+        int r;
+
+        assert(bus);
+        assert(c);
+        assert(property);
+        assert(reply);
+
+        ro = !!strstr(property, "ReadOnly");
+
+        r = sd_bus_message_open_container(reply, 'a', "(ssbt)");
+        if (r < 0)
+                return r;
+
+        for (i = 0; i < c->n_bind_mounts; i++) {
+
+                /* Skip the entries that belong to the other property */
+                if (ro != c->bind_mounts[i].read_only)
+                        continue;
+
+                /* The "t" field is fetched from the variadic argument list as a uint64_t. MS_REC and 0 are
+                 * plain ints, hence cast explicitly so that the argument really has the expected type and
+                 * width. */
+                r = sd_bus_message_append(
+                                reply, "(ssbt)",
+                                c->bind_mounts[i].source,
+                                c->bind_mounts[i].destination,
+                                c->bind_mounts[i].ignore_enoent,
+                                c->bind_mounts[i].recursive ? (uint64_t) MS_REC : (uint64_t) 0);
+                if (r < 0)
+                        return r;
+        }
+
+        return sd_bus_message_close_container(reply);
+}
+
const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -782,6 +826,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
@@ -1363,8 +1409,8 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
- if (!isempty(path) && !path_is_absolute(path))
- return sd_bus_error_set_errnof(error, EINVAL, "Path %s is not absolute.", path);
+ if (!path_is_absolute(path))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);
if (mode != UNIT_CHECK) {
char *buf = NULL;
@@ -1613,6 +1659,87 @@ int bus_exec_context_set_transient_property(
}
return 1;
+        } else if (streq(name, "MountFlags")) {
+                uint64_t flags;
+
+                /* MountFlags= takes a single propagation flag (or 0 to unset it) */
+                r = sd_bus_message_read(message, "t", &flags);
+                if (r < 0)
+                        return r;
+                if (!IN_SET(flags, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE))
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount propagation flags");
+
+                if (mode != UNIT_CHECK) {
+                        c->mount_flags = flags;
+
+                        unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, strempty(mount_propagation_flags_to_string(flags)));
+                }
+
+                return 1;
+        } else if (STR_IN_SET(name, "BindPaths", "BindReadOnlyPaths")) {
+                bool empty = true;
+
+                /* Both settings take an array of (source, destination, ignore_enoent, mount flags) entries
+                 * which are appended to the shared bind mount list. An empty array resets the whole list. */
+                r = sd_bus_message_enter_container(message, 'a', "(ssbt)");
+                if (r < 0)
+                        return r;
+
+                while ((r = sd_bus_message_enter_container(message, 'r', "ssbt")) > 0) {
+                        const char *source, *destination;
+                        int ignore_enoent;
+                        uint64_t mount_flags;
+
+                        r = sd_bus_message_read(message, "ssbt", &source, &destination, &ignore_enoent, &mount_flags);
+                        if (r < 0)
+                                return r;
+
+                        r = sd_bus_message_exit_container(message);
+                        if (r < 0)
+                                return r;
+
+                        if (!path_is_absolute(source))
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source);
+                        if (!path_is_absolute(destination))
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", destination);
+                        if (!IN_SET(mount_flags, 0, MS_REC))
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount flags.");
+
+                        if (mode != UNIT_CHECK) {
+                                /* bind_mount_add() is expected to take its own copies of the two strings
+                                 * (config_parse_bind_paths() hands it buffers that it frees right after the
+                                 * call), hence pass the message-owned strings directly instead of unchecked
+                                 * strdup() results that nobody would ever free. */
+                                r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts,
+                                                   &(BindMount) {
+                                                           .source = (char*) source,
+                                                           .destination = (char*) destination,
+                                                           .read_only = !!strstr(name, "ReadOnly"),
+                                                           .recursive = !!(mount_flags & MS_REC),
+                                                           .ignore_enoent = ignore_enoent,
+                                                   });
+                                if (r < 0)
+                                        return r;
+
+                                unit_write_drop_in_private_format(
+                                                u, mode, name,
+                                                "%s=%s%s:%s:%s",
+                                                name,
+                                                ignore_enoent ? "-" : "",
+                                                source,
+                                                destination,
+                                                (mount_flags & MS_REC) ? "rbind" : "norbind");
+                        }
+
+                        empty = false;
+                }
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_exit_container(message);
+                if (r < 0)
+                        return r;
+
+                /* Like every other state change in this function, only reset the list when we are asked to
+                 * actually apply the property, not when the caller merely wants it validated. */
+                if (empty && mode != UNIT_CHECK) {
+                        bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+                        c->bind_mounts = NULL;
+                        c->n_bind_mounts = 0;
+                }
+
+                return 1;
}
ri = rlimit_from_string(name);
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index 61b83d2d62..85b67318ed 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -143,6 +143,29 @@ static int bus_service_set_transient_property(
return 1;
+ } else if (streq(name, "Restart")) {
+ ServiceRestart sr;
+ const char *v;
+
+ r = sd_bus_message_read(message, "s", &v);
+ if (r < 0)
+ return r;
+
+ if (isempty(v))
+ sr = SERVICE_RESTART_NO;
+ else {
+ sr = service_restart_from_string(v);
+ if (sr < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid restart setting: %s", v);
+ }
+
+ if (mode != UNIT_CHECK) {
+ s->restart = sr;
+ unit_write_drop_in_private_format(UNIT(s), mode, name, "Restart=%s", service_restart_to_string(sr));
+ }
+
+ return 1;
+
} else if (STR_IN_SET(name,
"StandardInputFileDescriptor",
"StandardOutputFileDescriptor",
diff --git a/src/core/execute.c b/src/core/execute.c
index 07ab067c05..2ee8c9a416 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1826,6 +1826,9 @@ static bool exec_needs_mount_namespace(
!strv_isempty(context->inaccessible_paths))
return true;
+ if (context->n_bind_mounts > 0)
+ return true;
+
if (context->mount_flags != 0)
return true;
@@ -2147,6 +2150,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,
r = setup_namespace(root_dir, &ns_info, rw,
context->read_only_paths,
context->inaccessible_paths,
+ context->bind_mounts,
+ context->n_bind_mounts,
tmp,
var,
context->protect_home,
@@ -3086,6 +3091,8 @@ void exec_context_done(ExecContext *c) {
c->read_write_paths = strv_free(c->read_write_paths);
c->inaccessible_paths = strv_free(c->inaccessible_paths);
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+
if (c->cpuset)
CPU_FREE(c->cpuset);
@@ -3569,6 +3576,15 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
fputs("\n", f);
}
+ if (c->n_bind_mounts > 0)
+ for (i = 0; i < c->n_bind_mounts; i++) {
+ fprintf(f, "%s%s: %s:%s:%s\n", prefix,
+ c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
+ c->bind_mounts[i].source,
+ c->bind_mounts[i].destination,
+ c->bind_mounts[i].recursive ? "rbind" : "norbind");
+ }
+
if (c->utmp_id)
fprintf(f,
"%sUtmpIdentifier: %s\n",
diff --git a/src/core/execute.h b/src/core/execute.h
index 951c8f4da3..84ab4339cf 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -161,6 +161,8 @@ struct ExecContext {
char **read_write_paths, **read_only_paths, **inaccessible_paths;
unsigned long mount_flags;
+ BindMount *bind_mounts;
+ unsigned n_bind_mounts;
uint64_t capability_bounding_set;
uint64_t capability_ambient_set;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 2610442b91..15f22a2681 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -89,6 +89,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv, 0,
$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
+$1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context)
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 687cd1dd31..f325d853c6 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -49,6 +49,7 @@
#include "load-fragment.h"
#include "log.h"
#include "missing.h"
+#include "mount-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
@@ -1264,19 +1265,20 @@ int config_parse_sysv_priority(const char *unit,
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");
DEFINE_CONFIG_PARSE_ENUM(config_parse_kill_mode, kill_mode, KillMode, "Failed to parse kill mode");
-int config_parse_exec_mount_flags(const char *unit,
- const char *filename,
- unsigned line,
- const char *section,
- unsigned section_line,
- const char *lvalue,
- int ltype,
- const char *rvalue,
- void *data,
- void *userdata) {
+int config_parse_exec_mount_flags(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
- unsigned long flags = 0;
+ unsigned long flags;
ExecContext *c = data;
assert(filename);
@@ -1284,15 +1286,14 @@ int config_parse_exec_mount_flags(const char *unit,
assert(rvalue);
assert(data);
- if (streq(rvalue, "shared"))
- flags = MS_SHARED;
- else if (streq(rvalue, "slave"))
- flags = MS_SLAVE;
- else if (streq(rvalue, "private"))
- flags = MS_PRIVATE;
+ if (isempty(rvalue))
+ flags = 0;
else {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse mount flag %s, ignoring.", rvalue);
- return 0;
+ flags = mount_propagation_flags_from_string(rvalue);
+ if (flags == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse mount flag %s, ignoring.", rvalue);
+ return 0;
+ }
}
c->mount_flags = flags;
@@ -3890,6 +3891,132 @@ int config_parse_namespace_path_strv(
return 0;
}
+int config_parse_bind_paths(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ /* Configuration parser shared by BindPaths= and BindReadOnlyPaths=. rvalue is parsed as a list of
+  * SOURCE[:DESTINATION[:OPTIONS]] items, and every valid item is appended to the bind mount list of the
+  * ExecContext passed in data. An empty rvalue resets that list.
+  *
+  * Returns 0 on success and also after a syntax problem has been logged; in the latter case parsing stops at
+  * the offending item, while items added before it remain in the list. Returns the result of log_oom() if an
+  * allocation or bind_mount_add() fails. */
+ ExecContext *c = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
+ c->bind_mounts = NULL;
+ c->n_bind_mounts = 0;
+ return 0;
+ }
+ /* Each loop iteration consumes one item; the loop ends when no further word can be extracted */
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ char *s = NULL, *d = NULL;
+ bool rbind = true, ignore_enoent = false;
+ /* First field: the source path, delimited by ':' or whitespace */
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue);
+ return 0;
+ }
+ /* A leading '-' sets ignore_enoent and is not part of the path itself */
+ s = source;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ if (!utf8_is_valid(s)) {
+ log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, s);
+ return 0;
+ }
+ if (!path_is_absolute(s)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute source path, ignoring: %s", s);
+ return 0;
+ }
+
+ path_kill_slashes(s);
+ /* p[-1] is the character right in front of the current parse position. NOTE(review): presumably
+  * extract_first_word() leaves p just behind the separator that ended the previous word, so that a
+  * ':' here means another field of the same item follows -- confirm against its implementation. */
+ /* Optionally, the destination is specified. */
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue);
+ return 0;
+ }
+ if (r == 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Missing argument after ':': %s", rvalue);
+ return 0;
+ }
+
+ if (!utf8_is_valid(destination)) {
+ log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, destination);
+ return 0;
+ }
+ if (!path_is_absolute(destination)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute destination path, ignoring: %s", destination);
+ return 0;
+ }
+
+ d = path_kill_slashes(destination);
+
+ /* Optionally, there's also a short option string specified */
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue);
+ return 0;
+ }
+ /* An empty option string is treated like "rbind", which is also the default when no options are given */
+ if (isempty(options) || streq(options, "rbind"))
+ rbind = true;
+ else if (streq(options, "norbind"))
+ rbind = false;
+ else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid option string, ignoring setting: %s", options);
+ return 0;
+ }
+ }
+ } else
+ d = s; /* No destination given: use the source path as destination, too */
+ /* read_only is derived from the name of the setting: it is true iff lvalue contains "ReadOnly". s and d
+  * point into source/destination, which are freed at the end of this iteration; NOTE(review):
+  * bind_mount_add() is presumably expected to copy the strings -- verify against its implementation. */
+ r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts,
+ &(BindMount) {
+ .source = s,
+ .destination = d,
+ .read_only = !!strstr(lvalue, "ReadOnly"),
+ .recursive = rbind,
+ .ignore_enoent = ignore_enoent,
+ });
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
int config_parse_no_new_privileges(
const char* unit,
const char *filename,
@@ -4387,6 +4514,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_sec, "SECONDS" },
{ config_parse_nsec, "NANOSECONDS" },
{ config_parse_namespace_path_strv, "PATH [...]" },
+ { config_parse_bind_paths, "PATH[:PATH[:OPTIONS]] [...]" },
{ config_parse_unit_requires_mounts_for, "PATH [...]" },
{ config_parse_exec_mount_flags, "MOUNTFLAG [...]" },
{ config_parse_unit_string_printf, "STRING" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 1cff815a50..bbac2d84b5 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -117,6 +117,7 @@ int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line
int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_restrict_namespaces(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_bind_paths(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/core/namespace.c b/src/core/namespace.c
index aca47a4d2f..834883267c 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -50,6 +50,8 @@
typedef enum MountMode {
/* This is ordered by priority! */
INACCESSIBLE,
+ BIND_MOUNT,
+ BIND_MOUNT_RECURSIVE,
READONLY,
PRIVATE_TMP,
PRIVATE_VAR_TMP,
@@ -57,13 +59,16 @@ typedef enum MountMode {
READWRITE,
} MountMode;
-typedef struct BindMount {
+typedef struct MountEntry {
const char *path_const; /* Memory allocated on stack or static */
- MountMode mode:6;
+ MountMode mode:5;
bool ignore:1; /* Ignore if path does not exist? */
bool has_prefix:1; /* Already is prefixed by the root dir? */
+ bool read_only:1; /* Shall this mount point be read-only? */
char *path_malloc; /* Use this instead of 'path' if we had to allocate memory */
-} BindMount;
+ const char *source_const; /* The source path, for bind mounts */
+ char *source_malloc;
+} MountEntry;
/*
* The following Protect tables are to protect paths and mark some of them
@@ -74,7 +79,7 @@ typedef struct BindMount {
*/
/* ProtectKernelTunables= option and the related filesystem APIs */
-static const BindMount protect_kernel_tunables_table[] = {
+static const MountEntry protect_kernel_tunables_table[] = {
{ "/proc/sys", READONLY, false },
{ "/proc/sysrq-trigger", READONLY, true },
{ "/proc/latency_stats", READONLY, true },
@@ -93,7 +98,7 @@ static const BindMount protect_kernel_tunables_table[] = {
};
/* ProtectKernelModules= option */
-static const BindMount protect_kernel_modules_table[] = {
+static const MountEntry protect_kernel_modules_table[] = {
#ifdef HAVE_SPLIT_USR
{ "/lib/modules", INACCESSIBLE, true },
#endif
@@ -104,28 +109,28 @@ static const BindMount protect_kernel_modules_table[] = {
* ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
* system should be protected by ProtectSystem=
*/
-static const BindMount protect_home_read_only_table[] = {
+static const MountEntry protect_home_read_only_table[] = {
{ "/home", READONLY, true },
{ "/run/user", READONLY, true },
{ "/root", READONLY, true },
};
/* ProtectHome=yes table */
-static const BindMount protect_home_yes_table[] = {
+static const MountEntry protect_home_yes_table[] = {
{ "/home", INACCESSIBLE, true },
{ "/run/user", INACCESSIBLE, true },
{ "/root", INACCESSIBLE, true },
};
/* ProtectSystem=yes table */
-static const BindMount protect_system_yes_table[] = {
+static const MountEntry protect_system_yes_table[] = {
{ "/usr", READONLY, false },
{ "/boot", READONLY, true },
{ "/efi", READONLY, true },
};
/* ProtectSystem=full includes ProtectSystem=yes */
-static const BindMount protect_system_full_table[] = {
+static const MountEntry protect_system_full_table[] = {
{ "/usr", READONLY, false },
{ "/boot", READONLY, true },
{ "/efi", READONLY, true },
@@ -140,17 +145,17 @@ static const BindMount protect_system_full_table[] = {
* (And of course /home and friends are also left writable, as ProtectHome=
* shall manage those, orthogonally).
*/
-static const BindMount protect_system_strict_table[] = {
- { "/", READONLY, false },
- { "/proc", READWRITE, false }, /* ProtectKernelTunables= */
- { "/sys", READWRITE, false }, /* ProtectKernelTunables= */
- { "/dev", READWRITE, false }, /* PrivateDevices= */
- { "/home", READWRITE, true }, /* ProtectHome= */
- { "/run/user", READWRITE, true }, /* ProtectHome= */
- { "/root", READWRITE, true }, /* ProtectHome= */
+static const MountEntry protect_system_strict_table[] = {
+ { "/", READONLY, false },
+ { "/proc", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/sys", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/dev", READWRITE, false }, /* PrivateDevices= */
+ { "/home", READWRITE, true }, /* ProtectHome= */
+ { "/run/user", READWRITE, true }, /* ProtectHome= */
+ { "/root", READWRITE, true }, /* ProtectHome= */
};
-static const char *bind_mount_path(const BindMount *p) {
+static const char *mount_entry_path(const MountEntry *p) {
assert(p);
/* Returns the path of this bind mount. If the malloc()-allocated ->path_buffer field is set we return that,
@@ -159,7 +164,19 @@ static const char *bind_mount_path(const BindMount *p) {
return p->path_malloc ?: p->path_const;
}
-static int append_access_mounts(BindMount **p, char **strv, MountMode mode) {
+/* Tells whether the mount point described by this entry shall end up read-only: true if the entry's explicit
+ * read_only flag is set, or if its mode is READONLY or INACCESSIBLE. */
+static bool mount_entry_read_only(const MountEntry *p) {
+        assert(p);
+
+        if (p->read_only)
+                return true;
+
+        return p->mode == READONLY || p->mode == INACCESSIBLE;
+}
+
+/* Returns the source path of this entry: the ->source_malloc field if it is set, and ->source_const
+ * otherwise (which may be NULL as well). */
+static const char *mount_entry_source(const MountEntry *p) {
+        assert(p);
+
+        if (p->source_malloc)
+                return p->source_malloc;
+
+        return p->source_const;
+}
+
+static int append_access_mounts(MountEntry **p, char **strv, MountMode mode) {
char **i;
assert(p);
@@ -183,7 +200,7 @@ static int append_access_mounts(BindMount **p, char **strv, MountMode mode) {
if (!path_is_absolute(e))
return -EINVAL;
- *((*p)++) = (BindMount) {
+ *((*p)++) = (MountEntry) {
.path_const = e,
.mode = mode,
.ignore = ignore,
@@ -194,7 +211,26 @@ static int append_access_mounts(BindMount **p, char **strv, MountMode mode) {
return 0;
}
-static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned n, bool ignore_protect) {
+/* Appends one MountEntry per element of binds[0..n) at the write cursor *p and advances the cursor past the
+ * entries written. The entries reference the destination and source strings of the BindMount objects
+ * without copying them. Always returns 0. */
+static int append_bind_mounts(MountEntry **p, const BindMount *binds, unsigned n) {
+        MountEntry *cursor;
+
+        assert(p);
+
+        cursor = *p;
+
+        for (; n > 0; n--, binds++) {
+                MountMode mode;
+
+                if (binds->recursive)
+                        mode = BIND_MOUNT_RECURSIVE;
+                else
+                        mode = BIND_MOUNT;
+
+                *cursor = (MountEntry) {
+                        .path_const = binds->destination,
+                        .mode = mode,
+                        .read_only = binds->read_only,
+                        .source_const = binds->source,
+                };
+                cursor++;
+        }
+
+        *p = cursor;
+
+        return 0;
+}
+
+static int append_static_mounts(MountEntry **p, const MountEntry *mounts, unsigned n, bool ignore_protect) {
unsigned i;
assert(p);
@@ -203,8 +239,8 @@ static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned
/* Adds a list of static pre-defined entries */
for (i = 0; i < n; i++)
- *((*p)++) = (BindMount) {
- .path_const = bind_mount_path(mounts+i),
+ *((*p)++) = (MountEntry) {
+ .path_const = mount_entry_path(mounts+i),
.mode = mounts[i].mode,
.ignore = mounts[i].ignore || ignore_protect,
};
@@ -212,7 +248,7 @@ static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned
return 0;
}
-static int append_protect_home(BindMount **p, ProtectHome protect_home, bool ignore_protect) {
+static int append_protect_home(MountEntry **p, ProtectHome protect_home, bool ignore_protect) {
assert(p);
switch (protect_home) {
@@ -231,7 +267,7 @@ static int append_protect_home(BindMount **p, ProtectHome protect_home, bool ign
}
}
-static int append_protect_system(BindMount **p, ProtectSystem protect_system, bool ignore_protect) {
+static int append_protect_system(MountEntry **p, ProtectSystem protect_system, bool ignore_protect) {
assert(p);
switch (protect_system) {
@@ -254,11 +290,11 @@ static int append_protect_system(BindMount **p, ProtectSystem protect_system, bo
}
static int mount_path_compare(const void *a, const void *b) {
- const BindMount *p = a, *q = b;
+ const MountEntry *p = a, *q = b;
int d;
/* If the paths are not equal, then order prefixes first */
- d = path_compare(bind_mount_path(p), bind_mount_path(q));
+ d = path_compare(mount_entry_path(p), mount_entry_path(q));
if (d != 0)
return d;
@@ -272,7 +308,7 @@ static int mount_path_compare(const void *a, const void *b) {
return 0;
}
-static int prefix_where_needed(BindMount *m, unsigned n, const char *root_directory) {
+static int prefix_where_needed(MountEntry *m, unsigned n, const char *root_directory) {
unsigned i;
/* Prefixes all paths in the bind mount table with the root directory if it is specified and the entry needs
@@ -287,7 +323,7 @@ static int prefix_where_needed(BindMount *m, unsigned n, const char *root_direct
if (m[i].has_prefix)
continue;
- s = prefix_root(root_directory, bind_mount_path(m+i));
+ s = prefix_root(root_directory, mount_entry_path(m+i));
if (!s)
return -ENOMEM;
@@ -300,8 +336,8 @@ static int prefix_where_needed(BindMount *m, unsigned n, const char *root_direct
return 0;
}
-static void drop_duplicates(BindMount *m, unsigned *n) {
- BindMount *f, *t, *previous;
+static void drop_duplicates(MountEntry *m, unsigned *n) {
+ MountEntry *f, *t, *previous;
assert(m);
assert(n);
@@ -312,8 +348,9 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
/* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
* above. */
- if (previous && path_equal(bind_mount_path(f), bind_mount_path(previous))) {
- log_debug("%s is duplicate.", bind_mount_path(f));
+ if (previous && path_equal(mount_entry_path(f), mount_entry_path(previous))) {
+ log_debug("%s is duplicate.", mount_entry_path(f));
+ previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */
f->path_malloc = mfree(f->path_malloc);
continue;
}
@@ -326,8 +363,8 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
*n = t - m;
}
-static void drop_inaccessible(BindMount *m, unsigned *n) {
- BindMount *f, *t;
+static void drop_inaccessible(MountEntry *m, unsigned *n) {
+ MountEntry *f, *t;
const char *clear = NULL;
assert(m);
@@ -340,13 +377,13 @@ static void drop_inaccessible(BindMount *m, unsigned *n) {
/* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
* it, as inaccessible paths really should drop the entire subtree. */
- if (clear && path_startswith(bind_mount_path(f), clear)) {
- log_debug("%s is masked by %s.", bind_mount_path(f), clear);
+ if (clear && path_startswith(mount_entry_path(f), clear)) {
+ log_debug("%s is masked by %s.", mount_entry_path(f), clear);
f->path_malloc = mfree(f->path_malloc);
continue;
}
- clear = f->mode == INACCESSIBLE ? bind_mount_path(f) : NULL;
+ clear = f->mode == INACCESSIBLE ? mount_entry_path(f) : NULL;
*t = *f;
t++;
@@ -355,8 +392,8 @@ static void drop_inaccessible(BindMount *m, unsigned *n) {
*n = t - m;
}
-static void drop_nop(BindMount *m, unsigned *n) {
- BindMount *f, *t;
+static void drop_nop(MountEntry *m, unsigned *n) {
+ MountEntry *f, *t;
assert(m);
assert(n);
@@ -368,12 +405,12 @@ static void drop_nop(BindMount *m, unsigned *n) {
/* Only suppress such subtrees for READONLY and READWRITE entries */
if (IN_SET(f->mode, READONLY, READWRITE)) {
- BindMount *p;
+ MountEntry *p;
bool found = false;
/* Now let's find the first parent of the entry we are looking at. */
for (p = t-1; p >= m; p--) {
- if (path_startswith(bind_mount_path(f), bind_mount_path(p))) {
+ if (path_startswith(mount_entry_path(f), mount_entry_path(p))) {
found = true;
break;
}
@@ -381,7 +418,7 @@ static void drop_nop(BindMount *m, unsigned *n) {
/* We found it, let's see if it's the same mode, if so, we can drop this entry */
if (found && p->mode == f->mode) {
- log_debug("%s is redundant by %s", bind_mount_path(f), bind_mount_path(p));
+ log_debug("%s is redundant by %s", mount_entry_path(f), mount_entry_path(p));
f->path_malloc = mfree(f->path_malloc);
continue;
}
@@ -394,8 +431,8 @@ static void drop_nop(BindMount *m, unsigned *n) {
*n = t - m;
}
-static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) {
- BindMount *f, *t;
+static void drop_outside_root(const char *root_directory, MountEntry *m, unsigned *n) {
+ MountEntry *f, *t;
assert(m);
assert(n);
@@ -408,8 +445,8 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned
for (f = m, t = m; f < m + *n; f++) {
- if (!path_startswith(bind_mount_path(f), root_directory)) {
- log_debug("%s is outside of root directory.", bind_mount_path(f));
+ if (!path_startswith(mount_entry_path(f), root_directory)) {
+ log_debug("%s is outside of root directory.", mount_entry_path(f));
f->path_malloc = mfree(f->path_malloc);
continue;
}
@@ -421,7 +458,7 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned
*n = t - m;
}
-static int mount_dev(BindMount *m) {
+static int mount_dev(MountEntry *m) {
static const char devnodes[] =
"/dev/null\0"
"/dev/zero\0"
@@ -526,11 +563,11 @@ static int mount_dev(BindMount *m) {
* missing when the service is started with RootDirectory. This is
* consistent with mount units creating the mount points when missing.
*/
- (void) mkdir_p_label(bind_mount_path(m), 0755);
+ (void) mkdir_p_label(mount_entry_path(m), 0755);
/* Unmount everything in old /dev */
- umount_recursive(bind_mount_path(m), 0);
- if (mount(dev, bind_mount_path(m), NULL, MS_MOVE, NULL) < 0) {
+ umount_recursive(mount_entry_path(m), 0);
+ if (mount(dev, mount_entry_path(m), NULL, MS_MOVE, NULL) < 0) {
r = -errno;
goto fail;
}
@@ -560,17 +597,54 @@ fail:
return r;
}
+static int mount_entry_chase(
+ const char *root_directory,
+ MountEntry *m,
+ const char *path,
+ char **location) {
+
+ char *chased;
+ int r;
+
+ assert(m);
+
+ /* Since mount() will always follow symlinks and we need to take the different root directory into account we
+ * chase the symlinks on our own first. This is called for the destination path, as well as the source path (if
+ * that applies). The result is stored in "location". Returns 1 if the path was resolved and stored, 0 if it
+ * does not exist and the entry is marked "ignore", and otherwise whatever log_debug_errno() yields for the
+ * failure (presumably the negative errno-style code, confirm against log.h). */
+
+ r = chase_symlinks(path, root_directory, 0, &chased);
+ if (r == -ENOENT && m->ignore) {
+ log_debug_errno(r, "Path %s does not exist, ignoring.", path);
+ return 0; /* nothing was stored, *location is left untouched; apply_mount() returns early on <= 0 */
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to follow symlinks on %s: %m", path);
+
+ log_debug("Followed symlinks %s → %s.", path, chased);
+
+ free(*location); /* NOTE(review): callers pass mount_entry_path(m)/mount_entry_source(m) as "path", which may point into this very buffer, so "path" must not be used past this line */
+ *location = chased; /* the entry now holds the string handed back by chase_symlinks() */
+
+ return 1;
+}
+
static int apply_mount(
- BindMount *m,
+ const char *root_directory,
+ MountEntry *m,
const char *tmp_dir,
const char *var_tmp_dir) {
const char *what;
+ bool rbind = true;
int r;
assert(m);
- log_debug("Applying namespace mount on %s", bind_mount_path(m));
+ r = mount_entry_chase(root_directory, m, mount_entry_path(m), &m->path_malloc);
+ if (r <= 0)
+ return r;
+
+ log_debug("Applying namespace mount on %s", mount_entry_path(m));
switch (m->mode) {
@@ -580,10 +654,10 @@ static int apply_mount(
/* First, get rid of everything that is below if there
* is anything... Then, overmount it with an
* inaccessible path. */
- (void) umount_recursive(bind_mount_path(m), 0);
+ (void) umount_recursive(mount_entry_path(m), 0);
- if (lstat(bind_mount_path(m), &target) < 0)
- return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", bind_mount_path(m));
+ if (lstat(mount_entry_path(m), &target) < 0)
+ return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m));
what = mode_to_inaccessible_node(target.st_mode);
if (!what) {
@@ -595,14 +669,26 @@ static int apply_mount(
case READONLY:
case READWRITE:
-
- r = path_is_mount_point(bind_mount_path(m), NULL, 0);
+ r = path_is_mount_point(mount_entry_path(m), root_directory, 0);
if (r < 0)
- return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", bind_mount_path(m));
+ return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m));
if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
return 0;
/* This isn't a mount point yet, let's make it one. */
- what = bind_mount_path(m);
+ what = mount_entry_path(m);
+ break;
+
+ case BIND_MOUNT:
+ rbind = false;
+ /* fallthrough */
+
+ case BIND_MOUNT_RECURSIVE:
+ /* Also chase the source mount */
+ r = mount_entry_chase(root_directory, m, mount_entry_source(m), &m->source_malloc);
+ if (r <= 0)
+ return r;
+
+ what = mount_entry_source(m);
break;
case PRIVATE_TMP:
@@ -622,22 +708,22 @@ static int apply_mount(
assert(what);
- if (mount(what, bind_mount_path(m), NULL, MS_BIND|MS_REC, NULL) < 0)
- return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, bind_mount_path(m));
+ if (mount(what, mount_entry_path(m), NULL, MS_BIND|(rbind ? MS_REC : 0), NULL) < 0)
+ return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, mount_entry_path(m));
- log_debug("Successfully mounted %s to %s", what, bind_mount_path(m));
+ log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));
return 0;
}
-static int make_read_only(BindMount *m, char **blacklist) {
+static int make_read_only(MountEntry *m, char **blacklist) {
int r = 0;
assert(m);
- if (IN_SET(m->mode, INACCESSIBLE, READONLY))
- r = bind_remount_recursive(bind_mount_path(m), true, blacklist);
+ if (mount_entry_read_only(m))
+ r = bind_remount_recursive(mount_entry_path(m), true, blacklist);
else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
- if (mount(NULL, bind_mount_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+ if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
r = -errno;
} else
return 0;
@@ -646,50 +732,9 @@ static int make_read_only(BindMount *m, char **blacklist) {
* already stays this way. This improves compatibility with container managers, where we won't attempt to undo
* read-only mounts already applied. */
- return r;
-}
-
-/* Chase symlinks and remove failed paths from mounts */
-static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) {
- BindMount *f, *t;
- int r = 0;
-
- assert(m);
- assert(n);
-
- /* Since mount() will always follow symlinks and we need to take the different root directory into account we
- * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we
- * can't resolve the path, and which have been marked for such removal. */
-
- for (f = m, t = m; f < m + *n; f++) {
- _cleanup_free_ char *chased = NULL;
- int k;
-
- k = chase_symlinks(bind_mount_path(f), root_directory, 0, &chased);
- if (k < 0) {
- /* Get only real errors */
- if (r >= 0 && (k != -ENOENT || !f->ignore))
- r = k;
-
- /* Doesn't exist or failed? Then remove it and continue! */
- log_debug_errno(k, "Failed to chase symlinks for %s: %m", bind_mount_path(f));
- f->path_malloc = mfree(f->path_malloc);
- continue;
- }
-
- if (!path_equal(bind_mount_path(f), chased)) {
- log_debug("Chased %s → %s", bind_mount_path(f), chased);
-
- free(f->path_malloc);
- f->path_malloc = chased;
- chased = NULL;
- }
-
- *t = *f;
- t++;
- }
+ if (r == -ENOENT && m->ignore)
+ r = 0;
- *n = t - m;
return r;
}
@@ -698,6 +743,8 @@ static unsigned namespace_calculate_mounts(
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
+ const BindMount *bind_mounts,
+ unsigned n_bind_mounts,
const char* tmp_dir,
const char* var_tmp_dir,
ProtectHome protect_home,
@@ -722,6 +769,7 @@ static unsigned namespace_calculate_mounts(
strv_length(read_write_paths) +
strv_length(read_only_paths) +
strv_length(inaccessible_paths) +
+ n_bind_mounts +
ns_info->private_dev +
(ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
(ns_info->protect_control_groups ? 1 : 0) +
@@ -735,13 +783,15 @@ int setup_namespace(
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
+ const BindMount *bind_mounts,
+ unsigned n_bind_mounts,
const char* tmp_dir,
const char* var_tmp_dir,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags) {
- BindMount *m, *mounts = NULL;
+ MountEntry *m, *mounts = NULL;
bool make_slave = false;
unsigned n_mounts;
int r = 0;
@@ -749,19 +799,21 @@ int setup_namespace(
if (mount_flags == 0)
mount_flags = MS_SHARED;
- n_mounts = namespace_calculate_mounts(ns_info,
- read_write_paths,
- read_only_paths,
- inaccessible_paths,
- tmp_dir, var_tmp_dir,
- protect_home, protect_system);
+ n_mounts = namespace_calculate_mounts(
+ ns_info,
+ read_write_paths,
+ read_only_paths,
+ inaccessible_paths,
+ bind_mounts, n_bind_mounts,
+ tmp_dir, var_tmp_dir,
+ protect_home, protect_system);
/* Set mount slave mode */
if (root_directory || n_mounts > 0)
make_slave = true;
if (n_mounts > 0) {
- m = mounts = (BindMount *) alloca0(n_mounts * sizeof(BindMount));
+ m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
r = append_access_mounts(&m, read_write_paths, READWRITE);
if (r < 0)
goto finish;
@@ -774,22 +826,26 @@ int setup_namespace(
if (r < 0)
goto finish;
+ r = append_bind_mounts(&m, bind_mounts, n_bind_mounts);
+ if (r < 0)
+ goto finish;
+
if (tmp_dir) {
- *(m++) = (BindMount) {
+ *(m++) = (MountEntry) {
.path_const = "/tmp",
.mode = PRIVATE_TMP,
};
}
if (var_tmp_dir) {
- *(m++) = (BindMount) {
+ *(m++) = (MountEntry) {
.path_const = "/var/tmp",
.mode = PRIVATE_VAR_TMP,
};
}
if (ns_info->private_dev) {
- *(m++) = (BindMount) {
+ *(m++) = (MountEntry) {
.path_const = "/dev",
.mode = PRIVATE_DEV,
};
@@ -808,7 +864,7 @@ int setup_namespace(
}
if (ns_info->protect_control_groups) {
- *(m++) = (BindMount) {
+ *(m++) = (MountEntry) {
.path_const = "/sys/fs/cgroup",
.mode = READONLY,
};
@@ -829,14 +885,7 @@ int setup_namespace(
if (r < 0)
goto finish;
- /* Resolve symlinks manually first, as mount() will always follow them relative to the host's
- * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit
- * racy. */
- r = chase_all_symlinks(root_directory, mounts, &n_mounts);
- if (r < 0)
- goto finish;
-
- qsort(mounts, n_mounts, sizeof(BindMount), mount_path_compare);
+ qsort(mounts, n_mounts, sizeof(MountEntry), mount_path_compare);
drop_duplicates(mounts, &n_mounts);
drop_outside_root(root_directory, mounts, &n_mounts);
@@ -877,7 +926,7 @@ int setup_namespace(
/* First round, add in all special mounts we need */
for (m = mounts; m < mounts + n_mounts; ++m) {
- r = apply_mount(m, tmp_dir, var_tmp_dir);
+ r = apply_mount(root_directory, m, tmp_dir, var_tmp_dir);
if (r < 0)
goto finish;
}
@@ -885,7 +934,7 @@ int setup_namespace(
/* Create a blacklist we can pass to bind_mount_recursive() */
blacklist = newa(char*, n_mounts+1);
for (j = 0; j < n_mounts; j++)
- blacklist[j] = (char*) bind_mount_path(mounts+j);
+ blacklist[j] = (char*) mount_entry_path(mounts+j);
blacklist[j] = NULL;
/* Second round, flip the ro bits if necessary. */
@@ -920,6 +969,53 @@ finish:
return r;
}
+void bind_mount_free_many(BindMount *b, unsigned n) { /* Frees the source/destination strings of the first n entries of b, then the array b itself. */
+ unsigned i;
+
+ assert(b || n == 0); /* a NULL array is only acceptable when it is declared empty */
+
+ for (i = 0; i < n; i++) {
+ free(b[i].source);
+ free(b[i].destination);
+ }
+
+ free(b); /* the array itself; with b == NULL (and hence n == 0) this is a no-op */
+}
+
+int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item) { /* Appends a deep copy of *item to the array *b (holding *n entries) and increments *n. Returns 0 on success, -ENOMEM if any allocation fails. */
+ _cleanup_free_ char *s = NULL, *d = NULL; /* private copies of the two strings; marked _cleanup_free_ so the early -ENOMEM returns need no explicit free() */
+ BindMount *c;
+
+ assert(b);
+ assert(n);
+ assert(item);
+
+ s = strdup(item->source);
+ if (!s)
+ return -ENOMEM;
+
+ d = strdup(item->destination);
+ if (!d)
+ return -ENOMEM;
+
+ c = realloc_multiply(*b, sizeof(BindMount), *n + 1); /* make room for one more element; *b and *n are only written once this has succeeded */
+ if (!c)
+ return -ENOMEM;
+
+ *b = c;
+
+ c[(*n) ++] = (BindMount) { /* fill the new last slot and bump the caller's count */
+ .source = s,
+ .destination = d,
+ .read_only = item->read_only,
+ .recursive = item->recursive,
+ .ignore_enoent = item->ignore_enoent,
+ };
+
+ s = d = NULL; /* the array entry references the strings now, so clear the locals to keep them from being freed on return */
+ return 0;
+}
+
static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
_cleanup_free_ char *x = NULL;
char bid[SD_ID128_STRING_MAX];
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 2c278fd457..de3edc419c 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -21,6 +21,7 @@
***/
typedef struct NameSpaceInfo NameSpaceInfo;
+typedef struct BindMount BindMount;
#include <stdbool.h>
@@ -51,20 +52,32 @@ struct NameSpaceInfo {
bool protect_kernel_modules:1;
};
-int setup_namespace(const char *chroot,
- const NameSpaceInfo *ns_info,
- char **read_write_paths,
- char **read_only_paths,
- char **inaccessible_paths,
- const char *tmp_dir,
- const char *var_tmp_dir,
- ProtectHome protect_home,
- ProtectSystem protect_system,
- unsigned long mount_flags);
-
-int setup_tmp_dirs(const char *id,
- char **tmp_dir,
- char **var_tmp_dir);
+struct BindMount { /* One bind mount request as handed to setup_namespace(); arrays are grown with bind_mount_add() and released with bind_mount_free_many(). */
+ char *source; /* presumably the path to mount from; a heap copy when added via bind_mount_add(), free()d by bind_mount_free_many() */
+ char *destination; /* presumably the path to mount onto; same ownership as "source" */
+ bool read_only:1; /* presumably: make the mount read-only (cf. BindReadOnlyPaths=) - TODO confirm in append_bind_mounts() */
+ bool recursive:1; /* presumably: recursive (MS_REC) rather than plain bind mount - TODO confirm in append_bind_mounts() */
+ bool ignore_enoent:1; /* presumably: skip the entry instead of failing when a path does not exist - TODO confirm */
+};
+
+int setup_namespace(
+ const char *root_directory,
+ const NameSpaceInfo *ns_info,
+ char **read_write_paths,
+ char **read_only_paths,
+ char **inaccessible_paths,
+ const BindMount *bind_mounts,
+ unsigned n_bind_mounts,
+ const char *tmp_dir,
+ const char *var_tmp_dir,
+ ProtectHome protect_home,
+ ProtectSystem protect_system,
+ unsigned long mount_flags);
+
+int setup_tmp_dirs(
+ const char *id,
+ char **tmp_dir,
+ char **var_tmp_dir);
int setup_netns(int netns_storage_socket[2]);
@@ -73,3 +86,6 @@ ProtectHome protect_home_from_string(const char *s) _pure_;
const char* protect_system_to_string(ProtectSystem p) _const_;
ProtectSystem protect_system_from_string(const char *s) _pure_;
+
+void bind_mount_free_many(BindMount *b, unsigned n);
+int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 3114275c85..b030b3b9d1 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -27,6 +27,7 @@
#include "hashmap.h"
#include "list.h"
#include "locale-util.h"
+#include "mount-util.h"
#include "nsflags.h"
#include "parse-util.h"
#include "path-util.h"
@@ -265,7 +266,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
"StandardInput", "StandardOutput", "StandardError",
"Description", "Slice", "Type", "WorkingDirectory",
"RootDirectory", "SyslogIdentifier", "ProtectSystem",
- "ProtectHome", "SELinuxContext"))
+ "ProtectHome", "SELinuxContext", "Restart"))
r = sd_bus_message_append(m, "v", "s", eq);
else if (streq(field, "SyslogLevel")) {
@@ -575,7 +576,91 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "t", flags);
} else if ((dep = unit_dependency_from_string(field)) >= 0)
r = sd_bus_message_append(m, "v", "as", 1, eq);
- else {
+ else if (streq(field, "MountFlags")) {
+ unsigned long f; /* propagation flag as returned by mount_propagation_flags_from_string(); 0 doubles as "unset" and as the parse-failure marker */
+
+ if (isempty(eq))
+ f = 0; /* an empty assignment is sent as 0 */
+ else {
+ f = mount_propagation_flags_from_string(eq);
+ if (f == 0) {
+ log_error("Failed to parse mount propagation type: %s", eq);
+ return -EINVAL;
+ }
+ }
+
+ r = sd_bus_message_append(m, "v", "t", (uint64_t) f); /* "t" is fetched from the varargs as uint64_t; unsigned long is narrower on 32-bit ABIs, so widen explicitly */
+ } else if (STR_IN_SET(field, "BindPaths", "BindReadOnlyPaths")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, 'v', "a(ssbt)");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "(ssbt)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ char *s = NULL, *d = NULL;
+ bool ignore_enoent = false;
+ uint64_t flags = MS_REC;
+
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ s = source;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0) {
+ log_error("Missing argument after ':': %s", eq);
+ return -EINVAL;
+ }
+
+ d = destination;
+
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+
+ if (isempty(options) || streq(options, "rbind"))
+ flags = MS_REC;
+ else if (streq(options, "norbind"))
+ flags = 0;
+ else {
+ log_error("Unknown options: %s", eq);
+ return -EINVAL;
+ }
+ }
+ } else
+ d = s;
+
+
+ r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ } else {
log_error("Unknown assignment %s.", assignment);
return -EINVAL;
}
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
new file mode 100644
index 0000000000..da7f35623b
--- /dev/null
+++ b/src/test/test-mount-util.c
@@ -0,0 +1,45 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mount.h>
+
+#include "log.h"
+#include "mount-util.h"
+#include "string-util.h"
+
+static void test_mount_propagation_flags(const char *name, unsigned long f) { /* Checks that "name" parses to the flag "f" and, for non-zero "f", that "f" converts back to "name". */
+ assert_se(mount_propagation_flags_from_string(name) == f); /* assert_se() rather than assert(): the check must not vanish when built with NDEBUG */
+
+ if (f != 0) /* 0 is what NULL, empty and unknown names are expected to parse to, so there is no name to round-trip */
+ assert_se(streq_ptr(mount_propagation_flags_to_string(f), name));
+}
+
+int main(int argc, char *argv[]) { /* Runs the mount propagation flag string conversion checks; returns 0 when every check passed. */
+
+ log_set_max_level(LOG_DEBUG);
+
+ test_mount_propagation_flags("shared", MS_SHARED); /* the three valid names must map to their flag and back */
+ test_mount_propagation_flags("slave", MS_SLAVE);
+ test_mount_propagation_flags("private", MS_PRIVATE);
+ test_mount_propagation_flags(NULL, 0); /* NULL, the empty string and an unknown name are all expected to yield 0 */
+ test_mount_propagation_flags("", 0);
+ test_mount_propagation_flags("xxxx", 0);
+
+ return 0;
+}
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index da7a8b0565..c99bcb371b 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -81,6 +81,7 @@ int main(int argc, char *argv[]) {
(char **) writable,
(char **) readonly,
(char **) inaccessible,
+ &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,
tmp_dir,
var_tmp_dir,
PROTECT_HOME_NO,