diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile.am | 7 | ||||
-rw-r--r-- | TODO | 12 | ||||
-rw-r--r-- | man/systemd.exec.xml | 25 | ||||
-rw-r--r-- | src/basic/mount-util.c | 32 | ||||
-rw-r--r-- | src/basic/mount-util.h | 3 | ||||
-rw-r--r-- | src/core/dbus-execute.c | 131 | ||||
-rw-r--r-- | src/core/dbus-service.c | 23 | ||||
-rw-r--r-- | src/core/execute.c | 16 | ||||
-rw-r--r-- | src/core/execute.h | 2 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 166 | ||||
-rw-r--r-- | src/core/load-fragment.h | 1 | ||||
-rw-r--r-- | src/core/namespace.c | 360 | ||||
-rw-r--r-- | src/core/namespace.h | 44 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 89 | ||||
-rw-r--r-- | src/test/test-mount-util.c | 45 | ||||
-rw-r--r-- | src/test/test-ns.c | 1 |
18 files changed, 784 insertions, 176 deletions
diff --git a/.gitignore b/.gitignore index 016ba625e3..ec4b7bd672 100644 --- a/.gitignore +++ b/.gitignore @@ -239,6 +239,7 @@ /test-loopback /test-machine-tables /test-mmap-cache +/test-mount-util /test-namespace /test-ndisc-rs /test-netlink diff --git a/Makefile.am b/Makefile.am index 3bd8c29dd3..8f7b83f0ab 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1512,6 +1512,7 @@ tests += \ test-utf8 \ test-ellipsize \ test-util \ + test-mount-util \ test-cpu-set-util \ test-hexdecoct \ test-escape \ @@ -1890,6 +1891,12 @@ test_util_SOURCES = \ test_util_LDADD = \ libsystemd-shared.la +test_mount_util_SOURCES = \ + src/test/test-mount-util.c + +test_mount_util_LDADD = \ + libsystemd-shared.la + test_hexdecoct_SOURCES = \ src/test/test-hexdecoct.c @@ -27,6 +27,10 @@ Janitorial Clean-ups: Features: +* Add ExecMonitor= setting. May be used multiple times. Forks off a process in + the service cgroup, which is supposed to monitor the service, and when it + exits the service is considered failed by its monitor. + * replace all canonicalize_file_name() invocations by chase_symlinks(), in particulr those where a rootdir is relevant. @@ -78,8 +82,6 @@ Features: * Add DataDirectory=, CacheDirectory= and LogDirectory= to match RuntimeDirectory=, and create it as necessary when starting a service, owned by the right user. -* Add BindDirectory= for allowing arbitrary, private bind mounts for services - * Add RootImage= for mounting a disk image or file as root directory * make sure the ratelimit object can deal with USEC_INFINITY as way to turn off things @@ -340,8 +342,6 @@ Features: * refuse boot if /usr/lib/os-release is missing or /etc/machine-id cannot be set up -* btrfs raid assembly: some .device jobs stay stuck in the queue - * man: the documentation of Restart= currently is very misleading and suggests the tools from ExecStartPre= might get restarted. 
* load .d/*.conf dropins for device units @@ -587,15 +587,13 @@ Features: * on shutdown: move utmp, wall, audit logic all into PID 1 (or logind?), get rid of systemd-update-utmp-runlevel -* make repeated alt-ctrl-del presses printing a dump, or even force a reboot without - waiting for the timeout +* make repeated alt-ctrl-del presses printing a dump * hostnamed: before returning information from /etc/machine-info.conf check the modification data and reread. Similar for localed, ... * currently x-systemd.timeout is lost in the initrd, since crypttab is copied into dracut, but fstab is not * nspawn: - - nspawn -x should support ephemeral instances of gpt images - emulate /dev/kmsg using CUSE and turn off the syslog syscall with seccomp. That should provide us with a useful log buffer that systemd can log to during early boot, and disconnect container logs diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index f27e4a5c04..812e615530 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -968,6 +968,31 @@ </varlistentry> <varlistentry> + <term><varname>BindPaths=</varname></term> + <term><varname>BindReadOnlyPaths=</varname></term> + + <listitem><para>Configures unit-specific bind mounts. A bind mount makes a particular file or directory + available at an additional place in the unit's view of the file system. Any bind mounts created with this + option are specific to the unit, and are not visible in the host's mount table. This option expects a + whitespace separated list of bind mount definitions. Each definition consists of a colon-separated triple of + source path, destination path and option string, where the latter two are optional. If only a source path is + specified the source and destination is taken to be the same. The option string may be either + <literal>rbind</literal> or <literal>norbind</literal> for configuring a recursive or non-recursive bind + mount. 
If the destination path is omitted, the option string must be omitted too.</para> + + <para><varname>BindPaths=</varname> creates regular writable bind mounts (unless the source file system mount + is already marked read-only), while <varname>BindReadOnlyPaths=</varname> creates read-only bind mounts. These + settings may be used more than once, each usage appends to the unit's list of bind mounts. If the empty string + is assigned to either of these two options the entire list of bind mounts defined prior to this is reset. Note + that in this case both read-only and regular bind mounts are reset, regardless of which of the two settings is + used.</para> + + <para>This option is particularly useful when <varname>RootDirectory=</varname> is used. In this case the + source path refers to a path on the host file system, while the destination path refers to a path below the + root directory of the unit.</para></listitem> + </varlistentry> + + <varlistentry> <term><varname>PrivateTmp=</varname></term> <listitem><para>Takes a boolean argument. 
If true, sets up a new file system namespace for the executed diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index 352c3505fb..8970050408 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -689,3 +689,35 @@ int umount_verbose(const char *what) { return log_error_errno(errno, "Failed to unmount %s: %m", what); return 0; } + +const char *mount_propagation_flags_to_string(unsigned long flags) { + + switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) { + + case MS_SHARED: + return "shared"; + + case MS_SLAVE: + return "slave"; + + case MS_PRIVATE: + return "private"; + } + + return NULL; +} + +unsigned long mount_propagation_flags_from_string(const char *name) { + + if (isempty(name)) + return 0; + + if (streq(name, "shared")) + return MS_SHARED; + if (streq(name, "slave")) + return MS_SLAVE; + if (streq(name, "private")) + return MS_PRIVATE; + + return 0; +} diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h index b840956d63..c8049198d4 100644 --- a/src/basic/mount-util.h +++ b/src/basic/mount-util.h @@ -61,3 +61,6 @@ int mount_verbose( unsigned long flags, const char *options); int umount_verbose(const char *where); + +const char *mount_propagation_flags_to_string(unsigned long flags); +unsigned long mount_propagation_flags_from_string(const char *name); diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 23c1b44573..b3fc0ff5c3 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -34,6 +34,7 @@ #include "fileio.h" #include "ioprio.h" #include "missing.h" +#include "mount-util.h" #include "namespace.h" #include "parse-util.h" #include "path-util.h" @@ -674,6 +675,49 @@ static int property_get_output_fdname( return sd_bus_message_append(reply, "s", name); } +static int property_get_bind_paths( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + unsigned i; + bool ro; 
+ int r; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + ro = !!strstr(property, "ReadOnly"); + + r = sd_bus_message_open_container(reply, 'a', "(ssbt)"); + if (r < 0) + return r; + + for (i = 0; i < c->n_bind_mounts; i++) { + + if (ro != c->bind_mounts[i].read_only) + continue; + + r = sd_bus_message_append( + reply, "(ssbt)", + c->bind_mounts[i].source, + c->bind_mounts[i].destination, + c->bind_mounts[i].ignore_enoent, + c->bind_mounts[i].recursive ? MS_REC : 0); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -782,6 +826,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; @@ -1363,8 +1409,8 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; - if (!isempty(path) && !path_is_absolute(path)) - return sd_bus_error_set_errnof(error, EINVAL, "Path %s is not absolute.", path); + if (!path_is_absolute(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path); if (mode != UNIT_CHECK) { char *buf = NULL; @@ -1613,6 +1659,87 @@ int bus_exec_context_set_transient_property( } return 1; + } else if (streq(name, "MountFlags")) { + 
uint64_t flags; + + r = sd_bus_message_read(message, "t", &flags); + if (r < 0) + return r; + if (!IN_SET(flags, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount propagation flags"); + + if (mode != UNIT_CHECK) { + c->mount_flags = flags; + + unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, strempty(mount_propagation_flags_to_string(flags))); + } + + return 1; + } else if (STR_IN_SET(name, "BindPaths", "BindReadOnlyPaths")) { + unsigned empty = true; + + r = sd_bus_message_enter_container(message, 'a', "(ssbt)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_enter_container(message, 'r', "ssbt")) > 0) { + const char *source, *destination; + int ignore_enoent; + uint64_t mount_flags; + + r = sd_bus_message_read(message, "ssbt", &source, &destination, &ignore_enoent, &mount_flags); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!path_is_absolute(source)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source); + if (!path_is_absolute(destination)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", source); + if (!IN_SET(mount_flags, 0, MS_REC)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount flags."); + + if (mode != UNIT_CHECK) { + r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts, + &(BindMount) { + .source = strdup(source), + .destination = strdup(destination), + .read_only = !!strstr(name, "ReadOnly"), + .recursive = !!(mount_flags & MS_REC), + .ignore_enoent = ignore_enoent, + }); + if (r < 0) + return r; + + unit_write_drop_in_private_format( + u, mode, name, + "%s=%s%s:%s:%s", + name, + ignore_enoent ? "-" : "", + source, + destination, + (mount_flags & MS_REC) ? 
"rbind" : "norbind"); + } + + empty = false; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (empty) { + bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); + c->bind_mounts = NULL; + c->n_bind_mounts = 0; + } + + return 1; } ri = rlimit_from_string(name); diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 61b83d2d62..85b67318ed 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -143,6 +143,29 @@ static int bus_service_set_transient_property( return 1; + } else if (streq(name, "Restart")) { + ServiceRestart sr; + const char *v; + + r = sd_bus_message_read(message, "s", &v); + if (r < 0) + return r; + + if (isempty(v)) + sr = SERVICE_RESTART_NO; + else { + sr = service_restart_from_string(v); + if (sr < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid restart setting: %s", v); + } + + if (mode != UNIT_CHECK) { + s->restart = sr; + unit_write_drop_in_private_format(UNIT(s), mode, name, "Restart=%s", service_restart_to_string(sr)); + } + + return 1; + } else if (STR_IN_SET(name, "StandardInputFileDescriptor", "StandardOutputFileDescriptor", diff --git a/src/core/execute.c b/src/core/execute.c index 07ab067c05..2ee8c9a416 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1826,6 +1826,9 @@ static bool exec_needs_mount_namespace( !strv_isempty(context->inaccessible_paths)) return true; + if (context->n_bind_mounts > 0) + return true; + if (context->mount_flags != 0) return true; @@ -2147,6 +2150,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context, r = setup_namespace(root_dir, &ns_info, rw, context->read_only_paths, context->inaccessible_paths, + context->bind_mounts, + context->n_bind_mounts, tmp, var, context->protect_home, @@ -3086,6 +3091,8 @@ void exec_context_done(ExecContext *c) { c->read_write_paths = strv_free(c->read_write_paths); c->inaccessible_paths = strv_free(c->inaccessible_paths); + 
bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); + if (c->cpuset) CPU_FREE(c->cpuset); @@ -3569,6 +3576,15 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { fputs("\n", f); } + if (c->n_bind_mounts > 0) + for (i = 0; i < c->n_bind_mounts; i++) { + fprintf(f, "%s%s: %s:%s:%s\n", prefix, + c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths", + c->bind_mounts[i].source, + c->bind_mounts[i].destination, + c->bind_mounts[i].recursive ? "rbind" : "norbind"); + } + if (c->utmp_id) fprintf(f, "%sUtmpIdentifier: %s\n", diff --git a/src/core/execute.h b/src/core/execute.h index 951c8f4da3..84ab4339cf 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -161,6 +161,8 @@ struct ExecContext { char **read_write_paths, **read_only_paths, **inaccessible_paths; unsigned long mount_flags; + BindMount *bind_mounts; + unsigned n_bind_mounts; uint64_t capability_bounding_set; uint64_t capability_ambient_set; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 2610442b91..15f22a2681 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -89,6 +89,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, $1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths) $1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths) $1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) +$1.BindPaths, config_parse_bind_paths, 0, offsetof($1, exec_context) +$1.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof($1, exec_context) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) $1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) diff --git a/src/core/load-fragment.c 
b/src/core/load-fragment.c index 687cd1dd31..f325d853c6 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -49,6 +49,7 @@ #include "load-fragment.h" #include "log.h" #include "missing.h" +#include "mount-util.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -1264,19 +1265,20 @@ int config_parse_sysv_priority(const char *unit, DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_kill_mode, kill_mode, KillMode, "Failed to parse kill mode"); -int config_parse_exec_mount_flags(const char *unit, - const char *filename, - unsigned line, - const char *section, - unsigned section_line, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +int config_parse_exec_mount_flags( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { - unsigned long flags = 0; + unsigned long flags; ExecContext *c = data; assert(filename); @@ -1284,15 +1286,14 @@ int config_parse_exec_mount_flags(const char *unit, assert(rvalue); assert(data); - if (streq(rvalue, "shared")) - flags = MS_SHARED; - else if (streq(rvalue, "slave")) - flags = MS_SLAVE; - else if (streq(rvalue, "private")) - flags = MS_PRIVATE; + if (isempty(rvalue)) + flags = 0; else { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse mount flag %s, ignoring.", rvalue); - return 0; + flags = mount_propagation_flags_from_string(rvalue); + if (flags == 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse mount flag %s, ignoring.", rvalue); + return 0; + } } c->mount_flags = flags; @@ -3890,6 +3891,132 @@ int config_parse_namespace_path_strv( return 0; } +int config_parse_bind_paths( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned 
section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + const char *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + /* Empty assignment resets the list */ + bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); + c->bind_mounts = NULL; + c->n_bind_mounts = 0; + return 0; + } + + p = rvalue; + for (;;) { + _cleanup_free_ char *source = NULL, *destination = NULL; + char *s = NULL, *d = NULL; + bool rbind = true, ignore_enoent = false; + + r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue); + return 0; + } + + s = source; + if (s[0] == '-') { + ignore_enoent = true; + s++; + } + + if (!utf8_is_valid(s)) { + log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, s); + return 0; + } + if (!path_is_absolute(s)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute source path, ignoring: %s", s); + return 0; + } + + path_kill_slashes(s); + + /* Optionally, the destination is specified. 
*/ + if (p && p[-1] == ':') { + r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue); + return 0; + } + if (r == 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Missing argument after ':': %s", rvalue); + return 0; + } + + if (!utf8_is_valid(destination)) { + log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, destination); + return 0; + } + if (!path_is_absolute(destination)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute destination path, ignoring: %s", destination); + return 0; + } + + d = path_kill_slashes(destination); + + /* Optionally, there's also a short option string specified */ + if (p && p[-1] == ':') { + _cleanup_free_ char *options = NULL; + + r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue); + return 0; + } + + if (isempty(options) || streq(options, "rbind")) + rbind = true; + else if (streq(options, "norbind")) + rbind = false; + else { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid option string, ignoring setting: %s", options); + return 0; + } + } + } else + d = s; + + r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts, + &(BindMount) { + .source = s, + .destination = d, + .read_only = !!strstr(lvalue, "ReadOnly"), + .recursive = rbind, + .ignore_enoent = ignore_enoent, + }); + if (r < 0) + return log_oom(); + } + + return 0; +} + int config_parse_no_new_privileges( const char* unit, const char *filename, @@ -4387,6 +4514,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_sec, "SECONDS" }, { config_parse_nsec, "NANOSECONDS" }, { config_parse_namespace_path_strv, "PATH [...]" }, + { config_parse_bind_paths, "PATH[:PATH[:OPTIONS]] [...]" }, { 
config_parse_unit_requires_mounts_for, "PATH [...]" }, { config_parse_exec_mount_flags, "MOUNTFLAG [...]" }, { config_parse_unit_string_printf, "STRING" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 1cff815a50..bbac2d84b5 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -117,6 +117,7 @@ int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_restrict_namespaces(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_bind_paths(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/namespace.c b/src/core/namespace.c index aca47a4d2f..834883267c 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -50,6 +50,8 @@ typedef enum MountMode { /* This is ordered by priority! */ INACCESSIBLE, + BIND_MOUNT, + BIND_MOUNT_RECURSIVE, READONLY, PRIVATE_TMP, PRIVATE_VAR_TMP, @@ -57,13 +59,16 @@ typedef enum MountMode { READWRITE, } MountMode; -typedef struct BindMount { +typedef struct MountEntry { const char *path_const; /* Memory allocated on stack or static */ - MountMode mode:6; + MountMode mode:5; bool ignore:1; /* Ignore if path does not exist? 
*/ bool has_prefix:1; /* Already is prefixed by the root dir? */ + bool read_only:1; /* Shall this mount point be read-only? */ char *path_malloc; /* Use this instead of 'path' if we had to allocate memory */ -} BindMount; + const char *source_const; /* The source path, for bind mounts */ + char *source_malloc; +} MountEntry; /* * The following Protect tables are to protect paths and mark some of them @@ -74,7 +79,7 @@ typedef struct BindMount { */ /* ProtectKernelTunables= option and the related filesystem APIs */ -static const BindMount protect_kernel_tunables_table[] = { +static const MountEntry protect_kernel_tunables_table[] = { { "/proc/sys", READONLY, false }, { "/proc/sysrq-trigger", READONLY, true }, { "/proc/latency_stats", READONLY, true }, @@ -93,7 +98,7 @@ static const BindMount protect_kernel_tunables_table[] = { }; /* ProtectKernelModules= option */ -static const BindMount protect_kernel_modules_table[] = { +static const MountEntry protect_kernel_modules_table[] = { #ifdef HAVE_SPLIT_USR { "/lib/modules", INACCESSIBLE, true }, #endif @@ -104,28 +109,28 @@ static const BindMount protect_kernel_modules_table[] = { * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of * system should be protected by ProtectSystem= */ -static const BindMount protect_home_read_only_table[] = { +static const MountEntry protect_home_read_only_table[] = { { "/home", READONLY, true }, { "/run/user", READONLY, true }, { "/root", READONLY, true }, }; /* ProtectHome=yes table */ -static const BindMount protect_home_yes_table[] = { +static const MountEntry protect_home_yes_table[] = { { "/home", INACCESSIBLE, true }, { "/run/user", INACCESSIBLE, true }, { "/root", INACCESSIBLE, true }, }; /* ProtectSystem=yes table */ -static const BindMount protect_system_yes_table[] = { +static const MountEntry protect_system_yes_table[] = { { "/usr", READONLY, false }, { "/boot", READONLY, true }, { "/efi", READONLY, true }, }; /* ProtectSystem=full includes 
ProtectSystem=yes */ -static const BindMount protect_system_full_table[] = { +static const MountEntry protect_system_full_table[] = { { "/usr", READONLY, false }, { "/boot", READONLY, true }, { "/efi", READONLY, true }, @@ -140,17 +145,17 @@ static const BindMount protect_system_full_table[] = { * (And of course /home and friends are also left writable, as ProtectHome= * shall manage those, orthogonally). */ -static const BindMount protect_system_strict_table[] = { - { "/", READONLY, false }, - { "/proc", READWRITE, false }, /* ProtectKernelTunables= */ - { "/sys", READWRITE, false }, /* ProtectKernelTunables= */ - { "/dev", READWRITE, false }, /* PrivateDevices= */ - { "/home", READWRITE, true }, /* ProtectHome= */ - { "/run/user", READWRITE, true }, /* ProtectHome= */ - { "/root", READWRITE, true }, /* ProtectHome= */ +static const MountEntry protect_system_strict_table[] = { + { "/", READONLY, false }, + { "/proc", READWRITE, false }, /* ProtectKernelTunables= */ + { "/sys", READWRITE, false }, /* ProtectKernelTunables= */ + { "/dev", READWRITE, false }, /* PrivateDevices= */ + { "/home", READWRITE, true }, /* ProtectHome= */ + { "/run/user", READWRITE, true }, /* ProtectHome= */ + { "/root", READWRITE, true }, /* ProtectHome= */ }; -static const char *bind_mount_path(const BindMount *p) { +static const char *mount_entry_path(const MountEntry *p) { assert(p); /* Returns the path of this bind mount. 
If the malloc()-allocated ->path_buffer field is set we return that, @@ -159,7 +164,19 @@ static const char *bind_mount_path(const BindMount *p) { return p->path_malloc ?: p->path_const; } -static int append_access_mounts(BindMount **p, char **strv, MountMode mode) { +static bool mount_entry_read_only(const MountEntry *p) { + assert(p); + + return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE); +} + +static const char *mount_entry_source(const MountEntry *p) { + assert(p); + + return p->source_malloc ?: p->source_const; +} + +static int append_access_mounts(MountEntry **p, char **strv, MountMode mode) { char **i; assert(p); @@ -183,7 +200,7 @@ static int append_access_mounts(BindMount **p, char **strv, MountMode mode) { if (!path_is_absolute(e)) return -EINVAL; - *((*p)++) = (BindMount) { + *((*p)++) = (MountEntry) { .path_const = e, .mode = mode, .ignore = ignore, @@ -194,7 +211,26 @@ static int append_access_mounts(BindMount **p, char **strv, MountMode mode) { return 0; } -static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned n, bool ignore_protect) { +static int append_bind_mounts(MountEntry **p, const BindMount *binds, unsigned n) { + unsigned i; + + assert(p); + + for (i = 0; i < n; i++) { + const BindMount *b = binds + i; + + *((*p)++) = (MountEntry) { + .path_const = b->destination, + .mode = b->recursive ? 
BIND_MOUNT_RECURSIVE : BIND_MOUNT, + .read_only = b->read_only, + .source_const = b->source, + }; + } + + return 0; +} + +static int append_static_mounts(MountEntry **p, const MountEntry *mounts, unsigned n, bool ignore_protect) { unsigned i; assert(p); @@ -203,8 +239,8 @@ static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned /* Adds a list of static pre-defined entries */ for (i = 0; i < n; i++) - *((*p)++) = (BindMount) { - .path_const = bind_mount_path(mounts+i), + *((*p)++) = (MountEntry) { + .path_const = mount_entry_path(mounts+i), .mode = mounts[i].mode, .ignore = mounts[i].ignore || ignore_protect, }; @@ -212,7 +248,7 @@ static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned return 0; } -static int append_protect_home(BindMount **p, ProtectHome protect_home, bool ignore_protect) { +static int append_protect_home(MountEntry **p, ProtectHome protect_home, bool ignore_protect) { assert(p); switch (protect_home) { @@ -231,7 +267,7 @@ static int append_protect_home(BindMount **p, ProtectHome protect_home, bool ign } } -static int append_protect_system(BindMount **p, ProtectSystem protect_system, bool ignore_protect) { +static int append_protect_system(MountEntry **p, ProtectSystem protect_system, bool ignore_protect) { assert(p); switch (protect_system) { @@ -254,11 +290,11 @@ static int append_protect_system(BindMount **p, ProtectSystem protect_system, bo } static int mount_path_compare(const void *a, const void *b) { - const BindMount *p = a, *q = b; + const MountEntry *p = a, *q = b; int d; /* If the paths are not equal, then order prefixes first */ - d = path_compare(bind_mount_path(p), bind_mount_path(q)); + d = path_compare(mount_entry_path(p), mount_entry_path(q)); if (d != 0) return d; @@ -272,7 +308,7 @@ static int mount_path_compare(const void *a, const void *b) { return 0; } -static int prefix_where_needed(BindMount *m, unsigned n, const char *root_directory) { +static int 
prefix_where_needed(MountEntry *m, unsigned n, const char *root_directory) { unsigned i; /* Prefixes all paths in the bind mount table with the root directory if it is specified and the entry needs @@ -287,7 +323,7 @@ static int prefix_where_needed(BindMount *m, unsigned n, const char *root_direct if (m[i].has_prefix) continue; - s = prefix_root(root_directory, bind_mount_path(m+i)); + s = prefix_root(root_directory, mount_entry_path(m+i)); if (!s) return -ENOMEM; @@ -300,8 +336,8 @@ static int prefix_where_needed(BindMount *m, unsigned n, const char *root_direct return 0; } -static void drop_duplicates(BindMount *m, unsigned *n) { - BindMount *f, *t, *previous; +static void drop_duplicates(MountEntry *m, unsigned *n) { + MountEntry *f, *t, *previous; assert(m); assert(n); @@ -312,8 +348,9 @@ static void drop_duplicates(BindMount *m, unsigned *n) { /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare() * above. */ - if (previous && path_equal(bind_mount_path(f), bind_mount_path(previous))) { - log_debug("%s is duplicate.", bind_mount_path(f)); + if (previous && path_equal(mount_entry_path(f), mount_entry_path(previous))) { + log_debug("%s is duplicate.", mount_entry_path(f)); + previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */ f->path_malloc = mfree(f->path_malloc); continue; } @@ -326,8 +363,8 @@ static void drop_duplicates(BindMount *m, unsigned *n) { *n = t - m; } -static void drop_inaccessible(BindMount *m, unsigned *n) { - BindMount *f, *t; +static void drop_inaccessible(MountEntry *m, unsigned *n) { + MountEntry *f, *t; const char *clear = NULL; assert(m); @@ -340,13 +377,13 @@ static void drop_inaccessible(BindMount *m, unsigned *n) { /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop * it, as inaccessible paths really should drop the entire subtree. 
*/ - if (clear && path_startswith(bind_mount_path(f), clear)) { - log_debug("%s is masked by %s.", bind_mount_path(f), clear); + if (clear && path_startswith(mount_entry_path(f), clear)) { + log_debug("%s is masked by %s.", mount_entry_path(f), clear); f->path_malloc = mfree(f->path_malloc); continue; } - clear = f->mode == INACCESSIBLE ? bind_mount_path(f) : NULL; + clear = f->mode == INACCESSIBLE ? mount_entry_path(f) : NULL; *t = *f; t++; @@ -355,8 +392,8 @@ static void drop_inaccessible(BindMount *m, unsigned *n) { *n = t - m; } -static void drop_nop(BindMount *m, unsigned *n) { - BindMount *f, *t; +static void drop_nop(MountEntry *m, unsigned *n) { + MountEntry *f, *t; assert(m); assert(n); @@ -368,12 +405,12 @@ static void drop_nop(BindMount *m, unsigned *n) { /* Only suppress such subtrees for READONLY and READWRITE entries */ if (IN_SET(f->mode, READONLY, READWRITE)) { - BindMount *p; + MountEntry *p; bool found = false; /* Now let's find the first parent of the entry we are looking at. 
*/ for (p = t-1; p >= m; p--) { - if (path_startswith(bind_mount_path(f), bind_mount_path(p))) { + if (path_startswith(mount_entry_path(f), mount_entry_path(p))) { found = true; break; } @@ -381,7 +418,7 @@ static void drop_nop(BindMount *m, unsigned *n) { /* We found it, let's see if it's the same mode, if so, we can drop this entry */ if (found && p->mode == f->mode) { - log_debug("%s is redundant by %s", bind_mount_path(f), bind_mount_path(p)); + log_debug("%s is redundant by %s", mount_entry_path(f), mount_entry_path(p)); f->path_malloc = mfree(f->path_malloc); continue; } @@ -394,8 +431,8 @@ static void drop_nop(BindMount *m, unsigned *n) { *n = t - m; } -static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) { - BindMount *f, *t; +static void drop_outside_root(const char *root_directory, MountEntry *m, unsigned *n) { + MountEntry *f, *t; assert(m); assert(n); @@ -408,8 +445,8 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned for (f = m, t = m; f < m + *n; f++) { - if (!path_startswith(bind_mount_path(f), root_directory)) { - log_debug("%s is outside of root directory.", bind_mount_path(f)); + if (!path_startswith(mount_entry_path(f), root_directory)) { + log_debug("%s is outside of root directory.", mount_entry_path(f)); f->path_malloc = mfree(f->path_malloc); continue; } @@ -421,7 +458,7 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n = t - m; } -static int mount_dev(BindMount *m) { +static int mount_dev(MountEntry *m) { static const char devnodes[] = "/dev/null\0" "/dev/zero\0" @@ -526,11 +563,11 @@ static int mount_dev(BindMount *m) { * missing when the service is started with RootDirectory. This is * consistent with mount units creating the mount points when missing. 
*/ - (void) mkdir_p_label(bind_mount_path(m), 0755); + (void) mkdir_p_label(mount_entry_path(m), 0755); /* Unmount everything in old /dev */ - umount_recursive(bind_mount_path(m), 0); - if (mount(dev, bind_mount_path(m), NULL, MS_MOVE, NULL) < 0) { + umount_recursive(mount_entry_path(m), 0); + if (mount(dev, mount_entry_path(m), NULL, MS_MOVE, NULL) < 0) { r = -errno; goto fail; } @@ -560,17 +597,54 @@ fail: return r; } +static int mount_entry_chase( + const char *root_directory, + MountEntry *m, + const char *path, + char **location) { + + char *chased; + int r; + + assert(m); + + /* Since mount() will always follow symlinks and we need to take the different root directory into account we + * chase the symlinks on our own first. This is called for the destination path, as well as the source path (if + * that applies). The result is stored in "location". */ + + r = chase_symlinks(path, root_directory, 0, &chased); + if (r == -ENOENT && m->ignore) { + log_debug_errno(r, "Path %s does not exist, ignoring.", path); + return 0; + } + if (r < 0) + return log_debug_errno(r, "Failed to follow symlinks on %s: %m", path); + + log_debug("Followed symlinks %s → %s.", path, chased); + + free(*location); + *location = chased; + + return 1; +} + static int apply_mount( - BindMount *m, + const char *root_directory, + MountEntry *m, const char *tmp_dir, const char *var_tmp_dir) { const char *what; + bool rbind = true; int r; assert(m); - log_debug("Applying namespace mount on %s", bind_mount_path(m)); + r = mount_entry_chase(root_directory, m, mount_entry_path(m), &m->path_malloc); + if (r <= 0) + return r; + + log_debug("Applying namespace mount on %s", mount_entry_path(m)); switch (m->mode) { @@ -580,10 +654,10 @@ static int apply_mount( /* First, get rid of everything that is below if there * is anything... Then, overmount it with an * inaccessible path. 
*/ - (void) umount_recursive(bind_mount_path(m), 0); + (void) umount_recursive(mount_entry_path(m), 0); - if (lstat(bind_mount_path(m), &target) < 0) - return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", bind_mount_path(m)); + if (lstat(mount_entry_path(m), &target) < 0) + return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m)); what = mode_to_inaccessible_node(target.st_mode); if (!what) { @@ -595,14 +669,26 @@ static int apply_mount( case READONLY: case READWRITE: - - r = path_is_mount_point(bind_mount_path(m), NULL, 0); + r = path_is_mount_point(mount_entry_path(m), root_directory, 0); if (r < 0) - return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", bind_mount_path(m)); + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m)); if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ return 0; /* This isn't a mount point yet, let's make it one. */ - what = bind_mount_path(m); + what = mount_entry_path(m); + break; + + case BIND_MOUNT: + rbind = false; + /* fallthrough */ + + case BIND_MOUNT_RECURSIVE: + /* Also chase the source mount */ + r = mount_entry_chase(root_directory, m, mount_entry_source(m), &m->source_malloc); + if (r <= 0) + return r; + + what = mount_entry_source(m); break; case PRIVATE_TMP: @@ -622,22 +708,22 @@ static int apply_mount( assert(what); - if (mount(what, bind_mount_path(m), NULL, MS_BIND|MS_REC, NULL) < 0) - return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, bind_mount_path(m)); + if (mount(what, mount_entry_path(m), NULL, MS_BIND|(rbind ? 
MS_REC : 0), NULL) < 0) + return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, mount_entry_path(m)); - log_debug("Successfully mounted %s to %s", what, bind_mount_path(m)); + log_debug("Successfully mounted %s to %s", what, mount_entry_path(m)); return 0; } -static int make_read_only(BindMount *m, char **blacklist) { +static int make_read_only(MountEntry *m, char **blacklist) { int r = 0; assert(m); - if (IN_SET(m->mode, INACCESSIBLE, READONLY)) - r = bind_remount_recursive(bind_mount_path(m), true, blacklist); + if (mount_entry_read_only(m)) + r = bind_remount_recursive(mount_entry_path(m), true, blacklist); else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ - if (mount(NULL, bind_mount_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) r = -errno; } else return 0; @@ -646,50 +732,9 @@ static int make_read_only(BindMount *m, char **blacklist) { * already stays this way. This improves compatibility with container managers, where we won't attempt to undo * read-only mounts already applied. */ - return r; -} - -/* Chase symlinks and remove failed paths from mounts */ -static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { - BindMount *f, *t; - int r = 0; - - assert(m); - assert(n); - - /* Since mount() will always follow symlinks and we need to take the different root directory into account we - * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we - * can't resolve the path, and which have been marked for such removal. */ - - for (f = m, t = m; f < m + *n; f++) { - _cleanup_free_ char *chased = NULL; - int k; - - k = chase_symlinks(bind_mount_path(f), root_directory, 0, &chased); - if (k < 0) { - /* Get only real errors */ - if (r >= 0 && (k != -ENOENT || !f->ignore)) - r = k; - - /* Doesn't exist or failed? 
Then remove it and continue! */ - log_debug_errno(k, "Failed to chase symlinks for %s: %m", bind_mount_path(f)); - f->path_malloc = mfree(f->path_malloc); - continue; - } - - if (!path_equal(bind_mount_path(f), chased)) { - log_debug("Chased %s → %s", bind_mount_path(f), chased); - - free(f->path_malloc); - f->path_malloc = chased; - chased = NULL; - } - - *t = *f; - t++; - } + if (r == -ENOENT && m->ignore) + r = 0; - *n = t - m; return r; } @@ -698,6 +743,8 @@ static unsigned namespace_calculate_mounts( char** read_write_paths, char** read_only_paths, char** inaccessible_paths, + const BindMount *bind_mounts, + unsigned n_bind_mounts, const char* tmp_dir, const char* var_tmp_dir, ProtectHome protect_home, @@ -722,6 +769,7 @@ static unsigned namespace_calculate_mounts( strv_length(read_write_paths) + strv_length(read_only_paths) + strv_length(inaccessible_paths) + + n_bind_mounts + ns_info->private_dev + (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + (ns_info->protect_control_groups ? 
1 : 0) + @@ -735,13 +783,15 @@ int setup_namespace( char** read_write_paths, char** read_only_paths, char** inaccessible_paths, + const BindMount *bind_mounts, + unsigned n_bind_mounts, const char* tmp_dir, const char* var_tmp_dir, ProtectHome protect_home, ProtectSystem protect_system, unsigned long mount_flags) { - BindMount *m, *mounts = NULL; + MountEntry *m, *mounts = NULL; bool make_slave = false; unsigned n_mounts; int r = 0; @@ -749,19 +799,21 @@ int setup_namespace( if (mount_flags == 0) mount_flags = MS_SHARED; - n_mounts = namespace_calculate_mounts(ns_info, - read_write_paths, - read_only_paths, - inaccessible_paths, - tmp_dir, var_tmp_dir, - protect_home, protect_system); + n_mounts = namespace_calculate_mounts( + ns_info, + read_write_paths, + read_only_paths, + inaccessible_paths, + bind_mounts, n_bind_mounts, + tmp_dir, var_tmp_dir, + protect_home, protect_system); /* Set mount slave mode */ if (root_directory || n_mounts > 0) make_slave = true; if (n_mounts > 0) { - m = mounts = (BindMount *) alloca0(n_mounts * sizeof(BindMount)); + m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry)); r = append_access_mounts(&m, read_write_paths, READWRITE); if (r < 0) goto finish; @@ -774,22 +826,26 @@ int setup_namespace( if (r < 0) goto finish; + r = append_bind_mounts(&m, bind_mounts, n_bind_mounts); + if (r < 0) + goto finish; + if (tmp_dir) { - *(m++) = (BindMount) { + *(m++) = (MountEntry) { .path_const = "/tmp", .mode = PRIVATE_TMP, }; } if (var_tmp_dir) { - *(m++) = (BindMount) { + *(m++) = (MountEntry) { .path_const = "/var/tmp", .mode = PRIVATE_VAR_TMP, }; } if (ns_info->private_dev) { - *(m++) = (BindMount) { + *(m++) = (MountEntry) { .path_const = "/dev", .mode = PRIVATE_DEV, }; @@ -808,7 +864,7 @@ int setup_namespace( } if (ns_info->protect_control_groups) { - *(m++) = (BindMount) { + *(m++) = (MountEntry) { .path_const = "/sys/fs/cgroup", .mode = READONLY, }; @@ -829,14 +885,7 @@ int setup_namespace( if (r < 0) goto finish; - /* 
Resolve symlinks manually first, as mount() will always follow them relative to the host's - * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit - * racy. */ - r = chase_all_symlinks(root_directory, mounts, &n_mounts); - if (r < 0) - goto finish; - - qsort(mounts, n_mounts, sizeof(BindMount), mount_path_compare); + qsort(mounts, n_mounts, sizeof(MountEntry), mount_path_compare); drop_duplicates(mounts, &n_mounts); drop_outside_root(root_directory, mounts, &n_mounts); @@ -877,7 +926,7 @@ int setup_namespace( /* First round, add in all special mounts we need */ for (m = mounts; m < mounts + n_mounts; ++m) { - r = apply_mount(m, tmp_dir, var_tmp_dir); + r = apply_mount(root_directory, m, tmp_dir, var_tmp_dir); if (r < 0) goto finish; } @@ -885,7 +934,7 @@ int setup_namespace( /* Create a blacklist we can pass to bind_mount_recursive() */ blacklist = newa(char*, n_mounts+1); for (j = 0; j < n_mounts; j++) - blacklist[j] = (char*) bind_mount_path(mounts+j); + blacklist[j] = (char*) mount_entry_path(mounts+j); blacklist[j] = NULL; /* Second round, flip the ro bits if necessary. 
*/ @@ -920,6 +969,53 @@ finish: return r; } +void bind_mount_free_many(BindMount *b, unsigned n) { + unsigned i; + + assert(b || n == 0); + + for (i = 0; i < n; i++) { + free(b[i].source); + free(b[i].destination); + } + + free(b); +} + +int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item) { + _cleanup_free_ char *s = NULL, *d = NULL; + BindMount *c; + + assert(b); + assert(n); + assert(item); + + s = strdup(item->source); + if (!s) + return -ENOMEM; + + d = strdup(item->destination); + if (!d) + return -ENOMEM; + + c = realloc_multiply(*b, sizeof(BindMount), *n + 1); + if (!c) + return -ENOMEM; + + *b = c; + + c[(*n) ++] = (BindMount) { + .source = s, + .destination = d, + .read_only = item->read_only, + .recursive = item->recursive, + .ignore_enoent = item->ignore_enoent, + }; + + s = d = NULL; + return 0; +} + static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { _cleanup_free_ char *x = NULL; char bid[SD_ID128_STRING_MAX]; diff --git a/src/core/namespace.h b/src/core/namespace.h index 2c278fd457..de3edc419c 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -21,6 +21,7 @@ ***/ typedef struct NameSpaceInfo NameSpaceInfo; +typedef struct BindMount BindMount; #include <stdbool.h> @@ -51,20 +52,32 @@ struct NameSpaceInfo { bool protect_kernel_modules:1; }; -int setup_namespace(const char *chroot, - const NameSpaceInfo *ns_info, - char **read_write_paths, - char **read_only_paths, - char **inaccessible_paths, - const char *tmp_dir, - const char *var_tmp_dir, - ProtectHome protect_home, - ProtectSystem protect_system, - unsigned long mount_flags); - -int setup_tmp_dirs(const char *id, - char **tmp_dir, - char **var_tmp_dir); +struct BindMount { + char *source; + char *destination; + bool read_only:1; + bool recursive:1; + bool ignore_enoent:1; +}; + +int setup_namespace( + const char *root_directory, + const NameSpaceInfo *ns_info, + char **read_write_paths, + char **read_only_paths, + char **inaccessible_paths, 
+ const BindMount *bind_mounts, + unsigned n_bind_mounts, + const char *tmp_dir, + const char *var_tmp_dir, + ProtectHome protect_home, + ProtectSystem protect_system, + unsigned long mount_flags); + +int setup_tmp_dirs( + const char *id, + char **tmp_dir, + char **var_tmp_dir); int setup_netns(int netns_storage_socket[2]); @@ -73,3 +86,6 @@ ProtectHome protect_home_from_string(const char *s) _pure_; const char* protect_system_to_string(ProtectSystem p) _const_; ProtectSystem protect_system_from_string(const char *s) _pure_; + +void bind_mount_free_many(BindMount *b, unsigned n); +int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 3114275c85..b030b3b9d1 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -27,6 +27,7 @@ #include "hashmap.h" #include "list.h" #include "locale-util.h" +#include "mount-util.h" #include "nsflags.h" #include "parse-util.h" #include "path-util.h" @@ -265,7 +266,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen "StandardInput", "StandardOutput", "StandardError", "Description", "Slice", "Type", "WorkingDirectory", "RootDirectory", "SyslogIdentifier", "ProtectSystem", - "ProtectHome", "SELinuxContext")) + "ProtectHome", "SELinuxContext", "Restart")) r = sd_bus_message_append(m, "v", "s", eq); else if (streq(field, "SyslogLevel")) { @@ -575,7 +576,91 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "t", flags); } else if ((dep = unit_dependency_from_string(field)) >= 0) r = sd_bus_message_append(m, "v", "as", 1, eq); - else { + else if (streq(field, "MountFlags")) { + unsigned long f; + + if (isempty(eq)) + f = 0; + else { + f = mount_propagation_flags_from_string(eq); + if (f == 0) { + log_error("Failed to parse mount propagation type: %s", eq); + return -EINVAL; + } + } + + r = sd_bus_message_append(m, "v", "t", f); + } 
else if (STR_IN_SET(field, "BindPaths", "BindReadOnlyPaths")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, 'v', "a(ssbt)"); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "(ssbt)"); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *source = NULL, *destination = NULL; + char *s = NULL, *d = NULL; + bool ignore_enoent = false; + uint64_t flags = MS_REC; + + r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + break; + + s = source; + if (s[0] == '-') { + ignore_enoent = true; + s++; + } + + if (p && p[-1] == ':') { + r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) { + log_error("Missing argument after ':': %s", eq); + return -EINVAL; + } + + d = destination; + + if (p && p[-1] == ':') { + _cleanup_free_ char *options = NULL; + + r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + + if (isempty(options) || streq(options, "rbind")) + flags = MS_REC; + else if (streq(options, "norbind")) + flags = 0; + else { + log_error("Unknown options: %s", eq); + return -EINVAL; + } + } + } else + d = s; + + + r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + r = sd_bus_message_close_container(m); + } else { log_error("Unknown assignment %s.", assignment); return -EINVAL; } diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c new file mode 100644 index 0000000000..da7f35623b --- /dev/null +++ b/src/test/test-mount-util.c @@ -0,0 +1,45 @@ +/*** + This file is part of systemd. 
+ + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sys/mount.h> + +#include "log.h" +#include "mount-util.h" +#include "string-util.h" + +static void test_mount_propagation_flags(const char *name, unsigned long f) { + assert(mount_propagation_flags_from_string(name) == f); + + if (f != 0) + assert_se(streq_ptr(mount_propagation_flags_to_string(f), name)); +} + +int main(int argc, char *argv[]) { + + log_set_max_level(LOG_DEBUG); + + test_mount_propagation_flags("shared", MS_SHARED); + test_mount_propagation_flags("slave", MS_SLAVE); + test_mount_propagation_flags("private", MS_PRIVATE); + test_mount_propagation_flags(NULL, 0); + test_mount_propagation_flags("", 0); + test_mount_propagation_flags("xxxx", 0); + + return 0; +} diff --git a/src/test/test-ns.c b/src/test/test-ns.c index da7a8b0565..c99bcb371b 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -81,6 +81,7 @@ int main(int argc, char *argv[]) { (char **) writable, (char **) readonly, (char **) inaccessible, + &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1, tmp_dir, var_tmp_dir, PROTECT_HOME_NO, |