diff options
| author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2016-12-13 22:30:07 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-12-13 22:30:07 -0500 | 
| commit | 9cf314f34d9ca26bb8867effdf54fc2c78b06f31 (patch) | |
| tree | 62f00b1788b78f3ab19c5b14a5f98fc530c1bc18 | |
| parent | 9ef4e1e5a2d0a9cc50406f1cae05f3918d6f0c2a (diff) | |
| parent | f59d1da8cd15d42b36fa5bab756cf4d144785e2b (diff) | |
Merge pull request #4727 from poettering/exec-bind
More namespace improvements
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | Makefile.am | 7 | ||||
| -rw-r--r-- | TODO | 12 | ||||
| -rw-r--r-- | man/systemd.exec.xml | 25 | ||||
| -rw-r--r-- | src/basic/mount-util.c | 32 | ||||
| -rw-r--r-- | src/basic/mount-util.h | 3 | ||||
| -rw-r--r-- | src/core/dbus-execute.c | 131 | ||||
| -rw-r--r-- | src/core/dbus-service.c | 23 | ||||
| -rw-r--r-- | src/core/execute.c | 16 | ||||
| -rw-r--r-- | src/core/execute.h | 2 | ||||
| -rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
| -rw-r--r-- | src/core/load-fragment.c | 166 | ||||
| -rw-r--r-- | src/core/load-fragment.h | 1 | ||||
| -rw-r--r-- | src/core/namespace.c | 360 | ||||
| -rw-r--r-- | src/core/namespace.h | 44 | ||||
| -rw-r--r-- | src/shared/bus-unit-util.c | 89 | ||||
| -rw-r--r-- | src/test/test-mount-util.c | 45 | ||||
| -rw-r--r-- | src/test/test-ns.c | 1 | 
18 files changed, 784 insertions, 176 deletions
| diff --git a/.gitignore b/.gitignore index 016ba625e3..ec4b7bd672 100644 --- a/.gitignore +++ b/.gitignore @@ -239,6 +239,7 @@  /test-loopback  /test-machine-tables  /test-mmap-cache +/test-mount-util  /test-namespace  /test-ndisc-rs  /test-netlink diff --git a/Makefile.am b/Makefile.am index 3bd8c29dd3..8f7b83f0ab 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1512,6 +1512,7 @@ tests += \  	test-utf8 \  	test-ellipsize \  	test-util \ +	test-mount-util \  	test-cpu-set-util \  	test-hexdecoct \  	test-escape \ @@ -1890,6 +1891,12 @@ test_util_SOURCES = \  test_util_LDADD = \  	libsystemd-shared.la +test_mount_util_SOURCES = \ +	src/test/test-mount-util.c + +test_mount_util_LDADD = \ +	libsystemd-shared.la +  test_hexdecoct_SOURCES = \  	src/test/test-hexdecoct.c @@ -27,6 +27,10 @@ Janitorial Clean-ups:  Features: +* Add ExecMonitor= setting. May be used multiple times. Forks off a process in +  the service cgroup, which is supposed to monitor the service, and when it +  exits the service is considered failed by its monitor. +  * replace all canonicalize_file_name() invocations by chase_symlinks(), in    particulr those where a rootdir is relevant. @@ -78,8 +82,6 @@ Features:  * Add DataDirectory=, CacheDirectory= and LogDirectory= to match    RuntimeDirectory=, and create it as necessary when starting a service, owned by the right user. -* Add BindDirectory= for allowing arbitrary, private bind mounts for services -  * Add RootImage= for mounting a disk image or file as root directory  * make sure the ratelimit object can deal with USEC_INFINITY as way to turn off things @@ -340,8 +342,6 @@ Features:  * refuse boot if /usr/lib/os-release is missing or /etc/machine-id cannot be set up -* btrfs raid assembly: some .device jobs stay stuck in the queue -  * man: the documentation of Restart= currently is very misleading and suggests the tools from ExecStartPre= might get restarted.  
* load .d/*.conf dropins for device units @@ -587,15 +587,13 @@ Features:  * on shutdown: move utmp, wall, audit logic all into PID 1 (or logind?), get rid of systemd-update-utmp-runlevel -* make repeated alt-ctrl-del presses printing a dump, or even force a reboot without -  waiting for the timeout +* make repeated alt-ctrl-del presses printing a dump  * hostnamed: before returning information from /etc/machine-info.conf check the modification data and reread. Similar for localed, ...  * currently x-systemd.timeout is lost in the initrd, since crypttab is copied into dracut, but fstab is not  * nspawn: -  - nspawn -x should support ephemeral instances of gpt images    - emulate /dev/kmsg using CUSE and turn off the syslog syscall      with seccomp. That should provide us with a useful log buffer that      systemd can log to during early boot, and disconnect container logs diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index f27e4a5c04..812e615530 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -968,6 +968,31 @@        </varlistentry>        <varlistentry> +        <term><varname>BindPaths=</varname></term> +        <term><varname>BindReadOnlyPaths=</varname></term> + +        <listitem><para>Configures unit-specific bind mounts. A bind mount makes a particular file or directory +        available at an additional place in the unit's view of the file system. Any bind mounts created with this +        option are specific to the unit, and are not visible in the host's mount table. This option expects a +        whitespace separated list of bind mount definitions. Each definition consists of a colon-separated triple of +        source path, destination path and option string, where the latter two are optional. If only a source path is +        specified the source and destination is taken to be the same. 
The option string may be either +        <literal>rbind</literal> or <literal>norbind</literal> for configuring a recursive or non-recursive bind +        mount. If the destination path is omitted, the option string must be omitted too.</para> + +        <para><varname>BindPaths=</varname> creates regular writable bind mounts (unless the source file system mount +        is already marked read-only), while <varname>BindReadOnlyPaths=</varname> creates read-only bind mounts. These +        settings may be used more than once, each usage appends to the unit's list of bind mounts. If the empty string +        is assigned to either of these two options the entire list of bind mounts defined prior to this is reset. Note +        that in this case both read-only and regular bind mounts are reset, regardless of which of the two settings is +        used.</para> + +        <para>This option is particularly useful when <varname>RootDirectory=</varname> is used. In this case the +        source path refers to a path on the host file system, while the destination path refers to a path below the +        root directory of the unit.</para></listitem> +      </varlistentry> + +      <varlistentry>          <term><varname>PrivateTmp=</varname></term>          <listitem><para>Takes a boolean argument. 
If true, sets up a new file system namespace for the executed diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index 352c3505fb..8970050408 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -689,3 +689,35 @@ int umount_verbose(const char *what) {                  return log_error_errno(errno, "Failed to unmount %s: %m", what);          return 0;  } + +const char *mount_propagation_flags_to_string(unsigned long flags) { + +        switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) { + +        case MS_SHARED: +                return "shared"; + +        case MS_SLAVE: +                return "slave"; + +        case MS_PRIVATE: +                return "private"; +        } + +        return NULL; +} + +unsigned long mount_propagation_flags_from_string(const char *name) { + +        if (isempty(name)) +                return 0; + +        if (streq(name, "shared")) +                return MS_SHARED; +        if (streq(name, "slave")) +                return MS_SLAVE; +        if (streq(name, "private")) +                return MS_PRIVATE; + +        return 0; +} diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h index b840956d63..c8049198d4 100644 --- a/src/basic/mount-util.h +++ b/src/basic/mount-util.h @@ -61,3 +61,6 @@ int mount_verbose(                  unsigned long flags,                  const char *options);  int umount_verbose(const char *where); + +const char *mount_propagation_flags_to_string(unsigned long flags); +unsigned long mount_propagation_flags_from_string(const char *name); diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 23c1b44573..b3fc0ff5c3 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -34,6 +34,7 @@  #include "fileio.h"  #include "ioprio.h"  #include "missing.h" +#include "mount-util.h"  #include "namespace.h"  #include "parse-util.h"  #include "path-util.h" @@ -674,6 +675,49 @@ static int property_get_output_fdname(          return sd_bus_message_append(reply, 
"s", name);  } +static int property_get_bind_paths( +                sd_bus *bus, +                const char *path, +                const char *interface, +                const char *property, +                sd_bus_message *reply, +                void *userdata, +                sd_bus_error *error) { + +        ExecContext *c = userdata; +        unsigned i; +        bool ro; +        int r; + +        assert(bus); +        assert(c); +        assert(property); +        assert(reply); + +        ro = !!strstr(property, "ReadOnly"); + +        r = sd_bus_message_open_container(reply, 'a', "(ssbt)"); +        if (r < 0) +                return r; + +        for (i = 0; i < c->n_bind_mounts; i++) { + +                if (ro != c->bind_mounts[i].read_only) +                        continue; + +                r = sd_bus_message_append( +                                reply, "(ssbt)", +                                c->bind_mounts[i].source, +                                c->bind_mounts[i].destination, +                                c->bind_mounts[i].ignore_enoent, +                                c->bind_mounts[i].recursive ? 
MS_REC : 0); +                if (r < 0) +                        return r; +        } + +        return sd_bus_message_close_container(reply); +} +  const sd_bus_vtable bus_exec_vtable[] = {          SD_BUS_VTABLE_START(0),          SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -782,6 +826,8 @@ const sd_bus_vtable bus_exec_vtable[] = {          SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),          SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),          SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST), +        SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), +        SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),          SD_BUS_VTABLE_END  }; @@ -1363,8 +1409,8 @@ int bus_exec_context_set_transient_property(                          if (r < 0)                                  return r; -                        if (!isempty(path) && !path_is_absolute(path)) -                                return sd_bus_error_set_errnof(error, EINVAL, "Path %s is not absolute.", path); +                        if (!path_is_absolute(path)) +                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path %s is not absolute.", path);                          if (mode != UNIT_CHECK) {                                  char *buf = NULL; @@ -1613,6 +1659,87 @@ int bus_exec_context_set_transient_property(                  }                  return 1; +        } else if (streq(name, "MountFlags")) { +                uint64_t flags; + +                r = sd_bus_message_read(message, "t", &flags); +    
            if (r < 0) +                        return r; +                if (!IN_SET(flags, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE)) +                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount propagation flags"); + +                if (mode != UNIT_CHECK) { +                        c->mount_flags = flags; + +                        unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, strempty(mount_propagation_flags_to_string(flags))); +                } + +                return 1; +        } else if (STR_IN_SET(name, "BindPaths", "BindReadOnlyPaths")) { +                unsigned empty = true; + +                r = sd_bus_message_enter_container(message, 'a', "(ssbt)"); +                if (r < 0) +                        return r; + +                while ((r = sd_bus_message_enter_container(message, 'r', "ssbt")) > 0) { +                        const char *source, *destination; +                        int ignore_enoent; +                        uint64_t mount_flags; + +                        r = sd_bus_message_read(message, "ssbt", &source, &destination, &ignore_enoent, &mount_flags); +                        if (r < 0) +                                return r; + +                        r = sd_bus_message_exit_container(message); +                        if (r < 0) +                                return r; + +                        if (!path_is_absolute(source)) +                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source); +                        if (!path_is_absolute(destination)) +                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Destination path %s is not absolute.", source); +                        if (!IN_SET(mount_flags, 0, MS_REC)) +                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown mount flags."); + +                        if (mode != 
UNIT_CHECK) { +                                r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts, +                                                   &(BindMount) { +                                                           .source = strdup(source), +                                                           .destination = strdup(destination), +                                                           .read_only = !!strstr(name, "ReadOnly"), +                                                           .recursive = !!(mount_flags & MS_REC), +                                                           .ignore_enoent = ignore_enoent, +                                                   }); +                                if (r < 0) +                                        return r; + +                                unit_write_drop_in_private_format( +                                                u, mode, name, +                                                "%s=%s%s:%s:%s", +                                                name, +                                                ignore_enoent ? "-" : "", +                                                source, +                                                destination, +                                                (mount_flags & MS_REC) ? 
"rbind" : "norbind"); +                        } + +                        empty = false; +                } +                if (r < 0) +                        return r; + +                r = sd_bus_message_exit_container(message); +                if (r < 0) +                        return r; + +                if (empty) { +                        bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); +                        c->bind_mounts = NULL; +                        c->n_bind_mounts = 0; +                } + +                return 1;          }          ri = rlimit_from_string(name); diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 61b83d2d62..85b67318ed 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -143,6 +143,29 @@ static int bus_service_set_transient_property(                  return 1; +        } else if (streq(name, "Restart")) { +                ServiceRestart sr; +                const char *v; + +                r = sd_bus_message_read(message, "s", &v); +                if (r < 0) +                        return r; + +                if (isempty(v)) +                        sr = SERVICE_RESTART_NO; +                else { +                        sr = service_restart_from_string(v); +                        if (sr < 0) +                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid restart setting: %s", v); +                } + +                if (mode != UNIT_CHECK) { +                        s->restart = sr; +                        unit_write_drop_in_private_format(UNIT(s), mode, name, "Restart=%s", service_restart_to_string(sr)); +                } + +                return 1; +          } else if (STR_IN_SET(name,                                "StandardInputFileDescriptor",                                "StandardOutputFileDescriptor", diff --git a/src/core/execute.c b/src/core/execute.c index 07ab067c05..2ee8c9a416 100644 --- a/src/core/execute.c +++ 
b/src/core/execute.c @@ -1826,6 +1826,9 @@ static bool exec_needs_mount_namespace(              !strv_isempty(context->inaccessible_paths))                  return true; +        if (context->n_bind_mounts > 0) +                return true; +          if (context->mount_flags != 0)                  return true; @@ -2147,6 +2150,8 @@ static int apply_mount_namespace(Unit *u, const ExecContext *context,          r = setup_namespace(root_dir, &ns_info, rw,                              context->read_only_paths,                              context->inaccessible_paths, +                            context->bind_mounts, +                            context->n_bind_mounts,                              tmp,                              var,                              context->protect_home, @@ -3086,6 +3091,8 @@ void exec_context_done(ExecContext *c) {          c->read_write_paths = strv_free(c->read_write_paths);          c->inaccessible_paths = strv_free(c->inaccessible_paths); +        bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); +          if (c->cpuset)                  CPU_FREE(c->cpuset); @@ -3569,6 +3576,15 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {                  fputs("\n", f);          } +        if (c->n_bind_mounts > 0) +                for (i = 0; i < c->n_bind_mounts; i++) { +                        fprintf(f, "%s%s: %s:%s:%s\n", prefix, +                                c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths", +                                c->bind_mounts[i].source, +                                c->bind_mounts[i].destination, +                                c->bind_mounts[i].recursive ? 
"rbind" : "norbind"); +                } +          if (c->utmp_id)                  fprintf(f,                          "%sUtmpIdentifier: %s\n", diff --git a/src/core/execute.h b/src/core/execute.h index 951c8f4da3..84ab4339cf 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -161,6 +161,8 @@ struct ExecContext {          char **read_write_paths, **read_only_paths, **inaccessible_paths;          unsigned long mount_flags; +        BindMount *bind_mounts; +        unsigned n_bind_mounts;          uint64_t capability_bounding_set;          uint64_t capability_ambient_set; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 2610442b91..15f22a2681 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -89,6 +89,8 @@ $1.InaccessibleDirectories,      config_parse_namespace_path_strv,   0,  $1.ReadWritePaths,               config_parse_namespace_path_strv,   0,                             offsetof($1, exec_context.read_write_paths)  $1.ReadOnlyPaths,                config_parse_namespace_path_strv,   0,                             offsetof($1, exec_context.read_only_paths)  $1.InaccessiblePaths,            config_parse_namespace_path_strv,   0,                             offsetof($1, exec_context.inaccessible_paths) +$1.BindPaths,                    config_parse_bind_paths,            0,                             offsetof($1, exec_context) +$1.BindReadOnlyPaths,            config_parse_bind_paths,            0,                             offsetof($1, exec_context)  $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)  $1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)  $1.ProtectKernelTunables,        config_parse_bool,                  0,                             offsetof($1, 
exec_context.protect_kernel_tunables) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 687cd1dd31..f325d853c6 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -49,6 +49,7 @@  #include "load-fragment.h"  #include "log.h"  #include "missing.h" +#include "mount-util.h"  #include "parse-util.h"  #include "path-util.h"  #include "process-util.h" @@ -1264,19 +1265,20 @@ int config_parse_sysv_priority(const char *unit,  DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode, "Failed to parse utmp mode");  DEFINE_CONFIG_PARSE_ENUM(config_parse_kill_mode, kill_mode, KillMode, "Failed to parse kill mode"); -int config_parse_exec_mount_flags(const char *unit, -                                  const char *filename, -                                  unsigned line, -                                  const char *section, -                                  unsigned section_line, -                                  const char *lvalue, -                                  int ltype, -                                  const char *rvalue, -                                  void *data, -                                  void *userdata) { +int config_parse_exec_mount_flags( +                const char *unit, +                const char *filename, +                unsigned line, +                const char *section, +                unsigned section_line, +                const char *lvalue, +                int ltype, +                const char *rvalue, +                void *data, +                void *userdata) { -        unsigned long flags = 0; +        unsigned long flags;          ExecContext *c = data;          assert(filename); @@ -1284,15 +1286,14 @@ int config_parse_exec_mount_flags(const char *unit,          assert(rvalue);          assert(data); -        if (streq(rvalue, "shared")) -                flags = MS_SHARED; -        else if (streq(rvalue, "slave")) -                flags = MS_SLAVE; -        
else if (streq(rvalue, "private")) -                flags = MS_PRIVATE; +        if (isempty(rvalue)) +                flags = 0;          else { -                log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse mount flag %s, ignoring.", rvalue); -                return 0; +                flags = mount_propagation_flags_from_string(rvalue); +                if (flags == 0) { +                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse mount flag %s, ignoring.", rvalue); +                        return 0; +                }          }          c->mount_flags = flags; @@ -3890,6 +3891,132 @@ int config_parse_namespace_path_strv(          return 0;  } +int config_parse_bind_paths( +                const char *unit, +                const char *filename, +                unsigned line, +                const char *section, +                unsigned section_line, +                const char *lvalue, +                int ltype, +                const char *rvalue, +                void *data, +                void *userdata) { + +        ExecContext *c = data; +        const char *p; +        int r; + +        assert(filename); +        assert(lvalue); +        assert(rvalue); +        assert(data); + +        if (isempty(rvalue)) { +                /* Empty assignment resets the list */ +                bind_mount_free_many(c->bind_mounts, c->n_bind_mounts); +                c->bind_mounts = NULL; +                c->n_bind_mounts = 0; +                return 0; +        } + +        p = rvalue; +        for (;;) { +                _cleanup_free_ char *source = NULL, *destination = NULL; +                char *s = NULL, *d = NULL; +                bool rbind = true, ignore_enoent = false; + +                r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); +                if (r == 0) +                        break; +                if (r == -ENOMEM) +                        
return log_oom(); +                if (r < 0) { +                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue); +                        return 0; +                } + +                s = source; +                if (s[0] == '-') { +                        ignore_enoent = true; +                        s++; +                } + +                if (!utf8_is_valid(s)) { +                        log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, s); +                        return 0; +                } +                if (!path_is_absolute(s)) { +                        log_syntax(unit, LOG_ERR, filename, line, 0, "Not an absolute source path, ignoring: %s", s); +                        return 0; +                } + +                path_kill_slashes(s); + +                /* Optionally, the destination is specified. */ +                if (p && p[-1] == ':') { +                        r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); +                        if (r == -ENOMEM) +                                return log_oom(); +                        if (r < 0) { +                                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue); +                                return 0; +                        } +                        if (r == 0) { +                                log_syntax(unit, LOG_ERR, filename, line, 0, "Missing argument after ':': %s", rvalue); +                                return 0; +                        } + +                        if (!utf8_is_valid(destination)) { +                                log_syntax_invalid_utf8(unit, LOG_ERR, filename, line, destination); +                                return 0; +                        } +                        if (!path_is_absolute(destination)) { +                                log_syntax(unit, LOG_ERR, filename, line, 0, "Not an 
absolute destination path, ignoring: %s", destination); +                                return 0; +                        } + +                        d = path_kill_slashes(destination); + +                        /* Optionally, there's also a short option string specified */ +                        if (p && p[-1] == ':') { +                                _cleanup_free_ char *options = NULL; + +                                r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES); +                                if (r == -ENOMEM) +                                        return log_oom(); +                                if (r < 0) { +                                        log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse %s: %s", lvalue, rvalue); +                                        return 0; +                                } + +                                if (isempty(options) || streq(options, "rbind")) +                                        rbind = true; +                                else if (streq(options, "norbind")) +                                        rbind = false; +                                else { +                                        log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid option string, ignoring setting: %s", options); +                                        return 0; +                                } +                        } +                } else +                        d = s; + +                r = bind_mount_add(&c->bind_mounts, &c->n_bind_mounts, +                                   &(BindMount) { +                                           .source = s, +                                           .destination = d, +                                           .read_only = !!strstr(lvalue, "ReadOnly"), +                                           .recursive = rbind, +                                           .ignore_enoent = ignore_enoent, +                                   }); +       
         if (r < 0) +                        return log_oom(); +        } + +        return 0; +} +  int config_parse_no_new_privileges(                  const char* unit,                  const char *filename, @@ -4387,6 +4514,7 @@ void unit_dump_config_items(FILE *f) {                  { config_parse_sec,                   "SECONDS" },                  { config_parse_nsec,                  "NANOSECONDS" },                  { config_parse_namespace_path_strv,   "PATH [...]" }, +                { config_parse_bind_paths,            "PATH[:PATH[:OPTIONS]] [...]" },                  { config_parse_unit_requires_mounts_for, "PATH [...]" },                  { config_parse_exec_mount_flags,      "MOUNTFLAG [...]" },                  { config_parse_unit_string_printf,    "STRING" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 1cff815a50..bbac2d84b5 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -117,6 +117,7 @@ int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line  int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);  int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);  int config_parse_restrict_namespaces(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_bind_paths(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);  /* gperf prototypes */  const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, 
unsigned length); diff --git a/src/core/namespace.c b/src/core/namespace.c index aca47a4d2f..834883267c 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -50,6 +50,8 @@  typedef enum MountMode {          /* This is ordered by priority! */          INACCESSIBLE, +        BIND_MOUNT, +        BIND_MOUNT_RECURSIVE,          READONLY,          PRIVATE_TMP,          PRIVATE_VAR_TMP, @@ -57,13 +59,16 @@ typedef enum MountMode {          READWRITE,  } MountMode; -typedef struct BindMount { +typedef struct MountEntry {          const char *path_const;   /* Memory allocated on stack or static */ -        MountMode mode:6; +        MountMode mode:5;          bool ignore:1;            /* Ignore if path does not exist? */          bool has_prefix:1;        /* Already is prefixed by the root dir? */ +        bool read_only:1;         /* Shall this mount point be read-only? */          char *path_malloc;        /* Use this instead of 'path' if we had to allocate memory */ -} BindMount; +        const char *source_const; /* The source path, for bind mounts */ +        char *source_malloc; +} MountEntry;  /*   * The following Protect tables are to protect paths and mark some of them @@ -74,7 +79,7 @@ typedef struct BindMount {   */  /* ProtectKernelTunables= option and the related filesystem APIs */ -static const BindMount protect_kernel_tunables_table[] = { +static const MountEntry protect_kernel_tunables_table[] = {          { "/proc/sys",           READONLY,     false },          { "/proc/sysrq-trigger", READONLY,     true  },          { "/proc/latency_stats", READONLY,     true  }, @@ -93,7 +98,7 @@ static const BindMount protect_kernel_tunables_table[] = {  };  /* ProtectKernelModules= option */ -static const BindMount protect_kernel_modules_table[] = { +static const MountEntry protect_kernel_modules_table[] = {  #ifdef HAVE_SPLIT_USR          { "/lib/modules",        INACCESSIBLE, true  },  #endif @@ -104,28 +109,28 @@ static const BindMount 
protect_kernel_modules_table[] = {   * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of   * system should be protected by ProtectSystem=   */ -static const BindMount protect_home_read_only_table[] = { +static const MountEntry protect_home_read_only_table[] = {          { "/home",               READONLY,     true  },          { "/run/user",           READONLY,     true  },          { "/root",               READONLY,     true  },  };  /* ProtectHome=yes table */ -static const BindMount protect_home_yes_table[] = { +static const MountEntry protect_home_yes_table[] = {          { "/home",               INACCESSIBLE, true  },          { "/run/user",           INACCESSIBLE, true  },          { "/root",               INACCESSIBLE, true  },  };  /* ProtectSystem=yes table */ -static const BindMount protect_system_yes_table[] = { +static const MountEntry protect_system_yes_table[] = {          { "/usr",                READONLY,     false },          { "/boot",               READONLY,     true  },          { "/efi",                READONLY,     true  },  };  /* ProtectSystem=full includes ProtectSystem=yes */ -static const BindMount protect_system_full_table[] = { +static const MountEntry protect_system_full_table[] = {          { "/usr",                READONLY,     false },          { "/boot",               READONLY,     true  },          { "/efi",                READONLY,     true  }, @@ -140,17 +145,17 @@ static const BindMount protect_system_full_table[] = {   * (And of course /home and friends are also left writable, as ProtectHome=   * shall manage those, orthogonally).   
*/ -static const BindMount protect_system_strict_table[] = { -        { "/",          READONLY,       false }, -        { "/proc",      READWRITE,      false },      /* ProtectKernelTunables= */ -        { "/sys",       READWRITE,      false },      /* ProtectKernelTunables= */ -        { "/dev",       READWRITE,      false },      /* PrivateDevices= */ -        { "/home",      READWRITE,      true  },      /* ProtectHome= */ -        { "/run/user",  READWRITE,      true  },      /* ProtectHome= */ -        { "/root",      READWRITE,      true  },      /* ProtectHome= */ +static const MountEntry protect_system_strict_table[] = { +        { "/",                   READONLY,     false }, +        { "/proc",               READWRITE,    false },      /* ProtectKernelTunables= */ +        { "/sys",                READWRITE,    false },      /* ProtectKernelTunables= */ +        { "/dev",                READWRITE,    false },      /* PrivateDevices= */ +        { "/home",               READWRITE,    true  },      /* ProtectHome= */ +        { "/run/user",           READWRITE,    true  },      /* ProtectHome= */ +        { "/root",               READWRITE,    true  },      /* ProtectHome= */  }; -static const char *bind_mount_path(const BindMount *p) { +static const char *mount_entry_path(const MountEntry *p) {          assert(p);          /* Returns the path of this bind mount. 
If the malloc()-allocated ->path_buffer field is set we return that, @@ -159,7 +164,19 @@ static const char *bind_mount_path(const BindMount *p) {          return p->path_malloc ?: p->path_const;  } -static int append_access_mounts(BindMount **p, char **strv, MountMode mode) { +static bool mount_entry_read_only(const MountEntry *p) { +        assert(p); + +        return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE); +} + +static const char *mount_entry_source(const MountEntry *p) { +        assert(p); + +        return p->source_malloc ?: p->source_const; +} + +static int append_access_mounts(MountEntry **p, char **strv, MountMode mode) {          char **i;          assert(p); @@ -183,7 +200,7 @@ static int append_access_mounts(BindMount **p, char **strv, MountMode mode) {                  if (!path_is_absolute(e))                          return -EINVAL; -                *((*p)++) = (BindMount) { +                *((*p)++) = (MountEntry) {                          .path_const = e,                          .mode = mode,                          .ignore = ignore, @@ -194,7 +211,26 @@ static int append_access_mounts(BindMount **p, char **strv, MountMode mode) {          return 0;  } -static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned n, bool ignore_protect) { +static int append_bind_mounts(MountEntry **p, const BindMount *binds, unsigned n) { +        unsigned i; + +        assert(p); + +        for (i = 0; i < n; i++) { +                const BindMount *b = binds + i; + +                *((*p)++) = (MountEntry) { +                        .path_const = b->destination, +                        .mode = b->recursive ? 
BIND_MOUNT_RECURSIVE : BIND_MOUNT, +                        .read_only = b->read_only, +                        .source_const = b->source, +                }; +        } + +        return 0; +} + +static int append_static_mounts(MountEntry **p, const MountEntry *mounts, unsigned n, bool ignore_protect) {          unsigned i;          assert(p); @@ -203,8 +239,8 @@ static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned          /* Adds a list of static pre-defined entries */          for (i = 0; i < n; i++) -                *((*p)++) = (BindMount) { -                        .path_const = bind_mount_path(mounts+i), +                *((*p)++) = (MountEntry) { +                        .path_const = mount_entry_path(mounts+i),                          .mode = mounts[i].mode,                          .ignore = mounts[i].ignore || ignore_protect,                  }; @@ -212,7 +248,7 @@ static int append_static_mounts(BindMount **p, const BindMount *mounts, unsigned          return 0;  } -static int append_protect_home(BindMount **p, ProtectHome protect_home, bool ignore_protect) { +static int append_protect_home(MountEntry **p, ProtectHome protect_home, bool ignore_protect) {          assert(p);          switch (protect_home) { @@ -231,7 +267,7 @@ static int append_protect_home(BindMount **p, ProtectHome protect_home, bool ign          }  } -static int append_protect_system(BindMount **p, ProtectSystem protect_system, bool ignore_protect) { +static int append_protect_system(MountEntry **p, ProtectSystem protect_system, bool ignore_protect) {          assert(p);          switch (protect_system) { @@ -254,11 +290,11 @@ static int append_protect_system(BindMount **p, ProtectSystem protect_system, bo  }  static int mount_path_compare(const void *a, const void *b) { -        const BindMount *p = a, *q = b; +        const MountEntry *p = a, *q = b;          int d;          /* If the paths are not equal, then order prefixes first */ -        d = 
path_compare(bind_mount_path(p), bind_mount_path(q)); +        d = path_compare(mount_entry_path(p), mount_entry_path(q));          if (d != 0)                  return d; @@ -272,7 +308,7 @@ static int mount_path_compare(const void *a, const void *b) {          return 0;  } -static int prefix_where_needed(BindMount *m, unsigned n, const char *root_directory) { +static int prefix_where_needed(MountEntry *m, unsigned n, const char *root_directory) {          unsigned i;          /* Prefixes all paths in the bind mount table with the root directory if it is specified and the entry needs @@ -287,7 +323,7 @@ static int prefix_where_needed(BindMount *m, unsigned n, const char *root_direct                  if (m[i].has_prefix)                          continue; -                s = prefix_root(root_directory, bind_mount_path(m+i)); +                s = prefix_root(root_directory, mount_entry_path(m+i));                  if (!s)                          return -ENOMEM; @@ -300,8 +336,8 @@ static int prefix_where_needed(BindMount *m, unsigned n, const char *root_direct          return 0;  } -static void drop_duplicates(BindMount *m, unsigned *n) { -        BindMount *f, *t, *previous; +static void drop_duplicates(MountEntry *m, unsigned *n) { +        MountEntry *f, *t, *previous;          assert(m);          assert(n); @@ -312,8 +348,9 @@ static void drop_duplicates(BindMount *m, unsigned *n) {                  /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()                   * above. 
*/ -                if (previous && path_equal(bind_mount_path(f), bind_mount_path(previous))) { -                        log_debug("%s is duplicate.", bind_mount_path(f)); +                if (previous && path_equal(mount_entry_path(f), mount_entry_path(previous))) { +                        log_debug("%s is duplicate.", mount_entry_path(f)); +                        previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */                          f->path_malloc = mfree(f->path_malloc);                          continue;                  } @@ -326,8 +363,8 @@ static void drop_duplicates(BindMount *m, unsigned *n) {          *n = t - m;  } -static void drop_inaccessible(BindMount *m, unsigned *n) { -        BindMount *f, *t; +static void drop_inaccessible(MountEntry *m, unsigned *n) { +        MountEntry *f, *t;          const char *clear = NULL;          assert(m); @@ -340,13 +377,13 @@ static void drop_inaccessible(BindMount *m, unsigned *n) {                  /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop                   * it, as inaccessible paths really should drop the entire subtree. */ -                if (clear && path_startswith(bind_mount_path(f), clear)) { -                        log_debug("%s is masked by %s.", bind_mount_path(f), clear); +                if (clear && path_startswith(mount_entry_path(f), clear)) { +                        log_debug("%s is masked by %s.", mount_entry_path(f), clear);                          f->path_malloc = mfree(f->path_malloc);                          continue;                  } -                clear = f->mode == INACCESSIBLE ? bind_mount_path(f) : NULL; +                clear = f->mode == INACCESSIBLE ? 
mount_entry_path(f) : NULL;                  *t = *f;                  t++; @@ -355,8 +392,8 @@ static void drop_inaccessible(BindMount *m, unsigned *n) {          *n = t - m;  } -static void drop_nop(BindMount *m, unsigned *n) { -        BindMount *f, *t; +static void drop_nop(MountEntry *m, unsigned *n) { +        MountEntry *f, *t;          assert(m);          assert(n); @@ -368,12 +405,12 @@ static void drop_nop(BindMount *m, unsigned *n) {                  /* Only suppress such subtrees for READONLY and READWRITE entries */                  if (IN_SET(f->mode, READONLY, READWRITE)) { -                        BindMount *p; +                        MountEntry *p;                          bool found = false;                          /* Now let's find the first parent of the entry we are looking at. */                          for (p = t-1; p >= m; p--) { -                                if (path_startswith(bind_mount_path(f), bind_mount_path(p))) { +                                if (path_startswith(mount_entry_path(f), mount_entry_path(p))) {                                          found = true;                                          break;                                  } @@ -381,7 +418,7 @@ static void drop_nop(BindMount *m, unsigned *n) {                          /* We found it, let's see if it's the same mode, if so, we can drop this entry */                          if (found && p->mode == f->mode) { -                                log_debug("%s is redundant by %s", bind_mount_path(f), bind_mount_path(p)); +                                log_debug("%s is redundant by %s", mount_entry_path(f), mount_entry_path(p));                                  f->path_malloc = mfree(f->path_malloc);                                  continue;                          } @@ -394,8 +431,8 @@ static void drop_nop(BindMount *m, unsigned *n) {          *n = t - m;  } -static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) { -        
BindMount *f, *t; +static void drop_outside_root(const char *root_directory, MountEntry *m, unsigned *n) { +        MountEntry *f, *t;          assert(m);          assert(n); @@ -408,8 +445,8 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned          for (f = m, t = m; f < m + *n; f++) { -                if (!path_startswith(bind_mount_path(f), root_directory)) { -                        log_debug("%s is outside of root directory.", bind_mount_path(f)); +                if (!path_startswith(mount_entry_path(f), root_directory)) { +                        log_debug("%s is outside of root directory.", mount_entry_path(f));                          f->path_malloc = mfree(f->path_malloc);                          continue;                  } @@ -421,7 +458,7 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned          *n = t - m;  } -static int mount_dev(BindMount *m) { +static int mount_dev(MountEntry *m) {          static const char devnodes[] =                  "/dev/null\0"                  "/dev/zero\0" @@ -526,11 +563,11 @@ static int mount_dev(BindMount *m) {           * missing when the service is started with RootDirectory. This is           * consistent with mount units creating the mount points when missing.           
*/ -        (void) mkdir_p_label(bind_mount_path(m), 0755); +        (void) mkdir_p_label(mount_entry_path(m), 0755);          /* Unmount everything in old /dev */ -        umount_recursive(bind_mount_path(m), 0); -        if (mount(dev, bind_mount_path(m), NULL, MS_MOVE, NULL) < 0) { +        umount_recursive(mount_entry_path(m), 0); +        if (mount(dev, mount_entry_path(m), NULL, MS_MOVE, NULL) < 0) {                  r = -errno;                  goto fail;          } @@ -560,17 +597,54 @@ fail:          return r;  } +static int mount_entry_chase( +                const char *root_directory, +                MountEntry *m, +                const char *path, +                char **location) { + +        char *chased; +        int r; + +        assert(m); + +        /* Since mount() will always follow symlinks and we need to take the different root directory into account we +         * chase the symlinks on our own first. This is called for the destination path, as well as the source path (if +         * that applies). The result is stored in "location". 
*/ + +        r = chase_symlinks(path, root_directory, 0, &chased); +        if (r == -ENOENT && m->ignore) { +                log_debug_errno(r, "Path %s does not exist, ignoring.", path); +                return 0; +        } +        if (r < 0) +                return log_debug_errno(r, "Failed to follow symlinks on %s: %m", path); + +        log_debug("Followed symlinks %s → %s.", path, chased); + +        free(*location); +        *location = chased; + +        return 1; +} +  static int apply_mount( -                BindMount *m, +                const char *root_directory, +                MountEntry *m,                  const char *tmp_dir,                  const char *var_tmp_dir) {          const char *what; +        bool rbind = true;          int r;          assert(m); -        log_debug("Applying namespace mount on %s", bind_mount_path(m)); +        r = mount_entry_chase(root_directory, m, mount_entry_path(m), &m->path_malloc); +        if (r <= 0) +                return r; + +        log_debug("Applying namespace mount on %s", mount_entry_path(m));          switch (m->mode) { @@ -580,10 +654,10 @@ static int apply_mount(                  /* First, get rid of everything that is below if there                   * is anything... Then, overmount it with an                   * inaccessible path. 
*/ -                (void) umount_recursive(bind_mount_path(m), 0); +                (void) umount_recursive(mount_entry_path(m), 0); -                if (lstat(bind_mount_path(m), &target) < 0) -                        return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", bind_mount_path(m)); +                if (lstat(mount_entry_path(m), &target) < 0) +                        return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m));                  what = mode_to_inaccessible_node(target.st_mode);                  if (!what) { @@ -595,14 +669,26 @@ static int apply_mount(          case READONLY:          case READWRITE: - -                r = path_is_mount_point(bind_mount_path(m), NULL, 0); +                r = path_is_mount_point(mount_entry_path(m), root_directory, 0);                  if (r < 0) -                        return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", bind_mount_path(m)); +                        return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m));                  if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */                          return 0;                  /* This isn't a mount point yet, let's make it one. 
*/ -                what = bind_mount_path(m); +                what = mount_entry_path(m); +                break; + +        case BIND_MOUNT: +                rbind = false; +                /* fallthrough */ + +        case BIND_MOUNT_RECURSIVE: +                /* Also chase the source mount */ +                r = mount_entry_chase(root_directory, m, mount_entry_source(m), &m->source_malloc); +                if (r <= 0) +                        return r; + +                what = mount_entry_source(m);                  break;          case PRIVATE_TMP: @@ -622,22 +708,22 @@ static int apply_mount(          assert(what); -        if (mount(what, bind_mount_path(m), NULL, MS_BIND|MS_REC, NULL) < 0) -                return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, bind_mount_path(m)); +        if (mount(what, mount_entry_path(m), NULL, MS_BIND|(rbind ? MS_REC : 0), NULL) < 0) +                return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, mount_entry_path(m)); -        log_debug("Successfully mounted %s to %s", what, bind_mount_path(m)); +        log_debug("Successfully mounted %s to %s", what, mount_entry_path(m));          return 0;  } -static int make_read_only(BindMount *m, char **blacklist) { +static int make_read_only(MountEntry *m, char **blacklist) {          int r = 0;          assert(m); -        if (IN_SET(m->mode, INACCESSIBLE, READONLY)) -                r = bind_remount_recursive(bind_mount_path(m), true, blacklist); +        if (mount_entry_read_only(m)) +                r = bind_remount_recursive(mount_entry_path(m), true, blacklist);          else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/ -                if (mount(NULL, bind_mount_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) +                if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)                          r = -errno;          } else                  
return 0; @@ -646,50 +732,9 @@ static int make_read_only(BindMount *m, char **blacklist) {           * already stays this way. This improves compatibility with container managers, where we won't attempt to undo           * read-only mounts already applied. */ -        return r; -} - -/* Chase symlinks and remove failed paths from mounts */ -static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) { -        BindMount *f, *t; -        int r = 0; - -        assert(m); -        assert(n); - -        /* Since mount() will always follow symlinks and we need to take the different root directory into account we -         * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we -         * can't resolve the path, and which have been marked for such removal. */ - -        for (f = m, t = m; f < m + *n; f++) { -                _cleanup_free_ char *chased = NULL; -                int k; - -                k = chase_symlinks(bind_mount_path(f), root_directory, 0, &chased); -                if (k < 0) { -                        /* Get only real errors */ -                        if (r >= 0 && (k != -ENOENT || !f->ignore)) -                                r = k; - -                        /* Doesn't exist or failed? Then remove it and continue! 
*/ -                        log_debug_errno(k, "Failed to chase symlinks for %s: %m", bind_mount_path(f)); -                        f->path_malloc = mfree(f->path_malloc); -                        continue; -                } - -                if (!path_equal(bind_mount_path(f), chased)) { -                        log_debug("Chased %s → %s", bind_mount_path(f), chased); - -                        free(f->path_malloc); -                        f->path_malloc = chased; -                        chased = NULL; -                } - -                *t = *f; -                t++; -        } +        if (r == -ENOENT && m->ignore) +                r = 0; -        *n = t - m;          return r;  } @@ -698,6 +743,8 @@ static unsigned namespace_calculate_mounts(                  char** read_write_paths,                  char** read_only_paths,                  char** inaccessible_paths, +                const BindMount *bind_mounts, +                unsigned n_bind_mounts,                  const char* tmp_dir,                  const char* var_tmp_dir,                  ProtectHome protect_home, @@ -722,6 +769,7 @@ static unsigned namespace_calculate_mounts(                  strv_length(read_write_paths) +                  strv_length(read_only_paths) +                  strv_length(inaccessible_paths) + +                n_bind_mounts +                  ns_info->private_dev +                  (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +                  (ns_info->protect_control_groups ? 
1 : 0) + @@ -735,13 +783,15 @@ int setup_namespace(                  char** read_write_paths,                  char** read_only_paths,                  char** inaccessible_paths, +                const BindMount *bind_mounts, +                unsigned n_bind_mounts,                  const char* tmp_dir,                  const char* var_tmp_dir,                  ProtectHome protect_home,                  ProtectSystem protect_system,                  unsigned long mount_flags) { -        BindMount *m, *mounts = NULL; +        MountEntry *m, *mounts = NULL;          bool make_slave = false;          unsigned n_mounts;          int r = 0; @@ -749,19 +799,21 @@ int setup_namespace(          if (mount_flags == 0)                  mount_flags = MS_SHARED; -        n_mounts = namespace_calculate_mounts(ns_info, -                                              read_write_paths, -                                              read_only_paths, -                                              inaccessible_paths, -                                              tmp_dir, var_tmp_dir, -                                              protect_home, protect_system); +        n_mounts = namespace_calculate_mounts( +                        ns_info, +                        read_write_paths, +                        read_only_paths, +                        inaccessible_paths, +                        bind_mounts, n_bind_mounts, +                        tmp_dir, var_tmp_dir, +                        protect_home, protect_system);          /* Set mount slave mode */          if (root_directory || n_mounts > 0)                  make_slave = true;          if (n_mounts > 0) { -                m = mounts = (BindMount *) alloca0(n_mounts * sizeof(BindMount)); +                m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));                  r = append_access_mounts(&m, read_write_paths, READWRITE);                  if (r < 0)                          goto finish; @@ -774,22 
+826,26 @@ int setup_namespace(                  if (r < 0)                          goto finish; +                r = append_bind_mounts(&m, bind_mounts, n_bind_mounts); +                if (r < 0) +                        goto finish; +                  if (tmp_dir) { -                        *(m++) = (BindMount) { +                        *(m++) = (MountEntry) {                                  .path_const = "/tmp",                                  .mode = PRIVATE_TMP,                          };                  }                  if (var_tmp_dir) { -                        *(m++) = (BindMount) { +                        *(m++) = (MountEntry) {                                  .path_const = "/var/tmp",                                  .mode = PRIVATE_VAR_TMP,                          };                  }                  if (ns_info->private_dev) { -                        *(m++) = (BindMount) { +                        *(m++) = (MountEntry) {                                  .path_const = "/dev",                                  .mode = PRIVATE_DEV,                          }; @@ -808,7 +864,7 @@ int setup_namespace(                  }                  if (ns_info->protect_control_groups) { -                        *(m++) = (BindMount) { +                        *(m++) = (MountEntry) {                                  .path_const = "/sys/fs/cgroup",                                  .mode = READONLY,                          }; @@ -829,14 +885,7 @@ int setup_namespace(                  if (r < 0)                          goto finish; -                /* Resolve symlinks manually first, as mount() will always follow them relative to the host's -                 * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit -                 * racy. 
*/ -                r = chase_all_symlinks(root_directory, mounts, &n_mounts); -                if (r < 0) -                        goto finish; - -                qsort(mounts, n_mounts, sizeof(BindMount), mount_path_compare); +                qsort(mounts, n_mounts, sizeof(MountEntry), mount_path_compare);                  drop_duplicates(mounts, &n_mounts);                  drop_outside_root(root_directory, mounts, &n_mounts); @@ -877,7 +926,7 @@ int setup_namespace(                  /* First round, add in all special mounts we need */                  for (m = mounts; m < mounts + n_mounts; ++m) { -                        r = apply_mount(m, tmp_dir, var_tmp_dir); +                        r = apply_mount(root_directory, m, tmp_dir, var_tmp_dir);                          if (r < 0)                                  goto finish;                  } @@ -885,7 +934,7 @@ int setup_namespace(                  /* Create a blacklist we can pass to bind_mount_recursive() */                  blacklist = newa(char*, n_mounts+1);                  for (j = 0; j < n_mounts; j++) -                        blacklist[j] = (char*) bind_mount_path(mounts+j); +                        blacklist[j] = (char*) mount_entry_path(mounts+j);                  blacklist[j] = NULL;                  /* Second round, flip the ro bits if necessary. 
*/ @@ -920,6 +969,53 @@ finish:          return r;  } +void bind_mount_free_many(BindMount *b, unsigned n) { +        unsigned i; + +        assert(b || n == 0); + +        for (i = 0; i < n; i++) { +                free(b[i].source); +                free(b[i].destination); +        } + +        free(b); +} + +int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item) { +        _cleanup_free_ char *s = NULL, *d = NULL; +        BindMount *c; + +        assert(b); +        assert(n); +        assert(item); + +        s = strdup(item->source); +        if (!s) +                return -ENOMEM; + +        d = strdup(item->destination); +        if (!d) +                return -ENOMEM; + +        c = realloc_multiply(*b, sizeof(BindMount), *n + 1); +        if (!c) +                return -ENOMEM; + +        *b = c; + +        c[(*n) ++] = (BindMount) { +                .source = s, +                .destination = d, +                .read_only = item->read_only, +                .recursive = item->recursive, +                .ignore_enoent = item->ignore_enoent, +        }; + +        s = d = NULL; +        return 0; +} +  static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {          _cleanup_free_ char *x = NULL;          char bid[SD_ID128_STRING_MAX]; diff --git a/src/core/namespace.h b/src/core/namespace.h index 2c278fd457..de3edc419c 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -21,6 +21,7 @@  ***/  typedef struct NameSpaceInfo NameSpaceInfo; +typedef struct BindMount BindMount;  #include <stdbool.h> @@ -51,20 +52,32 @@ struct NameSpaceInfo {          bool protect_kernel_modules:1;  }; -int setup_namespace(const char *chroot, -                    const NameSpaceInfo *ns_info, -                    char **read_write_paths, -                    char **read_only_paths, -                    char **inaccessible_paths, -                    const char *tmp_dir, -                    const char *var_tmp_dir, -        
            ProtectHome protect_home, -                    ProtectSystem protect_system, -                    unsigned long mount_flags); - -int setup_tmp_dirs(const char *id, -                  char **tmp_dir, -                  char **var_tmp_dir); +struct BindMount { +        char *source; +        char *destination; +        bool read_only:1; +        bool recursive:1; +        bool ignore_enoent:1; +}; + +int setup_namespace( +                const char *root_directory, +                const NameSpaceInfo *ns_info, +                char **read_write_paths, +                char **read_only_paths, +                char **inaccessible_paths, +                const BindMount *bind_mounts, +                unsigned n_bind_mounts, +                const char *tmp_dir, +                const char *var_tmp_dir, +                ProtectHome protect_home, +                ProtectSystem protect_system, +                unsigned long mount_flags); + +int setup_tmp_dirs( +                const char *id, +                char **tmp_dir, +                char **var_tmp_dir);  int setup_netns(int netns_storage_socket[2]); @@ -73,3 +86,6 @@ ProtectHome protect_home_from_string(const char *s) _pure_;  const char* protect_system_to_string(ProtectSystem p) _const_;  ProtectSystem protect_system_from_string(const char *s) _pure_; + +void bind_mount_free_many(BindMount *b, unsigned n); +int bind_mount_add(BindMount **b, unsigned *n, const BindMount *item); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 3114275c85..b030b3b9d1 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -27,6 +27,7 @@  #include "hashmap.h"  #include "list.h"  #include "locale-util.h" +#include "mount-util.h"  #include "nsflags.h"  #include "parse-util.h"  #include "path-util.h" @@ -265,7 +266,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen                                "StandardInput", "StandardOutput", 
"StandardError",                                "Description", "Slice", "Type", "WorkingDirectory",                                "RootDirectory", "SyslogIdentifier", "ProtectSystem", -                              "ProtectHome", "SELinuxContext")) +                              "ProtectHome", "SELinuxContext", "Restart"))                  r = sd_bus_message_append(m, "v", "s", eq);          else if (streq(field, "SyslogLevel")) { @@ -575,7 +576,91 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen                  r = sd_bus_message_append(m, "v", "t", flags);          } else if ((dep = unit_dependency_from_string(field)) >= 0)                  r = sd_bus_message_append(m, "v", "as", 1, eq); -        else { +        else if (streq(field, "MountFlags")) { +                unsigned long f; + +                if (isempty(eq)) +                        f = 0; +                else { +                        f = mount_propagation_flags_from_string(eq); +                        if (f == 0) { +                                log_error("Failed to parse mount propagation type: %s", eq); +                                return -EINVAL; +                        } +                } + +                r = sd_bus_message_append(m, "v", "t", f); +        } else if (STR_IN_SET(field, "BindPaths", "BindReadOnlyPaths")) { +                const char *p = eq; + +                r = sd_bus_message_open_container(m, 'v', "a(ssbt)"); +                if (r < 0) +                        return r; + +                r = sd_bus_message_open_container(m, 'a', "(ssbt)"); +                if (r < 0) +                        return r; + +                for (;;) { +                        _cleanup_free_ char *source = NULL, *destination = NULL; +                        char *s = NULL, *d = NULL; +                        bool ignore_enoent = false; +                        uint64_t flags = MS_REC; + +                        r = extract_first_word(&p, &source, 
":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); +                        if (r < 0) +                                return log_error_errno(r, "Failed to parse argument: %m"); +                        if (r == 0) +                                break; + +                        s = source; +                        if (s[0] == '-') { +                                ignore_enoent = true; +                                s++; +                        } + +                        if (p && p[-1] == ':') { +                                r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); +                                if (r < 0) +                                        return log_error_errno(r, "Failed to parse argument: %m"); +                                if (r == 0) { +                                        log_error("Missing argument after ':': %s", eq); +                                        return -EINVAL; +                                } + +                                d = destination; + +                                if (p && p[-1] == ':') { +                                        _cleanup_free_ char *options = NULL; + +                                        r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES); +                                        if (r < 0) +                                                return log_error_errno(r, "Failed to parse argument: %m"); + +                                        if (isempty(options) || streq(options, "rbind")) +                                                flags = MS_REC; +                                        else if (streq(options, "norbind")) +                                                flags = 0; +                                        else { +                                                log_error("Unknown options: %s", eq); +                                                return -EINVAL; +             
                           } +                                } +                        } else +                                d = s; + + +                        r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags); +                        if (r < 0) +                                return r; +                } + +                r = sd_bus_message_close_container(m); +                if (r < 0) +                        return r; + +                r = sd_bus_message_close_container(m); +        } else {                  log_error("Unknown assignment %s.", assignment);                  return -EINVAL;          } diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c new file mode 100644 index 0000000000..da7f35623b --- /dev/null +++ b/src/test/test-mount-util.c @@ -0,0 +1,45 @@ +/*** +  This file is part of systemd. + +  Copyright 2016 Lennart Poettering + +  systemd is free software; you can redistribute it and/or modify it +  under the terms of the GNU Lesser General Public License as published by +  the Free Software Foundation; either version 2.1 of the License, or +  (at your option) any later version. + +  systemd is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  Lesser General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include <sys/mount.h> + +#include "log.h" +#include "mount-util.h" +#include "string-util.h" + +static void test_mount_propagation_flags(const char *name, unsigned long f) { +        assert(mount_propagation_flags_from_string(name) == f); + +        if (f != 0) +                assert_se(streq_ptr(mount_propagation_flags_to_string(f), name)); +} + +int main(int argc, char *argv[]) { + +        log_set_max_level(LOG_DEBUG); + +        test_mount_propagation_flags("shared", MS_SHARED); +        test_mount_propagation_flags("slave", MS_SLAVE); +        test_mount_propagation_flags("private", MS_PRIVATE); +        test_mount_propagation_flags(NULL, 0); +        test_mount_propagation_flags("", 0); +        test_mount_propagation_flags("xxxx", 0); + +        return 0; +} diff --git a/src/test/test-ns.c b/src/test/test-ns.c index da7a8b0565..c99bcb371b 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -81,6 +81,7 @@ int main(int argc, char *argv[]) {                              (char **) writable,                              (char **) readonly,                              (char **) inaccessible, +                            &(BindMount) { .source = (char*) "/usr/bin", .destination = (char*) "/etc/systemd", .read_only = true }, 1,                              tmp_dir,                              var_tmp_dir,                              PROTECT_HOME_NO, | 
