diff options
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | hwdb/70-pointingstick.hwdb | 2 | ||||
-rw-r--r-- | man/systemd.exec.xml | 73 | ||||
-rw-r--r-- | man/systemd.resource-control.xml | 4 | ||||
-rw-r--r-- | src/basic/siphash24.c | 2 | ||||
-rw-r--r-- | src/core/cgroup.c | 185 | ||||
-rw-r--r-- | src/core/cgroup.h | 1 | ||||
-rw-r--r-- | src/core/load-fragment.c | 99 | ||||
-rw-r--r-- | src/resolve/resolved-resolv-conf.c | 49 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 4 | ||||
-rw-r--r-- | src/shared/seccomp-util.c | 216 | ||||
-rw-r--r-- | src/shared/seccomp-util.h | 7 | ||||
-rwxr-xr-x | test/networkd-test.py | 71 | ||||
-rw-r--r-- | test/test-functions | 5 | ||||
-rw-r--r-- | units/rescue.service.in | 2 |
15 files changed, 582 insertions, 140 deletions
@@ -916,7 +916,7 @@ CHANGES WITH 226: available, systemd will fall back to the legacy cgroup hierarchy setup, as before. Host system and containers can mix and match legacy and unified hierarchies as they - wish. nspawn understands the $UNIFIED_CROUP_HIERARCHY + wish. nspawn understands the $UNIFIED_CGROUP_HIERARCHY environment variable to individually select the hierarchy to use for executed containers. By default, nspawn will use the unified hierarchy for the containers if the host uses the diff --git a/hwdb/70-pointingstick.hwdb b/hwdb/70-pointingstick.hwdb index b2af467d5f..9adcf6d804 100644 --- a/hwdb/70-pointingstick.hwdb +++ b/hwdb/70-pointingstick.hwdb @@ -105,6 +105,8 @@ evdev:name:TPPS/2 IBM TrackPoint:dmi:bvn*:bvr*:bd*:svnLENOVO:pn*:pvrThinkPadT540 evdev:name:TPPS/2 IBM TrackPoint:dmi:bvn*:bvr*:bd*:svnLENOVO:pn*:pvrThinkPadT550:* # Lenovo Thinkpad X1 Carbon 3rd gen evdev:name:TPPS/2 IBM TrackPoint:dmi:bvn*:bvr*:bd*:svnLENOVO:pn*:pvrThinkPadX1Carbon3rd:* +# Lenovo Thinkpad X1 Carbon 4th gen +evdev:name:TPPS/2 IBM TrackPoint:dmi:bvn*:bvr*:bd*:svnLENOVO:pn*:pvrThinkPadX1Carbon4th:* POINTINGSTICK_SENSITIVITY=200 POINTINGSTICK_CONST_ACCEL=1.0 diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 4d52982b64..58f18f3a9e 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1193,7 +1193,78 @@ <function>read</function> and <function>write</function>, and right after it add a blacklisting of <function>write</function>, then <function>write</function> - will be removed from the set.) </para></listitem> + will be removed from the set.)</para> + + <para>As the number of possible system + calls is large, predefined sets of system calls are provided. + A set starts with <literal>@</literal> character, followed by + name of the set. + + <table> + <title>Currently predefined system call sets</title> + + <tgroup cols='2'> + <colspec colname='set' /> + <colspec colname='description' /> + <thead> + <row> + <entry>Set</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry>@clock</entry> + <entry>System calls for changing the system clock (<function>adjtimex()</function>, + <function>settimeofday()</function>)</entry> + </row> + <row> + <entry>@io-event</entry> + <entry>Event loop use (<function>poll()</function>, <function>select()</function>, + <citerefentry project='man-pages'><refentrytitle>epoll</refentrytitle><manvolnum>7</manvolnum></citerefentry>, + <function>eventfd()</function>...)</entry> + </row> + <row> + <entry>@ipc</entry> + <entry>SysV IPC, POSIX Message Queues or other IPC (<citerefentry project='man-pages'><refentrytitle>mq_overview</refentrytitle><manvolnum>7</manvolnum></citerefentry>, + <citerefentry project='man-pages'><refentrytitle>svipc</refentrytitle><manvolnum>7</manvolnum></citerefentry>)</entry> + </row> + <row> + <entry>@module</entry> + <entry>Kernel module control (<function>create_module()</function>, <function>init_module()</function>...)</entry> + </row> + <row> + <entry>@mount</entry> + <entry>File system mounting and unmounting (<function>chroot()</function>, <function>mount()</function>...)</entry> + </row> + <row> + <entry>@network-io</entry> + <entry>Socket I/O (including local AF_UNIX): + <citerefentry project='man-pages'><refentrytitle>socket</refentrytitle><manvolnum>7</manvolnum></citerefentry>, + <citerefentry project='man-pages'><refentrytitle>unix</refentrytitle><manvolnum>7</manvolnum></citerefentry></entry> + </row> + <row> + <entry>@obsolete</entry> + <entry>Unusual, obsolete or unimplemented (<function>fattach()</function>, <function>gtty()</function>, <function>vm86()</function>...)</entry> + </row> + <row> + <entry>@privileged</entry> + <entry>All system calls which need superuser capabilities (<citerefentry project='man-pages'><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>)</entry> + </row> + <row> + <entry>@process</entry> + <entry>Process control, execution, namespaces (<function>execve()</function>, <function>kill()</function>, <citerefentry project='man-pages'><refentrytitle>namespaces</refentrytitle><manvolnum>7</manvolnum></citerefentry>...)</entry> + </row> + <row> + <entry>@raw-io</entry> + <entry>Raw I/O ports (<function>ioperm()</function>, <function>iopl()</function>, <function>pciconfig_read()</function>...)</entry> + </row> + </tbody> + </tgroup> + </table> + + Note, that as new system calls are added to the kernel, additional system calls might be added to the groups + above, so the contents of the sets may change between systemd versions.</para></listitem> </varlistentry> <varlistentry> diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 570619a743..d4c8fa7091 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -248,7 +248,7 @@ <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the - special value <literal>max</literal>, no memory limit is applied. This controls the + special value <literal>infinity</literal>, no memory limit is applied. This controls the <literal>memory.high</literal> control group attribute. For details about this control group attribute, see <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> @@ -269,7 +269,7 @@ <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the - special value <literal>max</literal>, no memory limit is applied. This controls the + special value <literal>infinity</literal>, no memory limit is applied. This controls the <literal>memory.max</literal> control group attribute. For details about this control group attribute, see <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> diff --git a/src/basic/siphash24.c b/src/basic/siphash24.c index 060e8ba387..8c1cdc3db6 100644 --- a/src/basic/siphash24.c +++ b/src/basic/siphash24.c @@ -17,6 +17,8 @@ coding style) */ +#include <stdio.h> + #include "macro.h" #include "siphash24.h" #include "unaligned.h" diff --git a/src/core/cgroup.c b/src/core/cgroup.c index fbe69df4e9..f3e0c54b76 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -36,6 +36,22 @@ #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) +static void cgroup_compat_warn(void) +{ + static bool cgroup_compat_warned = false; + + if (cgroup_compat_warned) + return; + + log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details."); + cgroup_compat_warned = true; +} + +#define log_cgroup_compat(unit, fmt, ...) do { \ + cgroup_compat_warn(); \ + log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \ + } while (0) + void cgroup_context_init(CGroupContext *c) { assert(c); @@ -413,7 +429,7 @@ static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) { CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); } -static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) { +static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; int r; @@ -423,13 +439,13 @@ static void cgroup_apply_io_device_weight(const char *path, const char *dev_path return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight); - r = cg_set_attribute("io", path, "io.weight", buf); + r = cg_set_attribute("io", u->cgroup_path, "io.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight: %m"); } -static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) { +static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; int r; @@ -439,13 +455,13 @@ static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_p return; xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight); - r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.weight_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight_device: %m"); } -static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) { +static unsigned cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) { char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; CGroupIOLimitType type; @@ -469,14 +485,14 @@ static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_p xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); - r = cg_set_attribute("io", path, "io.max", buf); + r = cg_set_attribute("io", u->cgroup_path, "io.max", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.max on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.max: %m"); return n; } -static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) { +static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) { char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; dev_t dev; unsigned n = 0; @@ -489,18 +505,18 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de if (rbps != CGROUP_LIMIT_MAX) n++; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps); - r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.read_bps_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.read_bps_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.read_bps_device: %m"); if (wbps != CGROUP_LIMIT_MAX) n++; sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps); - r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf); + r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.write_bps_device", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.throttle.write_bps_device on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.write_bps_device: %m"); return n; } @@ -509,23 +525,30 @@ static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; } -static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) { +static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) { char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; int r; if (v != CGROUP_LIMIT_MAX) xsprintf(buf, "%" PRIu64 "\n", v); - r = cg_set_attribute("memory", path, file, buf); + r = cg_set_attribute("memory", u->cgroup_path, file, buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set %s on %s: %m", file, path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set %s: %m", file); } -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { +static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { + const char *path; + CGroupContext *c; bool is_root; int r; + assert(u); + + c = unit_get_cgroup_context(u); + path = u->cgroup_path; + assert(c); assert(path); @@ -551,14 +574,14 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT); r = cg_set_attribute("cpu", path, "cpu.shares", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.shares on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.shares: %m"); sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_period_us on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_period_us: %m"); if (c->cpu_quota_per_sec_usec != USEC_INFINITY) { sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); @@ -566,8 +589,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } else r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set cpu.cfs_quota_us on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set cpu.cfs_quota_us: %m"); } if (mask & CGROUP_MASK_IO) { @@ -580,29 +603,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (has_io) weight = cgroup_context_io_weight(c, state); - else if (has_blockio) - weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); - else + else if (has_blockio) { + uint64_t blkio_weight = cgroup_context_blkio_weight(c, state); + + weight = cgroup_weight_blkio_to_io(blkio_weight); + + log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64, + blkio_weight, weight); + } else weight = CGROUP_WEIGHT_DEFAULT; xsprintf(buf, "default %" PRIu64 "\n", weight); r = cg_set_attribute("io", path, "io.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set io.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight: %m"); if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_io_device_weight(path, w->path, w->weight); + cgroup_apply_io_device_weight(u, w->path, w->weight); } else if (has_blockio) { CGroupBlockIODeviceWeight *w; /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); + LIST_FOREACH(device_weights, w, c->blockio_device_weights) { + weight = cgroup_weight_blkio_to_io(w->weight); + + log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s", + w->weight, weight, w->path); + + cgroup_apply_io_device_weight(u, w->path, weight); + } } } @@ -611,7 +645,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + if (!cgroup_apply_io_device_limit(u, l->path, l->limits)) cgroup_context_free_io_device_limit(c, l); } } else if (has_blockio) { @@ -627,7 +661,10 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M limits[CGROUP_IO_RBPS_MAX] = b->rbps; limits[CGROUP_IO_WBPS_MAX] = b->wbps; - if (!cgroup_apply_io_device_limit(path, b->path, limits)) + log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s", + b->rbps, b->wbps, b->path); + + if (!cgroup_apply_io_device_limit(u, b->path, limits)) cgroup_context_free_blockio_device_bandwidth(c, b); } } @@ -643,29 +680,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (has_blockio) weight = cgroup_context_blkio_weight(c, state); - else if (has_io) + else if (has_io) { + uint64_t io_weight = cgroup_context_io_weight(c, state); + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); - else + + log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64, + io_weight, weight); + } else weight = CGROUP_BLKIO_WEIGHT_DEFAULT; xsprintf(buf, "%" PRIu64 "\n", weight); r = cg_set_attribute("blkio", path, "blkio.weight", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight: %m"); if (has_blockio) { CGroupBlockIODeviceWeight *w; /* FIXME: no way to reset this list */ LIST_FOREACH(device_weights, w, c->blockio_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, w->weight); + cgroup_apply_blkio_device_weight(u, w->path, w->weight); } else if (has_io) { CGroupIODeviceWeight *w; /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->io_device_weights) - cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); + LIST_FOREACH(device_weights, w, c->io_device_weights) { + weight = cgroup_weight_io_to_blkio(w->weight); + + log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s", + w->weight, weight, w->path); + + cgroup_apply_blkio_device_weight(u, w->path, weight); + } } } @@ -674,14 +722,17 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupBlockIODeviceBandwidth *b, *next; LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { - if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps)) cgroup_context_free_blockio_device_bandwidth(c, b); } } else if (has_io) { CGroupIODeviceLimit *l, *next; LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { - if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s", + l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path); + + if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) cgroup_context_free_io_device_limit(c, l); } } @@ -693,24 +744,32 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M if (cgroup_context_has_unified_memory_config(c)) max = c->memory_max; - else + else { max = c->memory_limit; - cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low); - cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high); - cgroup_apply_unified_memory_limit(path, "memory.max", max); + if (max != CGROUP_LIMIT_MAX) + log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max); + } + + cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low); + cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high); + cgroup_apply_unified_memory_limit(u, "memory.max", max); } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; if (c->memory_limit != CGROUP_LIMIT_MAX) xsprintf(buf, "%" PRIu64 "\n", c->memory_limit); - else + else { xsprintf(buf, "%" PRIu64 "\n", c->memory_max); + if (c->memory_max != CGROUP_LIMIT_MAX) + log_cgroup_compat(u, "Applying MemoryMax %" PRIu64 " as MemoryLimit", c->memory_max); + } + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set memory.limit_in_bytes: %m"); } } @@ -726,8 +785,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M else r = cg_set_attribute("devices", path, "devices.allow", "a"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to reset devices.list on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to reset devices.list: %m"); if (c->device_policy == CGROUP_CLOSED || (c->device_policy == CGROUP_AUTO && c->device_allow)) { @@ -773,7 +832,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M else if (startswith(a->path, "char-")) whitelist_major(path, a->path + 5, 'c', acc); else - log_debug("Ignoring device %s while writing cgroup attribute.", a->path); + log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path); } } @@ -788,8 +847,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M r = cg_set_attribute("pids", path, "pids.max", "max"); if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set pids.max on %s: %m", path); + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set pids.max: %m"); } } @@ -1224,7 +1283,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { return r; /* Finally, apply the necessary attributes. */ - cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state); + cgroup_context_apply(u, target_mask, state); return 0; } @@ -1355,7 +1414,7 @@ void unit_prune_cgroup(Unit *u) { r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice); if (r < 0) { - log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path); + log_unit_debug_errno(u, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path); return; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index ff87adfba1..f21409bd5d 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -125,7 +125,6 @@ struct CGroupContext { void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state); CGroupMask cgroup_context_get_mask(CGroupContext *c); diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 09d3f65c77..b53301a147 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2396,6 +2396,55 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP +static int syscall_filter_parse_one( + const char *unit, + const char *filename, + unsigned line, + ExecContext *c, + bool invert, + const char *t, + bool warn) { + int r; + + if (*t == '@') { + const SystemCallFilterSet *set; + + for (set = syscall_filter_sets; set->set_name; set++) + if (streq(set->set_name, t)) { + const char *sys; + + NULSTR_FOREACH(sys, set->value) { + r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false); + if (r < 0) + return r; + } + break; + } + } else { + int id; + + id = seccomp_syscall_resolve_name(t); + if (id < 0) { + if (warn) + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); + return 0; + } + + /* If we previously wanted to forbid a syscall and now + * we want to allow it, then remove it from the list + */ + if (!invert == c->syscall_whitelist) { + r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + if (r == 0) + return 0; + if (r < 0) + return log_oom(); + } else + set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + } + return 0; +} + int config_parse_syscall_filter( const char *unit, const char *filename, @@ -2408,13 +2457,6 @@ int config_parse_syscall_filter( void *data, void *userdata) { - static const char default_syscalls[] = - "execve\0" - "exit\0" - "exit_group\0" - "rt_sigreturn\0" - "sigreturn\0"; - ExecContext *c = data; Unit *u = userdata; bool invert = false; @@ -2448,53 +2490,26 @@ int config_parse_syscall_filter( /* Allow everything but the ones listed */ c->syscall_whitelist = false; else { - const char *i; - /* Allow nothing but the ones listed */ c->syscall_whitelist = true; /* Accept default syscalls if we are on a whitelist */ - NULSTR_FOREACH(i, default_syscalls) { - int id; - - id = seccomp_syscall_resolve_name(i); - if (id < 0) - continue; - - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); - if (r == 0) - continue; - if (r < 0) - return log_oom(); - } + r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false); + if (r < 0) + return r; } } FOREACH_WORD_QUOTED(word, l, rvalue, state) { _cleanup_free_ char *t = NULL; - int id; t = strndup(word, l); if (!t) return log_oom(); - id = seccomp_syscall_resolve_name(t); - if (id < 0) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); - continue; - } - - /* If we previously wanted to forbid a syscall and now - * we want to allow it, then remove it from the list - */ - if (!invert == c->syscall_whitelist) { - r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); - if (r == 0) - continue; - if (r < 0) - return log_oom(); - } else - set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); + r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true); + if (r < 0) + return r; } if (!isempty(state)) log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring."); @@ -2796,7 +2811,7 @@ int config_parse_memory_limit( uint64_t bytes = CGROUP_LIMIT_MAX; int r; - if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) { + if (!isempty(rvalue) && !streq(rvalue, "infinity")) { r = parse_size(rvalue, 1024, &bytes); if (r < 0 || bytes < 1) { log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); @@ -3065,7 +3080,7 @@ int config_parse_io_limit( return 0; } - if (streq("max", limit)) { + if (streq("infinity", limit)) { num = CGROUP_LIMIT_MAX; } else { r = parse_size(limit, 1000, &num); diff --git a/src/resolve/resolved-resolv-conf.c b/src/resolve/resolved-resolv-conf.c index ff03acc772..fa89de4c21 100644 --- a/src/resolve/resolved-resolv-conf.c +++ b/src/resolve/resolved-resolv-conf.c @@ -164,30 +164,32 @@ static void write_resolv_conf_server(DnsServer *s, FILE *f, unsigned *count) { } static void write_resolv_conf_search( - const char *domain, - FILE *f, - unsigned *count, - unsigned *length) { + OrderedSet *domains, + FILE *f) { + unsigned length = 0, count = 0; + Iterator i; + char *domain; - assert(domain); + assert(domains); assert(f); - assert(length); - if (*count >= MAXDNSRCH || - *length + strlen(domain) > 256) { - if (*count == MAXDNSRCH) - fputs(" # Too many search domains configured, remaining ones ignored.", f); - if (*length <= 256) - fputs(" # Total length of all search domains is too long, remaining ones ignored.", f); + fputs("search", f); - return; + ORDERED_SET_FOREACH(domain, domains, i) { + if (++count > MAXDNSRCH) { + fputs("\n# Too many search domains configured, remaining ones ignored.", f); + break; + } + length += strlen(domain) + 1; + if (length > 256) { + fputs("\n# Total length of all search domains is too long, remaining ones ignored.", f); + break; + } + fputc(' ', f); + fputs(domain, f); } - (*length) += strlen(domain); - (*count)++; - - fputc(' ', f); - fputs(domain, f); + fputs("\n", f); } static int write_resolv_conf_contents(FILE *f, OrderedSet *dns, OrderedSet *domains) { @@ -209,15 +211,8 @@ static int write_resolv_conf_contents(FILE *f, OrderedSet *dns, OrderedSet *doma write_resolv_conf_server(s, f, &count); } - if (!ordered_set_isempty(domains)) { - unsigned length = 0, count = 0; - char *domain; - - fputs("search", f); - ORDERED_SET_FOREACH(domain, domains, i) - write_resolv_conf_search(domain, f, &count, &length); - fputs("\n", f); - } + if (!ordered_set_isempty(domains)) + write_resolv_conf_search(domains, f); return fflush_and_check(f); } diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 502e98d9dc..bf0b2e89e3 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -169,7 +169,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen } else if (STR_IN_SET(field, "MemoryLow", "MemoryHigh", "MemoryMax", "MemoryLimit")) { uint64_t bytes; - if (isempty(eq) || streq(eq, "max") || streq(eq, "infinity")) + if (isempty(eq) || streq(eq, "infinity")) bytes = CGROUP_LIMIT_MAX; else { r = parse_size(eq, 1024, &bytes); @@ -306,7 +306,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen return -EINVAL; } - if (streq(bandwidth, "max")) { + if (streq(bandwidth, "infinity")) { bytes = CGROUP_LIMIT_MAX; } else { r = parse_size(bandwidth, 1000, &bytes); diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index cebe0fce2a..30d22d2242 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -88,3 +88,219 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) { return 0; } + +const SystemCallFilterSet syscall_filter_sets[] = { + { + /* Clock */ + .set_name = "@clock", + .value = + "adjtimex\0" + "settimeofday\0" + }, { + /* Default list */ + .set_name = "@default", + .value = + "execve\0" + "exit\0" + "exit_group\0" + "rt_sigreturn\0" + "sigreturn\0" + }, { + /* Event loop use */ + .set_name = "@io-event", + .value = + "_newselect\0" + "epoll_create1\0" + "epoll_create\0" + "epoll_ctl\0" + "epoll_ctl_old\0" + "epoll_pwait\0" + "epoll_wait\0" + "epoll_wait_old\0" + "eventfd2\0" + "eventfd\0" + "poll\0" + "ppoll\0" + "pselect6\0" + "select\0" + }, { + /* Message queues, SYSV IPC or other IPC: unusual */ + .set_name = "@ipc", + .value = "ipc\0" + "mq_getsetattr\0" + "mq_notify\0" + "mq_open\0" + "mq_timedreceive\0" + "mq_timedsend\0" + "mq_unlink\0" + "msgctl\0" + "msgget\0" + "msgrcv\0" + "msgsnd\0" + "process_vm_readv\0" + "process_vm_writev\0" + "semctl\0" + "semget\0" + "semop\0" + "semtimedop\0" + "shmat\0" + "shmctl\0" + "shmdt\0" + "shmget\0" + }, { + /* Kernel module control */ + .set_name = "@module", + .value = + "create_module\0" + "delete_module\0" + "finit_module\0" + "init_module\0" + }, { + /* Mounting */ + .set_name = "@mount", + .value = + "chroot\0" + "mount\0" + "oldumount\0" + "pivot_root\0" + "umount2\0" + "umount\0" + }, { + /* Network or Unix socket IO, should not be needed if not network facing */ + .set_name = "@network-io", + .value = + "accept4\0" + "accept\0" + "bind\0" + "connect\0" + "getpeername\0" + "getsockname\0" + "getsockopt\0" + "listen\0" + "recv\0" + "recvfrom\0" + "recvmmsg\0" + "recvmsg\0" + "send\0" + "sendmmsg\0" + "sendmsg\0" + "sendto\0" + "setsockopt\0" + "shutdown\0" + "socket\0" + "socketcall\0" + "socketpair\0" + }, { + /* Unusual, obsolete or unimplemented, some unknown even to libseccomp */ + .set_name = "@obsolete", + .value = + "_sysctl\0" + "afs_syscall\0" + "break\0" + "fattach\0" + "fdetach\0" + "ftime\0" + "get_kernel_syms\0" + "get_mempolicy\0" + "getmsg\0" + "getpmsg\0" + "gtty\0" + "isastream\0" + "lock\0" + "madvise1\0" + "modify_ldt\0" + "mpx\0" + "pciconfig_iobase\0" + "perf_event_open\0" + "prof\0" + "profil\0" + "putmsg\0" + "putpmsg\0" + "query_module\0" + "rtas\0" + "s390_runtime_instr\0" + "security\0" + "sgetmask\0" + "ssetmask\0" + "stty\0" + "subpage_prot\0" + "switch_endian\0" + "sys_debug_setcontext\0" + "tuxcall\0" + "ulimit\0" + "uselib\0" + "vm86\0" + "vm86old\0" + "vserver\0" + }, { + /* Nice grab-bag of all system calls which need superuser capabilities */ + .set_name = "@privileged", + .value = + "@clock\0" + "@module\0" + "@raw-io\0" + "acct\0" + "bdflush\0" + "bpf\0" + "chown32\0" + "chown\0" + "chroot\0" + "fchown32\0" + "fchown\0" + "fchownat\0" + "kexec_file_load\0" + "kexec_load\0" + "lchown32\0" + "lchown\0" + "nfsservctl\0" + "pivot_root\0" + "quotactl\0" + "reboot\0" + "setdomainname\0" + "setfsuid32\0" + "setfsuid\0" + "setgroups32\0" + "setgroups\0" + "sethostname\0" + "setresuid32\0" + "setresuid\0" + "setreuid32\0" + "setreuid\0" + "setuid32\0" + "setuid\0" + "stime\0" + "swapoff\0" + "swapon\0" + "sysctl\0" + "vhangup\0" + }, { + /* Process control, execution, namespaces */ + .set_name = "@process", + .value = + "arch_prctl\0" + "clone\0" + "execve\0" + "execveat\0" + "fork\0" + "kill\0" + "prctl\0" + "setns\0" + "tgkill\0" + "tkill\0" + "unshare\0" + "vfork\0" + }, { + /* Raw I/O ports */ + .set_name = "@raw-io", + .value = + "ioperm\0" + "iopl\0" + "pciconfig_read\0" + "pciconfig_write\0" + "s390_pci_mmio_read\0" + "s390_pci_mmio_write\0" + }, { + .set_name = NULL, + .value = NULL + } +}; diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 4ed2afc1b2..be33eecb85 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -26,3 +26,10 @@ const char* seccomp_arch_to_string(uint32_t c); int seccomp_arch_from_string(const char *n, uint32_t *ret); int seccomp_add_secondary_archs(scmp_filter_ctx *c); + +typedef struct SystemCallFilterSet { + const char *set_name; + const char *value; +} SystemCallFilterSet; + +extern const SystemCallFilterSet syscall_filter_sets[]; diff --git a/test/networkd-test.py b/test/networkd-test.py index d4de5adf1a..f94224cce2 100755 --- a/test/networkd-test.py +++ b/test/networkd-test.py @@ -370,6 +370,77 @@ exec $(systemctl cat systemd-networkd.service | sed -n '/^ExecStart=/ { s/^.*=// def test_coldplug_dhcp_ip6(self): pass + def test_search_domains(self): + + # we don't use this interface for this test + self.if_router = None + + with open('/run/systemd/network/test.netdev', 'w') as f: + f.write('''[NetDev] +Name=dummy0 +Kind=dummy +MACAddress=12:34:56:78:9a:bc''') + with open('/run/systemd/network/test.network', 'w') as f: + f.write('''[Match] +Name=dummy0 +[Network] +Address=192.168.42.100 +DNS=192.168.42.1 +Domains= one two three four five six seven eight nine ten''') + self.addCleanup(os.remove, '/run/systemd/network/test.netdev') + self.addCleanup(os.remove, '/run/systemd/network/test.network') + + subprocess.check_call(['systemctl', 'start', 'systemd-networkd']) + + if os.path.islink('/etc/resolv.conf'): + for timeout in range(50): + with open('/etc/resolv.conf') as f: + contents = f.read() + if 'search one\n' in contents: + break + time.sleep(0.1) + self.assertIn('search one two three four five six\n' + '# Too many search domains configured, remaining ones ignored.\n', + contents) + + def test_search_domains_too_long(self): + + # we don't use this interface for this test + self.if_router = None + + name_prefix = 'a' * 60 + + with open('/run/systemd/network/test.netdev', 'w') as f: + f.write('''[NetDev] +Name=dummy0 +Kind=dummy +MACAddress=12:34:56:78:9a:bc''') + with open('/run/systemd/network/test.network', 'w') as f: + f.write('''[Match] +Name=dummy0 +[Network] +Address=192.168.42.100 +DNS=192.168.42.1 +Domains=''') + for i in range(5): + f.write('%s%i ' % (name_prefix, i)) + + self.addCleanup(os.remove, '/run/systemd/network/test.netdev') + self.addCleanup(os.remove, '/run/systemd/network/test.network') + + subprocess.check_call(['systemctl', 'start', 'systemd-networkd']) + + if os.path.islink('/etc/resolv.conf'): + for timeout in range(50): + with open('/etc/resolv.conf') as f: + contents = f.read() + if 'search one\n' in contents: + break + time.sleep(0.1) + self.assertIn('search %(p)s0 %(p)s1 %(p)s2 %(p)s3\n' + '# Total length of all search domains is too long, remaining ones ignored.' % {'p': name_prefix}, + contents) + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(stream=sys.stdout, diff --git a/test/test-functions b/test/test-functions index e2e07a833c..5f95a8129e 100644 --- a/test/test-functions +++ b/test/test-functions @@ -10,6 +10,7 @@ KERNEL_MODS="/lib/modules/$KERNEL_VER/" QEMU_TIMEOUT="${QEMU_TIMEOUT:-infinity}" NSPAWN_TIMEOUT="${NSPAWN_TIMEOUT:-infinity}" FSTYPE="${FSTYPE:-ext3}" +UNIFIED_CGROUP_HIERARCHY="${UNIFIED_CGROUP_HIERARCHY:-no}" if ! ROOTLIBDIR=$(pkg-config --variable=systemdutildir systemd); then echo "WARNING! Cannot determine rootlibdir from pkg-config, assuming /usr/lib/systemd" >&2 @@ -70,6 +71,7 @@ init=$ROOTLIBDIR/systemd \ ro \ console=ttyS0 \ selinux=0 \ +systemd.unified_cgroup_hierarchy=$UNIFIED_CGROUP_HIERARCHY \ $KERNEL_APPEND \ " @@ -101,6 +103,9 @@ run_nspawn() { if [[ "$NSPAWN_TIMEOUT" != "infinity" ]]; then _nspawn_cmd="timeout --foreground $NSPAWN_TIMEOUT $_nspawn_cmd" fi + + _nspawn_cmd="env UNIFIED_CGROUP_HIERARCHY=$UNIFIED_CGROUP_HIERARCHY $_nspawn_cmd" + set -x $_nspawn_cmd } diff --git a/units/rescue.service.in b/units/rescue.service.in index 92553f61dd..ecf96bc211 100644 --- a/units/rescue.service.in +++ b/units/rescue.service.in @@ -16,7 +16,7 @@ Before=shutdown.target [Service] Environment=HOME=/root WorkingDirectory=-/root -ExecStartPre=-/bin/plymouth quit +ExecStartPre=-/bin/plymouth --wait quit ExecStartPre=-/bin/echo -e 'Welcome to rescue mode! After logging in, type "journalctl -xb" to view\\nsystem logs, "systemctl reboot" to reboot, "systemctl default" or ^D to\\nboot into default mode.' ExecStart=-/bin/sh -c "@SULOGIN@; @SYSTEMCTL@ --job-mode=fail --no-block default" Type=idle |