summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/CONTRIBUTING.md5
-rw-r--r--NEWS25
-rw-r--r--hwdb/60-evdev.hwdb7
-rw-r--r--man/systemd.exec.xml5
-rw-r--r--man/systemd.netdev.xml9
-rw-r--r--man/systemd.resource-control.xml174
-rw-r--r--man/systemd.timer.xml2
-rw-r--r--man/systemd.unit.xml2
-rw-r--r--src/basic/architecture.c2
-rw-r--r--src/basic/architecture.h2
-rw-r--r--src/basic/cgroup-util.c25
-rw-r--r--src/basic/cgroup-util.h18
-rw-r--r--src/basic/missing.h4
-rw-r--r--src/cgtop/cgtop.c50
-rw-r--r--src/core/cgroup.c147
-rw-r--r--src/core/cgroup.h26
-rw-r--r--src/core/dbus-cgroup.c308
-rw-r--r--src/core/dbus-job.c2
-rw-r--r--src/core/job.c24
-rw-r--r--src/core/job.h2
-rw-r--r--src/core/load-fragment-gperf.gperf.m46
-rw-r--r--src/core/load-fragment.c193
-rw-r--r--src/core/load-fragment.h3
-rw-r--r--src/core/main.c3
-rw-r--r--src/core/manager.c2
-rw-r--r--src/core/manager.h1
-rw-r--r--src/core/namespace.c10
-rw-r--r--src/core/system.conf1
-rw-r--r--src/core/transaction.c2
-rw-r--r--src/core/unit.c14
-rw-r--r--src/network/networkd-netdev-bridge.c7
-rw-r--r--src/network/networkd-netdev-bridge.h1
-rw-r--r--src/network/networkd-netdev-gperf.gperf1
-rw-r--r--src/network/networkd-netdev.c2
-rw-r--r--src/shared/bus-unit-util.c30
-rw-r--r--src/systemctl/systemctl.c5
-rw-r--r--src/vconsole/vconsole-setup.c8
37 files changed, 1018 insertions, 110 deletions
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 18081cbb48..60f0fb9bef 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -24,10 +24,7 @@ right-away for being misfiled.
code. This is a requirement for all code we merge.
* Make sure to run "make check" locally, before posting your PR. We use a CI system, meaning we don't even look at your
PR, if the build and tests don't pass.
-* If you need to update the code in an existing PR, please consider opening a new PR (mentioning in it which old PR it
- replaces) and closing the old PR. This is much preferable over force-pushing a new patch set into the PR's branch, as
- commit comments aren't lost that way. That said, we don't follow this rule ourselves quite often, hence this is
- really just a say as we say, not say as we do...
+* If you need to update the code in an existing PR, force-push into the same branch, overriding old commits with new versions.
## Final Words
diff --git a/NEWS b/NEWS
index b3e51877c9..ff2dd9abbf 100644
--- a/NEWS
+++ b/NEWS
@@ -53,7 +53,9 @@ CHANGES WITH 230 in spe:
* The unified cgroup hierarchy added in Linux 4.5 is now supported.
Use systemd.unified_cgroup_hierarchy=1 on the kernel command line to
- enable.
+ enable. Also, support for the "io" cgroup controller in the unified
+ hierarchy has been added, so that the "memory", "pids" and "io" are
+ now the controllers that are supported on the unified hierarchy.
WARNING: it is not possible to use previous systemd versions with
systemd.unified_cgroup_hierarchy=1 and the new kernel. Therefore it
@@ -142,6 +144,19 @@ CHANGES WITH 230 in spe:
deployable. The systemd-nspaw@.service template unit file has been
changed to use this functionality by default.
+ * systemd-nspawn gained a new --network-zone= switch, that allows
+ creating ad-hoc virtual Ethernet links between multiple containers,
+ that only exist as long as at least one container referencing them is
+ running. This allows easy connecting of multiple containers with a
+ common link that implements an Ethernet broadcast domain. Each of
+ these network "zones" may be named relatively freely by the user, and
+ may be referenced by any number of containers, but each container may
+ only reference one of these "zones". On the lower level, this is
+ implemented by an automatically managed bridge network interface for
+ each zone, that is created when the first container referencing its
+ zone is created and removed when the last one referencing its zone
+ terminates.
+
* The default start timeout may now be configured on the kernel command
line via systemd.default_timeout_start_sec=. It was already
configurable via the DefaultTimeoutStartSec= option in
@@ -157,6 +172,14 @@ CHANGES WITH 230 in spe:
value is understood as UNIX nice value. If not prefixed like this it
is understood as raw RLIMIT_NICE limit.
+ * Note that the effect of the PrivateDevices= unit file setting changed
+ slightly with this release: the per-device /dev file system will be
+ mounted read-only from this version on, and will have "noexec"
+ set. This (minor) change of behaviour might cause some (exceptional)
+ legacy software to break, when PrivateDevices=yes is set for its
+ service. Please leave PrivateDevices= off if you run into problems
+ with this.
+
Contributions from: Alban Crequy, Alexander Kuleshov, Alex Crawford,
Andrew Eikum, Beniamino Galvani, Benjamin Robin, Benjamin ROBIN, Biao
Lu, Bjørnar Ness, Calvin Owens, Christian Hesse, Colin Guthrie, Daniel
diff --git a/hwdb/60-evdev.hwdb b/hwdb/60-evdev.hwdb
index 547db3a2ca..90acb44a1c 100644
--- a/hwdb/60-evdev.hwdb
+++ b/hwdb/60-evdev.hwdb
@@ -160,6 +160,13 @@ evdev:name:SynPS/2 Synaptics TouchPad*:dmi:*svnHewlett-Packard:pnHPPaviliondm4*
# Lenovo
#########################################
+# Lenovo E530
+evdev:name:SynPS/2 Synaptics TouchPad:dmi:*svnLENOVO:pn*ThinkPadEdgeE530*
+ EVDEV_ABS_00=1241:5703:49
+ EVDEV_ABS_01=1105:4820:68
+ EVDEV_ABS_35=1241:5703:49
+ EVDEV_ABS_36=1105:4820:68
+
# Lenovo P50
evdev:name:SynPS/2 Synaptics TouchPad:dmi:*svnLENOVO*:pn*ThinkPad*P50*
EVDEV_ABS_00=::44
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 2a93760428..3cf6de8256 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -933,7 +933,10 @@
(propagation in the opposite direction continues to work).
This means that this setting may not be used for services
which shall be able to install mount points in the main mount
- namespace.</para></listitem>
+ namespace. The /dev namespace will be mounted read-only and 'noexec'.
+ The latter may break old programs which try to set up executable
+ memory by using <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+ of <filename>/dev/zero</filename> instead of using <constant>MAP_ANON</constant>.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/man/systemd.netdev.xml b/man/systemd.netdev.xml
index 48c283c8df..8d12c305d2 100644
--- a/man/systemd.netdev.xml
+++ b/man/systemd.netdev.xml
@@ -321,6 +321,15 @@
</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>MulticastSnooping=</varname></term>
+ <listitem>
+ <para>A boolean. This setting controls the IFLA_BR_MCAST_SNOOPING option in the kernel.
+ If enabled, IGMP snooping monitors the Internet Group Management Protocol (IGMP) traffic
+ between hosts and multicast routers. When unset, the kernel's default setting applies.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index fd6f7a1b69..313a49a959 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -249,17 +249,106 @@
</varlistentry>
<varlistentry>
+ <term><varname>IOAccounting=</varname></term>
+
+ <listitem>
+ <para>Turn on Block I/O accounting for this unit, if the unified control group hierarchy is used on the
+ system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly
+ turn it on for all units contained in the same slice and all for its parent slices and the units contained
+ therein. The system default for this setting may be controlled with <varname>DefaultIOAccounting=</varname>
+ in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used. Use
+ <varname>BlockIOAccounting=</varname> on systems using the legacy control group hierarchy.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>IOWeight=<replaceable>weight</replaceable></varname></term>
+ <term><varname>StartupIOWeight=<replaceable>weight</replaceable></varname></term>
+
+ <listitem>
+ <para>Set the default overall block I/O weight for the executed processes, if the unified control group
+ hierarchy is used on the system. Takes a single weight value (between 1 and 10000) to set the default block
+ I/O weight. This controls the <literal>io.weight</literal> control group attribute, which defaults to
+ 100. For details about this control group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>. The available I/O
+ bandwidth is split up among all units within one slice relative to their block I/O weight.</para>
+
+ <para>While <varname>StartupIOWeight=</varname> only applies
+ to the startup phase of the system,
+ <varname>IOWeight=</varname> applies to the later runtime of
+ the system, and if the former is not set also to the startup
+ phase. This allows prioritizing specific services at boot-up
+ differently than during runtime.</para>
+
+ <para>Implies <literal>IOAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used. Use
+ <varname>BlockIOWeight=</varname> and <varname>StartupBlockIOWeight=</varname> on systems using the legacy
+ control group hierarchy.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>IODeviceWeight=<replaceable>device</replaceable> <replaceable>weight</replaceable></varname></term>
+
+ <listitem>
+ <para>Set the per-device overall block I/O weight for the executed processes, if the unified control group
+ hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
+ the device specific weight value, between 1 and 10000. (Example: "/dev/sda 1000"). The file path may be
+ specified as path to a block device node or as any other file, in which case the backing block device of the
+ file system of the file is determined. This controls the <literal>io.weight</literal> control group
+ attribute, which defaults to 100. Use this option multiple times to set weights for multiple devices. For
+ details about this control group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
+
+ <para>Implies <literal>IOAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used. Use
+ <varname>BlockIODeviceWeight=</varname> on systems using the legacy control group hierarchy.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>IOReadBandwidthMax=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
+ <term><varname>IOWriteBandwidthMax=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
+
+ <listitem>
+ <para>Set the per-device overall block I/O bandwidth maximum limit for the executed processes, if the unified
+ control group hierarchy is used on the system. This limit is not work-conserving and the executed processes
+ are not allowed to use more even if the device has idle capacity. Takes a space-separated pair of a file
+ path and a bandwidth value (in bytes per second) to specify the device specific bandwidth. The file path may
+ be a path to a block device node, or as any other file in which case the backing block device of the file
+ system of the file is used. If the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is
+ parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes, respectively, to the base of 1000. (Example:
+ "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the <literal>io.max</literal> control
+ group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For details
+ about this control group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.
+ </para>
+
+ <para>Implies <literal>IOAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the unified control group hierarchy is used. Use
+ <varname>BlockIOAccounting=</varname> on systems using the legacy control group hierarchy.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>BlockIOAccounting=</varname></term>
<listitem>
- <para>Turn on Block I/O accounting for this unit. Takes a
- boolean argument. Note that turning on block I/O accounting
- for one unit will also implicitly turn it on for all units
- contained in the same slice and all for its parent slices
- and the units contained therein. The system default for this
- setting may be controlled with
+ <para>Turn on Block I/O accounting for this unit, if the legacy control group hierarchy is used on the
+ system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly
+ turn it on for all units contained in the same slice and all for its parent slices and the units contained
+ therein. The system default for this setting may be controlled with
<varname>DefaultBlockIOAccounting=</varname> in
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+ <para>This setting is supported only if the legacy control group hierarchy is used. Use
+ <varname>IOAccounting=</varname> on systems using the unified control group hierarchy.</para>
</listitem>
</varlistentry>
@@ -267,15 +356,13 @@
<term><varname>BlockIOWeight=<replaceable>weight</replaceable></varname></term>
<term><varname>StartupBlockIOWeight=<replaceable>weight</replaceable></varname></term>
- <listitem><para>Set the default overall block I/O weight for
- the executed processes. Takes a single weight value (between
- 10 and 1000) to set the default block I/O weight. This controls
- the <literal>blkio.weight</literal> control group attribute,
- which defaults to 500. For details about this control group
- attribute, see <ulink
+ <listitem><para>Set the default overall block I/O weight for the executed processes, if the legacy control
+ group hierarchy is used on the system. Takes a single weight value (between 10 and 1000) to set the default
+ block I/O weight. This controls the <literal>blkio.weight</literal> control group attribute, which defaults to
+ 500. For details about this control group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
- The available I/O bandwidth is split up among all units within
- one slice relative to their block I/O weight.</para>
+ The available I/O bandwidth is split up among all units within one slice relative to their block I/O
+ weight.</para>
<para>While <varname>StartupBlockIOWeight=</varname> only
applies to the startup phase of the system,
@@ -286,29 +373,32 @@
<para>Implies
<literal>BlockIOAccounting=true</literal>.</para>
- </listitem>
+
+ <para>This setting is supported only if the legacy control group hierarchy is used. Use
+ <varname>IOWeight=</varname> and <varname>StartupIOWeight=</varname> on systems using the unified control group
+ hierarchy.</para>
+
+ </listitem>
</varlistentry>
<varlistentry>
<term><varname>BlockIODeviceWeight=<replaceable>device</replaceable> <replaceable>weight</replaceable></varname></term>
<listitem>
- <para>Set the per-device overall block I/O weight for the
- executed processes. Takes a space-separated pair of a file
- path and a weight value to specify the device specific
- weight value, between 10 and 1000. (Example: "/dev/sda
- 500"). The file path may be specified as path to a block
- device node or as any other file, in which case the backing
- block device of the file system of the file is
- determined. This controls the
- <literal>blkio.weight_device</literal> control group
- attribute, which defaults to 1000. Use this option multiple
- times to set weights for multiple devices. For details about
- this control group attribute, see <ulink
+ <para>Set the per-device overall block I/O weight for the executed processes, if the legacy control group
+ hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
+ the device specific weight value, between 10 and 1000. (Example: "/dev/sda 500"). The file path may be
+ specified as path to a block device node or as any other file, in which case the backing block device of the
+ file system of the file is determined. This controls the <literal>blkio.weight_device</literal> control group
+ attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For
+ details about this control group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.</para>
<para>Implies
<literal>BlockIOAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the legacy control group hierarchy is used. Use
+ <varname>IODeviceWeight=</varname> on systems using the unified control group hierarchy.</para>
</listitem>
</varlistentry>
@@ -317,27 +407,25 @@
<term><varname>BlockIOWriteBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
<listitem>
- <para>Set the per-device overall block I/O bandwidth limit
- for the executed processes. Takes a space-separated pair of
- a file path and a bandwidth value (in bytes per second) to
- specify the device specific bandwidth. The file path may be
- a path to a block device node, or as any other file in which
- case the backing block device of the file system of the file
- is used. If the bandwidth is suffixed with K, M, G, or T,
- the specified bandwidth is parsed as Kilobytes, Megabytes,
- Gigabytes, or Terabytes, respectively, to the base of
- 1000. (Example:
- "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This
- controls the <literal>blkio.throttle.read_bps_device</literal> and
- <literal>blkio.throttle.write_bps_device</literal> control group
- attributes. Use this option multiple times to set bandwidth
- limits for multiple devices. For details about these control
- group attributes, see <ulink
+ <para>Set the per-device overall block I/O bandwidth limit for the executed processes, if the legacy control
+ group hierarchy is used on the system. Takes a space-separated pair of a file path and a bandwidth value (in
+ bytes per second) to specify the device specific bandwidth. The file path may be a path to a block device
+ node, or as any other file in which case the backing block device of the file system of the file is used. If
+ the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is parsed as Kilobytes, Megabytes,
+ Gigabytes, or Terabytes, respectively, to the base of 1000. (Example:
+ "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the
+ <literal>blkio.throttle.read_bps_device</literal> and <literal>blkio.throttle.write_bps_device</literal>
+ control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For
+ details about these control group attributes, see <ulink
url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
</para>
<para>Implies
<literal>BlockIOAccounting=true</literal>.</para>
+
+ <para>This setting is supported only if the legacy control group hierarchy is used. Use
+ <varname>IOReadBandwidthMax=</varname> and <varname>IOWriteBandwidthMax=</varname> on systems using the
+ unified control group hierarchy.</para>
</listitem>
</varlistentry>
diff --git a/man/systemd.timer.xml b/man/systemd.timer.xml
index 0d0cccf152..0fa95e97a8 100644
--- a/man/systemd.timer.xml
+++ b/man/systemd.timer.xml
@@ -288,7 +288,7 @@
starting a timer unit that only elapses once: if
<varname>RemainAfterElapse=</varname> is on, it will not be
started again, and is guaranteed to elapse only once. However,
- if <varname>RemainAfterLeapse=</varname> is off, it might be
+ if <varname>RemainAfterElapse=</varname> is off, it might be
started again if it is already elapsed, and thus be triggered
multiple times. Defaults to
<varname>yes</varname>.</para></listitem>
diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml
index 90a1ec6b9c..341789cd47 100644
--- a/man/systemd.unit.xml
+++ b/man/systemd.unit.xml
@@ -603,7 +603,7 @@
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details). If a unit that has this setting set is started,
its processes will see the same <filename>/tmp</filename>,
- <filename>/tmp/var</filename> and network namespace as one
+ <filename>/var/tmp</filename> and network namespace as one
listed unit that is started. If multiple listed units are
already started, it is not defined which namespace is joined.
Note that this setting only has an effect if
diff --git a/src/basic/architecture.c b/src/basic/architecture.c
index 8e2c2b02d2..b1c8e91f50 100644
--- a/src/basic/architecture.c
+++ b/src/basic/architecture.c
@@ -63,7 +63,7 @@ int uname_architecture(void) {
#elif defined(__s390__) || defined(__s390x__)
{ "s390x", ARCHITECTURE_S390X },
{ "s390", ARCHITECTURE_S390 },
-#elif defined(__sparc__) || defined(__sparc64__)
+#elif defined(__sparc__)
{ "sparc64", ARCHITECTURE_SPARC64 },
{ "sparc", ARCHITECTURE_SPARC },
#elif defined(__mips__) || defined(__mips64__)
diff --git a/src/basic/architecture.h b/src/basic/architecture.h
index 91ec108e04..b3e4d85906 100644
--- a/src/basic/architecture.h
+++ b/src/basic/architecture.h
@@ -116,7 +116,7 @@ int uname_architecture(void);
#elif defined(__s390__)
# define native_architecture() ARCHITECTURE_S390
# define LIB_ARCH_TUPLE "s390-linux-gnu"
-#elif defined(__sparc64__)
+#elif defined(__sparc__) && defined (__arch64__)
# define native_architecture() ARCHITECTURE_SPARC64
# define LIB_ARCH_TUPLE "sparc64-linux-gnu"
#elif defined(__sparc__)
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 5043180747..ff57cf30b7 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -2060,10 +2060,10 @@ int cg_mask_supported(CGroupMask *ret) {
mask |= CGROUP_CONTROLLER_TO_MASK(v);
}
- /* Currently, we only support the memory and pids
+ /* Currently, we only support the memory, io and pids
* controller in the unified hierarchy, mask
* everything else off. */
- mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
+ mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
} else {
CGroupController c;
@@ -2249,6 +2249,26 @@ bool cg_is_legacy_wanted(void) {
return !cg_is_unified_wanted();
}
+int cg_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
uint64_t u;
int r;
@@ -2292,6 +2312,7 @@ int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_CPU] = "cpu",
[CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
+ [CGROUP_CONTROLLER_IO] = "io",
[CGROUP_CONTROLLER_BLKIO] = "blkio",
[CGROUP_CONTROLLER_MEMORY] = "memory",
[CGROUP_CONTROLLER_DEVICES] = "devices",
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 4254e51e5d..a696c1fa60 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -34,6 +34,7 @@
typedef enum CGroupController {
CGROUP_CONTROLLER_CPU,
CGROUP_CONTROLLER_CPUACCT,
+ CGROUP_CONTROLLER_IO,
CGROUP_CONTROLLER_BLKIO,
CGROUP_CONTROLLER_MEMORY,
CGROUP_CONTROLLER_DEVICES,
@@ -48,6 +49,7 @@ typedef enum CGroupController {
typedef enum CGroupMask {
CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
+ CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
@@ -55,6 +57,21 @@ typedef enum CGroupMask {
_CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
} CGroupMask;
+/* Special values for all weight knobs on unified hierarchy */
+#define CGROUP_WEIGHT_INVALID ((uint64_t) -1)
+#define CGROUP_WEIGHT_MIN UINT64_C(1)
+#define CGROUP_WEIGHT_MAX UINT64_C(10000)
+#define CGROUP_WEIGHT_DEFAULT UINT64_C(100)
+
+#define CGROUP_LIMIT_MIN UINT64_C(0)
+#define CGROUP_LIMIT_MAX ((uint64_t) -1)
+
+static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
+ return
+ x == CGROUP_WEIGHT_INVALID ||
+ (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX);
+}
+
/* Special values for the cpu.shares attribute */
#define CGROUP_CPU_SHARES_INVALID ((uint64_t) -1)
#define CGROUP_CPU_SHARES_MIN UINT64_C(2)
@@ -190,5 +207,6 @@ bool cg_is_legacy_wanted(void);
const char* cgroup_controller_to_string(CGroupController c) _const_;
CGroupController cgroup_controller_from_string(const char *s) _pure_;
+int cg_weight_parse(const char *s, uint64_t *ret);
int cg_cpu_shares_parse(const char *s, uint64_t *ret);
int cg_blkio_weight_parse(const char *s, uint64_t *ret);
diff --git a/src/basic/missing.h b/src/basic/missing.h
index 22ea8f67cc..651e414395 100644
--- a/src/basic/missing.h
+++ b/src/basic/missing.h
@@ -389,6 +389,10 @@ struct btrfs_ioctl_quota_ctl_args {
struct btrfs_ioctl_qgroup_limit_args)
#endif
+#ifndef BTRFS_IOC_QUOTA_RESCAN_WAIT
+#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
+#endif
+
#ifndef BTRFS_FIRST_FREE_OBJECTID
#define BTRFS_FIRST_FREE_OBJECTID 256
#endif
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index 14eb46c8db..e088e4b197 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -269,13 +269,15 @@ static int process(
if (g->memory > 0)
g->memory_valid = true;
- } else if (streq(controller, "blkio") && cg_unified() <= 0) {
+ } else if ((streq(controller, "io") && cg_unified() > 0) ||
+ (streq(controller, "blkio") && cg_unified() <= 0)) {
_cleanup_fclose_ FILE *f = NULL;
_cleanup_free_ char *p = NULL;
+ bool unified = cg_unified() > 0;
uint64_t wr = 0, rd = 0;
nsec_t timestamp;
- r = cg_get_path(controller, path, "blkio.io_service_bytes", &p);
+ r = cg_get_path(controller, path, unified ? "io.stat" : "blkio.io_service_bytes", &p);
if (r < 0)
return r;
@@ -293,25 +295,38 @@ static int process(
if (!fgets(line, sizeof(line), f))
break;
+ /* Trim and skip the device */
l = strstrip(line);
l += strcspn(l, WHITESPACE);
l += strspn(l, WHITESPACE);
- if (first_word(l, "Read")) {
- l += 4;
- q = &rd;
- } else if (first_word(l, "Write")) {
- l += 5;
- q = &wr;
- } else
- continue;
-
- l += strspn(l, WHITESPACE);
- r = safe_atou64(l, &k);
- if (r < 0)
- continue;
+ if (unified) {
+ while (!isempty(l)) {
+ if (sscanf(l, "rbytes=%" SCNu64, &k))
+ rd += k;
+ else if (sscanf(l, "wbytes=%" SCNu64, &k))
+ wr += k;
- *q += k;
+ l += strcspn(l, WHITESPACE);
+ l += strspn(l, WHITESPACE);
+ }
+ } else {
+ if (first_word(l, "Read")) {
+ l += 4;
+ q = &rd;
+ } else if (first_word(l, "Write")) {
+ l += 5;
+ q = &wr;
+ } else
+ continue;
+
+ l += strspn(l, WHITESPACE);
+ r = safe_atou64(l, &k);
+ if (r < 0)
+ continue;
+
+ *q += k;
+ }
}
timestamp = now_nsec(CLOCK_MONOTONIC);
@@ -439,6 +454,9 @@ static int refresh(const char *root, Hashmap *a, Hashmap *b, unsigned iteration)
r = refresh_one("memory", root, a, b, iteration, 0, NULL);
if (r < 0)
return r;
+ r = refresh_one("io", root, a, b, iteration, 0, NULL);
+ if (r < 0)
+ return r;
r = refresh_one("blkio", root, a, b, iteration, 0, NULL);
if (r < 0)
return r;
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 996efde22b..4f1637ffe9 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -32,6 +32,7 @@
#include "special.h"
#include "string-table.h"
#include "string-util.h"
+#include "stdio-util.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
@@ -47,6 +48,9 @@ void cgroup_context_init(CGroupContext *c) {
c->memory_limit = (uint64_t) -1;
+ c->io_weight = CGROUP_WEIGHT_INVALID;
+ c->startup_io_weight = CGROUP_WEIGHT_INVALID;
+
c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
@@ -62,6 +66,24 @@ void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
free(a);
}
+void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) {
+ assert(c);
+ assert(w);
+
+ LIST_REMOVE(device_weights, c->io_device_weights, w);
+ free(w->path);
+ free(w);
+}
+
+void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
+ assert(c);
+ assert(l);
+
+ LIST_REMOVE(device_limits, c->io_device_limits, l);
+ free(l->path);
+ free(l);
+}
+
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
assert(c);
assert(w);
@@ -83,6 +105,12 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI
void cgroup_context_done(CGroupContext *c) {
assert(c);
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+
+ while (c->io_device_limits)
+ cgroup_context_free_io_device_limit(c, c->io_device_limits);
+
while (c->blockio_device_weights)
cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
@@ -94,6 +122,8 @@ void cgroup_context_done(CGroupContext *c) {
}
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
+ CGroupIODeviceLimit *il;
+ CGroupIODeviceWeight *iw;
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
@@ -106,12 +136,15 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
fprintf(f,
"%sCPUAccounting=%s\n"
+ "%sIOAccounting=%s\n"
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
+ "%sIOWeight=%" PRIu64 "\n"
+ "%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
"%sStartupBlockIOWeight=%" PRIu64 "\n"
"%sMemoryLimit=%" PRIu64 "\n"
@@ -119,12 +152,15 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sDevicePolicy=%s\n"
"%sDelegate=%s\n",
prefix, yes_no(c->cpu_accounting),
+ prefix, yes_no(c->io_accounting),
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
+ prefix, c->io_weight,
+ prefix, c->startup_io_weight,
prefix, c->blockio_weight,
prefix, c->startup_blockio_weight,
prefix, c->memory_limit,
@@ -139,6 +175,31 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
a->path,
a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+ LIST_FOREACH(device_weights, iw, c->io_device_weights)
+ fprintf(f,
+ "%sIODeviceWeight=%s %" PRIu64,
+ prefix,
+ iw->path,
+ iw->weight);
+
+ LIST_FOREACH(device_limits, il, c->io_device_limits) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (il->rbps_max != CGROUP_LIMIT_MAX)
+ fprintf(f,
+ "%sIOReadBandwidthMax=%s %s\n",
+ prefix,
+ il->path,
+ format_bytes(buf, sizeof(buf), il->rbps_max));
+
+ if (il->wbps_max != CGROUP_LIMIT_MAX)
+ fprintf(f,
+ "%sIOWriteBandwidthMax=%s %s\n",
+ prefix,
+ il->path,
+ format_bytes(buf, sizeof(buf), il->wbps_max));
+ }
+
LIST_FOREACH(device_weights, w, c->blockio_device_weights)
fprintf(f,
"%sBlockIODeviceWeight=%s %" PRIu64,
@@ -158,7 +219,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
}
}
-static int lookup_blkio_device(const char *p, dev_t *dev) {
+static int lookup_block_device(const char *p, dev_t *dev) {
struct stat st;
int r;
@@ -343,6 +404,77 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
"Failed to set cpu.cfs_quota_us on %s: %m", path);
}
+ if (mask & CGROUP_MASK_IO) {
+ CGroupIODeviceWeight *w;
+ CGroupIODeviceLimit *l, *next;
+
+ if (!is_root) {
+ char buf[MAX(8+DECIMAL_STR_MAX(uint64_t)+1,
+ DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
+ uint64_t weight = CGROUP_WEIGHT_DEFAULT;
+
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID)
+ weight = c->startup_io_weight;
+ else if (c->io_weight != CGROUP_WEIGHT_INVALID)
+ weight = c->io_weight;
+
+ xsprintf(buf, "default %" PRIu64 "\n", weight);
+ r = cg_set_attribute("io", path, "io.weight", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set io.weight on %s: %m", path);
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ dev_t dev;
+
+ r = lookup_block_device(w->path, &dev);
+ if (r < 0)
+ continue;
+
+ xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight);
+ r = cg_set_attribute("io", path, "io.weight", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set io.weight on %s: %m", path);
+ }
+ }
+
+ LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
+ char rbps_buf[DECIMAL_STR_MAX(uint64_t)] = "max";
+ char wbps_buf[DECIMAL_STR_MAX(uint64_t)] = "max";
+ char buf[DECIMAL_STR_MAX(dev_t)*2+2+(5+DECIMAL_STR_MAX(uint64_t)+1)*2];
+ dev_t dev;
+ unsigned n = 0;
+
+ r = lookup_block_device(l->path, &dev);
+ if (r < 0)
+ continue;
+
+ if (l->rbps_max != CGROUP_LIMIT_MAX) {
+ xsprintf(rbps_buf, "%" PRIu64, l->rbps_max);
+ n++;
+ }
+
+ if (l->wbps_max != CGROUP_LIMIT_MAX) {
+ xsprintf(wbps_buf, "%" PRIu64, l->wbps_max);
+ n++;
+ }
+
+ xsprintf(buf, "%u:%u rbps=%s wbps=%s\n", major(dev), minor(dev), rbps_buf, wbps_buf);
+ r = cg_set_attribute("io", path, "io.max", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set io.max on %s: %m", path);
+
+ /* If @l contained no config, we just cleared the kernel
+ counterpart too. No reason to keep @l around. */
+ if (!n)
+ cgroup_context_free_io_device_limit(c, l);
+ }
+ }
+
if (mask & CGROUP_MASK_BLKIO) {
char buf[MAX(DECIMAL_STR_MAX(uint64_t)+1,
DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
@@ -362,7 +494,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
dev_t dev;
- r = lookup_blkio_device(w->path, &dev);
+ r = lookup_block_device(w->path, &dev);
if (r < 0)
continue;
@@ -379,7 +511,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
const char *a;
dev_t dev;
- r = lookup_blkio_device(b->path, &dev);
+ r = lookup_block_device(b->path, &dev);
if (r < 0)
continue;
@@ -506,6 +638,13 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
+ if (c->io_accounting ||
+ c->io_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
+ c->io_device_weights ||
+ c->io_device_limits)
+ mask |= CGROUP_MASK_IO;
+
if (c->blockio_accounting ||
c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
@@ -1615,7 +1754,7 @@ void manager_invalidate_startup_units(Manager *m) {
assert(m);
SET_FOREACH(u, m->startup_units, i)
- unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_BLKIO);
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO);
}
static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 360bbca30f..a533923072 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -26,6 +26,8 @@
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
+typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
+typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
@@ -53,6 +55,19 @@ struct CGroupDeviceAllow {
bool m:1;
};
+struct CGroupIODeviceWeight {
+ LIST_FIELDS(CGroupIODeviceWeight, device_weights);
+ char *path;
+ uint64_t weight;
+};
+
+struct CGroupIODeviceLimit {
+ LIST_FIELDS(CGroupIODeviceLimit, device_limits);
+ char *path;
+ uint64_t rbps_max;
+ uint64_t wbps_max;
+};
+
struct CGroupBlockIODeviceWeight {
LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
char *path;
@@ -68,10 +83,18 @@ struct CGroupBlockIODeviceBandwidth {
struct CGroupContext {
bool cpu_accounting;
+ bool io_accounting;
bool blockio_accounting;
bool memory_accounting;
bool tasks_accounting;
+ /* For unified hierarchy */
+ uint64_t io_weight;
+ uint64_t startup_io_weight;
+ LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
+ LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
+
+ /* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
usec_t cpu_quota_per_sec_usec;
@@ -86,6 +109,7 @@ struct CGroupContext {
CGroupDevicePolicy device_policy;
LIST_HEAD(CGroupDeviceAllow, device_allow);
+ /* Common */
uint64_t tasks_max;
bool delegate;
@@ -102,6 +126,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
CGroupMask cgroup_context_get_mask(CGroupContext *c);
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
+void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
+void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 859d155ec1..a2a4a6249c 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -28,6 +28,76 @@
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
+static int property_get_io_device_weight(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceWeight *w;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ r = sd_bus_message_append(reply, "(st)", w->path, w->weight);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
+static int property_get_io_device_limits(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+ CGroupIODeviceLimit *l;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ LIST_FOREACH(device_limits, l, c->io_device_limits) {
+ uint64_t v;
+
+ if (streq(property, "IOReadBandwidthMax"))
+ v = l->rbps_max;
+ else
+ v = l->wbps_max;
+
+ if (v == CGROUP_LIMIT_MAX)
+ continue;
+
+ r = sd_bus_message_append(reply, "(st)", l->path, v);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(reply);
+}
+
static int property_get_blockio_device_weight(
sd_bus *bus,
const char *path,
@@ -141,6 +211,12 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
+ SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
+ SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0),
+ SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0),
+ SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0),
+ SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
+ SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0),
SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0),
@@ -281,6 +357,238 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "IOAccounting")) {
+ int b;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ c->io_accounting = b;
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+ unit_write_drop_in_private(u, mode, name, b ? "IOAccounting=yes" : "IOAccounting=no");
+ }
+
+ return 1;
+
+ } else if (streq(name, "IOWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "IOWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->io_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ if (weight == CGROUP_WEIGHT_INVALID)
+ unit_write_drop_in_private(u, mode, name, "IOWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "IOWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
+ } else if (streq(name, "StartupIOWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "StartupIOWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->startup_io_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ if (weight == CGROUP_WEIGHT_INVALID)
+ unit_write_drop_in_private(u, mode, name, "StartupIOWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "StartupIOWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IOReadBandwidthMax") || streq(name, "IOWriteBandwidthMax")) {
+ const char *path;
+ bool read = true;
+ unsigned n = 0;
+ uint64_t u64;
+
+ if (streq(name, "IOWriteBandwidthMax"))
+ read = false;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) {
+
+ if (mode != UNIT_CHECK) {
+ CGroupIODeviceLimit *a = NULL, *b;
+
+ LIST_FOREACH(device_limits, b, c->io_device_limits) {
+ if (path_equal(path, b->path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceLimit, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+
+ a->rbps_max = CGROUP_LIMIT_MAX;
+ a->wbps_max = CGROUP_LIMIT_MAX;
+
+ LIST_PREPEND(device_limits, c->io_device_limits, a);
+ }
+
+ if (read)
+ a->rbps_max = u64;
+ else
+ a->wbps_max = u64;
+ }
+
+ n++;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ CGroupIODeviceLimit *a;
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t size = 0;
+
+ if (n == 0) {
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ if (read)
+ a->rbps_max = CGROUP_LIMIT_MAX;
+ else
+ a->wbps_max = CGROUP_LIMIT_MAX;
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ if (read) {
+ fputs("IOReadBandwidthMax=\n", f);
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ if (a->rbps_max != CGROUP_LIMIT_MAX)
+ fprintf(f, "IOReadBandwidthMax=%s %" PRIu64 "\n", a->path, a->rbps_max);
+ } else {
+ fputs("IOWriteBandwidthMax=\n", f);
+ LIST_FOREACH(device_limits, a, c->io_device_limits)
+ if (a->wbps_max != CGROUP_LIMIT_MAX)
+ fprintf(f, "IOWriteBandwidthMax=%s %" PRIu64 "\n", a->path, a->wbps_max);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private(u, mode, name, buf);
+ }
+
+ return 1;
+
+ } else if (streq(name, "IODeviceWeight")) {
+ const char *path;
+ uint64_t weight;
+ unsigned n = 0;
+
+ r = sd_bus_message_enter_container(message, 'a', "(st)");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) {
+
+ if (!CGROUP_WEIGHT_IS_OK(weight) || weight == CGROUP_WEIGHT_INVALID)
+ return sd_bus_error_set_errnof(error, EINVAL, "IODeviceWeight out of range");
+
+ if (mode != UNIT_CHECK) {
+ CGroupIODeviceWeight *a = NULL, *b;
+
+ LIST_FOREACH(device_weights, b, c->io_device_weights) {
+ if (path_equal(b->path, path)) {
+ a = b;
+ break;
+ }
+ }
+
+ if (!a) {
+ a = new0(CGroupIODeviceWeight, 1);
+ if (!a)
+ return -ENOMEM;
+
+ a->path = strdup(path);
+ if (!a->path) {
+ free(a);
+ return -ENOMEM;
+ }
+ LIST_PREPEND(device_weights,c->io_device_weights, a);
+ }
+
+ a->weight = weight;
+ }
+
+ n++;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ _cleanup_free_ char *buf = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ CGroupIODeviceWeight *a;
+ size_t size = 0;
+
+ if (n == 0) {
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+ }
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_IO);
+
+ f = open_memstream(&buf, &size);
+ if (!f)
+ return -ENOMEM;
+
+ fputs("IODeviceWeight=\n", f);
+ LIST_FOREACH(device_weights, a, c->io_device_weights)
+ fprintf(f, "IODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private(u, mode, name, buf);
+ }
+
+ return 1;
+
} else if (streq(name, "BlockIOAccounting")) {
int b;
diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c
index 97a93fb2f1..ccf7453d47 100644
--- a/src/core/dbus-job.c
+++ b/src/core/dbus-job.c
@@ -75,7 +75,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
}
- job_finish_and_invalidate(j, JOB_CANCELED, true);
+ job_finish_and_invalidate(j, JOB_CANCELED, true, false);
return sd_bus_reply_method_return(message, NULL);
}
diff --git a/src/core/job.c b/src/core/job.c
index d9c5669c9f..7557874d4d 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -191,7 +191,7 @@ Job* job_install(Job *j) {
if (uj) {
if (job_type_is_conflicting(uj->type, j->type))
- job_finish_and_invalidate(uj, JOB_CANCELED, false);
+ job_finish_and_invalidate(uj, JOB_CANCELED, false, false);
else {
/* not conflicting, i.e. mergeable */
@@ -614,19 +614,19 @@ int job_run_and_invalidate(Job *j) {
if (j) {
if (r == -EALREADY)
- r = job_finish_and_invalidate(j, JOB_DONE, true);
+ r = job_finish_and_invalidate(j, JOB_DONE, true, true);
else if (r == -EBADR)
- r = job_finish_and_invalidate(j, JOB_SKIPPED, true);
+ r = job_finish_and_invalidate(j, JOB_SKIPPED, true, false);
else if (r == -ENOEXEC)
- r = job_finish_and_invalidate(j, JOB_INVALID, true);
+ r = job_finish_and_invalidate(j, JOB_INVALID, true, false);
else if (r == -EPROTO)
- r = job_finish_and_invalidate(j, JOB_ASSERT, true);
+ r = job_finish_and_invalidate(j, JOB_ASSERT, true, false);
else if (r == -EOPNOTSUPP)
- r = job_finish_and_invalidate(j, JOB_UNSUPPORTED, true);
+ r = job_finish_and_invalidate(j, JOB_UNSUPPORTED, true, false);
else if (r == -EAGAIN)
job_set_state(j, JOB_WAITING);
else if (r < 0)
- r = job_finish_and_invalidate(j, JOB_FAILED, true);
+ r = job_finish_and_invalidate(j, JOB_FAILED, true, false);
}
return r;
@@ -827,11 +827,11 @@ static void job_fail_dependencies(Unit *u, UnitDependency d) {
if (!IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE))
continue;
- job_finish_and_invalidate(j, JOB_DEPENDENCY, true);
+ job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
}
}
-int job_finish_and_invalidate(Job *j, JobResult result, bool recursive) {
+int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
Unit *u;
Unit *other;
JobType t;
@@ -848,7 +848,9 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive) {
log_unit_debug(u, "Job %s/%s finished, result=%s", u->id, job_type_to_string(t), job_result_to_string(result));
- job_emit_status_message(u, t, result);
+ /* If this job did nothing to respective unit we don't log the status message */
+ if (!already)
+ job_emit_status_message(u, t, result);
job_add_to_dbus_queue(j);
@@ -923,7 +925,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
log_unit_warning(j->unit, "Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
u = j->unit;
- job_finish_and_invalidate(j, JOB_TIMEOUT, true);
+ job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
diff --git a/src/core/job.h b/src/core/job.h
index 856b0ce829..d359e8bb3e 100644
--- a/src/core/job.h
+++ b/src/core/job.h
@@ -219,7 +219,7 @@ void job_add_to_dbus_queue(Job *j);
int job_start_timer(Job *j);
int job_run_and_invalidate(Job *j);
-int job_finish_and_invalidate(Job *j, JobResult result, bool recursive);
+int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already);
char *job_dbus_path(Job *j);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 928b913c7b..ad45611d9d 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -120,6 +120,12 @@ $1.MemoryAccounting, config_parse_bool, 0,
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
+$1.IOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.io_accounting)
+$1.IOWeight, config_parse_io_weight, 0, offsetof($1, cgroup_context.io_weight)
+$1.StartupIOWeight, config_parse_io_weight, 0, offsetof($1, cgroup_context.startup_io_weight)
+$1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context)
+$1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
+$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
$1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting)
$1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight)
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index a12dd38c60..cea615132a 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2910,6 +2910,196 @@ int config_parse_device_allow(
return 0;
}
+int config_parse_io_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *weight = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = cg_weight_parse(rvalue, weight);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_io_device_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL;
+ CGroupIODeviceWeight *w;
+ CGroupContext *c = data;
+ const char *weight;
+ uint64_t u;
+ size_t n;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (isempty(rvalue)) {
+ while (c->io_device_weights)
+ cgroup_context_free_io_device_weight(c, c->io_device_weights);
+
+ return 0;
+ }
+
+ n = strcspn(rvalue, WHITESPACE);
+ weight = rvalue + n;
+ weight += strspn(weight, WHITESPACE);
+
+ if (isempty(weight)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Expected block device and device weight. Ignoring.");
+ return 0;
+ }
+
+ path = strndup(rvalue, n);
+ if (!path)
+ return log_oom();
+
+ if (!path_startswith(path, "/dev")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path);
+ return 0;
+ }
+
+ r = cg_weight_parse(weight, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid. Ignoring.", weight);
+ return 0;
+ }
+
+ assert(u != CGROUP_WEIGHT_INVALID);
+
+ w = new0(CGroupIODeviceWeight, 1);
+ if (!w)
+ return log_oom();
+
+ w->path = path;
+ path = NULL;
+
+ w->weight = u;
+
+ LIST_PREPEND(device_weights, c->io_device_weights, w);
+ return 0;
+}
+
+int config_parse_io_limit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *path = NULL;
+ CGroupIODeviceLimit *l = NULL, *t;
+ CGroupContext *c = data;
+ const char *limit;
+ uint64_t num;
+ bool read;
+ size_t n;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ read = streq("IOReadBandwidthMax", lvalue);
+
+ if (isempty(rvalue)) {
+ LIST_FOREACH(device_limits, l, c->io_device_limits)
+ if (read)
+ l->rbps_max = CGROUP_LIMIT_MAX;
+ else
+ l->wbps_max = CGROUP_LIMIT_MAX;
+ return 0;
+ }
+
+ n = strcspn(rvalue, WHITESPACE);
+ limit = rvalue + n;
+ limit += strspn(limit, WHITESPACE);
+
+ if (!*limit) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Expected space separated pair of device node and bandwidth. Ignoring.");
+ return 0;
+ }
+
+ path = strndup(rvalue, n);
+ if (!path)
+ return log_oom();
+
+ if (!path_startswith(path, "/dev")) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path);
+ return 0;
+ }
+
+ if (streq("max", limit)) {
+ num = CGROUP_LIMIT_MAX;
+ } else {
+ r = parse_size(limit, 1000, &num);
+ if (r < 0 || num <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "IO Limit '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ }
+
+ LIST_FOREACH(device_limits, t, c->io_device_limits) {
+ if (path_equal(path, t->path)) {
+ l = t;
+ break;
+ }
+ }
+
+ if (!l) {
+ l = new0(CGroupIODeviceLimit, 1);
+ if (!l)
+ return log_oom();
+
+ l->path = path;
+ path = NULL;
+ l->rbps_max = CGROUP_LIMIT_MAX;
+ l->wbps_max = CGROUP_LIMIT_MAX;
+
+ LIST_PREPEND(device_limits, c->io_device_limits, l);
+ }
+
+ if (read)
+ l->rbps_max = num;
+ else
+ l->wbps_max = num;
+
+ return 0;
+}
+
int config_parse_blockio_weight(
const char *unit,
const char *filename,
@@ -3830,6 +4020,9 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_memory_limit, "LIMIT" },
{ config_parse_device_allow, "DEVICE" },
{ config_parse_device_policy, "POLICY" },
+ { config_parse_io_limit, "LIMIT" },
+ { config_parse_io_weight, "WEIGHT" },
+ { config_parse_io_device_weight, "DEVICEWEIGHT" },
{ config_parse_blockio_bandwidth, "BANDWIDTH" },
{ config_parse_blockio_weight, "WEIGHT" },
{ config_parse_blockio_device_weight, "DEVICEWEIGHT" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 34f15afa62..b36a2e3a02 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -86,6 +86,9 @@ int config_parse_memory_limit(const char *unit, const char *filename, unsigned l
int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_device_policy(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_device_allow(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_io_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_io_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_io_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_blockio_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_blockio_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_blockio_bandwidth(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/main.c b/src/core/main.c
index ed4d42c8cc..6397aadc73 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -122,6 +122,7 @@ static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;
static bool arg_default_cpu_accounting = false;
+static bool arg_default_io_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
@@ -691,6 +692,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit },
{ "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
{ "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
+ { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
@@ -733,6 +735,7 @@ static void manager_set_defaults(Manager *m) {
m->default_start_limit_interval = arg_default_start_limit_interval;
m->default_start_limit_burst = arg_default_start_limit_burst;
m->default_cpu_accounting = arg_default_cpu_accounting;
+ m->default_io_accounting = arg_default_io_accounting;
m->default_blockio_accounting = arg_default_blockio_accounting;
m->default_memory_accounting = arg_default_memory_accounting;
m->default_tasks_accounting = arg_default_tasks_accounting;
diff --git a/src/core/manager.c b/src/core/manager.c
index e192cd475d..7838f56fd2 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1494,7 +1494,7 @@ void manager_clear_jobs(Manager *m) {
while ((j = hashmap_first(m->jobs)))
/* No need to recurse. We're cancelling all jobs. */
- job_finish_and_invalidate(j, JOB_CANCELED, false);
+ job_finish_and_invalidate(j, JOB_CANCELED, false, false);
}
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
diff --git a/src/core/manager.h b/src/core/manager.h
index 4bccca75cb..6ed15c1a41 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -255,6 +255,7 @@ struct Manager {
bool default_cpu_accounting;
bool default_memory_accounting;
+ bool default_io_accounting;
bool default_blockio_accounting;
bool default_tasks_accounting;
diff --git a/src/core/namespace.c b/src/core/namespace.c
index ef85bfec23..203d122810 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -44,6 +44,8 @@
#include "user-util.h"
#include "util.h"
+#define DEV_MOUNT_OPTIONS (MS_NOSUID|MS_STRICTATIME|MS_NOEXEC)
+
typedef enum MountMode {
/* This is ordered by priority! */
INACCESSIBLE,
@@ -153,7 +155,7 @@ static int mount_dev(BindMount *m) {
dev = strjoina(temporary_mount, "/dev");
(void) mkdir(dev, 0755);
- if (mount("tmpfs", dev, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755") < 0) {
+ if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) {
r = -errno;
goto fail;
}
@@ -330,9 +332,11 @@ static int make_read_only(BindMount *m) {
if (IN_SET(m->mode, INACCESSIBLE, READONLY))
r = bind_remount_recursive(m->path, true);
- else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV))
+ else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
r = bind_remount_recursive(m->path, false);
- else
+ if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
+ r = mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL);
+ } else
r = 0;
if (m->ignore && r == -ENOENT)
diff --git a/src/core/system.conf b/src/core/system.conf
index eacd7ee282..db8b7acd78 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -38,6 +38,7 @@
#DefaultStartLimitBurst=5
#DefaultEnvironment=
#DefaultCPUAccounting=no
+#DefaultIOAccounting=no
#DefaultBlockIOAccounting=no
#DefaultMemoryAccounting=no
#DefaultTasksAccounting=yes
diff --git a/src/core/transaction.c b/src/core/transaction.c
index d5370b2a14..e06a48a2f1 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -597,7 +597,7 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) {
/* Not invalidating recursively. Avoids triggering
* OnFailure= actions of dependent jobs. Also avoids
* invalidating our iterator. */
- job_finish_and_invalidate(j, JOB_CANCELED, false);
+ job_finish_and_invalidate(j, JOB_CANCELED, false, false);
}
}
diff --git a/src/core/unit.c b/src/core/unit.c
index dc8325515c..2fff3f2d8b 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -132,6 +132,7 @@ static void unit_init(Unit *u) {
* been initialized */
cc->cpu_accounting = u->manager->default_cpu_accounting;
+ cc->io_accounting = u->manager->default_io_accounting;
cc->blockio_accounting = u->manager->default_blockio_accounting;
cc->memory_accounting = u->manager->default_memory_accounting;
cc->tasks_accounting = u->manager->default_tasks_accounting;
@@ -1213,6 +1214,7 @@ static int unit_add_startup_units(Unit *u) {
return 0;
if (c->startup_cpu_shares == CGROUP_CPU_SHARES_INVALID &&
+ c->startup_io_weight == CGROUP_WEIGHT_INVALID &&
c->startup_blockio_weight == CGROUP_BLKIO_WEIGHT_INVALID)
return 0;
@@ -1916,12 +1918,12 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
case JOB_VERIFY_ACTIVE:
if (UNIT_IS_ACTIVE_OR_RELOADING(ns))
- job_finish_and_invalidate(u->job, JOB_DONE, true);
+ job_finish_and_invalidate(u->job, JOB_DONE, true, false);
else if (u->job->state == JOB_RUNNING && ns != UNIT_ACTIVATING) {
unexpected = true;
if (UNIT_IS_INACTIVE_OR_FAILED(ns))
- job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true);
+ job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
}
break;
@@ -1932,12 +1934,12 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
if (u->job->state == JOB_RUNNING) {
if (ns == UNIT_ACTIVE)
- job_finish_and_invalidate(u->job, reload_success ? JOB_DONE : JOB_FAILED, true);
+ job_finish_and_invalidate(u->job, reload_success ? JOB_DONE : JOB_FAILED, true, false);
else if (ns != UNIT_ACTIVATING && ns != UNIT_RELOADING) {
unexpected = true;
if (UNIT_IS_INACTIVE_OR_FAILED(ns))
- job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true);
+ job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false);
}
}
@@ -1948,10 +1950,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
case JOB_TRY_RESTART:
if (UNIT_IS_INACTIVE_OR_FAILED(ns))
- job_finish_and_invalidate(u->job, JOB_DONE, true);
+ job_finish_and_invalidate(u->job, JOB_DONE, true, false);
else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
unexpected = true;
- job_finish_and_invalidate(u->job, JOB_FAILED, true);
+ job_finish_and_invalidate(u->job, JOB_FAILED, true, false);
}
break;
diff --git a/src/network/networkd-netdev-bridge.c b/src/network/networkd-netdev-bridge.c
index 3e44dd7960..4cfd00413f 100644
--- a/src/network/networkd-netdev-bridge.c
+++ b/src/network/networkd-netdev-bridge.c
@@ -96,6 +96,12 @@ static int netdev_bridge_post_create(NetDev *netdev, Link *link, sd_netlink_mess
return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_QUERIER attribute: %m");
}
+ if (b->mcast_snooping >= 0) {
+ r = sd_netlink_message_append_u8(req, IFLA_BR_MCAST_SNOOPING, b->mcast_snooping);
+ if (r < 0)
+ return log_netdev_error_errno(netdev, r, "Could not append IFLA_BR_MCAST_SNOOPING attribute: %m");
+ }
+
r = sd_netlink_message_close_container(req);
if (r < 0)
return log_netdev_error_errno(netdev, r, "Could not append IFLA_LINKINFO attribute: %m");
@@ -121,6 +127,7 @@ static void bridge_init(NetDev *n) {
assert(b);
b->mcast_querier = -1;
+ b->mcast_snooping = -1;
}
const NetDevVTable bridge_vtable = {
diff --git a/src/network/networkd-netdev-bridge.h b/src/network/networkd-netdev-bridge.h
index b921439f02..f2ae21fc50 100644
--- a/src/network/networkd-netdev-bridge.h
+++ b/src/network/networkd-netdev-bridge.h
@@ -25,6 +25,7 @@ typedef struct Bridge {
NetDev meta;
int mcast_querier;
+ int mcast_snooping;
usec_t forward_delay;
usec_t hello_time;
diff --git a/src/network/networkd-netdev-gperf.gperf b/src/network/networkd-netdev-gperf.gperf
index 1ebd0fdf20..ba04bb0165 100644
--- a/src/network/networkd-netdev-gperf.gperf
+++ b/src/network/networkd-netdev-gperf.gperf
@@ -99,3 +99,4 @@ Bridge.HelloTimeSec, config_parse_sec, 0,
Bridge.MaxAgeSec, config_parse_sec, 0, offsetof(Bridge, max_age)
Bridge.ForwardDelaySec, config_parse_sec, 0, offsetof(Bridge, forward_delay)
Bridge.MulticastQuerier, config_parse_tristate, 0, offsetof(Bridge, mcast_querier)
+Bridge.MulticastSnooping, config_parse_tristate, 0, offsetof(Bridge, mcast_snooping)
diff --git a/src/network/networkd-netdev.c b/src/network/networkd-netdev.c
index d7d014f05d..851a36290c 100644
--- a/src/network/networkd-netdev.c
+++ b/src/network/networkd-netdev.c
@@ -656,7 +656,7 @@ static int netdev_load_one(Manager *manager, const char *filename) {
if (!netdev->filename)
return log_oom();
- if (!netdev->mac) {
+ if (!netdev->mac && netdev->kind != NETDEV_KIND_VLAN) {
r = netdev_get_mac(netdev->ifname, &netdev->mac);
if (r < 0)
return log_error_errno(r, "Failed to generate predictable MAC address for %s: %m", netdev->ifname);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index cfced96804..8f0df84793 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -154,7 +154,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur);
} else if (STR_IN_SET(field,
- "CPUAccounting", "MemoryAccounting", "BlockIOAccounting", "TasksAccounting",
+ "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting",
"SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "NoNewPrivileges",
@@ -207,6 +207,17 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "t", u);
+ } else if (STR_IN_SET(field, "IOWeight", "StartupIOWeight")) {
+ uint64_t u;
+
+ r = cg_weight_parse(eq, &u);
+ if (r < 0) {
+ log_error("Failed to parse %s value %s.", field, eq);
+ return -EINVAL;
+ }
+
+ r = sd_bus_message_append(m, "v", "t", u);
+
} else if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight")) {
uint64_t u;
@@ -273,7 +284,8 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "a(ss)", 1, path, rwm);
}
- } else if (STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+ } else if (STR_IN_SET(field, "IOReadBandwidthMax", "IOWriteBandwidthMax",
+ "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
if (isempty(eq))
r = sd_bus_message_append(m, "v", "a(st)", 0);
@@ -295,16 +307,20 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
return -EINVAL;
}
- r = parse_size(bandwidth, 1000, &bytes);
- if (r < 0) {
- log_error("Failed to parse byte value %s.", bandwidth);
- return -EINVAL;
+ if (streq(bandwidth, "max")) {
+ bytes = CGROUP_LIMIT_MAX;
+ } else {
+ r = parse_size(bandwidth, 1000, &bytes);
+ if (r < 0) {
+ log_error("Failed to parse byte value %s.", bandwidth);
+ return -EINVAL;
+ }
}
r = sd_bus_message_append(m, "v", "a(st)", 1, path, bytes);
}
- } else if (streq(field, "BlockIODeviceWeight")) {
+ } else if (STR_IN_SET(field, "IODeviceWeight", "BlockIODeviceWeight")) {
if (isempty(eq))
r = sd_bus_message_append(m, "v", "a(st)", 0);
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index bc35c25c1b..0faf37d320 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -4428,7 +4428,7 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte
return 0;
- } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && streq(name, "BlockIODeviceWeight")) {
+ } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IODeviceWeight") || streq(name, "BlockIODeviceWeight"))) {
const char *path;
uint64_t weight;
@@ -4447,7 +4447,8 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte
return 0;
- } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) {
+ } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IOReadBandwidthMax") || streq(name, "IOWriteBandwidthMax") ||
+ streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) {
const char *path;
uint64_t bandwidth;
diff --git a/src/vconsole/vconsole-setup.c b/src/vconsole/vconsole-setup.c
index 8a1b824e65..08ae4aad27 100644
--- a/src/vconsole/vconsole-setup.c
+++ b/src/vconsole/vconsole-setup.c
@@ -195,9 +195,15 @@ static void font_copy_to_all_vcs(int fd) {
unsigned char map8[E_TABSZ];
unsigned short map16[E_TABSZ];
struct unimapdesc unimapd;
- struct unipair unipairs[USHRT_MAX];
+ _cleanup_free_ struct unipair* unipairs = NULL;
int i, r;
+ unipairs = new(struct unipair, USHRT_MAX);
+ if (unipairs == NULL) {
+ log_error("Not enough memory to copy fonts");
+ return;
+ }
+
/* get active, and 16 bit mask of used VT numbers */
r = ioctl(fd, VT_GETSTATE, &vcs);
if (r < 0) {