diff options
39 files changed, 1027 insertions, 756 deletions
diff --git a/Makefile.am b/Makefile.am index 305099ab66..f8e1fac967 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3016,6 +3016,8 @@ systemd_nspawn_SOURCES = \ src/nspawn/nspawn-expose-ports.h \ src/nspawn/nspawn-cgroup.c \ src/nspawn/nspawn-cgroup.h \ + src/nspawn/nspawn-seccomp.c \ + src/nspawn/nspawn-seccomp.h \ src/nspawn/nspawn-register.c \ src/nspawn/nspawn-register.h \ src/nspawn/nspawn-setuid.c \ diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 066f2cc19b..570619a743 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -114,6 +114,13 @@ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para> </listitem> </varlistentry> + <varlistentry> + <term><option>Memory</option></term> + <listitem> + <para><varname>MemoryMax</varname> replaces <varname>MemoryLimit</varname>. <varname>MemoryLow</varname> + and <varname>MemoryHigh</varname> are effective only on unified hierarchy.</para> + </listitem> + </varlistentry> </variablelist> </para> @@ -213,6 +220,67 @@ </varlistentry> <varlistentry> + <term><varname>MemoryLow=<replaceable>bytes</replaceable></varname></term> + + <listitem> + <para>Specify the best-effort memory usage protection of the executed processes in this unit. If the memory + usages of this unit and all its ancestors are below their low boundaries, this unit's memory won't be + reclaimed as long as memory can be reclaimed from unprotected units.</para> + + <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. This controls the + <literal>memory.low</literal> control group attribute. For details about this control group attribute, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> + + <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>MemoryHigh=<replaceable>bytes</replaceable></varname></term> + + <listitem> + <para>Specify the high limit on memory usage of the executed processes in this unit. Memory usage may go + above the limit if unavoidable, but the processes are heavily slowed down and memory is taken away + aggressively in such cases. This is the main mechanism to control memory usage of a unit.</para> + + <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the + special value <literal>max</literal>, no memory limit is applied. This controls the + <literal>memory.high</literal> control group attribute. For details about this control group attribute, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> + + <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term><varname>MemoryMax=<replaceable>bytes</replaceable></varname></term> + + <listitem> + <para>Specify the absolute limit on memory usage of the executed processes in this unit. If memory usage + cannot be contained under the limit, out-of-memory killer is invoked inside the unit. It is recommended to + use <varname>MemoryHigh=</varname> as the main control mechanism and use <varname>MemoryMax=</varname> as the + last line of defense.</para> + + <para>Takes a memory size in bytes. If the value is suffixed with K, M, G or T, the specified memory size is + parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the + special value <literal>max</literal>, no memory limit is applied. This controls the + <literal>memory.max</literal> control group attribute. For details about this control group attribute, see + <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para> + + <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the unified control group hierarchy is used. Use + <varname>MemoryLimit=</varname> on systems using the legacy control group hierarchy.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term> <listitem> @@ -230,6 +298,9 @@ url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para> <para>Implies <literal>MemoryAccounting=true</literal>.</para> + + <para>This setting is supported only if the legacy control group hierarchy is used. Use + <varname>MemoryMax=</varname> on systems using the unified control group hierarchy.</para> </listitem> </varlistentry> diff --git a/src/basic/missing.h b/src/basic/missing.h index 651e414395..2077ada72d 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -453,6 +453,18 @@ struct btrfs_ioctl_quota_ctl_args { #define MQUEUE_MAGIC 0x19800202 #endif +#ifndef SECURITYFS_MAGIC +#define SECURITYFS_MAGIC 0x73636673 +#endif + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + #ifndef MS_MOVE #define MS_MOVE 8192 #endif diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 0fb63b1bd1..fbe69df4e9 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -46,7 +46,10 @@ void cgroup_context_init(CGroupContext *c) { c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID; c->cpu_quota_per_sec_usec = USEC_INFINITY; - c->memory_limit = (uint64_t) -1; + c->memory_high = CGROUP_LIMIT_MAX; + c->memory_max = CGROUP_LIMIT_MAX; + + c->memory_limit = CGROUP_LIMIT_MAX; c->io_weight = CGROUP_WEIGHT_INVALID; c->startup_io_weight = CGROUP_WEIGHT_INVALID; @@ -147,6 +150,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" + "%sMemoryLow=%" PRIu64 "\n" + "%sMemoryHigh=%" PRIu64 "\n" + "%sMemoryMax=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" "%sTasksMax=%" PRIu64 "\n" "%sDevicePolicy=%s\n" @@ -163,6 +169,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, + prefix, c->memory_low, + prefix, c->memory_high, + prefix, c->memory_max, prefix, c->memory_limit, prefix, c->tasks_max, prefix, cgroup_device_policy_to_string(c->device_policy), @@ -496,6 +505,23 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de return n; } +static bool cgroup_context_has_unified_memory_config(CGroupContext *c) { + return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX; +} + +static void cgroup_apply_unified_memory_limit(const char *path, const char *file, uint64_t v) { + char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max"; + int r; + + if (v != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", v); + + r = cg_set_attribute("memory", path, file, buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set %s on %s: %m", file, path); +} + void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { bool is_root; int r; @@ -662,26 +688,30 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (c->memory_limit != (uint64_t) -1) { - char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - - sprintf(buf, "%" PRIu64 "\n", c->memory_limit); + if (cg_unified() > 0) { + uint64_t max = c->memory_max; - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (cgroup_context_has_unified_memory_config(c)) + max = c->memory_max; else - r = cg_set_attribute("memory", path, "memory.max", buf); + max = c->memory_limit; + cgroup_apply_unified_memory_limit(path, "memory.low", c->memory_low); + cgroup_apply_unified_memory_limit(path, "memory.high", c->memory_high); + cgroup_apply_unified_memory_limit(path, "memory.max", max); } else { - if (cg_unified() <= 0) - r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1"); + char buf[DECIMAL_STR_MAX(uint64_t) + 1]; + + if (c->memory_limit != CGROUP_LIMIT_MAX) + xsprintf(buf, "%" PRIu64 "\n", c->memory_limit); else - r = cg_set_attribute("memory", path, "memory.max", "max"); - } + xsprintf(buf, "%" PRIu64 "\n", c->memory_max); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path); + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set memory.limit_in_bytes on %s: %m", path); + } } if ((mask & CGROUP_MASK_DEVICES) && !is_root) { @@ -778,7 +808,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || - c->memory_limit != (uint64_t) -1) + c->memory_limit != CGROUP_LIMIT_MAX || + cgroup_context_has_unified_memory_config(c)) mask |= CGROUP_MASK_MEMORY; if (c->device_allow || diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 2b1edbafc4..ff87adfba1 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -94,6 +94,10 @@ struct CGroupContext { LIST_HEAD(CGroupIODeviceWeight, io_device_weights); LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index eef1c47c14..27050b4507 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -228,6 +228,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0), SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0), + SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0), + SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0), + SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0), SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0), SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), @@ -260,7 +263,7 @@ static int bus_cgroup_set_transient_property( if (mode != UNIT_CHECK) { c->delegate = b; - unit_write_drop_in_private(u, mode, name, b ? "Delegate=yes" : "Delegate=no"); + unit_write_drop_in_private(u, mode, name, b ? "Delegate=yes\n" : "Delegate=no\n"); } return 1; @@ -295,7 +298,7 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->cpu_accounting = b; unit_invalidate_cgroup(u, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU); - unit_write_drop_in_private(u, mode, name, b ? "CPUAccounting=yes" : "CPUAccounting=no"); + unit_write_drop_in_private(u, mode, name, b ? "CPUAccounting=yes\n" : "CPUAccounting=no\n"); } return 1; @@ -315,9 +318,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_CPU); if (shares == CGROUP_CPU_SHARES_INVALID) - unit_write_drop_in_private(u, mode, name, "CPUShares="); + unit_write_drop_in_private(u, mode, name, "CPUShares=\n"); else - unit_write_drop_in_private_format(u, mode, name, "CPUShares=%" PRIu64, shares); + unit_write_drop_in_private_format(u, mode, name, "CPUShares=%" PRIu64 "\n", shares); } return 1; @@ -337,9 +340,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_CPU); if (shares == CGROUP_CPU_SHARES_INVALID) - unit_write_drop_in_private(u, mode, name, "StartupCPUShares="); + unit_write_drop_in_private(u, mode, name, "StartupCPUShares=\n"); else - unit_write_drop_in_private_format(u, mode, name, "StartupCPUShares=%" PRIu64, shares); + unit_write_drop_in_private_format(u, mode, name, "StartupCPUShares=%" PRIu64 "\n", shares); } return 1; @@ -357,7 +360,7 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->cpu_quota_per_sec_usec = u64; unit_invalidate_cgroup(u, CGROUP_MASK_CPU); - unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%", (double) (c->cpu_quota_per_sec_usec / 10000)); + unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%\n", (double) (c->cpu_quota_per_sec_usec / 10000)); } return 1; @@ -372,7 +375,7 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->io_accounting = b; unit_invalidate_cgroup(u, CGROUP_MASK_IO); - unit_write_drop_in_private(u, mode, name, b ? "IOAccounting=yes" : "IOAccounting=no"); + unit_write_drop_in_private(u, mode, name, b ? "IOAccounting=yes\n" : "IOAccounting=no\n"); } return 1; @@ -392,9 +395,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_IO); if (weight == CGROUP_WEIGHT_INVALID) - unit_write_drop_in_private(u, mode, name, "IOWeight="); + unit_write_drop_in_private(u, mode, name, "IOWeight=\n"); else - unit_write_drop_in_private_format(u, mode, name, "IOWeight=%" PRIu64, weight); + unit_write_drop_in_private_format(u, mode, name, "IOWeight=%" PRIu64 "\n", weight); } return 1; @@ -414,9 +417,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_IO); if (weight == CGROUP_WEIGHT_INVALID) - unit_write_drop_in_private(u, mode, name, "StartupIOWeight="); + unit_write_drop_in_private(u, mode, name, "StartupIOWeight=\n"); else - unit_write_drop_in_private_format(u, mode, name, "StartupIOWeight=%" PRIu64, weight); + unit_write_drop_in_private_format(u, mode, name, "StartupIOWeight=%" PRIu64 "\n", weight); } return 1; @@ -589,7 +592,7 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->blockio_accounting = b; unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO); - unit_write_drop_in_private(u, mode, name, b ? "BlockIOAccounting=yes" : "BlockIOAccounting=no"); + unit_write_drop_in_private(u, mode, name, b ? "BlockIOAccounting=yes\n" : "BlockIOAccounting=no\n"); } return 1; @@ -609,9 +612,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO); if (weight == CGROUP_BLKIO_WEIGHT_INVALID) - unit_write_drop_in_private(u, mode, name, "BlockIOWeight="); + unit_write_drop_in_private(u, mode, name, "BlockIOWeight=\n"); else - unit_write_drop_in_private_format(u, mode, name, "BlockIOWeight=%" PRIu64, weight); + unit_write_drop_in_private_format(u, mode, name, "BlockIOWeight=%" PRIu64 "\n", weight); } return 1; @@ -623,7 +626,7 @@ int bus_cgroup_set_property( if (r < 0) return r; - if (CGROUP_BLKIO_WEIGHT_IS_OK(weight)) + if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight)) return sd_bus_error_set_errnof(error, EINVAL, "StartupBlockIOWeight value out of range"); if (mode != UNIT_CHECK) { @@ -631,9 +634,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO); if (weight == CGROUP_BLKIO_WEIGHT_INVALID) - unit_write_drop_in_private(u, mode, name, "StartupBlockIOWeight="); + unit_write_drop_in_private(u, mode, name, "StartupBlockIOWeight=\n"); else - unit_write_drop_in_private_format(u, mode, name, "StartupBlockIOWeight=%" PRIu64, weight); + unit_write_drop_in_private_format(u, mode, name, "StartupBlockIOWeight=%" PRIu64 "\n", weight); } return 1; @@ -821,7 +824,32 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->memory_accounting = b; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); - unit_write_drop_in_private(u, mode, name, b ? "MemoryAccounting=yes" : "MemoryAccounting=no"); + unit_write_drop_in_private(u, mode, name, b ? "MemoryAccounting=yes\n" : "MemoryAccounting=no\n"); + } + + return 1; + + } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) { + uint64_t v; + + r = sd_bus_message_read(message, "t", &v); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + if (streq(name, "MemoryLow")) + c->memory_low = v; + else if (streq(name, "MemoryHigh")) + c->memory_high = v; + else + c->memory_max = v; + + unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); + + if (v == CGROUP_LIMIT_MAX) + unit_write_drop_in_private_format(u, mode, name, "%s=max\n", name); + else + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64 "\n", name, v); } return 1; @@ -838,9 +866,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); if (limit == (uint64_t) -1) - unit_write_drop_in_private(u, mode, name, "MemoryLimit=infinity"); + unit_write_drop_in_private(u, mode, name, "MemoryLimit=infinity\n"); else - unit_write_drop_in_private_format(u, mode, name, "MemoryLimit=%" PRIu64, limit); + unit_write_drop_in_private_format(u, mode, name, "MemoryLimit=%" PRIu64 "\n", limit); } return 1; @@ -858,13 +886,9 @@ int bus_cgroup_set_property( return -EINVAL; if (mode != UNIT_CHECK) { - char *buf; - c->device_policy = p; unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES); - - buf = strjoina("DevicePolicy=", policy); - unit_write_drop_in_private(u, mode, name, buf); + unit_write_drop_in_private_format(u, mode, name, "DevicePolicy=%s\n", policy); } return 1; @@ -968,7 +992,7 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->tasks_accounting = b; unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); - unit_write_drop_in_private(u, mode, name, b ? "TasksAccounting=yes" : "TasksAccounting=no"); + unit_write_drop_in_private(u, mode, name, b ? "TasksAccounting=yes\n" : "TasksAccounting=no\n"); } return 1; @@ -985,9 +1009,9 @@ int bus_cgroup_set_property( unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); if (limit == (uint64_t) -1) - unit_write_drop_in_private(u, mode, name, "TasksMax=infinity"); + unit_write_drop_in_private(u, mode, name, "TasksMax=infinity\n"); else - unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64, limit); + unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64 "\n", limit); } return 1; diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 06943c6365..e21956def1 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -987,7 +987,7 @@ int bus_exec_context_set_transient_property( } c->working_directory_missing_ok = missing_ok; - unit_write_drop_in_private_format(u, mode, name, "WorkingDirectory=%s%s", missing_ok ? "-" : "", s); + unit_write_drop_in_private_format(u, mode, name, "WorkingDirectory=%s%s\n", missing_ok ? "-" : "", s); } return 1; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 8193418980..00bdc238ce 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -117,6 +117,9 @@ $1.CPUShares, config_parse_cpu_shares, 0, $1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares) $1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) +$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 86b4fb071b..09d3f65c77 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2793,21 +2793,26 @@ int config_parse_memory_limit( void *userdata) { CGroupContext *c = data; - uint64_t bytes; + uint64_t bytes = CGROUP_LIMIT_MAX; int r; - if (isempty(rvalue) || streq(rvalue, "infinity")) { - c->memory_limit = (uint64_t) -1; - return 0; + if (!isempty(rvalue) && !streq(rvalue, "infinity") && !streq(rvalue, "max")) { + r = parse_size(rvalue, 1024, &bytes); + if (r < 0 || bytes < 1) { + log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); + return 0; + } } - r = parse_size(rvalue, 1024, &bytes); - if (r < 0 || bytes < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue); - return 0; - } + if (streq(lvalue, "MemoryLow")) + c->memory_low = bytes; + else if (streq(lvalue, "MemoryHigh")) + c->memory_high = bytes; + else if (streq(lvalue, "MemoryMax")) + c->memory_max = bytes; + else + c->memory_limit = bytes; - c->memory_limit = bytes; return 0; } diff --git a/src/core/manager.c b/src/core/manager.c index 7838f56fd2..14d97a87d0 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1610,9 +1610,9 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + _cleanup_fdset_free_ FDSet *fds = NULL; Manager *m = userdata; - char buf[NOTIFY_BUFFER_MAX+1]; struct iovec iovec = { .iov_base = buf, diff --git a/src/libsystemd-network/dhcp-internal.h b/src/libsystemd-network/dhcp-internal.h index 4662b0d847..99f690897d 100644 --- a/src/libsystemd-network/dhcp-internal.h +++ b/src/libsystemd-network/dhcp-internal.h @@ -65,4 +65,5 @@ int dhcp_packet_verify_headers(DHCPPacket *packet, size_t len, bool checksum); #define DHCP_CLIENT_DONT_DESTROY(client) \ _cleanup_(sd_dhcp_client_unrefp) _unused_ sd_dhcp_client *_dont_destroy_##client = sd_dhcp_client_ref(client) -#define log_dhcp_client(client, fmt, ...) log_internal(LOG_DEBUG, 0, __FILE__, __LINE__, __func__, "DHCP CLIENT (0x%x): " fmt, client->xid, ##__VA_ARGS__) +#define log_dhcp_client_errno(client, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "DHCP CLIENT (0x%x): " fmt, client->xid, ##__VA_ARGS__) +#define log_dhcp_client(client, fmt, ...) log_dhcp_client_errno(client, 0, fmt, ##__VA_ARGS__) diff --git a/src/libsystemd-network/dhcp-protocol.h b/src/libsystemd-network/dhcp-protocol.h index 3e32484c1d..5cf7abbff9 100644 --- a/src/libsystemd-network/dhcp-protocol.h +++ b/src/libsystemd-network/dhcp-protocol.h @@ -59,7 +59,7 @@ typedef struct DHCPPacket DHCPPacket; #define DHCP_IP_UDP_SIZE (int32_t)(sizeof(struct udphdr) + DHCP_IP_SIZE) #define DHCP_MESSAGE_SIZE (int32_t)(sizeof(DHCPMessage)) #define DHCP_DEFAULT_MIN_SIZE 576 /* the minimum internet hosts must be able to receive */ -#define DHCP_MIN_OPTIONS_SIZE DHCP_DEFAULT_MIN_SIZE - DHCP_IP_UDP_SIZE - DHCP_MESSAGE_SIZE +#define DHCP_MIN_OPTIONS_SIZE (DHCP_DEFAULT_MIN_SIZE - DHCP_IP_UDP_SIZE - DHCP_MESSAGE_SIZE) #define DHCP_MAGIC_COOKIE (uint32_t)(0x63825363) enum { diff --git a/src/libsystemd-network/sd-dhcp-client.c b/src/libsystemd-network/sd-dhcp-client.c index ad79c6cc2c..179e5950bd 100644 --- a/src/libsystemd-network/sd-dhcp-client.c +++ b/src/libsystemd-network/sd-dhcp-client.c @@ -53,7 +53,7 @@ struct sd_dhcp_client { sd_event *event; int event_priority; sd_event_source *timeout_resend; - int index; + int ifindex; int fd; union sockaddr_union link; sd_event_source *receive_message; @@ -101,7 +101,7 @@ struct sd_dhcp_client { sd_event_source *timeout_t1; sd_event_source *timeout_t2; sd_event_source *timeout_expire; - sd_dhcp_client_callback_t cb; + sd_dhcp_client_callback_t callback; void *userdata; sd_dhcp_lease *lease; usec_t start_delay; @@ -131,9 +131,10 @@ int sd_dhcp_client_set_callback( sd_dhcp_client *client, sd_dhcp_client_callback_t cb, void *userdata) { + assert_return(client, -EINVAL); - client->cb = cb; + client->callback = cb; client->userdata = userdata; return 0; @@ -151,10 +152,10 @@ int sd_dhcp_client_set_request_option(sd_dhcp_client *client, uint8_t option) { size_t i; assert_return(client, -EINVAL); - assert_return (IN_SET(client->state, DHCP_STATE_INIT, - DHCP_STATE_STOPPED), -EBUSY); + assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY); switch(option) { + case SD_DHCP_OPTION_PAD: case SD_DHCP_OPTION_OVERLOAD: case SD_DHCP_OPTION_MESSAGE_TYPE: @@ -182,9 +183,9 @@ int sd_dhcp_client_set_request_option(sd_dhcp_client *client, uint8_t option) { int sd_dhcp_client_set_request_address( sd_dhcp_client *client, const struct in_addr *last_addr) { + assert_return(client, -EINVAL); - assert_return (IN_SET(client->state, DHCP_STATE_INIT, - DHCP_STATE_STOPPED), -EBUSY); + assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY); if (last_addr) client->last_addr = last_addr->s_addr; @@ -194,14 +195,13 @@ int sd_dhcp_client_set_request_address( return 0; } -int sd_dhcp_client_set_index(sd_dhcp_client *client, int interface_index) { - assert_return(client, -EINVAL); - assert_return (IN_SET(client->state, DHCP_STATE_INIT, - DHCP_STATE_STOPPED), -EBUSY); - assert_return(interface_index > 0, -EINVAL); +int sd_dhcp_client_set_ifindex(sd_dhcp_client *client, int ifindex) { - client->index = interface_index; + assert_return(client, -EINVAL); + assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED), -EBUSY); + assert_return(ifindex > 0, -EINVAL); + client->ifindex = ifindex; return 0; } @@ -231,8 +231,7 @@ int sd_dhcp_client_set_mac( return 0; if (!IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_STOPPED)) { - log_dhcp_client(client, "Changing MAC address on running DHCP " - "client, restarting"); + log_dhcp_client(client, "Changing MAC address on running DHCP client, restarting"); need_restart = true; client_stop(client, SD_DHCP_CLIENT_EVENT_STOP); } @@ -284,14 +283,17 @@ int sd_dhcp_client_set_client_id( assert_return(data_len > 0 && data_len <= MAX_CLIENT_ID_LEN, -EINVAL); switch (type) { + case ARPHRD_ETHER: if (data_len != ETH_ALEN) return -EINVAL; break; + case ARPHRD_INFINIBAND: if (data_len != INFINIBAND_ALEN) return -EINVAL; break; + default: break; } @@ -348,7 +350,7 @@ int sd_dhcp_client_set_iaid_duid( /* If IAID is not configured, generate it. */ if (iaid == 0) { - r = dhcp_identifier_set_iaid(client->index, client->mac_addr, + r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len, &client->client_id.ns.iaid); if (r < 0) @@ -435,28 +437,29 @@ int sd_dhcp_client_set_mtu(sd_dhcp_client *client, uint32_t mtu) { int sd_dhcp_client_get_lease(sd_dhcp_client *client, sd_dhcp_lease **ret) { assert_return(client, -EINVAL); - assert_return(ret, -EINVAL); if (client->state != DHCP_STATE_BOUND && client->state != DHCP_STATE_RENEWING && client->state != DHCP_STATE_REBINDING) return -EADDRNOTAVAIL; - *ret = client->lease; + if (ret) + *ret = client->lease; return 0; } static void client_notify(sd_dhcp_client *client, int event) { - if (client->cb) - client->cb(client, event, client->userdata); + assert(client); + + if (client->callback) + client->callback(client, event, client->userdata); } static int client_initialize(sd_dhcp_client *client) { assert_return(client, -EINVAL); - client->receive_message = - sd_event_source_unref(client->receive_message); + client->receive_message = sd_event_source_unref(client->receive_message); client->fd = asynchronous_close(client->fd); @@ -565,7 +568,7 @@ static int client_message_init( client->client_id.type = 255; - r = dhcp_identifier_set_iaid(client->index, client->mac_addr, client->mac_addr_len, &client->client_id.ns.iaid); + r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len, &client->client_id.ns.iaid); if (r < 0) return r; @@ -751,8 +754,9 @@ static int client_send_request(sd_dhcp_client *client) { size_t optoffset, optlen; int r; - r = client_message_init(client, &request, DHCP_REQUEST, - &optlen, &optoffset); + assert(client); + + r = client_message_init(client, &request, DHCP_REQUEST, &optlen, &optoffset); if (r < 0) return r; @@ -849,18 +853,23 @@ static int client_send_request(sd_dhcp_client *client) { return r; switch (client->state) { + case DHCP_STATE_REQUESTING: log_dhcp_client(client, "REQUEST (requesting)"); break; + case DHCP_STATE_INIT_REBOOT: log_dhcp_client(client, "REQUEST (init-reboot)"); break; + case DHCP_STATE_RENEWING: log_dhcp_client(client, "REQUEST (renewing)"); break; + case DHCP_STATE_REBINDING: log_dhcp_client(client, "REQUEST (rebinding)"); break; + default: log_dhcp_client(client, "REQUEST (invalid)"); break; @@ -892,6 +901,7 @@ static int client_timeout_resend( goto error; switch (client->state) { + case DHCP_STATE_RENEWING: time_left = (client->lease->t2 - client->lease->t1) / 2; @@ -1101,15 +1111,14 @@ static int client_start_delayed(sd_dhcp_client *client) { assert_return(client, -EINVAL); assert_return(client->event, -EINVAL); - assert_return(client->index > 0, -EINVAL); + assert_return(client->ifindex > 0, -EINVAL); assert_return(client->fd < 0, -EBUSY); assert_return(client->xid == 0, -EINVAL); - assert_return(client->state == DHCP_STATE_INIT || - client->state == DHCP_STATE_INIT_REBOOT, -EBUSY); + assert_return(IN_SET(client->state, DHCP_STATE_INIT, DHCP_STATE_INIT_REBOOT), -EBUSY); client->xid = random_u32(); - r = dhcp_network_bind_raw_socket(client->index, &client->link, + r = dhcp_network_bind_raw_socket(client->ifindex, &client->link, client->xid, client->mac_addr, client->mac_addr_len, client->arp_type); if (r < 0) { @@ -1151,13 +1160,15 @@ static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) DHCP_CLIENT_DONT_DESTROY(client); int r; + assert(client); + client->receive_message = sd_event_source_unref(client->receive_message); client->fd = asynchronous_close(client->fd); client->state = DHCP_STATE_REBINDING; client->attempt = 1; - r = dhcp_network_bind_raw_socket(client->index, &client->link, + r = dhcp_network_bind_raw_socket(client->ifindex, &client->link, client->xid, client->mac_addr, client->mac_addr_len, client->arp_type); if (r < 0) { @@ -1624,7 +1635,7 @@ static int client_receive_message_udp( sd_dhcp_client *client = userdata; _cleanup_free_ DHCPMessage *message = NULL; - const struct ether_addr zero_mac = { { 0, 0, 0, 0, 0, 0 } }; + const struct ether_addr zero_mac = {}; const struct ether_addr *expected_chaddr = NULL; uint8_t expected_hlen = 0; ssize_t len, buflen; @@ -1645,9 +1656,9 @@ static int client_receive_message_udp( if (errno == EAGAIN || errno == EINTR) return 0; - log_dhcp_client(client, "Could not receive message from UDP socket: %m"); - return -errno; - } else if ((size_t)len < sizeof(DHCPMessage)) { + return log_dhcp_client_errno(client, errno, "Could not receive message from UDP socket: %m"); + } + if ((size_t) len < sizeof(DHCPMessage)) { log_dhcp_client(client, "Too small to be a DHCP message: ignoring"); return 0; } @@ -1778,7 +1789,7 @@ int sd_dhcp_client_start(sd_dhcp_client *client) { r = client_start(client); if (r >= 0) - log_dhcp_client(client, "STARTED on ifindex %i", client->index); + log_dhcp_client(client, "STARTED on ifindex %i", client->ifindex); return r; } @@ -1822,8 +1833,7 @@ int sd_dhcp_client_detach_event(sd_dhcp_client *client) { } sd_event *sd_dhcp_client_get_event(sd_dhcp_client *client) { - if (!client) - return NULL; + assert_return(client, NULL); return client->event; } @@ -1879,13 +1889,12 @@ int sd_dhcp_client_new(sd_dhcp_client **ret) { client->n_ref = 1; client->state = DHCP_STATE_INIT; - client->index = -1; + client->ifindex = -1; client->fd = -1; client->attempt = 1; client->mtu = DHCP_DEFAULT_MIN_SIZE; client->req_opts_size = ELEMENTSOF(default_req_opts); - client->req_opts = memdup(default_req_opts, client->req_opts_size); if (!client->req_opts) return -ENOMEM; diff --git a/src/libsystemd-network/sd-dhcp6-client.c b/src/libsystemd-network/sd-dhcp6-client.c index 05972e01c9..463fde401c 100644 --- a/src/libsystemd-network/sd-dhcp6-client.c +++ b/src/libsystemd-network/sd-dhcp6-client.c @@ -45,7 +45,7 @@ struct sd_dhcp6_client { enum DHCP6State state; sd_event *event; int event_priority; - int index; + int ifindex; struct in6_addr local_address; uint8_t mac_addr[MAX_MAC_ADDR_LEN]; size_t mac_addr_len; @@ -64,7 +64,7 @@ struct sd_dhcp6_client { uint8_t retransmit_count; sd_event_source *timeout_resend; sd_event_source *timeout_resend_expire; - sd_dhcp6_client_callback_t cb; + sd_dhcp6_client_callback_t callback; void *userdata; struct duid duid; size_t duid_len; @@ -115,22 +115,22 @@ int sd_dhcp6_client_set_callback( sd_dhcp6_client *client, sd_dhcp6_client_callback_t cb, void *userdata) { + assert_return(client, -EINVAL); - client->cb = cb; + client->callback = cb; client->userdata = userdata; return 0; } -int sd_dhcp6_client_set_index(sd_dhcp6_client *client, int interface_index) { - assert_return(client, -EINVAL); - assert_return(interface_index >= -1, -EINVAL); +int sd_dhcp6_client_set_ifindex(sd_dhcp6_client *client, int ifindex) { + assert_return(client, -EINVAL); + assert_return(ifindex >= -1, -EINVAL); assert_return(IN_SET(client->state, DHCP6_STATE_STOPPED), -EBUSY); - client->index = interface_index; - + client->ifindex = ifindex; return 0; } @@ -256,6 +256,7 @@ int sd_dhcp6_client_set_request_option(sd_dhcp6_client *client, uint16_t option) assert_return(client->state == DHCP6_STATE_STOPPED, -EBUSY); switch(option) { + case SD_DHCP6_OPTION_DNS_SERVERS: case SD_DHCP6_OPTION_DOMAIN_LIST: case SD_DHCP6_OPTION_SNTP_SERVERS: @@ -292,20 +293,25 @@ int sd_dhcp6_client_get_lease(sd_dhcp6_client *client, sd_dhcp6_lease **ret) { } static void client_notify(sd_dhcp6_client *client, int event) { - if (client->cb) - client->cb(client, event, client->userdata); + assert(client); + + if (client->callback) + client->callback(client, event, client->userdata); } static void client_set_lease(sd_dhcp6_client *client, sd_dhcp6_lease *lease) { + assert(client); + if (client->lease) { dhcp6_lease_clear_timers(&client->lease->ia); sd_dhcp6_lease_unref(client->lease); } + client->lease = lease; } static int client_reset(sd_dhcp6_client *client) { - assert_return(client, -EINVAL); + assert(client); client_set_lease(client, NULL); @@ -353,6 +359,8 @@ static int client_send_message(sd_dhcp6_client *client, usec_t time_now) { usec_t elapsed_usec; be16_t elapsed_time; + assert(client); + len = sizeof(DHCP6Message) + optlen; message = malloc0(len); @@ -454,9 +462,9 @@ static int client_send_message(sd_dhcp6_client *client, usec_t time_now) { static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) { sd_dhcp6_client *client = userdata; - assert_return(s, -EINVAL); - assert_return(client, -EINVAL); - assert_return(client->lease, -EINVAL); + assert(s); + assert(client); + assert(client->lease); client->lease->ia.timeout_t2 = sd_event_source_unref(client->lease->ia.timeout_t2); @@ -471,9 +479,9 @@ static int client_timeout_t2(sd_event_source *s, uint64_t usec, void *userdata) static int client_timeout_t1(sd_event_source *s, uint64_t usec, void *userdata) { sd_dhcp6_client *client = userdata; - assert_return(s, -EINVAL); - assert_return(client, -EINVAL); - assert_return(client->lease, -EINVAL); + assert(s); + assert(client); + assert(client->lease); client->lease->ia.timeout_t1 = sd_event_source_unref(client->lease->ia.timeout_t1); @@ -671,7 +679,7 @@ static int client_ensure_iaid(sd_dhcp6_client *client) { if (client->ia_na.id) return 0; - r = dhcp_identifier_set_iaid(client->index, client->mac_addr, client->mac_addr_len, &client->ia_na.id); + r = dhcp_identifier_set_iaid(client->ifindex, client->mac_addr, client->mac_addr_len, &client->ia_na.id); if (r < 0) return r; @@ -690,6 +698,11 @@ static int client_parse_message( bool clientid = false; be32_t iaid_lease; + assert(client); + assert(message); + assert(len >= sizeof(DHCP6Message)); + assert(lease); + option = (uint8_t *)message + sizeof(DHCP6Message); len -= sizeof(DHCP6Message); @@ -834,9 +847,12 @@ static int client_parse_message( } static int client_receive_reply(sd_dhcp6_client *client, DHCP6Message *reply, size_t len) { - int r; _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL; bool rapid_commit; + int r; + + assert(client); + assert(reply); if (reply->type != DHCP6_REPLY) return 0; @@ -865,9 +881,9 @@ static int client_receive_reply(sd_dhcp6_client *client, DHCP6Message *reply, si } static int client_receive_advertise(sd_dhcp6_client *client, DHCP6Message *advertise, size_t len) { - int r; _cleanup_(sd_dhcp6_lease_unrefp) sd_dhcp6_lease *lease = NULL; uint8_t pref_advertise = 0, pref_lease = 0; + int r; if (advertise->type != DHCP6_ADVERTISE) return 0; @@ -898,7 +914,12 @@ static int client_receive_advertise(sd_dhcp6_client *client, DHCP6Message *adver return r; } -static int client_receive_message(sd_event_source *s, int fd, uint32_t revents, void *userdata) { +static int client_receive_message( + sd_event_source *s, + int fd, uint32_t + revents, + void *userdata) { + sd_dhcp6_client *client = userdata; DHCP6_CLIENT_DONT_DESTROY(client); _cleanup_free_ DHCP6Message *message = NULL; @@ -924,8 +945,11 @@ static int client_receive_message(sd_event_source *s, int fd, uint32_t revents, return log_dhcp6_client_errno(client, errno, "Could not receive message from UDP socket: %m"); - } else if ((size_t)len < sizeof(DHCP6Message)) + } + if ((size_t) len < sizeof(DHCP6Message)) { + log_dhcp6_client(client, "Too small to be DHCP6 message: ignoring"); return 0; + } switch(message->type) { case DHCP6_SOLICIT: @@ -1019,7 +1043,7 @@ static int client_start(sd_dhcp6_client *client, enum DHCP6State state) { assert_return(client, -EINVAL); assert_return(client->event, -EINVAL); - assert_return(client->index > 0, -EINVAL); + assert_return(client->ifindex > 0, -EINVAL); assert_return(client->state != state, -EINVAL); client->timeout_resend_expire = @@ -1152,12 +1176,12 @@ int sd_dhcp6_client_is_running(sd_dhcp6_client *client) { } int sd_dhcp6_client_start(sd_dhcp6_client *client) { - int r = 0; enum DHCP6State state = DHCP6_STATE_SOLICITATION; + int r = 0; assert_return(client, -EINVAL); assert_return(client->event, -EINVAL); - assert_return(client->index > 0, -EINVAL); + assert_return(client->ifindex > 0, -EINVAL); assert_return(in_addr_is_link_local(AF_INET6, (const union in_addr_union *) &client->local_address) > 0, -EINVAL); if (!IN_SET(client->state, DHCP6_STATE_STOPPED)) @@ -1175,7 +1199,7 @@ int sd_dhcp6_client_start(sd_dhcp6_client *client) { if (r < 0) return r; - r = dhcp6_network_bind_udp_socket(client->index, &client->local_address); + r = dhcp6_network_bind_udp_socket(client->ifindex, &client->local_address); if (r < 0) { _cleanup_free_ char *p = NULL; @@ -1244,8 +1268,7 @@ int sd_dhcp6_client_detach_event(sd_dhcp6_client *client) { } sd_event *sd_dhcp6_client_get_event(sd_dhcp6_client *client) { - if (!client) - return NULL; + assert_return(client, NULL); return client->event; } @@ -1293,15 +1316,11 @@ int sd_dhcp6_client_new(sd_dhcp6_client **ret) { return -ENOMEM; client->n_ref = 1; - client->ia_na.type = SD_DHCP6_OPTION_IA_NA; - - client->index = -1; - + client->ifindex = -1; client->fd = -1; client->req_opts_len = ELEMENTSOF(default_req_opts); - client->req_opts = new0(be16_t, client->req_opts_len); if (!client->req_opts) return -ENOMEM; diff --git a/src/libsystemd-network/sd-ipv4acd.c b/src/libsystemd-network/sd-ipv4acd.c index c1f43c824b..662885840f 100644 --- a/src/libsystemd-network/sd-ipv4acd.c +++ b/src/libsystemd-network/sd-ipv4acd.c @@ -33,41 +33,25 @@ #include "in-addr-util.h" #include "list.h" #include "random-util.h" -#include "refcnt.h" #include "siphash24.h" +#include "string-util.h" #include "util.h" /* Constants from the RFC */ -#define PROBE_WAIT 1 -#define PROBE_NUM 3 -#define PROBE_MIN 1 -#define PROBE_MAX 2 -#define ANNOUNCE_WAIT 2 -#define ANNOUNCE_NUM 2 -#define ANNOUNCE_INTERVAL 2 -#define MAX_CONFLICTS 10 -#define RATE_LIMIT_INTERVAL 60 -#define DEFEND_INTERVAL 10 - -#define IPV4ACD_NETWORK 0xA9FE0000L -#define IPV4ACD_NETMASK 0xFFFF0000L - -#define log_ipv4acd_full(ll, level, error, fmt, ...) log_internal(level, error, __FILE__, __LINE__, __func__, "ACD: " fmt, ##__VA_ARGS__) - -#define log_ipv4acd_debug(ll, ...) log_ipv4acd_full(ll, LOG_DEBUG, 0, ##__VA_ARGS__) -#define log_ipv4acd_info(ll, ...) log_ipv4acd_full(ll, LOG_INFO, 0, ##__VA_ARGS__) -#define log_ipv4acd_notice(ll, ...) log_ipv4acd_full(ll, LOG_NOTICE, 0, ##__VA_ARGS__) -#define log_ipv4acd_warning(ll, ...) log_ipv4acd_full(ll, LOG_WARNING, 0, ##__VA_ARGS__) -#define log_ipv4acd_error(ll, ...) log_ipv4acd_full(ll, LOG_ERR, 0, ##__VA_ARGS__) - -#define log_ipv4acd_debug_errno(ll, error, ...) log_ipv4acd_full(ll, LOG_DEBUG, error, ##__VA_ARGS__) -#define log_ipv4acd_info_errno(ll, error, ...) log_ipv4acd_full(ll, LOG_INFO, error, ##__VA_ARGS__) -#define log_ipv4acd_notice_errno(ll, error, ...) log_ipv4acd_full(ll, LOG_NOTICE, error, ##__VA_ARGS__) -#define log_ipv4acd_warning_errno(ll, error, ...) log_ipv4acd_full(ll, LOG_WARNING, error, ##__VA_ARGS__) -#define log_ipv4acd_error_errno(ll, error, ...) log_ipv4acd_full(ll, LOG_ERR, error, ##__VA_ARGS__) +#define PROBE_WAIT_USEC (1U * USEC_PER_SEC) +#define PROBE_NUM 3U +#define PROBE_MIN_USEC (1U * USEC_PER_SEC) +#define PROBE_MAX_USEC (2U * USEC_PER_SEC) +#define ANNOUNCE_WAIT_USEC (2U * USEC_PER_SEC) +#define ANNOUNCE_NUM 2U +#define ANNOUNCE_INTERVAL_USEC (2U * USEC_PER_SEC) +#define MAX_CONFLICTS 10U +#define RATE_LIMIT_INTERVAL_USEC (60U * USEC_PER_SEC) +#define DEFEND_INTERVAL_USEC (10U * USEC_PER_SEC) typedef enum IPv4ACDState { IPV4ACD_STATE_INIT, + IPV4ACD_STATE_STARTED, IPV4ACD_STATE_WAITING_PROBE, IPV4ACD_STATE_PROBING, IPV4ACD_STATE_WAITING_ANNOUNCE, @@ -78,158 +62,164 @@ typedef enum IPv4ACDState { } IPv4ACDState; struct sd_ipv4acd { - RefCount n_ref; + unsigned n_ref; IPv4ACDState state; - int index; + int ifindex; int fd; - int iteration; - int conflict; - sd_event_source *receive_message; - sd_event_source *timer; + + unsigned n_iteration; + unsigned n_conflict; + + sd_event_source *receive_message_event_source; + sd_event_source *timer_event_source; + usec_t defend_window; be32_t address; + /* External */ struct ether_addr mac_addr; + sd_event *event; int event_priority; - sd_ipv4acd_callback_t cb; + sd_ipv4acd_callback_t callback; void* userdata; }; -sd_ipv4acd *sd_ipv4acd_ref(sd_ipv4acd *ll) { - if (ll) - assert_se(REFCNT_INC(ll->n_ref) >= 2); +#define log_ipv4acd_errno(acd, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "IPV4ACD: " fmt, ##__VA_ARGS__) +#define log_ipv4acd(acd, fmt, ...) log_ipv4acd_errno(acd, 0, fmt, ##__VA_ARGS__) + +static void ipv4acd_set_state(sd_ipv4acd *acd, IPv4ACDState st, bool reset_counter) { + assert(acd); + assert(st < _IPV4ACD_STATE_MAX); + + if (st == acd->state && !reset_counter) + acd->n_iteration++; + else { + acd->state = st; + acd->n_iteration = 0; + } +} + +static void ipv4acd_reset(sd_ipv4acd *acd) { + assert(acd); + + acd->timer_event_source = sd_event_source_unref(acd->timer_event_source); + acd->receive_message_event_source = sd_event_source_unref(acd->receive_message_event_source); + + acd->fd = safe_close(acd->fd); - return ll; + ipv4acd_set_state(acd, IPV4ACD_STATE_INIT, true); } -sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *ll) { - if (!ll || REFCNT_DEC(ll->n_ref) > 0) +sd_ipv4acd *sd_ipv4acd_ref(sd_ipv4acd *acd) { + if (!acd) return NULL; - ll->receive_message = sd_event_source_unref(ll->receive_message); - ll->fd = safe_close(ll->fd); + assert_se(acd->n_ref >= 1); + acd->n_ref++; - ll->timer = sd_event_source_unref(ll->timer); + return acd; +} + +sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *acd) { + if (!acd) + return NULL; - sd_ipv4acd_detach_event(ll); + assert_se(acd->n_ref >= 1); + acd->n_ref--; - free(ll); + if (acd->n_ref > 0) + return NULL; + + ipv4acd_reset(acd); + sd_ipv4acd_detach_event(acd); + + free(acd); return NULL; } int sd_ipv4acd_new(sd_ipv4acd **ret) { - _cleanup_(sd_ipv4acd_unrefp) sd_ipv4acd *ll = NULL; + _cleanup_(sd_ipv4acd_unrefp) sd_ipv4acd *acd = NULL; assert_return(ret, -EINVAL); - ll = new0(sd_ipv4acd, 1); - if (!ll) + acd = new0(sd_ipv4acd, 1); + if (!acd) return -ENOMEM; - ll->n_ref = REFCNT_INIT; - ll->state = IPV4ACD_STATE_INIT; - ll->index = -1; - ll->fd = -1; + acd->n_ref = 1; + acd->state = IPV4ACD_STATE_INIT; + acd->ifindex = -1; + acd->fd = -1; - *ret = ll; - ll = NULL; + *ret = acd; + acd = NULL; return 0; } -static void ipv4acd_set_state(sd_ipv4acd *ll, IPv4ACDState st, bool reset_counter) { - - assert(ll); - assert(st < _IPV4ACD_STATE_MAX); - - if (st == ll->state && !reset_counter) - ll->iteration++; - else { - ll->state = st; - ll->iteration = 0; - } -} - -static void ipv4acd_client_notify(sd_ipv4acd *ll, int event) { - assert(ll); +static void ipv4acd_client_notify(sd_ipv4acd *acd, int event) { + assert(acd); - if (!ll->cb) + if (!acd->callback) return; - ll->cb(ll, event, ll->userdata); + acd->callback(acd, event, acd->userdata); } -static void ipv4acd_stop(sd_ipv4acd *ll) { - assert(ll); +int sd_ipv4acd_stop(sd_ipv4acd *acd) { + assert_return(acd, -EINVAL); - ll->receive_message = sd_event_source_unref(ll->receive_message); - ll->fd = safe_close(ll->fd); + ipv4acd_reset(acd); - ll->timer = sd_event_source_unref(ll->timer); + log_ipv4acd(acd, "STOPPED"); - log_ipv4acd_debug(ll, "STOPPED"); - - ipv4acd_set_state (ll, IPV4ACD_STATE_INIT, true); -} - -int sd_ipv4acd_stop(sd_ipv4acd *ll) { - assert_return(ll, -EINVAL); - - ipv4acd_stop(ll); - - ipv4acd_client_notify(ll, SD_IPV4ACD_EVENT_STOP); + ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_STOP); return 0; } static int ipv4acd_on_timeout(sd_event_source *s, uint64_t usec, void *userdata); -static int ipv4acd_set_next_wakeup(sd_ipv4acd *ll, int sec, int random_sec) { +static int ipv4acd_set_next_wakeup(sd_ipv4acd *acd, usec_t usec, usec_t random_usec) { _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL; - usec_t next_timeout; - usec_t time_now; + usec_t next_timeout, time_now; int r; - assert(sec >= 0); - assert(random_sec >= 0); - assert(ll); + assert(acd); - next_timeout = sec * USEC_PER_SEC; + next_timeout = usec; - if (random_sec) - next_timeout += random_u32() % (random_sec * USEC_PER_SEC); + if (random_usec > 0) + next_timeout += (usec_t) random_u64() % random_usec; - assert_se(sd_event_now(ll->event, clock_boottime_or_monotonic(), &time_now) >= 0); + assert_se(sd_event_now(acd->event, clock_boottime_or_monotonic(), &time_now) >= 0); - r = sd_event_add_time(ll->event, &timer, clock_boottime_or_monotonic(), - time_now + next_timeout, 0, ipv4acd_on_timeout, ll); + r = sd_event_add_time(acd->event, &timer, clock_boottime_or_monotonic(), time_now + next_timeout, 0, ipv4acd_on_timeout, acd); if (r < 0) return r; - r = sd_event_source_set_priority(timer, ll->event_priority); + r = sd_event_source_set_priority(timer, acd->event_priority); if (r < 0) return r; - r = sd_event_source_set_description(timer, "ipv4acd-timer"); - if (r < 0) - return r; + (void) sd_event_source_set_description(timer, "ipv4acd-timer"); - ll->timer = sd_event_source_unref(ll->timer); - ll->timer = timer; + sd_event_source_unref(acd->timer_event_source); + acd->timer_event_source = timer; timer = NULL; return 0; } -static bool ipv4acd_arp_conflict(sd_ipv4acd *ll, struct ether_arp *arp) { - assert(ll); +static bool ipv4acd_arp_conflict(sd_ipv4acd *acd, struct ether_arp *arp) { + assert(acd); assert(arp); /* see the BPF */ - if (memcmp(arp->arp_spa, &ll->address, sizeof(ll->address)) == 0) + if (memcmp(arp->arp_spa, &acd->address, sizeof(acd->address)) == 0) return true; /* the TPA matched instead of the SPA, this is not a conflict */ @@ -237,116 +227,118 @@ static bool ipv4acd_arp_conflict(sd_ipv4acd *ll, struct ether_arp *arp) { } static int ipv4acd_on_timeout(sd_event_source *s, uint64_t usec, void *userdata) { - sd_ipv4acd *ll = userdata; + sd_ipv4acd *acd = userdata; int r = 0; - assert(ll); + assert(acd); + + switch (acd->state) { - switch (ll->state) { - case IPV4ACD_STATE_INIT: + case IPV4ACD_STATE_STARTED: + ipv4acd_set_state(acd, IPV4ACD_STATE_WAITING_PROBE, true); - ipv4acd_set_state(ll, IPV4ACD_STATE_WAITING_PROBE, true); + if (acd->n_conflict >= MAX_CONFLICTS) { + char ts[FORMAT_TIMESPAN_MAX]; + log_ipv4acd(acd, "Max conflicts reached, delaying by %s", format_timespan(ts, sizeof(ts), RATE_LIMIT_INTERVAL_USEC, 0)); - if (ll->conflict >= MAX_CONFLICTS) { - log_ipv4acd_notice(ll, "Max conflicts reached, delaying by %us", RATE_LIMIT_INTERVAL); - r = ipv4acd_set_next_wakeup(ll, RATE_LIMIT_INTERVAL, PROBE_WAIT); + r = ipv4acd_set_next_wakeup(acd, RATE_LIMIT_INTERVAL_USEC, PROBE_WAIT_USEC); if (r < 0) - goto out; + goto fail; - ll->conflict = 0; + acd->n_conflict = 0; } else { - r = ipv4acd_set_next_wakeup(ll, 0, PROBE_WAIT); + r = ipv4acd_set_next_wakeup(acd, 0, PROBE_WAIT_USEC); if (r < 0) - goto out; + goto fail; } break; + case IPV4ACD_STATE_WAITING_PROBE: case IPV4ACD_STATE_PROBING: /* Send a probe */ - r = arp_send_probe(ll->fd, ll->index, ll->address, &ll->mac_addr); + r = arp_send_probe(acd->fd, acd->ifindex, acd->address, &acd->mac_addr); if (r < 0) { - log_ipv4acd_error_errno(ll, r, "Failed to send ARP probe: %m"); - goto out; + log_ipv4acd_errno(acd, r, "Failed to send ARP probe: %m"); + goto fail; } else { _cleanup_free_ char *address = NULL; - union in_addr_union addr = { .in.s_addr = ll->address }; + union in_addr_union addr = { .in.s_addr = acd->address }; - r = in_addr_to_string(AF_INET, &addr, &address); - if (r >= 0) - log_ipv4acd_debug(ll, "Probing %s", address); + (void) in_addr_to_string(AF_INET, &addr, &address); + log_ipv4acd(acd, "Probing %s", strna(address)); } - if (ll->iteration < PROBE_NUM - 2) { - ipv4acd_set_state(ll, IPV4ACD_STATE_PROBING, false); + if (acd->n_iteration < PROBE_NUM - 2) { + ipv4acd_set_state(acd, IPV4ACD_STATE_PROBING, false); - r = ipv4acd_set_next_wakeup(ll, PROBE_MIN, (PROBE_MAX-PROBE_MIN)); + r = ipv4acd_set_next_wakeup(acd, PROBE_MIN_USEC, (PROBE_MAX_USEC-PROBE_MIN_USEC)); if (r < 0) - goto out; + goto fail; } else { - ipv4acd_set_state(ll, IPV4ACD_STATE_WAITING_ANNOUNCE, true); + ipv4acd_set_state(acd, IPV4ACD_STATE_WAITING_ANNOUNCE, true); - r = ipv4acd_set_next_wakeup(ll, ANNOUNCE_WAIT, 0); + r = ipv4acd_set_next_wakeup(acd, ANNOUNCE_WAIT_USEC, 0); if (r < 0) - goto out; + goto fail; } break; case IPV4ACD_STATE_ANNOUNCING: - if (ll->iteration >= ANNOUNCE_NUM - 1) { - ipv4acd_set_state(ll, IPV4ACD_STATE_RUNNING, false); - + if (acd->n_iteration >= ANNOUNCE_NUM - 1) { + ipv4acd_set_state(acd, IPV4ACD_STATE_RUNNING, false); break; } + + /* fall through */ + case IPV4ACD_STATE_WAITING_ANNOUNCE: /* Send announcement packet */ - r = arp_send_announcement(ll->fd, ll->index, ll->address, &ll->mac_addr); + r = arp_send_announcement(acd->fd, acd->ifindex, acd->address, &acd->mac_addr); if (r < 0) { - log_ipv4acd_error_errno(ll, r, "Failed to send ARP announcement: %m"); - goto out; + log_ipv4acd_errno(acd, r, "Failed to send ARP announcement: %m"); + goto fail; } else - log_ipv4acd_debug(ll, "ANNOUNCE"); + log_ipv4acd(acd, "ANNOUNCE"); - ipv4acd_set_state(ll, IPV4ACD_STATE_ANNOUNCING, false); + ipv4acd_set_state(acd, IPV4ACD_STATE_ANNOUNCING, false); - r = ipv4acd_set_next_wakeup(ll, ANNOUNCE_INTERVAL, 0); + r = ipv4acd_set_next_wakeup(acd, ANNOUNCE_INTERVAL_USEC, 0); if (r < 0) - goto out; + goto fail; - if (ll->iteration == 0) { - ll->conflict = 0; - ipv4acd_client_notify(ll, SD_IPV4ACD_EVENT_BIND); + if (acd->n_iteration == 0) { + acd->n_conflict = 0; + ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_BIND); } break; + default: assert_not_reached("Invalid state."); } -out: - if (r < 0) - sd_ipv4acd_stop(ll); + return 0; - return 1; +fail: + sd_ipv4acd_stop(acd); + return 0; } -static void ipv4acd_on_conflict(sd_ipv4acd *ll) { +static void ipv4acd_on_conflict(sd_ipv4acd *acd) { _cleanup_free_ char *address = NULL; - union in_addr_union addr = { .in.s_addr = ll->address }; - int r; + union in_addr_union addr = { .in.s_addr = acd->address }; - assert(ll); + assert(acd); - ll->conflict++; + acd->n_conflict++; - r = in_addr_to_string(AF_INET, &addr, &address); - if (r >= 0) - log_ipv4acd_debug(ll, "Conflict on %s (%u)", address, ll->conflict); + (void) in_addr_to_string(AF_INET, &addr, &address); + log_ipv4acd(acd, "Conflict on %s (%u)", strna(address), acd->n_conflict); - ipv4acd_stop(ll); - - ipv4acd_client_notify(ll, SD_IPV4ACD_EVENT_CONFLICT); + ipv4acd_reset(acd); + ipv4acd_client_notify(acd, SD_IPV4ACD_EVENT_CONFLICT); } static int ipv4acd_on_packet( @@ -355,47 +347,50 @@ static int ipv4acd_on_packet( uint32_t revents, void *userdata) { - sd_ipv4acd *ll = userdata; + sd_ipv4acd *acd = userdata; struct ether_arp packet; ssize_t n; int r; assert(s); - assert(ll); + assert(acd); assert(fd >= 0); n = recv(fd, &packet, sizeof(struct ether_arp), 0); if (n < 0) { - r = log_ipv4acd_debug_errno(ll, errno, "Failed to read ARP packet: %m"); - goto out; + if (errno == EAGAIN || errno == EINTR) + return 0; + + log_ipv4acd_errno(acd, errno, "Failed to read ARP packet: %m"); + goto fail; } if ((size_t) n != sizeof(struct ether_arp)) { - log_ipv4acd_debug(ll, "Ignoring too short ARP packet."); + log_ipv4acd(acd, "Ignoring too short ARP packet."); return 0; } - switch (ll->state) { + switch (acd->state) { case IPV4ACD_STATE_ANNOUNCING: case IPV4ACD_STATE_RUNNING: - if (ipv4acd_arp_conflict(ll, &packet)) { + if (ipv4acd_arp_conflict(acd, &packet)) { usec_t ts; - assert_se(sd_event_now(ll->event, clock_boottime_or_monotonic(), &ts) >= 0); + assert_se(sd_event_now(acd->event, clock_boottime_or_monotonic(), &ts) >= 0); /* Defend address */ - if (ts > ll->defend_window) { - ll->defend_window = ts + DEFEND_INTERVAL * USEC_PER_SEC; - r = arp_send_announcement(ll->fd, ll->index, ll->address, &ll->mac_addr); + if (ts > acd->defend_window) { + acd->defend_window = ts + DEFEND_INTERVAL_USEC; + r = arp_send_announcement(acd->fd, acd->ifindex, acd->address, &acd->mac_addr); if (r < 0) { - log_ipv4acd_error_errno(ll, r, "Failed to send ARP announcement: %m"); - goto out; + log_ipv4acd_errno(acd, r, "Failed to send ARP announcement: %m"); + goto fail; } else - log_ipv4acd_debug(ll, "DEFEND"); + log_ipv4acd(acd, "DEFEND"); } else - ipv4acd_on_conflict(ll); + ipv4acd_on_conflict(acd); } break; @@ -403,132 +398,129 @@ static int ipv4acd_on_packet( case IPV4ACD_STATE_PROBING: case IPV4ACD_STATE_WAITING_ANNOUNCE: /* BPF ensures this packet indicates a conflict */ - ipv4acd_on_conflict(ll); + ipv4acd_on_conflict(acd); break; default: assert_not_reached("Invalid state."); } -out: - if (r < 0) - sd_ipv4acd_stop(ll); + return 0; - return 1; +fail: + sd_ipv4acd_stop(acd); + return 0; } -int sd_ipv4acd_set_index(sd_ipv4acd *ll, int interface_index) { - assert_return(ll, -EINVAL); - assert_return(interface_index > 0, -EINVAL); - assert_return(ll->state == IPV4ACD_STATE_INIT, -EBUSY); +int sd_ipv4acd_set_ifindex(sd_ipv4acd *acd, int ifindex) { + assert_return(acd, -EINVAL); + assert_return(ifindex > 0, -EINVAL); + assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY); - ll->index = interface_index; + acd->ifindex = ifindex; return 0; } -int sd_ipv4acd_set_mac(sd_ipv4acd *ll, const struct ether_addr *addr) { - assert_return(ll, -EINVAL); +int sd_ipv4acd_set_mac(sd_ipv4acd *acd, const struct ether_addr *addr) { + assert_return(acd, -EINVAL); assert_return(addr, -EINVAL); - assert_return(ll->state == IPV4ACD_STATE_INIT, -EBUSY); + assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY); - memcpy(&ll->mac_addr, addr, ETH_ALEN); + acd->mac_addr = *addr; return 0; } -int sd_ipv4acd_detach_event(sd_ipv4acd *ll) { - assert_return(ll, -EINVAL); +int sd_ipv4acd_detach_event(sd_ipv4acd *acd) { + assert_return(acd, -EINVAL); - ll->event = sd_event_unref(ll->event); + acd->event = sd_event_unref(acd->event); return 0; } -int sd_ipv4acd_attach_event(sd_ipv4acd *ll, sd_event *event, int64_t priority) { +int sd_ipv4acd_attach_event(sd_ipv4acd *acd, sd_event *event, int64_t priority) { int r; - assert_return(ll, -EINVAL); - assert_return(!ll->event, -EBUSY); + assert_return(acd, -EINVAL); + assert_return(!acd->event, -EBUSY); if (event) - ll->event = sd_event_ref(event); + acd->event = sd_event_ref(event); else { - r = sd_event_default(&ll->event); + r = sd_event_default(&acd->event); if (r < 0) return r; } - ll->event_priority = priority; + acd->event_priority = priority; return 0; } -int sd_ipv4acd_set_callback(sd_ipv4acd *ll, sd_ipv4acd_callback_t cb, void *userdata) { - assert_return(ll, -EINVAL); +int sd_ipv4acd_set_callback(sd_ipv4acd *acd, sd_ipv4acd_callback_t cb, void *userdata) { + assert_return(acd, -EINVAL); - ll->cb = cb; - ll->userdata = userdata; + acd->callback = cb; + acd->userdata = userdata; return 0; } -int sd_ipv4acd_set_address(sd_ipv4acd *ll, const struct in_addr *address) { - assert_return(ll, -EINVAL); +int sd_ipv4acd_set_address(sd_ipv4acd *acd, const struct in_addr *address) { + assert_return(acd, -EINVAL); assert_return(address, -EINVAL); - assert_return(ll->state == IPV4ACD_STATE_INIT, -EBUSY); + assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY); - ll->address = address->s_addr; + acd->address = address->s_addr; return 0; } -int sd_ipv4acd_is_running(sd_ipv4acd *ll) { - assert_return(ll, false); +int sd_ipv4acd_is_running(sd_ipv4acd *acd) { + assert_return(acd, false); - return ll->state != IPV4ACD_STATE_INIT; + return acd->state != IPV4ACD_STATE_INIT; } -int sd_ipv4acd_start(sd_ipv4acd *ll) { +int sd_ipv4acd_start(sd_ipv4acd *acd) { int r; - assert_return(ll, -EINVAL); - assert_return(ll->event, -EINVAL); - assert_return(ll->index > 0, -EINVAL); - assert_return(ll->address != 0, -EINVAL); - assert_return(!ether_addr_is_null(&ll->mac_addr), -EINVAL); - assert_return(ll->state == IPV4ACD_STATE_INIT, -EBUSY); + assert_return(acd, -EINVAL); + assert_return(acd->event, -EINVAL); + assert_return(acd->ifindex > 0, -EINVAL); + assert_return(acd->address != 0, -EINVAL); + assert_return(!ether_addr_is_null(&acd->mac_addr), -EINVAL); + assert_return(acd->state == IPV4ACD_STATE_INIT, -EBUSY); - ll->defend_window = 0; - - r = arp_network_bind_raw_socket(ll->index, ll->address, &ll->mac_addr); + r = arp_network_bind_raw_socket(acd->ifindex, acd->address, &acd->mac_addr); if (r < 0) - goto out; + return r; - ll->fd = safe_close(ll->fd); - ll->fd = r; + safe_close(acd->fd); + acd->fd = r; + acd->defend_window = 0; + acd->n_conflict = 0; - r = sd_event_add_io(ll->event, &ll->receive_message, ll->fd, - EPOLLIN, ipv4acd_on_packet, ll); + r = sd_event_add_io(acd->event, &acd->receive_message_event_source, acd->fd, EPOLLIN, ipv4acd_on_packet, acd); if (r < 0) - goto out; + goto fail; - r = sd_event_source_set_priority(ll->receive_message, ll->event_priority); + r = sd_event_source_set_priority(acd->receive_message_event_source, acd->event_priority); if (r < 0) - goto out; + goto fail; - r = sd_event_source_set_description(ll->receive_message, "ipv4acd-receive-message"); - if (r < 0) - goto out; + (void) sd_event_source_set_description(acd->receive_message_event_source, "ipv4acd-receive-message"); - r = ipv4acd_set_next_wakeup(ll, 0, 0); + r = ipv4acd_set_next_wakeup(acd, 0, 0); if (r < 0) - goto out; -out: - if (r < 0) { - ipv4acd_stop(ll); - return r; - } + goto fail; + ipv4acd_set_state(acd, IPV4ACD_STATE_STARTED, true); return 0; + +fail: + ipv4acd_reset(acd); + return r; } diff --git a/src/libsystemd-network/sd-ipv4ll.c b/src/libsystemd-network/sd-ipv4ll.c index 2a06418c53..5603a533a5 100644 --- a/src/libsystemd-network/sd-ipv4ll.c +++ b/src/libsystemd-network/sd-ipv4ll.c @@ -28,16 +28,17 @@ #include "sd-ipv4ll.h" #include "alloc-util.h" +#include "ether-addr-util.h" #include "in-addr-util.h" #include "list.h" #include "random-util.h" -#include "refcnt.h" #include "siphash24.h" #include "sparse-endian.h" +#include "string-util.h" #include "util.h" -#define IPV4LL_NETWORK 0xA9FE0000L -#define IPV4LL_NETMASK 0xFFFF0000L +#define IPV4LL_NETWORK UINT32_C(0xA9FE0000) +#define IPV4LL_NETMASK UINT32_C(0xFFFF0000) #define IPV4LL_DONT_DESTROY(ll) \ _cleanup_(sd_ipv4ll_unrefp) _unused_ sd_ipv4ll *_dont_destroy_##ll = sd_ipv4ll_ref(ll) @@ -46,16 +47,28 @@ struct sd_ipv4ll { unsigned n_ref; sd_ipv4acd *acd; + be32_t address; /* the address pushed to ACD */ - struct random_data *random_data; - char *random_data_state; + struct ether_addr mac; + + struct { + le64_t value; + le64_t generation; + } seed; + bool seed_set; /* External */ be32_t claimed_address; - sd_ipv4ll_callback_t cb; + + sd_ipv4ll_callback_t callback; void* userdata; }; +#define log_ipv4ll_errno(ll, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "IPV4LL: " fmt, ##__VA_ARGS__) +#define log_ipv4ll(ll, fmt, ...) log_ipv4ll_errno(ll, 0, fmt, ##__VA_ARGS__) + +static void ipv4ll_on_acd(sd_ipv4acd *ll, int event, void *userdata); + sd_ipv4ll *sd_ipv4ll_ref(sd_ipv4ll *ll) { if (!ll) return NULL; @@ -77,16 +90,11 @@ sd_ipv4ll *sd_ipv4ll_unref(sd_ipv4ll *ll) { return NULL; sd_ipv4acd_unref(ll->acd); - - free(ll->random_data); - free(ll->random_data_state); free(ll); return NULL; } -static void ipv4ll_on_acd(sd_ipv4acd *ll, int event, void *userdata); - int sd_ipv4ll_new(sd_ipv4ll **ret) { _cleanup_(sd_ipv4ll_unrefp) sd_ipv4ll *ll = NULL; int r; @@ -114,44 +122,32 @@ int sd_ipv4ll_new(sd_ipv4ll **ret) { } int sd_ipv4ll_stop(sd_ipv4ll *ll) { - int r; - assert_return(ll, -EINVAL); - r = sd_ipv4acd_stop(ll->acd); - if (r < 0) - return r; - - return 0; + return sd_ipv4acd_stop(ll->acd); } -int sd_ipv4ll_set_index(sd_ipv4ll *ll, int interface_index) { +int sd_ipv4ll_set_ifindex(sd_ipv4ll *ll, int ifindex) { assert_return(ll, -EINVAL); + assert_return(ifindex > 0, -EINVAL); + assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY); - return sd_ipv4acd_set_index(ll->acd, interface_index); + return sd_ipv4acd_set_ifindex(ll->acd, ifindex); } -#define HASH_KEY SD_ID128_MAKE(df,04,22,98,3f,ad,14,52,f9,87,2e,d1,9c,70,e2,f2) - int sd_ipv4ll_set_mac(sd_ipv4ll *ll, const struct ether_addr *addr) { int r; assert_return(ll, -EINVAL); + assert_return(addr, -EINVAL); + assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY); - if (!ll->random_data) { - uint64_t seed; - - /* If no random data is set, generate some from the MAC */ - seed = siphash24(&addr->ether_addr_octet, ETH_ALEN, HASH_KEY.bytes); - - assert_cc(sizeof(unsigned) <= 8); - - r = sd_ipv4ll_set_address_seed(ll, (unsigned) htole64(seed)); - if (r < 0) - return r; - } + r = sd_ipv4acd_set_mac(ll->acd, addr); + if (r < 0) + return r; - return sd_ipv4acd_set_mac(ll->acd, addr); + ll->mac = *addr; + return 0; } int sd_ipv4ll_detach_event(sd_ipv4ll *ll) { @@ -161,21 +157,15 @@ int sd_ipv4ll_detach_event(sd_ipv4ll *ll) { } int sd_ipv4ll_attach_event(sd_ipv4ll *ll, sd_event *event, int64_t priority) { - int r; - assert_return(ll, -EINVAL); - r = sd_ipv4acd_attach_event(ll->acd, event, priority); - if (r < 0) - return r; - - return 0; + return sd_ipv4acd_attach_event(ll->acd, event, priority); } int sd_ipv4ll_set_callback(sd_ipv4ll *ll, sd_ipv4ll_callback_t cb, void *userdata) { assert_return(ll, -EINVAL); - ll->cb = cb; + ll->callback = cb; ll->userdata = userdata; return 0; @@ -193,32 +183,12 @@ int sd_ipv4ll_get_address(sd_ipv4ll *ll, struct in_addr *address) { return 0; } -int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, unsigned seed) { - _cleanup_free_ struct random_data *random_data = NULL; - _cleanup_free_ char *random_data_state = NULL; - int r; - +int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, uint64_t seed) { assert_return(ll, -EINVAL); + assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY); - random_data = new0(struct random_data, 1); - if (!random_data) - return -ENOMEM; - - random_data_state = new0(char, 128); - if (!random_data_state) - return -ENOMEM; - - r = initstate_r(seed, random_data_state, 128, random_data); - if (r < 0) - return r; - - free(ll->random_data); - ll->random_data = random_data; - random_data = NULL; - - free(ll->random_data_state); - ll->random_data_state = random_data_state; - random_data_state = NULL; + ll->seed.value = htole64(seed); + ll->seed_set = true; return 0; } @@ -230,20 +200,12 @@ int sd_ipv4ll_is_running(sd_ipv4ll *ll) { } static bool ipv4ll_address_is_valid(const struct in_addr *address) { - uint32_t addr; - assert(address); if (!in_addr_is_link_local(AF_INET, (const union in_addr_union *) address)) return false; - addr = be32toh(address->s_addr); - - if ((addr & 0x0000FF00) == 0x0000 || - (addr & 0x0000FF00) == 0xFF00) - return false; - - return true; + return !IN_SET(be32toh(address->s_addr) & 0x0000FF00U, 0x0000U, 0xFF00U); } int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address) { @@ -262,48 +224,67 @@ int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address) { return 0; } +#define PICK_HASH_KEY SD_ID128_MAKE(15,ac,82,a6,d6,3f,49,78,98,77,5d,0c,69,02,94,0b) + static int ipv4ll_pick_address(sd_ipv4ll *ll) { - struct in_addr in_addr; + _cleanup_free_ char *address = NULL; be32_t addr; - int r; - int32_t random; assert(ll); - assert(ll->random_data); do { - r = random_r(ll->random_data, &random); - if (r < 0) - return r; - addr = htonl((random & 0x0000FFFF) | IPV4LL_NETWORK); - } while (addr == ll->address || - (ntohl(addr) & 0x0000FF00) == 0x0000 || - (ntohl(addr) & 0x0000FF00) == 0xFF00); + uint64_t h; - in_addr.s_addr = addr; + h = siphash24(&ll->seed, sizeof(ll->seed), PICK_HASH_KEY.bytes); - r = sd_ipv4ll_set_address(ll, &in_addr); - if (r < 0) - return r; + /* Increase the generation counter by one */ + ll->seed.generation = htole64(le64toh(ll->seed.generation) + 1); - return 0; + addr = htobe32((h & UINT32_C(0x0000FFFF)) | IPV4LL_NETWORK); + } while (addr == ll->address || + IN_SET(be32toh(addr) & 0x0000FF00U, 0x0000U, 0xFF00U)); + + (void) in_addr_to_string(AF_INET, &(union in_addr_union) { .in.s_addr = addr }, &address); + log_ipv4ll(ll, "Picked new IP address %s.", strna(address)); + + return sd_ipv4ll_set_address(ll, &(struct in_addr) { addr }); } +#define MAC_HASH_KEY SD_ID128_MAKE(df,04,22,98,3f,ad,14,52,f9,87,2e,d1,9c,70,e2,f2) + int sd_ipv4ll_start(sd_ipv4ll *ll) { int r; + bool picked_address = false; assert_return(ll, -EINVAL); - assert_return(ll->random_data, -EINVAL); + assert_return(!ether_addr_is_null(&ll->mac), -EINVAL); + assert_return(sd_ipv4ll_is_running(ll) == 0, -EBUSY); + + /* If no random seed is set, generate some from the MAC address */ + if (!ll->seed_set) + ll->seed.value = htole64(siphash24(ll->mac.ether_addr_octet, ETH_ALEN, MAC_HASH_KEY.bytes)); + + /* Restart the generation counter. */ + ll->seed.generation = 0; if (ll->address == 0) { r = ipv4ll_pick_address(ll); if (r < 0) return r; + + picked_address = true; } r = sd_ipv4acd_start(ll->acd); - if (r < 0) + if (r < 0) { + + /* We couldn't start? If so, let's forget the picked address again, the user might make a change and + * retry, and we want the new data to take effect when picking an address. */ + if (picked_address) + ll->address = 0; + return r; + } return 0; } @@ -311,8 +292,8 @@ int sd_ipv4ll_start(sd_ipv4ll *ll) { static void ipv4ll_client_notify(sd_ipv4ll *ll, int event) { assert(ll); - if (ll->cb) - ll->cb(ll, event, ll->userdata); + if (ll->callback) + ll->callback(ll, event, ll->userdata); } void ipv4ll_on_acd(sd_ipv4acd *acd, int event, void *userdata) { @@ -324,17 +305,17 @@ void ipv4ll_on_acd(sd_ipv4acd *acd, int event, void *userdata) { assert(ll); switch (event) { + case SD_IPV4ACD_EVENT_STOP: ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_STOP); - ll->claimed_address = 0; - break; + case SD_IPV4ACD_EVENT_BIND: ll->claimed_address = ll->address; ipv4ll_client_notify(ll, SD_IPV4LL_EVENT_BIND); - break; + case SD_IPV4ACD_EVENT_CONFLICT: /* if an address was already bound we must call up to the user to handle this, otherwise we just try again */ @@ -353,6 +334,7 @@ void ipv4ll_on_acd(sd_ipv4acd *acd, int event, void *userdata) { } break; + default: assert_not_reached("Invalid IPv4ACD event."); } diff --git a/src/libsystemd-network/sd-ndisc.c b/src/libsystemd-network/sd-ndisc.c index fb4ef55673..ccb8002173 100644 --- a/src/libsystemd-network/sd-ndisc.c +++ b/src/libsystemd-network/sd-ndisc.c @@ -34,20 +34,20 @@ #include "socket-util.h" #include "string-util.h" -#define NDISC_ROUTER_SOLICITATION_INTERVAL 4 * USEC_PER_SEC -#define NDISC_MAX_ROUTER_SOLICITATIONS 3 +#define NDISC_ROUTER_SOLICITATION_INTERVAL (4U * USEC_PER_SEC) +#define NDISC_MAX_ROUTER_SOLICITATIONS 3U enum NDiscState { NDISC_STATE_IDLE, NDISC_STATE_SOLICITATION_SENT, - NDISC_STATE_ADVERTISMENT_LISTEN, + NDISC_STATE_ADVERTISEMENT_LISTEN, _NDISC_STATE_MAX, _NDISC_STATE_INVALID = -1, }; -#define IP6_MIN_MTU (unsigned)1280 +#define IP6_MIN_MTU 1280U #define ICMP6_RECV_SIZE (IP6_MIN_MTU - sizeof(struct ip6_hdr)) -#define NDISC_OPT_LEN_UNITS 8 +#define NDISC_OPT_LEN_UNITS 8U #define ND_RA_FLAG_PREF 0x18 #define ND_RA_FLAG_PREF_LOW 0x03 @@ -73,16 +73,22 @@ struct sd_ndisc { unsigned n_ref; enum NDiscState state; + int ifindex; + int fd; + sd_event *event; int event_priority; - int index; + struct ether_addr mac_addr; uint32_t mtu; + LIST_HEAD(NDiscPrefix, prefixes); - int fd; - sd_event_source *recv; - sd_event_source *timeout; - int nd_sent; + + sd_event_source *recv_event_source; + sd_event_source *timeout_event_source; + + unsigned nd_sent; + sd_ndisc_router_callback_t router_callback; sd_ndisc_prefix_autonomous_callback_t prefix_autonomous_callback; sd_ndisc_prefix_onlink_callback_t prefix_onlink_callback; @@ -90,7 +96,8 @@ struct sd_ndisc { void *userdata; }; -#define log_ndisc(p, fmt, ...) log_internal(LOG_DEBUG, 0, __FILE__, __LINE__, __func__, "NDisc CLIENT: " fmt, ##__VA_ARGS__) +#define log_ndisc_errno(p, error, fmt, ...) log_internal(LOG_DEBUG, error, __FILE__, __LINE__, __func__, "NDisc CLIENT: " fmt, ##__VA_ARGS__) +#define log_ndisc(p, fmt, ...) log_ndisc_errno(p, 0, fmt, ##__VA_ARGS__) static NDiscPrefix *ndisc_prefix_unref(NDiscPrefix *prefix) { @@ -128,13 +135,15 @@ static int ndisc_prefix_new(sd_ndisc *nd, NDiscPrefix **ret) { return 0; } -int sd_ndisc_set_callback(sd_ndisc *nd, - sd_ndisc_router_callback_t router_callback, - sd_ndisc_prefix_onlink_callback_t prefix_onlink_callback, - sd_ndisc_prefix_autonomous_callback_t prefix_autonomous_callback, - sd_ndisc_callback_t callback, - void *userdata) { - assert(nd); +int sd_ndisc_set_callback( + sd_ndisc *nd, + sd_ndisc_router_callback_t router_callback, + sd_ndisc_prefix_onlink_callback_t prefix_onlink_callback, + sd_ndisc_prefix_autonomous_callback_t prefix_autonomous_callback, + sd_ndisc_callback_t callback, + void *userdata) { + + assert_return(nd, -EINVAL); nd->router_callback = router_callback; nd->prefix_onlink_callback = prefix_onlink_callback; @@ -145,17 +154,16 @@ int sd_ndisc_set_callback(sd_ndisc *nd, return 0; } -int sd_ndisc_set_index(sd_ndisc *nd, int interface_index) { - assert(nd); - assert(interface_index >= -1); - - nd->index = interface_index; +int sd_ndisc_set_ifindex(sd_ndisc *nd, int ifindex) { + assert_return(nd, -EINVAL); + assert_return(ifindex > 0, -EINVAL); + nd->ifindex = ifindex; return 0; } int sd_ndisc_set_mac(sd_ndisc *nd, const struct ether_addr *mac_addr) { - assert(nd); + assert_return(nd, -EINVAL); if (mac_addr) memcpy(&nd->mac_addr, mac_addr, sizeof(nd->mac_addr)); @@ -194,7 +202,7 @@ int sd_ndisc_detach_event(sd_ndisc *nd) { } sd_event *sd_ndisc_get_event(sd_ndisc *nd) { - assert(nd); + assert_return(nd, NULL); return nd->event; } @@ -210,12 +218,12 @@ sd_ndisc *sd_ndisc_ref(sd_ndisc *nd) { return nd; } -static int ndisc_init(sd_ndisc *nd) { +static int ndisc_reset(sd_ndisc *nd) { assert(nd); - nd->recv = sd_event_source_unref(nd->recv); + nd->recv_event_source = sd_event_source_unref(nd->recv_event_source); nd->fd = asynchronous_close(nd->fd); - nd->timeout = sd_event_source_unref(nd->timeout); + nd->timeout_event_source = sd_event_source_unref(nd->timeout_event_source); return 0; } @@ -232,7 +240,7 @@ sd_ndisc *sd_ndisc_unref(sd_ndisc *nd) { if (nd->n_ref > 0) return NULL; - ndisc_init(nd); + ndisc_reset(nd); sd_ndisc_detach_event(nd); LIST_FOREACH_SAFE(prefixes, prefix, p, nd->prefixes) @@ -246,15 +254,14 @@ sd_ndisc *sd_ndisc_unref(sd_ndisc *nd) { int sd_ndisc_new(sd_ndisc **ret) { _cleanup_(sd_ndisc_unrefp) sd_ndisc *nd = NULL; - assert(ret); + assert_return(ret, -EINVAL); nd = new0(sd_ndisc, 1); if (!nd) return -ENOMEM; nd->n_ref = 1; - - nd->index = -1; + nd->ifindex = -1; nd->fd = -1; LIST_HEAD_INIT(nd->prefixes); @@ -273,7 +280,6 @@ int sd_ndisc_get_mtu(sd_ndisc *nd, uint32_t *mtu) { return -ENOMSG; *mtu = nd->mtu; - return 0; } @@ -282,8 +288,8 @@ static int prefix_match(const struct in6_addr *prefix, uint8_t prefixlen, uint8_t addr_prefixlen) { uint8_t bytes, mask, len; - assert_return(prefix, -EINVAL); - assert_return(addr, -EINVAL); + assert(prefix); + assert(addr); len = MIN(prefixlen, addr_prefixlen); @@ -336,7 +342,7 @@ static int ndisc_prefix_update(sd_ndisc *nd, ssize_t len, assert(prefix_opt); if (len < prefix_opt->nd_opt_pi_len) - return -ENOMSG; + return -EBADMSG; if (!(prefix_opt->nd_opt_pi_flags_reserved & (ND_OPT_PI_FLAG_ONLINK | ND_OPT_PI_FLAG_AUTO))) return 0; @@ -356,7 +362,7 @@ static int ndisc_prefix_update(sd_ndisc *nd, ssize_t len, if (r != -EADDRNOTAVAIL) return r; - /* if router advertisment prefix valid timeout is zero, the timeout + /* if router advertisement prefix valid timeout is zero, the timeout callback will be called immediately to clean up the prefix */ r = ndisc_prefix_new(nd, &prefix); @@ -411,32 +417,32 @@ static int ndisc_prefix_update(sd_ndisc *nd, ssize_t len, return 0; } -static int ndisc_ra_parse(sd_ndisc *nd, struct nd_router_advert *ra, ssize_t len) { - void *opt; +static int ndisc_ra_parse(sd_ndisc *nd, struct nd_router_advert *ra, size_t len) { struct nd_opt_hdr *opt_hdr; + void *opt; - assert_return(nd, -EINVAL); - assert_return(ra, -EINVAL); + assert(nd); + assert(ra); - len -= sizeof(*ra); - if (len < NDISC_OPT_LEN_UNITS) { + if (len < sizeof(struct nd_router_advert) + NDISC_OPT_LEN_UNITS) { log_ndisc(nd, "Router Advertisement below minimum length"); - - return -ENOMSG; + return -EBADMSG; } + len -= sizeof(struct nd_router_advert); opt = ra + 1; opt_hdr = opt; while (len != 0 && len >= opt_hdr->nd_opt_len * NDISC_OPT_LEN_UNITS) { struct nd_opt_mtu *opt_mtu; - uint32_t mtu; struct nd_opt_prefix_info *opt_prefix; + uint32_t mtu; if (opt_hdr->nd_opt_len == 0) - return -ENOMSG; + return -EBADMSG; switch (opt_hdr->nd_opt_type) { + case ND_OPT_MTU: opt_mtu = opt; @@ -444,24 +450,19 @@ static int ndisc_ra_parse(sd_ndisc *nd, struct nd_router_advert *ra, ssize_t len if (mtu != nd->mtu) { nd->mtu = MAX(mtu, IP6_MIN_MTU); - - log_ndisc(nd, "Router Advertisement link MTU %d using %d", - mtu, nd->mtu); + log_ndisc(nd, "Router Advertisement link MTU %d using %d", mtu, nd->mtu); } break; case ND_OPT_PREFIX_INFORMATION: opt_prefix = opt; - ndisc_prefix_update(nd, len, opt_prefix); - break; } len -= opt_hdr->nd_opt_len * NDISC_OPT_LEN_UNITS; - opt = (void *)((char *)opt + - opt_hdr->nd_opt_len * NDISC_OPT_LEN_UNITS); + opt = (void*) ((uint8_t*) opt + opt_hdr->nd_opt_len * NDISC_OPT_LEN_UNITS); opt_hdr = opt; } @@ -471,7 +472,7 @@ static int ndisc_ra_parse(sd_ndisc *nd, struct nd_router_advert *ra, ssize_t len return 0; } -static int ndisc_router_advertisment_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) { +static int ndisc_router_advertisement_recv(sd_event_source *s, int fd, uint32_t revents, void *userdata) { _cleanup_free_ struct nd_router_advert *ra = NULL; sd_ndisc *nd = userdata; union { @@ -515,11 +516,14 @@ static int ndisc_router_advertisment_recv(sd_event_source *s, int fd, uint32_t r if (errno == EAGAIN || errno == EINTR) return 0; - log_ndisc(nd, "Could not receive message from ICMPv6 socket: %m"); - return -errno; - } else if ((size_t)len < sizeof(struct nd_router_advert)) { + return log_ndisc_errno(nd, errno, "Could not receive message from ICMPv6 socket: %m"); + } + if ((size_t) len < sizeof(struct nd_router_advert)) { + log_ndisc(nd, "Too small to be a router advertisement: ignoring"); return 0; - } else if (msg.msg_namelen == 0) + } + + if (msg.msg_namelen == 0) gw = NULL; /* only happens when running the test-suite over a socketpair */ else if (msg.msg_namelen != sizeof(sa.in6)) { log_ndisc(nd, "Received invalid source address size from ICMPv6 socket: %zu bytes", (size_t)msg.msg_namelen); @@ -560,21 +564,14 @@ static int ndisc_router_advertisment_recv(sd_event_source *s, int fd, uint32_t r if (ra->nd_ra_code != 0) return 0; - nd->timeout = sd_event_source_unref(nd->timeout); - - nd->state = NDISC_STATE_ADVERTISMENT_LISTEN; + nd->timeout_event_source = sd_event_source_unref(nd->timeout_event_source); + nd->state = NDISC_STATE_ADVERTISEMENT_LISTEN; stateful = ra->nd_ra_flags_reserved & (ND_RA_FLAG_MANAGED | ND_RA_FLAG_OTHER); pref = (ra->nd_ra_flags_reserved & ND_RA_FLAG_PREF) >> 3; - switch (pref) { - case ND_RA_FLAG_PREF_LOW: - case ND_RA_FLAG_PREF_HIGH: - break; - default: + if (!IN_SET(pref, ND_RA_FLAG_PREF_LOW, ND_RA_FLAG_PREF_HIGH)) pref = ND_RA_FLAG_PREF_MEDIUM; - break; - } lifetime = be16toh(ra->nd_ra_router_lifetime); @@ -583,9 +580,9 @@ static int ndisc_router_advertisment_recv(sd_event_source *s, int fd, uint32_t r pref == ND_RA_FLAG_PREF_HIGH ? "high" : pref == ND_RA_FLAG_PREF_LOW ? "low" : "medium", lifetime); - r = ndisc_ra_parse(nd, ra, len); + r = ndisc_ra_parse(nd, ra, (size_t) len); if (r < 0) { - log_ndisc(nd, "Could not parse Router Advertisement: %s", strerror(-r)); + log_ndisc_errno(nd, r, "Could not parse Router Advertisement: %m"); return 0; } @@ -597,24 +594,25 @@ static int ndisc_router_advertisment_recv(sd_event_source *s, int fd, uint32_t r static int ndisc_router_solicitation_timeout(sd_event_source *s, uint64_t usec, void *userdata) { sd_ndisc *nd = userdata; - uint64_t time_now, next_timeout; + usec_t time_now, next_timeout; int r; assert(s); assert(nd); assert(nd->event); - nd->timeout = sd_event_source_unref(nd->timeout); + nd->timeout_event_source = sd_event_source_unref(nd->timeout_event_source); if (nd->nd_sent >= NDISC_MAX_ROUTER_SOLICITATIONS) { if (nd->callback) nd->callback(nd, SD_NDISC_EVENT_TIMEOUT, nd->userdata); - nd->state = NDISC_STATE_ADVERTISMENT_LISTEN; + nd->state = NDISC_STATE_ADVERTISEMENT_LISTEN; } else { r = icmp6_send_router_solicitation(nd->fd, &nd->mac_addr); - if (r < 0) - log_ndisc(nd, "Error sending Router Solicitation"); - else { + if (r < 0) { + log_ndisc_errno(nd, r, "Error sending Router Solicitation: %m"); + goto fail; + } else { nd->state = NDISC_STATE_SOLICITATION_SENT; log_ndisc(nd, "Sent Router Solicitation"); } @@ -625,35 +623,39 @@ static int ndisc_router_solicitation_timeout(sd_event_source *s, uint64_t usec, next_timeout = time_now + NDISC_ROUTER_SOLICITATION_INTERVAL; - r = sd_event_add_time(nd->event, &nd->timeout, clock_boottime_or_monotonic(), + r = sd_event_add_time(nd->event, &nd->timeout_event_source, clock_boottime_or_monotonic(), next_timeout, 0, ndisc_router_solicitation_timeout, nd); if (r < 0) { - /* we cannot continue if we are unable to rearm the timer */ - sd_ndisc_stop(nd); - return 0; + log_ndisc_errno(nd, r, "Failed to allocate timer event: %m"); + goto fail; } - r = sd_event_source_set_priority(nd->timeout, nd->event_priority); - if (r < 0) - return 0; + r = sd_event_source_set_priority(nd->timeout_event_source, nd->event_priority); + if (r < 0) { + log_ndisc_errno(nd, r, "Cannot set timer priority: %m"); + goto fail; + } - r = sd_event_source_set_description(nd->timeout, "ndisc-timeout"); - if (r < 0) - return 0; + (void) sd_event_source_set_description(nd->timeout_event_source, "ndisc-timeout"); } return 0; + +fail: + sd_ndisc_stop(nd); + return 0; } int sd_ndisc_stop(sd_ndisc *nd) { assert_return(nd, -EINVAL); - assert_return(nd->event, -EINVAL); - log_ndisc(client, "Stop NDisc"); + if (nd->state == NDISC_STATE_IDLE) + return 0; - ndisc_init(nd); + log_ndisc(nd, "Stopping IPv6 Router Solicitation client"); + ndisc_reset(nd); nd->state = NDISC_STATE_IDLE; if (nd->callback) @@ -665,49 +667,41 @@ int sd_ndisc_stop(sd_ndisc *nd) { int sd_ndisc_router_discovery_start(sd_ndisc *nd) { int r; - assert(nd); - assert(nd->event); - - if (nd->state != NDISC_STATE_IDLE) - return -EBUSY; - - if (nd->index < 1) - return -EINVAL; + assert_return(nd, -EINVAL); + assert_return(nd->event, -EINVAL); + assert_return(nd->ifindex > 0, -EINVAL); + assert_return(nd->state == NDISC_STATE_IDLE, -EBUSY); - r = icmp6_bind_router_solicitation(nd->index); + r = icmp6_bind_router_solicitation(nd->ifindex); if (r < 0) return r; nd->fd = r; - r = sd_event_add_io(nd->event, &nd->recv, nd->fd, EPOLLIN, - ndisc_router_advertisment_recv, nd); + r = sd_event_add_io(nd->event, &nd->recv_event_source, nd->fd, EPOLLIN, ndisc_router_advertisement_recv, nd); if (r < 0) - goto error; + goto fail; - r = sd_event_source_set_priority(nd->recv, nd->event_priority); + r = sd_event_source_set_priority(nd->recv_event_source, nd->event_priority); if (r < 0) - goto error; + goto fail; - r = sd_event_source_set_description(nd->recv, "ndisc-receive-message"); - if (r < 0) - goto error; + (void) sd_event_source_set_description(nd->recv_event_source, "ndisc-receive-message"); - r = sd_event_add_time(nd->event, &nd->timeout, clock_boottime_or_monotonic(), - 0, 0, ndisc_router_solicitation_timeout, nd); + r = sd_event_add_time(nd->event, &nd->timeout_event_source, clock_boottime_or_monotonic(), 0, 0, ndisc_router_solicitation_timeout, nd); if (r < 0) - goto error; + goto fail; - r = sd_event_source_set_priority(nd->timeout, nd->event_priority); + r = sd_event_source_set_priority(nd->timeout_event_source, nd->event_priority); if (r < 0) - goto error; + goto fail; - r = sd_event_source_set_description(nd->timeout, "ndisc-timeout"); -error: - if (r < 0) - ndisc_init(nd); - else - log_ndisc(client, "Start Router Solicitation"); + (void) sd_event_source_set_description(nd->timeout_event_source, "ndisc-timeout"); + + log_ndisc(ns, "Started IPv6 Router Solicitation client"); + return 0; +fail: + ndisc_reset(nd); return r; } diff --git a/src/libsystemd-network/test-acd.c b/src/libsystemd-network/test-acd.c index 75564615b9..27fcc332a3 100644 --- a/src/libsystemd-network/test-acd.c +++ b/src/libsystemd-network/test-acd.c @@ -56,7 +56,7 @@ static int client_run(int ifindex, const struct in_addr *pa, const struct ether_ assert_se(sd_ipv4acd_new(&acd) >= 0); assert_se(sd_ipv4acd_attach_event(acd, e, 0) >= 0); - assert_se(sd_ipv4acd_set_index(acd, ifindex) >= 0); + assert_se(sd_ipv4acd_set_ifindex(acd, ifindex) >= 0); assert_se(sd_ipv4acd_set_mac(acd, ha) >= 0); assert_se(sd_ipv4acd_set_address(acd, pa) >= 0); assert_se(sd_ipv4acd_set_callback(acd, acd_handler, NULL) >= 0); diff --git a/src/libsystemd-network/test-dhcp-client.c b/src/libsystemd-network/test-dhcp-client.c index c3c08fef5e..2a101cb1fe 100644 --- a/src/libsystemd-network/test-dhcp-client.c +++ b/src/libsystemd-network/test-dhcp-client.c @@ -66,13 +66,13 @@ static void test_request_basic(sd_event *e) { assert_se(sd_dhcp_client_set_request_option(NULL, 0) == -EINVAL); assert_se(sd_dhcp_client_set_request_address(NULL, NULL) == -EINVAL); - assert_se(sd_dhcp_client_set_index(NULL, 0) == -EINVAL); + assert_se(sd_dhcp_client_set_ifindex(NULL, 0) == -EINVAL); - assert_se(sd_dhcp_client_set_index(client, 15) == 0); - assert_se(sd_dhcp_client_set_index(client, -42) == -EINVAL); - assert_se(sd_dhcp_client_set_index(client, -1) == -EINVAL); - assert_se(sd_dhcp_client_set_index(client, 0) == -EINVAL); - assert_se(sd_dhcp_client_set_index(client, 1) == 0); + assert_se(sd_dhcp_client_set_ifindex(client, 15) == 0); + assert_se(sd_dhcp_client_set_ifindex(client, -42) == -EINVAL); + assert_se(sd_dhcp_client_set_ifindex(client, -1) == -EINVAL); + assert_se(sd_dhcp_client_set_ifindex(client, 0) == -EINVAL); + assert_se(sd_dhcp_client_set_ifindex(client, 1) == 0); assert_se(sd_dhcp_client_set_request_option(client, SD_DHCP_OPTION_SUBNET_MASK) == -EEXIST); @@ -243,7 +243,7 @@ static void test_discover_message(sd_event *e) { r = sd_dhcp_client_attach_event(client, e, 0); assert_se(r >= 0); - assert_se(sd_dhcp_client_set_index(client, 42) >= 0); + assert_se(sd_dhcp_client_set_ifindex(client, 42) >= 0); assert_se(sd_dhcp_client_set_mac(client, mac_addr, ETH_ALEN, ARPHRD_ETHER) >= 0); assert_se(sd_dhcp_client_set_request_option(client, 248) >= 0); @@ -458,7 +458,7 @@ static void test_addr_acq(sd_event *e) { r = sd_dhcp_client_attach_event(client, e, 0); assert_se(r >= 0); - assert_se(sd_dhcp_client_set_index(client, 42) >= 0); + assert_se(sd_dhcp_client_set_ifindex(client, 42) >= 0); assert_se(sd_dhcp_client_set_mac(client, mac_addr, ETH_ALEN, ARPHRD_ETHER) >= 0); assert_se(sd_dhcp_client_set_callback(client, test_addr_acq_acquired, e) >= 0); diff --git a/src/libsystemd-network/test-dhcp6-client.c b/src/libsystemd-network/test-dhcp6-client.c index e74c8c72db..bd289fa802 100644 --- a/src/libsystemd-network/test-dhcp6-client.c +++ b/src/libsystemd-network/test-dhcp6-client.c @@ -59,10 +59,10 @@ static int test_client_basic(sd_event *e) { assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0); - assert_se(sd_dhcp6_client_set_index(client, 15) == 0); - assert_se(sd_dhcp6_client_set_index(client, -42) == -EINVAL); - assert_se(sd_dhcp6_client_set_index(client, -1) == 0); - assert_se(sd_dhcp6_client_set_index(client, 42) >= 0); + assert_se(sd_dhcp6_client_set_ifindex(client, 15) == 0); + assert_se(sd_dhcp6_client_set_ifindex(client, -42) == -EINVAL); + assert_se(sd_dhcp6_client_set_ifindex(client, -1) == 0); + assert_se(sd_dhcp6_client_set_ifindex(client, 42) >= 0); assert_se(sd_dhcp6_client_set_mac(client, (const uint8_t *) &mac_addr, sizeof (mac_addr), @@ -712,7 +712,7 @@ static int test_client_solicit(sd_event *e) { assert_se(sd_dhcp6_client_attach_event(client, e, 0) >= 0); - assert_se(sd_dhcp6_client_set_index(client, test_index) == 0); + assert_se(sd_dhcp6_client_set_ifindex(client, test_index) == 0); assert_se(sd_dhcp6_client_set_mac(client, (const uint8_t *) &mac_addr, sizeof (mac_addr), ARPHRD_ETHER) >= 0); diff --git a/src/libsystemd-network/test-ipv4ll-manual.c b/src/libsystemd-network/test-ipv4ll-manual.c index 85dd61470d..2b1387fa91 100644 --- a/src/libsystemd-network/test-ipv4ll-manual.c +++ b/src/libsystemd-network/test-ipv4ll-manual.c @@ -64,7 +64,7 @@ static int client_run(int ifindex, const char *seed_str, const struct ether_addr assert_se(sd_ipv4ll_new(&ll) >= 0); assert_se(sd_ipv4ll_attach_event(ll, e, 0) >= 0); - assert_se(sd_ipv4ll_set_index(ll, ifindex) >= 0); + assert_se(sd_ipv4ll_set_ifindex(ll, ifindex) >= 0); assert_se(sd_ipv4ll_set_mac(ll, ha) >= 0); assert_se(sd_ipv4ll_set_callback(ll, ll_handler, NULL) >= 0); diff --git a/src/libsystemd-network/test-ipv4ll.c b/src/libsystemd-network/test-ipv4ll.c index 8cdbfb8ed8..fe70697075 100644 --- a/src/libsystemd-network/test-ipv4ll.c +++ b/src/libsystemd-network/test-ipv4ll.c @@ -101,7 +101,7 @@ int arp_network_bind_raw_socket(int index, be32_t address, const struct ether_ad static void test_public_api_setters(sd_event *e) { struct in_addr address = {}; - unsigned seed = 0; + uint64_t seed = 0; sd_ipv4ll *ll; struct ether_addr mac_addr = { .ether_addr_octet = {'A', 'B', 'C', '1', '2', '3'}}; @@ -136,11 +136,11 @@ static void test_public_api_setters(sd_event *e) { assert_se(sd_ipv4ll_set_mac(ll, NULL) == -EINVAL); assert_se(sd_ipv4ll_set_mac(ll, &mac_addr) == 0); - assert_se(sd_ipv4ll_set_index(NULL, -1) == -EINVAL); - assert_se(sd_ipv4ll_set_index(ll, -1) == -EINVAL); - assert_se(sd_ipv4ll_set_index(ll, -99) == -EINVAL); - assert_se(sd_ipv4ll_set_index(ll, 1) == 0); - assert_se(sd_ipv4ll_set_index(ll, 99) == 0); + assert_se(sd_ipv4ll_set_ifindex(NULL, -1) == -EINVAL); + assert_se(sd_ipv4ll_set_ifindex(ll, -1) == -EINVAL); + assert_se(sd_ipv4ll_set_ifindex(ll, -99) == -EINVAL); + assert_se(sd_ipv4ll_set_ifindex(ll, 1) == 0); + assert_se(sd_ipv4ll_set_ifindex(ll, 99) == 0); assert_se(sd_ipv4ll_ref(ll) == ll); assert_se(sd_ipv4ll_unref(ll) == NULL); @@ -172,7 +172,7 @@ static void test_basic_request(sd_event *e) { basic_request_handler_userdata) == 0); assert_se(sd_ipv4ll_start(ll) == -EINVAL); - assert_se(sd_ipv4ll_set_index(ll, 1) == 0); + assert_se(sd_ipv4ll_set_ifindex(ll, 1) == 0); assert_se(sd_ipv4ll_start(ll) == 0); sd_event_run(e, (uint64_t) -1); diff --git a/src/libsystemd-network/test-ndisc-rs.c b/src/libsystemd-network/test-ndisc-rs.c index f7b2eb8050..4817c968ac 100644 --- a/src/libsystemd-network/test-ndisc-rs.c +++ b/src/libsystemd-network/test-ndisc-rs.c @@ -130,7 +130,7 @@ static void test_rs(void) { assert_se(sd_ndisc_attach_event(nd, e, 0) >= 0); - assert_se(sd_ndisc_set_index(nd, 42) >= 0); + assert_se(sd_ndisc_set_ifindex(nd, 42) >= 0); assert_se(sd_ndisc_set_mac(nd, &mac_addr) >= 0); assert_se(sd_ndisc_set_callback(nd, test_rs_done, NULL, NULL, NULL, e) >= 0); diff --git a/src/login/org.freedesktop.login1.policy.in b/src/login/org.freedesktop.login1.policy.in index 1fa6441629..66cbce393c 100644 --- a/src/login/org.freedesktop.login1.policy.in +++ b/src/login/org.freedesktop.login1.policy.in @@ -116,6 +116,8 @@ <_message>Explicit request is required to run programs as a non-logged-in user.</_message> <defaults> <allow_any>yes</allow_any> + <allow_inactive>yes</allow_inactive> + <allow_active>yes</allow_active> </defaults> </action> diff --git a/src/network/networkd-dhcp4.c b/src/network/networkd-dhcp4.c index 2ddcee9db8..12fb8e3fce 100644 --- a/src/network/networkd-dhcp4.c +++ b/src/network/networkd-dhcp4.c @@ -554,7 +554,7 @@ int dhcp4_configure(Link *link) { if (r < 0) return r; - r = sd_dhcp_client_set_index(link->dhcp_client, link->ifindex); + r = sd_dhcp_client_set_ifindex(link->dhcp_client, link->ifindex); if (r < 0) return r; diff --git a/src/network/networkd-dhcp6.c b/src/network/networkd-dhcp6.c index 37e13e639e..a44c9ea71d 100644 --- a/src/network/networkd-dhcp6.c +++ b/src/network/networkd-dhcp6.c @@ -237,7 +237,7 @@ int dhcp6_configure(Link *link) { if (r < 0) goto error; - r = sd_dhcp6_client_set_index(client, link->ifindex); + r = sd_dhcp6_client_set_ifindex(client, link->ifindex); if (r < 0) goto error; diff --git a/src/network/networkd-ipv4ll.c b/src/network/networkd-ipv4ll.c index ae323d595b..735c231a4c 100644 --- a/src/network/networkd-ipv4ll.c +++ b/src/network/networkd-ipv4ll.c @@ -215,9 +215,7 @@ int ipv4ll_configure(Link *link) { if (link->udev_device) { r = net_get_unique_predictable_data(link->udev_device, &seed); if (r >= 0) { - assert_cc(sizeof(unsigned) <= 8); - - r = sd_ipv4ll_set_address_seed(link->ipv4ll, (unsigned)seed); + r = sd_ipv4ll_set_address_seed(link->ipv4ll, seed); if (r < 0) return r; } @@ -231,7 +229,7 @@ int ipv4ll_configure(Link *link) { if (r < 0) return r; - r = sd_ipv4ll_set_index(link->ipv4ll, link->ifindex); + r = sd_ipv4ll_set_ifindex(link->ipv4ll, link->ifindex); if (r < 0) return r; diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c index 1a380bd214..3baca2e63c 100644 --- a/src/network/networkd-ndisc.c +++ b/src/network/networkd-ndisc.c @@ -243,7 +243,7 @@ int ndisc_configure(Link *link) { if (r < 0) return r; - r = sd_ndisc_set_index(link->ndisc_router_discovery, link->ifindex); + r = sd_ndisc_set_ifindex(link->ndisc_router_discovery, link->ifindex); if (r < 0) return r; diff --git a/src/nspawn/nspawn-patch-uid.c b/src/nspawn/nspawn-patch-uid.c index c7382d412d..cc79597c95 100644 --- a/src/nspawn/nspawn-patch-uid.c +++ b/src/nspawn/nspawn-patch-uid.c @@ -280,7 +280,13 @@ static int patch_fd(int fd, const char *name, const struct stat *st, uid_t shift return r > 0 || changed; } -static int is_procfs_sysfs_or_suchlike(int fd) { +/* + * Check if the filesystem is fully compatible with user namespaces or + * UID/GID patching. Some filesystems in this list can be fully mounted inside + * user namespaces, however their inodes may relate to host resources or only + * valid in the global user namespace, therefore no patching should be applied. + */ +static int is_fs_fully_userns_compatible(int fd) { struct statfs sfs; assert(fd >= 0); @@ -300,6 +306,9 @@ static int is_procfs_sysfs_or_suchlike(int fd) { F_TYPE_EQUAL(sfs.f_type, PSTOREFS_MAGIC) || F_TYPE_EQUAL(sfs.f_type, SELINUX_MAGIC) || F_TYPE_EQUAL(sfs.f_type, SMACK_MAGIC) || + F_TYPE_EQUAL(sfs.f_type, SECURITYFS_MAGIC) || + F_TYPE_EQUAL(sfs.f_type, BPF_FS_MAGIC) || + F_TYPE_EQUAL(sfs.f_type, TRACEFS_MAGIC) || F_TYPE_EQUAL(sfs.f_type, SYSFS_MAGIC); } @@ -311,8 +320,8 @@ static int recurse_fd(int fd, bool donate_fd, const struct stat *st, uid_t shift /* We generally want to permit crossing of mount boundaries when patching the UIDs/GIDs. However, we * probably shouldn't do this for /proc and /sys if that is already mounted into place. Hence, let's - * stop the recursion when we hit a procfs or sysfs file system. */ - r = is_procfs_sysfs_or_suchlike(fd); + * stop the recursion when we hit procfs, sysfs or some other special file systems. */ + r = is_fs_fully_userns_compatible(fd); if (r < 0) goto finish; if (r > 0) { diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c new file mode 100644 index 0000000000..2d145b68a7 --- /dev/null +++ b/src/nspawn/nspawn-seccomp.c @@ -0,0 +1,143 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <errno.h> +#include <linux/netlink.h> +#include <sys/capability.h> +#include <sys/types.h> + +#ifdef HAVE_SECCOMP +#include <seccomp.h> +#endif + +#include "log.h" + +#ifdef HAVE_SECCOMP +#include "seccomp-util.h" +#endif + +#include "nspawn-seccomp.h" + +#ifdef HAVE_SECCOMP + +static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx, + uint64_t cap_list_retain) { + unsigned i; + int r; + static const struct { + uint64_t capability; + int syscall_num; + } blacklist[] = { + { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, + { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, + { CAP_SYS_BOOT, SCMP_SYS(kexec_load) }, + { CAP_SYS_ADMIN, SCMP_SYS(swapon) }, + { CAP_SYS_ADMIN, SCMP_SYS(swapoff) }, + { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at) }, + { CAP_SYS_MODULE, SCMP_SYS(init_module) }, + { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, + { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, + { CAP_SYSLOG, SCMP_SYS(syslog) }, + }; + + for (i = 0; i < ELEMENTSOF(blacklist); i++) { + if (cap_list_retain & (1ULL << blacklist[i].capability)) + continue; + + r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); + if (r == -EFAULT) + continue; /* unknown syscall */ + if (r < 0) { + log_error_errno(r, "Failed to block syscall: %m"); + return r; + } + } + + return 0; +} + +int setup_seccomp(uint64_t cap_list_retain) { + scmp_filter_ctx seccomp; + int r; + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return log_oom(); + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) { + log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); + goto finish; + } + + r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain); + if (r < 0) + goto finish; + + /* + Audit is broken in containers, much of the userspace audit + hookup will fail if running inside a container. We don't + care and just turn off creation of audit sockets. + + This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail + with EAFNOSUPPORT which audit userspace uses as indication + that audit is disabled in the kernel. + */ + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 2, + SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), + SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); + if (r < 0) { + log_error_errno(r, "Failed to add audit seccomp rule: %m"); + goto finish; + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) { + log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); + goto finish; + } + + r = seccomp_load(seccomp); + if (r == -EINVAL) { + log_debug_errno(r, "Kernel is probably not configured with CONFIG_SECCOMP. Disabling seccomp audit filter: %m"); + r = 0; + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to install seccomp audit filter: %m"); + goto finish; + } + +finish: + seccomp_release(seccomp); + return r; +} + +#else + +int setup_seccomp(uint64_t cap_list_retain) { + return 0; +} + +#endif diff --git a/src/nspawn/nspawn-seccomp.h b/src/nspawn/nspawn-seccomp.h new file mode 100644 index 0000000000..5bde16faf9 --- /dev/null +++ b/src/nspawn/nspawn-seccomp.h @@ -0,0 +1,24 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sys/types.h> + +int setup_seccomp(uint64_t cap_list_retain); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index ac11bcea5a..b421c182ce 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -26,9 +26,6 @@ #include <linux/loop.h> #include <pwd.h> #include <sched.h> -#ifdef HAVE_SECCOMP -#include <seccomp.h> -#endif #ifdef HAVE_SELINUX #include <selinux/selinux.h> #endif @@ -82,15 +79,13 @@ #include "nspawn-settings.h" #include "nspawn-setuid.h" #include "nspawn-stub-pid1.h" +#include "nspawn-seccomp.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" #include "ptyfwd.h" #include "random-util.h" #include "rm-rf.h" -#ifdef HAVE_SECCOMP -#include "seccomp-util.h" -#endif #include "selinux-util.h" #include "signal-util.h" #include "socket-util.h" @@ -136,7 +131,7 @@ static StartMode arg_start_mode = START_PID1; static bool arg_ephemeral = false; static LinkJournal arg_link_journal = LINK_AUTO; static bool arg_link_journal_try = false; -static uint64_t arg_retain = +static uint64_t arg_caps_retain = (1ULL << CAP_CHOWN) | (1ULL << CAP_DAC_OVERRIDE) | (1ULL << CAP_DAC_READ_SEARCH) | @@ -1075,7 +1070,7 @@ static int parse_argv(int argc, char *argv[]) { if (mask_all_settings) arg_settings_mask = _SETTINGS_MASK_ALL; - arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus; + arg_caps_retain = (arg_caps_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus; r = detect_unified_cgroup_hierarchy(); if (r < 0) @@ -1632,7 +1627,7 @@ static int setup_journal(const char *directory) { } static int drop_capabilities(void) { - return capability_bounding_set_drop(arg_retain, false); + return capability_bounding_set_drop(arg_caps_retain, false); } static int reset_audit_loginuid(void) { @@ -1667,99 +1662,6 @@ static int reset_audit_loginuid(void) { return 0; } -static int setup_seccomp(void) { - -#ifdef HAVE_SECCOMP - static const struct { - uint64_t capability; - int syscall_num; - } blacklist[] = { - { CAP_SYS_RAWIO, SCMP_SYS(iopl) }, - { CAP_SYS_RAWIO, SCMP_SYS(ioperm) }, - { CAP_SYS_BOOT, SCMP_SYS(kexec_load) }, - { CAP_SYS_ADMIN, SCMP_SYS(swapon) }, - { CAP_SYS_ADMIN, SCMP_SYS(swapoff) }, - { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at) }, - { CAP_SYS_MODULE, SCMP_SYS(init_module) }, - { CAP_SYS_MODULE, SCMP_SYS(finit_module) }, - { CAP_SYS_MODULE, SCMP_SYS(delete_module) }, - { CAP_SYSLOG, SCMP_SYS(syslog) }, - }; - - scmp_filter_ctx seccomp; - unsigned i; - int r; - - seccomp = seccomp_init(SCMP_ACT_ALLOW); - if (!seccomp) - return log_oom(); - - r = seccomp_add_secondary_archs(seccomp); - if (r < 0) { - log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m"); - goto finish; - } - - for (i = 0; i < ELEMENTSOF(blacklist); i++) { - if (arg_retain & (1ULL << blacklist[i].capability)) - continue; - - r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); - if (r == -EFAULT) - continue; /* unknown syscall */ - if (r < 0) { - log_error_errno(r, "Failed to block syscall: %m"); - goto finish; - } - } - - /* - Audit is broken in containers, much of the userspace audit - hookup will fail if running inside a container. We don't - care and just turn off creation of audit sockets. - - This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail - with EAFNOSUPPORT which audit userspace uses as indication - that audit is disabled in the kernel. - */ - - r = seccomp_rule_add( - seccomp, - SCMP_ACT_ERRNO(EAFNOSUPPORT), - SCMP_SYS(socket), - 2, - SCMP_A0(SCMP_CMP_EQ, AF_NETLINK), - SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT)); - if (r < 0) { - log_error_errno(r, "Failed to add audit seccomp rule: %m"); - goto finish; - } - - r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); - if (r < 0) { - log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m"); - goto finish; - } - - r = seccomp_load(seccomp); - if (r == -EINVAL) { - log_debug_errno(r, "Kernel is probably not configured with CONFIG_SECCOMP. Disabling seccomp audit filter: %m"); - r = 0; - goto finish; - } - if (r < 0) { - log_error_errno(r, "Failed to install seccomp audit filter: %m"); - goto finish; - } - -finish: - seccomp_release(seccomp); - return r; -#else - return 0; -#endif - -} static int setup_propagate(const char *root) { const char *p, *q; @@ -2988,7 +2890,7 @@ static int outer_child( if (r < 0) return r; - r = setup_seccomp(); + r = setup_seccomp(arg_caps_retain); if (r < 0) return r; @@ -3272,9 +3174,9 @@ static int load_settings(void) { if (settings->capability != 0) log_warning("Ignoring Capability= setting, file %s is not trusted.", p); } else - arg_retain |= plus; + arg_caps_retain |= plus; - arg_retain &= ~settings->drop_capability; + arg_caps_retain &= ~settings->drop_capability; } if ((arg_settings_mask & SETTING_KILL_SIGNAL) == 0 && diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index f68c4a41ac..502e98d9dc 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -166,11 +166,11 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "b", r); - } else if (streq(field, "MemoryLimit")) { + } else if (STR_IN_SET(field, "MemoryLow", "MemoryHigh", "MemoryMax", "MemoryLimit")) { uint64_t bytes; - if (isempty(eq) || streq(eq, "infinity")) - bytes = (uint64_t) -1; + if (isempty(eq) || streq(eq, "max") || streq(eq, "infinity")) + bytes = CGROUP_LIMIT_MAX; else { r = parse_size(eq, 1024, &bytes); if (r < 0) { diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 0500593d06..c301c6a64f 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -1439,6 +1439,8 @@ static int list_unit_files(int argc, char *argv[], void *userdata) { assert(c <= n_units); hashmap_free(h); + + r = 0; } else { _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -2025,6 +2027,7 @@ static int get_default(int argc, char *argv[], void *userdata) { return log_error_errno(r, "Failed to get default target: %m"); path = _path; + r = 0; } else { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus *bus; @@ -2072,6 +2075,9 @@ static int set_default(int argc, char *argv[], void *userdata) { if (install_client_side()) { r = unit_file_set_default(arg_scope, arg_root, unit, true, &changes, &n_changes); unit_file_dump_changes(r, "set default", changes, n_changes, arg_quiet); + + if (r > 0) + r = 0; } else { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; @@ -3493,6 +3499,9 @@ typedef struct UnitStatusInfo { /* CGroup */ uint64_t memory_current; + uint64_t memory_low; + uint64_t memory_high; + uint64_t memory_max; uint64_t memory_limit; uint64_t cpu_usage_nsec; uint64_t tasks_current; @@ -3775,10 +3784,30 @@ static void print_status_info( printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current)); - if (i->memory_limit != (uint64_t) -1) - printf(" (limit: %s)\n", format_bytes(buf, sizeof(buf), i->memory_limit)); - else - printf("\n"); + if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX || + i->memory_limit != CGROUP_LIMIT_MAX) { + const char *prefix = ""; + + printf(" ("); + if (i->memory_low > 0) { + printf("%slow: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_low)); + prefix = " "; + } + if (i->memory_high != CGROUP_LIMIT_MAX) { + printf("%shigh: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_high)); + prefix = " "; + } + if (i->memory_max != CGROUP_LIMIT_MAX) { + printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max)); + prefix = " "; + } + if (i->memory_limit != CGROUP_LIMIT_MAX) { + printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit)); + prefix = " "; + } + printf(")"); + } + printf("\n"); } if (i->cpu_usage_nsec != (uint64_t) -1) { @@ -4007,6 +4036,12 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo * i->assert_timestamp = (usec_t) u; else if (streq(name, "MemoryCurrent")) i->memory_current = u; + else if (streq(name, "MemoryLow")) + i->memory_low = u; + else if (streq(name, "MemoryHigh")) + i->memory_high = u; + else if (streq(name, "MemoryMax")) + i->memory_max = u; else if (streq(name, "MemoryLimit")) i->memory_limit = u; else if (streq(name, "TasksCurrent")) @@ -4500,6 +4535,8 @@ static int show_one( _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; UnitStatusInfo info = { .memory_current = (uint64_t) -1, + .memory_high = CGROUP_LIMIT_MAX, + .memory_max = CGROUP_LIMIT_MAX, .memory_limit = (uint64_t) -1, .cpu_usage_nsec = (uint64_t) -1, .tasks_current = (uint64_t) -1, @@ -5508,7 +5545,7 @@ static int enable_unit(int argc, char *argv[], void *userdata) { unit_file_dump_changes(r, verb, changes, n_changes, arg_quiet); if (r < 0) - return r; + goto finish; r = 0; } else { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL; @@ -5600,7 +5637,7 @@ static int enable_unit(int argc, char *argv[], void *userdata) { r = bus_deserialize_and_dump_unit_file_changes(reply, arg_quiet, &changes, &n_changes); if (r < 0) - return r; + goto finish; /* Try to reload if enabled */ if (!arg_no_reload) @@ -5676,6 +5713,9 @@ static int add_dependency(int argc, char *argv[], void *userdata) { if (install_client_side()) { r = unit_file_add_dependency(arg_scope, arg_runtime, arg_root, names, target, dep, arg_force, &changes, &n_changes); unit_file_dump_changes(r, "add dependency on", changes, n_changes, arg_quiet); + + if (r > 0) + r = 0; } else { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; @@ -5735,6 +5775,9 @@ static int preset_all(int argc, char *argv[], void *userdata) { if (install_client_side()) { r = unit_file_preset_all(arg_scope, arg_runtime, arg_root, arg_preset_mode, arg_force, &changes, &n_changes); unit_file_dump_changes(r, "preset", changes, n_changes, arg_quiet); + + if (r > 0) + r = 0; } else { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; @@ -5817,6 +5860,7 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) { puts(unit_file_state_to_string(state)); } + r = 0; } else { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; sd_bus *bus; @@ -7466,7 +7510,7 @@ static int systemctl_main(int argc, char *argv[]) { { "switch-root", 2, VERB_ANY, VERB_NOCHROOT, switch_root }, { "list-dependencies", VERB_ANY, 2, VERB_NOCHROOT, list_dependencies }, { "set-default", 2, 2, 0, set_default }, - { "get-default", VERB_ANY, 1, 0, get_default, }, + { "get-default", VERB_ANY, 1, 0, get_default }, { "set-property", 3, VERB_ANY, VERB_NOCHROOT, set_property }, { "is-system-running", VERB_ANY, 1, 0, is_system_running }, { "add-wants", 3, VERB_ANY, 0, add_dependency }, diff --git a/src/systemd/sd-dhcp-client.h b/src/systemd/sd-dhcp-client.h index 20b8c2873f..9a90c2ed42 100644 --- a/src/systemd/sd-dhcp-client.h +++ b/src/systemd/sd-dhcp-client.h @@ -99,7 +99,7 @@ int sd_dhcp_client_set_request_address( int sd_dhcp_client_set_request_broadcast( sd_dhcp_client *client, int broadcast); -int sd_dhcp_client_set_index( +int sd_dhcp_client_set_ifindex( sd_dhcp_client *client, int interface_index); int sd_dhcp_client_set_mac( diff --git a/src/systemd/sd-dhcp6-client.h b/src/systemd/sd-dhcp6-client.h index 90f62eaca4..7819f0d2de 100644 --- a/src/systemd/sd-dhcp6-client.h +++ b/src/systemd/sd-dhcp6-client.h @@ -82,7 +82,7 @@ int sd_dhcp6_client_set_callback( sd_dhcp6_client_callback_t cb, void *userdata); -int sd_dhcp6_client_set_index( +int sd_dhcp6_client_set_ifindex( sd_dhcp6_client *client, int interface_index); int sd_dhcp6_client_set_local_address( diff --git a/src/systemd/sd-ipv4acd.h b/src/systemd/sd-ipv4acd.h index 9e3e14a30c..16d99983a8 100644 --- a/src/systemd/sd-ipv4acd.h +++ b/src/systemd/sd-ipv4acd.h @@ -37,20 +37,20 @@ enum { }; typedef struct sd_ipv4acd sd_ipv4acd; -typedef void (*sd_ipv4acd_callback_t)(sd_ipv4acd *ll, int event, void *userdata); - -int sd_ipv4acd_detach_event(sd_ipv4acd *ll); -int sd_ipv4acd_attach_event(sd_ipv4acd *ll, sd_event *event, int64_t priority); -int sd_ipv4acd_get_address(sd_ipv4acd *ll, struct in_addr *address); -int sd_ipv4acd_set_callback(sd_ipv4acd *ll, sd_ipv4acd_callback_t cb, void *userdata); -int sd_ipv4acd_set_mac(sd_ipv4acd *ll, const struct ether_addr *addr); -int sd_ipv4acd_set_index(sd_ipv4acd *ll, int interface_index); -int sd_ipv4acd_set_address(sd_ipv4acd *ll, const struct in_addr *address); -int sd_ipv4acd_is_running(sd_ipv4acd *ll); -int sd_ipv4acd_start(sd_ipv4acd *ll); -int sd_ipv4acd_stop(sd_ipv4acd *ll); -sd_ipv4acd *sd_ipv4acd_ref(sd_ipv4acd *ll); -sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *ll); +typedef void (*sd_ipv4acd_callback_t)(sd_ipv4acd *acd, int event, void *userdata); + +int sd_ipv4acd_detach_event(sd_ipv4acd *acd); +int sd_ipv4acd_attach_event(sd_ipv4acd *acd, sd_event *event, int64_t priority); +int sd_ipv4acd_get_address(sd_ipv4acd *acd, struct in_addr *address); +int sd_ipv4acd_set_callback(sd_ipv4acd *acd, sd_ipv4acd_callback_t cb, void *userdata); +int sd_ipv4acd_set_mac(sd_ipv4acd *acd, const struct ether_addr *addr); +int sd_ipv4acd_set_ifindex(sd_ipv4acd *acd, int interface_index); +int sd_ipv4acd_set_address(sd_ipv4acd *acd, const struct in_addr *address); +int sd_ipv4acd_is_running(sd_ipv4acd *acd); +int sd_ipv4acd_start(sd_ipv4acd *acd); +int sd_ipv4acd_stop(sd_ipv4acd *acd); +sd_ipv4acd *sd_ipv4acd_ref(sd_ipv4acd *acd); +sd_ipv4acd *sd_ipv4acd_unref(sd_ipv4acd *acd); int sd_ipv4acd_new(sd_ipv4acd **ret); _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ipv4acd, sd_ipv4acd_unref); diff --git a/src/systemd/sd-ipv4ll.h b/src/systemd/sd-ipv4ll.h index 6fa38a2243..1109ec52e0 100644 --- a/src/systemd/sd-ipv4ll.h +++ b/src/systemd/sd-ipv4ll.h @@ -43,15 +43,15 @@ int sd_ipv4ll_attach_event(sd_ipv4ll *ll, sd_event *event, int64_t priority); int sd_ipv4ll_get_address(sd_ipv4ll *ll, struct in_addr *address); int sd_ipv4ll_set_callback(sd_ipv4ll *ll, sd_ipv4ll_callback_t cb, void *userdata); int sd_ipv4ll_set_mac(sd_ipv4ll *ll, const struct ether_addr *addr); -int sd_ipv4ll_set_index(sd_ipv4ll *ll, int interface_index); +int sd_ipv4ll_set_ifindex(sd_ipv4ll *ll, int interface_index); int sd_ipv4ll_set_address(sd_ipv4ll *ll, const struct in_addr *address); -int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, unsigned seed); +int sd_ipv4ll_set_address_seed(sd_ipv4ll *ll, uint64_t seed); int sd_ipv4ll_is_running(sd_ipv4ll *ll); int sd_ipv4ll_start(sd_ipv4ll *ll); int sd_ipv4ll_stop(sd_ipv4ll *ll); sd_ipv4ll *sd_ipv4ll_ref(sd_ipv4ll *ll); sd_ipv4ll *sd_ipv4ll_unref(sd_ipv4ll *ll); -int sd_ipv4ll_new (sd_ipv4ll **ret); +int sd_ipv4ll_new(sd_ipv4ll **ret); _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_ipv4ll, sd_ipv4ll_unref); diff --git a/src/systemd/sd-ndisc.h b/src/systemd/sd-ndisc.h index 29bcbe8e3e..2b774233b8 100644 --- a/src/systemd/sd-ndisc.h +++ b/src/systemd/sd-ndisc.h @@ -49,7 +49,7 @@ int sd_ndisc_set_callback(sd_ndisc *nd, sd_ndisc_prefix_autonomous_callback_t pacb, sd_ndisc_callback_t cb, void *userdata); -int sd_ndisc_set_index(sd_ndisc *nd, int interface_index); +int sd_ndisc_set_ifindex(sd_ndisc *nd, int interface_index); int sd_ndisc_set_mac(sd_ndisc *nd, const struct ether_addr *mac_addr); int sd_ndisc_attach_event(sd_ndisc *nd, sd_event *event, int64_t priority); |