summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--man/logind.conf.xml9
-rw-r--r--man/systemd-system.conf.xml9
-rw-r--r--man/systemd.resource-control.xml15
-rw-r--r--src/basic/mount-util.c11
-rw-r--r--src/basic/util.c55
-rw-r--r--src/basic/util.h3
-rw-r--r--src/core/cgroup.c5
-rw-r--r--src/core/dbus-cgroup.c34
-rw-r--r--src/core/load-fragment.c19
-rw-r--r--src/core/macros.systemd.in6
-rw-r--r--src/core/main.c22
-rw-r--r--src/core/manager.c2
-rw-r--r--src/core/system.conf2
-rw-r--r--src/coredump/coredump.c55
-rw-r--r--src/login/logind-gperf.gperf2
-rw-r--r--src/login/logind-user.c45
-rw-r--r--src/login/logind.c2
-rw-r--r--src/login/logind.conf.in2
-rw-r--r--src/login/logind.h1
-rw-r--r--src/nspawn/nspawn-register.c17
-rw-r--r--src/shared/bus-unit-util.c39
-rw-r--r--src/systemctl/systemctl.c3
-rw-r--r--src/test/test-util.c38
-rw-r--r--units/systemd-nspawn@.service.in11
24 files changed, 310 insertions, 97 deletions
diff --git a/man/logind.conf.xml b/man/logind.conf.xml
index fe92277a1f..adba5a4131 100644
--- a/man/logind.conf.xml
+++ b/man/logind.conf.xml
@@ -315,12 +315,11 @@
<varlistentry>
<term><varname>UserTasksMax=</varname></term>
- <listitem><para>Sets the maximum number of OS tasks each user
- may run concurrently. This controls the
- <varname>TasksMax=</varname> setting of the per-user slice
- unit, see
+ <listitem><para>Sets the maximum number of OS tasks each user may run concurrently. This controls the
+ <varname>TasksMax=</varname> setting of the per-user slice unit, see
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
- for details. Defaults to 12288 (12K).</para></listitem>
+ for details. Defaults to 33%, which equals 10813 with the kernel's defaults on the host, but might be smaller
+ in OS containers.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
index 8833e73c72..1bb40fd234 100644
--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@@ -325,12 +325,11 @@
<varlistentry>
<term><varname>DefaultTasksMax=</varname></term>
- <listitem><para>Configure the default value for the per-unit
- <varname>TasksMax=</varname> setting. See
+ <listitem><para>Configure the default value for the per-unit <varname>TasksMax=</varname> setting. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
- for details. This setting applies to all unit types that
- support resource control settings, with the exception of slice
- units. Defaults to 512.</para></listitem>
+ for details. This setting applies to all unit types that support resource control settings, with the exception
+ of slice units. Defaults to 15%, which equals 4915 with the kernel's defaults on the host, but might be smaller
+ in OS containers.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 7263c0b329..bf44a68345 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -327,15 +327,12 @@
<term><varname>TasksMax=<replaceable>N</replaceable></varname></term>
<listitem>
- <para>Specify the maximum number of tasks that may be
- created in the unit. This ensures that the number of tasks
- accounted for the unit (see above) stays below a specific
- limit. If assigned the special value
- <literal>infinity</literal>, no tasks limit is applied. This
- controls the <literal>pids.max</literal> control group
- attribute. For details about this control group attribute,
- see <ulink
- url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt</ulink>.</para>
+ <para>Specify the maximum number of tasks that may be created in the unit. This ensures that the number of
+ tasks accounted for the unit (see above) stays below a specific limit. This either takes an absolute number
+ of tasks or a percentage value that is taken relative to the configured maximum number of tasks on the
+ system. If assigned the special value <literal>infinity</literal>, no tasks limit is applied. This controls
+ the <literal>pids.max</literal> control group attribute. For details about this control group attribute, see
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt</ulink>.</para>
<para>Implies <literal>TasksAccounting=true</literal>. The
system default for this setting may be controlled with
diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c
index 63dff3dd5c..b91f0f9e0e 100644
--- a/src/basic/mount-util.c
+++ b/src/basic/mount-util.c
@@ -534,15 +534,22 @@ int repeat_unmount(const char *path, int flags) {
}
const char* mode_to_inaccessible_node(mode_t mode) {
+ /* This function maps a node type to the correspondent inaccessible node type.
+ * Character and block inaccessible devices may not be created (because major=0 and minor=0),
+ * in such case we map character and block devices to the inaccessible node type socket. */
switch(mode & S_IFMT) {
case S_IFREG:
return "/run/systemd/inaccessible/reg";
case S_IFDIR:
return "/run/systemd/inaccessible/dir";
case S_IFCHR:
- return "/run/systemd/inaccessible/chr";
+ if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
+ return "/run/systemd/inaccessible/chr";
+ return "/run/systemd/inaccessible/sock";
case S_IFBLK:
- return "/run/systemd/inaccessible/blk";
+ if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
+ return "/run/systemd/inaccessible/blk";
+ return "/run/systemd/inaccessible/sock";
case S_IFIFO:
return "/run/systemd/inaccessible/fifo";
case S_IFSOCK:
diff --git a/src/basic/util.c b/src/basic/util.c
index 41e79315ae..9d66d28eb7 100644
--- a/src/basic/util.c
+++ b/src/basic/util.c
@@ -791,6 +791,61 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
return r;
}
+uint64_t system_tasks_max(void) {
+
+#if SIZEOF_PID_T == 4
+#define TASKS_MAX ((uint64_t) (INT32_MAX-1))
+#elif SIZEOF_PID_T == 2
+#define TASKS_MAX ((uint64_t) (INT16_MAX-1))
+#else
+#error "Unknown pid_t size"
+#endif
+
+ _cleanup_free_ char *value = NULL, *root = NULL;
+ uint64_t a = TASKS_MAX, b = TASKS_MAX;
+
+ /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
+ * limit:
+ *
+ * a) the maximum value for the pid_t type
+ * b) the cgroups pids_max attribute for the system
+ * c) the kernel's configure maximum PID value
+ *
+ * And then pick the smallest of the three */
+
+ if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
+ (void) safe_atou64(value, &a);
+
+ if (cg_get_root_path(&root) >= 0) {
+ value = mfree(value);
+
+ if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
+ (void) safe_atou64(value, &b);
+ }
+
+ return MIN3(TASKS_MAX,
+ a <= 0 ? TASKS_MAX : a,
+ b <= 0 ? TASKS_MAX : b);
+}
+
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
+ uint64_t t, m;
+
+ assert(max > 0);
+
+ /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
+ * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
+
+ t = system_tasks_max();
+ assert(t > 0);
+
+ m = t * v;
+ if (m / t != v) /* overflow? */
+ return UINT64_MAX;
+
+ return m / max;
+}
+
int update_reboot_parameter_and_warn(const char *param) {
int r;
diff --git a/src/basic/util.h b/src/basic/util.h
index 94b8091906..44497dcd78 100644
--- a/src/basic/util.h
+++ b/src/basic/util.h
@@ -184,6 +184,9 @@ int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int
uint64_t physical_memory(void);
uint64_t physical_memory_scale(uint64_t v, uint64_t max);
+uint64_t system_tasks_max(void);
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
+
int update_reboot_parameter_and_warn(const char *param);
int version(void);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 8b0f11ed50..c19e43f571 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -800,7 +800,10 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
"/dev/random\0" "rwm\0"
"/dev/urandom\0" "rwm\0"
"/dev/tty\0" "rwm\0"
- "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
+ "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */
+ /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
const char *x, *y;
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 6167ce92cd..85b0c86a2f 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -856,7 +856,7 @@ int bus_cgroup_set_property(
return 1;
- } else if (STR_IN_SET(name, "MemoryLowByPhysicalMemory", "MemoryHighByPhysicalMemory", "MemoryMaxByPhysicalMemory")) {
+ } else if (STR_IN_SET(name, "MemoryLowScale", "MemoryHighScale", "MemoryMaxScale")) {
uint32_t raw;
uint64_t v;
@@ -872,7 +872,7 @@ int bus_cgroup_set_property(
const char *e;
/* Chop off suffix */
- assert_se(e = endswith(name, "ByPhysicalMemory"));
+ assert_se(e = endswith(name, "Scale"));
name = strndupa(name, e - name);
if (streq(name, "MemoryLow"))
@@ -883,7 +883,8 @@ int bus_cgroup_set_property(
c->memory_max = v;
unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
- unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name, (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100, (uint64_t) UINT32_MAX)));
+ unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name,
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
}
return 1;
@@ -909,7 +910,7 @@ int bus_cgroup_set_property(
return 1;
- } else if (streq(name, "MemoryLimitByPhysicalMemory")) {
+ } else if (streq(name, "MemoryLimitScale")) {
uint64_t limit;
uint32_t raw;
@@ -924,7 +925,8 @@ int bus_cgroup_set_property(
if (mode != UNIT_CHECK) {
c->memory_limit = limit;
unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
- unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%", (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100, (uint64_t) UINT32_MAX)));
+ unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
}
return 1;
@@ -1060,6 +1062,8 @@ int bus_cgroup_set_property(
r = sd_bus_message_read(message, "t", &limit);
if (r < 0)
return r;
+ if (limit <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
c->tasks_max = limit;
@@ -1072,6 +1076,26 @@ int bus_cgroup_set_property(
}
return 1;
+ } else if (streq(name, "TasksMaxScale")) {
+ uint64_t limit;
+ uint32_t raw;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ limit = system_tasks_max_scale(raw, UINT32_MAX);
+ if (limit <= 0 || limit >= UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ c->tasks_max = limit;
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+ unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
}
if (u->transient && u->load_state == UNIT_STUB) {
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 782e420e4c..ae306de4ae 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -2823,8 +2823,8 @@ int config_parse_memory_limit(
} else
bytes = physical_memory_scale(r, 100U);
- if (bytes < 1) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' too small. Ignoring.", rvalue);
+ if (bytes <= 0 || bytes >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range. Ignoring.", rvalue);
return 0;
}
}
@@ -2861,9 +2861,18 @@ int config_parse_tasks_max(
return 0;
}
- r = safe_atou64(rvalue, &u);
- if (r < 0 || u < 1) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ r = safe_atou64(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ } else
+ u = system_tasks_max_scale(r, 100U);
+
+ if (u <= 0 || u >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
return 0;
}
diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in
index 028db1cc4a..6e8a3b3e3d 100644
--- a/src/core/macros.systemd.in
+++ b/src/core/macros.systemd.in
@@ -38,6 +38,12 @@ Requires(preun): systemd \
Requires(postun): systemd \
%{nil}
+%systemd_ordering \
+OrderWithRequires(post): systemd \
+OrderWithRequires(preun): systemd \
+OrderWithRequires(postun): systemd \
+%{nil}
+
%systemd_post() \
if [ $1 -eq 1 ] ; then \
# Initial installation \
diff --git a/src/core/main.c b/src/core/main.c
index f49ba82156..f2adca7d2b 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -127,7 +127,7 @@ static bool arg_default_io_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
-static uint64_t arg_default_tasks_max = UINT64_C(512);
+static uint64_t arg_default_tasks_max = UINT64_MAX;
static sd_id128_t arg_machine_id = {};
noreturn static void freeze_or_reboot(void) {
@@ -1300,6 +1300,11 @@ static int fixup_environment(void) {
_cleanup_free_ char *term = NULL;
int r;
+ /* We expect the environment to be set correctly
+ * if run inside a container. */
+ if (detect_container() > 0)
+ return 0;
+
/* When started as PID1, the kernel uses /dev/console
* for our stdios and uses TERM=linux whatever the
* backend device used by the console. We try to make
@@ -1316,7 +1321,7 @@ static int fixup_environment(void) {
if (r == 0) {
term = strdup(default_term_for_tty("/dev/console") + 5);
if (!term)
- return -errno;
+ return -ENOMEM;
}
if (setenv("TERM", term, 1) < 0)
@@ -1510,13 +1515,10 @@ int main(int argc, char *argv[]) {
}
if (arg_system) {
- /* We expect the environment to be set correctly
- * if run inside a container. */
- if (detect_container() <= 0)
- if (fixup_environment() < 0) {
- error_message = "Failed to fix up PID1 environment";
- goto finish;
- }
+ if (fixup_environment() < 0) {
+ error_message = "Failed to fix up PID1 environment";
+ goto finish;
+ }
/* Try to figure out if we can use colors with the console. No
* need to do that for user instances since they never log
@@ -1558,6 +1560,8 @@ int main(int argc, char *argv[]) {
(void) reset_all_signal_handlers();
(void) ignore_signals(SIGNALS_IGNORE, -1);
+ arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */
+
if (parse_config_file() < 0) {
error_message = "Failed to parse config file";
goto finish;
diff --git a/src/core/manager.c b/src/core/manager.c
index a0181e2138..4d84a0b37e 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -569,7 +569,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
m->exit_code = _MANAGER_EXIT_CODE_INVALID;
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->default_tasks_accounting = true;
- m->default_tasks_max = UINT64_C(512);
+ m->default_tasks_max = UINT64_MAX;
#ifdef ENABLE_EFI
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
diff --git a/src/core/system.conf b/src/core/system.conf
index db8b7acd78..c6bb050aac 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -42,7 +42,7 @@
#DefaultBlockIOAccounting=no
#DefaultMemoryAccounting=no
#DefaultTasksAccounting=yes
-#DefaultTasksMax=512
+#DefaultTasksMax=15%
#DefaultLimitCPU=
#DefaultLimitFSIZE=
#DefaultLimitDATA=
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
index 82a54968e7..e5719e67c3 100644
--- a/src/coredump/coredump.c
+++ b/src/coredump/coredump.c
@@ -756,7 +756,6 @@ static int process_socket(int fd) {
iovec[n_iovec].iov_len = l;
iovec[n_iovec].iov_base = malloc(l + 1);
-
if (!iovec[n_iovec].iov_base) {
r = log_oom();
goto finish;
@@ -852,12 +851,42 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd)
return log_error_errno(errno, "Failed to connect to coredump service: %m");
for (i = 0; i < n_iovec; i++) {
- ssize_t n;
- assert(iovec[i].iov_len > 0);
+ struct msghdr mh = {
+ .msg_iov = (struct iovec*) iovec + i,
+ .msg_iovlen = 1,
+ };
+ struct iovec copy[2];
+
+ for (;;) {
+ if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0)
+ break;
+
+ if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) {
+ /* This field didn't fit? That's a pity. Given that this is just metadata,
+ * let's truncate the field at half, and try again. We append three dots, in
+ * order to show that this is truncated. */
+
+ if (mh.msg_iov != copy) {
+ /* We don't want to modify the caller's iovec, hence let's create our
+ * own array, consisting of two new iovecs, where the first is a
+ * (truncated) copy of what we want to send, and the second one
+ * contains the trailing dots. */
+ copy[0] = iovec[i];
+ copy[1] = (struct iovec) {
+ .iov_base = (char[]) { '.', '.', '.' },
+ .iov_len = 3,
+ };
+
+ mh.msg_iov = copy;
+ mh.msg_iovlen = 2;
+ }
+
+ copy[0].iov_len /= 2; /* halve it, and try again */
+ continue;
+ }
- n = send(fd, iovec[i].iov_base, iovec[i].iov_len, MSG_NOSIGNAL);
- if (n < 0)
return log_error_errno(errno, "Failed to send coredump datagram: %m");
+ }
}
r = send_one_fd(fd, input_fd, 0);
@@ -867,7 +896,7 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd)
return 0;
}
-static int process_journald_crash(const char *context[], int input_fd) {
+static int process_special_crash(const char *context[], int input_fd) {
_cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1;
_cleanup_free_ char *filename = NULL;
uint64_t coredump_size;
@@ -876,7 +905,7 @@ static int process_journald_crash(const char *context[], int input_fd) {
assert(context);
assert(input_fd >= 0);
- /* If we are journald, we cut things short, don't write to the journal, but still create a coredump. */
+ /* If we are pid1 or journald, we cut things short, don't write to the journal, but still create a coredump. */
if (arg_storage != COREDUMP_STORAGE_NONE)
arg_storage = COREDUMP_STORAGE_EXTERNAL;
@@ -889,7 +918,11 @@ static int process_journald_crash(const char *context[], int input_fd) {
if (r < 0)
return r;
- log_info("Detected coredump of the journal daemon itself, diverted to %s.", filename);
+ log_notice("Detected coredump of the journal daemon or PID 1, diverted to %s.", filename);
+
+ log_notice("Due to the special circumstances, coredump collection will now be turned off.");
+ (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
+
return 0;
}
@@ -949,9 +982,11 @@ static int process_kernel(int argc, char* argv[]) {
if (cg_pid_get_unit(pid, &t) >= 0) {
- if (streq(t, SPECIAL_JOURNALD_SERVICE)) {
+ /* Let's avoid dead-locks when processing journald and init crashes, as socket activation and logging
+ * are unlikely to work then. */
+ if (STR_IN_SET(t, SPECIAL_JOURNALD_SERVICE, SPECIAL_INIT_SCOPE)) {
free(t);
- return process_journald_crash(context, STDIN_FILENO);
+ return process_special_crash(context, STDIN_FILENO);
}
core_unit = strjoina("COREDUMP_UNIT=", t);
diff --git a/src/login/logind-gperf.gperf b/src/login/logind-gperf.gperf
index 6bd08adc05..0b6a5f3cf4 100644
--- a/src/login/logind-gperf.gperf
+++ b/src/login/logind-gperf.gperf
@@ -36,4 +36,4 @@ Login.RuntimeDirectorySize, config_parse_tmpfs_size, 0, offsetof(Manag
Login.RemoveIPC, config_parse_bool, 0, offsetof(Manager, remove_ipc)
Login.InhibitorsMax, config_parse_uint64, 0, offsetof(Manager, inhibitors_max)
Login.SessionsMax, config_parse_uint64, 0, offsetof(Manager, sessions_max)
-Login.UserTasksMax, config_parse_uint64, 0, offsetof(Manager, user_tasks_max)
+Login.UserTasksMax, config_parse_user_tasks_max,0, offsetof(Manager, user_tasks_max)
diff --git a/src/login/logind-user.c b/src/login/logind-user.c
index de44d369cf..f7d83909a0 100644
--- a/src/login/logind-user.c
+++ b/src/login/logind-user.c
@@ -870,3 +870,48 @@ int config_parse_tmpfs_size(
return 0;
}
+
+int config_parse_user_tasks_max(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *m = data;
+ uint64_t k;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* First, try to parse as percentage */
+ r = parse_percent(rvalue);
+ if (r > 0 && r < 100)
+ k = system_tasks_max_scale(r, 100U);
+ else {
+
+ /* If the passed argument was not a percentage, or out of range, parse as byte size */
+
+ r = safe_atou64(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse tasks maximum, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (k <= 0 || k >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Tasks maximum out of range, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *m = k;
+ return 0;
+}
diff --git a/src/login/logind.c b/src/login/logind.c
index d01dd110ea..5ce36d28c7 100644
--- a/src/login/logind.c
+++ b/src/login/logind.c
@@ -62,7 +62,7 @@ static void manager_reset_config(Manager *m) {
m->idle_action = HANDLE_IGNORE;
m->runtime_dir_size = physical_memory_scale(10U, 100U); /* 10% */
- m->user_tasks_max = 12288;
+ m->user_tasks_max = system_tasks_max_scale(33U, 100U); /* 33% */
m->sessions_max = 8192;
m->inhibitors_max = 8192;
diff --git a/src/login/logind.conf.in b/src/login/logind.conf.in
index 32c0844cb6..6f720b7708 100644
--- a/src/login/logind.conf.in
+++ b/src/login/logind.conf.in
@@ -34,4 +34,4 @@
#RemoveIPC=yes
#InhibitorsMax=8192
#SessionsMax=8192
-#UserTasksMax=12288
+#UserTasksMax=33%
diff --git a/src/login/logind.h b/src/login/logind.h
index 90431eb4b0..086fa1eeb5 100644
--- a/src/login/logind.h
+++ b/src/login/logind.h
@@ -187,6 +187,7 @@ const struct ConfigPerfItem* logind_gperf_lookup(const char *key, unsigned lengt
int manager_set_lid_switch_ignore(Manager *m, usec_t until);
int config_parse_tmpfs_size(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int manager_get_session_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Session **ret);
int manager_get_user_from_creds(Manager *m, sd_bus_message *message, uid_t uid, sd_bus_error *error, User **ret);
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c
index 7fd711b8a4..e5b76a0c5d 100644
--- a/src/nspawn/nspawn-register.c
+++ b/src/nspawn/nspawn-register.c
@@ -104,7 +104,7 @@ int register_machine(
return bus_log_create_error(r);
}
- r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
+ r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed");
if (r < 0)
return bus_log_create_error(r);
@@ -112,31 +112,20 @@ int register_machine(
* systemd-nspawn@.service, to keep the device
* policies in sync regardless if we are run with or
* without the --keep-unit switch. */
- r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 11,
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2,
/* Allow the container to
* access and create the API
* device nodes, so that
* PrivateDevices= in the
* container can work
* fine */
- "/dev/null", "rwm",
- "/dev/zero", "rwm",
- "/dev/full", "rwm",
- "/dev/random", "rwm",
- "/dev/urandom", "rwm",
- "/dev/tty", "rwm",
"/dev/net/tun", "rwm",
/* Allow the container
* access to ptys. However,
* do not permit the
* container to ever create
* these device nodes. */
- "/dev/pts/ptmx", "rw",
- "char-pts", "rw",
- /* Allow /run/systemd/inaccessible/{chr,blk}
- * devices inside the container */
- "/run/systemd/inaccessible/chr", "rwm",
- "/run/systemd/inaccessible/blk", "rwm");
+ "char-pts", "rw");
if (r < 0)
return bus_log_create_error(r);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 94ffa8af87..ea020b517b 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -132,10 +132,10 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
char *n;
/* When this is a percentage we'll convert this into a relative value in the range
- * 0…UINT32_MAX and pass it in the MemoryLowByPhysicalMemory property (and related
+ * 0…UINT32_MAX and pass it in the MemoryLowScale property (and related
* ones). This way the physical memory size can be determined server-side */
- n = strjoina(field, "ByPhysicalMemory");
+ n = strjoina(field, "Scale");
r = sd_bus_message_append(m, "sv", n, "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U));
goto finish;
@@ -148,6 +148,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "sv", field, "t", bytes);
goto finish;
+ } else if (streq(field, "TasksMax")) {
+ uint64_t t;
+
+ if (isempty(eq) || streq(eq, "infinity"))
+ t = (uint64_t) -1;
+ else {
+ r = parse_percent(eq);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U));
+ goto finish;
+ } else {
+ r = safe_atou64(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment);
+ }
+
+ }
+
+ r = sd_bus_message_append(m, "sv", "TasksMax", "t", t);
+ goto finish;
}
r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
@@ -191,21 +211,6 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "b", r);
- } else if (streq(field, "TasksMax")) {
- uint64_t n;
-
- if (isempty(eq) || streq(eq, "infinity"))
- n = (uint64_t) -1;
- else {
- r = safe_atou64(eq, &n);
- if (r < 0) {
- log_error("Failed to parse maximum tasks specification %s", assignment);
- return -EINVAL;
- }
- }
-
- r = sd_bus_message_append(m, "v", "t", n);
-
} else if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) {
uint64_t u;
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
index ff76d8287a..92ba6aa4e4 100644
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@@ -3033,6 +3033,9 @@ static int logind_check_inhibitors(enum action a) {
if (!on_tty())
return 0;
+ if (arg_transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
r = acquire_bus(BUS_FULL, &bus);
if (r < 0)
return r;
diff --git a/src/test/test-util.c b/src/test/test-util.c
index e177612a9f..1b5cba86c1 100644
--- a/src/test/test-util.c
+++ b/src/test/test-util.c
@@ -308,7 +308,43 @@ static void test_physical_memory_scale(void) {
/* overflow */
assert_se(physical_memory_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
+}
+
+static void test_system_tasks_max(void) {
+ uint64_t t;
+
+ t = system_tasks_max();
+ assert_se(t > 0);
+ assert_se(t < UINT64_MAX);
+
+ log_info("Max tasks: %" PRIu64, t);
+}
+
+static void test_system_tasks_max_scale(void) {
+ uint64_t t;
+
+ t = system_tasks_max();
+
+ assert_se(system_tasks_max_scale(0, 100) == 0);
+ assert_se(system_tasks_max_scale(100, 100) == t);
+
+ assert_se(system_tasks_max_scale(0, 1) == 0);
+ assert_se(system_tasks_max_scale(1, 1) == t);
+ assert_se(system_tasks_max_scale(2, 1) == 2*t);
+
+ assert_se(system_tasks_max_scale(0, 2) == 0);
+ assert_se(system_tasks_max_scale(1, 2) == t/2);
+ assert_se(system_tasks_max_scale(2, 2) == t);
+ assert_se(system_tasks_max_scale(3, 2) == (3*t)/2);
+ assert_se(system_tasks_max_scale(4, 2) == t*2);
+
+ assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0);
+ assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2);
+ assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t);
+
+ /* overflow */
+ assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
}
int main(int argc, char *argv[]) {
@@ -327,6 +363,8 @@ int main(int argc, char *argv[]) {
test_raw_clone();
test_physical_memory();
test_physical_memory_scale();
+ test_system_tasks_max();
+ test_system_tasks_max_scale();
return 0;
}
diff --git a/units/systemd-nspawn@.service.in b/units/systemd-nspawn@.service.in
index 8f9cf9acfe..c8141639b6 100644
--- a/units/systemd-nspawn@.service.in
+++ b/units/systemd-nspawn@.service.in
@@ -25,18 +25,9 @@ TasksMax=16384
# Enforce a strict device policy, similar to the one nspawn configures
# when it allocates its own scope unit. Make sure to keep these
# policies in sync if you change them!
-DevicePolicy=strict
-DeviceAllow=/dev/null rwm
-DeviceAllow=/dev/zero rwm
-DeviceAllow=/dev/full rwm
-DeviceAllow=/dev/random rwm
-DeviceAllow=/dev/urandom rwm
-DeviceAllow=/dev/tty rwm
+DevicePolicy=closed
DeviceAllow=/dev/net/tun rwm
-DeviceAllow=/dev/pts/ptmx rw
DeviceAllow=char-pts rw
-DeviceAllow=/run/systemd/inaccessible/chr rwm
-DeviceAllow=/run/systemd/inaccessible/blk rwm
# nspawn itself needs access to /dev/loop-control and /dev/loop, to
# implement the --image= option. Add these here, too.