summaryrefslogtreecommitdiff
path: root/src/grp-system
diff options
context:
space:
mode:
Diffstat (limited to 'src/grp-system')
-rw-r--r--src/grp-system/90-systemd.preset4
-rw-r--r--src/grp-system/grp-utils/systemd-analyze/analyze-verify.c17
-rw-r--r--src/grp-system/grp-utils/systemd-analyze/systemd-analyze.xml17
-rw-r--r--src/grp-system/grp-utils/systemd-fstab-generator/fstab-generator.c4
-rw-r--r--src/grp-system/grp-utils/systemd-run/run.c296
-rw-r--r--src/grp-system/grp-utils/systemd-run/systemd-run.completion.bash3
-rw-r--r--src/grp-system/grp-utils/systemd-run/systemd-run.completion.zsh1
-rw-r--r--src/grp-system/grp-utils/systemd-run/systemd-run.xml170
-rw-r--r--src/grp-system/grp-utils/systemd-sysv-generator/sysv-generator.c25
-rw-r--r--src/grp-system/kernel-command-line.xml11
-rw-r--r--src/grp-system/libcore/include/core/cgroup.h6
-rw-r--r--src/grp-system/libcore/include/core/dynamic-user.h66
-rw-r--r--src/grp-system/libcore/include/core/emergency-action.h (renamed from src/grp-system/libcore/include/core/failure-action.h)28
-rw-r--r--src/grp-system/libcore/include/core/execute.h35
-rw-r--r--src/grp-system/libcore/include/core/job.h4
-rw-r--r--src/grp-system/libcore/include/core/load-fragment.h7
-rw-r--r--src/grp-system/libcore/include/core/manager.h36
-rw-r--r--src/grp-system/libcore/include/core/mount.h7
-rw-r--r--src/grp-system/libcore/include/core/namespace.h13
-rw-r--r--src/grp-system/libcore/include/core/service.h4
-rw-r--r--src/grp-system/libcore/include/core/socket.h12
-rw-r--r--src/grp-system/libcore/include/core/swap.h1
-rw-r--r--src/grp-system/libcore/include/core/unit.h64
-rw-r--r--src/grp-system/libcore/src/Makefile6
-rw-r--r--src/grp-system/libcore/src/automount.c14
-rw-r--r--src/grp-system/libcore/src/busname.c12
-rw-r--r--src/grp-system/libcore/src/cgroup.c341
-rw-r--r--src/grp-system/libcore/src/dbus-automount.c54
-rw-r--r--src/grp-system/libcore/src/dbus-automount.h2
-rw-r--r--src/grp-system/libcore/src/dbus-cgroup.c51
-rw-r--r--src/grp-system/libcore/src/dbus-execute.c121
-rw-r--r--src/grp-system/libcore/src/dbus-manager.c257
-rw-r--r--src/grp-system/libcore/src/dbus-mount.c7
-rw-r--r--src/grp-system/libcore/src/dbus-service.c18
-rw-r--r--src/grp-system/libcore/src/dbus-socket.c3
-rw-r--r--src/grp-system/libcore/src/dbus-swap.c2
-rw-r--r--src/grp-system/libcore/src/dbus-unit.c181
-rw-r--r--src/grp-system/libcore/src/dbus-unit.h8
-rw-r--r--src/grp-system/libcore/src/dbus.c75
-rw-r--r--src/grp-system/libcore/src/dbus.h5
-rw-r--r--src/grp-system/libcore/src/device.c12
-rw-r--r--src/grp-system/libcore/src/dynamic-user.c794
-rw-r--r--src/grp-system/libcore/src/emergency-action.c (renamed from src/grp-system/libcore/src/failure-action.c)65
-rw-r--r--src/grp-system/libcore/src/execute.c1282
-rw-r--r--src/grp-system/libcore/src/job.c38
-rw-r--r--src/grp-system/libcore/src/load-fragment-gperf.gperf.m437
-rw-r--r--src/grp-system/libcore/src/load-fragment.c411
-rw-r--r--src/grp-system/libcore/src/manager.c598
-rw-r--r--src/grp-system/libcore/src/mount-setup.c6
-rw-r--r--src/grp-system/libcore/src/mount.c240
-rw-r--r--src/grp-system/libcore/src/namespace.c604
-rw-r--r--src/grp-system/libcore/src/path.c6
-rw-r--r--src/grp-system/libcore/src/scope.c63
-rw-r--r--src/grp-system/libcore/src/selinux-access.h2
-rw-r--r--src/grp-system/libcore/src/service.c284
-rw-r--r--src/grp-system/libcore/src/show-status.c5
-rw-r--r--src/grp-system/libcore/src/slice.c52
-rw-r--r--src/grp-system/libcore/src/socket.c296
-rw-r--r--src/grp-system/libcore/src/swap.c50
-rw-r--r--src/grp-system/libcore/src/target.c5
-rw-r--r--src/grp-system/libcore/src/timer.c10
-rw-r--r--src/grp-system/libcore/src/transaction.c6
-rw-r--r--src/grp-system/libcore/src/unit.c522
-rw-r--r--src/grp-system/systemctl/systemctl.c527
-rw-r--r--src/grp-system/systemctl/systemctl.completion.bash.in43
-rw-r--r--src/grp-system/systemctl/systemctl.completion.zsh.in18
-rw-r--r--src/grp-system/systemctl/systemctl.xml117
-rw-r--r--src/grp-system/systemctl/systemd.preset.xml4
-rw-r--r--src/grp-system/systemd-shutdown/umount.c2
-rw-r--r--src/grp-system/systemd/main.c91
-rw-r--r--src/grp-system/systemd/org.freedesktop.systemd1.conf24
-rw-r--r--src/grp-system/systemd/system.conf1
-rw-r--r--src/grp-system/systemd/systemd-system.conf.xml14
-rw-r--r--src/grp-system/systemd/systemd.exec.xml809
-rw-r--r--src/grp-system/systemd/systemd.link.xml42
-rw-r--r--src/grp-system/systemd/systemd.mount.xml28
-rw-r--r--src/grp-system/systemd/systemd.netdev.xml103
-rw-r--r--src/grp-system/systemd/systemd.network.xml179
-rw-r--r--src/grp-system/systemd/systemd.offline-updates.xml8
-rw-r--r--src/grp-system/systemd/systemd.resource-control.xml403
-rw-r--r--src/grp-system/systemd/systemd.service.xml50
-rw-r--r--src/grp-system/systemd/systemd.socket.xml16
-rw-r--r--src/grp-system/systemd/systemd.special.xml64
-rw-r--r--src/grp-system/systemd/systemd.target.xml2
-rw-r--r--src/grp-system/systemd/systemd.time.xml150
-rw-r--r--src/grp-system/systemd/systemd.unit.xml113
-rw-r--r--src/grp-system/systemd/systemd.xml12
87 files changed, 7885 insertions, 2311 deletions
diff --git a/src/grp-system/90-systemd.preset b/src/grp-system/90-systemd.preset
index 138937011c..5f7b292244 100644
--- a/src/grp-system/90-systemd.preset
+++ b/src/grp-system/90-systemd.preset
@@ -14,11 +14,11 @@ enable machines.target
enable getty@.service
disable console-getty.service
-disable console-shell.service
disable debug-shell.service
disable halt.target
disable kexec.target
disable poweroff.target
-disable reboot.target
+enable reboot.target
disable rescue.target
+disable exit.target
diff --git a/src/grp-system/grp-utils/systemd-analyze/analyze-verify.c b/src/grp-system/grp-utils/systemd-analyze/analyze-verify.c
index 620ae7ee35..22d210a14b 100644
--- a/src/grp-system/grp-utils/systemd-analyze/analyze-verify.c
+++ b/src/grp-system/grp-utils/systemd-analyze/analyze-verify.c
@@ -72,6 +72,7 @@ static int prepare_filename(const char *filename, char **ret) {
}
static int generate_path(char **var, char **filenames) {
+ const char *old;
char **filename;
_cleanup_strv_free_ char **ans = NULL;
@@ -91,9 +92,19 @@ static int generate_path(char **var, char **filenames) {
assert_se(strv_uniq(ans));
- r = strv_extend(&ans, "");
- if (r < 0)
- return r;
+ /* First, prepend our directories. Second, if some path was specified, use that, and
+ * otherwise use the defaults. Any duplicates will be filtered out in path-lookup.c.
+ * Treat explicit empty path to mean that nothing should be appended.
+ */
+ old = getenv("SYSTEMD_UNIT_PATH");
+ if (!streq_ptr(old, "")) {
+ if (!old)
+ old = ":";
+
+ r = strv_extend(&ans, old);
+ if (r < 0)
+ return r;
+ }
*var = strv_join(ans, ":");
if (!*var)
diff --git a/src/grp-system/grp-utils/systemd-analyze/systemd-analyze.xml b/src/grp-system/grp-utils/systemd-analyze/systemd-analyze.xml
index bc37765dff..8fa7cd3329 100644
--- a/src/grp-system/grp-utils/systemd-analyze/systemd-analyze.xml
+++ b/src/grp-system/grp-utils/systemd-analyze/systemd-analyze.xml
@@ -181,14 +181,15 @@
<option>--log-target=</option>, described in
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>).</para>
- <para><command>systemd-analyze verify</command> will load unit
- files and print warnings if any errors are detected. Files
- specified on the command line will be loaded, but also any other
- units referenced by them. This command works by prepending the
- directories for all command line arguments at the beginning of the
- unit load path, which means that all units files found in those
- directories will be used in preference to the unit files found in
- the standard locations, even if not listed explicitly.</para>
+ <para><command>systemd-analyze verify</command> will load unit files and print
+ warnings if any errors are detected. Files specified on the command line will be
+ loaded, but also any other units referenced by them. The full unit search path is
+ formed by combining the directories for all command line arguments, and the usual unit
+ load paths (variable <varname>$SYSTEMD_UNIT_PATH</varname> is supported, and may be
+ used to replace or augment the compiled in set of unit load paths; see
+ <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>).
+ All units files present in the directories containing the command line arguments will
+ be used in preference to the other paths.</para>
<para>If no command is passed, <command>systemd-analyze
time</command> is implied.</para>
diff --git a/src/grp-system/grp-utils/systemd-fstab-generator/fstab-generator.c b/src/grp-system/grp-utils/systemd-fstab-generator/fstab-generator.c
index f4b3a048e9..aceac5151c 100644
--- a/src/grp-system/grp-utils/systemd-fstab-generator/fstab-generator.c
+++ b/src/grp-system/grp-utils/systemd-fstab-generator/fstab-generator.c
@@ -590,7 +590,7 @@ static int add_sysroot_usr_mount(void) {
"/proc/cmdline");
}
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
int r;
/* root=, usr=, usrfstype= and roofstype= may occur more than once, the last
@@ -674,7 +674,7 @@ int main(int argc, char *argv[]) {
umask(0022);
- r = parse_proc_cmdline(parse_proc_cmdline_item);
+ r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
if (r < 0)
log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
diff --git a/src/grp-system/grp-utils/systemd-run/run.c b/src/grp-system/grp-utils/systemd-run/run.c
index a68938fb71..274edaf2f0 100644
--- a/src/grp-system/grp-utils/systemd-run/run.c
+++ b/src/grp-system/grp-utils/systemd-run/run.c
@@ -32,6 +32,7 @@
#include "systemd-basic/formats-util.h"
#include "systemd-basic/parse-util.h"
#include "systemd-basic/path-util.h"
+#include "systemd-basic/process-util.h"
#include "systemd-basic/signal-util.h"
#include "systemd-basic/strv.h"
#include "systemd-basic/terminal-util.h"
@@ -45,6 +46,7 @@ static bool arg_ask_password = true;
static bool arg_scope = false;
static bool arg_remain_after_exit = false;
static bool arg_no_block = false;
+static bool arg_wait = false;
static const char *arg_unit = NULL;
static const char *arg_description = NULL;
static const char *arg_slice = NULL;
@@ -83,9 +85,7 @@ static void polkit_agent_open_if_enabled(void) {
static void help(void) {
printf("%s [OPTIONS...] {COMMAND} [ARGS...]\n\n"
- "Run the specified command in a transient scope or service or timer\n"
- "unit. If a timer option is specified and the unit specified with\n"
- "the --unit option exists, the command can be omitted.\n\n"
+ "Run the specified command in a transient scope or service.\n\n"
" -h --help Show this help\n"
" --version Show package version\n"
" --no-ask-password Do not prompt for password\n"
@@ -94,11 +94,12 @@ static void help(void) {
" -M --machine=CONTAINER Operate on local container\n"
" --scope Run this as scope rather than service\n"
" --unit=UNIT Run under the specified unit name\n"
- " -p --property=NAME=VALUE Set unit property\n"
+ " -p --property=NAME=VALUE Set service or scope unit property\n"
" --description=TEXT Description for unit\n"
" --slice=SLICE Run in the specified slice\n"
" --no-block Do not wait until operation finished\n"
" -r --remain-after-exit Leave service around until explicitly stopped\n"
+ " --wait Wait until service stopped again\n"
" --send-sighup Send SIGHUP when terminating\n"
" --service-type=TYPE Service type\n"
" --uid=USER Run as system user\n"
@@ -107,15 +108,15 @@ static void help(void) {
" -E --setenv=NAME=VALUE Set environment\n"
" -t --pty Run service on pseudo tty\n"
" -q --quiet Suppress information messages during runtime\n\n"
- "Timer options:\n\n"
+ "Timer options:\n"
" --on-active=SECONDS Run after SECONDS delay\n"
" --on-boot=SECONDS Run SECONDS after machine was booted up\n"
" --on-startup=SECONDS Run SECONDS after systemd activation\n"
" --on-unit-active=SECONDS Run SECONDS after the last activation\n"
" --on-unit-inactive=SECONDS Run SECONDS after the last deactivation\n"
" --on-calendar=SPEC Realtime timer\n"
- " --timer-property=NAME=VALUE Set timer unit property\n",
- program_invocation_short_name);
+ " --timer-property=NAME=VALUE Set timer unit property\n"
+ , program_invocation_short_name);
}
static bool with_timer(void) {
@@ -146,6 +147,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_TIMER_PROPERTY,
ARG_NO_BLOCK,
ARG_NO_ASK_PASSWORD,
+ ARG_WAIT,
};
static const struct option options[] = {
@@ -162,6 +164,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "host", required_argument, NULL, 'H' },
{ "machine", required_argument, NULL, 'M' },
{ "service-type", required_argument, NULL, ARG_SERVICE_TYPE },
+ { "wait", no_argument, NULL, ARG_WAIT },
{ "uid", required_argument, NULL, ARG_EXEC_USER },
{ "gid", required_argument, NULL, ARG_EXEC_GROUP },
{ "nice", required_argument, NULL, ARG_NICE },
@@ -178,7 +181,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "on-calendar", required_argument, NULL, ARG_ON_CALENDAR },
{ "timer-property", required_argument, NULL, ARG_TIMER_PROPERTY },
{ "no-block", no_argument, NULL, ARG_NO_BLOCK },
- { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
+ { "no-ask-password", no_argument, NULL, ARG_NO_ASK_PASSWORD },
{},
};
@@ -195,13 +198,13 @@ static int parse_argv(int argc, char *argv[]) {
help();
return 0;
+ case ARG_VERSION:
+ return version();
+
case ARG_NO_ASK_PASSWORD:
arg_ask_password = false;
break;
- case ARG_VERSION:
- return version();
-
case ARG_USER:
arg_user = true;
break;
@@ -257,11 +260,9 @@ static int parse_argv(int argc, char *argv[]) {
break;
case ARG_NICE:
- r = safe_atoi(optarg, &arg_nice);
- if (r < 0 || arg_nice < PRIO_MIN || arg_nice >= PRIO_MAX) {
- log_error("Failed to parse nice value");
- return -EINVAL;
- }
+ r = parse_nice(optarg, &arg_nice);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse nice value: %s", optarg);
arg_nice_set = true;
break;
@@ -361,6 +362,10 @@ static int parse_argv(int argc, char *argv[]) {
arg_no_block = true;
break;
+ case ARG_WAIT:
+ arg_wait = true;
+ break;
+
case '?':
return -EINVAL;
@@ -408,22 +413,36 @@ static int parse_argv(int argc, char *argv[]) {
return -EINVAL;
}
+ if (arg_wait) {
+ if (arg_no_block) {
+ log_error("--wait may not be combined with --no-block.");
+ return -EINVAL;
+ }
+
+ if (with_timer()) {
+ log_error("--wait may not be combined with timer operations.");
+ return -EINVAL;
+ }
+
+ if (arg_scope) {
+ log_error("--wait may not be combined with --scope.");
+ return -EINVAL;
+ }
+ }
+
return 1;
}
static int transient_unit_set_properties(sd_bus_message *m, char **properties) {
- char **i;
int r;
r = sd_bus_message_append(m, "(sv)", "Description", "s", arg_description);
if (r < 0)
return r;
- STRV_FOREACH(i, properties) {
- r = bus_append_unit_property_assignment(m, *i);
- if (r < 0)
- return r;
- }
+ r = bus_append_unit_property_assignment_many(m, properties);
+ if (r < 0)
+ return r;
return 0;
}
@@ -473,6 +492,12 @@ static int transient_service_set_properties(sd_bus_message *m, char **argv, cons
if (r < 0)
return r;
+ if (arg_wait) {
+ r = sd_bus_message_append(m, "(sv)", "AddRef", "b", 1);
+ if (r < 0)
+ return r;
+ }
+
if (arg_remain_after_exit) {
r = sd_bus_message_append(m, "(sv)", "RemainAfterExit", "b", arg_remain_after_exit);
if (r < 0)
@@ -730,9 +755,97 @@ static int make_unit_name(sd_bus *bus, UnitType t, char **ret) {
return 0;
}
+typedef struct RunContext {
+ sd_bus *bus;
+ sd_event *event;
+ PTYForward *forward;
+ sd_bus_slot *match;
+
+ /* The exit data of the unit */
+ char *active_state;
+ uint64_t inactive_exit_usec;
+ uint64_t inactive_enter_usec;
+ char *result;
+ uint64_t cpu_usage_nsec;
+ uint32_t exit_code;
+ uint32_t exit_status;
+} RunContext;
+
+static void run_context_free(RunContext *c) {
+ assert(c);
+
+ c->forward = pty_forward_free(c->forward);
+ c->match = sd_bus_slot_unref(c->match);
+ c->bus = sd_bus_unref(c->bus);
+ c->event = sd_event_unref(c->event);
+
+ free(c->active_state);
+ free(c->result);
+}
+
+static void run_context_check_done(RunContext *c) {
+ bool done = true;
+
+ assert(c);
+
+ if (c->match)
+ done = done && (c->active_state && STR_IN_SET(c->active_state, "inactive", "failed"));
+
+ if (c->forward)
+ done = done && pty_forward_is_done(c->forward);
+
+ if (done)
+ sd_event_exit(c->event, EXIT_SUCCESS);
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+
+ static const struct bus_properties_map map[] = {
+ { "ActiveState", "s", NULL, offsetof(RunContext, active_state) },
+ { "InactiveExitTimestampMonotonic", "t", NULL, offsetof(RunContext, inactive_exit_usec) },
+ { "InactiveEnterTimestampMonotonic", "t", NULL, offsetof(RunContext, inactive_enter_usec) },
+ { "Result", "s", NULL, offsetof(RunContext, result) },
+ { "ExecMainCode", "i", NULL, offsetof(RunContext, exit_code) },
+ { "ExecMainStatus", "i", NULL, offsetof(RunContext, exit_status) },
+ { "CPUUsageNSec", "t", NULL, offsetof(RunContext, cpu_usage_nsec) },
+ {}
+ };
+
+ RunContext *c = userdata;
+ int r;
+
+ r = bus_map_all_properties(c->bus,
+ "org.freedesktop.systemd1",
+ sd_bus_message_get_path(m),
+ map,
+ c);
+ if (r < 0) {
+ sd_event_exit(c->event, EXIT_FAILURE);
+ return log_error_errno(r, "Failed to query unit state: %m");
+ }
+
+ run_context_check_done(c);
+ return 0;
+}
+
+static int pty_forward_handler(PTYForward *f, int rcode, void *userdata) {
+ RunContext *c = userdata;
+
+ assert(f);
+
+ if (rcode < 0) {
+ sd_event_exit(c->event, EXIT_FAILURE);
+ return log_error_errno(rcode, "Error on PTY forwarding logic: %m");
+ }
+
+ run_context_check_done(c);
+ return 0;
+}
+
static int start_transient_service(
sd_bus *bus,
- char **argv) {
+ char **argv,
+ int *retval) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
@@ -743,6 +856,7 @@ static int start_transient_service(
assert(bus);
assert(argv);
+ assert(retval);
if (arg_pty) {
@@ -866,40 +980,95 @@ static int start_transient_service(
return r;
}
- if (master >= 0) {
- _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
- _cleanup_(sd_event_unrefp) sd_event *event = NULL;
- char last_char = 0;
+ if (!arg_quiet)
+ log_info("Running as unit: %s", service);
+
+ if (arg_wait || master >= 0) {
+ _cleanup_(run_context_free) RunContext c = {};
- r = sd_event_default(&event);
+ c.bus = sd_bus_ref(bus);
+
+ r = sd_event_default(&c.event);
if (r < 0)
return log_error_errno(r, "Failed to get event loop: %m");
- assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+ if (master >= 0) {
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
+ (void) sd_event_add_signal(c.event, NULL, SIGINT, NULL, NULL);
+ (void) sd_event_add_signal(c.event, NULL, SIGTERM, NULL, NULL);
- (void) sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
- (void) sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ if (!arg_quiet)
+ log_info("Press ^] three times within 1s to disconnect TTY.");
- if (!arg_quiet)
- log_info("Running as unit: %s\nPress ^] three times within 1s to disconnect TTY.", service);
+ r = pty_forward_new(c.event, master, PTY_FORWARD_IGNORE_INITIAL_VHANGUP, &c.forward);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create PTY forwarder: %m");
- r = pty_forward_new(event, master, PTY_FORWARD_IGNORE_INITIAL_VHANGUP, &forward);
- if (r < 0)
- return log_error_errno(r, "Failed to create PTY forwarder: %m");
+ pty_forward_set_handler(c.forward, pty_forward_handler, &c);
+ }
+
+ if (arg_wait) {
+ _cleanup_free_ char *path = NULL;
+ const char *mt;
+
+ path = unit_dbus_path_from_name(service);
+ if (!path)
+ return log_oom();
+
+ mt = strjoina("type='signal',"
+ "sender='org.freedesktop.systemd1',"
+ "path='", path, "',"
+ "interface='org.freedesktop.DBus.Properties',"
+ "member='PropertiesChanged'");
+ r = sd_bus_add_match(bus, &c.match, mt, on_properties_changed, &c);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add properties changed signal.");
+
+ r = sd_bus_attach_event(bus, c.event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop.");
+ }
- r = sd_event_loop(event);
+ r = sd_event_loop(c.event);
if (r < 0)
return log_error_errno(r, "Failed to run event loop: %m");
- pty_forward_get_last_char(forward, &last_char);
+ if (c.forward) {
+ char last_char = 0;
- forward = pty_forward_free(forward);
+ r = pty_forward_get_last_char(c.forward, &last_char);
+ if (r >= 0 && !arg_quiet && last_char != '\n')
+ fputc('\n', stdout);
+ }
- if (!arg_quiet && last_char != '\n')
- fputc('\n', stdout);
+ if (!arg_quiet) {
+ if (!isempty(c.result))
+ log_info("Finished with result: %s", strna(c.result));
- } else if (!arg_quiet)
- log_info("Running as unit: %s", service);
+ if (c.exit_code == CLD_EXITED)
+ log_info("Main processes terminated with: code=%s/status=%i", sigchld_code_to_string(c.exit_code), c.exit_status);
+ else if (c.exit_code > 0)
+ log_info("Main processes terminated with: code=%s/status=%s", sigchld_code_to_string(c.exit_code), signal_to_string(c.exit_status));
+
+ if (c.inactive_enter_usec > 0 && c.inactive_enter_usec != USEC_INFINITY &&
+ c.inactive_exit_usec > 0 && c.inactive_exit_usec != USEC_INFINITY &&
+ c.inactive_enter_usec > c.inactive_exit_usec) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_info("Service runtime: %s", format_timespan(ts, sizeof(ts), c.inactive_enter_usec - c.inactive_exit_usec, USEC_PER_MSEC));
+ }
+
+ if (c.cpu_usage_nsec > 0 && c.cpu_usage_nsec != NSEC_INFINITY) {
+ char ts[FORMAT_TIMESPAN_MAX];
+ log_info("CPU time consumed: %s", format_timespan(ts, sizeof(ts), (c.cpu_usage_nsec + NSEC_PER_USEC - 1) / NSEC_PER_USEC, USEC_PER_MSEC));
+ }
+ }
+
+ /* Try to propagate the service's return value */
+ if (c.result && STR_IN_SET(c.result, "success", "exit-code") && c.exit_code == CLD_EXITED)
+ *retval = c.exit_status;
+ else
+ *retval = EXIT_FAILURE;
+ }
return 0;
}
@@ -999,17 +1168,21 @@ static int start_transient_scope(
uid_t uid;
gid_t gid;
- r = get_user_creds(&arg_exec_user, &uid, &gid, &home, &shell);
+ r = get_user_creds_clean(&arg_exec_user, &uid, &gid, &home, &shell);
if (r < 0)
return log_error_errno(r, "Failed to resolve user %s: %m", arg_exec_user);
- r = strv_extendf(&user_env, "HOME=%s", home);
- if (r < 0)
- return log_oom();
+ if (home) {
+ r = strv_extendf(&user_env, "HOME=%s", home);
+ if (r < 0)
+ return log_oom();
+ }
- r = strv_extendf(&user_env, "SHELL=%s", shell);
- if (r < 0)
- return log_oom();
+ if (shell) {
+ r = strv_extendf(&user_env, "SHELL=%s", shell);
+ if (r < 0)
+ return log_oom();
+ }
r = strv_extendf(&user_env, "USER=%s", arg_exec_user);
if (r < 0)
@@ -1146,7 +1319,7 @@ static int start_transient_timer(
if (r < 0)
return bus_log_create_error(r);
- if (argv[0]) {
+ if (!strv_isempty(argv)) {
r = sd_bus_message_open_container(m, 'r', "sa(sv)");
if (r < 0)
return bus_log_create_error(r);
@@ -1192,9 +1365,11 @@ static int start_transient_timer(
if (r < 0)
return r;
- log_info("Running timer as unit: %s", timer);
- if (argv[0])
- log_info("Will run service as unit: %s", service);
+ if (!arg_quiet) {
+ log_info("Running timer as unit: %s", timer);
+ if (argv[0])
+ log_info("Will run service as unit: %s", service);
+ }
return 0;
}
@@ -1202,7 +1377,7 @@ static int start_transient_timer(
int main(int argc, char* argv[]) {
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
_cleanup_free_ char *description = NULL, *command = NULL;
- int r;
+ int r, retval = EXIT_SUCCESS;
log_parse_environment();
log_open();
@@ -1239,7 +1414,12 @@ int main(int argc, char* argv[]) {
arg_description = description;
}
- r = bus_connect_transport_systemd(arg_transport, arg_host, arg_user, &bus);
+ /* If --wait is used connect via the bus, unconditionally, as ref/unref is not supported via the limited direct
+ * connection */
+ if (arg_wait)
+ r = bus_connect_transport(arg_transport, arg_host, arg_user, &bus);
+ else
+ r = bus_connect_transport_systemd(arg_transport, arg_host, arg_user, &bus);
if (r < 0) {
log_error_errno(r, "Failed to create bus connection: %m");
goto finish;
@@ -1250,12 +1430,12 @@ int main(int argc, char* argv[]) {
else if (with_timer())
r = start_transient_timer(bus, argv + optind);
else
- r = start_transient_service(bus, argv + optind);
+ r = start_transient_service(bus, argv + optind, &retval);
finish:
strv_free(arg_environment);
strv_free(arg_property);
strv_free(arg_timer_property);
- return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+ return r < 0 ? EXIT_FAILURE : retval;
}
diff --git a/src/grp-system/grp-utils/systemd-run/systemd-run.completion.bash b/src/grp-system/grp-utils/systemd-run/systemd-run.completion.bash
index 022331e6a9..4116ba7eca 100644
--- a/src/grp-system/grp-utils/systemd-run/systemd-run.completion.bash
+++ b/src/grp-system/grp-utils/systemd-run/systemd-run.completion.bash
@@ -36,7 +36,8 @@ _systemd_run() {
-r --remain-after-exit --send-sighup -H --host -M --machine --service-type
--on-active --on-boot --on-startup --on-unit-active --on-unit-inactive
--on-calendar --timer-property -t --pty -q --quiet --no-block
- --uid --gid --nice --setenv -p --property --no-ask-password'
+ --uid --gid --nice --setenv -p --property --no-ask-password
+ --wait'
local mode=--system
local i
diff --git a/src/grp-system/grp-utils/systemd-run/systemd-run.completion.zsh b/src/grp-system/grp-utils/systemd-run/systemd-run.completion.zsh
index 6362b97766..da9f73a6d0 100644
--- a/src/grp-system/grp-utils/systemd-run/systemd-run.completion.zsh
+++ b/src/grp-system/grp-utils/systemd-run/systemd-run.completion.zsh
@@ -57,4 +57,5 @@ _arguments \
'--on-unit-inactive=[Run after SEC seconds from the last deactivation]:SEC' \
'--on-calendar=[Realtime timer]:SPEC' \
'--timer-property=[Set timer unit property]:NAME=VALUE' \
+ '--wait=[Wait until service stopped again]' \
'*::command:_command'
diff --git a/src/grp-system/grp-utils/systemd-run/systemd-run.xml b/src/grp-system/grp-utils/systemd-run/systemd-run.xml
index 9c1a29218e..2ad8cb0835 100644
--- a/src/grp-system/grp-utils/systemd-run/systemd-run.xml
+++ b/src/grp-system/grp-utils/systemd-run/systemd-run.xml
@@ -45,7 +45,7 @@
<refnamediv>
<refname>systemd-run</refname>
- <refpurpose>Run programs in transient scope or service or timer units</refpurpose>
+ <refpurpose>Run programs in transient scope units, service units, or timer-scheduled service units</refpurpose>
</refnamediv>
<refsynopsisdiv>
@@ -68,42 +68,30 @@
<refsect1>
<title>Description</title>
- <para><command>systemd-run</command> may be used to create and
- start a transient <filename>.service</filename> or
- <filename>.scope</filename> unit and run the specified
- <replaceable>COMMAND</replaceable> in it. It may also be used to
- create and start transient <filename>.timer</filename>
- units.</para>
-
- <para>If a command is run as transient service unit, it will be
- started and managed by the service manager like any other service,
- and thus shows up in the output of <command>systemctl
- list-units</command> like any other unit. It will run in a clean
- and detached execution environment, with the service manager as
- its parent process. In this mode, <command>systemd-run</command>
- will start the service asynchronously in the background and return
- after the command has begun execution.</para>
-
- <para>If a command is run as transient scope unit, it will be
- started by <command>systemd-run</command> itself as parent process
- and will thus inherit the execution environment of the
- caller. However, the processes of the command are managed by the
- service manager similar to normal services, and will show up in
- the output of <command>systemctl list-units</command>. Execution
- in this case is synchronous, and will return only when the command
- finishes. This mode is enabled via the <option>--scope</option>
- switch (see below). </para>
-
- <para>If a command is run with timer options such as
- <option>--on-calendar=</option> (see below), a transient timer
- unit is created alongside the service unit for the specified
- command. Only the transient timer unit is started immediately, the
- transient service unit will be started when the transient timer
- elapses. If the <option>--unit=</option> is specified, the
- <replaceable>COMMAND</replaceable> may be omitted. In this case,
- <command>systemd-run</command> only creates a
- <filename>.timer</filename> unit that invokes the specified unit
- when elapsing.</para>
+ <para><command>systemd-run</command> may be used to create and start a transient <filename>.service</filename> or
+ <filename>.scope</filename> unit and run the specified <replaceable>COMMAND</replaceable> in it. It may also be
+ used to create and start a transient <filename>.timer</filename> unit, that activates a
+ <filename>.service</filename> unit when elapsing.</para>
+
+ <para>If a command is run as transient service unit, it will be started and managed by the service manager like any
+ other service, and thus shows up in the output of <command>systemctl list-units</command> like any other unit. It
+ will run in a clean and detached execution environment, with the service manager as its parent process. In this
+ mode, <command>systemd-run</command> will start the service asynchronously in the background and return after the
+ command has begun execution (unless <option>--no-block</option> or <option>--watch</option> are specified, see
+ below).</para>
+
+ <para>If a command is run as transient scope unit, it will be executed by <command>systemd-run</command> itself as
+ parent process and will thus inherit the execution environment of the caller. However, the processes of the command
+ are managed by the service manager similar to normal services, and will show up in the output of <command>systemctl
+ list-units</command>. Execution in this case is synchronous, and will return only when the command finishes. This
+ mode is enabled via the <option>--scope</option> switch (see below). </para>
+
+ <para>If a command is run with timer options such as <option>--on-calendar=</option> (see below), a transient timer
+ unit is created alongside the service unit for the specified command. Only the transient timer unit is started
+ immediately, the transient service unit will be started when the timer elapses. If the <option>--unit=</option>
+ option is specified, the <replaceable>COMMAND</replaceable> may be omitted. In this case,
+ <command>systemd-run</command> creates only a <filename>.timer</filename> unit that invokes the specified unit when
+ elapsing.</para>
</refsect1>
<refsect1>
@@ -123,8 +111,8 @@
<term><option>--scope</option></term>
<listitem>
- <para>Create a transient <filename>.scope</filename> unit instead of
- the default transient <filename>.service</filename> unit.
+ <para>Create a transient <filename>.scope</filename> unit instead of the default transient
+ <filename>.service</filename> unit (see above).
</para>
</listitem>
</varlistentry>
@@ -140,9 +128,8 @@
<term><option>--property=</option></term>
<term><option>-p</option></term>
- <listitem><para>Sets a unit property for the scope or service
- unit that is created. This takes an assignment in the same
- format as
+ <listitem><para>Sets a property on the scope or service unit that is created. This option takes an assignment
+ in the same format as
<citerefentry><refentrytitle>systemctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>'s
<command>set-property</command> command.</para>
</listitem>
@@ -151,9 +138,8 @@
<varlistentry>
<term><option>--description=</option></term>
- <listitem><para>Provide a description for the service or scope
- unit. If not specified, the command itself will be used as a
- description. See <varname>Description=</varname> in
+ <listitem><para>Provide a description for the service, scope or timer unit. If not specified, the command
+ itself will be used as a description. See <varname>Description=</varname> in
<citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
</para></listitem>
</varlistentry>
@@ -161,19 +147,16 @@
<varlistentry>
<term><option>--slice=</option></term>
- <listitem><para>Make the new <filename>.service</filename> or
- <filename>.scope</filename> unit part of the specified slice,
- instead of the <filename>system.slice</filename>.</para>
+ <listitem><para>Make the new <filename>.service</filename> or <filename>.scope</filename> unit part of the
+ specified slice, instead of <filename>system.slice</filename>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--remain-after-exit</option></term>
- <listitem><para>After the service or scope process has
- terminated, keep the service around until it is explicitly
- stopped. This is useful to collect runtime information about
- the service after it finished running. Also see
+ <listitem><para>After the service process has terminated, keep the service around until it is explicitly
+ stopped. This is useful to collect runtime information about the service after it finished running. Also see
<varname>RemainAfterExit=</varname> in
<citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
</para>
@@ -183,10 +166,8 @@
<varlistentry>
<term><option>--send-sighup</option></term>
- <listitem><para>When terminating the scope or service unit,
- send a SIGHUP immediately after SIGTERM. This is useful to
- indicate to shells and shell-like processes that the
- connection has been severed. Also see
+ <listitem><para>When terminating the scope or service unit, send a SIGHUP immediately after SIGTERM. This is
+ useful to indicate to shells and shell-like processes that the connection has been severed. Also see
<varname>SendSIGHUP=</varname> in
<citerefentry><refentrytitle>systemd.kill</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
</para>
@@ -209,9 +190,8 @@
<term><option>--uid=</option></term>
<term><option>--gid=</option></term>
- <listitem><para>Runs the service process under the UNIX user
- and group. Also see <varname>User=</varname> and
- <varname>Group=</varname> in
+ <listitem><para>Runs the service process under the specified UNIX user and group. Also see
+ <varname>User=</varname> and <varname>Group=</varname> in
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
@@ -239,11 +219,9 @@
<term><option>--pty</option></term>
<term><option>-t</option></term>
- <listitem><para>When invoking a command, the service connects
- its standard input and output to the invoking tty via a
- pseudo TTY device. This allows invoking binaries as services
- that expect interactive user input, such as interactive
- command shells.</para></listitem>
+ <listitem><para>When invoking the command, the transient service connects its standard input and output to the
+ terminal <command>systemd-run</command> is invoked on, via a pseudo TTY device. This allows running binaries
+ that expect interactive user input as services, such as interactive command shells.</para></listitem>
</varlistentry>
<varlistentry>
@@ -263,44 +241,32 @@
<term><option>--on-unit-active=</option></term>
<term><option>--on-unit-inactive=</option></term>
- <listitem><para>Defines monotonic timers relative to different
- starting points. Also see <varname>OnActiveSec=</varname>,
- <varname>OnBootSec=</varname>,
- <varname>OnStartupSec=</varname>,
- <varname>OnUnitActiveSec=</varname> and
- <varname>OnUnitInactiveSec=</varname> in
- <citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry>. This
- options have no effect in conjunction with
- <option>--scope</option>.</para>
+ <listitem><para>Defines a monotonic timer relative to different starting points for starting the specified
+ command. See <varname>OnActiveSec=</varname>, <varname>OnBootSec=</varname>, <varname>OnStartupSec=</varname>,
+ <varname>OnUnitActiveSec=</varname> and <varname>OnUnitInactiveSec=</varname> in
+ <citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
+ details. These options may not be combined with <option>--scope</option>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--on-calendar=</option></term>
- <listitem><para>Defines realtime (i.e. wallclock) timers with
- calendar event expressions. Also see
- <varname>OnCalendar=</varname> in
- <citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry>. This
- option has no effect in conjunction with
- <option>--scope</option>.</para>
+ <listitem><para>Defines a calendar timer for starting the specified command. See <varname>OnCalendar=</varname>
+ in <citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry>. This
+ option may not be combined with <option>--scope</option>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--timer-property=</option></term>
- <listitem><para>Sets a timer unit property for the timer unit
- that is created. It is similar with
- <option>--property</option> but only for created timer
- unit. This option only has effect in conjunction with
- <option>--on-active=</option>, <option>--on-boot=</option>,
- <option>--on-startup=</option>,
- <option>--on-unit-active=</option>,
- <option>--on-unit-inactive=</option>,
- <option>--on-calendar=</option>. This takes an assignment in
- the same format as
- <citerefentry><refentrytitle>systemctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>'s
+ <listitem><para>Sets a property on the timer unit that is created. This option is similar to
+ <option>--property=</option> but applies to the transient timer unit rather than the transient service unit
+ created. This option only has an effect in conjunction with <option>--on-active=</option>,
+ <option>--on-boot=</option>, <option>--on-startup=</option>, <option>--on-unit-active=</option>,
+ <option>--on-unit-inactive=</option> or <option>--on-calendar=</option>. This option takes an assignment in the
+ same format as <citerefentry><refentrytitle>systemctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>'s
<command>set-property</command> command.</para> </listitem>
</varlistentry>
@@ -308,14 +274,25 @@
<term><option>--no-block</option></term>
<listitem>
- <para>Do not synchronously wait for the requested operation
- to finish. If this is not specified, the job will be
- verified, enqueued and <command>systemd-run</command> will
- wait until the unit's start-up is completed. By passing this
- argument, it is only verified and enqueued.</para>
+ <para>Do not synchronously wait for the unit start operation to finish. If this option is not specified, the
+ start request for the transient unit will be verified, enqueued and <command>systemd-run</command> will wait
+ until the unit's start-up is completed. By passing this argument, it is only verified and enqueued. This
+ option may not be combined with <option>--wait</option>.</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>--wait</option></term>
+
+ <listitem><para>Synchronously wait for the transient service to terminate. If this option is specified, the
+ start request for the transient unit is verified, enqueued, and waited for. Subsequently the invoked unit is
+ monitored, and it is waited until it is deactivated again (most likely because the specified command
+ completed). On exit, terse information about the unit's runtime is shown, including total runtime (as well as
+ CPU usage, if <option>--property=CPUAccounting=1</option> was set) and the exit code and status of the main
+ process. This output may be suppressed with <option>--quiet</option>. This option may not be combined with
+ <option>--no-block</option>, <option>--scope</option> or the various timer options.</para></listitem>
+ </varlistentry>
+
<xi:include href="user-system-options.xml" xpointer="user" />
<xi:include href="user-system-options.xml" xpointer="system" />
<xi:include href="user-system-options.xml" xpointer="host" />
@@ -425,7 +402,7 @@ There is a screen on:
when the user first logs in, and stays around as long as at least one
login session is open. After the user logs out of the last session,
<filename>user@.service</filename> and all services underneath it
- are terminated. This behaviour is the default, when "lingering" is
+ are terminated. This behavior is the default, when "lingering" is
not enabled for that user. Enabling lingering means that
<filename>user@.service</filename> is started automatically during
boot, even if the user is not logged in, and that the service is
@@ -452,6 +429,7 @@ There is a screen on:
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>systemd-mount</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>machinectl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
</para>
</refsect1>
diff --git a/src/grp-system/grp-utils/systemd-sysv-generator/sysv-generator.c b/src/grp-system/grp-utils/systemd-sysv-generator/sysv-generator.c
index 873cab1c5c..42dc76c20e 100644
--- a/src/grp-system/grp-utils/systemd-sysv-generator/sysv-generator.c
+++ b/src/grp-system/grp-utils/systemd-sysv-generator/sysv-generator.c
@@ -25,6 +25,7 @@
#include "systemd-basic/alloc-util.h"
#include "systemd-basic/dirent-util.h"
+#include "systemd-basic/exit-status.h"
#include "systemd-basic/fd-util.h"
#include "systemd-basic/fileio.h"
#include "systemd-basic/hashmap.h"
@@ -199,6 +200,13 @@ static int generate_unit_file(SysvStub *s) {
if (s->pid_file)
fprintf(f, "PIDFile=%s\n", s->pid_file);
+ /* Consider two special LSB exit codes a clean exit */
+ if (s->has_lsb)
+ fprintf(f,
+ "SuccessExitStatus=%i %i\n",
+ EXIT_NOTINSTALLED,
+ EXIT_NOTCONFIGURED);
+
fprintf(f,
"ExecStart=%s start\n"
"ExecStop=%s stop\n",
@@ -247,7 +255,7 @@ static char *sysv_translate_name(const char *name) {
return res;
}
-static int sysv_translate_facility(const char *name, const char *filename, char **ret) {
+static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name, char **ret) {
/* We silently ignore the $ prefix here. According to the LSB
* spec it simply indicates whether something is a
@@ -266,15 +274,18 @@ static int sysv_translate_facility(const char *name, const char *filename, char
"time", SPECIAL_TIME_SYNC_TARGET,
};
+ const char *filename;
char *filename_no_sh, *e, *m;
const char *n;
unsigned i;
int r;
assert(name);
- assert(filename);
+ assert(s);
assert(ret);
+ filename = basename(s->path);
+
n = *name == '$' ? name + 1 : name;
for (i = 0; i < ELEMENTSOF(table); i += 2) {
@@ -299,7 +310,7 @@ static int sysv_translate_facility(const char *name, const char *filename, char
if (*name == '$') {
r = unit_name_build(n, NULL, ".target", ret);
if (r < 0)
- return log_error_errno(r, "Failed to build name: %m");
+ return log_error_errno(r, "[%s:%u] Could not build name for facility %s: %m", s->path, line, name);
return r;
}
@@ -337,11 +348,11 @@ static int handle_provides(SysvStub *s, unsigned line, const char *full_text, co
r = extract_first_word(&text, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
if (r < 0)
- return log_error_errno(r, "Failed to parse word from provides string: %m");
+ return log_error_errno(r, "[%s:%u] Failed to parse word from provides string: %m", s->path, line);
if (r == 0)
break;
- r = sysv_translate_facility(word, basename(s->path), &m);
+ r = sysv_translate_facility(s, line, word, &m);
if (r <= 0) /* continue on error */
continue;
@@ -403,11 +414,11 @@ static int handle_dependencies(SysvStub *s, unsigned line, const char *full_text
r = extract_first_word(&text, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
if (r < 0)
- return log_error_errno(r, "Failed to parse word from provides string: %m");
+ return log_error_errno(r, "[%s:%u] Failed to parse word from provides string: %m", s->path, line);
if (r == 0)
break;
- r = sysv_translate_facility(word, basename(s->path), &m);
+ r = sysv_translate_facility(s, line, word, &m);
if (r <= 0) /* continue on error */
continue;
diff --git a/src/grp-system/kernel-command-line.xml b/src/grp-system/kernel-command-line.xml
index 3ecc969c10..1fa31a14b7 100644
--- a/src/grp-system/kernel-command-line.xml
+++ b/src/grp-system/kernel-command-line.xml
@@ -224,15 +224,14 @@
<varlistentry>
<term><varname>vconsole.keymap=</varname></term>
- <term><varname>vconsole.keymap.toggle=</varname></term>
+ <term><varname>vconsole.keymap_toggle=</varname></term>
<term><varname>vconsole.font=</varname></term>
- <term><varname>vconsole.font.map=</varname></term>
- <term><varname>vconsole.font.unimap=</varname></term>
+ <term><varname>vconsole.font_map=</varname></term>
+ <term><varname>vconsole.font_unimap=</varname></term>
<listitem>
- <para>Parameters understood by the virtual console setup
- logic. For details, see
- <citerefentry><refentrytitle>systemd-vconsole-setup.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
+ <para>Parameters understood by the virtual console setup logic. For details, see
+ <citerefentry><refentrytitle>vconsole.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
diff --git a/src/grp-system/libcore/include/core/cgroup.h b/src/grp-system/libcore/include/core/cgroup.h
index 0026609570..6293b84cd7 100644
--- a/src/grp-system/libcore/include/core/cgroup.h
+++ b/src/grp-system/libcore/include/core/cgroup.h
@@ -89,6 +89,10 @@ struct CGroupContext {
bool tasks_accounting;
/* For unified hierarchy */
+ uint64_t cpu_weight;
+ uint64_t startup_cpu_weight;
+ usec_t cpu_quota_per_sec_usec;
+
uint64_t io_weight;
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
@@ -97,11 +101,11 @@ struct CGroupContext {
uint64_t memory_low;
uint64_t memory_high;
uint64_t memory_max;
+ uint64_t memory_swap_max;
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
- usec_t cpu_quota_per_sec_usec;
uint64_t blockio_weight;
uint64_t startup_blockio_weight;
diff --git a/src/grp-system/libcore/include/core/dynamic-user.h b/src/grp-system/libcore/include/core/dynamic-user.h
new file mode 100644
index 0000000000..0b8bce1a72
--- /dev/null
+++ b/src/grp-system/libcore/include/core/dynamic-user.h
@@ -0,0 +1,66 @@
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+typedef struct DynamicUser DynamicUser;
+
+typedef struct DynamicCreds {
+ /* A combination of a dynamic user and group */
+ DynamicUser *user;
+ DynamicUser *group;
+} DynamicCreds;
+
+#include "manager.h"
+
+/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't
+ * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you
+ * need to allocated two of these objects. DynamicCreds below makes that easy. */
+struct DynamicUser {
+ int n_ref;
+ Manager *manager;
+
+ /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock
+ * file fd locking the user ID we picked. */
+ int storage_socket[2];
+
+ char name[];
+};
+
+int dynamic_user_acquire(Manager *m, const char *name, DynamicUser **ret);
+
+int dynamic_user_realize(DynamicUser *d, uid_t *ret);
+int dynamic_user_current(DynamicUser *d, uid_t *ret);
+
+DynamicUser* dynamic_user_ref(DynamicUser *d);
+DynamicUser* dynamic_user_unref(DynamicUser *d);
+DynamicUser* dynamic_user_destroy(DynamicUser *d);
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds);
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds);
+void dynamic_user_vacuum(Manager *m, bool close_user);
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret);
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret);
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group);
+int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid);
+
+void dynamic_creds_unref(DynamicCreds *creds);
+void dynamic_creds_destroy(DynamicCreds *creds);
diff --git a/src/grp-system/libcore/include/core/failure-action.h b/src/grp-system/libcore/include/core/emergency-action.h
index 47c08f4006..c463b892bc 100644
--- a/src/grp-system/libcore/include/core/failure-action.h
+++ b/src/grp-system/libcore/include/core/emergency-action.h
@@ -20,23 +20,23 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-typedef enum FailureAction {
- FAILURE_ACTION_NONE,
- FAILURE_ACTION_REBOOT,
- FAILURE_ACTION_REBOOT_FORCE,
- FAILURE_ACTION_REBOOT_IMMEDIATE,
- FAILURE_ACTION_POWEROFF,
- FAILURE_ACTION_POWEROFF_FORCE,
- FAILURE_ACTION_POWEROFF_IMMEDIATE,
- _FAILURE_ACTION_MAX,
- _FAILURE_ACTION_INVALID = -1
-} FailureAction;
+typedef enum EmergencyAction {
+ EMERGENCY_ACTION_NONE,
+ EMERGENCY_ACTION_REBOOT,
+ EMERGENCY_ACTION_REBOOT_FORCE,
+ EMERGENCY_ACTION_REBOOT_IMMEDIATE,
+ EMERGENCY_ACTION_POWEROFF,
+ EMERGENCY_ACTION_POWEROFF_FORCE,
+ EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
+ _EMERGENCY_ACTION_MAX,
+ _EMERGENCY_ACTION_INVALID = -1
+} EmergencyAction;
#include "systemd-basic/macro.h"
#include "manager.h"
-int failure_action(Manager *m, FailureAction action, const char *reboot_arg);
+int emergency_action(Manager *m, EmergencyAction action, const char *reboot_arg, const char *reason);
-const char* failure_action_to_string(FailureAction i) _const_;
-FailureAction failure_action_from_string(const char *s) _pure_;
+const char* emergency_action_to_string(EmergencyAction i) _const_;
+EmergencyAction emergency_action_from_string(const char *s) _pure_;
diff --git a/src/grp-system/libcore/include/core/execute.h b/src/grp-system/libcore/include/core/execute.h
index 178adc00ae..4dad8713fc 100644
--- a/src/grp-system/libcore/include/core/execute.h
+++ b/src/grp-system/libcore/include/core/execute.h
@@ -51,6 +51,7 @@ typedef enum ExecInput {
EXEC_INPUT_TTY_FORCE,
EXEC_INPUT_TTY_FAIL,
EXEC_INPUT_SOCKET,
+ EXEC_INPUT_NAMED_FD,
_EXEC_INPUT_MAX,
_EXEC_INPUT_INVALID = -1
} ExecInput;
@@ -66,6 +67,7 @@ typedef enum ExecOutput {
EXEC_OUTPUT_JOURNAL,
EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
EXEC_OUTPUT_SOCKET,
+ EXEC_OUTPUT_NAMED_FD,
_EXEC_OUTPUT_MAX,
_EXEC_OUTPUT_INVALID = -1
} ExecOutput;
@@ -93,6 +95,8 @@ struct ExecRuntime {
char *tmp_dir;
char *var_tmp_dir;
+ /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
+ * namespace. */
int netns_storage_socket[2];
};
@@ -119,6 +123,7 @@ struct ExecContext {
ExecInput std_input;
ExecOutput std_output;
ExecOutput std_error;
+ char *stdio_fdname[3];
nsec_t timer_slack_nsec;
@@ -170,11 +175,18 @@ struct ExecContext {
bool private_tmp;
bool private_network;
bool private_devices;
+ bool private_users;
ProtectSystem protect_system;
ProtectHome protect_home;
+ bool protect_kernel_tunables;
+ bool protect_kernel_modules;
+ bool protect_control_groups;
bool no_new_privileges;
+ bool dynamic_user;
+ bool remove_ipc;
+
/* This is not exposed to the user but available
* internally. We need it to make sure that whenever we spawn
* /usr/bin/mount it is run in the same process group as us so
@@ -205,6 +217,19 @@ struct ExecContext {
bool no_new_privileges_set:1;
};
+typedef enum ExecFlags {
+ EXEC_CONFIRM_SPAWN = 1U << 0,
+ EXEC_APPLY_PERMISSIONS = 1U << 1,
+ EXEC_APPLY_CHROOT = 1U << 2,
+ EXEC_APPLY_TTY_STDIN = 1U << 3,
+
+ /* The following are not used by execute.c, but by consumers internally */
+ EXEC_PASS_FDS = 1U << 4,
+ EXEC_IS_CONTROL = 1U << 5,
+ EXEC_SETENV_RESULT = 1U << 6,
+ EXEC_SET_WATCHDOG = 1U << 7,
+} ExecFlags;
+
struct ExecParameters {
char **argv;
char **environment;
@@ -213,11 +238,7 @@ struct ExecParameters {
char **fd_names;
unsigned n_fds;
- bool apply_permissions:1;
- bool apply_chroot:1;
- bool apply_tty_stdin:1;
-
- bool confirm_spawn:1;
+ ExecFlags flags;
bool selinux_context_net:1;
bool cgroup_delegate:1;
@@ -235,6 +256,7 @@ struct ExecParameters {
int stderr_fd;
};
+#include "dynamic-user.h"
#include "unit.h"
int exec_spawn(Unit *unit,
@@ -242,6 +264,7 @@ int exec_spawn(Unit *unit,
const ExecContext *context,
const ExecParameters *exec_params,
ExecRuntime *runtime,
+ DynamicCreds *dynamic_creds,
pid_t *ret);
void exec_command_done(ExecCommand *c);
@@ -265,6 +288,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix);
int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_root);
int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l);
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]);
+const char* exec_context_fdname(const ExecContext *c, int fd_index);
bool exec_context_may_touch_console(ExecContext *c);
bool exec_context_maintains_privileges(ExecContext *c);
diff --git a/src/grp-system/libcore/include/core/job.h b/src/grp-system/libcore/include/core/job.h
index 7338601bb7..3e62465695 100644
--- a/src/grp-system/libcore/include/core/job.h
+++ b/src/grp-system/libcore/include/core/job.h
@@ -177,8 +177,8 @@ Job* job_install(Job *j);
int job_install_deserialized(Job *j);
void job_uninstall(Job *j);
void job_dump(Job *j, FILE*f, const char *prefix);
-int job_serialize(Job *j, FILE *f, FDSet *fds);
-int job_deserialize(Job *j, FILE *f, FDSet *fds);
+int job_serialize(Job *j, FILE *f);
+int job_deserialize(Job *j, FILE *f);
int job_coldplug(Job *j);
JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts);
diff --git a/src/grp-system/libcore/include/core/load-fragment.h b/src/grp-system/libcore/include/core/load-fragment.h
index b36a2e3a02..c05f205c37 100644
--- a/src/grp-system/libcore/include/core/load-fragment.h
+++ b/src/grp-system/libcore/include/core/load-fragment.h
@@ -45,7 +45,9 @@ int config_parse_service_timeout(const char *unit, const char *filename, unsigne
int config_parse_service_type(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_service_restart(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_socket_bindtodevice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_output(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_exec_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_input(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_io_class(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_io_priority(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -73,7 +75,7 @@ int config_parse_unit_condition_string(const char *unit, const char *filename, u
int config_parse_unit_condition_null(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_failure_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_emergency_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_syscall_archs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -81,6 +83,7 @@ int config_parse_syscall_errno(const char *unit, const char *filename, unsigned
int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_pass_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_cpu_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
@@ -111,6 +114,8 @@ int config_parse_exec_utmp_mode(const char *unit, const char *filename, unsigned
int config_parse_working_directory(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_fdname(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
diff --git a/src/grp-system/libcore/include/core/manager.h b/src/grp-system/libcore/include/core/manager.h
index ae56cf1a9a..429b86a94c 100644
--- a/src/grp-system/libcore/include/core/manager.h
+++ b/src/grp-system/libcore/include/core/manager.h
@@ -82,6 +82,7 @@ struct Manager {
/* Active jobs and units */
Hashmap *units; /* name string => Unit object n:1 */
+ Hashmap *units_by_invocation_id;
Hashmap *jobs; /* job id => Job object 1:1 */
/* To make it easy to iterate through the units of a specific
@@ -144,6 +145,9 @@ struct Manager {
sd_event_source *jobs_in_progress_event_source;
+ int user_lookup_fds[2];
+ sd_event_source *user_lookup_event_source;
+
UnitFileScope unit_file_scope;
LookupPaths lookup_paths;
Set *unit_path_cache;
@@ -235,7 +239,6 @@ struct Manager {
bool dispatching_dbus_queue:1;
bool taint_usr:1;
-
bool test_run:1;
/* If non-zero, exit with the following value when the systemd
@@ -293,18 +296,26 @@ struct Manager {
* value where Unit objects are contained. */
Hashmap *units_requiring_mounts_for;
- /* Reference to the kdbus bus control fd */
- int kdbus_fd;
-
/* Used for processing polkit authorization responses */
Hashmap *polkit_registry;
- /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... */
+ /* Dynamic users/groups, indexed by their name */
+ Hashmap *dynamic_users;
+
+ /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
+ Hashmap *uid_refs;
+ Hashmap *gid_refs;
+
+ /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
RateLimit ctrl_alt_del_ratelimit;
+ EmergencyAction cad_burst_action;
const char *unit_log_field;
const char *unit_log_format_string;
+ const char *invocation_log_field;
+ const char *invocation_log_format_string;
+
int first_boot; /* tri-state */
};
@@ -376,5 +387,20 @@ ManagerState manager_state(Manager *m);
int manager_update_failed_units(Manager *m, Unit *u, bool failed);
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
+int manager_ref_gid(Manager *m, gid_t gid, bool destroy_now);
+
+void manager_vacuum_uid_refs(Manager *m);
+void manager_vacuum_gid_refs(Manager *m);
+
+void manager_serialize_uid_refs(Manager *m, FILE *f);
+void manager_deserialize_uid_refs_one(Manager *m, const char *value);
+
+void manager_serialize_gid_refs(Manager *m, FILE *f);
+void manager_deserialize_gid_refs_one(Manager *m, const char *value);
+
const char *manager_state_to_string(ManagerState m) _const_;
ManagerState manager_state_from_string(const char *s) _pure_;
diff --git a/src/grp-system/libcore/include/core/mount.h b/src/grp-system/libcore/include/core/mount.h
index 0caef2b451..148fedf354 100644
--- a/src/grp-system/libcore/include/core/mount.h
+++ b/src/grp-system/libcore/include/core/mount.h
@@ -21,8 +21,7 @@
typedef struct Mount Mount;
-#include "cgroup.h"
-#include "execute.h"
+#include "dynamic-user.h"
#include "kill.h"
typedef enum MountExecCommand {
@@ -72,6 +71,9 @@ struct Mount {
bool sloppy_options;
+ bool lazy_unmount;
+ bool force_unmount;
+
MountResult result;
MountResult reload_result;
@@ -86,6 +88,7 @@ struct Mount {
CGroupContext cgroup_context;
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
MountState state, deserialized_state;
diff --git a/src/grp-system/libcore/include/core/namespace.h b/src/grp-system/libcore/include/core/namespace.h
index 89c6225c39..8e80e2f38e 100644
--- a/src/grp-system/libcore/include/core/namespace.h
+++ b/src/grp-system/libcore/include/core/namespace.h
@@ -4,6 +4,7 @@
This file is part of systemd.
Copyright 2010 Lennart Poettering
+ Copyright 2016 Djalal Harouni
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -23,6 +24,8 @@
#include "systemd-basic/macro.h"
+typedef struct NameSpaceInfo NameSpaceInfo;
+
typedef enum ProtectHome {
PROTECT_HOME_NO,
PROTECT_HOME_YES,
@@ -35,17 +38,25 @@ typedef enum ProtectSystem {
PROTECT_SYSTEM_NO,
PROTECT_SYSTEM_YES,
PROTECT_SYSTEM_FULL,
+ PROTECT_SYSTEM_STRICT,
_PROTECT_SYSTEM_MAX,
_PROTECT_SYSTEM_INVALID = -1
} ProtectSystem;
+struct NameSpaceInfo {
+ bool private_dev:1;
+ bool protect_control_groups:1;
+ bool protect_kernel_tunables:1;
+ bool protect_kernel_modules:1;
+};
+
int setup_namespace(const char *chroot,
+ const NameSpaceInfo *ns_info,
char **read_write_paths,
char **read_only_paths,
char **inaccessible_paths,
const char *tmp_dir,
const char *var_tmp_dir,
- bool private_dev,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags);
diff --git a/src/grp-system/libcore/include/core/service.h b/src/grp-system/libcore/include/core/service.h
index 8670913dbd..7e4763a0ae 100644
--- a/src/grp-system/libcore/include/core/service.h
+++ b/src/grp-system/libcore/include/core/service.h
@@ -150,9 +150,11 @@ struct Service {
/* Runtime data of the execution context */
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
pid_t main_pid, control_pid;
int socket_fd;
+ SocketPeer *peer;
bool socket_fd_selinux_context_net;
bool permissions_start_only;
@@ -178,7 +180,7 @@ struct Service {
char *status_text;
int status_errno;
- FailureAction failure_action;
+ EmergencyAction emergency_action;
UnitRef accept_socket;
diff --git a/src/grp-system/libcore/include/core/socket.h b/src/grp-system/libcore/include/core/socket.h
index 0e3ead5060..d17bf70f18 100644
--- a/src/grp-system/libcore/include/core/socket.h
+++ b/src/grp-system/libcore/include/core/socket.h
@@ -22,6 +22,7 @@
#include "systemd-basic/socket-util.h"
typedef struct Socket Socket;
+typedef struct SocketPeer SocketPeer;
#include "service.h"
@@ -79,9 +80,12 @@ struct Socket {
LIST_HEAD(SocketPort, ports);
+ Set *peers_by_address;
+
unsigned n_accepted;
unsigned n_connections;
unsigned max_connections;
+ unsigned max_connections_per_source;
unsigned backlog;
unsigned keep_alive_cnt;
@@ -94,7 +98,9 @@ struct Socket {
ExecContext exec_context;
KillContext kill_context;
CGroupContext cgroup_context;
+
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
/* For Accept=no sockets refers to the one service we'll
activate. For Accept=yes sockets is either NULL, or filled
@@ -162,6 +168,12 @@ struct Socket {
RateLimit trigger_limit;
};
+SocketPeer *socket_peer_ref(SocketPeer *p);
+SocketPeer *socket_peer_unref(SocketPeer *p);
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref);
+
/* Called from the service code when collecting fds */
int socket_collect_fds(Socket *s, int **fds);
diff --git a/src/grp-system/libcore/include/core/swap.h b/src/grp-system/libcore/include/core/swap.h
index 52e22e22e8..e0c204a062 100644
--- a/src/grp-system/libcore/include/core/swap.h
+++ b/src/grp-system/libcore/include/core/swap.h
@@ -84,6 +84,7 @@ struct Swap {
CGroupContext cgroup_context;
ExecRuntime *exec_runtime;
+ DynamicCreds dynamic_creds;
SwapState state, deserialized_state;
diff --git a/src/grp-system/libcore/include/core/unit.h b/src/grp-system/libcore/include/core/unit.h
index d57caceb19..f40b23828a 100644
--- a/src/grp-system/libcore/include/core/unit.h
+++ b/src/grp-system/libcore/include/core/unit.h
@@ -33,7 +33,7 @@ typedef struct UnitRef UnitRef;
typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
typedef struct UnitVTable UnitVTable;
-#include "failure-action.h"
+#include "emergency-action.h"
typedef enum KillOperation {
KILL_TERMINATE,
@@ -109,9 +109,13 @@ struct Unit {
/* The slot used for watching NameOwnerChanged signals */
sd_bus_slot *match_bus_slot;
+ /* References to this unit from clients */
+ sd_bus_track *bus_track;
+ char **deserialized_refs;
+
/* Job timeout and action to take */
usec_t job_timeout;
- FailureAction job_timeout_action;
+ EmergencyAction job_timeout_action;
char *job_timeout_reboot_arg;
/* References to this */
@@ -175,18 +179,23 @@ struct Unit {
/* Put a ratelimit on unit starting */
RateLimit start_limit;
- FailureAction start_limit_action;
+ EmergencyAction start_limit_action;
char *reboot_arg;
/* Make sure we never enter endless loops with the check unneeded logic, or the BindsTo= logic */
RateLimit auto_stop_ratelimit;
+ /* Reference to a specific UID/GID */
+ uid_t ref_uid;
+ gid_t ref_gid;
+
/* Cached unit file state and preset */
UnitFileState unit_file_state;
int unit_file_preset;
- /* Where the cpuacct.usage cgroup counter was at the time the unit was started */
- nsec_t cpuacct_usage_base;
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
+ nsec_t cpu_usage_last; /* the most recently read value */
/* Counterparts in the cgroup filesystem */
char *cgroup_path;
@@ -196,11 +205,13 @@ struct Unit {
CGroupMask cgroup_members_mask;
int cgroup_inotify_wd;
- uint32_t cgroup_netclass_id;
-
/* How to start OnFailure units */
JobMode on_failure_job_mode;
+ /* The current invocation ID */
+ sd_id128_t invocation_id;
+ char invocation_id_string[SD_ID128_STRING_MAX]; /* useful when logging */
+
/* Garbage collect us we nobody wants or requires us anymore */
bool stop_when_unneeded;
@@ -226,6 +237,9 @@ struct Unit {
/* Is this a transient unit? */
bool transient;
+ /* Is this a unit that is always running and cannot be stopped? */
+ bool perpetual;
+
bool in_load_queue:1;
bool in_dbus_queue:1;
bool in_cleanup_queue:1;
@@ -234,8 +248,6 @@ struct Unit {
bool sent_dbus_new_signal:1;
- bool no_gc:1;
-
bool in_audit:1;
bool cgroup_realized:1;
@@ -246,6 +258,9 @@ struct Unit {
/* Did we already invoke unit_coldplug() for this unit? */
bool coldplugged:1;
+
+ /* For transient units: whether to add a bus track reference after creating the unit */
+ bool bus_track_add:1;
};
struct UnitStatusMessageFormats {
@@ -294,6 +309,10 @@ struct UnitVTable {
* that */
size_t exec_runtime_offset;
+ /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type
+ * has that. */
+ size_t dynamic_creds_offset;
+
/* The name of the configuration file section with the private settings of this unit */
const char *private_section;
@@ -357,7 +376,7 @@ struct UnitVTable {
/* When the unit is not running and no job for it queued we
* shall release its runtime resources */
- void (*release_resources)(Unit *u);
+ void (*release_resources)(Unit *u, bool inactive);
/* Invoked on every child that died */
void (*sigchld_event)(Unit *u, pid_t pid, int code, int status);
@@ -372,8 +391,7 @@ struct UnitVTable {
/* Called whenever a process of this unit sends us a message */
void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds);
- /* Called whenever a name this Unit registered for comes or
- * goes away. */
+ /* Called whenever a name this Unit registered for comes or goes away. */
void (*bus_name_owner_change)(Unit *u, const char *name, const char *old_owner, const char *new_owner);
/* Called for each property that is being set */
@@ -466,6 +484,7 @@ DEFINE_CAST(SCOPE, Scope);
Unit *unit_new(Manager *m, size_t size);
void unit_free(Unit *u);
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret);
int unit_add_name(Unit *u, const char *name);
int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_reference);
@@ -510,6 +529,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix);
bool unit_can_reload(Unit *u) _pure_;
bool unit_can_start(Unit *u) _pure_;
+bool unit_can_stop(Unit *u) _pure_;
bool unit_can_isolate(Unit *u) _pure_;
int unit_start(Unit *u);
@@ -536,6 +556,7 @@ bool unit_job_is_applicable(Unit *u, JobType j);
int set_unit_path(const char *p);
char *unit_dbus_path(Unit *u);
+char *unit_dbus_path_invocation_id(Unit *u);
int unit_load_related_unit(Unit *u, const char *type, Unit **_found);
@@ -592,6 +613,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
int unit_setup_exec_runtime(Unit *u);
+int unit_setup_dynamic_creds(Unit *u);
int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data);
int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *format, ...) _printf_(4,5);
@@ -621,12 +643,26 @@ int unit_fail_if_symlink(Unit *u, const char* where);
int unit_start_limit_test(Unit *u);
+void unit_unref_uid(Unit *u, bool destroy_now);
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc);
+
+void unit_unref_gid(Unit *u, bool destroy_now);
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc);
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid);
+void unit_unref_uid_gid(Unit *u, bool destroy_now);
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid);
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id);
+int unit_acquire_invocation_id(Unit *u);
+
/* Macros which append UNIT= or USER_UNIT= to the message */
#define log_unit_full(unit, level, error, ...) \
({ \
- Unit *_u = (unit); \
- _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, ##__VA_ARGS__) : \
+ const Unit *_u = (unit); \
+ _u ? log_object_internal(level, error, __FILE__, __LINE__, __func__, _u->manager->unit_log_field, _u->id, _u->manager->invocation_log_field, _u->invocation_id_string, ##__VA_ARGS__) : \
log_internal(level, error, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
})
diff --git a/src/grp-system/libcore/src/Makefile b/src/grp-system/libcore/src/Makefile
index 9580b69a14..ab731a5b4f 100644
--- a/src/grp-system/libcore/src/Makefile
+++ b/src/grp-system/libcore/src/Makefile
@@ -69,6 +69,8 @@ libcore_la_SOURCES = \
src/core/load-dropin.h \
src/core/execute.c \
src/core/execute.h \
+ src/core/dynamic-user.c \
+ src/core/dynamic-user.h \
src/core/kill.c \
src/core/kill.h \
src/core/dbus.c \
@@ -139,8 +141,8 @@ libcore_la_SOURCES = \
src/core/audit-fd.h \
src/core/show-status.c \
src/core/show-status.h \
- src/core/failure-action.c \
- src/core/failure-action.h
+ src/core/emergency-action.c \
+ src/core/emergency-action.h
nodist_libcore_la_SOURCES = \
src/core/load-fragment-gperf.c \
diff --git a/src/grp-system/libcore/src/automount.c b/src/grp-system/libcore/src/automount.c
index 2f116977de..e0ba6f68a9 100644
--- a/src/grp-system/libcore/src/automount.c
+++ b/src/grp-system/libcore/src/automount.c
@@ -273,6 +273,11 @@ static int automount_coldplug(Unit *u) {
return r;
(void) sd_event_source_set_description(a->pipe_event_source, "automount-io");
+ if (a->deserialized_state == AUTOMOUNT_RUNNING) {
+ r = automount_start_expire(a);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(a), r, "Failed to start expiration timer, ignoring: %m");
+ }
}
automount_set_state(a, a->deserialized_state);
@@ -303,7 +308,7 @@ static void automount_dump(Unit *u, FILE *f, const char *prefix) {
static void automount_enter_dead(Automount *a, AutomountResult f) {
assert(a);
- if (f != AUTOMOUNT_SUCCESS)
+ if (a->result == AUTOMOUNT_SUCCESS)
a->result = f;
automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD);
@@ -797,6 +802,10 @@ static int automount_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
a->result = AUTOMOUNT_SUCCESS;
automount_enter_waiting(a);
return 1;
@@ -1107,6 +1116,9 @@ const UnitVTable automount_vtable = {
.reset_failed = automount_reset_failed,
.bus_vtable = bus_automount_vtable,
+ .bus_set_property = bus_automount_set_property,
+
+ .can_transient = true,
.shutdown = automount_shutdown,
.supported = automount_supported,
diff --git a/src/grp-system/libcore/src/busname.c b/src/grp-system/libcore/src/busname.c
index 178866fc15..3d365530bf 100644
--- a/src/grp-system/libcore/src/busname.c
+++ b/src/grp-system/libcore/src/busname.c
@@ -443,7 +443,7 @@ fail:
static void busname_enter_dead(BusName *n, BusNameResult f) {
assert(n);
- if (f != BUSNAME_SUCCESS)
+ if (n->result == BUSNAME_SUCCESS)
n->result = f;
busname_set_state(n, n->result != BUSNAME_SUCCESS ? BUSNAME_FAILED : BUSNAME_DEAD);
@@ -455,7 +455,7 @@ static void busname_enter_signal(BusName *n, BusNameState state, BusNameResult f
assert(n);
- if (f != BUSNAME_SUCCESS)
+ if (n->result == BUSNAME_SUCCESS)
n->result = f;
kill_context_init(&kill_context);
@@ -640,6 +640,10 @@ static int busname_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
n->result = BUSNAME_SUCCESS;
busname_enter_making(n);
@@ -869,7 +873,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) {
n->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = BUSNAME_SUCCESS;
else if (code == CLD_EXITED)
f = BUSNAME_FAILURE_EXIT_CODE;
@@ -883,7 +887,7 @@ static void busname_sigchld_event(Unit *u, pid_t pid, int code, int status) {
log_unit_full(u, f == BUSNAME_SUCCESS ? LOG_DEBUG : LOG_NOTICE, 0,
"Control process exited, code=%s status=%i", sigchld_code_to_string(code), status);
- if (f != BUSNAME_SUCCESS)
+ if (n->result == BUSNAME_SUCCESS)
n->result = f;
switch (n->state) {
diff --git a/src/grp-system/libcore/src/cgroup.c b/src/grp-system/libcore/src/cgroup.c
index fca369c4ff..62222903fe 100644
--- a/src/grp-system/libcore/src/cgroup.c
+++ b/src/grp-system/libcore/src/cgroup.c
@@ -57,12 +57,16 @@ void cgroup_context_init(CGroupContext *c) {
/* Initialize everything to the kernel defaults, assuming the
* structure is preinitialized to 0 */
+ c->cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->startup_cpu_weight = CGROUP_WEIGHT_INVALID;
+ c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
- c->cpu_quota_per_sec_usec = USEC_INFINITY;
c->memory_high = CGROUP_LIMIT_MAX;
c->memory_max = CGROUP_LIMIT_MAX;
+ c->memory_swap_max = CGROUP_LIMIT_MAX;
c->memory_limit = CGROUP_LIMIT_MAX;
@@ -158,6 +162,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
+ "%sCPUWeight=%" PRIu64 "\n"
+ "%sStartupCPUWeight=%" PRIu64 "\n"
"%sCPUShares=%" PRIu64 "\n"
"%sStartupCPUShares=%" PRIu64 "\n"
"%sCPUQuotaPerSecSec=%s\n"
@@ -168,6 +174,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sMemoryLow=%" PRIu64 "\n"
"%sMemoryHigh=%" PRIu64 "\n"
"%sMemoryMax=%" PRIu64 "\n"
+ "%sMemorySwapMax=%" PRIu64 "\n"
"%sMemoryLimit=%" PRIu64 "\n"
"%sTasksMax=%" PRIu64 "\n"
"%sDevicePolicy=%s\n"
@@ -177,6 +184,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
+ prefix, c->cpu_weight,
+ prefix, c->startup_cpu_weight,
prefix, c->cpu_shares,
prefix, c->startup_cpu_shares,
prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
@@ -187,6 +196,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->memory_low,
prefix, c->memory_high,
prefix, c->memory_max,
+ prefix, c->memory_swap_max,
prefix, c->memory_limit,
prefix, c->tasks_max,
prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -382,6 +392,95 @@ fail:
return -errno;
}
+static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
+ return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
+}
+
+static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
+ return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
+}
+
+static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->startup_cpu_weight;
+ else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
+ return c->cpu_weight;
+ else
+ return CGROUP_WEIGHT_DEFAULT;
+}
+
+static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
+ if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->startup_cpu_shares;
+ else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
+ return c->cpu_shares;
+ else
+ return CGROUP_CPU_SHARES_DEFAULT;
+}
+
+static void cgroup_apply_unified_cpu_config(Unit *u, uint64_t weight, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t) + 1, (DECIMAL_STR_MAX(usec_t) + 1) * 2)];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", weight);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.weight", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.weight: %m");
+
+ if (quota != USEC_INFINITY)
+ xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
+ quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+ else
+ xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.max", buf);
+
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.max: %m");
+}
+
+static void cgroup_apply_legacy_cpu_config(Unit *u, uint64_t shares, uint64_t quota) {
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ int r;
+
+ xsprintf(buf, "%" PRIu64 "\n", shares);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.shares", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.shares: %m");
+
+ xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_period_us", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_period_us: %m");
+
+ if (quota != USEC_INFINITY) {
+ xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", buf);
+ } else
+ r = cg_set_attribute("cpu", u->cgroup_path, "cpu.cfs_quota_us", "-1");
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_quota_us: %m");
+}
+
+static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
+ return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
+ CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
+ return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
+ CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
+}
+
static bool cgroup_context_has_io_config(CGroupContext *c) {
return c->io_accounting ||
c->io_weight != CGROUP_WEIGHT_INVALID ||
@@ -521,7 +620,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u
}
static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
- return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+ return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
}
static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
@@ -566,30 +665,42 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* and missing cgroups, i.e. EROFS and ENOENT. */
if ((mask & CGROUP_MASK_CPU) && !is_root) {
- char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+ bool has_weight = cgroup_context_has_cpu_weight(c);
+ bool has_shares = cgroup_context_has_cpu_shares(c);
- sprintf(buf, "%" PRIu64 "\n",
- IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
- r = cg_set_attribute("cpu", path, "cpu.shares", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.shares: %m");
+ if (cg_all_unified() > 0) {
+ uint64_t weight;
- sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_period_us: %m");
+ if (has_weight)
+ weight = cgroup_context_cpu_weight(c, state);
+ else if (has_shares) {
+ uint64_t shares = cgroup_context_cpu_shares(c, state);
- if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
- sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
- } else
- r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
- if (r < 0)
- log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_quota_us: %m");
+ weight = cgroup_cpu_shares_to_weight(shares);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuShares %" PRIu64 " as [Startup]CpuWeight %" PRIu64 " on %s",
+ shares, weight, path);
+ } else
+ weight = CGROUP_WEIGHT_DEFAULT;
+
+ cgroup_apply_unified_cpu_config(u, weight, c->cpu_quota_per_sec_usec);
+ } else {
+ uint64_t shares;
+
+ if (has_weight) {
+ uint64_t weight = cgroup_context_cpu_weight(c, state);
+
+ shares = cgroup_cpu_weight_to_shares(weight);
+
+ log_cgroup_compat(u, "Applying [Startup]CpuWeight %" PRIu64 " as [Startup]CpuShares %" PRIu64 " on %s",
+ weight, shares, path);
+ } else if (has_shares)
+ shares = cgroup_context_cpu_shares(c, state);
+ else
+ shares = CGROUP_CPU_SHARES_DEFAULT;
+
+ cgroup_apply_legacy_cpu_config(u, shares, c->cpu_quota_per_sec_usec);
+ }
}
if (mask & CGROUP_MASK_IO) {
@@ -677,16 +788,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
char buf[DECIMAL_STR_MAX(uint64_t)+1];
uint64_t weight;
- if (has_blockio)
- weight = cgroup_context_blkio_weight(c, state);
- else if (has_io) {
+ if (has_io) {
uint64_t io_weight = cgroup_context_io_weight(c, state);
weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
io_weight, weight);
- } else
+ } else if (has_blockio)
+ weight = cgroup_context_blkio_weight(c, state);
+ else
weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
xsprintf(buf, "%" PRIu64 "\n", weight);
@@ -695,13 +806,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set blkio.weight: %m");
- if (has_blockio) {
- CGroupBlockIODeviceWeight *w;
-
- /* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->blockio_device_weights)
- cgroup_apply_blkio_device_weight(u, w->path, w->weight);
- } else if (has_io) {
+ if (has_io) {
CGroupIODeviceWeight *w;
/* FIXME: no way to reset this list */
@@ -713,18 +818,17 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
cgroup_apply_blkio_device_weight(u, w->path, weight);
}
+ } else if (has_blockio) {
+ CGroupBlockIODeviceWeight *w;
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+ cgroup_apply_blkio_device_weight(u, w->path, w->weight);
}
}
/* Apply limits and free ones without config. */
- if (has_blockio) {
- CGroupBlockIODeviceBandwidth *b, *next;
-
- LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
- if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
- cgroup_context_free_blockio_device_bandwidth(c, b);
- }
- } else if (has_io) {
+ if (has_io) {
CGroupIODeviceLimit *l, *next;
LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
@@ -734,16 +838,24 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
cgroup_context_free_io_device_limit(c, l);
}
+ } else if (has_blockio) {
+ CGroupBlockIODeviceBandwidth *b, *next;
+
+ LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths)
+ if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
+ cgroup_context_free_blockio_device_bandwidth(c, b);
}
}
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
- if (cg_unified() > 0) {
- uint64_t max = c->memory_max;
+ if (cg_all_unified() > 0) {
+ uint64_t max;
+ uint64_t swap_max = CGROUP_LIMIT_MAX;
- if (cgroup_context_has_unified_memory_config(c))
+ if (cgroup_context_has_unified_memory_config(c)) {
max = c->memory_max;
- else {
+ swap_max = c->memory_swap_max;
+ } else {
max = c->memory_limit;
if (max != CGROUP_LIMIT_MAX)
@@ -753,16 +865,16 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
cgroup_apply_unified_memory_limit(u, "memory.max", max);
+ cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
} else {
char buf[DECIMAL_STR_MAX(uint64_t) + 1];
- uint64_t val = c->memory_limit;
+ uint64_t val;
- if (val == CGROUP_LIMIT_MAX) {
+ if (cgroup_context_has_unified_memory_config(c)) {
val = c->memory_max;
-
- if (val != CGROUP_LIMIT_MAX)
- log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
- }
+ log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", val);
+ } else
+ val = c->memory_limit;
if (val == CGROUP_LIMIT_MAX)
strncpy(buf, "-1\n", sizeof(buf));
@@ -844,7 +956,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
if ((mask & CGROUP_MASK_PIDS) && !is_root) {
- if (c->tasks_max != (uint64_t) -1) {
+ if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
@@ -864,8 +976,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need */
if (c->cpu_accounting ||
- c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
- c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ cgroup_context_has_cpu_weight(c) ||
+ cgroup_context_has_cpu_shares(c) ||
c->cpu_quota_per_sec_usec != USEC_INFINITY)
mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
@@ -911,7 +1023,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
e = unit_get_exec_context(u);
if (!e ||
exec_context_maintains_privileges(e) ||
- cg_unified() > 0)
+ cg_all_unified() > 0)
return _CGROUP_MASK_ALL;
}
@@ -1137,7 +1249,7 @@ int unit_watch_cgroup(Unit *u) {
return 0;
/* Only applies to the unified hierarchy */
- r = cg_unified();
+ r = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
if (r < 0)
return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
if (r == 0)
@@ -1247,6 +1359,26 @@ int unit_attach_pids_to_cgroup(Unit *u) {
return 0;
}
+static void cgroup_xattr_apply(Unit *u) {
+ char ids[SD_ID128_STRING_MAX];
+ int r;
+
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return;
+
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.invocation_id",
+ sd_id128_to_string(u->invocation_id, ids), 32,
+ 0);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
assert(u);
@@ -1290,6 +1422,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
/* Finally, apply the necessary attributes. */
cgroup_context_apply(u, target_mask, state);
+ cgroup_xattr_apply(u);
return 0;
}
@@ -1416,6 +1549,8 @@ void unit_prune_cgroup(Unit *u) {
if (!u->cgroup_path)
return;
+ (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+
is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
@@ -1537,7 +1672,7 @@ int unit_watch_all_pids(Unit *u) {
if (!u->cgroup_path)
return -ENOENT;
- if (cg_unified() > 0) /* On unified we can use proper notifications */
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */
return 0;
return unit_watch_pids_in_path(u, u->cgroup_path);
@@ -1610,7 +1745,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
CGroupController c;
- int r, unified;
+ int r, all_unified, systemd_unified;
char *e;
assert(m);
@@ -1647,11 +1782,17 @@ int manager_setup_cgroup(Manager *m) {
if (r < 0)
return log_error_errno(r, "Cannot find cgroup mount point: %m");
- unified = cg_unified();
- if (unified < 0)
- return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
- if (unified > 0)
+ all_unified = cg_all_unified();
+ systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER);
+
+ if (all_unified < 0 || systemd_unified < 0)
+ return log_error_errno(all_unified < 0 ? all_unified : systemd_unified,
+ "Couldn't determine if we are running in the unified hierarchy: %m");
+
+ if (all_unified > 0)
log_debug("Unified cgroup hierarchy is located at %s.", path);
+ else if (systemd_unified > 0)
+ log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
else
log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
@@ -1659,7 +1800,7 @@ int manager_setup_cgroup(Manager *m) {
const char *scope_path;
/* 3. Install agent */
- if (unified) {
+ if (systemd_unified) {
/* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
@@ -1719,7 +1860,7 @@ int manager_setup_cgroup(Manager *m) {
return log_error_errno(errno, "Failed to open pin file: %m");
/* 6. Always enable hierarchical support if it exists... */
- if (!unified)
+ if (!all_unified)
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
}
@@ -1845,7 +1986,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
return -ENODATA;
- if (cg_unified() <= 0)
+ if (cg_all_unified() <= 0)
r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
else
r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
@@ -1890,18 +2031,37 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
if (!u->cgroup_path)
return -ENODATA;
- if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
- return -ENODATA;
+ if (cg_all_unified() > 0) {
+ const char *keys[] = { "usage_usec", NULL };
+ _cleanup_free_ char *val = NULL;
+ uint64_t us;
- r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
- if (r == -ENOENT)
- return -ENODATA;
- if (r < 0)
- return r;
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
+ return -ENODATA;
- r = safe_atou64(v, &ns);
- if (r < 0)
- return r;
+ r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(val, &us);
+ if (r < 0)
+ return r;
+
+ ns = us * NSEC_PER_USEC;
+ } else {
+ if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
+ return -ENODATA;
+
+ r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &ns);
+ if (r < 0)
+ return r;
+ }
*ret = ns;
return 0;
@@ -1911,16 +2071,33 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
nsec_t ns;
int r;
+ assert(u);
+
+ /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
+ * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
+ * call this function with a NULL return value. */
+
r = unit_get_cpu_usage_raw(u, &ns);
+ if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+ /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
+ * cached value. */
+
+ if (ret)
+ *ret = u->cpu_usage_last;
+ return 0;
+ }
if (r < 0)
return r;
- if (ns > u->cpuacct_usage_base)
- ns -= u->cpuacct_usage_base;
+ if (ns > u->cpu_usage_base)
+ ns -= u->cpu_usage_base;
else
ns = 0;
- *ret = ns;
+ u->cpu_usage_last = ns;
+ if (ret)
+ *ret = ns;
+
return 0;
}
@@ -1930,13 +2107,15 @@ int unit_reset_cpu_usage(Unit *u) {
assert(u);
+ u->cpu_usage_last = NSEC_INFINITY;
+
r = unit_get_cpu_usage_raw(u, &ns);
if (r < 0) {
- u->cpuacct_usage_base = 0;
+ u->cpu_usage_base = 0;
return r;
}
- u->cpuacct_usage_base = ns;
+ u->cpu_usage_base = ns;
return 0;
}
diff --git a/src/grp-system/libcore/src/dbus-automount.c b/src/grp-system/libcore/src/dbus-automount.c
index 500dcee502..05e248758f 100644
--- a/src/grp-system/libcore/src/dbus-automount.c
+++ b/src/grp-system/libcore/src/dbus-automount.c
@@ -33,3 +33,57 @@ const sd_bus_vtable bus_automount_vtable[] = {
SD_BUS_PROPERTY("TimeoutIdleUSec", "t", bus_property_get_usec, offsetof(Automount, timeout_idle_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
+
+static int bus_automount_set_transient_property(
+ Automount *a,
+ const char *name,
+ sd_bus_message *message,
+ UnitSetPropertiesMode mode,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ if (streq(name, "TimeoutIdleUSec")) {
+ usec_t timeout_idle_usec;
+ r = sd_bus_message_read(message, "t", &timeout_idle_usec);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK) {
+ char time[FORMAT_TIMESPAN_MAX];
+
+ a->timeout_idle_usec = timeout_idle_usec;
+ unit_write_drop_in_format(UNIT(a), mode, name, "[Automount]\nTimeoutIdleSec=%s\n",
+ format_timespan(time, sizeof(time), timeout_idle_usec, USEC_PER_MSEC));
+ }
+ } else
+ return 0;
+
+ return 1;
+}
+
+int bus_automount_set_property(
+ Unit *u,
+ const char *name,
+ sd_bus_message *message,
+ UnitSetPropertiesMode mode,
+ sd_bus_error *error) {
+
+ Automount *a = AUTOMOUNT(u);
+ int r = 0;
+
+ assert(a);
+ assert(name);
+ assert(message);
+
+ if (u->transient && u->load_state == UNIT_STUB)
+ /* This is a transient unit, let's load a little more */
+
+ r = bus_automount_set_transient_property(a, name, message, mode, error);
+
+ return r;
+}
diff --git a/src/grp-system/libcore/src/dbus-automount.h b/src/grp-system/libcore/src/dbus-automount.h
index d1168c8188..34caf6fc4c 100644
--- a/src/grp-system/libcore/src/dbus-automount.h
+++ b/src/grp-system/libcore/src/dbus-automount.h
@@ -22,3 +22,5 @@
#include <systemd/sd-bus.h>
extern const sd_bus_vtable bus_automount_vtable[];
+
+int bus_automount_set_property(Unit *u, const char *name, sd_bus_message *message, UnitSetPropertiesMode mode, sd_bus_error *error);
diff --git a/src/grp-system/libcore/src/dbus-cgroup.c b/src/grp-system/libcore/src/dbus-cgroup.c
index e531f780fd..875dc35e87 100644
--- a/src/grp-system/libcore/src/dbus-cgroup.c
+++ b/src/grp-system/libcore/src/dbus-cgroup.c
@@ -211,6 +211,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
+ SD_BUS_PROPERTY("CPUWeight", "t", NULL, offsetof(CGroupContext, cpu_weight), 0),
+ SD_BUS_PROPERTY("StartupCPUWeight", "t", NULL, offsetof(CGroupContext, startup_cpu_weight), 0),
SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
@@ -232,6 +234,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
+ SD_BUS_PROPERTY("MemorySwapMax", "t", NULL, offsetof(CGroupContext, memory_swap_max), 0),
SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -304,6 +307,50 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "CPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "CPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_WEIGHT_INVALID)
+ unit_write_drop_in_private(u, mode, name, "CPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "CPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
+ } else if (streq(name, "StartupCPUWeight")) {
+ uint64_t weight;
+
+ r = sd_bus_message_read(message, "t", &weight);
+ if (r < 0)
+ return r;
+
+ if (!CGROUP_WEIGHT_IS_OK(weight))
+ return sd_bus_error_set_errnof(error, EINVAL, "StartupCPUWeight value out of range");
+
+ if (mode != UNIT_CHECK) {
+ c->startup_cpu_weight = weight;
+ unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+
+ if (weight == CGROUP_CPU_SHARES_INVALID)
+ unit_write_drop_in_private(u, mode, name, "StartupCPUWeight=");
+ else
+ unit_write_drop_in_private_format(u, mode, name, "StartupCPUWeight=%" PRIu64, weight);
+ }
+
+ return 1;
+
} else if (streq(name, "CPUShares")) {
uint64_t shares;
@@ -830,7 +877,7 @@ int bus_cgroup_set_property(
return 1;
- } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
+ } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax")) {
uint64_t v;
r = sd_bus_message_read(message, "t", &v);
@@ -844,6 +891,8 @@ int bus_cgroup_set_property(
c->memory_low = v;
else if (streq(name, "MemoryHigh"))
c->memory_high = v;
+ else if (streq(name, "MemorySwapMax"))
+ c->memory_swap_max = v;
else
c->memory_max = v;
diff --git a/src/grp-system/libcore/src/dbus-execute.c b/src/grp-system/libcore/src/dbus-execute.c
index 8029497c3c..2e447e8043 100644
--- a/src/grp-system/libcore/src/dbus-execute.c
+++ b/src/grp-system/libcore/src/dbus-execute.c
@@ -45,6 +45,7 @@
#endif
#include "systemd-basic/strv.h"
#include "systemd-basic/syslog-util.h"
+#include "systemd-basic/user-util.h"
#include "systemd-basic/utf8.h"
BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput);
@@ -627,6 +628,53 @@ static int property_get_syslog_facility(
return sd_bus_message_append(reply, "i", LOG_FAC(c->syslog_priority));
}
+static int property_get_input_fdname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ const char *name;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ name = exec_context_fdname(c, STDIN_FILENO);
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
+static int property_get_output_fdname(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ const char *name = NULL;
+
+ assert(bus);
+ assert(c);
+ assert(property);
+ assert(reply);
+
+ if (c->std_output == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardOutputFileDescriptorName"))
+ name = exec_context_fdname(c, STDOUT_FILENO);
+ else if (c->std_error == EXEC_OUTPUT_NAMED_FD && streq(property, "StandardErrorFileDescriptorName"))
+ name = exec_context_fdname(c, STDERR_FILENO);
+
+ return sd_bus_message_append(reply, "s", name);
+}
+
const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -677,8 +725,11 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StandardInput", "s", property_get_exec_input, offsetof(ExecContext, std_input), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardInputFileDescriptorName", "s", property_get_input_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StandardOutput", "s", bus_property_get_exec_output, offsetof(ExecContext, std_output), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardOutputFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StandardError", "s", bus_property_get_exec_output, offsetof(ExecContext, std_error), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StandardErrorFileDescriptorName", "s", property_get_output_fdname, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TTYPath", "s", NULL, offsetof(ExecContext, tty_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TTYReset", "b", bus_property_get_bool, offsetof(ExecContext, tty_reset), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TTYVHangup", "b", bus_property_get_bool, offsetof(ExecContext, tty_vhangup), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -694,6 +745,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("AmbientCapabilities", "t", property_get_ambient_capabilities, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
@@ -704,8 +757,12 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", bus_property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", bus_property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -841,6 +898,9 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
+ if (!isempty(uu) && !valid_user_group_name_or_id(uu))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user name: %s", uu);
+
if (mode != UNIT_CHECK) {
if (isempty(uu))
@@ -860,6 +920,9 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
+ if (!isempty(gg) && !valid_user_group_name_or_id(gg))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group name: %s", gg);
+
if (mode != UNIT_CHECK) {
if (isempty(gg))
@@ -928,7 +991,7 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
- if (n < PRIO_MIN || n >= PRIO_MAX)
+ if (!nice_is_valid(n))
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Nice value out of range");
if (mode != UNIT_CHECK) {
@@ -1018,7 +1081,6 @@ int bus_exec_context_set_transient_property(
return 1;
-
} else if (streq(name, "StandardOutput")) {
const char *s;
ExecOutput p;
@@ -1060,9 +1122,46 @@ int bus_exec_context_set_transient_property(
return 1;
} else if (STR_IN_SET(name,
+ "StandardInputFileDescriptorName", "StandardOutputFileDescriptorName", "StandardErrorFileDescriptorName")) {
+ const char *s;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ if (!fdname_is_valid(s))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid file descriptor name");
+
+ if (mode != UNIT_CHECK) {
+ if (streq(name, "StandardInputFileDescriptorName")) {
+ c->std_input = EXEC_INPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], s);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private_format(u, mode, name, "StandardInput=fd:%s", s);
+ } else if (streq(name, "StandardOutputFileDescriptorName")) {
+ c->std_output = EXEC_OUTPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], s);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private_format(u, mode, name, "StandardOutput=fd:%s", s);
+ } else if (streq(name, "StandardErrorFileDescriptorName")) {
+ c->std_error = EXEC_OUTPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], s);
+ if (r < 0)
+ return r;
+ unit_write_drop_in_private_format(u, mode, name, "StandardError=fd:%s", s);
+ }
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
- "PrivateTmp", "PrivateDevices", "PrivateNetwork",
- "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime")) {
+ "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+ "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
+ "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
+ "ProtectKernelModules", "ProtectControlGroups")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@@ -1082,6 +1181,8 @@ int bus_exec_context_set_transient_property(
c->private_devices = b;
else if (streq(name, "PrivateNetwork"))
c->private_network = b;
+ else if (streq(name, "PrivateUsers"))
+ c->private_users = b;
else if (streq(name, "NoNewPrivileges"))
c->no_new_privileges = b;
else if (streq(name, "SyslogLevelPrefix"))
@@ -1090,6 +1191,16 @@ int bus_exec_context_set_transient_property(
c->memory_deny_write_execute = b;
else if (streq(name, "RestrictRealtime"))
c->restrict_realtime = b;
+ else if (streq(name, "DynamicUser"))
+ c->dynamic_user = b;
+ else if (streq(name, "RemoveIPC"))
+ c->remove_ipc = b;
+ else if (streq(name, "ProtectKernelTunables"))
+ c->protect_kernel_tunables = b;
+ else if (streq(name, "ProtectKernelModules"))
+ c->protect_kernel_modules = b;
+ else if (streq(name, "ProtectControlGroups"))
+ c->protect_control_groups = b;
unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
}
diff --git a/src/grp-system/libcore/src/dbus-manager.c b/src/grp-system/libcore/src/dbus-manager.c
index 8ae359f2d4..b704dd02cb 100644
--- a/src/grp-system/libcore/src/dbus-manager.c
+++ b/src/grp-system/libcore/src/dbus-manager.c
@@ -37,6 +37,7 @@
#include "systemd-basic/string-util.h"
#include "systemd-basic/strv.h"
#include "systemd-basic/syslog-util.h"
+#include "systemd-basic/user-util.h"
#include "systemd-basic/virt.h"
#include "systemd-shared/install.h"
#include "systemd-shared/watchdog.h"
@@ -47,6 +48,11 @@
#include "dbus.h"
#include "selinux-access.h"
+static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) {
+ return (runtime ? UNIT_FILE_RUNTIME : 0) |
+ (force ? UNIT_FILE_FORCE : 0);
+}
+
static int property_get_version(
sd_bus *bus,
const char *path,
@@ -464,6 +470,64 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu
return sd_bus_reply_method_return(message, "o", path);
}
+static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *path = NULL;
+ Manager *m = userdata;
+ sd_id128_t id;
+ const void *a;
+ Unit *u;
+ size_t sz;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ /* Anyone can call this method */
+
+ r = sd_bus_message_read_array(message, 'y', &a, &sz);
+ if (r < 0)
+ return r;
+ if (sz == 0)
+ id = SD_ID128_NULL;
+ else if (sz == 16)
+ memcpy(&id, a, sz);
+ else
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
+
+ if (sd_id128_is_null(id)) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid;
+
+ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return r;
+
+ u = manager_get_unit_by_pid(m, pid);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT, "Client " PID_FMT " not member of any unit.", pid);
+ } else {
+ u = hashmap_get(m->units_by_invocation_id, &id);
+ if (!u)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(id));
+ }
+
+ r = mac_selinux_unit_access_check(u, message, "status", error);
+ if (r < 0)
+ return r;
+
+ /* So here's a special trick: the bus path we return actually references the unit by its invocation ID instead
+ * of the unit name. This means it stays valid only as long as the invocation ID stays the same. */
+ path = unit_dbus_path_invocation_id(u);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_bus_reply_method_return(message, "o", path);
+}
+
static int method_load_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
_cleanup_free_ char *path = NULL;
Manager *m = userdata;
@@ -643,6 +707,54 @@ static int method_set_unit_properties(sd_bus_message *message, void *userdata, s
return bus_unit_method_set_properties(message, u, error);
}
+static int method_ref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_check_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_ref(message, u, error);
+}
+
+static int method_unref_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ Unit *u;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read(message, "s", &name);
+ if (r < 0)
+ return r;
+
+ r = manager_load_unit(m, name, NULL, error, &u);
+ if (r < 0)
+ return r;
+
+ r = bus_unit_check_load_state(u, error);
+ if (r < 0)
+ return r;
+
+ return bus_unit_method_unref(message, u, error);
+}
+
static int reply_unit_info(sd_bus_message *reply, Unit *u) {
_cleanup_free_ char *unit_path = NULL, *job_path = NULL;
Unit *following;
@@ -781,6 +893,13 @@ static int transient_unit_from_message(
if (r < 0)
return r;
+ /* If the client asked for it, automatically add a reference to this unit. */
+ if (u->bus_track_add) {
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch sender: %m");
+ }
+
/* Now load the missing bits of the unit we just created */
unit_add_to_load_queue(u);
manager_dispatch_load_queue(m);
@@ -1512,8 +1631,8 @@ static int method_unset_and_set_environment(sd_bus_message *message, void *userd
}
static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) {
- uint8_t code;
Manager *m = userdata;
+ uint8_t code;
int r;
assert(message);
@@ -1535,6 +1654,61 @@ static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_
return sd_bus_reply_method_return(message, NULL);
}
+static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Manager *m = userdata;
+ const char *name;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ r = sd_bus_message_read_basic(message, 's', &name);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!valid_user_group_name(name))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name);
+
+ r = dynamic_user_lookup_name(m, name, &uid);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "u", (uint32_t) uid);
+}
+
+static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *name = NULL;
+ Manager *m = userdata;
+ uid_t uid;
+ int r;
+
+ assert(message);
+ assert(m);
+
+ assert_cc(sizeof(uid) == sizeof(uint32_t));
+ r = sd_bus_message_read_basic(message, 'u', &uid);
+ if (r < 0)
+ return r;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance.");
+ if (!uid_is_valid(uid))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid);
+
+ r = dynamic_user_lookup_uid(m, uid, &name);
+ if (r == -ESRCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, "s", name);
+}
+
static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
Manager *m = userdata;
@@ -1780,13 +1954,14 @@ static int install_error(
static int method_enable_unit_files_generic(
sd_bus_message *message,
Manager *m,
- int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], bool force, UnitFileChange **changes, unsigned *n_changes),
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
bool carries_install_info,
sd_bus_error *error) {
_cleanup_strv_free_ char **l = NULL;
UnitFileChange *changes = NULL;
unsigned n_changes = 0;
+ UnitFileFlags flags;
int runtime, force, r;
assert(message);
@@ -1800,13 +1975,15 @@ static int method_enable_unit_files_generic(
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
r = bus_verify_manage_unit_files_async(m, message, error);
if (r < 0)
return r;
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = call(m->unit_file_scope, runtime, NULL, l, force, &changes, &n_changes);
+ r = call(m->unit_file_scope, flags, NULL, l, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1825,8 +2002,8 @@ static int method_link_unit_files(sd_bus_message *message, void *userdata, sd_bu
return method_enable_unit_files_generic(message, userdata, unit_file_link, false, error);
}
-static int unit_file_preset_without_mode(UnitFileScope scope, bool runtime, const char *root_dir, char **files, bool force, UnitFileChange **changes, unsigned *n_changes) {
- return unit_file_preset(scope, runtime, root_dir, files, UNIT_FILE_PRESET_FULL, force, changes, n_changes);
+static int unit_file_preset_without_mode(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char **files, UnitFileChange **changes, unsigned *n_changes) {
+ return unit_file_preset(scope, flags, root_dir, files, UNIT_FILE_PRESET_FULL, changes, n_changes);
}
static int method_preset_unit_files(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -1845,6 +2022,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
Manager *m = userdata;
UnitFilePresetMode mm;
int runtime, force, r;
+ UnitFileFlags flags;
const char *mode;
assert(message);
@@ -1858,6 +2036,8 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
if (isempty(mode))
mm = UNIT_FILE_PRESET_FULL;
else {
@@ -1872,7 +2052,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = unit_file_preset(m->unit_file_scope, runtime, NULL, l, mm, force, &changes, &n_changes);
+ r = unit_file_preset(m->unit_file_scope, flags, NULL, l, mm, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1882,7 +2062,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
static int method_disable_unit_files_generic(
sd_bus_message *message,
Manager *m,
- int (*call)(UnitFileScope scope, bool runtime, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
+ int (*call)(UnitFileScope scope, UnitFileFlags flags, const char *root_dir, char *files[], UnitFileChange **changes, unsigned *n_changes),
sd_bus_error *error) {
_cleanup_strv_free_ char **l = NULL;
@@ -1907,7 +2087,7 @@ static int method_disable_unit_files_generic(
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = call(m->unit_file_scope, runtime, NULL, l, &changes, &n_changes);
+ r = call(m->unit_file_scope, runtime ? UNIT_FILE_RUNTIME : 0, NULL, l, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1973,7 +2153,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = unit_file_set_default(m->unit_file_scope, NULL, name, force, &changes, &n_changes);
+ r = unit_file_set_default(m->unit_file_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -1986,6 +2166,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
Manager *m = userdata;
UnitFilePresetMode mm;
const char *mode;
+ UnitFileFlags flags;
int force, runtime, r;
assert(message);
@@ -1999,6 +2180,8 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
if (isempty(mode))
mm = UNIT_FILE_PRESET_FULL;
else {
@@ -2013,7 +2196,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
if (r == 0)
return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
- r = unit_file_preset_all(m->unit_file_scope, runtime, NULL, mm, force, &changes, &n_changes);
+ r = unit_file_preset_all(m->unit_file_scope, flags, NULL, mm, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
@@ -2028,6 +2211,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
int runtime, force, r;
char *target, *type;
UnitDependency dep;
+ UnitFileFlags flags;
assert(message);
assert(m);
@@ -2046,17 +2230,62 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
if (r < 0)
return r;
+ flags = unit_file_bools_to_flags(runtime, force);
+
dep = unit_dependency_from_string(type);
if (dep < 0)
return -EINVAL;
- r = unit_file_add_dependency(m->unit_file_scope, runtime, NULL, l, target, dep, force, &changes, &n_changes);
+ r = unit_file_add_dependency(m->unit_file_scope, flags, NULL, l, target, dep, &changes, &n_changes);
if (r < 0)
return install_error(error, r, changes, n_changes);
return reply_unit_file_changes_and_free(m, message, -1, changes, n_changes);
}
+static int method_get_unit_file_links(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ UnitFileChange *changes = NULL;
+ unsigned n_changes = 0, i;
+ UnitFileFlags flags;
+ const char *name;
+ char **p;
+ int runtime, r;
+
+ r = sd_bus_message_read(message, "sb", &name, &runtime);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(message, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK) {
+ r = sd_bus_message_append(reply, "s", changes[i].path);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -2144,6 +2373,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitByInvocationID", "ay", "o", method_get_unit_by_invocation_id, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("LoadUnit", "s", "o", method_load_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartUnit", "ss", "o", method_start_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartUnitReplace", "sss", "o", method_start_unit_replace, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -2156,6 +2386,8 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("KillUnit", "ssi", NULL, method_kill_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("ResetFailedUnit", "s", NULL, method_reset_failed_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetUnitProperties", "sba(sv)", NULL, method_set_unit_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("RefUnit", "s", NULL, method_ref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("UnrefUnit", "s", NULL, method_unref_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("StartTransientUnit", "ssa(sv)a(sa(sv))", "o", method_start_transient_unit, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetUnitProcesses", "s", "a(sus)", method_get_unit_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("GetJob", "u", "o", method_get_job, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -2199,7 +2431,10 @@ const sd_bus_vtable bus_manager_vtable[] = {
SD_BUS_METHOD("GetDefaultTarget", NULL, "s", method_get_default_target, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("GetUnitFileLinks", "sb", "as", method_get_unit_file_links, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_SIGNAL("UnitNew", "so", 0),
SD_BUS_SIGNAL("UnitRemoved", "so", 0),
diff --git a/src/grp-system/libcore/src/dbus-mount.c b/src/grp-system/libcore/src/dbus-mount.c
index ea2abb0e4e..f9f396ec70 100644
--- a/src/grp-system/libcore/src/dbus-mount.c
+++ b/src/grp-system/libcore/src/dbus-mount.c
@@ -117,7 +117,11 @@ const sd_bus_vtable bus_mount_vtable[] = {
SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("DirectoryMode", "u", bus_property_get_mode, offsetof(Mount, directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SloppyOptions", "b", bus_property_get_bool, offsetof(Mount, sloppy_options), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("LazyUnmount", "b", bus_property_get_bool, offsetof(Mount, lazy_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ForceUnmount", "b", bus_property_get_bool, offsetof(Mount, force_unmount), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Mount, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_VTABLE("ExecMount", offsetof(Mount, exec_command[MOUNT_EXEC_MOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_VTABLE("ExecUnmount", offsetof(Mount, exec_command[MOUNT_EXEC_UNMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_VTABLE("ExecRemount", offsetof(Mount, exec_command[MOUNT_EXEC_REMOUNT]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -158,6 +162,9 @@ static int bus_mount_set_transient_property(
if (!p)
return -ENOMEM;
+ unit_write_drop_in_format(UNIT(m), mode, name, "[Mount]\n%s=%s\n",
+ name, new_property);
+
free(*property);
*property = p;
}
diff --git a/src/grp-system/libcore/src/dbus-service.c b/src/grp-system/libcore/src/dbus-service.c
index 42cd1c52bd..da8a2298a2 100644
--- a/src/grp-system/libcore/src/dbus-service.c
+++ b/src/grp-system/libcore/src/dbus-service.c
@@ -37,7 +37,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_notify_access, notify_access, NotifyAccess);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -51,12 +51,7 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("RuntimeMaxUSec", "t", bus_property_get_usec, offsetof(Service, runtime_max_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("WatchdogUSec", "t", bus_property_get_usec, offsetof(Service, watchdog_usec), SD_BUS_VTABLE_PROPERTY_CONST),
BUS_PROPERTY_DUAL_TIMESTAMP("WatchdogTimestamp", offsetof(Service, watchdog_timestamp), 0),
- /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
- SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("FailureAction", "s", property_get_failure_action, offsetof(Service, failure_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("FailureAction", "s", property_get_emergency_action, offsetof(Service, emergency_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PermissionsStartOnly", "b", bus_property_get_bool, offsetof(Service, permissions_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RootDirectoryStartOnly", "b", bus_property_get_bool, offsetof(Service, root_directory_start_only), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RemainAfterExit", "b", bus_property_get_bool, offsetof(Service, remain_after_exit), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -71,6 +66,9 @@ const sd_bus_vtable bus_service_vtable[] = {
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+
BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -78,6 +76,12 @@ const sd_bus_vtable bus_service_vtable[] = {
BUS_EXEC_COMMAND_LIST_VTABLE("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+
+ /* The following four are obsolete, and thus marked hidden here. They moved into the Unit interface */
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
SD_BUS_VTABLE_END
};
diff --git a/src/grp-system/libcore/src/dbus-socket.c b/src/grp-system/libcore/src/dbus-socket.c
index 2bdfd0a47f..4bd973a882 100644
--- a/src/grp-system/libcore/src/dbus-socket.c
+++ b/src/grp-system/libcore/src/dbus-socket.c
@@ -138,6 +138,7 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("Symlinks", "as", NULL, offsetof(Socket, symlinks), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Mark", "i", bus_property_get_int, offsetof(Socket, mark), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MaxConnections", "u", bus_property_get_unsigned, offsetof(Socket, max_connections), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MaxConnectionsPerSource", "u", bus_property_get_unsigned, offsetof(Socket, max_connections_per_source), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MessageQueueMaxMessages", "x", bus_property_get_long, offsetof(Socket, mq_maxmsg), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MessageQueueMessageSize", "x", bus_property_get_long, offsetof(Socket, mq_msgsize), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReusePort", "b", bus_property_get_bool, offsetof(Socket, reuse_port), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -152,6 +153,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPost", offsetof(Socket, exec_command[SOCKET_EXEC_START_POST]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStopPre", offsetof(Socket, exec_command[SOCKET_EXEC_STOP_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
diff --git a/src/grp-system/libcore/src/dbus-swap.c b/src/grp-system/libcore/src/dbus-swap.c
index 3b0f274ba7..6e40d59808 100644
--- a/src/grp-system/libcore/src/dbus-swap.c
+++ b/src/grp-system/libcore/src/dbus-swap.c
@@ -85,6 +85,8 @@ const sd_bus_vtable bus_swap_vtable[] = {
SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Swap, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Swap, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Swap, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_VTABLE("ExecActivate", offsetof(Swap, exec_command[SWAP_EXEC_ACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
BUS_EXEC_COMMAND_VTABLE("ExecDeactivate", offsetof(Swap, exec_command[SWAP_EXEC_DEACTIVATE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
SD_BUS_VTABLE_END
diff --git a/src/grp-system/libcore/src/dbus-unit.c b/src/grp-system/libcore/src/dbus-unit.c
index ef12ad66a3..93391c2c6e 100644
--- a/src/grp-system/libcore/src/dbus-unit.c
+++ b/src/grp-system/libcore/src/dbus-unit.c
@@ -38,7 +38,7 @@
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
-static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction);
static int property_get_names(
sd_bus *bus,
@@ -264,10 +264,7 @@ static int property_get_can_stop(
assert(reply);
assert(u);
- /* On the lower levels we assume that every unit we can start
- * we can also stop */
-
- return sd_bus_message_append(reply, "b", unit_can_start(u) && !u->refuse_manual_stop);
+ return sd_bus_message_append(reply, "b", unit_can_stop(u) && !u->refuse_manual_stop);
}
static int property_get_can_reload(
@@ -419,6 +416,7 @@ static int bus_verify_manage_units_async_full(
const char *verb,
int capability,
const char *polkit_message,
+ bool interactive,
sd_bus_message *call,
sd_bus_error *error) {
@@ -434,7 +432,15 @@ static int bus_verify_manage_units_async_full(
details[7] = GETTEXT_PACKAGE;
}
- return bus_verify_polkit_async(call, capability, "org.freedesktop.systemd1.manage-units", details, false, UID_INVALID, &u->manager->polkit_registry, error);
+ return bus_verify_polkit_async(
+ call,
+ capability,
+ "org.freedesktop.systemd1.manage-units",
+ details,
+ interactive,
+ UID_INVALID,
+ &u->manager->polkit_registry,
+ error);
}
int bus_unit_method_start_generic(
@@ -487,6 +493,7 @@ int bus_unit_method_start_generic(
verb,
CAP_SYS_ADMIN,
job_type < _JOB_TYPE_MAX ? polkit_message_for_job[job_type] : NULL,
+ true,
message,
error);
if (r < 0)
@@ -559,6 +566,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *
"kill",
CAP_KILL,
N_("Authentication is required to kill '$(unit)'."),
+ true,
message,
error);
if (r < 0)
@@ -589,6 +597,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus
"reset-failed",
CAP_SYS_ADMIN,
N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
+ true,
message,
error);
if (r < 0)
@@ -621,6 +630,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
"set-property",
CAP_SYS_ADMIN,
N_("Authentication is required to set properties on '$(unit)'."),
+ true,
message,
error);
if (r < 0)
@@ -635,6 +645,53 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
return sd_bus_reply_method_return(message, NULL);
}
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = mac_selinux_unit_access_check(u, message, "start", error);
+ if (r < 0)
+ return r;
+
+ r = bus_verify_manage_units_async_full(
+ u,
+ "ref",
+ CAP_SYS_ADMIN,
+ NULL,
+ false,
+ message,
+ error);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+ r = bus_unit_track_add_sender(u, message);
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ Unit *u = userdata;
+ int r;
+
+ assert(message);
+ assert(u);
+
+ r = bus_unit_track_remove_sender(u, message);
+ if (r == -EUNATCH)
+ return sd_bus_error_setf(error, BUS_ERROR_NOT_REFERENCED, "Unit has not been referenced yet.");
+ if (r < 0)
+ return r;
+
+ return sd_bus_reply_method_return(message, NULL);
+}
+
const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_VTABLE_START(0),
@@ -661,10 +718,6 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("PropagatesReloadTo", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_PROPAGATES_RELOAD_TO]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReloadPropagatedFrom", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_RELOAD_PROPAGATED_FROM]), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, offsetof(Unit, dependencies[UNIT_JOINS_NAMESPACE_OF]), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
- SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
SD_BUS_PROPERTY("RequiresMountsFor", "as", NULL, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -695,7 +748,7 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("IgnoreOnIsolate", "b", bus_property_get_bool, offsetof(Unit, ignore_on_isolate), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_emergency_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("AssertResult", "b", bus_property_get_bool, offsetof(Unit, assert_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
@@ -705,11 +758,12 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0),
SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Perpetual", "b", bus_property_get_bool, offsetof(Unit, perpetual), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("StartLimitIntervalSec", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), /* obsolete alias name */
SD_BUS_PROPERTY("StartLimitBurst", "u", bus_property_get_unsigned, offsetof(Unit, start_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("StartLimitAction", "s", property_get_failure_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("StartLimitAction", "s", property_get_emergency_action, offsetof(Unit, start_limit_action), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RebootArgument", "s", NULL, offsetof(Unit, reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InvocationID", "ay", bus_property_get_id128, offsetof(Unit, invocation_id), 0),
SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED),
@@ -721,7 +775,15 @@ const sd_bus_vtable bus_unit_vtable[] = {
SD_BUS_METHOD("Kill", "si", NULL, bus_unit_method_kill, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("ResetFailed", NULL, NULL, bus_unit_method_reset_failed, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_METHOD("SetProperties", "ba(sv)", NULL, bus_unit_method_set_properties, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Ref", NULL, NULL, bus_unit_method_ref, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Unref", NULL, NULL, bus_unit_method_unref, SD_BUS_VTABLE_UNPRIVILEGED),
+ /* Obsolete properties or obsolete alias names */
+ SD_BUS_PROPERTY("RequiresOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequiredByOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("RequisiteOfOverridable", "as", property_get_obsolete_dependencies, 0, SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("StartLimitInterval", "t", bus_property_get_usec, offsetof(Unit, start_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
SD_BUS_VTABLE_END
};
@@ -1105,7 +1167,7 @@ void bus_unit_send_removed_signal(Unit *u) {
int r;
assert(u);
- if (!u->sent_dbus_new_signal)
+ if (!u->sent_dbus_new_signal || u->in_dbus_queue)
bus_unit_send_change_signal(u);
if (!u->id)
@@ -1318,6 +1380,29 @@ static int bus_unit_set_transient_property(
return r;
return 1;
+
+ } else if (streq(name, "AddRef")) {
+
+ int b;
+
+ /* Why is this called "AddRef" rather than just "Ref", or "Reference"? There's already a "Ref()" method
+ * on the Unit interface, and it's probably not a good idea to expose a property and a method on the
+ * same interface (well, strictly speaking AddRef isn't exposed as full property, we just read it for
+ * transient units, but still). And "References" and "ReferencedBy" is already used as unit reference
+ * dependency type, hence let's not confuse things with that.
+ *
+ * Note that we don't acually add the reference to the bus track. We do that only after the setup of
+ * the transient unit is complete, so that setting this property multiple times in the same transient
+ * unit creation call doesn't count as individual references. */
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r;
+
+ if (mode != UNIT_CHECK)
+ u->bus_track_add = b;
+
+ return 1;
}
return 0;
@@ -1422,3 +1507,71 @@ int bus_unit_check_load_state(Unit *u, sd_bus_error *error) {
return sd_bus_error_set_errnof(error, u->load_error, "Unit %s is not loaded properly: %m.", u->id);
}
+
+static int bus_track_handler(sd_bus_track *t, void *userdata) {
+ Unit *u = userdata;
+
+ assert(t);
+ assert(u);
+
+ u->bus_track = sd_bus_track_unref(u->bus_track); /* make sure we aren't called again */
+
+ unit_add_to_gc_queue(u);
+ return 0;
+}
+
+static int allocate_bus_track(Unit *u) {
+ int r;
+
+ assert(u);
+
+ if (u->bus_track)
+ return 0;
+
+ r = sd_bus_track_new(u->manager->api_bus, &u->bus_track, bus_track_handler, u);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_track_set_recursive(u->bus_track, true);
+ if (r < 0) {
+ u->bus_track = sd_bus_track_unref(u->bus_track);
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_unit_track_add_name(Unit *u, const char *name) {
+ int r;
+
+ assert(u);
+
+ r = allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_name(u->bus_track, name);
+}
+
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m) {
+ int r;
+
+ assert(u);
+
+ r = allocate_bus_track(u);
+ if (r < 0)
+ return r;
+
+ return sd_bus_track_add_sender(u->bus_track, m);
+}
+
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m) {
+ assert(u);
+
+ /* If we haven't allocated the bus track object yet, then there's definitely no reference taken yet, return an
+ * error */
+ if (!u->bus_track)
+ return -EUNATCH;
+
+ return sd_bus_track_remove_sender(u->bus_track, m);
+}
diff --git a/src/grp-system/libcore/src/dbus-unit.h b/src/grp-system/libcore/src/dbus-unit.h
index d6351f9552..5133bec287 100644
--- a/src/grp-system/libcore/src/dbus-unit.h
+++ b/src/grp-system/libcore/src/dbus-unit.h
@@ -33,9 +33,15 @@ int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_
int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *error);
int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus_error *error);
-int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
int bus_unit_set_properties(Unit *u, sd_bus_message *message, UnitSetPropertiesMode mode, bool commit, sd_bus_error *error);
int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_bus_error *error);
int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_method_unref(sd_bus_message *message, void *userdata, sd_bus_error *error);
+int bus_unit_queue_job(sd_bus_message *message, Unit *u, JobType type, JobMode mode, bool reload_if_possible, sd_bus_error *error);
int bus_unit_check_load_state(Unit *u, sd_bus_error *error);
+
+int bus_unit_track_add_name(Unit *u, const char *name);
+int bus_unit_track_add_sender(Unit *u, sd_bus_message *m);
+int bus_unit_track_remove_sender(Unit *u, sd_bus_message *m);
diff --git a/src/grp-system/libcore/src/dbus.c b/src/grp-system/libcore/src/dbus.c
index 4485b38e2e..77b5dc81cb 100644
--- a/src/grp-system/libcore/src/dbus.c
+++ b/src/grp-system/libcore/src/dbus.c
@@ -965,10 +965,6 @@ static int bus_init_private(Manager *m) {
if (m->private_listen_fd >= 0)
return 0;
- /* We don't need the private socket if we have kdbus */
- if (m->kdbus_fd >= 0)
- return 0;
-
if (MANAGER_IS_SYSTEM(m)) {
/* We want the private bus only when running as init */
@@ -1169,60 +1165,57 @@ int bus_foreach_bus(
return ret;
}
-void bus_track_serialize(sd_bus_track *t, FILE *f) {
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
const char *n;
assert(f);
+ assert(prefix);
- for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t))
- fprintf(f, "subscribed=%s\n", n);
-}
+ for (n = sd_bus_track_first(t); n; n = sd_bus_track_next(t)) {
+ int c, j;
-int bus_track_deserialize_item(char ***l, const char *line) {
- const char *e;
- int r;
-
- assert(l);
- assert(line);
-
- e = startswith(line, "subscribed=");
- if (!e)
- return 0;
+ c = sd_bus_track_count_name(t, n);
- r = strv_extend(l, e);
- if (r < 0)
- return r;
-
- return 1;
+ for (j = 0; j < c; j++) {
+ fputs(prefix, f);
+ fputc('=', f);
+ fputs(n, f);
+ fputc('\n', f);
+ }
+ }
}
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l) {
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
+ char **i;
int r = 0;
assert(m);
assert(t);
- assert(l);
- if (!strv_isempty(*l) && m->api_bus) {
- char **i;
-
- if (!*t) {
- r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
- if (r < 0)
- return r;
- }
+ if (strv_isempty(l))
+ return 0;
- r = 0;
- STRV_FOREACH(i, *l) {
- int k;
+ if (!m->api_bus)
+ return 0;
- k = sd_bus_track_add_name(*t, *i);
- if (k < 0)
- r = k;
- }
+ if (!*t) {
+ r = sd_bus_track_new(m->api_bus, t, NULL, NULL);
+ if (r < 0)
+ return r;
}
- *l = strv_free(*l);
+ r = sd_bus_track_set_recursive(*t, recursive);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ STRV_FOREACH(i, l) {
+ int k;
+
+ k = sd_bus_track_add_name(*t, *i);
+ if (k < 0)
+ r = k;
+ }
return r;
}
diff --git a/src/grp-system/libcore/src/dbus.h b/src/grp-system/libcore/src/dbus.h
index 36f8d34a1d..9f892599cc 100644
--- a/src/grp-system/libcore/src/dbus.h
+++ b/src/grp-system/libcore/src/dbus.h
@@ -28,9 +28,8 @@ void bus_done(Manager *m);
int bus_fdset_add_all(Manager *m, FDSet *fds);
-void bus_track_serialize(sd_bus_track *t, FILE *f);
-int bus_track_deserialize_item(char ***l, const char *line);
-int bus_track_coldplug(Manager *m, sd_bus_track **t, char ***l);
+void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix);
+int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l);
int manager_sync_bus_names(Manager *m, sd_bus *bus);
diff --git a/src/grp-system/libcore/src/device.c b/src/grp-system/libcore/src/device.c
index a147de3280..f7865195d7 100644
--- a/src/grp-system/libcore/src/device.c
+++ b/src/grp-system/libcore/src/device.c
@@ -332,11 +332,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Device));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Device), e, &u);
if (r < 0)
goto fail;
@@ -370,7 +366,7 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
fail:
log_unit_warning_errno(u, r, "Failed to set up device unit: %m");
- if (delete)
+ if (delete && u)
unit_free(u);
return r;
@@ -465,6 +461,10 @@ static void device_update_found_one(Device *d, bool add, DeviceFound found, bool
if (!now)
return;
+ /* Didn't exist before, but does now? if so, generate a new invocation ID for it */
+ if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND)
+ (void) unit_acquire_invocation_id(UNIT(d));
+
if (d->found & DEVICE_FOUND_UDEV)
/* When the device is known to udev we consider it
* plugged. */
diff --git a/src/grp-system/libcore/src/dynamic-user.c b/src/grp-system/libcore/src/dynamic-user.c
new file mode 100644
index 0000000000..9a1ea09e03
--- /dev/null
+++ b/src/grp-system/libcore/src/dynamic-user.c
@@ -0,0 +1,794 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2016 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <grp.h>
+#include <pwd.h>
+#include <sys/file.h>
+
+#include "core/dynamic-user.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fileio.h"
+#include "systemd-basic/fs-util.h"
+#include "systemd-basic/parse-util.h"
+#include "systemd-basic/random-util.h"
+#include "systemd-basic/stdio-util.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-basic/user-util.h"
+
+/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
+#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
+
+static DynamicUser* dynamic_user_free(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ if (d->manager)
+ (void) hashmap_remove(d->manager->dynamic_users, d->name);
+
+ safe_close_pair(d->storage_socket);
+ return mfree(d);
+}
+
+static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) {
+ DynamicUser *d = NULL;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(storage_socket);
+
+ r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1);
+ if (!d)
+ return -ENOMEM;
+
+ strcpy(d->name, name);
+
+ d->storage_socket[0] = storage_socket[0];
+ d->storage_socket[1] = storage_socket[1];
+
+ r = hashmap_put(m->dynamic_users, d->name, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ d->manager = m;
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+}
+
+int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) {
+ _cleanup_close_pair_ int storage_socket[2] = { -1, -1 };
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+
+ /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for
+ * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really
+ * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do
+ * from PID 1. To allow the children and PID 1 share information about allocated UIDs we use an anonymous
+ * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the
+ * allocated UID number, plus an fd referencing the lock file for the UID
+ * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can
+ * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair
+ * may exist in three different states:
+ *
+ * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized.
+ *
+ * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a
+ * statically assigned UID by the same name, which we are reusing.
+ *
+ * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic
+ * UID and locked it in the file system, using the lock fd.
+ *
+ * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it
+ * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here:
+ * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in
+ * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX
+ * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous,
+ * nobody else could get any access to it except via our own fd) and we want to synchronize access between all
+ * processes that have access to it. */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (d) {
+ /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */
+ d->n_ref++;
+ *ret = d;
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(name))
+ return -EINVAL;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0)
+ return -errno;
+
+ r = dynamic_user_add(m, name, storage_socket, &d);
+ if (r < 0)
+ return r;
+
+ storage_socket[0] = storage_socket[1] = -1;
+
+ if (ret) {
+ d->n_ref++;
+ *ret = d;
+ }
+
+ return 1;
+}
+
+static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
+
+ char path1[strlen("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
+ const char *path2;
+ int r = 0, k;
+
+ /* Add direct additional symlinks for direct lookups of dynamic UIDs and their names by userspace code. The
+ * only reason we have this is because dbus-daemon cannot use D-Bus for resolving users and groups (since it
+ * would be its own client then). We hence keep these world-readable symlinks in place, so that the
+ * unprivileged dbus user can read the mappings when it needs them via these symlinks instead of having to go
+ * via the bus. Ideally, we'd use the lock files we keep for this anyway, but we can't since we use BSD locks
+ * on them and as those may be taken by any user with read access we can't make them world-readable. */
+
+ xsprintf(path1, "/run/systemd/dynamic-uid/direct:" UID_FMT, uid);
+ if (unlink(path1) < 0 && errno != ENOENT)
+ r = -errno;
+
+ if (b && symlink(name, path1) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path1);
+ if (r == 0)
+ r = k;
+ }
+
+ path2 = strjoina("/run/systemd/dynamic-uid/direct:", name);
+ if (unlink(path2) < 0 && errno != ENOENT) {
+ k = -errno;
+ if (r == 0)
+ r = k;
+ }
+
+ if (b && symlink(path1 + strlen("/run/systemd/dynamic-uid/direct:"), path2) < 0) {
+ k = log_warning_errno(errno, "Failed to symlink \"%s\": %m", path2);
+ if (r == 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int pick_uid(const char *name, uid_t *ret_uid) {
+
+ static const uint8_t hash_key[] = {
+ 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5,
+ 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59
+ };
+
+ unsigned n_tries = 100;
+ uid_t candidate;
+ int r;
+
+ /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We start with a UID
+ * generated as hash from the user name. */
+ candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key));
+
+ (void) mkdir("/run/systemd/dynamic-uid", 0755);
+
+ for (;;) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_close_ int lock_fd = -1;
+ ssize_t l;
+
+ if (--n_tries <= 0) /* Give up retrying eventually */
+ return -EBUSY;
+
+ if (!uid_is_dynamic(candidate))
+ goto next;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate);
+
+ for (;;) {
+ struct stat st;
+
+ lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (lock_fd < 0)
+ return -errno;
+
+ r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */
+ if (r < 0) {
+ if (errno == EBUSY || errno == EAGAIN)
+ goto next; /* already in use */
+
+ return -errno;
+ }
+
+ if (fstat(lock_fd, &st) < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ /* Oh, bummer, we got the lock, but the file was unlinked between the time we opened it and
+ * got the lock. Close it, and try again. */
+ lock_fd = safe_close(lock_fd);
+ }
+
+ /* Some superficial check whether this UID/GID might already be taken by some static user */
+ if (getpwuid(candidate) || getgrgid((gid_t) candidate)) {
+ (void) unlink(lock_path);
+ goto next;
+ }
+
+ /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
+ l = pwritev(lock_fd,
+ (struct iovec[2]) {
+ { .iov_base = (char*) name, .iov_len = strlen(name) },
+ { .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
+ }, 2, 0);
+ if (l < 0) {
+ (void) unlink(lock_path);
+ return -errno;
+ }
+
+ (void) ftruncate(lock_fd, l);
+ (void) make_uid_symlinks(candidate, name, true); /* also add direct lookup symlinks */
+
+ *ret_uid = candidate;
+ r = lock_fd;
+ lock_fd = -1;
+
+ return r;
+
+ next:
+ /* Pick another random UID, and see if that works for us. */
+ random_bytes(&candidate, sizeof(candidate));
+ candidate = UID_CLAMP_INTO_RANGE(candidate);
+ }
+}
+
+static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
+ uid_t uid = UID_INVALID;
+ struct iovec iov = {
+ .iov_base = &uid,
+ .iov_len = sizeof(uid),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ssize_t k;
+ int lock_fd = -1;
+
+ assert(d);
+ assert(ret_uid);
+ assert(ret_lock_fd);
+
+ /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock
+ * on the socket taken. */
+
+ k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC);
+ if (k < 0)
+ return -errno;
+
+ cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+ if (cmsg)
+ lock_fd = *(int*) CMSG_DATA(cmsg);
+ else
+ cmsg_close_all(&mh); /* just in case... */
+
+ *ret_uid = uid;
+ *ret_lock_fd = lock_fd;
+
+ return 0;
+}
+
+static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
+ struct iovec iov = {
+ .iov_base = &uid,
+ .iov_len = sizeof(uid),
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ ssize_t k;
+
+ assert(d);
+
+ /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */
+
+ if (lock_fd >= 0) {
+ struct cmsghdr *cmsg;
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int));
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int));
+ } else {
+ mh.msg_control = NULL;
+ mh.msg_controllen = 0;
+ }
+
+ k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void unlink_uid_lock(int lock_fd, uid_t uid, const char *name) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+
+ if (lock_fd < 0)
+ return;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ (void) unlink(lock_path);
+
+ (void) make_uid_symlinks(uid, name, false); /* remove direct lookup symlinks */
+}
+
+int dynamic_user_realize(DynamicUser *d, uid_t *ret) {
+
+ _cleanup_close_ int etc_passwd_lock_fd = -1, uid_lock_fd = -1;
+ uid_t uid = UID_INVALID;
+ int r;
+
+ assert(d);
+
+ /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist
+ * yet. If it already exists its existing UID/GID will be reused. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &uid_lock_fd);
+ if (r < 0) {
+ int new_uid_lock_fd;
+ uid_t new_uid;
+
+ if (r != -EAGAIN)
+ goto finish;
+
+ /* OK, nothing stored yet, let's try to find something useful. While we are working on this release the
+ * lock however, so that nobody else blocks on our NSS lookups. */
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+
+ /* Let's see if a proper, static user or group by this name exists. Try to take the lock on
+ * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't
+ * take the lock, given that users can't be added there anyway in this case. */
+ etc_passwd_lock_fd = take_etc_passwd_lock(NULL);
+ if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS)
+ return etc_passwd_lock_fd;
+
+ /* First, let's parse this as numeric UID */
+ r = parse_uid(d->name, &uid);
+ if (r < 0) {
+ struct passwd *p;
+ struct group *g;
+
+ /* OK, this is not a numeric UID. Let's see if there's a user by this name */
+ p = getpwnam(d->name);
+ if (p)
+ uid = p->pw_uid;
+
+ /* Let's see if there's a group by this name */
+ g = getgrnam(d->name);
+ if (g) {
+ /* If the UID/GID of the user/group of the same don't match, refuse operation */
+ if (uid != UID_INVALID && uid != (uid_t) g->gr_gid)
+ return -EILSEQ;
+
+ uid = (uid_t) g->gr_gid;
+ }
+ }
+
+ if (uid == UID_INVALID) {
+ /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */
+
+ uid_lock_fd = pick_uid(d->name, &uid);
+ if (uid_lock_fd < 0)
+ return uid_lock_fd;
+ }
+
+ /* So, we found a working UID/lock combination. Let's see if we actually still need it. */
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) {
+ unlink_uid_lock(uid_lock_fd, uid, d->name);
+ return -errno;
+ }
+
+ r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd);
+ if (r < 0) {
+ if (r != -EAGAIN) {
+ /* OK, something bad happened, let's get rid of the bits we acquired. */
+ unlink_uid_lock(uid_lock_fd, uid, d->name);
+ goto finish;
+ }
+
+ /* Great! Nothing is stored here, still. Store our newly acquired data. */
+ } else {
+ /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we
+ * acquired, and use what's stored now. */
+
+ unlink_uid_lock(uid_lock_fd, uid, d->name);
+ safe_close(uid_lock_fd);
+
+ uid = new_uid;
+ uid_lock_fd = new_uid_lock_fd;
+ }
+ }
+
+ /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already
+ * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID
+ * dynamically right here, push that in along with the lock fd for it. */
+ r = dynamic_user_push(d, uid, uid_lock_fd);
+ if (r < 0)
+ goto finish;
+
+ *ret = uid;
+ r = 0;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+int dynamic_user_current(DynamicUser *d, uid_t *ret) {
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r < 0)
+ goto finish;
+
+ r = dynamic_user_push(d, uid, lock_fd);
+ if (r < 0)
+ goto finish;
+
+ *ret = uid;
+ r = 0;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+DynamicUser* dynamic_user_ref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ assert(d->n_ref > 0);
+ d->n_ref++;
+
+ return d;
+}
+
+DynamicUser* dynamic_user_unref(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed
+ * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries
+ * with no references, which is commonly the case right before a daemon reload. */
+
+ assert(d->n_ref > 0);
+ d->n_ref--;
+
+ return NULL;
+}
+
+static int dynamic_user_close(DynamicUser *d) {
+ _cleanup_close_ int lock_fd = -1;
+ uid_t uid;
+ int r;
+
+ /* Release the user ID, by releasing the lock on it, and emptying the storage socket. After this the user is
+ * unrealized again, much like it was after it the DynamicUser object was first allocated. */
+
+ if (lockf(d->storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ r = dynamic_user_pop(d, &uid, &lock_fd);
+ if (r == -EAGAIN) {
+ /* User wasn't realized yet, nothing to do. */
+ r = 0;
+ goto finish;
+ }
+ if (r < 0)
+ goto finish;
+
+ /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */
+ unlink_uid_lock(lock_fd, uid, d->name);
+ r = 1;
+
+finish:
+ (void) lockf(d->storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
+DynamicUser* dynamic_user_destroy(DynamicUser *d) {
+ if (!d)
+ return NULL;
+
+ /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last
+ * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that
+ * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload
+ * cycle, where the dynamic users should not be destroyed, but our datastructures should. */
+
+ dynamic_user_unref(d);
+
+ if (d->n_ref > 0)
+ return NULL;
+
+ (void) dynamic_user_close(d);
+ return dynamic_user_free(d);
+}
+
+int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+ assert(f);
+ assert(fds);
+
+ /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ int copy0, copy1;
+
+ copy0 = fdset_put_dup(fds, d->storage_socket[0]);
+ if (copy0 < 0)
+ return copy0;
+
+ copy1 = fdset_put_dup(fds, d->storage_socket[1]);
+ if (copy1 < 0)
+ return copy1;
+
+ fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1);
+ }
+
+ return 0;
+}
+
+void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+ _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL;
+ int r, fd0, fd1;
+
+ assert(m);
+ assert(value);
+ assert(fds);
+
+ /* Parse the serialization again, after a daemon reload */
+
+ r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL);
+ if (r != 3 || !isempty(value)) {
+ log_debug("Unable to parse dynamic user line.");
+ return;
+ }
+
+ if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) {
+ log_debug("Unable to process dynamic user fd specification.");
+ return;
+ }
+
+ r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add dynamic user: %m");
+ return;
+ }
+
+ (void) fdset_remove(fds, fd0);
+ (void) fdset_remove(fds, fd1);
+}
+
+void dynamic_user_vacuum(Manager *m, bool close_user) {
+ DynamicUser *d;
+ Iterator i;
+
+ assert(m);
+
+ /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users
+ * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which
+ * might not be referenced anymore. */
+
+ HASHMAP_FOREACH(d, m->dynamic_users, i) {
+ if (d->n_ref > 0)
+ continue;
+
+ if (close_user) {
+ log_debug("Removing orphaned dynamic user %s", d->name);
+ (void) dynamic_user_close(d);
+ }
+
+ dynamic_user_free(d);
+ }
+}
+
+int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) {
+ char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1];
+ _cleanup_free_ char *user = NULL;
+ uid_t check_uid;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ /* A friendly way to translate a dynamic user's UID into a name. */
+ if (!uid_is_dynamic(uid))
+ return -ESRCH;
+
+ xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid);
+ r = read_one_line_file(lock_path, &user);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
+
+ /* The lock file might be stale, hence let's verify the data before we return it */
+ r = dynamic_user_lookup_name(m, user, &check_uid);
+ if (r < 0)
+ return r;
+ if (check_uid != uid) /* lock file doesn't match our own idea */
+ return -ESRCH;
+
+ *ret = user;
+ user = NULL;
+
+ return 0;
+}
+
+int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) {
+ DynamicUser *d;
+ int r;
+
+ assert(m);
+ assert(name);
+ assert(ret);
+
+ /* A friendly call for translating a dynamic user's name into its UID */
+
+ d = hashmap_get(m->dynamic_users, name);
+ if (!d)
+ return -ESRCH;
+
+ r = dynamic_user_current(d, ret);
+ if (r == -EAGAIN) /* not realized yet? */
+ return -ESRCH;
+
+ return r;
+}
+
+int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) {
+ bool acquired = false;
+ int r;
+
+ assert(creds);
+ assert(m);
+
+ /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some
+ * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user
+ * and group. This call allocates a pair. */
+
+ if (!creds->user && user) {
+ r = dynamic_user_acquire(m, user, &creds->user);
+ if (r < 0)
+ return r;
+
+ acquired = true;
+ }
+
+ if (!creds->group) {
+
+ if (creds->user && (!group || streq_ptr(user, group)))
+ creds->group = dynamic_user_ref(creds->user);
+ else {
+ r = dynamic_user_acquire(m, group, &creds->group);
+ if (r < 0) {
+ if (acquired)
+ creds->user = dynamic_user_unref(creds->user);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid) {
+ uid_t u = UID_INVALID;
+ gid_t g = GID_INVALID;
+ int r;
+
+ assert(creds);
+ assert(uid);
+ assert(gid);
+
+ /* Realize both the referenced user and group */
+
+ if (creds->user) {
+ r = dynamic_user_realize(creds->user, &u);
+ if (r < 0)
+ return r;
+ }
+
+ if (creds->group && creds->group != creds->user) {
+ r = dynamic_user_realize(creds->group, &g);
+ if (r < 0)
+ return r;
+ } else
+ g = u;
+
+ *uid = u;
+ *gid = g;
+
+ return 0;
+}
+
+void dynamic_creds_unref(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_unref(creds->user);
+ creds->group = dynamic_user_unref(creds->group);
+}
+
+void dynamic_creds_destroy(DynamicCreds *creds) {
+ assert(creds);
+
+ creds->user = dynamic_user_destroy(creds->user);
+ creds->group = dynamic_user_destroy(creds->group);
+}
diff --git a/src/grp-system/libcore/src/failure-action.c b/src/grp-system/libcore/src/emergency-action.c
index e17ed7219b..2c85702970 100644
--- a/src/grp-system/libcore/src/failure-action.c
+++ b/src/grp-system/libcore/src/emergency-action.c
@@ -22,61 +22,62 @@
#include <linux/reboot.h>
-#include "core/failure-action.h"
+#include "core/emergency-action.h"
#include "sd-bus/bus-error.h"
#include "sd-bus/bus-util.h"
#include "systemd-basic/special.h"
#include "systemd-basic/string-table.h"
#include "systemd-basic/terminal-util.h"
-static void log_and_status(Manager *m, const char *message) {
- log_warning("%s", message);
+static void log_and_status(Manager *m, const char *message, const char *reason) {
+ log_warning("%s: %s", message, reason);
manager_status_printf(m, STATUS_TYPE_EMERGENCY,
ANSI_HIGHLIGHT_RED " !! " ANSI_NORMAL,
- "%s", message);
+ "%s: %s", message, reason);
}
-int failure_action(
+int emergency_action(
Manager *m,
- FailureAction action,
- const char *reboot_arg) {
+ EmergencyAction action,
+ const char *reboot_arg,
+ const char *reason) {
assert(m);
assert(action >= 0);
- assert(action < _FAILURE_ACTION_MAX);
+ assert(action < _EMERGENCY_ACTION_MAX);
- if (action == FAILURE_ACTION_NONE)
+ if (action == EMERGENCY_ACTION_NONE)
return -ECANCELED;
if (!MANAGER_IS_SYSTEM(m)) {
/* Downgrade all options to simply exiting if we run
* in user mode */
- log_warning("Exiting as result of failure.");
+ log_warning("Exiting: %s", reason);
m->exit_code = MANAGER_EXIT;
return -ECANCELED;
}
switch (action) {
- case FAILURE_ACTION_REBOOT:
- log_and_status(m, "Rebooting as result of failure.");
+ case EMERGENCY_ACTION_REBOOT:
+ log_and_status(m, "Rebooting", reason);
(void) update_reboot_parameter_and_warn(reboot_arg);
(void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
break;
- case FAILURE_ACTION_REBOOT_FORCE:
- log_and_status(m, "Forcibly rebooting as result of failure.");
+ case EMERGENCY_ACTION_REBOOT_FORCE:
+ log_and_status(m, "Forcibly rebooting", reason);
(void) update_reboot_parameter_and_warn(reboot_arg);
m->exit_code = MANAGER_REBOOT;
break;
- case FAILURE_ACTION_REBOOT_IMMEDIATE:
- log_and_status(m, "Rebooting immediately as result of failure.");
+ case EMERGENCY_ACTION_REBOOT_IMMEDIATE:
+ log_and_status(m, "Rebooting immediately", reason);
sync();
@@ -90,18 +91,18 @@ int failure_action(
reboot(RB_AUTOBOOT);
break;
- case FAILURE_ACTION_POWEROFF:
- log_and_status(m, "Powering off as result of failure.");
+ case EMERGENCY_ACTION_POWEROFF:
+ log_and_status(m, "Powering off", reason);
(void) manager_add_job_by_name_and_warn(m, JOB_START, SPECIAL_POWEROFF_TARGET, JOB_REPLACE_IRREVERSIBLY, NULL);
break;
- case FAILURE_ACTION_POWEROFF_FORCE:
- log_and_status(m, "Forcibly powering off as result of failure.");
+ case EMERGENCY_ACTION_POWEROFF_FORCE:
+ log_and_status(m, "Forcibly powering off", reason);
m->exit_code = MANAGER_POWEROFF;
break;
- case FAILURE_ACTION_POWEROFF_IMMEDIATE:
- log_and_status(m, "Powering off immediately as result of failure.");
+ case EMERGENCY_ACTION_POWEROFF_IMMEDIATE:
+ log_and_status(m, "Powering off immediately", reason);
sync();
@@ -110,19 +111,19 @@ int failure_action(
break;
default:
- assert_not_reached("Unknown failure action");
+ assert_not_reached("Unknown emergency action");
}
return -ECANCELED;
}
-static const char* const failure_action_table[_FAILURE_ACTION_MAX] = {
- [FAILURE_ACTION_NONE] = "none",
- [FAILURE_ACTION_REBOOT] = "reboot",
- [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force",
- [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
- [FAILURE_ACTION_POWEROFF] = "poweroff",
- [FAILURE_ACTION_POWEROFF_FORCE] = "poweroff-force",
- [FAILURE_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
+static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
+ [EMERGENCY_ACTION_NONE] = "none",
+ [EMERGENCY_ACTION_REBOOT] = "reboot",
+ [EMERGENCY_ACTION_REBOOT_FORCE] = "reboot-force",
+ [EMERGENCY_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate",
+ [EMERGENCY_ACTION_POWEROFF] = "poweroff",
+ [EMERGENCY_ACTION_POWEROFF_FORCE] = "poweroff-force",
+ [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate"
};
-DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction);
+DEFINE_STRING_TABLE_LOOKUP(emergency_action, EmergencyAction);
diff --git a/src/grp-system/libcore/src/execute.c b/src/grp-system/libcore/src/execute.c
index 83b43c3535..cbe772bf9f 100644
--- a/src/grp-system/libcore/src/execute.c
+++ b/src/grp-system/libcore/src/execute.c
@@ -25,11 +25,14 @@
#include <signal.h>
#include <string.h>
#include <sys/capability.h>
+#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/prctl.h>
+#include <sys/shm.h>
#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
#include <utmpx.h>
@@ -91,6 +94,7 @@
#include "systemd-basic/selinux-util.h"
#include "systemd-basic/signal-util.h"
#include "systemd-basic/smack-util.h"
+#include "systemd-basic/special.h"
#include "systemd-basic/string-table.h"
#include "systemd-basic/string-util.h"
#include "systemd-basic/strv.h"
@@ -219,12 +223,36 @@ static void exec_context_tty_reset(const ExecContext *context, const ExecParamet
(void) vt_disallocate(path);
}
+static bool is_terminal_input(ExecInput i) {
+ return IN_SET(i,
+ EXEC_INPUT_TTY,
+ EXEC_INPUT_TTY_FORCE,
+ EXEC_INPUT_TTY_FAIL);
+}
+
static bool is_terminal_output(ExecOutput o) {
- return
- o == EXEC_OUTPUT_TTY ||
- o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
- o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
- o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
+ return IN_SET(o,
+ EXEC_OUTPUT_TTY,
+ EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
+}
+
+static bool exec_context_needs_term(const ExecContext *c) {
+ assert(c);
+
+ /* Return true if the execution context suggests we should set $TERM to something useful. */
+
+ if (is_terminal_input(c->std_input))
+ return true;
+
+ if (is_terminal_output(c->std_output))
+ return true;
+
+ if (is_terminal_output(c->std_error))
+ return true;
+
+ return !!c->tty_path;
}
static int open_null_as(int flags, int nfd) {
@@ -363,13 +391,6 @@ static int open_terminal_as(const char *path, mode_t mode, int nfd) {
return r;
}
-static bool is_terminal_input(ExecInput i) {
- return
- i == EXEC_INPUT_TTY ||
- i == EXEC_INPUT_TTY_FORCE ||
- i == EXEC_INPUT_TTY_FAIL;
-}
-
static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
if (is_terminal_input(std_input) && !apply_tty_stdin)
@@ -392,7 +413,8 @@ static int fixup_output(ExecOutput std_output, int socket_fd) {
static int setup_input(
const ExecContext *context,
const ExecParameters *params,
- int socket_fd) {
+ int socket_fd,
+ int named_iofds[3]) {
ExecInput i;
@@ -410,7 +432,7 @@ static int setup_input(
return STDIN_FILENO;
}
- i = fixup_input(context->std_input, socket_fd, params->apply_tty_stdin);
+ i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
switch (i) {
@@ -442,6 +464,10 @@ static int setup_input(
case EXEC_INPUT_SOCKET:
return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+ case EXEC_INPUT_NAMED_FD:
+ (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
+ return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
+
default:
assert_not_reached("Unknown input type");
}
@@ -453,6 +479,7 @@ static int setup_output(
const ExecParameters *params,
int fileno,
int socket_fd,
+ int named_iofds[3],
const char *ident,
uid_t uid,
gid_t gid,
@@ -485,7 +512,7 @@ static int setup_output(
return STDERR_FILENO;
}
- i = fixup_input(context->std_input, socket_fd, params->apply_tty_stdin);
+ i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
o = fixup_output(context->std_output, socket_fd);
if (fileno == STDERR_FILENO) {
@@ -504,7 +531,7 @@ static int setup_output(
return fileno;
/* Duplicate from stdout if possible */
- if (e == o || e == EXEC_OUTPUT_INHERIT)
+ if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
o = e;
@@ -566,6 +593,10 @@ static int setup_output(
assert(socket_fd >= 0);
return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
+ case EXEC_OUTPUT_NAMED_FD:
+ (void) fd_nonblock(named_iofds[fileno], false);
+ return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
+
default:
assert_not_reached("Unknown error type");
}
@@ -701,73 +732,157 @@ static int ask_for_confirmation(char *response, char **argv) {
return r;
}
-static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
- bool keep_groups = false;
+static int get_fixed_user(const ExecContext *c, const char **user,
+ uid_t *uid, gid_t *gid,
+ const char **home, const char **shell) {
int r;
+ const char *name;
- assert(context);
+ assert(c);
+
+ if (!c->user)
+ return 0;
+
+ /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
+ * (i.e. are "/" or "/bin/nologin"). */
+
+ name = c->user;
+ r = get_user_creds_clean(&name, uid, gid, home, shell);
+ if (r < 0)
+ return r;
- /* Lookup and set GID and supplementary group list. Here too
- * we avoid NSS lookups for gid=0. */
+ *user = name;
+ return 0;
+}
- if (context->group || username) {
+static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
+ int r;
+ const char *name;
+
+ assert(c);
+
+ if (!c->group)
+ return 0;
+
+ name = c->group;
+ r = get_group_creds(&name, gid);
+ if (r < 0)
+ return r;
+
+ *group = name;
+ return 0;
+}
+
+static int get_supplementary_groups(const ExecContext *c, const char *user,
+ const char *group, gid_t gid,
+ gid_t **supplementary_gids, int *ngids) {
+ char **i;
+ int r, k = 0;
+ int ngroups_max;
+ bool keep_groups = false;
+ gid_t *groups = NULL;
+ _cleanup_free_ gid_t *l_gids = NULL;
+
+ assert(c);
+
+ /*
+ * If user is given, then lookup GID and supplementary groups list.
+ * We avoid NSS lookups for gid=0. Also we have to initialize groups
+ * here and as early as possible so we keep the list of supplementary
+ * groups of the caller.
+ */
+ if (user && gid_is_valid(gid) && gid != 0) {
/* First step, initialize groups from /etc/groups */
- if (username && gid != 0) {
- if (initgroups(username, gid) < 0)
- return -errno;
+ if (initgroups(user, gid) < 0)
+ return -errno;
- keep_groups = true;
- }
+ keep_groups = true;
+ }
- /* Second step, set our gids */
- if (setresgid(gid, gid, gid) < 0)
+ if (!c->supplementary_groups)
+ return 0;
+
+ /*
+ * If SupplementaryGroups= was passed then NGROUPS_MAX has to
+ * be positive, otherwise fail.
+ */
+ errno = 0;
+ ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
+ if (ngroups_max <= 0) {
+ if (errno > 0)
return -errno;
+ else
+ return -EOPNOTSUPP; /* For all other values */
}
- if (context->supplementary_groups) {
- int ngroups_max, k;
- gid_t *gids;
- char **i;
+ l_gids = new(gid_t, ngroups_max);
+ if (!l_gids)
+ return -ENOMEM;
- /* Final step, initialize any manually set supplementary groups */
- assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
+ if (keep_groups) {
+ /*
+ * Lookup the list of groups that the user belongs to, we
+ * avoid NSS lookups here too for gid=0.
+ */
+ k = ngroups_max;
+ if (getgrouplist(user, gid, l_gids, &k) < 0)
+ return -EINVAL;
+ } else
+ k = 0;
- if (!(gids = new(gid_t, ngroups_max)))
- return -ENOMEM;
+ STRV_FOREACH(i, c->supplementary_groups) {
+ const char *g;
- if (keep_groups) {
- k = getgroups(ngroups_max, gids);
- if (k < 0) {
- free(gids);
- return -errno;
- }
- } else
- k = 0;
+ if (k >= ngroups_max)
+ return -E2BIG;
- STRV_FOREACH(i, context->supplementary_groups) {
- const char *g;
+ g = *i;
+ r = get_group_creds(&g, l_gids+k);
+ if (r < 0)
+ return r;
- if (k >= ngroups_max) {
- free(gids);
- return -E2BIG;
- }
+ k++;
+ }
- g = *i;
- r = get_group_creds(&g, gids+k);
- if (r < 0) {
- free(gids);
- return r;
- }
+ /*
+ * Sets ngids to zero to drop all supplementary groups, happens
+ * when we are under root and SupplementaryGroups= is empty.
+ */
+ if (k == 0) {
+ *ngids = 0;
+ return 0;
+ }
- k++;
- }
+ /* Otherwise get the final list of supplementary groups */
+ groups = memdup(l_gids, sizeof(gid_t) * k);
+ if (!groups)
+ return -ENOMEM;
- if (setgroups(k, gids) < 0) {
- free(gids);
- return -errno;
- }
+ *supplementary_gids = groups;
+ *ngids = k;
+
+ groups = NULL;
+
+ return 0;
+}
+
+static int enforce_groups(const ExecContext *context, gid_t gid,
+ gid_t *supplementary_gids, int ngids) {
+ int r;
+
+ assert(context);
+
+ /* Handle SupplementaryGroups= even if it is empty */
+ if (context->supplementary_groups) {
+ r = maybe_setgroups(ngids, supplementary_gids);
+ if (r < 0)
+ return r;
+ }
- free(gids);
+ if (gid_is_valid(gid)) {
+ /* Then set our gids */
+ if (setresgid(gid, gid, gid) < 0)
+ return -errno;
}
return 0;
@@ -776,6 +891,9 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_
static int enforce_user(const ExecContext *context, uid_t uid) {
assert(context);
+ if (!uid_is_valid(uid))
+ return 0;
+
/* Sets (but doesn't look up) the uid and make sure we keep the
* capabilities while doing so. */
@@ -818,14 +936,19 @@ static int null_conv(
return PAM_CONV_ERR;
}
+#endif
+
static int setup_pam(
const char *name,
const char *user,
uid_t uid,
+ gid_t gid,
const char *tty,
char ***env,
int fds[], unsigned n_fds) {
+#ifdef HAVE_PAM
+
static const struct pam_conv conv = {
.conv = null_conv,
.appdata_ptr = NULL
@@ -925,8 +1048,14 @@ static int setup_pam(
* and this will make PR_SET_PDEATHSIG work in most cases.
* If this fails, ignore the error - but expect sd-pam threads
* to fail to exit normally */
+
+ r = maybe_setgroups(0, NULL);
+ if (r < 0)
+ log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
+ if (setresgid(gid, gid, gid) < 0)
+ log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
if (setresuid(uid, uid, uid) < 0)
- log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
+ log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
(void) ignore_signals(SIGPIPE, -1);
@@ -1019,8 +1148,10 @@ fail:
closelog();
return r;
-}
+#else
+ return 0;
#endif
+}
static void rename_process_from_path(const char *path) {
char process_name[11];
@@ -1055,15 +1186,29 @@ static void rename_process_from_path(const char *path) {
#ifdef HAVE_SECCOMP
-static int apply_seccomp(const ExecContext *c) {
+static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
+
+ if (is_seccomp_available())
+ return false;
+
+ log_open();
+ log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
+ log_close();
+ return true;
+}
+
+static int apply_seccomp(const Unit* u, const ExecContext *c) {
uint32_t negative_action, action;
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
Iterator i;
void *id;
int r;
assert(c);
+ if (skip_seccomp_unavailable(u, "syscall filtering"))
+ return 0;
+
negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
@@ -1104,20 +1249,23 @@ finish:
return r;
}
-static int apply_address_families(const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+static int apply_address_families(const Unit* u, const ExecContext *c) {
+ scmp_filter_ctx seccomp;
Iterator i;
int r;
+#if defined(__i386__)
+ return 0;
+#endif
+
assert(c);
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
+ if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
+ return 0;
- r = seccomp_add_secondary_archs(seccomp);
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
if (r < 0)
- goto finish;
+ return r;
if (c->address_families_whitelist) {
int af, first = 0, last = 0;
@@ -1214,10 +1362,6 @@ static int apply_address_families(const ExecContext *c) {
}
}
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
- if (r < 0)
- goto finish;
-
r = seccomp_load(seccomp);
finish:
@@ -1225,15 +1369,18 @@ finish:
return r;
}
-static int apply_memory_deny_write_execute(const ExecContext *c) {
- scmp_filter_ctx *seccomp;
+static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
+ scmp_filter_ctx seccomp;
int r;
assert(c);
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
+ if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
+ return 0;
+
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
r = seccomp_rule_add(
seccomp,
@@ -1253,7 +1400,12 @@ static int apply_memory_deny_write_execute(const ExecContext *c) {
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
if (r < 0)
goto finish;
@@ -1264,22 +1416,25 @@ finish:
return r;
}
-static int apply_restrict_realtime(const ExecContext *c) {
+static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
static const int permitted_policies[] = {
SCHED_OTHER,
SCHED_BATCH,
SCHED_IDLE,
};
- scmp_filter_ctx *seccomp;
+ scmp_filter_ctx seccomp;
unsigned i;
int r, p, max_policy = 0;
assert(c);
- seccomp = seccomp_init(SCMP_ACT_ALLOW);
- if (!seccomp)
- return -ENOMEM;
+ if (skip_seccomp_unavailable(u, "RestrictRealtime="))
+ return 0;
+
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
/* Determine the highest policy constant we want to allow */
for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
@@ -1323,7 +1478,34 @@ static int apply_restrict_realtime(const ExecContext *c) {
if (r < 0)
goto finish;
- r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
+static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
+ scmp_filter_ctx seccomp;
+ int r;
+
+ assert(c);
+
+ /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
+ * let's protect even those systems where this is left on in the kernel. */
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
+ return 0;
+
+ r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(_sysctl),
+ 0);
if (r < 0)
goto finish;
@@ -1334,12 +1516,33 @@ finish:
return r;
}
+static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
+ assert(c);
+
+ /* Turn off module syscalls on ProtectKernelModules=yes */
+
+ if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
+ return 0;
+
+ return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
+}
+
+static int apply_private_devices(const Unit *u, const ExecContext *c) {
+ assert(c);
+
+ /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
+
+ if (skip_seccomp_unavailable(u, "PrivateDevices="))
+ return 0;
+
+ return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
+}
+
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
assert(idle_pipe);
-
idle_pipe[1] = safe_close(idle_pipe[1]);
idle_pipe[2] = safe_close(idle_pipe[2]);
@@ -1366,6 +1569,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) {
}
static int build_environment(
+ Unit *u,
const ExecContext *c,
const ExecParameters *p,
unsigned n_fds,
@@ -1380,10 +1584,11 @@ static int build_environment(
unsigned n_env = 0;
char *x;
+ assert(u);
assert(c);
assert(ret);
- our_env = new0(char*, 12);
+ our_env = new0(char*, 14);
if (!our_env)
return -ENOMEM;
@@ -1408,7 +1613,7 @@ static int build_environment(
our_env[n_env++] = x;
}
- if (p->watchdog_usec > 0) {
+ if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
return -ENOMEM;
our_env[n_env++] = x;
@@ -1418,6 +1623,16 @@ static int build_environment(
our_env[n_env++] = x;
}
+ /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
+ * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
+ * check the database directly. */
+ if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
+ x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
+ if (!x)
+ return -ENOMEM;
+ our_env[n_env++] = x;
+ }
+
if (home) {
x = strappend("HOME=", home);
if (!x)
@@ -1444,12 +1659,28 @@ static int build_environment(
our_env[n_env++] = x;
}
- if (is_terminal_input(c->std_input) ||
- c->std_output == EXEC_OUTPUT_TTY ||
- c->std_error == EXEC_OUTPUT_TTY ||
- c->tty_path) {
+ if (!sd_id128_is_null(u->invocation_id)) {
+ if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
+ if (exec_context_needs_term(c)) {
+ const char *tty_path, *term = NULL;
+
+ tty_path = exec_context_tty_path(c);
+
+ /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
+ * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
+ * passes to PID 1 ends up all the way in the console login shown. */
- x = strdup(default_term_for_tty(exec_context_tty_path(c)));
+ if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+ term = getenv("TERM");
+ if (!term)
+ term = default_term_for_tty(tty_path);
+
+ x = strappend("TERM=", term);
if (!x)
return -ENOMEM;
our_env[n_env++] = x;
@@ -1520,20 +1751,382 @@ static bool exec_needs_mount_namespace(
if (context->private_devices ||
context->protect_system != PROTECT_SYSTEM_NO ||
- context->protect_home != PROTECT_HOME_NO)
+ context->protect_home != PROTECT_HOME_NO ||
+ context->protect_kernel_tunables ||
+ context->protect_kernel_modules ||
+ context->protect_control_groups)
return true;
return false;
}
+static int setup_private_users(uid_t uid, gid_t gid) {
+ _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
+ _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
+ _cleanup_close_ int unshare_ready_fd = -1;
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ uint64_t c = 1;
+ siginfo_t si;
+ ssize_t n;
+ int r;
+
+ /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
+ * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
+ * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
+ * which waits for the parent to create the new user namespace while staying in the original namespace. The
+ * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
+ * continues execution normally. */
+
+ if (uid != 0 && uid_is_valid(uid))
+ asprintf(&uid_map,
+ "0 0 1\n" /* Map root → root */
+ UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
+ uid, uid);
+ else
+ uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
+ if (!uid_map)
+ return -ENOMEM;
+
+ if (gid != 0 && gid_is_valid(gid))
+ asprintf(&gid_map,
+ "0 0 1\n" /* Map root → root */
+ GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
+ gid, gid);
+ else
+ gid_map = strdup("0 0 1\n"); /* The case where the above is the same */
+ if (!gid_map)
+ return -ENOMEM;
+
+ /* Create a communication channel so that the parent can tell the child when it finished creating the user
+ * namespace. */
+ unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
+ if (unshare_ready_fd < 0)
+ return -errno;
+
+ /* Create a communication channel so that the child can tell the parent a proper error code in case it
+ * failed. */
+ if (pipe2(errno_pipe, O_CLOEXEC) < 0)
+ return -errno;
+
+ pid = fork();
+ if (pid < 0)
+ return -errno;
+
+ if (pid == 0) {
+ _cleanup_close_ int fd = -1;
+ const char *a;
+ pid_t ppid;
+
+ /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
+ * here, after the parent opened its own user namespace. */
+
+ ppid = getppid();
+ errno_pipe[0] = safe_close(errno_pipe[0]);
+
+ /* Wait until the parent unshared the user namespace */
+ if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* Disable the setgroups() system call in the child user namespace, for good. */
+ a = procfs_file_alloca(ppid, "setgroups");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ /* If the file is missing the kernel is too old, let's continue anyway. */
+ } else {
+ if (write(fd, "deny\n", 5) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ fd = safe_close(fd);
+ }
+
+ /* First write the GID map */
+ a = procfs_file_alloca(ppid, "gid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, gid_map, strlen(gid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ fd = safe_close(fd);
+
+ /* The write the UID map */
+ a = procfs_file_alloca(ppid, "uid_map");
+ fd = open(a, O_WRONLY|O_CLOEXEC);
+ if (fd < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+ if (write(fd, uid_map, strlen(uid_map)) < 0) {
+ r = -errno;
+ goto child_fail;
+ }
+
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ (void) write(errno_pipe[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ errno_pipe[1] = safe_close(errno_pipe[1]);
+
+ if (unshare(CLONE_NEWUSER) < 0)
+ return -errno;
+
+ /* Let the child know that the namespace is ready now */
+ if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
+ return -errno;
+
+ /* Try to read an error code from the child */
+ n = read(errno_pipe[0], &r, sizeof(r));
+ if (n < 0)
+ return -errno;
+ if (n == sizeof(r)) { /* an error code was sent to us */
+ if (r < 0)
+ return r;
+ return -EIO;
+ }
+ if (n != 0) /* on success we should have read 0 bytes */
+ return -EIO;
+
+ r = wait_for_terminate(pid, &si);
+ if (r < 0)
+ return r;
+ pid = 0;
+
+ /* If something strange happened with the child, let's consider this fatal, too */
+ if (si.si_code != CLD_EXITED || si.si_status != 0)
+ return -EIO;
+
+ return 0;
+}
+
+static int setup_runtime_directory(
+ const ExecContext *context,
+ const ExecParameters *params,
+ uid_t uid,
+ gid_t gid) {
+
+ char **rt;
+ int r;
+
+ assert(context);
+ assert(params);
+
+ STRV_FOREACH(rt, context->runtime_directory) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(params->runtime_prefix, "/", *rt, NULL);
+ if (!p)
+ return -ENOMEM;
+
+ r = mkdir_p_label(p, context->runtime_directory_mode);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int setup_smack(
+ const ExecContext *context,
+ const ExecCommand *command) {
+
+#ifdef HAVE_SMACK
+ int r;
+
+ assert(context);
+ assert(command);
+
+ if (!mac_smack_use())
+ return 0;
+
+ if (context->smack_process_label) {
+ r = mac_smack_apply_pid(0, context->smack_process_label);
+ if (r < 0)
+ return r;
+ }
+#ifdef SMACK_DEFAULT_PROCESS_LABEL
+ else {
+ _cleanup_free_ char *exec_label = NULL;
+
+ r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
+ if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
+ return r;
+
+ r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
+ if (r < 0)
+ return r;
+ }
+#endif
+#endif
+
+ return 0;
+}
+
+static int compile_read_write_paths(
+ const ExecContext *context,
+ const ExecParameters *params,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ char **rt;
+
+ /* Compile the list of writable paths. This is the combination of the explicitly configured paths, plus all
+ * runtime directories. */
+
+ if (strv_isempty(context->read_write_paths) &&
+ strv_isempty(context->runtime_directory)) {
+ *ret = NULL; /* NOP if neither is set */
+ return 0;
+ }
+
+ l = strv_copy(context->read_write_paths);
+ if (!l)
+ return -ENOMEM;
+
+ STRV_FOREACH(rt, context->runtime_directory) {
+ char *s;
+
+ s = strjoin(params->runtime_prefix, "/", *rt, NULL);
+ if (!s)
+ return -ENOMEM;
+
+ if (strv_consume(&l, s) < 0)
+ return -ENOMEM;
+ }
+
+ *ret = l;
+ l = NULL;
+
+ return 0;
+}
+
+static int apply_mount_namespace(Unit *u, const ExecContext *context,
+ const ExecParameters *params,
+ ExecRuntime *runtime) {
+ int r;
+ _cleanup_free_ char **rw = NULL;
+ char *tmp = NULL, *var = NULL;
+ const char *root_dir = NULL;
+ NameSpaceInfo ns_info = {
+ .private_dev = context->private_devices,
+ .protect_control_groups = context->protect_control_groups,
+ .protect_kernel_tunables = context->protect_kernel_tunables,
+ .protect_kernel_modules = context->protect_kernel_modules,
+ };
+
+ assert(context);
+
+ /* The runtime struct only contains the parent of the private /tmp,
+ * which is non-accessible to world users. Inside of it there's a /tmp
+ * that is sticky, and that's the one we want to use here. */
+
+ if (context->private_tmp && runtime) {
+ if (runtime->tmp_dir)
+ tmp = strjoina(runtime->tmp_dir, "/tmp");
+ if (runtime->var_tmp_dir)
+ var = strjoina(runtime->var_tmp_dir, "/tmp");
+ }
+
+ r = compile_read_write_paths(context, params, &rw);
+ if (r < 0)
+ return r;
+
+ if (params->flags & EXEC_APPLY_CHROOT)
+ root_dir = context->root_directory;
+
+ r = setup_namespace(root_dir, &ns_info, rw,
+ context->read_only_paths,
+ context->inaccessible_paths,
+ tmp,
+ var,
+ context->protect_home,
+ context->protect_system,
+ context->mount_flags);
+
+ /* If we couldn't set up the namespace this is probably due to a
+ * missing capability. In this case, silently proceeed. */
+ if (IN_SET(r, -EPERM, -EACCES)) {
+ log_open();
+ log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
+ log_close();
+ r = 0;
+ }
+
+ return r;
+}
+
+static int apply_working_directory(const ExecContext *context,
+ const ExecParameters *params,
+ const char *home,
+ const bool needs_mount_ns) {
+ const char *d;
+ const char *wd;
+
+ assert(context);
+
+ if (context->working_directory_home)
+ wd = home;
+ else if (context->working_directory)
+ wd = context->working_directory;
+ else
+ wd = "/";
+
+ if (params->flags & EXEC_APPLY_CHROOT) {
+ if (!needs_mount_ns && context->root_directory)
+ if (chroot(context->root_directory) < 0)
+ return -errno;
+
+ d = wd;
+ } else
+ d = strjoina(strempty(context->root_directory), "/", strempty(wd));
+
+ if (chdir(d) < 0 && !context->working_directory_missing_ok)
+ return -errno;
+
+ return 0;
+}
+
+static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
+ assert(array);
+ assert(n);
+
+ if (!pair)
+ return;
+
+ if (pair[0] >= 0)
+ array[(*n)++] = pair[0];
+ if (pair[1] >= 0)
+ array[(*n)++] = pair[1];
+}
+
static int close_remaining_fds(
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
+ int user_lookup_fd,
int socket_fd,
int *fds, unsigned n_fds) {
unsigned n_dont_close = 0;
- int dont_close[n_fds + 7];
+ int dont_close[n_fds + 12];
assert(params);
@@ -1551,37 +2144,109 @@ static int close_remaining_fds(
n_dont_close += n_fds;
}
- if (runtime) {
- if (runtime->netns_storage_socket[0] >= 0)
- dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
- if (runtime->netns_storage_socket[1] >= 0)
- dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
+ if (runtime)
+ append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
+
+ if (dcreds) {
+ if (dcreds->user)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
+ if (dcreds->group)
+ append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
}
+ if (user_lookup_fd >= 0)
+ dont_close[n_dont_close++] = user_lookup_fd;
+
return close_all_fds(dont_close, n_dont_close);
}
+static bool context_has_address_families(const ExecContext *c) {
+ assert(c);
+
+ return c->address_families_whitelist ||
+ !set_isempty(c->address_families);
+}
+
+static bool context_has_syscall_filters(const ExecContext *c) {
+ assert(c);
+
+ return c->syscall_whitelist ||
+ !set_isempty(c->syscall_filter) ||
+ !set_isempty(c->syscall_archs);
+}
+
+static bool context_has_no_new_privileges(const ExecContext *c) {
+ assert(c);
+
+ if (c->no_new_privileges)
+ return true;
+
+ if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
+ return false;
+
+ return context_has_address_families(c) || /* we need NNP if we have any form of seccomp and are unprivileged */
+ c->memory_deny_write_execute ||
+ c->restrict_realtime ||
+ c->protect_kernel_tunables ||
+ c->protect_kernel_modules ||
+ c->private_devices ||
+ context_has_syscall_filters(c);
+}
+
+static int send_user_lookup(
+ Unit *unit,
+ int user_lookup_fd,
+ uid_t uid,
+ gid_t gid) {
+
+ assert(unit);
+
+ /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
+ * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
+ * specified. */
+
+ if (user_lookup_fd < 0)
+ return 0;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ if (writev(user_lookup_fd,
+ (struct iovec[]) {
+ { .iov_base = &uid, .iov_len = sizeof(uid) },
+ { .iov_base = &gid, .iov_len = sizeof(gid) },
+ { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+ return -errno;
+
+ return 0;
+}
+
static int exec_child(
Unit *unit,
ExecCommand *command,
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
char **argv,
int socket_fd,
+ int named_iofds[3],
int *fds, unsigned n_fds,
char **files_env,
+ int user_lookup_fd,
int *exit_status) {
_cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
_cleanup_free_ char *mac_selinux_context_net = NULL;
- const char *username = NULL, *home = NULL, *shell = NULL, *wd;
+ _cleanup_free_ gid_t *supplementary_gids = NULL;
+ const char *username = NULL, *groupname = NULL;
+ const char *home = NULL, *shell = NULL;
dev_t journal_stream_dev = 0;
ino_t journal_stream_ino = 0;
bool needs_mount_namespace;
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
- int i, r;
+ int i, r, ngids = 0;
assert(unit);
assert(command);
@@ -1617,7 +2282,7 @@ static int exec_child(
log_forget_fds();
- r = close_remaining_fds(params, runtime, socket_fd, fds, n_fds);
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
return r;
@@ -1631,7 +2296,7 @@ static int exec_child(
exec_context_tty_reset(context, params);
- if (params->confirm_spawn) {
+ if (params->flags & EXEC_CONFIRM_SPAWN) {
char response;
r = ask_for_confirmation(&response, argv);
@@ -1650,44 +2315,76 @@ static int exec_child(
}
}
- if (context->user) {
- username = context->user;
- r = get_user_creds(&username, &uid, &gid, &home, &shell);
+ if (context->dynamic_user && dcreds) {
+
+ /* Make sure we bypass our own NSS module for any NSS checks */
+ if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
+ *exit_status = EXIT_USER;
+ return -errno;
+ }
+
+ r = dynamic_creds_realize(dcreds, &uid, &gid);
if (r < 0) {
*exit_status = EXIT_USER;
return r;
}
- }
- if (context->group) {
- const char *g = context->group;
+ if (!uid_is_valid(uid) || !gid_is_valid(gid)) {
+ *exit_status = EXIT_USER;
+ return -ESRCH;
+ }
+
+ if (dcreds->user)
+ username = dcreds->user->name;
+
+ } else {
+ r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
- r = get_group_creds(&g, &gid);
+ r = get_fixed_group(context, &groupname, &gid);
if (r < 0) {
*exit_status = EXIT_GROUP;
return r;
}
}
+ /* Initialize user supplementary groups and get SupplementaryGroups= ones */
+ r = get_supplementary_groups(context, username, groupname, gid,
+ &supplementary_gids, &ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return r;
+ }
+
+ r = send_user_lookup(unit, user_lookup_fd, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
+
+ user_lookup_fd = safe_close(user_lookup_fd);
/* If a socket is connected to STDIN/STDOUT/STDERR, we
* must sure to drop O_NONBLOCK */
if (socket_fd >= 0)
(void) fd_nonblock(socket_fd, false);
- r = setup_input(context, params, socket_fd);
+ r = setup_input(context, params, socket_fd, named_iofds);
if (r < 0) {
*exit_status = EXIT_STDIN;
return r;
}
- r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDOUT;
return r;
}
- r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDERR;
return r;
@@ -1774,7 +2471,7 @@ static int exec_child(
USER_PROCESS,
username ? "root" : context->user);
- if (context->user && is_terminal_input(context->std_input)) {
+ if (context->user) {
r = chown_terminal(STDIN_FILENO, uid);
if (r < 0) {
*exit_status = EXIT_STDIN;
@@ -1801,32 +2498,15 @@ static int exec_child(
}
if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
- char **rt;
-
- STRV_FOREACH(rt, context->runtime_directory) {
- _cleanup_free_ char *p;
-
- p = strjoin(params->runtime_prefix, "/", *rt, NULL);
- if (!p) {
- *exit_status = EXIT_RUNTIME_DIRECTORY;
- return -ENOMEM;
- }
-
- r = mkdir_p_label(p, context->runtime_directory_mode);
- if (r < 0) {
- *exit_status = EXIT_RUNTIME_DIRECTORY;
- return r;
- }
-
- r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
- if (r < 0) {
- *exit_status = EXIT_RUNTIME_DIRECTORY;
- return r;
- }
+ r = setup_runtime_directory(context, params, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_RUNTIME_DIRECTORY;
+ return r;
}
}
r = build_environment(
+ unit,
context,
params,
n_fds,
@@ -1860,49 +2540,16 @@ static int exec_child(
}
accum_env = strv_env_clean(accum_env);
- umask(context->umask);
+ (void) umask(context->umask);
- if (params->apply_permissions && !command->privileged) {
- r = enforce_groups(context, username, gid);
- if (r < 0) {
- *exit_status = EXIT_GROUP;
- return r;
- }
-#ifdef HAVE_SMACK
- if (context->smack_process_label) {
- r = mac_smack_apply_pid(0, context->smack_process_label);
- if (r < 0) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
- }
-#ifdef SMACK_DEFAULT_PROCESS_LABEL
- else {
- _cleanup_free_ char *exec_label = NULL;
-
- r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
- if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
-
- r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
- if (r < 0) {
- *exit_status = EXIT_SMACK_PROCESS_LABEL;
- return r;
- }
- }
-#endif
-#endif
-#ifdef HAVE_PAM
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
if (context->pam_name && username) {
- r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
+ r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_PAM;
return r;
}
}
-#endif
}
if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
@@ -1914,80 +2561,37 @@ static int exec_child(
}
needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
-
if (needs_mount_namespace) {
- char *tmp = NULL, *var = NULL;
-
- /* The runtime struct only contains the parent
- * of the private /tmp, which is
- * non-accessible to world users. Inside of it
- * there's a /tmp that is sticky, and that's
- * the one we want to use here. */
-
- if (context->private_tmp && runtime) {
- if (runtime->tmp_dir)
- tmp = strjoina(runtime->tmp_dir, "/tmp");
- if (runtime->var_tmp_dir)
- var = strjoina(runtime->var_tmp_dir, "/tmp");
- }
-
- r = setup_namespace(
- params->apply_chroot ? context->root_directory : NULL,
- context->read_write_paths,
- context->read_only_paths,
- context->inaccessible_paths,
- tmp,
- var,
- context->private_devices,
- context->protect_home,
- context->protect_system,
- context->mount_flags);
-
- /* If we couldn't set up the namespace this is
- * probably due to a missing capability. In this case,
- * silently proceeed. */
- if (r == -EPERM || r == -EACCES) {
- log_open();
- log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
- log_close();
- } else if (r < 0) {
+ r = apply_mount_namespace(unit, context, params, runtime);
+ if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return r;
}
}
- if (context->working_directory_home)
- wd = home;
- else if (context->working_directory)
- wd = context->working_directory;
- else
- wd = "/";
-
- if (params->apply_chroot) {
- if (!needs_mount_namespace && context->root_directory)
- if (chroot(context->root_directory) < 0) {
- *exit_status = EXIT_CHROOT;
- return -errno;
- }
-
- if (chdir(wd) < 0 &&
- !context->working_directory_missing_ok) {
- *exit_status = EXIT_CHDIR;
- return -errno;
- }
- } else {
- const char *d;
+ /* Apply just after mount namespace setup */
+ r = apply_working_directory(context, params, home, needs_mount_namespace);
+ if (r < 0) {
+ *exit_status = EXIT_CHROOT;
+ return r;
+ }
- d = strjoina(strempty(context->root_directory), "/", strempty(wd));
- if (chdir(d) < 0 &&
- !context->working_directory_missing_ok) {
- *exit_status = EXIT_CHDIR;
- return -errno;
+ /* Drop groups as early as possbile */
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
+ r = enforce_groups(context, gid, supplementary_gids, ngids);
+ if (r < 0) {
+ *exit_status = EXIT_GROUP;
+ return r;
}
}
#ifdef HAVE_SELINUX
- if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) {
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) &&
+ mac_selinux_use() &&
+ params->selinux_context_net &&
+ socket_fd >= 0 &&
+ !command->privileged) {
+
r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
if (r < 0) {
*exit_status = EXIT_SELINUX_CONTEXT;
@@ -1996,6 +2600,14 @@ static int exec_child(
}
#endif
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && context->private_users) {
+ r = setup_private_users(uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_USER;
+ return r;
+ }
+ }
+
/* We repeat the fd closing here, to make sure that
* nothing is leaked from the PAM modules. Note that
* we are more aggressive this time since socket_fd
@@ -2012,13 +2624,8 @@ static int exec_child(
return r;
}
- if (params->apply_permissions && !command->privileged) {
+ if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
- bool use_address_families = context->address_families_whitelist ||
- !set_isempty(context->address_families);
- bool use_syscall_filter = context->syscall_whitelist ||
- !set_isempty(context->syscall_filter) ||
- !set_isempty(context->syscall_archs);
int secure_bits = context->secure_bits;
for (i = 0; i < _RLIMIT_MAX; i++) {
@@ -2085,6 +2692,41 @@ static int exec_child(
}
}
+ /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
+ * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
+ * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
+ * are restricted. */
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_use()) {
+ char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+
+ if (exec_context) {
+ r = setexeccon(exec_context);
+ if (r < 0) {
+ *exit_status = EXIT_SELINUX_CONTEXT;
+ return r;
+ }
+ }
+ }
+#endif
+
+ r = setup_smack(context, command);
+ if (r < 0) {
+ *exit_status = EXIT_SMACK_PROCESS_LABEL;
+ return r;
+ }
+
+#ifdef HAVE_APPARMOR
+ if (context->apparmor_profile && mac_apparmor_use()) {
+ r = aa_change_onexec(context->apparmor_profile);
+ if (r < 0 && !context->apparmor_profile_ignore) {
+ *exit_status = EXIT_APPARMOR_PROFILE;
+ return -errno;
+ }
+ }
+#endif
+
/* PR_GET_SECUREBITS is not privileged, while
* PR_SET_SECUREBITS is. So to suppress
* potential EPERMs we'll try not to call
@@ -2095,16 +2737,15 @@ static int exec_child(
return -errno;
}
- if (context->no_new_privileges ||
- (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter)))
+ if (context_has_no_new_privileges(context))
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
*exit_status = EXIT_NO_NEW_PRIVILEGES;
return -errno;
}
#ifdef HAVE_SECCOMP
- if (use_address_families) {
- r = apply_address_families(context);
+ if (context_has_address_families(context)) {
+ r = apply_address_families(unit, context);
if (r < 0) {
*exit_status = EXIT_ADDRESS_FAMILIES;
return r;
@@ -2112,7 +2753,7 @@ static int exec_child(
}
if (context->memory_deny_write_execute) {
- r = apply_memory_deny_write_execute(context);
+ r = apply_memory_deny_write_execute(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
@@ -2120,42 +2761,44 @@ static int exec_child(
}
if (context->restrict_realtime) {
- r = apply_restrict_realtime(context);
+ r = apply_restrict_realtime(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
- if (use_syscall_filter) {
- r = apply_seccomp(context);
+ if (context->protect_kernel_tunables) {
+ r = apply_protect_sysctl(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
return r;
}
}
-#endif
-#ifdef HAVE_SELINUX
- if (mac_selinux_use()) {
- char *exec_context = mac_selinux_context_net ?: context->selinux_context;
+ if (context->protect_kernel_modules) {
+ r = apply_protect_kernel_modules(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+ }
- if (exec_context) {
- r = setexeccon(exec_context);
- if (r < 0) {
- *exit_status = EXIT_SELINUX_CONTEXT;
- return r;
- }
+ if (context->private_devices) {
+ r = apply_private_devices(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
}
}
-#endif
-#ifdef HAVE_APPARMOR
- if (context->apparmor_profile && mac_apparmor_use()) {
- r = aa_change_onexec(context->apparmor_profile);
- if (r < 0 && !context->apparmor_profile_ignore) {
- *exit_status = EXIT_APPARMOR_PROFILE;
- return -errno;
+ /* This really should remain the last step before the execve(), to make sure our own code is unaffected
+ * by the filter as little as possible. */
+ if (context_has_syscall_filters(context)) {
+ r = apply_seccomp(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
}
}
#endif
@@ -2192,12 +2835,14 @@ int exec_spawn(Unit *unit,
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
+ DynamicCreds *dcreds,
pid_t *ret) {
_cleanup_strv_free_ char **files_env = NULL;
int *fds = NULL; unsigned n_fds = 0;
_cleanup_free_ char *line = NULL;
int socket_fd, r;
+ int named_iofds[3] = { -1, -1, -1 };
char **argv;
pid_t pid;
@@ -2224,6 +2869,10 @@ int exec_spawn(Unit *unit,
n_fds = params->n_fds;
}
+ r = exec_context_named_iofds(unit, context, params, named_iofds);
+ if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
+
r = exec_context_load_environment(unit, context, &files_env);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
@@ -2250,10 +2899,13 @@ int exec_spawn(Unit *unit,
context,
params,
runtime,
+ dcreds,
argv,
socket_fd,
+ named_iofds,
fds, n_fds,
files_env,
+ unit->manager->user_lookup_fds[1],
&exit_status);
if (r < 0) {
log_open();
@@ -2313,6 +2965,9 @@ void exec_context_done(ExecContext *c) {
for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
c->rlimit[l] = mfree(c->rlimit[l]);
+ for (l = 0; l < 3; l++)
+ c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
+
c->working_directory = mfree(c->working_directory);
c->root_directory = mfree(c->root_directory);
c->tty_path = mfree(c->tty_path);
@@ -2411,6 +3066,56 @@ static void invalid_env(const char *p, void *userdata) {
log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
}
+const char* exec_context_fdname(const ExecContext *c, int fd_index) {
+ assert(c);
+
+ switch (fd_index) {
+ case STDIN_FILENO:
+ if (c->std_input != EXEC_INPUT_NAMED_FD)
+ return NULL;
+ return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
+ case STDOUT_FILENO:
+ if (c->std_output != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+ return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
+ case STDERR_FILENO:
+ if (c->std_error != EXEC_OUTPUT_NAMED_FD)
+ return NULL;
+ return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
+ default:
+ return NULL;
+ }
+}
+
+int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
+ unsigned i, targets;
+ const char *stdio_fdname[3];
+
+ assert(c);
+ assert(p);
+
+ targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
+ (c->std_output == EXEC_OUTPUT_NAMED_FD) +
+ (c->std_error == EXEC_OUTPUT_NAMED_FD);
+
+ for (i = 0; i < 3; i++)
+ stdio_fdname[i] = exec_context_fdname(c, i);
+
+ for (i = 0; i < p->n_fds && targets > 0; i++)
+ if (named_iofds[STDIN_FILENO] < 0 && c->std_input == EXEC_INPUT_NAMED_FD && stdio_fdname[STDIN_FILENO] && streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
+ named_iofds[STDIN_FILENO] = p->fds[i];
+ targets--;
+ } else if (named_iofds[STDOUT_FILENO] < 0 && c->std_output == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDOUT_FILENO] && streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
+ named_iofds[STDOUT_FILENO] = p->fds[i];
+ targets--;
+ } else if (named_iofds[STDERR_FILENO] < 0 && c->std_error == EXEC_OUTPUT_NAMED_FD && stdio_fdname[STDERR_FILENO] && streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
+ named_iofds[STDERR_FILENO] = p->fds[i];
+ targets--;
+ }
+
+ return (targets == 0 ? 0 : -ENOENT);
+}
+
int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
char **i, **r = NULL;
@@ -2555,8 +3260,12 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sRootDirectory: %s\n"
"%sNonBlocking: %s\n"
"%sPrivateTmp: %s\n"
- "%sPrivateNetwork: %s\n"
"%sPrivateDevices: %s\n"
+ "%sProtectKernelTunables: %s\n"
+ "%sProtectKernelModules: %s\n"
+ "%sProtectControlGroups: %s\n"
+ "%sPrivateNetwork: %s\n"
+ "%sPrivateUsers: %s\n"
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
"%sIgnoreSIGPIPE: %s\n"
@@ -2567,8 +3276,12 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, c->root_directory ? c->root_directory : "/",
prefix, yes_no(c->non_blocking),
prefix, yes_no(c->private_tmp),
- prefix, yes_no(c->private_network),
prefix, yes_no(c->private_devices),
+ prefix, yes_no(c->protect_kernel_tunables),
+ prefix, yes_no(c->protect_kernel_modules),
+ prefix, yes_no(c->protect_control_groups),
+ prefix, yes_no(c->private_network),
+ prefix, yes_no(c->private_users),
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
prefix, yes_no(c->ignore_sigpipe),
@@ -2723,6 +3436,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
if (c->group)
fprintf(f, "%sGroup: %s\n", prefix, c->group);
+ fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
+
if (strv_length(c->supplementary_groups) > 0) {
fprintf(f, "%sSupplementaryGroups:", prefix);
strv_fprintf(f, c->supplementary_groups);
@@ -2882,12 +3597,12 @@ void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
"%sPID: "PID_FMT"\n",
prefix, s->pid);
- if (s->start_timestamp.realtime > 0)
+ if (dual_timestamp_is_set(&s->start_timestamp))
fprintf(f,
"%sStart Timestamp: %s\n",
prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
- if (s->exit_timestamp.realtime > 0)
+ if (dual_timestamp_is_set(&s->exit_timestamp))
fprintf(f,
"%sExit Timestamp: %s\n"
"%sExit Code: %s\n"
@@ -2908,7 +3623,8 @@ char *exec_command_line(char **argv) {
STRV_FOREACH(a, argv)
k += strlen(*a)+3;
- if (!(n = new(char, k)))
+ n = new(char, k);
+ if (!n)
return NULL;
p = n;
@@ -3097,9 +3813,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
free(r->tmp_dir);
free(r->var_tmp_dir);
safe_close_pair(r->netns_storage_socket);
- free(r);
-
- return NULL;
+ return mfree(r);
}
int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
@@ -3255,7 +3969,8 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
[EXEC_INPUT_TTY] = "tty",
[EXEC_INPUT_TTY_FORCE] = "tty-force",
[EXEC_INPUT_TTY_FAIL] = "tty-fail",
- [EXEC_INPUT_SOCKET] = "socket"
+ [EXEC_INPUT_SOCKET] = "socket",
+ [EXEC_INPUT_NAMED_FD] = "fd",
};
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
@@ -3270,7 +3985,8 @@ static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
[EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
[EXEC_OUTPUT_JOURNAL] = "journal",
[EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
- [EXEC_OUTPUT_SOCKET] = "socket"
+ [EXEC_OUTPUT_SOCKET] = "socket",
+ [EXEC_OUTPUT_NAMED_FD] = "fd",
};
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
diff --git a/src/grp-system/libcore/src/job.c b/src/grp-system/libcore/src/job.c
index 5c28ea1bd6..69f5ae5cfc 100644
--- a/src/grp-system/libcore/src/job.c
+++ b/src/grp-system/libcore/src/job.c
@@ -691,16 +691,16 @@ _pure_ static const char *job_get_status_message_format(Unit *u, JobType t, JobR
}
static void job_print_status_message(Unit *u, JobType t, JobResult result) {
- static struct {
+ static const struct {
const char *color, *word;
} const statuses[_JOB_RESULT_MAX] = {
- [JOB_DONE] = {ANSI_GREEN, " OK "},
- [JOB_TIMEOUT] = {ANSI_HIGHLIGHT_RED, " TIME "},
- [JOB_FAILED] = {ANSI_HIGHLIGHT_RED, "FAILED"},
- [JOB_DEPENDENCY] = {ANSI_HIGHLIGHT_YELLOW, "DEPEND"},
- [JOB_SKIPPED] = {ANSI_HIGHLIGHT, " INFO "},
- [JOB_ASSERT] = {ANSI_HIGHLIGHT_YELLOW, "ASSERT"},
- [JOB_UNSUPPORTED] = {ANSI_HIGHLIGHT_YELLOW, "UNSUPP"},
+ [JOB_DONE] = { ANSI_GREEN, " OK " },
+ [JOB_TIMEOUT] = { ANSI_HIGHLIGHT_RED, " TIME " },
+ [JOB_FAILED] = { ANSI_HIGHLIGHT_RED, "FAILED" },
+ [JOB_DEPENDENCY] = { ANSI_HIGHLIGHT_YELLOW, "DEPEND" },
+ [JOB_SKIPPED] = { ANSI_HIGHLIGHT, " INFO " },
+ [JOB_ASSERT] = { ANSI_HIGHLIGHT_YELLOW, "ASSERT" },
+ [JOB_UNSUPPORTED] = { ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
};
const char *format;
@@ -768,8 +768,9 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
if (!format)
return;
+ /* The description might be longer than the buffer, but that's OK, we'll just truncate it here */
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof(buf), format, unit_description(u));
REENABLE_WARNING;
switch (t) {
@@ -928,7 +929,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user
u = j->unit;
job_finish_and_invalidate(j, JOB_TIMEOUT, true, false);
- failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
+ emergency_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg, "job timed out");
return 0;
}
@@ -998,7 +999,10 @@ char *job_dbus_path(Job *j) {
return p;
}
-int job_serialize(Job *j, FILE *f, FDSet *fds) {
+int job_serialize(Job *j, FILE *f) {
+ assert(j);
+ assert(f);
+
fprintf(f, "job-id=%u\n", j->id);
fprintf(f, "job-type=%s\n", job_type_to_string(j->type));
fprintf(f, "job-state=%s\n", job_state_to_string(j->state));
@@ -1009,15 +1013,16 @@ int job_serialize(Job *j, FILE *f, FDSet *fds) {
if (j->begin_usec > 0)
fprintf(f, "job-begin="USEC_FMT"\n", j->begin_usec);
- bus_track_serialize(j->clients, f);
+ bus_track_serialize(j->clients, f, "subscribed");
/* End marker */
fputc('\n', f);
return 0;
}
-int job_deserialize(Job *j, FILE *f, FDSet *fds) {
+int job_deserialize(Job *j, FILE *f) {
assert(j);
+ assert(f);
for (;;) {
char line[LINE_MAX], *l, *v;
@@ -1107,7 +1112,7 @@ int job_deserialize(Job *j, FILE *f, FDSet *fds) {
} else if (streq(l, "subscribed")) {
if (strv_extend(&j->deserialized_clients, v) < 0)
- return log_oom();
+ log_oom();
}
}
}
@@ -1119,9 +1124,8 @@ int job_coldplug(Job *j) {
/* After deserialization is complete and the bus connection
* set up again, let's start watching our subscribers again */
- r = bus_track_coldplug(j->manager, &j->clients, &j->deserialized_clients);
- if (r < 0)
- return r;
+ (void) bus_track_coldplug(j->manager, &j->clients, false, j->deserialized_clients);
+ j->deserialized_clients = strv_free(j->deserialized_clients);
if (j->state == JOB_WAITING)
job_add_to_run_queue(j);
diff --git a/src/grp-system/libcore/src/load-fragment-gperf.gperf.m4 b/src/grp-system/libcore/src/load-fragment-gperf.gperf.m4
index 4658fe71b8..11b5dd5dc2 100644
--- a/src/grp-system/libcore/src/load-fragment-gperf.gperf.m4
+++ b/src/grp-system/libcore/src/load-fragment-gperf.gperf.m4
@@ -20,9 +20,9 @@ m4_dnl Define the context options only once
m4_define(`EXEC_CONTEXT_CONFIG_ITEMS',
`$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context)
$1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory)
-$1.User, config_parse_unit_string_printf, 0, offsetof($1, exec_context.user)
-$1.Group, config_parse_unit_string_printf, 0, offsetof($1, exec_context.group)
-$1.SupplementaryGroups, config_parse_strv, 0, offsetof($1, exec_context.supplementary_groups)
+$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user)
+$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group)
+$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups)
$1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context)
$1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context)
$1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context)
@@ -35,9 +35,10 @@ $1.UMask, config_parse_mode, 0,
$1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment)
$1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files)
$1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment)
-$1.StandardInput, config_parse_input, 0, offsetof($1, exec_context.std_input)
-$1.StandardOutput, config_parse_output, 0, offsetof($1, exec_context.std_output)
-$1.StandardError, config_parse_output, 0, offsetof($1, exec_context.std_error)
+$1.DynamicUser, config_parse_bool, 0, offsetof($1, exec_context.dynamic_user)
+$1.StandardInput, config_parse_exec_input, 0, offsetof($1, exec_context)
+$1.StandardOutput, config_parse_exec_output, 0, offsetof($1, exec_context)
+$1.StandardError, config_parse_exec_output, 0, offsetof($1, exec_context)
$1.TTYPath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.tty_path)
$1.TTYReset, config_parse_bool, 0, offsetof($1, exec_context.tty_reset)
$1.TTYVHangup, config_parse_bool, 0, offsetof($1, exec_context.tty_vhangup)
@@ -88,8 +89,12 @@ $1.ReadWritePaths, config_parse_namespace_path_strv, 0,
$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
-$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
+$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
+$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
+$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
+$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
+$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context)
$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context)
@@ -121,6 +126,8 @@ $1.KillSignal, config_parse_signal, 0,
m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS',
`$1.Slice, config_parse_unit_slice, 0, 0
$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting)
+$1.CPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.cpu_weight)
+$1.StartupCPUWeight, config_parse_cpu_weight, 0, offsetof($1, cgroup_context.startup_cpu_weight)
$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.cpu_shares)
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
@@ -128,6 +135,7 @@ $1.MemoryAccounting, config_parse_bool, 0,
$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemorySwapMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
@@ -181,13 +189,13 @@ Unit.OnFailureIsolate, config_parse_job_mode_isolate, 0,
Unit.IgnoreOnIsolate, config_parse_bool, 0, offsetof(Unit, ignore_on_isolate)
Unit.IgnoreOnSnapshot, config_parse_warn_compat, DISABLED_LEGACY, 0
Unit.JobTimeoutSec, config_parse_sec_fix_0, 0, offsetof(Unit, job_timeout)
-Unit.JobTimeoutAction, config_parse_failure_action, 0, offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutAction, config_parse_emergency_action, 0, offsetof(Unit, job_timeout_action)
Unit.JobTimeoutRebootArgument, config_parse_string, 0, offsetof(Unit, job_timeout_reboot_arg)
Unit.StartLimitIntervalSec, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
m4_dnl The following is a legacy alias name for compatibility
Unit.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
Unit.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst)
-Unit.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action)
+Unit.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
Unit.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg)
Unit.ConditionPathExists, config_parse_unit_condition_path, CONDITION_PATH_EXISTS, offsetof(Unit, conditions)
Unit.ConditionPathExistsGlob, config_parse_unit_condition_path, CONDITION_PATH_EXISTS_GLOB, offsetof(Unit, conditions)
@@ -244,9 +252,9 @@ Service.WatchdogSec, config_parse_sec, 0,
m4_dnl The following three only exist for compatibility, they moved into Unit, see above
Service.StartLimitInterval, config_parse_sec, 0, offsetof(Unit, start_limit.interval)
Service.StartLimitBurst, config_parse_unsigned, 0, offsetof(Unit, start_limit.burst)
-Service.StartLimitAction, config_parse_failure_action, 0, offsetof(Unit, start_limit_action)
+Service.StartLimitAction, config_parse_emergency_action, 0, offsetof(Unit, start_limit_action)
Service.RebootArgument, config_parse_string, 0, offsetof(Unit, reboot_arg)
-Service.FailureAction, config_parse_failure_action, 0, offsetof(Service, failure_action)
+Service.FailureAction, config_parse_emergency_action, 0, offsetof(Service, emergency_action)
Service.Type, config_parse_service_type, 0, offsetof(Service, type)
Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart)
Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only)
@@ -286,13 +294,14 @@ Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC
Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command)
Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command)
Socket.TimeoutSec, config_parse_sec, 0, offsetof(Socket, timeout_usec)
-Socket.SocketUser, config_parse_unit_string_printf, 0, offsetof(Socket, user)
-Socket.SocketGroup, config_parse_unit_string_printf, 0, offsetof(Socket, group)
+Socket.SocketUser, config_parse_user_group, 0, offsetof(Socket, user)
+Socket.SocketGroup, config_parse_user_group, 0, offsetof(Socket, group)
Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode)
Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode)
Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept)
Socket.Writable, config_parse_bool, 0, offsetof(Socket, writable)
Socket.MaxConnections, config_parse_unsigned, 0, offsetof(Socket, max_connections)
+Socket.MaxConnectionsPerSource, config_parse_unsigned, 0, offsetof(Socket, max_connections_per_source)
Socket.KeepAlive, config_parse_bool, 0, offsetof(Socket, keep_alive)
Socket.KeepAliveTimeSec, config_parse_sec, 0, offsetof(Socket, keep_alive_time)
Socket.KeepAliveIntervalSec, config_parse_sec, 0, offsetof(Socket, keep_alive_interval)
@@ -351,6 +360,8 @@ Mount.Type, config_parse_string, 0,
Mount.TimeoutSec, config_parse_sec, 0, offsetof(Mount, timeout_usec)
Mount.DirectoryMode, config_parse_mode, 0, offsetof(Mount, directory_mode)
Mount.SloppyOptions, config_parse_bool, 0, offsetof(Mount, sloppy_options)
+Mount.LazyUnmount, config_parse_bool, 0, offsetof(Mount, lazy_unmount)
+Mount.ForceUnmount, config_parse_bool, 0, offsetof(Mount, force_unmount)
EXEC_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
CGROUP_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
KILL_CONTEXT_CONFIG_ITEMS(Mount)m4_dnl
diff --git a/src/grp-system/libcore/src/load-fragment.c b/src/grp-system/libcore/src/load-fragment.c
index 53f3746f4d..927615360e 100644
--- a/src/grp-system/libcore/src/load-fragment.c
+++ b/src/grp-system/libcore/src/load-fragment.c
@@ -64,6 +64,7 @@
#include "systemd-basic/string-util.h"
#include "systemd-basic/strv.h"
#include "systemd-basic/unit-name.h"
+#include "systemd-basic/user-util.h"
#include "systemd-basic/utf8.h"
#include "systemd-basic/web-util.h"
@@ -492,16 +493,17 @@ int config_parse_socket_bind(const char *unit,
return 0;
}
-int config_parse_exec_nice(const char *unit,
- const char *filename,
- unsigned line,
- const char *section,
- unsigned section_line,
- const char *lvalue,
- int ltype,
- const char *rvalue,
- void *data,
- void *userdata) {
+int config_parse_exec_nice(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
ExecContext *c = data;
int priority, r;
@@ -511,14 +513,13 @@ int config_parse_exec_nice(const char *unit,
assert(rvalue);
assert(data);
- r = safe_atoi(rvalue, &priority);
+ r = parse_nice(rvalue, &priority);
if (r < 0) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority, ignoring: %s", rvalue);
- return 0;
- }
+ if (r == -ERANGE)
+ log_syntax(unit, LOG_ERR, filename, line, r, "Nice priority out of range, ignoring: %s", rvalue);
+ else
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse nice priority, ignoring: %s", rvalue);
- if (priority < PRIO_MIN || priority >= PRIO_MAX) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Nice priority out of range, ignoring: %s", rvalue);
return 0;
}
@@ -777,8 +778,104 @@ int config_parse_socket_bindtodevice(
return 0;
}
-DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_input, exec_input, ExecInput, "Failed to parse input literal specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_output, exec_output, ExecOutput, "Failed to parse output literal specifier");
+
+int config_parse_exec_input(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ ExecContext *c = data;
+ const char *name;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(rvalue);
+
+ name = startswith(rvalue, "fd:");
+ if (name) {
+ /* Strip prefix and validate fd name */
+ if (!fdname_is_valid(name)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+ return 0;
+ }
+ c->std_input = EXEC_INPUT_NAMED_FD;
+ r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], name);
+ if (r < 0)
+ log_oom();
+ return r;
+ } else {
+ ExecInput ei = exec_input_from_string(rvalue);
+ if (ei == _EXEC_INPUT_INVALID)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse input specifier, ignoring: %s", rvalue);
+ else
+ c->std_input = ei;
+ return 0;
+ }
+}
+
+int config_parse_exec_output(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ ExecContext *c = data;
+ ExecOutput eo;
+ const char *name;
+ int r;
+
+ assert(data);
+ assert(filename);
+ assert(line);
+ assert(lvalue);
+ assert(rvalue);
+
+ name = startswith(rvalue, "fd:");
+ if (name) {
+ /* Strip prefix and validate fd name */
+ if (!fdname_is_valid(name)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid file descriptor name, ignoring: %s", name);
+ return 0;
+ }
+ eo = EXEC_OUTPUT_NAMED_FD;
+ } else {
+ eo = exec_output_from_string(rvalue);
+ if (eo == _EXEC_OUTPUT_INVALID) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output specifier, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ if (streq(lvalue, "StandardOutput")) {
+ c->std_output = eo;
+ r = free_and_strdup(&c->stdio_fdname[STDOUT_FILENO], name);
+ if (r < 0)
+ log_oom();
+ return r;
+ } else if (streq(lvalue, "StandardError")) {
+ c->std_error = eo;
+ r = free_and_strdup(&c->stdio_fdname[STDERR_FILENO], name);
+ if (r < 0)
+ log_oom();
+ return r;
+ } else {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse output property, ignoring: %s", lvalue);
+ return 0;
+ }
+}
int config_parse_exec_io_class(const char *unit,
const char *filename,
@@ -1339,10 +1436,13 @@ int config_parse_timer(const char *unit,
void *userdata) {
Timer *t = data;
- usec_t u = 0;
+ usec_t usec = 0;
TimerValue *v;
TimerBase b;
CalendarSpec *c = NULL;
+ Unit *u = userdata;
+ _cleanup_free_ char *k = NULL;
+ int r;
assert(filename);
assert(lvalue);
@@ -1361,14 +1461,20 @@ int config_parse_timer(const char *unit,
return 0;
}
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
if (b == TIMER_CALENDAR) {
- if (calendar_spec_from_string(rvalue, &c) < 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", rvalue);
+ if (calendar_spec_from_string(k, &c) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse calendar specification, ignoring: %s", k);
return 0;
}
} else {
- if (parse_sec(rvalue, &u) < 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", rvalue);
+ if (parse_sec(k, &usec) < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", k);
return 0;
}
}
@@ -1380,7 +1486,7 @@ int config_parse_timer(const char *unit,
}
v->base = b;
- v->value = u;
+ v->value = usec;
v->calendar_spec = c;
LIST_PREPEND(value, t->values, v);
@@ -1583,11 +1689,7 @@ int config_parse_fdname(
return 0;
}
- free(s->fdname);
- s->fdname = p;
- p = NULL;
-
- return 0;
+ return free_and_replace(s->fdname, p);
}
int config_parse_service_sockets(
@@ -1765,6 +1867,123 @@ int config_parse_sec_fix_0(
return 0;
}
+int config_parse_user_group(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **user = data, *n;
+ Unit *u = userdata;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue))
+ n = NULL;
+ else {
+ _cleanup_free_ char *k = NULL;
+
+ r = unit_full_printf(u, rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue);
+ return 0;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k);
+ return 0;
+ }
+
+ n = k;
+ k = NULL;
+ }
+
+ free(*user);
+ *user = n;
+
+ return 0;
+}
+
+int config_parse_user_group_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***users = data;
+ Unit *u = userdata;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ char **empty;
+
+ empty = new0(char*, 1);
+ if (!empty)
+ return log_oom();
+
+ strv_free(*users);
+ *users = empty;
+
+ return 0;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *k = NULL;
+
+ r = extract_first_word(&p, &word, WHITESPACE, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
+
+ r = unit_full_printf(u, word, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", word);
+ continue;
+ }
+
+ if (!valid_user_group_name_or_id(k)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k);
+ continue;
+ }
+
+ r = strv_push(users, k);
+ if (r < 0)
+ return log_oom();
+
+ k = NULL;
+ }
+
+ return 0;
+}
+
int config_parse_busname_service(
const char *unit,
const char *filename,
@@ -1927,9 +2146,7 @@ int config_parse_working_directory(
return 0;
}
- free(c->working_directory);
- c->working_directory = k;
- k = NULL;
+ free_and_replace(c->working_directory, k);
c->working_directory_home = false;
}
@@ -2308,7 +2525,7 @@ int config_parse_unit_condition_null(
}
DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
-DEFINE_CONFIG_PARSE_ENUM(config_parse_failure_action, failure_action, FailureAction, "Failed to parse failure action specifier");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_emergency_action, emergency_action, EmergencyAction, "Failed to parse failure action specifier");
int config_parse_unit_requires_mounts_for(
const char *unit,
@@ -2403,6 +2620,7 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
+
static int syscall_filter_parse_one(
const char *unit,
const char *filename,
@@ -2413,27 +2631,29 @@ static int syscall_filter_parse_one(
bool warn) {
int r;
- if (*t == '@') {
- const SystemCallFilterSet *set;
+ if (t[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
- for (set = syscall_filter_sets; set->set_name; set++)
- if (streq(set->set_name, t)) {
- const char *sys;
+ set = syscall_filter_set_find(t);
+ if (!set) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
+ return 0;
+ }
- NULSTR_FOREACH(sys, set->value) {
- r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
- if (r < 0)
- return r;
- }
- break;
- }
+ NULSTR_FOREACH(i, set->value) {
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
+ if (r < 0)
+ return r;
+ }
} else {
int id;
id = seccomp_syscall_resolve_name(t);
if (id == __NR_SCMP_ERROR) {
if (warn)
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
return 0;
}
@@ -2447,8 +2667,9 @@ static int syscall_filter_parse_one(
if (r < 0)
return log_oom();
} else
- set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ (void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
}
+
return 0;
}
@@ -2467,8 +2688,7 @@ int config_parse_syscall_filter(
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
- const char *word, *state;
- size_t l;
+ const char *p;
int r;
assert(filename);
@@ -2507,24 +2727,24 @@ int config_parse_syscall_filter(
}
}
- FOREACH_WORD_QUOTED(word, l, rvalue, state) {
- _cleanup_free_ char *t = NULL;
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
- t = strndup(word, l);
- if (!t)
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ break;
+ }
- r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
if (r < 0)
return r;
}
- if (!isempty(state))
- log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
-
- /* Turn on NNP, but only if it wasn't configured explicitly
- * before, and only if we are in user mode. */
- if (!c->no_new_privileges_set && MANAGER_IS_USER(u->manager))
- c->no_new_privileges = true;
return 0;
}
@@ -2735,6 +2955,34 @@ int config_parse_unit_slice(
DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy");
+int config_parse_cpu_weight(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *weight = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = cg_weight_parse(rvalue, weight);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "CPU weight '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
int config_parse_cpu_shares(
const char *unit,
const char *filename,
@@ -2787,7 +3035,7 @@ int config_parse_cpu_quota(
return 0;
}
- r = parse_percent(rvalue);
+ r = parse_percent_unbounded(rvalue);
if (r <= 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "CPU quota '%s' invalid. Ignoring.", rvalue);
return 0;
@@ -2837,8 +3085,12 @@ int config_parse_memory_limit(
c->memory_high = bytes;
else if (streq(lvalue, "MemoryMax"))
c->memory_max = bytes;
- else
+ else if (streq(lvalue, "MemorySwapMax"))
+ c->memory_swap_max = bytes;
+ else if (streq(lvalue, "MemoryLimit"))
c->memory_limit = bytes;
+ else
+ return -EINVAL;
return 0;
}
@@ -2855,30 +3107,36 @@ int config_parse_tasks_max(
void *data,
void *userdata) {
- uint64_t *tasks_max = data, u;
+ uint64_t *tasks_max = data, v;
+ Unit *u = userdata;
int r;
- if (isempty(rvalue) || streq(rvalue, "infinity")) {
- *tasks_max = (uint64_t) -1;
+ if (isempty(rvalue)) {
+ *tasks_max = u->manager->default_tasks_max;
+ return 0;
+ }
+
+ if (streq(rvalue, "infinity")) {
+ *tasks_max = CGROUP_LIMIT_MAX;
return 0;
}
r = parse_percent(rvalue);
if (r < 0) {
- r = safe_atou64(rvalue, &u);
+ r = safe_atou64(rvalue, &v);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
return 0;
}
} else
- u = system_tasks_max_scale(r, 100U);
+ v = system_tasks_max_scale(r, 100U);
- if (u <= 0 || u >= UINT64_MAX) {
+ if (v <= 0 || v >= UINT64_MAX) {
log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
return 0;
}
- *tasks_max = u;
+ *tasks_max = v;
return 0;
}
@@ -2921,9 +3179,7 @@ int config_parse_device_allow(
if (!path)
return log_oom();
- if (!startswith(path, "/dev/") &&
- !startswith(path, "block-") &&
- !startswith(path, "char-")) {
+ if (!is_deviceallow_pattern(path)) {
log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path);
return 0;
}
@@ -3578,7 +3834,7 @@ int config_parse_no_new_privileges(
return 0;
}
- c->no_new_privileges = !!k;
+ c->no_new_privileges = k;
c->no_new_privileges_set = true;
return 0;
@@ -4028,8 +4284,8 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_exec_cpu_affinity, "CPUAFFINITY" },
{ config_parse_mode, "MODE" },
{ config_parse_unit_env_file, "FILE" },
- { config_parse_output, "OUTPUT" },
- { config_parse_input, "INPUT" },
+ { config_parse_exec_output, "OUTPUT" },
+ { config_parse_exec_input, "INPUT" },
{ config_parse_log_facility, "FACILITY" },
{ config_parse_log_level, "LEVEL" },
{ config_parse_exec_secure_bits, "SECUREBITS" },
@@ -4064,7 +4320,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_unit_slice, "SLICE" },
{ config_parse_documentation, "URL" },
{ config_parse_service_timeout, "SECONDS" },
- { config_parse_failure_action, "ACTION" },
+ { config_parse_emergency_action, "ACTION" },
{ config_parse_set_status, "STATUS" },
{ config_parse_service_sockets, "SOCKETS" },
{ config_parse_environ, "ENVIRON" },
@@ -4075,6 +4331,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_address_families, "FAMILIES" },
#endif
{ config_parse_cpu_shares, "SHARES" },
+ { config_parse_cpu_weight, "WEIGHT" },
{ config_parse_memory_limit, "LIMIT" },
{ config_parse_device_allow, "DEVICE" },
{ config_parse_device_policy, "POLICY" },
diff --git a/src/grp-system/libcore/src/manager.c b/src/grp-system/libcore/src/manager.c
index b769c33b20..bad8cf0dbb 100644
--- a/src/grp-system/libcore/src/manager.c
+++ b/src/grp-system/libcore/src/manager.c
@@ -74,9 +74,11 @@
#include "systemd-basic/time-util.h"
#include "systemd-basic/umask-util.h"
#include "systemd-basic/unit-name.h"
+#include "systemd-basic/user-util.h"
#include "systemd-basic/util.h"
#include "systemd-basic/virt.h"
#include "systemd-shared/boot-timestamps.h"
+#include "systemd-shared/clean-ipc.h"
#include "systemd-shared/path-lookup.h"
#include "systemd-shared/watchdog.h"
@@ -100,6 +102,7 @@ static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, ui
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
static int manager_run_generators(Manager *m);
@@ -521,6 +524,7 @@ static void manager_clean_environment(Manager *m) {
"LISTEN_FDNAMES",
"WATCHDOG_PID",
"WATCHDOG_USEC",
+ "INVOCATION_ID",
NULL);
}
@@ -555,7 +559,6 @@ static int manager_default_environment(Manager *m) {
return 0;
}
-
int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
Manager *m;
int r;
@@ -582,17 +585,25 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
if (MANAGER_IS_SYSTEM(m)) {
m->unit_log_field = "UNIT=";
m->unit_log_format_string = "UNIT=%s";
+
+ m->invocation_log_field = "INVOCATION_ID=";
+ m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
} else {
m->unit_log_field = "USER_UNIT=";
m->unit_log_format_string = "USER_UNIT=%s";
+
+ m->invocation_log_field = "USER_INVOCATION_ID=";
+ m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
}
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
- m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd =
+ m->dev_autofs_fd = m->private_listen_fd = m->cgroup_inotify_fd =
m->ask_password_inotify_fd = -1;
+ m->user_lookup_fds[0] = m->user_lookup_fds[1] = -1;
+
m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
m->have_ask_password = -EINVAL; /* we don't know */
@@ -659,9 +670,8 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
goto fail;
}
- /* Note that we set up neither kdbus, nor the notify fd
- * here. We do that after deserialization, since they might
- * have gotten serialized across the reexec. */
+ /* Note that we do not set up the notify fd here. We do that after deserialization,
+ * since they might have gotten serialized across the reexec. */
m->taint_usr = dir_is_empty("/usr") > 0;
@@ -769,7 +779,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
if (!MANAGER_IS_SYSTEM(m))
return 0;
- if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* We don't need this anymore on the unified hierarchy */
return 0;
if (m->cgroups_agent_fd < 0) {
@@ -815,6 +825,59 @@ static int manager_setup_cgroups_agent(Manager *m) {
return 0;
}
+static int manager_setup_user_lookup_fd(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Set up the socket pair used for passing UID/GID resolution results from forked off processes to PID
+ * 1. Background: we can't do name lookups (NSS) from PID 1, since it might involve IPC and thus activation,
+ * and we might hence deadlock on ourselves. Hence we do all user/group lookups asynchronously from the forked
+ * off processes right before executing the binaries to start. In order to be able to clean up any IPC objects
+ * created by a unit (see RemoveIPC=) we need to know in PID 1 the used UID/GID of the executed processes,
+ * hence we establish this communication channel so that forked off processes can pass their UID/GID
+ * information back to PID 1. The forked off processes send their resolved UID/GID to PID 1 in a simple
+ * datagram, along with their unit name, so that we can share one communication socket pair among all units for
+ * this purpose.
+ *
+ * You might wonder why we need a communication channel for this that is independent of the usual notification
+ * socket scheme (i.e. $NOTIFY_SOCKET). The primary difference is about trust: data sent via the $NOTIFY_SOCKET
+ * channel is only accepted if it originates from the right unit and if reception was enabled for it. The user
+ * lookup socket OTOH is only accessible by PID 1 and its children until they exec(), and always available.
+ *
+ * Note that this function is called under two circumstances: when we first initialize (in which case we
+ * allocate both the socket pair and the event source to listen on it), and when we deserialize after a reload
+ * (in which case the socket pair already exists but we still need to allocate the event source for it). */
+
+ if (m->user_lookup_fds[0] < 0) {
+
+ /* Free all secondary fields */
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->user_lookup_fds) < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup socket: %m");
+
+ (void) fd_inc_rcvbuf(m->user_lookup_fds[0], NOTIFY_RCVBUF_SIZE);
+ }
+
+ if (!m->user_lookup_event_source) {
+ r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+
+ /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
+ * resolutions */
+ r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+ if (r < 0)
+ return log_error_errno(errno, "Failed to set priority ot user lookup event source: %m");
+
+ (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
+ }
+
+ return 0;
+}
+
static int manager_connect_bus(Manager *m, bool reexecuting) {
bool try_bus_connect;
@@ -824,7 +887,6 @@ static int manager_connect_bus(Manager *m, bool reexecuting) {
return 0;
try_bus_connect =
- m->kdbus_fd >= 0 ||
reexecuting ||
(MANAGER_IS_USER(m) && getenv("DBUS_SESSION_BUS_ADDRESS"));
@@ -856,8 +918,7 @@ enum {
_GC_OFFSET_MAX
};
-static void unit_gc_mark_good(Unit *u, unsigned gc_marker)
-{
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker) {
Iterator i;
Unit *other;
@@ -1006,7 +1067,11 @@ Manager* manager_free(Manager *m) {
bus_done(m);
+ dynamic_user_vacuum(m, false);
+ hashmap_free(m->dynamic_users);
+
hashmap_free(m->units);
+ hashmap_free(m->units_by_invocation_id);
hashmap_free(m->jobs);
hashmap_free(m->watch_pids1);
hashmap_free(m->watch_pids2);
@@ -1021,12 +1086,13 @@ Manager* manager_free(Manager *m) {
sd_event_source_unref(m->time_change_event_source);
sd_event_source_unref(m->jobs_in_progress_event_source);
sd_event_source_unref(m->run_queue_event_source);
+ sd_event_source_unref(m->user_lookup_event_source);
safe_close(m->signal_fd);
safe_close(m->notify_fd);
safe_close(m->cgroups_agent_fd);
safe_close(m->time_change_fd);
- safe_close(m->kdbus_fd);
+ safe_close_pair(m->user_lookup_fds);
manager_close_ask_password(m);
@@ -1052,8 +1118,10 @@ Manager* manager_free(Manager *m) {
assert(hashmap_isempty(m->units_requiring_mounts_for));
hashmap_free(m->units_requiring_mounts_for);
- free(m);
- return NULL;
+ hashmap_free(m->uid_refs);
+ hashmap_free(m->gid_refs);
+
+ return mfree(m);
}
void manager_enumerate(Manager *m) {
@@ -1177,9 +1245,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
return r;
/* Make sure the transient directory always exists, so that it remains in the search path */
- r = mkdir_p_label(m->lookup_paths.transient, 0755);
- if (r < 0)
- return r;
+ if (!m->test_run) {
+ r = mkdir_p_label(m->lookup_paths.transient, 0755);
+ if (r < 0)
+ return r;
+ }
dual_timestamp_get(&m->generators_start_timestamp);
r = manager_run_generators(m);
@@ -1221,14 +1291,26 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
if (q < 0 && r == 0)
r = q;
- /* We might have deserialized the kdbus control fd, but if we
- * didn't, then let's create the bus now. */
- manager_connect_bus(m, !!serialization);
- bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed);
+ q = manager_setup_user_lookup_fd(m);
+ if (q < 0 && r == 0)
+ r = q;
+
+ /* Let's connect to the bus now. */
+ (void) manager_connect_bus(m, !!serialization);
+
+ (void) bus_track_coldplug(m, &m->subscribed, false, m->deserialized_subscribed);
+ m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
/* Third, fire things up! */
manager_coldplug(m);
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
if (serialization) {
assert(m->n_reloading > 0);
m->n_reloading--;
@@ -1601,8 +1683,14 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const
if (UNIT_VTABLE(u)->notify_message)
UNIT_VTABLE(u)->notify_message(u, pid, tags, fds);
- else
- log_unit_debug(u, "Got notification message for unit. Ignoring.");
+ else if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
+ _cleanup_free_ char *x = NULL, *y = NULL;
+
+ x = cescape(buf);
+ if (x)
+ y = ellipsize(x, 20, 90);
+ log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+ }
}
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
@@ -1628,7 +1716,6 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
struct cmsghdr *cmsg;
struct ucred *ucred = NULL;
- bool found = false;
Unit *u1, *u2, *u3;
int r, *fd_array = NULL;
unsigned n_fds = 0;
@@ -1642,16 +1729,15 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
return 0;
}
- n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
if (n < 0) {
- if (!IN_SET(errno, EAGAIN, EINTR))
- log_error("Failed to receive notification message: %m");
+ if (IN_SET(errno, EAGAIN, EINTR))
+ return 0; /* Spurious wakeup, try again */
- /* It's not an option to return an error here since it
- * would disable the notification handler entirely. Services
- * wouldn't be able to send the WATCHDOG message for
- * example... */
- return 0;
+ /* If this is any other, real error, then let's stop processing this socket. This of course means we
+ * won't take notification messages anymore, but that's still better than busy looping around this:
+ * being woken up over and over again but being unable to actually read the message off the socket. */
+ return log_error_errno(errno, "Failed to receive notification message: %m");
}
CMSG_FOREACH(cmsg, &msghdr) {
@@ -1684,40 +1770,40 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
return 0;
}
- if ((size_t) n >= sizeof(buf)) {
+ if ((size_t) n >= sizeof(buf) || (msghdr.msg_flags & MSG_TRUNC)) {
log_warning("Received notify message exceeded maximum size. Ignoring.");
return 0;
}
- /* The message should be a string. Here we make sure it's NUL-terminated,
- * but only the part until first NUL will be used anyway. */
+ /* As extra safety check, let's make sure the string we get doesn't contain embedded NUL bytes. We permit one
+ * trailing NUL byte in the message, but don't expect it. */
+ if (n > 1 && memchr(buf, 0, n-1)) {
+ log_warning("Received notify message with embedded NUL bytes. Ignoring.");
+ return 0;
+ }
+
+ /* Make sure it's NUL-terminated. */
buf[n] = 0;
/* Notify every unit that might be interested, but try
* to avoid notifying the same one multiple times. */
u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
- if (u1) {
+ if (u1)
manager_invoke_notify_message(m, u1, ucred->pid, buf, fds);
- found = true;
- }
u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
- if (u2 && u2 != u1) {
+ if (u2 && u2 != u1)
manager_invoke_notify_message(m, u2, ucred->pid, buf, fds);
- found = true;
- }
u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
- if (u3 && u3 != u2 && u3 != u1) {
+ if (u3 && u3 != u2 && u3 != u1)
manager_invoke_notify_message(m, u3, ucred->pid, buf, fds);
- found = true;
- }
- if (!found)
+ if (!u1 && !u2 && !u3)
log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid);
if (fdset_size(fds) > 0)
- log_warning("Got auxiliary fds with notification message, closing all.");
+ log_warning("Got extra auxiliary fds with notification message, closing them.");
return 0;
}
@@ -1822,6 +1908,18 @@ static int manager_start_target(Manager *m, const char *name, JobMode mode) {
return r;
}
+static void manager_handle_ctrl_alt_del(Manager *m) {
+ /* If the user presses C-A-D more than
+ * 7 times within 2s, we reboot/shutdown immediately,
+ * unless it was disabled in system.conf */
+
+ if (ratelimit_test(&m->ctrl_alt_del_ratelimit) || m->cad_burst_action == EMERGENCY_ACTION_NONE)
+ manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
+ else
+ emergency_action(m, m->cad_burst_action, NULL,
+ "Ctrl-Alt-Del was pressed more than 7 times within 2s");
+}
+
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Manager *m = userdata;
ssize_t n;
@@ -1840,14 +1938,17 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
for (;;) {
n = read(m->signal_fd, &sfsi, sizeof(sfsi));
if (n != sizeof(sfsi)) {
+ if (n >= 0) {
+ log_warning("Truncated read from signal fd (%zu bytes)!", n);
+ return 0;
+ }
- if (n >= 0)
- return -EIO;
-
- if (errno == EINTR || errno == EAGAIN)
+ if (IN_SET(errno, EINTR, EAGAIN))
break;
- return -errno;
+ /* We return an error here, which will kill this handler,
+ * to avoid a busy loop on read error. */
+ return log_error_errno(errno, "Reading from signal fd failed: %m");
}
log_received_signal(sfsi.ssi_signo == SIGCHLD ||
@@ -1873,19 +1974,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
case SIGINT:
if (MANAGER_IS_SYSTEM(m)) {
-
- /* If the user presses C-A-D more than
- * 7 times within 2s, we reboot
- * immediately. */
-
- if (ratelimit_test(&m->ctrl_alt_del_ratelimit))
- manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE_IRREVERSIBLY);
- else {
- log_notice("Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
- status_printf(NULL, true, false, "Ctrl-Alt-Del was pressed more than 7 times within 2s, rebooting immediately.");
- m->exit_code = MANAGER_REBOOT;
- }
-
+ manager_handle_ctrl_alt_del(m);
break;
}
@@ -2171,6 +2260,7 @@ int manager_loop(Manager *m) {
int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u) {
_cleanup_free_ char *n = NULL;
+ sd_id128_t invocation_id;
Unit *u;
int r;
@@ -2182,12 +2272,25 @@ int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e,
if (r < 0)
return r;
+ /* Permit addressing units by invocation ID: if the passed bus path is suffixed by a 128bit ID then we use it
+ * as invocation ID. */
+ r = sd_id128_from_string(n, &invocation_id);
+ if (r >= 0) {
+ u = hashmap_get(m->units_by_invocation_id, &invocation_id);
+ if (u) {
+ *_u = u;
+ return 0;
+ }
+
+ return sd_bus_error_setf(e, BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, "No unit with the specified invocation ID " SD_ID128_FORMAT_STR " known.", SD_ID128_FORMAT_VAL(invocation_id));
+ }
+
+ /* If this didn't work, we use the suffix as unit name. */
r = manager_load_unit(m, n, NULL, e, &u);
if (r < 0)
return r;
*_u = u;
-
return 0;
}
@@ -2399,17 +2502,28 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
fprintf(f, "cgroups-agent-fd=%i\n", copy);
}
- if (m->kdbus_fd >= 0) {
- int copy;
+ if (m->user_lookup_fds[0] >= 0) {
+ int copy0, copy1;
- copy = fdset_put_dup(fds, m->kdbus_fd);
- if (copy < 0)
- return copy;
+ copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
+ if (copy0 < 0)
+ return copy0;
+
+ copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
+ if (copy1 < 0)
+ return copy1;
- fprintf(f, "kdbus-fd=%i\n", copy);
+ fprintf(f, "user-lookup=%i %i\n", copy0, copy1);
}
- bus_track_serialize(m->subscribed, f);
+ bus_track_serialize(m->subscribed, f, "subscribed");
+
+ r = dynamic_user_serialize(m, f, fds);
+ if (r < 0)
+ return r;
+
+ manager_serialize_uid_refs(m, f);
+ manager_serialize_gid_refs(m, f);
fputc('\n', f);
@@ -2577,25 +2691,31 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
m->cgroups_agent_fd = fdset_remove(fds, fd);
}
- } else if (startswith(l, "kdbus-fd=")) {
- int fd;
+ } else if (startswith(l, "user-lookup=")) {
+ int fd0, fd1;
- if (safe_atoi(l + 9, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
- log_debug("Failed to parse kdbus fd: %s", l + 9);
+ if (sscanf(l + 12, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
+ log_debug("Failed to parse user lookup fd: %s", l + 12);
else {
- safe_close(m->kdbus_fd);
- m->kdbus_fd = fdset_remove(fds, fd);
+ m->user_lookup_event_source = sd_event_source_unref(m->user_lookup_event_source);
+ safe_close_pair(m->user_lookup_fds);
+ m->user_lookup_fds[0] = fdset_remove(fds, fd0);
+ m->user_lookup_fds[1] = fdset_remove(fds, fd1);
}
- } else {
- int k;
+ } else if (startswith(l, "dynamic-user="))
+ dynamic_user_deserialize_one(m, l + 13, fds);
+ else if (startswith(l, "destroy-ipc-uid="))
+ manager_deserialize_uid_refs_one(m, l + 16);
+ else if (startswith(l, "destroy-ipc-gid="))
+ manager_deserialize_gid_refs_one(m, l + 16);
+ else if (startswith(l, "subscribed=")) {
- k = bus_track_deserialize_item(&m->deserialized_subscribed, l);
- if (k < 0)
- log_debug_errno(k, "Failed to deserialize bus tracker object: %m");
- else if (k == 0)
- log_debug("Unknown serialization item '%s'", l);
- }
+ if (strv_extend(&m->deserialized_subscribed, l+11) < 0)
+ log_oom();
+
+ } else if (!startswith(l, "kdbus-fd=")) /* ignore this one */
+ log_debug("Unknown serialization item '%s'", l);
}
for (;;) {
@@ -2668,6 +2788,9 @@ int manager_reload(Manager *m) {
manager_clear_jobs_and_units(m);
lookup_paths_flush_generator(&m->lookup_paths);
lookup_paths_free(&m->lookup_paths);
+ dynamic_user_vacuum(m, false);
+ m->uid_refs = hashmap_free(m->uid_refs);
+ m->gid_refs = hashmap_free(m->gid_refs);
q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL);
if (q < 0 && r >= 0)
@@ -2701,9 +2824,20 @@ int manager_reload(Manager *m) {
if (q < 0 && r >= 0)
r = q;
+ q = manager_setup_user_lookup_fd(m);
+ if (q < 0 && r >= 0)
+ r = q;
+
/* Third, fire things up! */
manager_coldplug(m);
+ /* Release any dynamic users no longer referenced */
+ dynamic_user_vacuum(m, true);
+
+ /* Release any references to UIDs/GIDs no longer referenced, and destroy any IPC owned by them */
+ manager_vacuum_uid_refs(m);
+ manager_vacuum_gid_refs(m);
+
/* Sync current state of bus names with our set of listening units */
if (m->api_bus)
manager_sync_bus_names(m, m->api_bus);
@@ -2949,7 +3083,7 @@ int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
if (!m->rlimit[i])
- return -ENOMEM;
+ return log_oom();
}
return 0;
@@ -3137,6 +3271,300 @@ ManagerState manager_state(Manager *m) {
return MANAGER_RUNNING;
}
+#define DESTROY_IPC_FLAG (UINT32_C(1) << 31)
+
+static void manager_unref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool destroy_now,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ uint32_t c, n;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+ assert(_clean_ipc);
+
+ /* A generic implementation, covering both manager_unref_uid() and manager_unref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules.
+ *
+ * We store a hashmap where the UID/GID is they key and the value is a 32bit reference counter, whose highest
+ * bit is used as flag for marking UIDs/GIDs whose IPC objects to remove when the last reference to the UID/GID
+ * is dropped. The flag is set to on, once at least one reference from a unit where RemoveIPC= is set is added
+ * on a UID/GID. It is reset when the UID's/GID's reference counter drops to 0 again. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ assert(n > 0);
+ n--;
+
+ if (destroy_now && n == 0) {
+ hashmap_remove(*uid_refs, UID_TO_PTR(uid));
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("%s " UID_FMT " is no longer referenced, cleaning up its IPC.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+ } else {
+ c = n | (c & DESTROY_IPC_FLAG);
+ assert_se(hashmap_update(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c)) >= 0);
+ }
+}
+
+void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->uid_refs, uid, destroy_now, clean_ipc_by_uid);
+}
+
+void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now) {
+ manager_unref_uid_internal(m, &m->gid_refs, (uid_t) gid, destroy_now, clean_ipc_by_gid);
+}
+
+static int manager_ref_uid_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ uid_t uid,
+ bool clean_ipc) {
+
+ uint32_t c, n;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(uid_is_valid(uid));
+
+ /* A generic implementation, covering both manager_ref_uid() and manager_ref_gid(), under the assumption
+ * that uid_t and gid_t are actually defined the same way, with the same validity rules. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (uid == 0) /* We don't keep track of root, and will never destroy it */
+ return 0;
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+
+ n = c & ~DESTROY_IPC_FLAG;
+ n++;
+
+ if (n & DESTROY_IPC_FLAG) /* check for overflow */
+ return -EOVERFLOW;
+
+ c = n | (c & DESTROY_IPC_FLAG) | (clean_ipc ? DESTROY_IPC_FLAG : 0);
+
+ return hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+}
+
+int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->uid_refs, uid, clean_ipc);
+}
+
+int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc) {
+ return manager_ref_uid_internal(m, &m->gid_refs, (uid_t) gid, clean_ipc);
+}
+
+static void manager_vacuum_uid_refs_internal(
+ Manager *m,
+ Hashmap **uid_refs,
+ int (*_clean_ipc)(uid_t uid)) {
+
+ Iterator i;
+ void *p, *k;
+
+ assert(m);
+ assert(uid_refs);
+ assert(_clean_ipc);
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+ uint32_t c, n;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ n = c & ~DESTROY_IPC_FLAG;
+ if (n > 0)
+ continue;
+
+ if (c & DESTROY_IPC_FLAG) {
+ log_debug("Found unreferenced %s " UID_FMT " after reload/reexec. Cleaning up.",
+ _clean_ipc == clean_ipc_by_uid ? "UID" : "GID",
+ uid);
+ (void) _clean_ipc(uid);
+ }
+
+ assert_se(hashmap_remove(*uid_refs, k) == p);
+ }
+}
+
+void manager_vacuum_uid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->uid_refs, clean_ipc_by_uid);
+}
+
+void manager_vacuum_gid_refs(Manager *m) {
+ manager_vacuum_uid_refs_internal(m, &m->gid_refs, clean_ipc_by_gid);
+}
+
+static void manager_serialize_uid_refs_internal(
+ Manager *m,
+ FILE *f,
+ Hashmap **uid_refs,
+ const char *field_name) {
+
+ Iterator i;
+ void *p, *k;
+
+ assert(m);
+ assert(f);
+ assert(uid_refs);
+ assert(field_name);
+
+ /* Serialize the UID reference table. Or actually, just the IPC destruction flag of it, as the actual counter
+ * of it is better rebuild after a reload/reexec. */
+
+ HASHMAP_FOREACH_KEY(p, k, *uid_refs, i) {
+ uint32_t c;
+ uid_t uid;
+
+ uid = PTR_TO_UID(k);
+ c = PTR_TO_UINT32(p);
+
+ if (!(c & DESTROY_IPC_FLAG))
+ continue;
+
+ fprintf(f, "%s=" UID_FMT "\n", field_name, uid);
+ }
+}
+
+void manager_serialize_uid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->uid_refs, "destroy-ipc-uid");
+}
+
+void manager_serialize_gid_refs(Manager *m, FILE *f) {
+ manager_serialize_uid_refs_internal(m, f, &m->gid_refs, "destroy-ipc-gid");
+}
+
+static void manager_deserialize_uid_refs_one_internal(
+ Manager *m,
+ Hashmap** uid_refs,
+ const char *value) {
+
+ uid_t uid;
+ uint32_t c;
+ int r;
+
+ assert(m);
+ assert(uid_refs);
+ assert(value);
+
+ r = parse_uid(value, &uid);
+ if (r < 0 || uid == 0) {
+ log_debug("Unable to parse UID reference serialization");
+ return;
+ }
+
+ r = hashmap_ensure_allocated(uid_refs, &trivial_hash_ops);
+ if (r < 0) {
+ log_oom();
+ return;
+ }
+
+ c = PTR_TO_UINT32(hashmap_get(*uid_refs, UID_TO_PTR(uid)));
+ if (c & DESTROY_IPC_FLAG)
+ return;
+
+ c |= DESTROY_IPC_FLAG;
+
+ r = hashmap_replace(*uid_refs, UID_TO_PTR(uid), UINT32_TO_PTR(c));
+ if (r < 0) {
+ log_debug("Failed to add UID reference entry");
+ return;
+ }
+}
+
+void manager_deserialize_uid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->uid_refs, value);
+}
+
+void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
+ manager_deserialize_uid_refs_one_internal(m, &m->gid_refs, value);
+}
+
+int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ struct buffer {
+ uid_t uid;
+ gid_t gid;
+ char unit_name[UNIT_NAME_MAX+1];
+ } _packed_ buffer;
+
+ Manager *m = userdata;
+ ssize_t l;
+ size_t n;
+ Unit *u;
+
+ assert_se(source);
+ assert_se(m);
+
+ /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the resulting UID/GID
+ * in a datagram. We parse the datagram here and pass it off to the unit, so that it can add a reference to the
+ * UID/GID so that it can destroy the UID/GID's IPC objects when the reference counter drops to 0. */
+
+ l = recv(fd, &buffer, sizeof(buffer), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from user lookup fd: %m");
+ }
+
+ if ((size_t) l <= offsetof(struct buffer, unit_name)) {
+ log_warning("Received too short user lookup message, ignoring.");
+ return 0;
+ }
+
+ if ((size_t) l > offsetof(struct buffer, unit_name) + UNIT_NAME_MAX) {
+ log_warning("Received too long user lookup message, ignoring.");
+ return 0;
+ }
+
+ if (!uid_is_valid(buffer.uid) && !gid_is_valid(buffer.gid)) {
+ log_warning("Got user lookup message with invalid UID/GID pair, ignoring.");
+ return 0;
+ }
+
+ n = (size_t) l - offsetof(struct buffer, unit_name);
+ if (memchr(buffer.unit_name, 0, n)) {
+ log_warning("Received lookup message with embedded NUL character, ignoring.");
+ return 0;
+ }
+
+ buffer.unit_name[n] = 0;
+ u = manager_get_unit(m, buffer.unit_name);
+ if (!u) {
+ log_debug("Got user lookup message but unit doesn't exist, ignoring.");
+ return 0;
+ }
+
+ log_unit_debug(u, "User lookup succeeded: uid=" UID_FMT " gid=" GID_FMT, buffer.uid, buffer.gid);
+
+ unit_notify_user_lookup(u, buffer.uid, buffer.gid);
+ return 0;
+}
+
static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
[MANAGER_INITIALIZING] = "initializing",
[MANAGER_STARTING] = "starting",
diff --git a/src/grp-system/libcore/src/mount-setup.c b/src/grp-system/libcore/src/mount-setup.c
index a4e72eda03..46e6f71425 100644
--- a/src/grp-system/libcore/src/mount-setup.c
+++ b/src/grp-system/libcore/src/mount-setup.c
@@ -99,10 +99,12 @@ static const MountPoint mount_table[] = {
cg_is_unified_wanted, MNT_FATAL|MNT_IN_CONTAINER },
{ "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_wanted, MNT_IN_CONTAINER },
+ cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER },
{ "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
- cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
+ cg_is_legacy_systemd_controller_wanted, MNT_FATAL|MNT_IN_CONTAINER },
{ "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
NULL, MNT_NONE },
#ifdef ENABLE_EFI
diff --git a/src/grp-system/libcore/src/mount.c b/src/grp-system/libcore/src/mount.c
index b0c3b6722c..06e8313e3d 100644
--- a/src/grp-system/libcore/src/mount.c
+++ b/src/grp-system/libcore/src/mount.c
@@ -160,17 +160,6 @@ static void mount_init(Unit *u) {
m->timeout_usec = u->manager->default_timeout_start_usec;
m->directory_mode = 0755;
- if (unit_has_name(u, "-.mount")) {
- /* Don't allow start/stop for root directory */
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
- } else {
- /* The stdio/kmsg bridge socket is on /, in order to avoid a
- * dep loop, don't use kmsg logging for -.mount */
- m->exec_context.std_output = u->manager->default_std_output;
- m->exec_context.std_error = u->manager->default_std_error;
- }
-
/* We need to make sure that /usr/bin/mount is always called
* in the same process group as us, so that the autofs kernel
* side doesn't send us another mount request while we are
@@ -246,6 +235,8 @@ static void mount_done(Unit *u) {
exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
m->control_command = NULL;
+ dynamic_creds_unref(&m->dynamic_creds);
+
mount_unwatch_control_pid(m);
m->timer_event_source = sd_event_source_unref(m->timer_event_source);
@@ -483,6 +474,7 @@ static int mount_add_default_dependencies(Mount *m) {
static int mount_verify(Mount *m) {
_cleanup_free_ char *e = NULL;
+ MountParameters *p;
int r;
assert(m);
@@ -507,7 +499,8 @@ static int mount_verify(Mount *m) {
return -EINVAL;
}
- if (UNIT(m)->fragment_path && !m->parameters_fragment.what) {
+ p = get_mount_parameters_fragment(m);
+ if (p && !p->what) {
log_unit_error(UNIT(m), "What= setting is missing. Refusing.");
return -EBADMSG;
}
@@ -574,6 +567,25 @@ static int mount_add_extras(Mount *m) {
return 0;
}
+static int mount_load_root_mount(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
+ return 0;
+
+ u->perpetual = true;
+ u->default_dependencies = false;
+
+ /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
+ MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
+ MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+
+ if (!u->description)
+ u->description = strdup("Root Mount");
+
+ return 1;
+}
+
static int mount_load(Unit *u) {
Mount *m = MOUNT(u);
int r;
@@ -581,11 +593,14 @@ static int mount_load(Unit *u) {
assert(u);
assert(u->load_state == UNIT_STUB);
- if (m->from_proc_self_mountinfo)
+ r = mount_load_root_mount(u);
+ if (r < 0)
+ return r;
+
+ if (m->from_proc_self_mountinfo || u->perpetual)
r = unit_load_fragment_and_dropin_optional(u);
else
r = unit_load_fragment_and_dropin(u);
-
if (r < 0)
return r;
@@ -649,6 +664,9 @@ static int mount_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
mount_set_state(m, new_state);
return 0;
}
@@ -671,7 +689,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
"%sOptions: %s\n"
"%sFrom /proc/self/mountinfo: %s\n"
"%sFrom fragment: %s\n"
- "%sDirectoryMode: %04o\n",
+ "%sDirectoryMode: %04o\n"
+ "%sSloppyOptions: %s\n"
+ "%sLazyUnmount: %s\n"
+ "%sForceUnmount: %s\n",
prefix, mount_state_to_string(m->state),
prefix, mount_result_to_string(m->result),
prefix, m->where,
@@ -680,7 +701,10 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
prefix, p ? strna(p->options) : "n/a",
prefix, yes_no(m->from_proc_self_mountinfo),
prefix, yes_no(m->from_fragment),
- prefix, m->directory_mode);
+ prefix, m->directory_mode,
+ prefix, yes_no(m->sloppy_options),
+ prefix, yes_no(m->lazy_unmount),
+ prefix, yes_no(m->force_unmount));
if (m->control_pid > 0)
fprintf(f,
@@ -695,12 +719,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
pid_t pid;
int r;
ExecParameters exec_params = {
- .apply_permissions = true,
- .apply_chroot = true,
- .apply_tty_stdin = true,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
assert(m);
@@ -717,12 +739,16 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(m));
+ if (r < 0)
+ return r;
+
r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec));
if (r < 0)
return r;
exec_params.environment = UNIT(m)->manager->environment;
- exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(m)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(m)->cgroup_path;
exec_params.cgroup_delegate = m->cgroup_context.delegate;
@@ -733,6 +759,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
&m->exec_context,
&exec_params,
m->exec_runtime,
+ &m->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -750,21 +777,25 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
static void mount_enter_dead(Mount *m, MountResult f) {
assert(m);
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
+ mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+
exec_runtime_destroy(m->exec_runtime);
m->exec_runtime = exec_runtime_unref(m->exec_runtime);
exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager));
- mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD);
+ unit_unref_uid_gid(UNIT(m), true);
+
+ dynamic_creds_destroy(&m->dynamic_creds);
}
static void mount_enter_mounted(Mount *m, MountResult f) {
assert(m);
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
mount_set_state(m, MOUNT_MOUNTED);
@@ -775,7 +806,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
assert(m);
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
r = unit_kill_context(
@@ -811,7 +842,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
fail:
log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
- if (state == MOUNT_REMOUNTING_SIGTERM || state == MOUNT_REMOUNTING_SIGKILL)
+ if (IN_SET(state, MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL))
mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES);
else
mount_enter_dead(m, MOUNT_FAILURE_RESOURCES);
@@ -833,6 +864,10 @@ static void mount_enter_unmounting(Mount *m) {
m->control_command = m->exec_command + MOUNT_EXEC_UNMOUNT;
r = exec_command_set(m->control_command, UMOUNT_PATH, m->where, NULL);
+ if (r >= 0 && m->lazy_unmount)
+ r = exec_command_append(m->control_command, "-l", NULL);
+ if (r >= 0 && m->force_unmount)
+ r = exec_command_append(m->control_command, "-f", NULL);
if (r < 0)
goto fail;
@@ -851,11 +886,6 @@ fail:
mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES);
}
-static int mount_get_opts(Mount *m, char **ret) {
- return fstab_filter_options(m->parameters_fragment.options,
- "nofail\0" "noauto\0" "auto\0", NULL, NULL, ret);
-}
-
static void mount_enter_mounting(Mount *m) {
int r;
MountParameters *p;
@@ -878,19 +908,18 @@ static void mount_enter_mounting(Mount *m) {
if (p && mount_is_bind(p))
(void) mkdir_p_label(p->what, m->directory_mode);
- if (m->from_fragment) {
+ if (p) {
_cleanup_free_ char *opts = NULL;
- r = mount_get_opts(m, &opts);
+ r = fstab_filter_options(p->options, "nofail\0" "noauto\0" "auto\0", NULL, NULL, &opts);
if (r < 0)
goto fail;
- r = exec_command_set(m->control_command, MOUNT_PATH,
- m->parameters_fragment.what, m->where, NULL);
+ r = exec_command_set(m->control_command, MOUNT_PATH, p->what, m->where, NULL);
if (r >= 0 && m->sloppy_options)
r = exec_command_append(m->control_command, "-s", NULL);
- if (r >= 0 && m->parameters_fragment.fstype)
- r = exec_command_append(m->control_command, "-t", m->parameters_fragment.fstype, NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
if (r >= 0 && !isempty(opts))
r = exec_command_append(m->control_command, "-o", opts, NULL);
} else
@@ -916,27 +945,29 @@ fail:
static void mount_enter_remounting(Mount *m) {
int r;
+ MountParameters *p;
assert(m);
m->control_command_id = MOUNT_EXEC_REMOUNT;
m->control_command = m->exec_command + MOUNT_EXEC_REMOUNT;
- if (m->from_fragment) {
+ p = get_mount_parameters_fragment(m);
+ if (p) {
const char *o;
- if (m->parameters_fragment.options)
- o = strjoina("remount,", m->parameters_fragment.options);
+ if (p->options)
+ o = strjoina("remount,", p->options);
else
o = "remount";
r = exec_command_set(m->control_command, MOUNT_PATH,
- m->parameters_fragment.what, m->where,
+ p->what, m->where,
"-o", o, NULL);
if (r >= 0 && m->sloppy_options)
r = exec_command_append(m->control_command, "-s", NULL);
- if (r >= 0 && m->parameters_fragment.fstype)
- r = exec_command_append(m->control_command, "-t", m->parameters_fragment.fstype, NULL);
+ if (r >= 0 && p->fstype)
+ r = exec_command_append(m->control_command, "-t", p->fstype, NULL);
} else
r = -ENOENT;
@@ -967,18 +998,19 @@ static int mount_start(Unit *u) {
/* We cannot fulfill this request right now, try again later
* please! */
- if (m->state == MOUNT_UNMOUNTING ||
- m->state == MOUNT_UNMOUNTING_SIGTERM ||
- m->state == MOUNT_UNMOUNTING_SIGKILL ||
- m->state == MOUNT_MOUNTING_SIGTERM ||
- m->state == MOUNT_MOUNTING_SIGKILL)
+ if (IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_MOUNTING_SIGTERM,
+ MOUNT_MOUNTING_SIGKILL))
return -EAGAIN;
/* Already on it! */
if (m->state == MOUNT_MOUNTING)
return 0;
- assert(m->state == MOUNT_DEAD || m->state == MOUNT_FAILED);
+ assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
r = unit_start_limit_test(u);
if (r < 0) {
@@ -986,6 +1018,10 @@ static int mount_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
m->result = MOUNT_SUCCESS;
m->reload_result = MOUNT_SUCCESS;
m->reset_cpu_usage = true;
@@ -1000,19 +1036,21 @@ static int mount_stop(Unit *u) {
assert(m);
/* Already on it */
- if (m->state == MOUNT_UNMOUNTING ||
- m->state == MOUNT_UNMOUNTING_SIGKILL ||
- m->state == MOUNT_UNMOUNTING_SIGTERM ||
- m->state == MOUNT_MOUNTING_SIGTERM ||
- m->state == MOUNT_MOUNTING_SIGKILL)
+ if (IN_SET(m->state,
+ MOUNT_UNMOUNTING,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_MOUNTING_SIGTERM,
+ MOUNT_MOUNTING_SIGKILL))
return 0;
- assert(m->state == MOUNT_MOUNTING ||
- m->state == MOUNT_MOUNTING_DONE ||
- m->state == MOUNT_MOUNTED ||
- m->state == MOUNT_REMOUNTING ||
- m->state == MOUNT_REMOUNTING_SIGTERM ||
- m->state == MOUNT_REMOUNTING_SIGKILL);
+ assert(IN_SET(m->state,
+ MOUNT_MOUNTING,
+ MOUNT_MOUNTING_DONE,
+ MOUNT_MOUNTED,
+ MOUNT_REMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL));
mount_enter_unmounting(m);
return 1;
@@ -1140,7 +1178,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
m->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = MOUNT_SUCCESS;
else if (code == CLD_EXITED)
f = MOUNT_FAILURE_EXIT_CODE;
@@ -1151,7 +1189,7 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
else
assert_not_reached("Unknown code");
- if (f != MOUNT_SUCCESS)
+ if (m->result == MOUNT_SUCCESS)
m->result = f;
if (m->control_command) {
@@ -1178,9 +1216,10 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
case MOUNT_MOUNTING_SIGKILL:
case MOUNT_MOUNTING_SIGTERM:
- if (f == MOUNT_SUCCESS)
- mount_enter_mounted(m, f);
- else if (m->from_proc_self_mountinfo)
+ if (f == MOUNT_SUCCESS || m->from_proc_self_mountinfo)
+ /* If /bin/mount returned success, or if we see the mount point in /proc/self/mountinfo we are
+ * happy. If we see the first condition first, we should see the the second condition
+ * immediately after – or /bin/mount lies to us and is broken. */
mount_enter_mounted(m, f);
else
mount_enter_dead(m, f);
@@ -1366,11 +1405,7 @@ static int mount_setup_unit(
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Mount));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Mount), e, &u);
if (r < 0)
goto fail;
@@ -1457,17 +1492,9 @@ static int mount_setup_unit(
MOUNT(u)->from_proc_self_mountinfo = true;
- free(p->what);
- p->what = w;
- w = NULL;
-
- free(p->options);
- p->options = o;
- o = NULL;
-
- free(p->fstype);
- p->fstype = f;
- f = NULL;
+ free_and_replace(p->what, w);
+ free_and_replace(p->options, o);
+ free_and_replace(p->fstype, f);
if (load_extras) {
r = mount_add_extras(MOUNT(u));
@@ -1573,11 +1600,46 @@ static int mount_get_timeout(Unit *u, usec_t *timeout) {
return 1;
}
+static int synthesize_root_mount(Manager *m) {
+ Unit *u;
+ int r;
+
+ assert(m);
+
+ /* Whatever happens, we know for sure that the root directory is around, and cannot go away. Let's
+ * unconditionally synthesize it here and mark it as perpetual. */
+
+ u = manager_get_unit(m, SPECIAL_ROOT_MOUNT);
+ if (!u) {
+ r = unit_new_for_name(m, sizeof(Mount), SPECIAL_ROOT_MOUNT, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_MOUNT " unit: %m");
+ }
+
+ u->perpetual = true;
+ MOUNT(u)->deserialized_state = MOUNT_MOUNTED;
+
+ unit_add_to_load_queue(u);
+ unit_add_to_dbus_queue(u);
+
+ return 0;
+}
+
+static bool mount_is_mounted(Mount *m) {
+ assert(m);
+
+ return UNIT(m)->perpetual || m->is_mounted;
+}
+
static void mount_enumerate(Manager *m) {
int r;
assert(m);
+ r = synthesize_root_mount(m);
+ if (r < 0)
+ goto fail;
+
mnt_init_debug(0);
if (!m->mount_monitor) {
@@ -1684,7 +1746,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT]) {
Mount *mount = MOUNT(u);
- if (!mount->is_mounted) {
+ if (!mount_is_mounted(mount)) {
/* A mount point is not around right now. It
* might be gone, or might never have
@@ -1723,9 +1785,10 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
case MOUNT_DEAD:
case MOUNT_FAILED:
- /* This has just been mounted by
- * somebody else, follow the state
- * change. */
+
+ /* This has just been mounted by somebody else, follow the state change, but let's
+ * generate a new invocation ID for this implicitly and automatically. */
+ (void) unit_acquire_invocation_id(UNIT(mount));
mount_enter_mounted(mount, MOUNT_SUCCESS);
break;
@@ -1744,7 +1807,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
}
}
- if (mount->is_mounted &&
+ if (mount_is_mounted(mount) &&
mount->from_proc_self_mountinfo &&
mount->parameters_proc_self_mountinfo.what) {
@@ -1818,6 +1881,7 @@ const UnitVTable mount_vtable = {
.cgroup_context_offset = offsetof(Mount, cgroup_context),
.kill_context_offset = offsetof(Mount, kill_context),
.exec_runtime_offset = offsetof(Mount, exec_runtime),
+ .dynamic_creds_offset = offsetof(Mount, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/grp-system/libcore/src/namespace.c b/src/grp-system/libcore/src/namespace.c
index f67b0136bf..988516d775 100644
--- a/src/grp-system/libcore/src/namespace.c
+++ b/src/grp-system/libcore/src/namespace.c
@@ -31,6 +31,7 @@
#include "core/namespace.h"
#include "systemd-basic/alloc-util.h"
#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fs-util.h"
#include "systemd-basic/missing.h"
#include "systemd-basic/mkdir.h"
#include "systemd-basic/mount-util.h"
@@ -54,61 +55,245 @@ typedef enum MountMode {
PRIVATE_TMP,
PRIVATE_VAR_TMP,
PRIVATE_DEV,
- READWRITE
+ READWRITE,
} MountMode;
typedef struct BindMount {
- const char *path;
+ const char *path; /* stack memory, doesn't need to be freed explicitly */
+ char *chased; /* malloc()ed memory, needs to be freed */
MountMode mode;
- bool done;
- bool ignore;
+ bool ignore; /* Ignore if path does not exist */
} BindMount;
+typedef struct TargetMount {
+ const char *path;
+ MountMode mode;
+ bool ignore; /* Ignore if path does not exist */
+} TargetMount;
+
+/*
+ * The following Protect tables are to protect paths and mark some of them
+ * READONLY, in case a path is covered by an option from another table, then
+ * it is marked READWRITE in the current one, and the more restrictive mode is
+ * applied from that other table. This way all options can be combined in a
+ * safe and comprehensible way for users.
+ */
+
+/* ProtectKernelTunables= option and the related filesystem APIs */
+static const TargetMount protect_kernel_tunables_table[] = {
+ { "/proc/sys", READONLY, false },
+ { "/proc/sysrq-trigger", READONLY, true },
+ { "/proc/latency_stats", READONLY, true },
+ { "/proc/mtrr", READONLY, true },
+ { "/proc/apm", READONLY, true },
+ { "/proc/acpi", READONLY, true },
+ { "/proc/timer_stats", READONLY, true },
+ { "/proc/asound", READONLY, true },
+ { "/proc/bus", READONLY, true },
+ { "/proc/fs", READONLY, true },
+ { "/proc/irq", READONLY, true },
+ { "/sys", READONLY, false },
+ { "/sys/kernel/debug", READONLY, true },
+ { "/sys/kernel/tracing", READONLY, true },
+ { "/sys/fs/cgroup", READWRITE, false }, /* READONLY is set by ProtectControlGroups= option */
+};
+
+/* ProtectKernelModules= option */
+static const TargetMount protect_kernel_modules_table[] = {
+#ifdef HAVE_SPLIT_USR
+ { "/lib/modules", INACCESSIBLE, true },
+#endif
+ { "/usr/lib/modules", INACCESSIBLE, true },
+};
+
+/*
+ * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of
+ * system should be protected by ProtectSystem=
+ */
+static const TargetMount protect_home_read_only_table[] = {
+ { "/home", READONLY, true },
+ { "/run/user", READONLY, true },
+ { "/root", READONLY, true },
+};
+
+/* ProtectHome=yes table */
+static const TargetMount protect_home_yes_table[] = {
+ { "/home", INACCESSIBLE, true },
+ { "/run/user", INACCESSIBLE, true },
+ { "/root", INACCESSIBLE, true },
+};
+
+/* ProtectSystem=yes table */
+static const TargetMount protect_system_yes_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+};
+
+/* ProtectSystem=full includes ProtectSystem=yes */
+static const TargetMount protect_system_full_table[] = {
+ { "/usr", READONLY, false },
+ { "/boot", READONLY, true },
+ { "/efi", READONLY, true },
+ { "/etc", READONLY, false },
+};
+
+/*
+ * ProtectSystem=strict table. In this strict mode, we mount everything
+ * read-only, except for /proc, /dev, /sys which are the kernel API VFS,
+ * which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal.
+ * (And of course /home and friends are also left writable, as ProtectHome=
+ * shall manage those, orthogonally).
+ */
+static const TargetMount protect_system_strict_table[] = {
+ { "/", READONLY, false },
+ { "/proc", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/sys", READWRITE, false }, /* ProtectKernelTunables= */
+ { "/dev", READWRITE, false }, /* PrivateDevices= */
+ { "/home", READWRITE, true }, /* ProtectHome= */
+ { "/run/user", READWRITE, true }, /* ProtectHome= */
+ { "/root", READWRITE, true }, /* ProtectHome= */
+};
+
+static void set_bind_mount(BindMount **p, const char *path, MountMode mode, bool ignore) {
+ (*p)->path = path;
+ (*p)->mode = mode;
+ (*p)->ignore = ignore;
+}
+
static int append_mounts(BindMount **p, char **strv, MountMode mode) {
char **i;
assert(p);
STRV_FOREACH(i, strv) {
+ bool ignore = false;
- (*p)->ignore = false;
- (*p)->done = false;
-
- if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
- (*p)->ignore = true;
+ if (IN_SET(mode, INACCESSIBLE, READONLY, READWRITE) && startswith(*i, "-")) {
(*i)++;
+ ignore = true;
}
if (!path_is_absolute(*i))
return -EINVAL;
- (*p)->path = *i;
- (*p)->mode = mode;
+ set_bind_mount(p, *i, mode, ignore);
(*p)++;
}
return 0;
}
-static int mount_path_compare(const void *a, const void *b) {
- const BindMount *p = a, *q = b;
- int d;
+static int append_target_mounts(BindMount **p, const char *root_directory, const TargetMount *mounts, const size_t size) {
+ unsigned i;
- d = path_compare(p->path, q->path);
+ assert(p);
+ assert(mounts);
+
+ for (i = 0; i < size; i++) {
+ /*
+ * Here we assume that the ignore field is set during
+ * declaration we do not support "-" at the beginning.
+ */
+ const TargetMount *m = &mounts[i];
+ const char *path = prefix_roota(root_directory, m->path);
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ set_bind_mount(p, path, m->mode, m->ignore);
+ (*p)++;
+ }
+
+ return 0;
+}
+
+static int append_protect_kernel_tunables(BindMount **p, const char *root_directory) {
+ assert(p);
+
+ return append_target_mounts(p, root_directory, protect_kernel_tunables_table,
+ ELEMENTSOF(protect_kernel_tunables_table));
+}
+
+static int append_protect_kernel_modules(BindMount **p, const char *root_directory) {
+ assert(p);
+
+ return append_target_mounts(p, root_directory, protect_kernel_modules_table,
+ ELEMENTSOF(protect_kernel_modules_table));
+}
+
+static int append_protect_home(BindMount **p, const char *root_directory, ProtectHome protect_home) {
+ int r = 0;
+
+ assert(p);
+
+ if (protect_home == PROTECT_HOME_NO)
+ return 0;
+
+ switch (protect_home) {
+ case PROTECT_HOME_READ_ONLY:
+ r = append_target_mounts(p, root_directory, protect_home_read_only_table,
+ ELEMENTSOF(protect_home_read_only_table));
+ break;
+ case PROTECT_HOME_YES:
+ r = append_target_mounts(p, root_directory, protect_home_yes_table,
+ ELEMENTSOF(protect_home_yes_table));
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
- if (d == 0) {
- /* If the paths are equal, check the mode */
- if (p->mode < q->mode)
- return -1;
+static int append_protect_system(BindMount **p, const char *root_directory, ProtectSystem protect_system) {
+ int r = 0;
- if (p->mode > q->mode)
- return 1;
+ assert(p);
+ if (protect_system == PROTECT_SYSTEM_NO)
return 0;
+
+ switch (protect_system) {
+ case PROTECT_SYSTEM_STRICT:
+ r = append_target_mounts(p, root_directory, protect_system_strict_table,
+ ELEMENTSOF(protect_system_strict_table));
+ break;
+ case PROTECT_SYSTEM_YES:
+ r = append_target_mounts(p, root_directory, protect_system_yes_table,
+ ELEMENTSOF(protect_system_yes_table));
+ break;
+ case PROTECT_SYSTEM_FULL:
+ r = append_target_mounts(p, root_directory, protect_system_full_table,
+ ELEMENTSOF(protect_system_full_table));
+ break;
+ default:
+ r = -EINVAL;
+ break;
}
+ return r;
+}
+
+static int mount_path_compare(const void *a, const void *b) {
+ const BindMount *p = a, *q = b;
+ int d;
+
/* If the paths are not equal, then order prefixes first */
- return d;
+ d = path_compare(p->path, q->path);
+ if (d != 0)
+ return d;
+
+ /* If the paths are equal, check the mode */
+ if (p->mode < q->mode)
+ return -1;
+
+ if (p->mode > q->mode)
+ return 1;
+
+ return 0;
}
static void drop_duplicates(BindMount *m, unsigned *n) {
@@ -117,16 +302,110 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
assert(m);
assert(n);
+ /* Drops duplicate entries. Expects that the array is properly ordered already. */
+
for (f = m, t = m, previous = NULL; f < m+*n; f++) {
- /* The first one wins */
- if (previous && path_equal(f->path, previous->path))
+ /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
+ * above. */
+ if (previous && path_equal(f->path, previous->path)) {
+ log_debug("%s is duplicate.", f->path);
continue;
+ }
*t = *f;
-
previous = t;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_inaccessible(BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+ const char *clear = NULL;
+ assert(m);
+ assert(n);
+
+ /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly
+ * ordered already. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop
+ * it, as inaccessible paths really should drop the entire subtree. */
+ if (clear && path_startswith(f->path, clear)) {
+ log_debug("%s is masked by %s.", f->path, clear);
+ continue;
+ }
+
+ clear = f->mode == INACCESSIBLE ? f->path : NULL;
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_nop(BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+
+ assert(m);
+ assert(n);
+
+ /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the
+ * list is ordered by prefixes. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ /* Only suppress such subtrees for READONLY and READWRITE entries */
+ if (IN_SET(f->mode, READONLY, READWRITE)) {
+ BindMount *p;
+ bool found = false;
+
+ /* Now let's find the first parent of the entry we are looking at. */
+ for (p = t-1; p >= m; p--) {
+ if (path_startswith(f->path, p->path)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* We found it, let's see if it's the same mode, if so, we can drop this entry */
+ if (found && p->mode == f->mode) {
+ log_debug("%s is redundant by %s", f->path, p->path);
+ continue;
+ }
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+}
+
+static void drop_outside_root(const char *root_directory, BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+
+ assert(m);
+ assert(n);
+
+ if (!root_directory)
+ return;
+
+ /* Drops all mounts that are outside of the root directory. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ if (!path_startswith(f->path, root_directory)) {
+ log_debug("%s is outside of root directory.", f->path);
+ continue;
+ }
+
+ *t = *f;
t++;
}
@@ -279,24 +558,23 @@ static int apply_mount(
const char *what;
int r;
- struct stat target;
assert(m);
+ log_debug("Applying namespace mount on %s", m->path);
+
switch (m->mode) {
- case INACCESSIBLE:
+ case INACCESSIBLE: {
+ struct stat target;
/* First, get rid of everything that is below if there
* is anything... Then, overmount it with an
* inaccessible path. */
- umount_recursive(m->path, 0);
+ (void) umount_recursive(m->path, 0);
- if (lstat(m->path, &target) < 0) {
- if (m->ignore && errno == ENOENT)
- return 0;
- return -errno;
- }
+ if (lstat(m->path, &target) < 0)
+ return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", m->path);
what = mode_to_inaccessible_node(target.st_mode);
if (!what) {
@@ -304,11 +582,20 @@ static int apply_mount(
return -ELOOP;
}
break;
+ }
+
case READONLY:
case READWRITE:
- /* Nothing to mount here, we just later toggle the
- * MS_RDONLY bit for the mount point */
- return 0;
+
+ r = path_is_mount_point(m->path, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", m->path);
+ if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
+ return 0;
+
+ /* This isn't a mount point yet, let's make it one. */
+ what = m->path;
+ break;
case PRIVATE_TMP:
what = tmp_dir;
@@ -327,68 +614,133 @@ static int apply_mount(
assert(what);
- r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
- if (r >= 0) {
- log_debug("Successfully mounted %s to %s", what, m->path);
- return r;
- } else {
- if (m->ignore && errno == ENOENT)
- return 0;
+ if (mount(what, m->path, NULL, MS_BIND|MS_REC, NULL) < 0)
return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
- }
+
+ log_debug("Successfully mounted %s to %s", what, m->path);
+ return 0;
}
-static int make_read_only(BindMount *m) {
- int r;
+static int make_read_only(BindMount *m, char **blacklist) {
+ int r = 0;
assert(m);
if (IN_SET(m->mode, INACCESSIBLE, READONLY))
- r = bind_remount_recursive(m->path, true);
- else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
- r = bind_remount_recursive(m->path, false);
- if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
- if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
- r = -errno;
+ r = bind_remount_recursive(m->path, true, blacklist);
+ else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
+ if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+ r = -errno;
} else
- r = 0;
-
- if (m->ignore && r == -ENOENT)
return 0;
+ /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only
+ * already stays this way. This improves compatibility with container managers, where we won't attempt to undo
+ * read-only mounts already applied. */
+
return r;
}
+static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned *n) {
+ BindMount *f, *t;
+ int r;
+
+ assert(m);
+ assert(n);
+
+ /* Since mount() will always follow symlinks and we need to take the different root directory into account we
+ * chase the symlinks on our own first. This call wil do so for all entries and remove all entries where we
+ * can't resolve the path, and which have been marked for such removal. */
+
+ for (f = m, t = m; f < m+*n; f++) {
+
+ r = chase_symlinks(f->path, root_directory, &f->chased);
+ if (r == -ENOENT && f->ignore) /* Doesn't exist? Then remove it! */
+ continue;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to chase symlinks for %s: %m", f->path);
+
+ if (path_equal(f->path, f->chased))
+ f->chased = mfree(f->chased);
+ else {
+ log_debug("Chased %s → %s", f->path, f->chased);
+ f->path = f->chased;
+ }
+
+ *t = *f;
+ t++;
+ }
+
+ *n = t - m;
+ return 0;
+}
+
+static unsigned namespace_calculate_mounts(
+ const NameSpaceInfo *ns_info,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ ProtectHome protect_home,
+ ProtectSystem protect_system) {
+
+ unsigned protect_home_cnt;
+ unsigned protect_system_cnt =
+ (protect_system == PROTECT_SYSTEM_STRICT ?
+ ELEMENTSOF(protect_system_strict_table) :
+ ((protect_system == PROTECT_SYSTEM_FULL) ?
+ ELEMENTSOF(protect_system_full_table) :
+ ((protect_system == PROTECT_SYSTEM_YES) ?
+ ELEMENTSOF(protect_system_yes_table) : 0)));
+
+ protect_home_cnt =
+ (protect_home == PROTECT_HOME_YES ?
+ ELEMENTSOF(protect_home_yes_table) :
+ ((protect_home == PROTECT_HOME_READ_ONLY) ?
+ ELEMENTSOF(protect_home_read_only_table) : 0));
+
+ return !!tmp_dir + !!var_tmp_dir +
+ strv_length(read_write_paths) +
+ strv_length(read_only_paths) +
+ strv_length(inaccessible_paths) +
+ ns_info->private_dev +
+ (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) +
+ (ns_info->protect_control_groups ? 1 : 0) +
+ (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) +
+ protect_home_cnt + protect_system_cnt;
+}
+
int setup_namespace(
const char* root_directory,
+ const NameSpaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
char** inaccessible_paths,
const char* tmp_dir,
const char* var_tmp_dir,
- bool private_dev,
ProtectHome protect_home,
ProtectSystem protect_system,
unsigned long mount_flags) {
BindMount *m, *mounts = NULL;
+ bool make_slave = false;
unsigned n;
int r = 0;
if (mount_flags == 0)
mount_flags = MS_SHARED;
- if (unshare(CLONE_NEWNS) < 0)
- return -errno;
+ n = namespace_calculate_mounts(ns_info,
+ read_write_paths,
+ read_only_paths,
+ inaccessible_paths,
+ tmp_dir, var_tmp_dir,
+ protect_home, protect_system);
- n = !!tmp_dir + !!var_tmp_dir +
- strv_length(read_write_paths) +
- strv_length(read_only_paths) +
- strv_length(inaccessible_paths) +
- private_dev +
- (protect_home != PROTECT_HOME_NO ? 3 : 0) +
- (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
- (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
+ /* Set mount slave mode */
+ if (root_directory || n > 0)
+ make_slave = true;
if (n > 0) {
m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
@@ -416,100 +768,127 @@ int setup_namespace(
m++;
}
- if (private_dev) {
+ if (ns_info->private_dev) {
m->path = prefix_roota(root_directory, "/dev");
m->mode = PRIVATE_DEV;
m++;
}
- if (protect_home != PROTECT_HOME_NO) {
- const char *home_dir, *run_user_dir, *root_dir;
-
- home_dir = prefix_roota(root_directory, "/home");
- home_dir = strjoina("-", home_dir);
- run_user_dir = prefix_roota(root_directory, "/run/user");
- run_user_dir = strjoina("-", run_user_dir);
- root_dir = prefix_roota(root_directory, "/root");
- root_dir = strjoina("-", root_dir);
-
- r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
- protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ if (ns_info->protect_kernel_tunables) {
+ r = append_protect_kernel_tunables(&m, root_directory);
if (r < 0)
return r;
}
- if (protect_system != PROTECT_SYSTEM_NO) {
- const char *usr_dir, *boot_dir, *etc_dir;
-
- usr_dir = prefix_roota(root_directory, "/usr");
- boot_dir = prefix_roota(root_directory, "/boot");
- boot_dir = strjoina("-", boot_dir);
- etc_dir = prefix_roota(root_directory, "/etc");
-
- r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
- ? STRV_MAKE(usr_dir, boot_dir, etc_dir)
- : STRV_MAKE(usr_dir, boot_dir), READONLY);
+ if (ns_info->protect_kernel_modules) {
+ r = append_protect_kernel_modules(&m, root_directory);
if (r < 0)
return r;
}
+ if (ns_info->protect_control_groups) {
+ m->path = prefix_roota(root_directory, "/sys/fs/cgroup");
+ m->mode = READONLY;
+ m++;
+ }
+
+ r = append_protect_home(&m, root_directory, protect_home);
+ if (r < 0)
+ return r;
+
+ r = append_protect_system(&m, root_directory, protect_system);
+ if (r < 0)
+ return r;
+
assert(mounts + n == m);
+ /* Resolve symlinks manually first, as mount() will always follow them relative to the host's
+ * root. Moreover we want to suppress duplicates based on the resolved paths. This of course is a bit
+ * racy. */
+ r = chase_all_symlinks(root_directory, mounts, &n);
+ if (r < 0)
+ goto finish;
+
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
+
drop_duplicates(mounts, &n);
+ drop_outside_root(root_directory, mounts, &n);
+ drop_inaccessible(mounts, &n);
+ drop_nop(mounts, &n);
}
- if (n > 0 || root_directory) {
+ if (unshare(CLONE_NEWNS) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ if (make_slave) {
/* Remount / as SLAVE so that nothing now mounted in the namespace
shows up in the parent */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
- return -errno;
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
}
if (root_directory) {
- /* Turn directory into bind mount */
- if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0)
- return -errno;
+ /* Turn directory into bind mount, if it isn't one yet */
+ r = path_is_mount_point(root_directory, AT_SYMLINK_FOLLOW);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ if (mount(root_directory, root_directory, NULL, MS_BIND|MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
}
if (n > 0) {
+ char **blacklist;
+ unsigned j;
+
+ /* First round, add in all special mounts we need */
for (m = mounts; m < mounts + n; ++m) {
r = apply_mount(m, tmp_dir, var_tmp_dir);
if (r < 0)
- goto fail;
+ goto finish;
}
+ /* Create a blacklist we can pass to bind_mount_recursive() */
+ blacklist = newa(char*, n+1);
+ for (j = 0; j < n; j++)
+ blacklist[j] = (char*) mounts[j].path;
+ blacklist[j] = NULL;
+
+ /* Second round, flip the ro bits if necessary. */
for (m = mounts; m < mounts + n; ++m) {
- r = make_read_only(m);
+ r = make_read_only(m, blacklist);
if (r < 0)
- goto fail;
+ goto finish;
}
}
if (root_directory) {
/* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */
r = mount_move_root(root_directory);
-
- /* at this point, we cannot rollback */
if (r < 0)
- return r;
+ goto finish;
}
/* Remount / as the desired mode. Not that this will not
* reestablish propagation from our side to the host, since
* what's disconnected is disconnected. */
- if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0)
- /* at this point, we cannot rollback */
- return -errno;
+ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
+ r = -errno;
+ goto finish;
+ }
- return 0;
+ r = 0;
-fail:
- if (n > 0) {
- for (m = mounts; m < mounts + n; ++m)
- if (m->done)
- (void) umount2(m->path, MNT_DETACH);
- }
+finish:
+ for (m = mounts; m < mounts + n; m++)
+ free(m->chased);
return r;
}
@@ -659,6 +1038,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
[PROTECT_SYSTEM_NO] = "no",
[PROTECT_SYSTEM_YES] = "yes",
[PROTECT_SYSTEM_FULL] = "full",
+ [PROTECT_SYSTEM_STRICT] = "strict",
};
DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);
diff --git a/src/grp-system/libcore/src/path.c b/src/grp-system/libcore/src/path.c
index 49c81dffec..a1b0bdd042 100644
--- a/src/grp-system/libcore/src/path.c
+++ b/src/grp-system/libcore/src/path.c
@@ -455,7 +455,7 @@ static int path_coldplug(Unit *u) {
static void path_enter_dead(Path *p, PathResult f) {
assert(p);
- if (f != PATH_SUCCESS)
+ if (p->result == PATH_SUCCESS)
p->result = f;
path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD);
@@ -578,6 +578,10 @@ static int path_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
path_mkdir(p);
p->result = PATH_SUCCESS;
diff --git a/src/grp-system/libcore/src/scope.c b/src/grp-system/libcore/src/scope.c
index 11816469a6..da1aad4a32 100644
--- a/src/grp-system/libcore/src/scope.c
+++ b/src/grp-system/libcore/src/scope.c
@@ -148,6 +148,32 @@ static int scope_verify(Scope *s) {
return 0;
}
+static int scope_load_init_scope(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_INIT_SCOPE))
+ return 0;
+
+ u->transient = true;
+ u->perpetual = true;
+
+ /* init.scope is a bit special, as it has to stick around forever. Because of its special semantics we
+ * synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+ u->ignore_on_isolate = true;
+
+ SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
+
+ /* Prettify things, if we can. */
+ if (!u->description)
+ u->description = strdup("System and Service Manager");
+ if (!u->documentation)
+ (void) strv_extend(&u->documentation, "man:systemd(1)");
+
+ return 1;
+}
+
static int scope_load(Unit *u) {
Scope *s = SCOPE(u);
int r;
@@ -159,6 +185,9 @@ static int scope_load(Unit *u) {
/* Refuse to load non-transient scope units, but allow them while reloading. */
return -ENOENT;
+ r = scope_load_init_scope(u);
+ if (r < 0)
+ return r;
r = unit_load_fragment_and_dropin_optional(u);
if (r < 0)
return r;
@@ -222,7 +251,7 @@ static void scope_dump(Unit *u, FILE *f, const char *prefix) {
static void scope_enter_dead(Scope *s, ScopeResult f) {
assert(s);
- if (f != SCOPE_SUCCESS)
+ if (s->result == SCOPE_SUCCESS)
s->result = f;
scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD);
@@ -234,7 +263,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
assert(s);
- if (f != SCOPE_SUCCESS)
+ if (s->result == SCOPE_SUCCESS)
s->result = f;
unit_watch_all_pids(UNIT(s));
@@ -299,6 +328,10 @@ static int scope_start(Unit *u) {
if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
return -ENOENT;
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
(void) unit_realize_cgroup(u);
(void) unit_reset_cpu_usage(u);
@@ -442,7 +475,7 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* If the PID set is empty now, then let's finish this off
(On unified we use proper notifications) */
- if (cg_unified() <= 0 && set_isempty(u->pids))
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
scope_notify_cgroup_empty_event(u);
}
@@ -531,34 +564,16 @@ static void scope_enumerate(Manager *m) {
u = manager_get_unit(m, SPECIAL_INIT_SCOPE);
if (!u) {
- u = unit_new(m, sizeof(Scope));
- if (!u) {
- log_oom();
- return;
- }
-
- r = unit_add_name(u, SPECIAL_INIT_SCOPE);
+ r = unit_new_for_name(m, sizeof(Scope), SPECIAL_INIT_SCOPE, &u);
if (r < 0) {
- unit_free(u);
- log_error_errno(r, "Failed to add init.scope name");
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_INIT_SCOPE " unit: %m");
return;
}
}
u->transient = true;
- u->default_dependencies = false;
- u->no_gc = true;
- u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
+ u->perpetual = true;
SCOPE(u)->deserialized_state = SCOPE_RUNNING;
- SCOPE(u)->kill_context.kill_signal = SIGRTMIN+14;
-
- /* Prettify things, if we can. */
- if (!u->description)
- u->description = strdup("System and Service Manager");
- if (!u->documentation)
- (void) strv_extend(&u->documentation, "man:systemd(1)");
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
diff --git a/src/grp-system/libcore/src/selinux-access.h b/src/grp-system/libcore/src/selinux-access.h
index 19adabdee3..6cd37bed3d 100644
--- a/src/grp-system/libcore/src/selinux-access.h
+++ b/src/grp-system/libcore/src/selinux-access.h
@@ -33,7 +33,7 @@ int mac_selinux_generic_access_check(sd_bus_message *message, const char *path,
#define mac_selinux_unit_access_check(unit, message, permission, error) \
({ \
- Unit *_unit = (unit); \
+ const Unit *_unit = (unit); \
mac_selinux_generic_access_check((message), _unit->source_path ?: _unit->fragment_path, (permission), (error)); \
})
diff --git a/src/grp-system/libcore/src/service.c b/src/grp-system/libcore/src/service.c
index df114edf09..0defd0cc35 100644
--- a/src/grp-system/libcore/src/service.c
+++ b/src/grp-system/libcore/src/service.c
@@ -290,7 +290,17 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
free(fs);
}
-static void service_release_resources(Unit *u) {
+static void service_release_fd_store(Service *s) {
+ assert(s);
+
+ log_unit_debug(UNIT(s), "Releasing all stored fds");
+ while (s->fd_store)
+ service_fd_store_unlink(s->fd_store);
+
+ assert(s->n_fd_store == 0);
+}
+
+static void service_release_resources(Unit *u, bool inactive) {
Service *s = SERVICE(u);
assert(s);
@@ -298,16 +308,14 @@ static void service_release_resources(Unit *u) {
if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
return;
- log_unit_debug(u, "Releasing all resources.");
+ log_unit_debug(u, "Releasing resources.");
s->stdin_fd = safe_close(s->stdin_fd);
s->stdout_fd = safe_close(s->stdout_fd);
s->stderr_fd = safe_close(s->stderr_fd);
- while (s->fd_store)
- service_fd_store_unlink(s->fd_store);
-
- assert(s->n_fd_store == 0);
+ if (inactive)
+ service_release_fd_store(s);
}
static void service_done(Unit *u) {
@@ -323,6 +331,8 @@ static void service_done(Unit *u) {
s->control_command = NULL;
s->main_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
exit_status_set_free(&s->restart_prevent_status);
exit_status_set_free(&s->restart_force_status);
exit_status_set_free(&s->success_status);
@@ -341,6 +351,7 @@ static void service_done(Unit *u) {
s->bus_name_owner = mfree(s->bus_name_owner);
service_close_socket_fd(s);
+ s->peer = socket_peer_unref(s->peer);
unit_ref_unset(&s->accept_socket);
@@ -348,7 +359,7 @@ static void service_done(Unit *u) {
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
- service_release_resources(u);
+ service_release_resources(u, true);
}
static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
@@ -358,6 +369,10 @@ static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *us
assert(fs);
/* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */
+ log_unit_debug(UNIT(fs->service),
+ "Received %s on stored fd %d (%s), closing.",
+ revents & EPOLLERR ? "EPOLLERR" : "EPOLLHUP",
+ fs->fd, strna(fs->fdname));
service_fd_store_unlink(fs);
return 0;
}
@@ -366,20 +381,23 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
ServiceFDStore *fs;
int r;
+ /* fd is always consumed if we return >= 0 */
+
assert(s);
assert(fd >= 0);
if (s->n_fd_store >= s->n_fd_store_max)
- return 0;
+ return -EXFULL; /* Our store is full.
+ * Use this errno rather than E[NM]FILE to distinguish from
+ * the case where systemd itself hits the file limit. */
LIST_FOREACH(fd_store, fs, s->fd_store) {
r = same_fd(fs->fd, fd);
if (r < 0)
return r;
if (r > 0) {
- /* Already included */
safe_close(fd);
- return 1;
+ return 0; /* fd already included */
}
}
@@ -407,7 +425,7 @@ static int service_add_fd_store(Service *s, int fd, const char *name) {
LIST_PREPEND(fd_store, s->fd_store, fs);
s->n_fd_store++;
- return 1;
+ return 1; /* fd newly stored */
}
static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
@@ -415,10 +433,7 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
assert(s);
- if (fdset_size(fds) <= 0)
- return 0;
-
- while (s->n_fd_store < s->n_fd_store_max) {
+ while (fdset_size(fds) > 0) {
_cleanup_close_ int fd = -1;
fd = fdset_steal_first(fds);
@@ -426,17 +441,17 @@ static int service_add_fd_store_set(Service *s, FDSet *fds, const char *name) {
break;
r = service_add_fd_store(s, fd, name);
+ if (r == -EXFULL)
+ return log_unit_warning_errno(UNIT(s), r,
+ "Cannot store more fds than FileDescriptorStoreMax=%u, closing remaining.",
+ s->n_fd_store_max);
if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Couldn't add fd to fd store: %m");
- if (r > 0) {
- log_unit_debug(UNIT(s), "Added fd to fd store.");
- fd = -1;
- }
+ return log_unit_error_errno(UNIT(s), r, "Failed to add fd to store: %m");
+ if (r > 0)
+ log_unit_debug(UNIT(s), "Added fd %u (%s) to fd store.", fd, strna(name));
+ fd = -1;
}
- if (fdset_size(fds) > 0)
- log_unit_warning(UNIT(s), "Tried to store more fds than FileDescriptorStoreMax=%u allows, closing remaining.", s->n_fd_store_max);
-
return 0;
}
@@ -759,6 +774,11 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
prefix, s->bus_name,
prefix, yes_no(s->bus_name_good));
+ if (UNIT_ISSET(s->accept_socket))
+ fprintf(f,
+ "%sAccept Socket: %s\n",
+ prefix, UNIT_DEREF(s->accept_socket)->id);
+
kill_context_dump(&s->kill_context, f, prefix);
exec_context_dump(&s->exec_context, f, prefix);
@@ -1031,6 +1051,23 @@ static int service_coldplug(Unit *u) {
if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
service_start_watchdog(s);
+ if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
+ (void) unit_setup_dynamic_creds(u);
+
+ if (UNIT_ISSET(s->accept_socket)) {
+ Socket* socket = SOCKET(UNIT_DEREF(s->accept_socket));
+
+ if (socket->max_connections_per_source > 0) {
+ SocketPeer *peer;
+
+ /* Make a best-effort attempt at bumping the connection count */
+ if (socket_acquire_peer(socket, s->socket_fd, &peer) > 0) {
+ socket_peer_unref(s->peer);
+ s->peer = peer;
+ }
+ }
+ }
+
service_set_state(s, s->deserialized_state);
return 0;
}
@@ -1147,11 +1184,7 @@ static int service_spawn(
Service *s,
ExecCommand *c,
usec_t timeout,
- bool pass_fds,
- bool apply_permissions,
- bool apply_chroot,
- bool apply_tty_stdin,
- bool is_control,
+ ExecFlags flags,
pid_t *_pid) {
_cleanup_strv_free_ char **argv = NULL, **final_env = NULL, **our_env = NULL, **fd_names = NULL;
@@ -1161,12 +1194,10 @@ static int service_spawn(
pid_t pid;
ExecParameters exec_params = {
- .apply_permissions = apply_permissions,
- .apply_chroot = apply_chroot,
- .apply_tty_stdin = apply_tty_stdin,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = flags,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
int r;
@@ -1175,6 +1206,14 @@ static int service_spawn(
assert(c);
assert(_pid);
+ if (flags & EXEC_IS_CONTROL) {
+ /* If this is a control process, mask the permissions/chroot application if this is requested. */
+ if (s->permissions_start_only)
+ exec_params.flags &= ~EXEC_APPLY_PERMISSIONS;
+ if (s->root_directory_start_only)
+ exec_params.flags &= ~EXEC_APPLY_CHROOT;
+ }
+
(void) unit_realize_cgroup(UNIT(s));
if (s->reset_cpu_usage) {
(void) unit_reset_cpu_usage(UNIT(s));
@@ -1185,7 +1224,11 @@ static int service_spawn(
if (r < 0)
return r;
- if (pass_fds ||
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
+ if ((flags & EXEC_PASS_FDS) ||
s->exec_context.std_input == EXEC_INPUT_SOCKET ||
s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
s->exec_context.std_error == EXEC_OUTPUT_SOCKET) {
@@ -1195,6 +1238,7 @@ static int service_spawn(
return r;
n_fds = r;
+ log_unit_debug(UNIT(s), "Passing %i fds to service", n_fds);
}
r = service_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), timeout));
@@ -1205,11 +1249,11 @@ static int service_spawn(
if (r < 0)
return r;
- our_env = new0(char*, 6);
+ our_env = new0(char*, 9);
if (!our_env)
return -ENOMEM;
- if (is_control ? s->notify_access == NOTIFY_ALL : s->notify_access != NOTIFY_NONE)
+ if ((flags & EXEC_IS_CONTROL) ? s->notify_access == NOTIFY_ALL : s->notify_access != NOTIFY_NONE)
if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0)
return -ENOMEM;
@@ -1217,7 +1261,7 @@ static int service_spawn(
if (asprintf(our_env + n_env++, "MAINPID="PID_FMT, s->main_pid) < 0)
return -ENOMEM;
- if (!MANAGER_IS_SYSTEM(UNIT(s)->manager))
+ if (MANAGER_IS_USER(UNIT(s)->manager))
if (asprintf(our_env + n_env++, "MANAGERPID="PID_FMT, getpid()) < 0)
return -ENOMEM;
@@ -1226,10 +1270,16 @@ static int service_spawn(
socklen_t salen = sizeof(sa);
r = getpeername(s->socket_fd, &sa.sa, &salen);
- if (r < 0)
- return -errno;
+ if (r < 0) {
+ r = -errno;
- if (IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
+ /* ENOTCONN is legitimate if the endpoint disappeared on shutdown.
+ * This connection is over, but the socket unit lives on. */
+ if (r != -ENOTCONN || !IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST))
+ return r;
+ }
+
+ if (r == 0 && IN_SET(sa.sa.sa_family, AF_INET, AF_INET6)) {
_cleanup_free_ char *addr = NULL;
char *t;
int port;
@@ -1253,22 +1303,40 @@ static int service_spawn(
}
}
+ if (flags & EXEC_SETENV_RESULT) {
+ if (asprintf(our_env + n_env++, "SERVICE_RESULT=%s", service_result_to_string(s->result)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.pid > 0 &&
+ dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
+ if (asprintf(our_env + n_env++, "EXIT_CODE=%s", sigchld_code_to_string(s->main_exec_status.code)) < 0)
+ return -ENOMEM;
+
+ if (s->main_exec_status.code == CLD_EXITED)
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%i", s->main_exec_status.status);
+ else
+ r = asprintf(our_env + n_env++, "EXIT_STATUS=%s", signal_to_string(s->main_exec_status.status));
+ if (r < 0)
+ return -ENOMEM;
+ }
+ }
+
final_env = strv_env_merge(2, UNIT(s)->manager->environment, our_env, NULL);
if (!final_env)
return -ENOMEM;
- if (is_control && UNIT(s)->cgroup_path) {
+ if ((flags & EXEC_IS_CONTROL) && UNIT(s)->cgroup_path) {
path = strjoina(UNIT(s)->cgroup_path, "/control");
(void) cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
} else
path = UNIT(s)->cgroup_path;
exec_params.argv = argv;
+ exec_params.environment = final_env;
exec_params.fds = fds;
exec_params.fd_names = fd_names;
exec_params.n_fds = n_fds;
- exec_params.environment = final_env;
- exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = path;
exec_params.cgroup_delegate = s->cgroup_context.delegate;
@@ -1286,6 +1354,7 @@ static int service_spawn(
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -1393,14 +1462,14 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
int r;
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_set_state(s, s->result != SERVICE_SUCCESS ? SERVICE_FAILED : SERVICE_DEAD);
if (s->result != SERVICE_SUCCESS) {
log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result));
- failure_action(UNIT(s)->manager, s->failure_action, UNIT(s)->reboot_arg);
+ emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed");
}
if (allow_restart && service_shall_restart(s)) {
@@ -1419,9 +1488,15 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
- /* Also, remove the runtime directory in */
+ /* Also, remove the runtime directory */
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
+ /* Get rid of the IPC bits of the user */
+ unit_unref_uid_gid(UNIT(s), true);
+
+ /* Release the user, and destroy it if we are the only remaining owner */
+ dynamic_creds_destroy(&s->dynamic_creds);
+
/* Try to delete the pid file. At this point it will be
* out-of-date, and some software might be confused by it, so
* let's remove it. */
@@ -1439,7 +1514,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
int r;
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_unwatch_control_pid(s);
@@ -1452,11 +1527,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- true,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1496,7 +1567,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
unit_watch_all_pids(UNIT(s));
@@ -1554,7 +1625,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_unwatch_control_pid(s);
@@ -1567,11 +1638,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
r = service_spawn(s,
s->control_command,
s->timeout_stop_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- false,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1610,7 +1677,7 @@ static bool service_good(Service *s) {
static void service_enter_running(Service *s, ServiceResult f) {
assert(s);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
service_unwatch_control_pid(s);
@@ -1648,11 +1715,7 @@ static void service_enter_start_post(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- false,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1707,7 +1770,15 @@ static void service_enter_start(Service *s) {
}
if (!c) {
- assert(s->type == SERVICE_ONESHOT);
+ if (s->type != SERVICE_ONESHOT) {
+ /* There's no command line configured for the main command? Hmm, that is strange. This can only
+ * happen if the configuration changes at runtime. In this case, let's enter a failure
+ * state. */
+ log_unit_error(UNIT(s), "There's no 'start' task anymore we could start: %m");
+ r = -ENXIO;
+ goto fail;
+ }
+
service_enter_start_post(s);
return;
}
@@ -1722,11 +1793,7 @@ static void service_enter_start(Service *s) {
r = service_spawn(s,
c,
timeout,
- true,
- true,
- true,
- true,
- false,
+ EXEC_PASS_FDS|EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
&pid);
if (r < 0)
goto fail;
@@ -1785,11 +1852,7 @@ static void service_enter_start_pre(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- true,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1864,11 +1927,7 @@ static void service_enter_reload(Service *s) {
r = service_spawn(s,
s->control_command,
s->timeout_start_usec,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- false,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL,
&s->control_pid);
if (r < 0)
goto fail;
@@ -1906,12 +1965,9 @@ static void service_run_next_control(Service *s) {
r = service_spawn(s,
s->control_command,
timeout,
- false,
- !s->permissions_start_only,
- !s->root_directory_start_only,
- s->control_command_id == SERVICE_EXEC_START_PRE ||
- s->control_command_id == SERVICE_EXEC_STOP_POST,
- true,
+ EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
+ (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0),
&s->control_pid);
if (r < 0)
goto fail;
@@ -1949,11 +2005,7 @@ static void service_run_next_main(Service *s) {
r = service_spawn(s,
s->main_command,
s->timeout_start_usec,
- true,
- true,
- true,
- true,
- false,
+ EXEC_PASS_FDS|EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG,
&pid);
if (r < 0)
goto fail;
@@ -2003,6 +2055,10 @@ static int service_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
s->result = SERVICE_SUCCESS;
s->reload_result = SERVICE_SUCCESS;
s->main_pid_known = false;
@@ -2117,6 +2173,12 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
if (r < 0)
return r;
+ if (UNIT_ISSET(s->accept_socket)) {
+ r = unit_serialize_item(u, f, "accept-socket", UNIT_DEREF(s->accept_socket)->id);
+ if (r < 0)
+ return r;
+ }
+
r = unit_serialize_item_fd(u, f, fds, "socket-fd", s->socket_fd);
if (r < 0)
return r;
@@ -2247,6 +2309,17 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
s->control_command_id = id;
s->control_command = s->exec_command[id];
}
+ } else if (streq(key, "accept-socket")) {
+ Unit *socket;
+
+ r = manager_load_unit(u->manager, value, NULL, NULL, &socket);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to load accept-socket unit: %s", value);
+ else {
+ unit_ref_set(&s->accept_socket, socket);
+ SOCKET(socket)->n_connections++;
+ }
+
} else if (streq(key, "socket-fd")) {
int fd;
@@ -2277,7 +2350,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
r = service_add_fd_store(s, fd, t);
if (r < 0)
log_unit_error_errno(u, r, "Failed to add fd to store: %m");
- else if (r > 0)
+ else
fdset_remove(fds, fd);
}
@@ -2553,8 +2626,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
assert(s);
assert(pid >= 0);
- if (UNIT(s)->fragment_path ? is_clean_exit(code, status, &s->success_status) :
- is_clean_exit_lsb(code, status, &s->success_status))
+ if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status))
f = SERVICE_SUCCESS;
else if (code == CLD_EXITED)
f = SERVICE_FAILURE_EXIT_CODE;
@@ -2596,7 +2668,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
f = SERVICE_SUCCESS;
}
- log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG : LOG_NOTICE,
+ /* When this is a successful exit, let's log about the exit code on DEBUG level. If this is a failure
+ * and the process exited on its own via exit(), then let's make this a NOTICE, under the assumption
+ * that the service already logged the reason at a higher log level on its own. However, if the service
+ * died due to a signal, then it most likely didn't say anything about any reason, hence let's raise
+ * our log level to WARNING then. */
+
+ log_struct(f == SERVICE_SUCCESS ? LOG_DEBUG :
+ (code == CLD_EXITED ? LOG_NOTICE : LOG_WARNING),
LOG_UNIT_ID(u),
LOG_UNIT_MESSAGE(u, "Main process exited, code=%s, status=%i/%s",
sigchld_code_to_string(code), status,
@@ -2607,7 +2686,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"EXIT_STATUS=%i", status,
NULL);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
if (s->main_command &&
@@ -2688,7 +2767,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"Control process exited, code=%s status=%i",
sigchld_code_to_string(code), status);
- if (f != SERVICE_SUCCESS)
+ if (s->result == SERVICE_SUCCESS)
s->result = f;
/* Immediately get rid of the cgroup, so that the
@@ -2828,7 +2907,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
/* If the PID set is empty now, then let's finish this off
(On unified we use proper notifications) */
- if (cg_unified() <= 0 && set_isempty(u->pids))
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) <= 0 && set_isempty(u->pids))
service_notify_cgroup_empty_event(u);
}
@@ -3038,9 +3117,7 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds)
if (!streq_ptr(s->status_text, t)) {
- free(s->status_text);
- s->status_text = t;
- t = NULL;
+ free_and_replace(s->status_text, t);
notify_dbus = true;
}
@@ -3324,6 +3401,7 @@ const UnitVTable service_vtable = {
.cgroup_context_offset = offsetof(Service, cgroup_context),
.kill_context_offset = offsetof(Service, kill_context),
.exec_runtime_offset = offsetof(Service, exec_runtime),
+ .dynamic_creds_offset = offsetof(Service, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/grp-system/libcore/src/show-status.c b/src/grp-system/libcore/src/show-status.c
index dec0eb00be..1b2a7480d5 100644
--- a/src/grp-system/libcore/src/show-status.c
+++ b/src/grp-system/libcore/src/show-status.c
@@ -61,6 +61,11 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
if (vasprintf(&s, format, ap) < 0)
return log_oom();
+ /* Before you ask: yes, on purpose we open/close the console for each status line we write individually. This
+ * is a good strategy to avoid PID 1 getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing PID 1 due to SAK). It also makes things easier
+ * for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
if (fd < 0)
return fd;
diff --git a/src/grp-system/libcore/src/slice.c b/src/grp-system/libcore/src/slice.c
index 4ab5548572..8bb47534fc 100644
--- a/src/grp-system/libcore/src/slice.c
+++ b/src/grp-system/libcore/src/slice.c
@@ -131,6 +131,28 @@ static int slice_verify(Slice *s) {
return 0;
}
+static int slice_load_root_slice(Unit *u) {
+ assert(u);
+
+ if (!unit_has_name(u, SPECIAL_ROOT_SLICE))
+ return 0;
+
+ u->perpetual = true;
+
+ /* The root slice is a bit special. For example it is always running and cannot be terminated. Because of its
+ * special semantics we synthesize it here, instead of relying on the unit file on disk. */
+
+ u->default_dependencies = false;
+ u->ignore_on_isolate = true;
+
+ if (!u->description)
+ u->description = strdup("Root Slice");
+ if (!u->documentation)
+ u->documentation = strv_new("man:systemd.special(7)", NULL);
+
+ return 1;
+}
+
static int slice_load(Unit *u) {
Slice *s = SLICE(u);
int r;
@@ -138,6 +160,9 @@ static int slice_load(Unit *u) {
assert(s);
assert(u->load_state == UNIT_STUB);
+ r = slice_load_root_slice(u);
+ if (r < 0)
+ return r;
r = unit_load_fragment_and_dropin_optional(u);
if (r < 0)
return r;
@@ -188,10 +213,15 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) {
static int slice_start(Unit *u) {
Slice *t = SLICE(u);
+ int r;
assert(t);
assert(t->state == SLICE_DEAD);
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
(void) unit_realize_cgroup(u);
(void) unit_reset_cpu_usage(u);
@@ -270,32 +300,16 @@ static void slice_enumerate(Manager *m) {
u = manager_get_unit(m, SPECIAL_ROOT_SLICE);
if (!u) {
- u = unit_new(m, sizeof(Slice));
- if (!u) {
- log_oom();
- return;
- }
-
- r = unit_add_name(u, SPECIAL_ROOT_SLICE);
+ r = unit_new_for_name(m, sizeof(Slice), SPECIAL_ROOT_SLICE, &u);
if (r < 0) {
- unit_free(u);
- log_error_errno(r, "Failed to add -.slice name");
+ log_error_errno(r, "Failed to allocate the special " SPECIAL_ROOT_SLICE " unit: %m");
return;
}
}
- u->default_dependencies = false;
- u->no_gc = true;
- u->ignore_on_isolate = true;
- u->refuse_manual_start = true;
- u->refuse_manual_stop = true;
+ u->perpetual = true;
SLICE(u)->deserialized_state = SLICE_ACTIVE;
- if (!u->description)
- u->description = strdup("Root Slice");
- if (!u->documentation)
- (void) strv_extend(&u->documentation, "man:systemd.special(7)");
-
unit_add_to_load_queue(u);
unit_add_to_dbus_queue(u);
}
diff --git a/src/grp-system/libcore/src/socket.c b/src/grp-system/libcore/src/socket.c
index 7e57cc7e79..9d7a72ef5f 100644
--- a/src/grp-system/libcore/src/socket.c
+++ b/src/grp-system/libcore/src/socket.c
@@ -39,6 +39,7 @@
#include "systemd-basic/exit-status.h"
#include "systemd-basic/fd-util.h"
#include "systemd-basic/formats-util.h"
+#include "systemd-basic/in-addr-util.h"
#include "systemd-basic/io-util.h"
#include "systemd-basic/label.h"
#include "systemd-basic/log.h"
@@ -60,6 +61,13 @@
#include "dbus-socket.h"
#include "unit-printf.h"
+struct SocketPeer {
+ unsigned n_ref;
+
+ Socket *socket;
+ union sockaddr_union peer;
+};
+
static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
[SOCKET_DEAD] = UNIT_INACTIVE,
[SOCKET_START_PRE] = UNIT_ACTIVATING,
@@ -143,15 +151,23 @@ void socket_free_ports(Socket *s) {
static void socket_done(Unit *u) {
Socket *s = SOCKET(u);
+ SocketPeer *p;
assert(s);
socket_free_ports(s);
+ while ((p = set_steal_first(s->peers_by_address)))
+ p->socket = NULL;
+
+ s->peers_by_address = set_free(s->peers_by_address);
+
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
s->control_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
socket_unwatch_control_pid(s);
unit_ref_unset(&s->service);
@@ -468,6 +484,40 @@ static int socket_verify(Socket *s) {
return 0;
}
+static void peer_address_hash_func(const void *p, struct siphash *state) {
+ const SocketPeer *s = p;
+
+ assert(s);
+ assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
+
+ if (s->peer.sa.sa_family == AF_INET)
+ siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state);
+ else
+ siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state);
+}
+
+static int peer_address_compare_func(const void *a, const void *b) {
+ const SocketPeer *x = a, *y = b;
+
+ if (x->peer.sa.sa_family < y->peer.sa.sa_family)
+ return -1;
+ if (x->peer.sa.sa_family > y->peer.sa.sa_family)
+ return 1;
+
+ switch(x->peer.sa.sa_family) {
+ case AF_INET:
+ return memcmp(&x->peer.in.sin_addr, &y->peer.in.sin_addr, sizeof(x->peer.in.sin_addr));
+ case AF_INET6:
+ return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
+ }
+ assert_not_reached("Black sheep in the family!");
+}
+
+const struct hash_ops peer_address_hash_ops = {
+ .hash = peer_address_hash_func,
+ .compare = peer_address_compare_func
+};
+
static int socket_load(Unit *u) {
Socket *s = SOCKET(u);
int r;
@@ -475,6 +525,10 @@ static int socket_load(Unit *u) {
assert(u);
assert(u->load_state == UNIT_STUB);
+ r = set_ensure_allocated(&s->peers_by_address, &peer_address_hash_ops);
+ if (r < 0)
+ return r;
+
r = unit_load_fragment_and_dropin(u);
if (r < 0)
return r;
@@ -489,6 +543,87 @@ static int socket_load(Unit *u) {
return socket_verify(s);
}
+static SocketPeer *socket_peer_new(void) {
+ SocketPeer *p;
+
+ p = new0(SocketPeer, 1);
+ if (!p)
+ return NULL;
+
+ p->n_ref = 1;
+
+ return p;
+}
+
+SocketPeer *socket_peer_ref(SocketPeer *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+ p->n_ref++;
+
+ return p;
+}
+
+SocketPeer *socket_peer_unref(SocketPeer *p) {
+ if (!p)
+ return NULL;
+
+ assert(p->n_ref > 0);
+
+ p->n_ref--;
+
+ if (p->n_ref > 0)
+ return NULL;
+
+ if (p->socket)
+ set_remove(p->socket->peers_by_address, p);
+
+ return mfree(p);
+}
+
+int socket_acquire_peer(Socket *s, int fd, SocketPeer **p) {
+ _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL;
+ SocketPeer sa = {}, *i;
+ socklen_t salen = sizeof(sa.peer);
+ int r;
+
+ assert(fd >= 0);
+ assert(s);
+
+ r = getpeername(fd, &sa.peer.sa, &salen);
+ if (r < 0)
+ return log_error_errno(errno, "getpeername failed: %m");
+
+ if (!IN_SET(sa.peer.sa.sa_family, AF_INET, AF_INET6)) {
+ *p = NULL;
+ return 0;
+ }
+
+ i = set_get(s->peers_by_address, &sa);
+ if (i) {
+ *p = socket_peer_ref(i);
+ return 1;
+ }
+
+ remote = socket_peer_new();
+ if (!remote)
+ return log_oom();
+
+ remote->peer = sa.peer;
+
+ r = set_put(s->peers_by_address, remote);
+ if (r < 0)
+ return r;
+
+ remote->socket = s;
+
+ *p = remote;
+ remote = NULL;
+
+ return 1;
+}
+
_const_ static const char* listen_lookup(int family, int type) {
if (family == AF_NETLINK)
@@ -1201,14 +1336,9 @@ static int usbffs_select_ep(const struct dirent *d) {
static int usbffs_dispatch_eps(SocketPort *p) {
_cleanup_free_ struct dirent **ent = NULL;
- _cleanup_free_ char *path = NULL;
int r, i, n, k;
- path = dirname_malloc(p->path);
- if (!path)
- return -ENOMEM;
-
- r = scandir(path, &ent, usbffs_select_ep, alphasort);
+ r = scandir(p->path, &ent, usbffs_select_ep, alphasort);
if (r < 0)
return -errno;
@@ -1223,7 +1353,7 @@ static int usbffs_dispatch_eps(SocketPort *p) {
for (i = 0; i < n; ++i) {
_cleanup_free_ char *ep = NULL;
- ep = path_make_absolute(ent[i]->d_name, path);
+ ep = path_make_absolute(ent[i]->d_name, p->path);
if (!ep)
return -ENOMEM;
@@ -1604,6 +1734,9 @@ static int socket_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
socket_set_state(s, s->deserialized_state);
return 0;
}
@@ -1613,12 +1746,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
pid_t pid;
int r;
ExecParameters exec_params = {
- .apply_permissions = true,
- .apply_chroot = true,
- .apply_tty_stdin = true,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
assert(s);
@@ -1635,6 +1766,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
return r;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
if (r < 0)
return r;
@@ -1645,7 +1780,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
exec_params.argv = argv;
exec_params.environment = UNIT(s)->manager->environment;
- exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(s)->cgroup_path;
exec_params.cgroup_delegate = s->cgroup_context.delegate;
@@ -1656,6 +1791,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
return r;
@@ -1756,15 +1892,19 @@ fail:
static void socket_enter_dead(Socket *s, SocketResult f) {
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
+ socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
- socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD);
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
}
static void socket_enter_signal(Socket *s, SocketState state, SocketResult f);
@@ -1773,7 +1913,7 @@ static void socket_enter_stop_post(Socket *s, SocketResult f) {
int r;
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
socket_unwatch_control_pid(s);
@@ -1801,7 +1941,7 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
r = unit_kill_context(
@@ -1845,7 +1985,7 @@ static void socket_enter_stop_pre(Socket *s, SocketResult f) {
int r;
assert(s);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
socket_unwatch_control_pid(s);
@@ -2040,14 +2180,34 @@ static void socket_enter_running(Socket *s, int cfd) {
socket_set_state(s, SOCKET_RUNNING);
} else {
_cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL;
+ _cleanup_(socket_peer_unrefp) SocketPeer *p = NULL;
Service *service;
if (s->n_connections >= s->max_connections) {
- log_unit_warning(UNIT(s), "Too many incoming connections (%u), refusing connection attempt.", s->n_connections);
+ log_unit_warning(UNIT(s), "Too many incoming connections (%u), dropping connection.",
+ s->n_connections);
safe_close(cfd);
return;
}
+ if (s->max_connections_per_source > 0) {
+ r = socket_acquire_peer(s, cfd, &p);
+ if (r < 0) {
+ safe_close(cfd);
+ return;
+ } else if (r > 0 && p->n_ref > s->max_connections_per_source) {
+ _cleanup_free_ char *t = NULL;
+
+ sockaddr_pretty(&p->peer.sa, FAMILY_ADDRESS_SIZE(p->peer.sa.sa_family), true, false, &t);
+
+ log_unit_warning(UNIT(s),
+ "Too many incoming connections (%u) from source %s, dropping connection.",
+ p->n_ref, strnull(t));
+ safe_close(cfd);
+ return;
+ }
+ }
+
r = socket_instantiate_service(s);
if (r < 0)
goto fail;
@@ -2089,6 +2249,9 @@ static void socket_enter_running(Socket *s, int cfd) {
cfd = -1; /* We passed ownership of the fd to the service now. Forget it here. */
s->n_connections++;
+ service->peer = p; /* Pass ownership of the peer reference */
+ p = NULL;
+
r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(service), JOB_REPLACE, &error, NULL);
if (r < 0) {
/* We failed to activate the new service, but it still exists. Let's make sure the service
@@ -2193,11 +2356,14 @@ static int socket_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
s->result = SOCKET_SUCCESS;
s->reset_cpu_usage = true;
socket_enter_start_pre(s);
-
return 1;
}
@@ -2288,6 +2454,11 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
return 0;
}
+static void socket_port_take_fd(SocketPort *p, FDSet *fds, int fd) {
+ safe_close(p->fd);
+ p->fd = fdset_remove(fds, fd);
+}
+
static int socket_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
Socket *s = SOCKET(u);
@@ -2342,18 +2513,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse fifo value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_FIFO &&
- path_equal_or_files_same(p->path, value+skip))
+ path_equal_or_files_same(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "special")) {
int fd, skip = 0;
@@ -2361,18 +2527,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse special value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_SPECIAL &&
- path_equal_or_files_same(p->path, value+skip))
+ path_equal_or_files_same(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "mqueue")) {
int fd, skip = 0;
@@ -2380,18 +2541,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse mqueue value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_MQUEUE &&
- streq(p->path, value+skip))
+ streq(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "socket")) {
int fd, type, skip = 0;
@@ -2399,17 +2555,12 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %i %n", &fd, &type, &skip) < 2 || fd < 0 || type < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse socket value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
- if (socket_address_is(&p->address, value+skip, type))
+ if (socket_address_is(&p->address, value+skip, type)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "netlink")) {
int fd, skip = 0;
@@ -2417,17 +2568,12 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse socket value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
- if (socket_address_is_netlink(&p->address, value+skip))
+ if (socket_address_is_netlink(&p->address, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else if (streq(key, "ffs")) {
int fd, skip = 0;
@@ -2435,18 +2581,13 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
if (sscanf(value, "%i %n", &fd, &skip) < 1 || fd < 0 || !fdset_contains(fds, fd))
log_unit_debug(u, "Failed to parse ffs value: %s", value);
- else {
-
+ else
LIST_FOREACH(port, p, s->ports)
if (p->type == SOCKET_USB_FUNCTION &&
- path_equal_or_files_same(p->path, value+skip))
+ path_equal_or_files_same(p->path, value+skip)) {
+ socket_port_take_fd(p, fds, fd);
break;
-
- if (p) {
- safe_close(p->fd);
- p->fd = fdset_remove(fds, fd);
- }
- }
+ }
} else
log_unit_debug(UNIT(s), "Unknown serialization key: %s", key);
@@ -2607,7 +2748,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
s->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = SOCKET_SUCCESS;
else if (code == CLD_EXITED)
f = SOCKET_FAILURE_EXIT_CODE;
@@ -2629,7 +2770,7 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"Control process exited, code=%s status=%i",
sigchld_code_to_string(code), status);
- if (f != SOCKET_SUCCESS)
+ if (s->result == SOCKET_SUCCESS)
s->result = f;
if (s->control_command &&
@@ -2932,6 +3073,7 @@ const UnitVTable socket_vtable = {
.cgroup_context_offset = offsetof(Socket, cgroup_context),
.kill_context_offset = offsetof(Socket, kill_context),
.exec_runtime_offset = offsetof(Socket, exec_runtime),
+ .dynamic_creds_offset = offsetof(Socket, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/grp-system/libcore/src/swap.c b/src/grp-system/libcore/src/swap.c
index 29f3971720..85f789e12b 100644
--- a/src/grp-system/libcore/src/swap.c
+++ b/src/grp-system/libcore/src/swap.c
@@ -154,6 +154,8 @@ static void swap_done(Unit *u) {
exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
s->control_command = NULL;
+ dynamic_creds_unref(&s->dynamic_creds);
+
swap_unwatch_control_pid(s);
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
@@ -380,11 +382,7 @@ static int swap_setup_unit(
if (!u) {
delete = true;
- u = unit_new(m, sizeof(Swap));
- if (!u)
- return log_oom();
-
- r = unit_add_name(u, e);
+ r = unit_new_for_name(m, sizeof(Swap), e, &u);
if (r < 0)
goto fail;
@@ -554,6 +552,9 @@ static int swap_coldplug(Unit *u) {
return r;
}
+ if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED))
+ (void) unit_setup_dynamic_creds(u);
+
swap_set_state(s, new_state);
return 0;
}
@@ -607,12 +608,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
pid_t pid;
int r;
ExecParameters exec_params = {
- .apply_permissions = true,
- .apply_chroot = true,
- .apply_tty_stdin = true,
- .stdin_fd = -1,
- .stdout_fd = -1,
- .stderr_fd = -1,
+ .flags = EXEC_APPLY_PERMISSIONS|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN,
+ .stdin_fd = -1,
+ .stdout_fd = -1,
+ .stderr_fd = -1,
};
assert(s);
@@ -629,12 +628,16 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
if (r < 0)
goto fail;
+ r = unit_setup_dynamic_creds(UNIT(s));
+ if (r < 0)
+ return r;
+
r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec));
if (r < 0)
goto fail;
exec_params.environment = UNIT(s)->manager->environment;
- exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
+ exec_params.flags |= UNIT(s)->manager->confirm_spawn ? EXEC_CONFIRM_SPAWN : 0;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(s)->cgroup_path;
exec_params.cgroup_delegate = s->cgroup_context.delegate;
@@ -645,6 +648,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
&s->exec_context,
&exec_params,
s->exec_runtime,
+ &s->dynamic_creds,
&pid);
if (r < 0)
goto fail;
@@ -666,21 +670,25 @@ fail:
static void swap_enter_dead(Swap *s, SwapResult f) {
assert(s);
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
+ swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+
exec_runtime_destroy(s->exec_runtime);
s->exec_runtime = exec_runtime_unref(s->exec_runtime);
exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager));
- swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD);
+ unit_unref_uid_gid(UNIT(s), true);
+
+ dynamic_creds_destroy(&s->dynamic_creds);
}
static void swap_enter_active(Swap *s, SwapResult f) {
assert(s);
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
swap_set_state(s, SWAP_ACTIVE);
@@ -691,7 +699,7 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
assert(s);
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
r = unit_kill_context(
@@ -850,6 +858,10 @@ static int swap_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
s->result = SWAP_SUCCESS;
s->reset_cpu_usage = true;
@@ -977,7 +989,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
s->control_pid = 0;
- if (is_clean_exit(code, status, NULL))
+ if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
f = SWAP_SUCCESS;
else if (code == CLD_EXITED)
f = SWAP_FAILURE_EXIT_CODE;
@@ -988,7 +1000,7 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
else
assert_not_reached("Unknown code");
- if (f != SWAP_SUCCESS)
+ if (s->result == SWAP_SUCCESS)
s->result = f;
if (s->control_command) {
@@ -1178,6 +1190,7 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
case SWAP_DEAD:
case SWAP_FAILED:
+ (void) unit_acquire_invocation_id(UNIT(swap));
swap_enter_active(swap, SWAP_SUCCESS);
break;
@@ -1467,6 +1480,7 @@ const UnitVTable swap_vtable = {
.cgroup_context_offset = offsetof(Swap, cgroup_context),
.kill_context_offset = offsetof(Swap, kill_context),
.exec_runtime_offset = offsetof(Swap, exec_runtime),
+ .dynamic_creds_offset = offsetof(Swap, dynamic_creds),
.sections =
"Unit\0"
diff --git a/src/grp-system/libcore/src/target.c b/src/grp-system/libcore/src/target.c
index c44d76468c..717dee06a9 100644
--- a/src/grp-system/libcore/src/target.c
+++ b/src/grp-system/libcore/src/target.c
@@ -124,10 +124,15 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) {
static int target_start(Unit *u) {
Target *t = TARGET(u);
+ int r;
assert(t);
assert(t->state == TARGET_DEAD);
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
target_set_state(t, TARGET_ACTIVE);
return 1;
}
diff --git a/src/grp-system/libcore/src/timer.c b/src/grp-system/libcore/src/timer.c
index 882d871dc9..8789dce022 100644
--- a/src/grp-system/libcore/src/timer.c
+++ b/src/grp-system/libcore/src/timer.c
@@ -262,6 +262,8 @@ static void timer_set_state(Timer *t, TimerState state) {
if (state != TIMER_WAITING) {
t->monotonic_event_source = sd_event_source_unref(t->monotonic_event_source);
t->realtime_event_source = sd_event_source_unref(t->realtime_event_source);
+ t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
+ t->next_elapse_realtime = USEC_INFINITY;
}
if (state != old_state)
@@ -292,7 +294,7 @@ static int timer_coldplug(Unit *u) {
static void timer_enter_dead(Timer *t, TimerResult f) {
assert(t);
- if (f != TIMER_SUCCESS)
+ if (t->result == TIMER_SUCCESS)
t->result = f;
timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD);
@@ -617,6 +619,10 @@ static int timer_start(Unit *u) {
return r;
}
+ r = unit_acquire_invocation_id(u);
+ if (r < 0)
+ return r;
+
t->last_trigger = DUAL_TIMESTAMP_NULL;
/* Reenable all timers that depend on unit activation time */
@@ -633,7 +639,7 @@ static int timer_start(Unit *u) {
/* The timer has never run before,
* make sure a stamp file exists.
*/
- touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
+ (void) touch_file(t->stamp_path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
}
t->result = TIMER_SUCCESS;
diff --git a/src/grp-system/libcore/src/transaction.c b/src/grp-system/libcore/src/transaction.c
index 2768be914f..b9b28900e9 100644
--- a/src/grp-system/libcore/src/transaction.c
+++ b/src/grp-system/libcore/src/transaction.c
@@ -1086,10 +1086,8 @@ Transaction *transaction_new(bool irreversible) {
return NULL;
tr->jobs = hashmap_new(NULL);
- if (!tr->jobs) {
- free(tr);
- return NULL;
- }
+ if (!tr->jobs)
+ return mfree(tr);
tr->irreversible = irreversible;
diff --git a/src/grp-system/libcore/src/unit.c b/src/grp-system/libcore/src/unit.c
index a36e589b73..364208dbad 100644
--- a/src/grp-system/libcore/src/unit.c
+++ b/src/grp-system/libcore/src/unit.c
@@ -31,6 +31,7 @@
#include "core/unit.h"
#include "sd-bus/bus-common-errors.h"
#include "sd-bus/bus-util.h"
+#include "sd-id128/id128-util.h"
#include "systemd-basic/alloc-util.h"
#include "systemd-basic/cgroup-util.h"
#include "systemd-basic/escape.h"
@@ -88,10 +89,8 @@ Unit *unit_new(Manager *m, size_t size) {
return NULL;
u->names = set_new(&string_hash_ops);
- if (!u->names) {
- free(u);
- return NULL;
- }
+ if (!u->names)
+ return mfree(u);
u->manager = m;
u->type = _UNIT_TYPE_INVALID;
@@ -101,7 +100,9 @@ Unit *unit_new(Manager *m, size_t size) {
u->on_failure_job_mode = JOB_REPLACE;
u->cgroup_inotify_wd = -1;
u->job_timeout = USEC_INFINITY;
- u->sigchldgen = 0;
+ u->ref_uid = UID_INVALID;
+ u->ref_gid = GID_INVALID;
+ u->cpu_usage_last = NSEC_INFINITY;
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -109,11 +110,29 @@ Unit *unit_new(Manager *m, size_t size) {
return u;
}
+int unit_new_for_name(Manager *m, size_t size, const char *name, Unit **ret) {
+ Unit *u;
+ int r;
+
+ u = unit_new(m, size);
+ if (!u)
+ return -ENOMEM;
+
+ r = unit_add_name(u, name);
+ if (r < 0) {
+ unit_free(u);
+ return r;
+ }
+
+ *ret = u;
+ return r;
+}
+
bool unit_has_name(Unit *u, const char *name) {
assert(u);
assert(name);
- return !!set_get(u->names, (char*) name);
+ return set_contains(u->names, (char*) name);
}
static void unit_init(Unit *u) {
@@ -302,6 +321,7 @@ int unit_set_description(Unit *u, const char *description) {
bool unit_check_gc(Unit *u) {
UnitActiveState state;
+ bool inactive;
assert(u);
if (u->job)
@@ -311,24 +331,28 @@ bool unit_check_gc(Unit *u) {
return true;
state = unit_active_state(u);
+ inactive = state == UNIT_INACTIVE;
/* If the unit is inactive and failed and no job is queued for
* it, then release its runtime resources */
if (UNIT_IS_INACTIVE_OR_FAILED(state) &&
UNIT_VTABLE(u)->release_resources)
- UNIT_VTABLE(u)->release_resources(u);
+ UNIT_VTABLE(u)->release_resources(u, inactive);
/* But we keep the unit object around for longer when it is
* referenced or configured to not be gc'ed */
- if (state != UNIT_INACTIVE)
+ if (!inactive)
return true;
- if (u->no_gc)
+ if (u->perpetual)
return true;
if (u->refs)
return true;
+ if (sd_bus_track_count(u->bus_track) > 0)
+ return true;
+
if (UNIT_VTABLE(u)->check_gc)
if (UNIT_VTABLE(u)->check_gc(u))
return true;
@@ -509,11 +533,17 @@ void unit_free(Unit *u) {
sd_bus_slot_unref(u->match_bus_slot);
+ sd_bus_track_unref(u->bus_track);
+ u->deserialized_refs = strv_free(u->deserialized_refs);
+
unit_free_requires_mounts_for(u);
SET_FOREACH(t, u->names, i)
hashmap_remove_value(u->manager->units, t, u);
+ if (!sd_id128_is_null(u->invocation_id))
+ hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
if (u->job) {
Job *j = u->job;
job_uninstall(j);
@@ -551,6 +581,8 @@ void unit_free(Unit *u) {
unit_release_cgroup(u);
+ unit_unref_uid_gid(u, false);
+
(void) manager_update_failed_units(u->manager, u, false);
set_remove(u->manager->startup_units, u);
@@ -847,18 +879,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
return r;
}
- if (c->std_output != EXEC_OUTPUT_KMSG &&
- c->std_output != EXEC_OUTPUT_SYSLOG &&
- c->std_output != EXEC_OUTPUT_JOURNAL &&
- c->std_output != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
- c->std_output != EXEC_OUTPUT_SYSLOG_AND_CONSOLE &&
- c->std_output != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
- c->std_error != EXEC_OUTPUT_KMSG &&
- c->std_error != EXEC_OUTPUT_SYSLOG &&
- c->std_error != EXEC_OUTPUT_JOURNAL &&
- c->std_error != EXEC_OUTPUT_KMSG_AND_CONSOLE &&
- c->std_error != EXEC_OUTPUT_JOURNAL_AND_CONSOLE &&
- c->std_error != EXEC_OUTPUT_SYSLOG_AND_CONSOLE)
+ if (!IN_SET(c->std_output,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE) &&
+ !IN_SET(c->std_error,
+ EXEC_OUTPUT_JOURNAL, EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
+ EXEC_OUTPUT_KMSG, EXEC_OUTPUT_KMSG_AND_CONSOLE,
+ EXEC_OUTPUT_SYSLOG, EXEC_OUTPUT_SYSLOG_AND_CONSOLE))
return 0;
/* If syslog or kernel logging is requested, make sure our own
@@ -895,6 +923,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
Unit *following;
_cleanup_set_free_ Set *following_set = NULL;
int r;
+ const char *n;
assert(u);
assert(u->type >= 0);
@@ -916,6 +945,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
"%s\tGC Check Good: %s\n"
"%s\tNeed Daemon Reload: %s\n"
"%s\tTransient: %s\n"
+ "%s\tPerpetual: %s\n"
"%s\tSlice: %s\n"
"%s\tCGroup: %s\n"
"%s\tCGroup realized: %s\n"
@@ -934,6 +964,7 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
prefix, yes_no(unit_check_gc(u)),
prefix, yes_no(unit_need_daemon_reload(u)),
prefix, yes_no(u->transient),
+ prefix, yes_no(u->perpetual),
prefix, strna(unit_slice_name(u)),
prefix, strna(u->cgroup_path),
prefix, yes_no(u->cgroup_realized),
@@ -943,6 +974,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
SET_FOREACH(t, u->names, i)
fprintf(f, "%s\tName: %s\n", prefix, t);
+ if (!sd_id128_is_null(u->invocation_id))
+ fprintf(f, "%s\tInvocation ID: " SD_ID128_FORMAT_STR "\n",
+ prefix, SD_ID128_FORMAT_VAL(u->invocation_id));
+
STRV_FOREACH(j, u->documentation)
fprintf(f, "%s\tDocumentation: %s\n", prefix, *j);
@@ -970,8 +1005,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
if (u->job_timeout != USEC_INFINITY)
fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
- if (u->job_timeout_action != FAILURE_ACTION_NONE)
- fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action));
+ if (u->job_timeout_action != EMERGENCY_ACTION_NONE)
+ fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, emergency_action_to_string(u->job_timeout_action));
if (u->job_timeout_reboot_arg)
fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
@@ -1036,13 +1071,14 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
else if (u->load_state == UNIT_ERROR)
fprintf(f, "%s\tLoad Error Code: %s\n", prefix, strerror(-u->load_error));
+ for (n = sd_bus_track_first(u->bus_track); n; n = sd_bus_track_next(u->bus_track))
+ fprintf(f, "%s\tBus Ref: %s\n", prefix, n);
if (u->job)
job_dump(u->job, f, prefix2);
if (u->nop_job)
job_dump(u->nop_job, f, prefix2);
-
}
/* Common implementation for multiple backends */
@@ -1437,7 +1473,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
format = unit_get_status_message_format(u, t);
DISABLE_WARNING_FORMAT_NONLITERAL;
- xsprintf(buf, format, unit_description(u));
+ snprintf(buf, sizeof buf, format, unit_description(u));
REENABLE_WARNING;
mid = t == JOB_START ? SD_MESSAGE_UNIT_STARTING :
@@ -1477,7 +1513,7 @@ int unit_start_limit_test(Unit *u) {
log_unit_warning(u, "Start request repeated too quickly.");
u->start_limit_hit = true;
- return failure_action(u->manager, u->start_limit_action, u->reboot_arg);
+ return emergency_action(u->manager, u->start_limit_action, u->reboot_arg, "unit failed");
}
/* Errors:
@@ -1603,6 +1639,18 @@ int unit_stop(Unit *u) {
return UNIT_VTABLE(u)->stop(u);
}
+bool unit_can_stop(Unit *u) {
+ assert(u);
+
+ if (!unit_supported(u))
+ return false;
+
+ if (u->perpetual)
+ return false;
+
+ return !!UNIT_VTABLE(u)->stop;
+}
+
/* Errors:
* -EBADR: This unit type does not support reloading.
* -ENOEXEC: Unit is not started.
@@ -2137,13 +2185,20 @@ bool unit_job_is_applicable(Unit *u, JobType j) {
case JOB_VERIFY_ACTIVE:
case JOB_START:
- case JOB_STOP:
case JOB_NOP:
+ /* Note that we don't check unit_can_start() here. That's because .device units and suchlike are not
+ * startable by us but may appear due to external events, and it thus makes sense to permit enqueing
+ * jobs for it. */
return true;
+ case JOB_STOP:
+ /* Similar as above. However, perpetual units can never be stopped (neither explicitly nor due to
+ * external events), hence it makes no sense to permit enqueing such a request either. */
+ return !u->perpetual;
+
case JOB_RESTART:
case JOB_TRY_RESTART:
- return unit_can_start(u);
+ return unit_can_stop(u) && unit_can_start(u);
case JOB_RELOAD:
case JOB_TRY_RELOAD:
@@ -2213,6 +2268,11 @@ int unit_add_dependency(Unit *u, UnitDependency d, Unit *other, bool add_referen
return 0;
}
+ if (d == UNIT_BEFORE && other->type == UNIT_DEVICE) {
+ log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
+ return 0;
+ }
+
r = set_ensure_allocated(&u->dependencies[d], NULL);
if (r < 0)
return r;
@@ -2375,6 +2435,15 @@ char *unit_dbus_path(Unit *u) {
return unit_dbus_path_from_name(u->id);
}
+char *unit_dbus_path_invocation_id(Unit *u) {
+ assert(u);
+
+ if (sd_id128_is_null(u->invocation_id))
+ return NULL;
+
+ return unit_dbus_path_from_name(u->invocation_id_string);
+}
+
int unit_set_slice(Unit *u, Unit *slice) {
assert(u);
assert(slice);
@@ -2609,21 +2678,34 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "assert-result", yes_no(u->assert_result));
unit_serialize_item(u, f, "transient", yes_no(u->transient));
- unit_serialize_item_format(u, f, "cpuacct-usage-base", "%" PRIu64, u->cpuacct_usage_base);
+
+ unit_serialize_item_format(u, f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
+ if (u->cpu_usage_last != NSEC_INFINITY)
+ unit_serialize_item_format(u, f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
if (u->cgroup_path)
unit_serialize_item(u, f, "cgroup", u->cgroup_path);
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
+ if (uid_is_valid(u->ref_uid))
+ unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
+ if (gid_is_valid(u->ref_gid))
+ unit_serialize_item_format(u, f, "ref-gid", GID_FMT, u->ref_gid);
+
+ if (!sd_id128_is_null(u->invocation_id))
+ unit_serialize_item_format(u, f, "invocation-id", SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id));
+
+ bus_track_serialize(u->bus_track, f, "ref");
+
if (serialize_jobs) {
if (u->job) {
fprintf(f, "job\n");
- job_serialize(u->job, f, fds);
+ job_serialize(u->job, f);
}
if (u->nop_job) {
fprintf(f, "job\n");
- job_serialize(u->nop_job, f, fds);
+ job_serialize(u->nop_job, f);
}
}
@@ -2753,7 +2835,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
if (!j)
return log_oom();
- r = job_deserialize(j, f, fds);
+ r = job_deserialize(j, f);
if (r < 0) {
job_free(j);
return r;
@@ -2825,11 +2907,19 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
- } else if (streq(l, "cpuacct-usage-base")) {
+ } else if (STR_IN_SET(l, "cpu-usage-base", "cpuacct-usage-base")) {
+
+ r = safe_atou64(v, &u->cpu_usage_base);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse CPU usage base %s, ignoring.", v);
+
+ continue;
+
+ } else if (streq(l, "cpu-usage-last")) {
- r = safe_atou64(v, &u->cpuacct_usage_base);
+ r = safe_atou64(v, &u->cpu_usage_last);
if (r < 0)
- log_unit_debug(u, "Failed to parse CPU usage %s, ignoring.", v);
+ log_unit_debug(u, "Failed to read CPU usage last %s, ignoring.", v);
continue;
@@ -2852,6 +2942,47 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
u->cgroup_realized = b;
continue;
+
+ } else if (streq(l, "ref-uid")) {
+ uid_t uid;
+
+ r = parse_uid(v, &uid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced UID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, uid, GID_INVALID);
+
+ continue;
+
+ } else if (streq(l, "ref-gid")) {
+ gid_t gid;
+
+ r = parse_gid(v, &gid);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse referenced GID %s, ignoring.", v);
+ else
+ unit_ref_uid_gid(u, UID_INVALID, gid);
+
+ } else if (streq(l, "ref")) {
+
+ r = strv_extend(&u->deserialized_refs, v);
+ if (r < 0)
+ log_oom();
+
+ continue;
+ } else if (streq(l, "invocation-id")) {
+ sd_id128_t id;
+
+ r = sd_id128_from_string(v, &id);
+ if (r < 0)
+ log_unit_debug(u, "Failed to parse invocation id %s, ignoring.", v);
+ else {
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to set invocation ID for unit: %m");
+ }
+
+ continue;
}
if (unit_can_serialize(u)) {
@@ -2926,7 +3057,8 @@ int unit_add_node_link(Unit *u, const char *what, bool wants, UnitDependency dep
}
int unit_coldplug(Unit *u) {
- int r = 0, q = 0;
+ int r = 0, q;
+ char **i;
assert(u);
@@ -2937,21 +3069,29 @@ int unit_coldplug(Unit *u) {
u->coldplugged = true;
- if (UNIT_VTABLE(u)->coldplug)
- r = UNIT_VTABLE(u)->coldplug(u);
+ STRV_FOREACH(i, u->deserialized_refs) {
+ q = bus_unit_track_add_name(u, *i);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+ u->deserialized_refs = strv_free(u->deserialized_refs);
- if (u->job)
- q = job_coldplug(u->job);
+ if (UNIT_VTABLE(u)->coldplug) {
+ q = UNIT_VTABLE(u)->coldplug(u);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
- if (r < 0)
- return r;
- if (q < 0)
- return q;
+ if (u->job) {
+ q = job_coldplug(u->job);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
- return 0;
+ return r;
}
-static bool fragment_mtime_newer(const char *path, usec_t mtime) {
+static bool fragment_mtime_newer(const char *path, usec_t mtime, bool path_masked) {
struct stat st;
if (!path)
@@ -2961,12 +3101,12 @@ static bool fragment_mtime_newer(const char *path, usec_t mtime) {
/* What, cannot access this anymore? */
return true;
- if (mtime > 0)
+ if (path_masked)
+ /* For masked files check if they are still so */
+ return !null_or_empty(&st);
+ else
/* For non-empty files check the mtime */
return timespec_load(&st.st_mtim) > mtime;
- else if (!null_or_empty(&st))
- /* For masked files check if they are still so */
- return true;
return false;
}
@@ -2977,18 +3117,22 @@ bool unit_need_daemon_reload(Unit *u) {
assert(u);
- if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime))
+ /* For unit files, we allow masking… */
+ if (fragment_mtime_newer(u->fragment_path, u->fragment_mtime,
+ u->load_state == UNIT_MASKED))
return true;
- if (fragment_mtime_newer(u->source_path, u->source_mtime))
+ /* Source paths should not be masked… */
+ if (fragment_mtime_newer(u->source_path, u->source_mtime, false))
return true;
(void) unit_find_dropin_paths(u, &t);
if (!strv_equal(u->dropin_paths, t))
return true;
+ /* … any drop-ins that are masked are simply omitted from the list. */
STRV_FOREACH(path, u->dropin_paths)
- if (fragment_mtime_newer(*path, u->dropin_mtime))
+ if (fragment_mtime_newer(*path, u->dropin_mtime, false))
return true;
return false;
@@ -3225,6 +3369,33 @@ void unit_ref_unset(UnitRef *ref) {
ref->unit = NULL;
}
+static int user_from_unit_name(Unit *u, char **ret) {
+
+ static const uint8_t hash_key[] = {
+ 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96,
+ 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec
+ };
+
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ r = unit_name_to_prefix(u->id, &n);
+ if (r < 0)
+ return r;
+
+ if (valid_user_group_name(n)) {
+ *ret = n;
+ n = NULL;
+ return 0;
+ }
+
+ /* If we can't use the unit name as a user name, then let's hash it and use that */
+ if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
int unit_patch_contexts(Unit *u) {
CGroupContext *cc;
ExecContext *ec;
@@ -3268,7 +3439,33 @@ int unit_patch_contexts(Unit *u) {
ec->no_new_privileges = true;
if (ec->private_devices)
- ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD);
+ ec->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));
+
+ if (ec->protect_kernel_modules)
+ ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE);
+
+ if (ec->dynamic_user) {
+ if (!ec->user) {
+ r = user_from_unit_name(u, &ec->user);
+ if (r < 0)
+ return r;
+ }
+
+ if (!ec->group) {
+ ec->group = strdup(ec->user);
+ if (!ec->group)
+ return -ENOMEM;
+ }
+
+ /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID
+ * around in the file system or on IPC objects. Hence enforce a strict sandbox. */
+
+ ec->private_tmp = true;
+ ec->remove_ipc = true;
+ ec->protect_system = PROTECT_SYSTEM_STRICT;
+ if (ec->protect_home == PROTECT_HOME_NO)
+ ec->protect_home = PROTECT_HOME_READ_ONLY;
+ }
}
cc = unit_get_cgroup_context(u);
@@ -3653,7 +3850,7 @@ int unit_kill_context(
* there we get proper events. Hence rely on
* them.*/
- if (cg_unified() > 0 ||
+ if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
(detect_container() == 0 && !unit_cgroup_delegate(u)))
wait_for_exit = true;
@@ -3777,6 +3974,26 @@ int unit_setup_exec_runtime(Unit *u) {
return exec_runtime_make(rt, unit_get_exec_context(u), u->id);
}
+int unit_setup_dynamic_creds(Unit *u) {
+ ExecContext *ec;
+ DynamicCreds *dcreds;
+ size_t offset;
+
+ assert(u);
+
+ offset = UNIT_VTABLE(u)->dynamic_creds_offset;
+ assert(offset > 0);
+ dcreds = (DynamicCreds*) ((uint8_t*) u + offset);
+
+ ec = unit_get_exec_context(u);
+ assert(ec);
+
+ if (!ec->dynamic_user)
+ return 0;
+
+ return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group);
+}
+
bool unit_type_supported(UnitType t) {
if (_unlikely_(t < 0))
return false;
@@ -3870,3 +4087,198 @@ pid_t unit_main_pid(Unit *u) {
return 0;
}
+
+static void unit_unref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ bool destroy_now,
+ void (*_manager_unref_uid)(Manager *m, uid_t uid, bool destroy_now)) {
+
+ assert(u);
+ assert(ref_uid);
+ assert(_manager_unref_uid);
+
+ /* Generic implementation of both unit_unref_uid() and unit_unref_gid(), under the assumption that uid_t and
+ * gid_t are actually the same time, with the same validity rules.
+ *
+ * Drops a reference to UID/GID from a unit. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (!uid_is_valid(*ref_uid))
+ return;
+
+ _manager_unref_uid(u->manager, *ref_uid, destroy_now);
+ *ref_uid = UID_INVALID;
+}
+
+void unit_unref_uid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, &u->ref_uid, destroy_now, manager_unref_uid);
+}
+
+void unit_unref_gid(Unit *u, bool destroy_now) {
+ unit_unref_uid_internal(u, (uid_t*) &u->ref_gid, destroy_now, manager_unref_gid);
+}
+
+static int unit_ref_uid_internal(
+ Unit *u,
+ uid_t *ref_uid,
+ uid_t uid,
+ bool clean_ipc,
+ int (*_manager_ref_uid)(Manager *m, uid_t uid, bool clean_ipc)) {
+
+ int r;
+
+ assert(u);
+ assert(ref_uid);
+ assert(uid_is_valid(uid));
+ assert(_manager_ref_uid);
+
+ /* Generic implementation of both unit_ref_uid() and unit_ref_guid(), under the assumption that uid_t and gid_t
+ * are actually the same type, and have the same validity rules.
+ *
+ * Adds a reference on a specific UID/GID to this unit. Each unit referencing the same UID/GID maintains a
+ * reference so that we can destroy the UID/GID's IPC resources as soon as this is requested and the counter
+ * drops to zero. */
+
+ assert_cc(sizeof(uid_t) == sizeof(gid_t));
+ assert_cc(UID_INVALID == (uid_t) GID_INVALID);
+
+ if (*ref_uid == uid)
+ return 0;
+
+ if (uid_is_valid(*ref_uid)) /* Already set? */
+ return -EBUSY;
+
+ r = _manager_ref_uid(u->manager, uid, clean_ipc);
+ if (r < 0)
+ return r;
+
+ *ref_uid = uid;
+ return 1;
+}
+
+int unit_ref_uid(Unit *u, uid_t uid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, &u->ref_uid, uid, clean_ipc, manager_ref_uid);
+}
+
+int unit_ref_gid(Unit *u, gid_t gid, bool clean_ipc) {
+ return unit_ref_uid_internal(u, (uid_t*) &u->ref_gid, (uid_t) gid, clean_ipc, manager_ref_gid);
+}
+
+static int unit_ref_uid_gid_internal(Unit *u, uid_t uid, gid_t gid, bool clean_ipc) {
+ int r = 0, q = 0;
+
+ assert(u);
+
+ /* Reference both a UID and a GID in one go. Either references both, or neither. */
+
+ if (uid_is_valid(uid)) {
+ r = unit_ref_uid(u, uid, clean_ipc);
+ if (r < 0)
+ return r;
+ }
+
+ if (gid_is_valid(gid)) {
+ q = unit_ref_gid(u, gid, clean_ipc);
+ if (q < 0) {
+ if (r > 0)
+ unit_unref_uid(u, false);
+
+ return q;
+ }
+ }
+
+ return r > 0 || q > 0;
+}
+
+int unit_ref_uid_gid(Unit *u, uid_t uid, gid_t gid) {
+ ExecContext *c;
+ int r;
+
+ assert(u);
+
+ c = unit_get_exec_context(u);
+
+ r = unit_ref_uid_gid_internal(u, uid, gid, c ? c->remove_ipc : false);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Couldn't add UID/GID reference to unit, proceeding without: %m");
+
+ return r;
+}
+
+void unit_unref_uid_gid(Unit *u, bool destroy_now) {
+ assert(u);
+
+ unit_unref_uid(u, destroy_now);
+ unit_unref_gid(u, destroy_now);
+}
+
+void unit_notify_user_lookup(Unit *u, uid_t uid, gid_t gid) {
+ int r;
+
+ assert(u);
+
+ /* This is invoked whenever one of the forked off processes let's us know the UID/GID its user name/group names
+ * resolved to. We keep track of which UID/GID is currently assigned in order to be able to destroy its IPC
+ * objects when no service references the UID/GID anymore. */
+
+ r = unit_ref_uid_gid(u, uid, gid);
+ if (r > 0)
+ bus_unit_send_change_signal(u);
+}
+
+int unit_set_invocation_id(Unit *u, sd_id128_t id) {
+ int r;
+
+ assert(u);
+
+ /* Set the invocation ID for this unit. If we cannot, this will not roll back, but reset the whole thing. */
+
+ if (sd_id128_equal(u->invocation_id, id))
+ return 0;
+
+ if (!sd_id128_is_null(u->invocation_id))
+ (void) hashmap_remove_value(u->manager->units_by_invocation_id, &u->invocation_id, u);
+
+ if (sd_id128_is_null(id)) {
+ r = 0;
+ goto reset;
+ }
+
+ r = hashmap_ensure_allocated(&u->manager->units_by_invocation_id, &id128_hash_ops);
+ if (r < 0)
+ goto reset;
+
+ u->invocation_id = id;
+ sd_id128_to_string(id, u->invocation_id_string);
+
+ r = hashmap_put(u->manager->units_by_invocation_id, &u->invocation_id, u);
+ if (r < 0)
+ goto reset;
+
+ return 0;
+
+reset:
+ u->invocation_id = SD_ID128_NULL;
+ u->invocation_id_string[0] = 0;
+ return r;
+}
+
+int unit_acquire_invocation_id(Unit *u) {
+ sd_id128_t id;
+ int r;
+
+ assert(u);
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to generate invocation ID for unit: %m");
+
+ r = unit_set_invocation_id(u, id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "Failed to set invocation ID for unit: %m");
+
+ return 0;
+}
diff --git a/src/grp-system/systemctl/systemctl.c b/src/grp-system/systemctl/systemctl.c
index ab9fc4a427..086c9b494b 100644
--- a/src/grp-system/systemctl/systemctl.c
+++ b/src/grp-system/systemctl/systemctl.c
@@ -119,6 +119,7 @@ static enum dependency {
} arg_dependency = DEPENDENCY_FORWARD;
static const char *arg_job_mode = "replace";
static UnitFileScope arg_scope = UNIT_FILE_SYSTEM;
+static bool arg_wait = false;
static bool arg_no_block = false;
static bool arg_no_legend = false;
static bool arg_no_pager = false;
@@ -189,6 +190,11 @@ typedef enum BusFocus {
static sd_bus *busses[_BUS_FOCUS_MAX] = {};
+static UnitFileFlags args_to_flags(void) {
+ return (arg_runtime ? UNIT_FILE_RUNTIME : 0) |
+ (arg_force ? UNIT_FILE_FORCE : 0);
+}
+
static int acquire_bus(BusFocus focus, sd_bus **ret) {
int r;
@@ -362,22 +368,24 @@ static int compare_unit_info(const void *a, const void *b) {
return strcasecmp(u->id, v->id);
}
+static const char* unit_type_suffix(const char *name) {
+ const char *dot;
+
+ dot = strrchr(name, '.');
+ if (!dot)
+ return "";
+
+ return dot + 1;
+}
+
static bool output_show_unit(const UnitInfo *u, char **patterns) {
assert(u);
if (!strv_fnmatch_or_empty(patterns, u->id, FNM_NOESCAPE))
return false;
- if (arg_types) {
- const char *dot;
-
- dot = strrchr(u->id, '.');
- if (!dot)
- return false;
-
- if (!strv_find(arg_types, dot+1))
- return false;
- }
+ if (arg_types && !strv_find(arg_types, unit_type_suffix(u->id)))
+ return false;
if (arg_all)
return true;
@@ -403,7 +411,7 @@ static bool output_show_unit(const UnitInfo *u, char **patterns) {
}
static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
- unsigned circle_len = 0, id_len, max_id_len, load_len, active_len, sub_len, job_len, desc_len;
+ unsigned circle_len = 0, id_len, max_id_len, load_len, active_len, sub_len, job_len, desc_len, max_desc_len;
const UnitInfo *u;
unsigned n_shown = 0;
int job_count = 0;
@@ -413,13 +421,14 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
active_len = strlen("ACTIVE");
sub_len = strlen("SUB");
job_len = strlen("JOB");
- desc_len = 0;
+ max_desc_len = strlen("DESCRIPTION");
for (u = unit_infos; u < unit_infos + c; u++) {
max_id_len = MAX(max_id_len, strlen(u->id) + (u->machine ? strlen(u->machine)+1 : 0));
load_len = MAX(load_len, strlen(u->load_state));
active_len = MAX(active_len, strlen(u->active_state));
sub_len = MAX(sub_len, strlen(u->sub_state));
+ max_desc_len = MAX(max_desc_len, strlen(u->description));
if (u->job_id != 0) {
job_len = MAX(job_len, strlen(u->job_type));
@@ -435,7 +444,7 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
if (!arg_full && original_stdout_is_tty) {
unsigned basic_len;
- id_len = MIN(max_id_len, 25u);
+ id_len = MIN(max_id_len, 25u); /* as much as it needs, but at most 25 for now */
basic_len = circle_len + 5 + id_len + 5 + active_len + sub_len;
if (job_count)
@@ -448,34 +457,38 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
/* Either UNIT already got 25, or is fully satisfied.
* Grant up to 25 to DESC now. */
incr = MIN(extra_len, 25u);
- desc_len += incr;
+ desc_len = incr;
extra_len -= incr;
- /* split the remaining space between UNIT and DESC,
- * but do not give UNIT more than it needs. */
+ /* Of the remainder give as much as the ID needs to the ID, and give the rest to the
+ * description but not more than it needs. */
if (extra_len > 0) {
- incr = MIN(extra_len / 2, max_id_len - id_len);
+ incr = MIN(max_id_len - id_len, extra_len);
id_len += incr;
- desc_len += extra_len - incr;
+ desc_len += MIN(extra_len - incr, max_desc_len - desc_len);
}
}
- } else
+ } else {
id_len = max_id_len;
+ desc_len = max_desc_len;
+ }
for (u = unit_infos; u < unit_infos + c; u++) {
_cleanup_free_ char *e = NULL, *j = NULL;
+ const char *on_underline = "", *off_underline = "";
const char *on_loaded = "", *off_loaded = "";
const char *on_active = "", *off_active = "";
const char *on_circle = "", *off_circle = "";
const char *id;
- bool circle = false;
+ bool circle = false, underline = false;
if (!n_shown && !arg_no_legend) {
if (circle_len > 0)
fputs(" ", stdout);
- printf("%-*s %-*s %-*s %-*s ",
+ printf("%s%-*s %-*s %-*s %-*s ",
+ ansi_underline(),
id_len, "UNIT",
load_len, "LOAD",
active_len, "ACTIVE",
@@ -484,23 +497,34 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
if (job_count)
printf("%-*s ", job_len, "JOB");
- if (!arg_full && arg_no_pager)
- printf("%.*s\n", desc_len, "DESCRIPTION");
- else
- printf("%s\n", "DESCRIPTION");
+ printf("%-*.*s%s\n",
+ desc_len,
+ !arg_full && arg_no_pager ? (int) desc_len : -1,
+ "DESCRIPTION",
+ ansi_normal());
}
n_shown++;
+ if (u + 1 < unit_infos + c &&
+ !streq(unit_type_suffix(u->id), unit_type_suffix((u + 1)->id))) {
+ on_underline = ansi_underline();
+ off_underline = ansi_normal();
+ underline = true;
+ }
+
if (STR_IN_SET(u->load_state, "error", "not-found", "masked") && !arg_plain) {
- on_loaded = ansi_highlight_red();
on_circle = ansi_highlight_yellow();
- off_loaded = off_circle = ansi_normal();
+ off_circle = ansi_normal();
circle = true;
+ on_loaded = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ off_loaded = underline ? on_underline : ansi_normal();
} else if (streq(u->active_state, "failed") && !arg_plain) {
- on_circle = on_active = ansi_highlight_red();
- off_circle = off_active = ansi_normal();
+ on_circle = ansi_highlight_red();
+ off_circle = ansi_normal();
circle = true;
+ on_active = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ off_active = underline ? on_underline : ansi_normal();
}
if (u->machine) {
@@ -523,17 +547,19 @@ static int output_units_list(const UnitInfo *unit_infos, unsigned c) {
if (circle_len > 0)
printf("%s%s%s ", on_circle, circle ? special_glyph(BLACK_CIRCLE) : " ", off_circle);
- printf("%s%-*s%s %s%-*s%s %s%-*s %-*s%s %-*s",
+ printf("%s%s%-*s%s %s%-*s%s %s%-*s %-*s%s %-*s",
+ on_underline,
on_active, id_len, id, off_active,
on_loaded, load_len, u->load_state, off_loaded,
on_active, active_len, u->active_state,
sub_len, u->sub_state, off_active,
job_count ? job_len + 1 : 0, u->job_id ? u->job_type : "");
- if (desc_len > 0)
- printf("%.*s\n", desc_len, u->description);
- else
- printf("%s\n", u->description);
+ printf("%-*.*s%s\n",
+ desc_len,
+ !arg_full && arg_no_pager ? (int) desc_len : -1,
+ u->description,
+ off_underline);
}
if (!arg_no_legend) {
@@ -1395,35 +1421,46 @@ static void output_unit_file_list(const UnitFileList *units, unsigned c) {
id_cols = max_id_len;
if (!arg_no_legend && c > 0)
- printf("%-*s %-*s\n",
+ printf("%s%-*s %-*s%s\n",
+ ansi_underline(),
id_cols, "UNIT FILE",
- state_cols, "STATE");
+ state_cols, "STATE",
+ ansi_normal());
for (u = units; u < units + c; u++) {
_cleanup_free_ char *e = NULL;
- const char *on, *off;
+ const char *on, *off, *on_underline = "", *off_underline = "";
const char *id;
+ bool underline = false;
+
+ if (u + 1 < units + c &&
+ !streq(unit_type_suffix(u->path), unit_type_suffix((u + 1)->path))) {
+ on_underline = ansi_underline();
+ off_underline = ansi_normal();
+ underline = true;
+ }
if (IN_SET(u->state,
UNIT_FILE_MASKED,
UNIT_FILE_MASKED_RUNTIME,
UNIT_FILE_DISABLED,
- UNIT_FILE_BAD)) {
- on = ansi_highlight_red();
- off = ansi_normal();
- } else if (u->state == UNIT_FILE_ENABLED) {
- on = ansi_highlight_green();
- off = ansi_normal();
- } else
- on = off = "";
+ UNIT_FILE_BAD))
+ on = underline ? ansi_highlight_red_underline() : ansi_highlight_red();
+ else if (u->state == UNIT_FILE_ENABLED)
+ on = underline ? ansi_highlight_green_underline() : ansi_highlight_green();
+ else
+ on = on_underline;
+ off = off_underline;
id = basename(u->path);
e = arg_full ? NULL : ellipsize(id, id_cols, 33);
- printf("%-*s %s%-*s%s\n",
+ printf("%s%-*s %s%-*s%s%s\n",
+ on_underline,
id_cols, e ? e : id,
- on, state_cols, unit_file_state_to_string(u->state), off);
+ on, state_cols, unit_file_state_to_string(u->state), off,
+ off_underline);
}
if (!arg_no_legend)
@@ -2111,7 +2148,7 @@ static int set_default(int argc, char *argv[], void *userdata) {
return log_error_errno(r, "Failed to mangle unit name: %m");
if (install_client_side()) {
- r = unit_file_set_default(arg_scope, arg_root, unit, true, &changes, &n_changes);
+ r = unit_file_set_default(arg_scope, UNIT_FILE_FORCE, arg_root, unit, &changes, &n_changes);
unit_file_dump_changes(r, "set default", changes, n_changes, arg_quiet);
if (r > 0)
@@ -2680,13 +2717,92 @@ static const char *method_to_verb(const char *method) {
return "n/a";
}
+typedef struct {
+ sd_bus_slot *match;
+ sd_event *event;
+ Set *unit_paths;
+ bool any_failed;
+} WaitContext;
+
+static void wait_context_free(WaitContext *c) {
+ c->match = sd_bus_slot_unref(c->match);
+ c->event = sd_event_unref(c->event);
+ c->unit_paths = set_free_free(c->unit_paths);
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ WaitContext *c = userdata;
+ const char *path;
+ int r;
+
+ path = sd_bus_message_get_path(m);
+ if (!set_contains(c->unit_paths, path))
+ return 0;
+
+ /* Check if ActiveState changed to inactive/failed */
+ /* (s interface, a{sv} changed_properties, as invalidated_properties) */
+ r = sd_bus_message_skip(m, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ const char *s;
+
+ r = sd_bus_message_read(m, "s", &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (streq(s, "ActiveState")) {
+ bool is_failed;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_read(m, "s", &s);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ is_failed = streq(s, "failed");
+ if (streq(s, "inactive") || is_failed) {
+ log_debug("%s became %s, dropping from --wait tracking", path, s);
+ free(set_remove(c->unit_paths, path));
+ c->any_failed = c->any_failed || is_failed;
+ } else
+ log_debug("ActiveState on %s changed to %s", path, s);
+
+ break; /* no need to dissect the rest of the message */
+ } else {
+ /* other property */
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ if (set_isempty(c->unit_paths))
+ sd_event_exit(c->event, EXIT_SUCCESS);
+
+ return 0;
+}
+
static int start_unit_one(
sd_bus *bus,
const char *method,
const char *name,
const char *mode,
sd_bus_error *error,
- BusWaitForJobs *w) {
+ BusWaitForJobs *w,
+ WaitContext *wait_context) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
const char *path;
@@ -2697,6 +2813,40 @@ static int start_unit_one(
assert(mode);
assert(error);
+ if (wait_context) {
+ _cleanup_free_ char *unit_path = NULL;
+ const char* mt;
+
+ log_debug("Watching for property changes of %s", name);
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "RefUnit",
+ error,
+ NULL,
+ "s", name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to RefUnit %s: %s", name, bus_error_message(error, r));
+
+ unit_path = unit_dbus_path_from_name(name);
+ if (!unit_path)
+ return log_oom();
+
+ r = set_put_strdup(wait_context->unit_paths, unit_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit path %s to set: %m", unit_path);
+
+ mt = strjoina("type='signal',"
+ "interface='org.freedesktop.DBus.Properties',"
+ "path='", unit_path, "',"
+ "member='PropertiesChanged'");
+ r = sd_bus_add_match(bus, &wait_context->match, mt, on_properties_changed, wait_context);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match for PropertiesChanged signal: %m");
+ }
+
log_debug("Calling manager for %s on %s, %s", method, name, mode);
r = sd_bus_call_method(
@@ -2721,9 +2871,10 @@ static int start_unit_one(
if (!sd_bus_error_has_name(error, BUS_ERROR_NO_SUCH_UNIT) &&
!sd_bus_error_has_name(error, BUS_ERROR_UNIT_MASKED))
- log_error("See %s logs and 'systemctl%s status %s' for details.",
+ log_error("See %s logs and 'systemctl%s status%s %s' for details.",
arg_scope == UNIT_FILE_SYSTEM ? "system" : "user",
arg_scope == UNIT_FILE_SYSTEM ? "" : " --user",
+ name[0] == '-' ? " --" : "",
name);
return r;
@@ -2841,10 +2992,18 @@ static int start_unit(int argc, char *argv[], void *userdata) {
const char *method, *mode, *one_name, *suffix = NULL;
_cleanup_strv_free_ char **names = NULL;
sd_bus *bus;
+ _cleanup_(wait_context_free) WaitContext wait_context = {};
char **name;
int r = 0;
- r = acquire_bus(BUS_MANAGER, &bus);
+ if (arg_wait && !strstr(argv[0], "start")) {
+ log_error("--wait may only be used with a command that starts units.");
+ return -EINVAL;
+ }
+
+ /* we cannot do sender tracking on the private bus, so we need the full
+ * one for RefUnit to implement --wait */
+ r = acquire_bus(arg_wait ? BUS_FULL : BUS_MANAGER, &bus);
if (r < 0)
return r;
@@ -2888,11 +3047,36 @@ static int start_unit(int argc, char *argv[], void *userdata) {
return log_error_errno(r, "Could not watch jobs: %m");
}
+ if (arg_wait) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ wait_context.unit_paths = set_new(&string_hash_ops);
+ if (!wait_context.unit_paths)
+ return log_oom();
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "Subscribe",
+ &error,
+ NULL, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable subscription: %s", bus_error_message(&error, r));
+ r = sd_event_default(&wait_context.event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate event loop: %m");
+ r = sd_bus_attach_event(bus, wait_context.event, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach bus to event loop: %m");
+ }
+
STRV_FOREACH(name, names) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
int q;
- q = start_unit_one(bus, method, *name, mode, &error, w);
+ q = start_unit_one(bus, method, *name, mode, &error, w, arg_wait ? &wait_context : NULL);
if (r >= 0 && q < 0)
r = translate_bus_error_to_exit_status(q, &error);
}
@@ -2924,6 +3108,15 @@ static int start_unit(int argc, char *argv[], void *userdata) {
check_triggering_units(bus, *name);
}
+ if (r >= 0 && arg_wait) {
+ int q;
+ q = sd_event_loop(wait_context.event);
+ if (q < 0)
+ return log_error_errno(q, "Failed to run event loop: %m");
+ if (wait_context.any_failed)
+ r = EXIT_FAILURE;
+ }
+
return r;
}
@@ -3121,7 +3314,7 @@ static int logind_check_inhibitors(enum action a) {
if (sd_session_get_class(*s, &class) < 0 || !streq(class, "user"))
continue;
- if (sd_session_get_type(*s, &type) < 0 || (!streq(type, "x11") && !streq(type, "tty")))
+ if (sd_session_get_type(*s, &type) < 0 || !STR_IN_SET(type, "x11", "tty"))
continue;
sd_session_get_tty(*s, &tty);
@@ -3385,9 +3578,9 @@ static int kill_unit(int argc, char *argv[], void *userdata) {
"KillUnit",
&error,
NULL,
- "ssi", *names, kill_who ? kill_who : arg_kill_who, arg_signal);
+ "ssi", *name, kill_who ? kill_who : arg_kill_who, arg_signal);
if (q < 0) {
- log_error_errno(q, "Failed to kill unit %s: %s", *names, bus_error_message(&error, q));
+ log_error_errno(q, "Failed to kill unit %s: %s", *name, bus_error_message(&error, q));
if (r == 0)
r = q;
}
@@ -3572,6 +3765,7 @@ typedef struct UnitStatusInfo {
uint64_t memory_low;
uint64_t memory_high;
uint64_t memory_max;
+ uint64_t memory_swap_max;
uint64_t memory_limit;
uint64_t cpu_usage_nsec;
uint64_t tasks_current;
@@ -3621,7 +3815,7 @@ static void print_status_info(
if (streq_ptr(i->active_state, "failed")) {
active_on = ansi_highlight_red();
active_off = ansi_normal();
- } else if (streq_ptr(i->active_state, "active") || streq_ptr(i->active_state, "reloading")) {
+ } else if (STRPTR_IN_SET(i->active_state, "active", "reloading")) {
active_on = ansi_highlight_green();
active_off = ansi_normal();
} else
@@ -3702,12 +3896,10 @@ static void print_status_info(
if (!isempty(i->result) && !streq(i->result, "success"))
printf(" (Result: %s)", i->result);
- timestamp = (streq_ptr(i->active_state, "active") ||
- streq_ptr(i->active_state, "reloading")) ? i->active_enter_timestamp :
- (streq_ptr(i->active_state, "inactive") ||
- streq_ptr(i->active_state, "failed")) ? i->inactive_enter_timestamp :
- streq_ptr(i->active_state, "activating") ? i->inactive_exit_timestamp :
- i->active_exit_timestamp;
+ timestamp = STRPTR_IN_SET(i->active_state, "active", "reloading") ? i->active_enter_timestamp :
+ STRPTR_IN_SET(i->active_state, "inactive", "failed") ? i->inactive_enter_timestamp :
+ STRPTR_IN_SET(i->active_state, "activating") ? i->inactive_exit_timestamp :
+ i->active_exit_timestamp;
s1 = format_timestamp_relative(since1, sizeof(since1), timestamp);
s2 = format_timestamp(since2, sizeof(since2), timestamp);
@@ -3787,7 +3979,7 @@ static void print_status_info(
argv = strv_join(p->argv, " ");
printf(" Process: "PID_FMT" %s=%s ", p->pid, p->name, strna(argv));
- good = is_clean_exit_lsb(p->code, p->status, NULL);
+ good = is_clean_exit(p->code, p->status, EXIT_CLEAN_DAEMON, NULL);
if (!good) {
on = ansi_highlight_red();
off = ansi_normal();
@@ -3884,7 +4076,8 @@ static void print_status_info(
printf(" Memory: %s", format_bytes(buf, sizeof(buf), i->memory_current));
- if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX ||
+ if (i->memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX ||
+ i->memory_max != CGROUP_LIMIT_MAX || i->memory_swap_max != CGROUP_LIMIT_MAX ||
i->memory_limit != CGROUP_LIMIT_MAX) {
const char *prefix = "";
@@ -3901,6 +4094,10 @@ static void print_status_info(
printf("%smax: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_max));
prefix = " ";
}
+ if (i->memory_swap_max != CGROUP_LIMIT_MAX) {
+ printf("%sswap max: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_swap_max));
+ prefix = " ";
+ }
if (i->memory_limit != CGROUP_LIMIT_MAX) {
printf("%slimit: %s", prefix, format_bytes(buf, sizeof(buf), i->memory_limit));
prefix = " ";
@@ -4141,6 +4338,8 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
i->memory_high = u;
else if (streq(name, "MemoryMax"))
i->memory_max = u;
+ else if (streq(name, "MemorySwapMax"))
+ i->memory_swap_max = u;
else if (streq(name, "MemoryLimit"))
i->memory_limit = u;
else if (streq(name, "TasksCurrent"))
@@ -4575,7 +4774,8 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte
return 0;
- } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (streq(name, "IODeviceWeight") || streq(name, "BlockIODeviceWeight"))) {
+ } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ STR_IN_SET(name, "IODeviceWeight", "BlockIODeviceWeight")) {
const char *path;
uint64_t weight;
@@ -4594,8 +4794,9 @@ static int print_property(const char *name, sd_bus_message *m, const char *conte
return 0;
- } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN && (cgroup_io_limit_type_from_string(name) >= 0 ||
- streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) {
+ } else if (contents[1] == SD_BUS_TYPE_STRUCT_BEGIN &&
+ (cgroup_io_limit_type_from_string(name) >= 0 ||
+ STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth"))) {
const char *path;
uint64_t bandwidth;
@@ -4656,6 +4857,7 @@ static int show_one(
.memory_current = (uint64_t) -1,
.memory_high = CGROUP_LIMIT_MAX,
.memory_max = CGROUP_LIMIT_MAX,
+ .memory_swap_max = CGROUP_LIMIT_MAX,
.memory_limit = (uint64_t) -1,
.cpu_usage_nsec = (uint64_t) -1,
.tasks_current = (uint64_t) -1,
@@ -4771,7 +4973,7 @@ static int show_one(
else if (streq(verb, "status")) {
print_status_info(bus, &info, ellipsized);
- if (info.active_state && STR_IN_SET(info.active_state, "inactive", "failed"))
+ if (info.active_state && !STR_IN_SET(info.active_state, "active", "reloading"))
r = EXIT_PROGRAM_NOT_RUNNING;
else
r = EXIT_PROGRAM_RUNNING_OR_SERVICE_OK;
@@ -5076,6 +5278,20 @@ static int cat(int argc, char *argv[], void *userdata) {
else
puts("");
+ if (need_daemon_reload(bus, *name) > 0) /* ignore errors (<0), this is informational output */
+ fprintf(stderr,
+ "%s# Warning: %s changed on disk, the version systemd has loaded is outdated.\n"
+ "%s# This output shows the current version of the unit's original fragment and drop-in files.\n"
+ "%s# If fragments or drop-ins were added or removed, they are not properly reflected in this output.\n"
+ "%s# Run 'systemctl%s daemon-reload' to reload units.%s\n",
+ ansi_highlight_red(),
+ *name,
+ ansi_highlight_red(),
+ ansi_highlight_red(),
+ ansi_highlight_red(),
+ arg_scope == UNIT_FILE_SYSTEM ? "" : " --user",
+ ansi_normal());
+
if (fragment_path) {
r = cat_file(fragment_path, false);
if (r < 0)
@@ -5097,7 +5313,6 @@ static int set_property(int argc, char *argv[], void *userdata) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
_cleanup_free_ char *n = NULL;
sd_bus *bus;
- char **i;
int r;
r = acquire_bus(BUS_MANAGER, &bus);
@@ -5128,11 +5343,9 @@ static int set_property(int argc, char *argv[], void *userdata) {
if (r < 0)
return bus_log_create_error(r);
- STRV_FOREACH(i, strv_skip(argv, 2)) {
- r = bus_append_unit_property_assignment(m, *i);
- if (r < 0)
- return r;
- }
+ r = bus_append_unit_property_assignment_many(m, strv_skip(argv, 2));
+ if (r < 0)
+ return r;
r = sd_bus_message_close_container(m);
if (r < 0)
@@ -5570,10 +5783,12 @@ static int enable_sysv_units(const char *verb, char **args) {
if (!found_sysv)
continue;
- if (found_native)
- log_info("Synchronizing state of %s with SysV service script with %s.", name, argv[0]);
- else
- log_info("%s is not a native service, redirecting to systemd-sysv-install.", name);
+ if (!arg_quiet) {
+ if (found_native)
+ log_info("Synchronizing state of %s with SysV service script with %s.", name, argv[0]);
+ else
+ log_info("%s is not a native service, redirecting to systemd-sysv-install.", name);
+ }
if (!isempty(arg_root))
argv[c++] = q = strappend("--root=", arg_root);
@@ -5674,6 +5889,29 @@ static int mangle_names(char **original_names, char ***mangled_names) {
return 0;
}
+static int normalize_names(char **names, bool warn_if_path) {
+ char **u;
+ bool was_path = false;
+
+ STRV_FOREACH(u, names) {
+ int r;
+
+ if (!is_path(*u))
+ continue;
+
+ r = free_and_strdup(u, basename(*u));
+ if (r < 0)
+ return log_error_errno(r, "Failed to normalize unit file path: %m");
+
+ was_path = true;
+ }
+
+ if (warn_if_path && was_path)
+ log_warning("Warning: Can't execute disable on the unit file path. Proceeding with the unit name.");
+
+ return 0;
+}
+
static int unit_exists(const char *unit) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
@@ -5741,23 +5979,32 @@ static int enable_unit(int argc, char *argv[], void *userdata) {
return daemon_reload(argc, argv, userdata);
}
+ if (streq(verb, "disable")) {
+ r = normalize_names(names, true);
+ if (r < 0)
+ return r;
+ }
+
if (install_client_side()) {
+ UnitFileFlags flags;
+
+ flags = args_to_flags();
if (streq(verb, "enable")) {
- r = unit_file_enable(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+ r = unit_file_enable(arg_scope, flags, arg_root, names, &changes, &n_changes);
carries_install_info = r;
} else if (streq(verb, "disable"))
- r = unit_file_disable(arg_scope, arg_runtime, arg_root, names, &changes, &n_changes);
+ r = unit_file_disable(arg_scope, flags, arg_root, names, &changes, &n_changes);
else if (streq(verb, "reenable")) {
- r = unit_file_reenable(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+ r = unit_file_reenable(arg_scope, flags, arg_root, names, &changes, &n_changes);
carries_install_info = r;
} else if (streq(verb, "link"))
- r = unit_file_link(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+ r = unit_file_link(arg_scope, flags, arg_root, names, &changes, &n_changes);
else if (streq(verb, "preset")) {
- r = unit_file_preset(arg_scope, arg_runtime, arg_root, names, arg_preset_mode, arg_force, &changes, &n_changes);
+ r = unit_file_preset(arg_scope, flags, arg_root, names, arg_preset_mode, &changes, &n_changes);
} else if (streq(verb, "mask"))
- r = unit_file_mask(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes);
+ r = unit_file_mask(arg_scope, flags, arg_root, names, &changes, &n_changes);
else if (streq(verb, "unmask"))
- r = unit_file_unmask(arg_scope, arg_runtime, arg_root, names, &changes, &n_changes);
+ r = unit_file_unmask(arg_scope, flags, arg_root, names, &changes, &n_changes);
else if (streq(verb, "revert"))
r = unit_file_revert(arg_scope, arg_root, names, &changes, &n_changes);
else
@@ -5939,7 +6186,7 @@ static int add_dependency(int argc, char *argv[], void *userdata) {
assert_not_reached("Unknown verb");
if (install_client_side()) {
- r = unit_file_add_dependency(arg_scope, arg_runtime, arg_root, names, target, dep, arg_force, &changes, &n_changes);
+ r = unit_file_add_dependency(arg_scope, args_to_flags(), arg_root, names, target, dep, &changes, &n_changes);
unit_file_dump_changes(r, "add dependency on", changes, n_changes, arg_quiet);
if (r > 0)
@@ -6001,7 +6248,7 @@ static int preset_all(int argc, char *argv[], void *userdata) {
int r;
if (install_client_side()) {
- r = unit_file_preset_all(arg_scope, arg_runtime, arg_root, arg_preset_mode, arg_force, &changes, &n_changes);
+ r = unit_file_preset_all(arg_scope, args_to_flags(), arg_root, arg_preset_mode, &changes, &n_changes);
unit_file_dump_changes(r, "preset", changes, n_changes, arg_quiet);
if (r > 0)
@@ -6050,6 +6297,63 @@ finish:
return r;
}
+static int show_installation_targets_client_side(const char *name) {
+ UnitFileChange *changes = NULL;
+ unsigned n_changes = 0, i;
+ UnitFileFlags flags;
+ char **p;
+ int r;
+
+ p = STRV_MAKE(name);
+ flags = UNIT_FILE_DRY_RUN |
+ (arg_runtime ? UNIT_FILE_RUNTIME : 0);
+
+ r = unit_file_disable(UNIT_FILE_SYSTEM, flags, NULL, p, &changes, &n_changes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get file links for %s: %m", name);
+
+ for (i = 0; i < n_changes; i++)
+ if (changes[i].type == UNIT_FILE_UNLINK)
+ printf(" %s\n", changes[i].path);
+
+ return 0;
+}
+
+static int show_installation_targets(sd_bus *bus, const char *name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *link;
+ int r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitFileLinks",
+ &error,
+ &reply,
+ "sb", name, arg_runtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unit file links for %s: %s", name, bus_error_message(&error, r));
+
+ r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(reply, "s", &link)) > 0)
+ printf(" %s\n", link);
+
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ return 0;
+}
+
static int unit_is_enabled(int argc, char *argv[], void *userdata) {
_cleanup_strv_free_ char **names = NULL;
@@ -6068,7 +6372,6 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) {
enabled = r > 0;
if (install_client_side()) {
-
STRV_FOREACH(name, names) {
UnitFileState state;
@@ -6084,8 +6387,14 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) {
UNIT_FILE_GENERATED))
enabled = true;
- if (!arg_quiet)
+ if (!arg_quiet) {
puts(unit_file_state_to_string(state));
+ if (arg_full) {
+ r = show_installation_targets_client_side(*name);
+ if (r < 0)
+ return r;
+ }
+ }
}
r = 0;
@@ -6120,8 +6429,14 @@ static int unit_is_enabled(int argc, char *argv[], void *userdata) {
if (STR_IN_SET(s, "enabled", "enabled-runtime", "static", "indirect", "generated"))
enabled = true;
- if (!arg_quiet)
+ if (!arg_quiet) {
puts(s);
+ if (arg_full) {
+ r = show_installation_targets(bus, *name);
+ if (r < 0)
+ return r;
+ }
+ }
}
}
@@ -6534,9 +6849,9 @@ static void systemctl_help(void) {
" -t --type=TYPE List units of a particular type\n"
" --state=STATE List units with particular LOAD or SUB or ACTIVE state\n"
" -p --property=NAME Show only properties by this name\n"
- " -a --all Show all loaded units/properties, including dead/empty\n"
- " ones. To list all units installed on the system, use\n"
- " the 'list-unit-files' command instead.\n"
+ " -a --all Show all properties/all units currently in memory,\n"
+ " including dead/empty ones. To list all units installed on\n"
+ " the system, use the 'list-unit-files' command instead.\n"
" -l --full Don't ellipsize unit names on output\n"
" -r --recursive Show unit list of host and local containers\n"
" --reverse Show reverse dependencies with 'list-dependencies'\n"
@@ -6550,6 +6865,7 @@ static void systemctl_help(void) {
" -s --signal=SIGNAL Which signal to send\n"
" --now Start or stop unit in addition to enabling or disabling it\n"
" -q --quiet Suppress output\n"
+ " --wait For (re)start, wait until service stopped again\n"
" --no-block Do not wait until operation finished\n"
" --no-wall Don't send wall message before halt/power-off/reboot\n"
" --no-reload Don't reload daemon after en-/dis-abling unit files\n"
@@ -6564,15 +6880,17 @@ static void systemctl_help(void) {
" --preset-mode= Apply only enable, only disable, or all presets\n"
" --root=PATH Enable unit files in the specified root directory\n"
" -n --lines=INTEGER Number of journal entries to show\n"
- " -o --output=STRING Change journal output mode (short, short-iso,\n"
- " short-precise, short-monotonic, verbose,\n"
- " export, json, json-pretty, json-sse, cat)\n"
+ " -o --output=STRING Change journal output mode (short, short-precise,\n"
+ " short-iso, short-full, short-monotonic, short-unix,\n"
+ " verbose, export, json, json-pretty, json-sse, cat)\n"
" --firmware-setup Tell the firmware to show the setup menu on next boot\n"
" --plain Print unit dependencies as a list instead of a tree\n\n"
"Unit Commands:\n"
- " list-units [PATTERN...] List loaded units\n"
- " list-sockets [PATTERN...] List loaded sockets ordered by address\n"
- " list-timers [PATTERN...] List loaded timers ordered by next elapse\n"
+ " list-units [PATTERN...] List units currently in memory\n"
+ " list-sockets [PATTERN...] List socket units currently in memory, ordered\n"
+ " by address\n"
+ " list-timers [PATTERN...] List timer units currently in memory, ordered\n"
+ " by next elapse\n"
" start NAME... Start (activate) one or more units\n"
" stop NAME... Stop (deactivate) one or more units\n"
" reload NAME... Reload one or more units\n"
@@ -6820,6 +7138,7 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
ARG_FIRMWARE_SETUP,
ARG_NOW,
ARG_MESSAGE,
+ ARG_WAIT,
};
static const struct option options[] = {
@@ -6843,6 +7162,7 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
{ "user", no_argument, NULL, ARG_USER },
{ "system", no_argument, NULL, ARG_SYSTEM },
{ "global", no_argument, NULL, ARG_GLOBAL },
+ { "wait", no_argument, NULL, ARG_WAIT },
{ "no-block", no_argument, NULL, ARG_NO_BLOCK },
{ "no-legend", no_argument, NULL, ARG_NO_LEGEND },
{ "no-pager", no_argument, NULL, ARG_NO_PAGER },
@@ -7023,6 +7343,10 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
arg_scope = UNIT_FILE_GLOBAL;
break;
+ case ARG_WAIT:
+ arg_wait = true;
+ break;
+
case ARG_NO_BLOCK:
arg_no_block = true;
break;
@@ -7198,6 +7522,11 @@ static int systemctl_parse_argv(int argc, char *argv[]) {
return -EINVAL;
}
+ if (arg_wait && arg_no_block) {
+ log_error("--wait may not be combined with --no-block.");
+ return -EINVAL;
+ }
+
return 1;
}
diff --git a/src/grp-system/systemctl/systemctl.completion.bash.in b/src/grp-system/systemctl/systemctl.completion.bash.in
index 6f2b3f122c..dcf71a1f51 100644
--- a/src/grp-system/systemctl/systemctl.completion.bash.in
+++ b/src/grp-system/systemctl/systemctl.completion.bash.in
@@ -41,7 +41,7 @@ __contains_word () {
__filter_units_by_property () {
local mode=$1 property=$2 value=$3 ; shift 3
local units=("$@")
- local props
+ local props i
IFS=$'\n' read -rd '' -a props < \
<(__systemctl $mode show --property "$property" -- "${units[@]}")
for ((i=0; $i < ${#units[*]}; i++)); do
@@ -51,6 +51,33 @@ __filter_units_by_property () {
done
}
+__filter_units_by_properties () {
+ local mode=$1 properties=$2 values=$3 ; shift 3
+ local units=("$@")
+ local props i j conditions=()
+ IFS=$'\n' read -rd '' -a props < \
+ <(__systemctl $mode show --property "$properties" -- "${units[@]}")
+ IFS=$',' read -r -a properties < <(echo $properties)
+ IFS=$',' read -r -a values < <(echo $values)
+ for ((i=0; i < ${#properties[*]}; i++)); do
+ for ((j=0; j < ${#properties[*]}; j++)); do
+ if [[ ${props[i]%%=*} == ${properties[j]} ]]; then
+ conditions+=( "${properties[j]}=${values[j]}" )
+ fi
+ done
+ done
+ for ((i=0; i < ${#units[*]}; i++)); do
+ for ((j=0; j < ${#conditions[*]}; j++)); do
+ if [[ "${props[ i * ${#conditions[*]} + j]}" != "${conditions[j]}" ]]; then
+ break
+ fi
+ done
+ if (( j == ${#conditions[*]} )); then
+ echo " ${units[i]}"
+ fi
+ done
+}
+
__get_all_units () { { __systemctl $1 list-unit-files; __systemctl $1 list-units --all; } \
| { while read -r a b; do [[ $a =~ @\. ]] || echo " $a"; done; }; }
__get_template_names () { __systemctl $1 list-unit-files \
@@ -60,12 +87,12 @@ __get_active_units () { __systemctl $1 list-units \
| { while read -r a b; do echo " $a"; done; }; }
__get_startable_units () {
# find startable inactive units
- __filter_units_by_property $mode ActiveState inactive $(
- __filter_units_by_property $mode CanStart yes $(
- __systemctl $mode list-unit-files --state enabled,disabled,static | \
- { while read -r a b; do [[ $a =~ @\. ]] || echo " $a"; done; }
- __systemctl $mode list-units --state inactive,failed | \
- { while read -r a b; do echo " $a"; done; } ))
+ __filter_units_by_properties $mode ActiveState,CanStart inactive,yes $(
+ { __systemctl $mode list-unit-files --state enabled,enabled-runtime,linked,linked-runtime,static,indirect,disabled,generated,transient | \
+ { while read -r a b; do [[ $a =~ @\. ]] || echo " $a"; done; }
+ __systemctl $mode list-units --state inactive,failed | \
+ { while read -r a b c; do [[ $b == "loaded" ]] && echo " $a"; done; }
+ } | sort -u )
}
__get_restartable_units () {
# filter out masked and not-found
@@ -145,7 +172,7 @@ _systemctl () {
comps='full enable-only disable-only'
;;
--output|-o)
- comps='short short-iso short-precise short-monotonic verbose export json
+ comps='short short-full short-iso short-precise short-monotonic short-unix verbose export json
json-pretty json-sse cat'
;;
--machine|-M)
diff --git a/src/grp-system/systemctl/systemctl.completion.zsh.in b/src/grp-system/systemctl/systemctl.completion.zsh.in
index 44c31b7833..03a1c930b0 100644
--- a/src/grp-system/systemctl/systemctl.completion.zsh.in
+++ b/src/grp-system/systemctl/systemctl.completion.zsh.in
@@ -123,15 +123,11 @@ _systemctl_really_all_units()
}
_filter_units_by_property() {
- local property=$1 value=$2 ; shift ; shift
- local -a units ; units=($*)
- local props
- for props in ${(ps:\n\n:)"$(_call_program units "$service show --no-pager --property="Id,$property" -- ${units} 2>/dev/null")"}; do
- props=(${(f)props})
- if [[ "${props[2]}" = "$property=$value" ]]; then
- echo -E - " ${props[1]#Id=}"
- fi
- done
+ local property=$1 value=$2; shift 2
+ local -a units; units=("${(q-)@}")
+ local -A props
+ props=(${(f)"$(_call_program units "$service $_sys_service_mgr show --no-pager --property=\"Id,$property\" -- ${units} 2>/dev/null")"})
+ echo -E - "${(@g:o:)${(k@)props[(Re)$property=$value]}#Id=}"
}
_systemctl_get_template_names() { echo -E - ${^${(M)${(f)"$(__systemctl list-unit-files)"}##*@.[^[:space:]]##}%%@.*}\@ }
@@ -351,8 +347,10 @@ _job_modes() {
_values -s , "${_modes[@]}"
}
+# Build arguments for "systemctl" to be used in completion.
local -a _modes; _modes=("--user" "--system")
-local _sys_service_mgr=${${words:*_modes}[(R)(${(j.|.)_modes})]:---system}
+# Use the last mode (they are exclusive and the last one is used).
+local _sys_service_mgr=${${words:*_modes}[(R)(${(j.|.)_modes})]}
_arguments -s \
{-h,--help}'[Show help]' \
'--version[Show package version]' \
diff --git a/src/grp-system/systemctl/systemctl.xml b/src/grp-system/systemctl/systemctl.xml
index e7880d24f7..dfa00e0c03 100644
--- a/src/grp-system/systemctl/systemctl.xml
+++ b/src/grp-system/systemctl/systemctl.xml
@@ -156,6 +156,10 @@
<para>To list all units installed in the file system, use the
<command>list-unit-files</command> command instead.</para>
+
+ <para>When listing units with <command>list-dependencies</command>, recursively show
+ dependencies of all dependent units (by default only dependencies of target units are
+ shown).</para>
</listitem>
</varlistentry>
@@ -229,6 +233,8 @@
of <command>status</command>, <command>list-units</command>,
<command>list-jobs</command>, and
<command>list-timers</command>.</para>
+ <para>Also, show installation targets in the output of
+ <command>is-enabled</command>.</para>
</listitem>
</varlistentry>
@@ -302,7 +308,7 @@
<para><literal>ignore-requirements</literal> is similar to
<literal>ignore-dependencies</literal>, but only causes the
requirement dependencies to be ignored, the ordering
- dependencies will still be honoured.</para>
+ dependencies will still be honored.</para>
</listitem>
</varlistentry>
@@ -359,7 +365,20 @@
to finish. If this is not specified, the job will be
verified, enqueued and <command>systemctl</command> will
wait until the unit's start-up is completed. By passing this
- argument, it is only verified and enqueued.</para>
+ argument, it is only verified and enqueued. This option may not be
+ combined with <option>--wait</option>.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><option>--wait</option></term>
+
+ <listitem>
+ <para>Synchronously wait for started units to terminate again.
+ This option may not be combined with <option>--no-block</option>.
+ Note that this will wait forever if any given unit never terminates
+ (by itself or by getting stopped explicitly); particularly services
+ which use <literal>RemainAfterExit=yes</literal>.</para>
</listitem>
</varlistentry>
@@ -527,8 +546,10 @@
<listitem>
<para>When used with
<command>enable</command>/<command>disable</command>/<command>is-enabled</command>
- (and related commands), use an alternate root path when
- looking for unit files.</para>
+ (and related commands), use the specified root path when looking for unit
+ files. If this option is present, <command>systemctl</command> will operate on
+ the file system directly, instead of communicating with the <command>systemd</command>
+ daemon to carry out changes.</para>
</listitem>
</varlistentry>
@@ -609,7 +630,7 @@
<listitem>
<para>When used with <command>list-dependencies</command>,
- <command>list-units</command> or <command>list-machines</command>, the
+ <command>list-units</command> or <command>list-machines</command>,
the output is printed as a list instead of a tree, and the bullet
circles are omitted.</para>
</listitem>
@@ -638,13 +659,13 @@
<term><command>list-units <optional><replaceable>PATTERN</replaceable>...</optional></command></term>
<listitem>
- <para>List units that <command>systemd</command> has loaded. This includes units that
- are either referenced directly or through a dependency, or units that were active in the
- past and have failed. By default only units which are active, have pending jobs, or have
- failed are shown; this can be changed with option <option>--all</option>. If one or more
- <replaceable>PATTERN</replaceable>s are specified, only units matching one of them are
- shown. The units that are shown are additionally filtered by <option>--type=</option>
- and <option>--state=</option> if those options are specified.</para>
+ <para>List units that <command>systemd</command> currently has in memory. This includes units that are
+ either referenced directly or through a dependency, units that are pinned by applications programmatically,
+ or units that were active in the past and have failed. By default only units which are active, have pending
+ jobs, or have failed are shown; this can be changed with option <option>--all</option>. If one or more
+ <replaceable>PATTERN</replaceable>s are specified, only units matching one of them are shown. The units
+ that are shown are additionally filtered by <option>--type=</option> and <option>--state=</option> if those
+ options are specified.</para>
<para>This is the default command.</para>
</listitem>
@@ -654,9 +675,8 @@
<term><command>list-sockets <optional><replaceable>PATTERN</replaceable>...</optional></command></term>
<listitem>
- <para>List socket units ordered by listening address.
- If one or more <replaceable>PATTERN</replaceable>s are
- specified, only socket units matching one of them are
+ <para>List socket units currently in memory, ordered by listening address. If one or more
+ <replaceable>PATTERN</replaceable>s are specified, only socket units matching one of them are
shown. Produces output similar to
<programlisting>
LISTEN UNIT ACTIVATES
@@ -670,8 +690,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
is not suitable for programmatic consumption.
</para>
- <para>See also the options <option>--show-types</option>,
- <option>--all</option>, and <option>--state=</option>.</para>
+ <para>Also see <option>--show-types</option>, <option>--all</option>, and <option>--state=</option>.</para>
</listitem>
</varlistentry>
@@ -679,13 +698,11 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<term><command>list-timers <optional><replaceable>PATTERN</replaceable>...</optional></command></term>
<listitem>
- <para>List timer units ordered by the time they elapse
- next. If one or more <replaceable>PATTERN</replaceable>s
- are specified, only units matching one of them are shown.
+ <para>List timer units currently in memory, ordered by the time they elapse next. If one or more
+ <replaceable>PATTERN</replaceable>s are specified, only units matching one of them are shown.
</para>
- <para>See also the options <option>--all</option> and
- <option>--state=</option>.</para>
+ <para>Also see <option>--all</option> and <option>--state=</option>.</para>
</listitem>
</varlistentry>
@@ -696,8 +713,8 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<para>Start (activate) one or more units specified on the
command line.</para>
- <para>Note that glob patterns operate on the set of primary names of currently loaded units. Units which
- are not active and are not in a failed state usually are not loaded, and will not be matched by any
+ <para>Note that glob patterns operate on the set of primary names of units currently in memory. Units which
+ are not active and are not in a failed state usually are not in memory, and will not be matched by any
pattern. In addition, in case of instantiated units, systemd is often unaware of the instance name until
the instance has been started. Therefore, using glob patterns with <command>start</command> has limited
usefulness. Also, secondary alias names of units are not considered.</para>
@@ -860,8 +877,8 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<para>Show properties of one or more units, jobs, or the
manager itself. If no argument is specified, properties of
the manager will be shown. If a unit name is specified,
- properties of the unit is shown, and if a job ID is
- specified, properties of the job is shown. By default, empty
+ properties of the unit are shown, and if a job ID is
+ specified, properties of the job are shown. By default, empty
properties are suppressed. Use <option>--all</option> to
show those too. To select specific properties to show, use
<option>--property=</option>. This command is intended to be
@@ -877,7 +894,11 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<para>Show backing files of one or more units. Prints the
"fragment" and "drop-ins" (source files) of units. Each
file is preceded by a comment which includes the file
- name.</para>
+ name. Note that this shows the contents of the backing files
+ on disk, which may not match the system manager's
+ understanding of these units if any unit files were
+ updated on disk and the <command>daemon-reload</command>
+ command wasn't issued since.</para>
</listitem>
</varlistentry>
<varlistentry>
@@ -993,7 +1014,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
desired, combine this command with the <option>--now</option> switch, or invoke <command>start</command>
with appropriate arguments later. Note that in case of unit instance enablement (i.e. enablement of units of
the form <filename>foo@bar.service</filename>), symlinks named the same as instances are created in the
- unit configuration diectory, however they point to the single template unit file they are instantiated
+ unit configuration directory, however they point to the single template unit file they are instantiated
from.</para>
<para>This command expects either valid unit names (in which case various unit file directories are
@@ -1069,8 +1090,8 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<listitem>
<para>Reenable one or more units, as specified on the command line. This is a combination of
<command>disable</command> and <command>enable</command> and is useful to reset the symlinks a unit file is
- enabled with to the defaults configured in its <literal>[Install]</literal> section. This commands expects
- a unit uname only, it does not accept paths to unit files.</para>
+ enabled with to the defaults configured in its <literal>[Install]</literal> section. This command expects
+ a unit name only, it does not accept paths to unit files.</para>
</listitem>
</varlistentry>
@@ -1088,7 +1109,8 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
enabled and disabled, or only enabled, or only disabled.</para>
<para>If the unit carries no install information, it will be silently ignored
- by this command.</para>
+ by this command. <replaceable>NAME</replaceable> must be the real unit name,
+ any alias names are ignored silently.</para>
<para>For more information on the preset policy format, see
<citerefentry><refentrytitle>systemd.preset</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
@@ -1120,6 +1142,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
exit code of 0 if at least one is enabled, non-zero
otherwise. Prints the current enable status (see table).
To suppress this output, use <option>--quiet</option>.
+ To show installation targets, use <option>--full</option>.
</para>
<table>
@@ -1242,7 +1265,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<literal>foo.service.d/</literal> with all their contained files are removed, both below the persistent and
runtime configuration directories (i.e. below <filename>/etc/systemd/system</filename> and
<filename>/run/systemd/system</filename>); if the unit file has a vendor-supplied version (i.e. a unit file
- located below <filename>/usr</filename>) any matching peristent or runtime unit file that overrides it is
+ located below <filename>/usr</filename>) any matching persistent or runtime unit file that overrides it is
removed, too. Note that if a unit file has no vendor-supplied version (i.e. is only defined below
<filename>/etc/systemd/system</filename> or <filename>/run/systemd/system</filename>, but not in a unit
file stored below <filename>/usr</filename>), then it is not removed. Also, if a unit is masked, it is
@@ -1676,20 +1699,15 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<term><command>switch-root <replaceable>ROOT</replaceable> <optional><replaceable>INIT</replaceable></optional></command></term>
<listitem>
- <para>Switches to a different root directory and executes a
- new system manager process below it. This is intended for
- usage in initial RAM disks ("initrd"), and will transition
- from the initrd's system manager process (a.k.a. "init"
- process) to the main system manager process. This call takes two
- arguments: the directory that is to become the new root directory, and
- the path to the new system manager binary below it to
- execute as PID 1. If the latter is omitted or the empty
- string, a systemd binary will automatically be searched for
- and used as init. If the system manager path is omitted or
- equal to the empty string, the state of the initrd's system
- manager process is passed to the main system manager, which
- allows later introspection of the state of the services
- involved in the initrd boot.</para>
+ <para>Switches to a different root directory and executes a new system manager process below it. This is
+ intended for usage in initial RAM disks ("initrd"), and will transition from the initrd's system manager
+ process (a.k.a. "init" process) to the main system manager process which is loaded from the actual host
+ volume. This call takes two arguments: the directory that is to become the new root directory, and the path
+ to the new system manager binary below it to execute as PID 1. If the latter is omitted or the empty
+ string, a systemd binary will automatically be searched for and used as init. If the system manager path is
+ omitted, equal to the empty string or identical to the path to the systemd binary, the state of the
+ initrd's system manager process is passed to the main system manager, which allows later introspection of
+ the state of the services involved in the initrd boot phase.</para>
</listitem>
</varlistentry>
@@ -1746,7 +1764,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
are equivalent to:
<programlisting># systemctl status dev-sda.device
# systemctl status home.mount</programlisting>
- In the second case, shell-style globs will be matched against the primary names of all currently loaded units;
+ In the second case, shell-style globs will be matched against the primary names of all units currently in memory;
literal unit names, with or without a suffix, will be treated as in the first case. This means that literal unit
names always refer to exactly one unit, but globs may match zero units and this is not considered an
error.</para>
@@ -1758,11 +1776,11 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
<literal>[]</literal> may be used. See
<citerefentry project='man-pages'><refentrytitle>glob</refentrytitle><manvolnum>7</manvolnum></citerefentry>
for more details. The patterns are matched against the primary names of
- currently loaded units, and patterns which do not match anything
+ units currently in memory, and patterns which do not match anything
are silently skipped. For example:
<programlisting># systemctl stop sshd@*.service</programlisting>
will stop all <filename>sshd@.service</filename> instances. Note that alias names of units, and units that aren't
- loaded are not considered for glob expansion.
+ in memory are not considered for glob expansion.
</para>
<para>For unit file commands, the specified <replaceable>NAME</replaceable> should be the name of the unit file
@@ -1804,6 +1822,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service
</variablelist>
<xi:include href="less-variables.xml" xpointer="pager"/>
<xi:include href="less-variables.xml" xpointer="less"/>
+ <xi:include href="less-variables.xml" xpointer="lesscharset"/>
</refsect1>
<refsect1>
diff --git a/src/grp-system/systemctl/systemd.preset.xml b/src/grp-system/systemctl/systemd.preset.xml
index b7164014f0..d09167baaf 100644
--- a/src/grp-system/systemctl/systemd.preset.xml
+++ b/src/grp-system/systemctl/systemd.preset.xml
@@ -98,6 +98,10 @@
Empty lines and lines whose first non-whitespace character is # or
; are ignored.</para>
+ <para>Presets must refer to the "real" unit file, and not to any aliases. See
+ <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for a description of unit aliasing.</para>
+
<para>Two different directives are understood:
<literal>enable</literal> may be used to enable units by default,
<literal>disable</literal> to disable units by default.</para>
diff --git a/src/grp-system/systemd-shutdown/umount.c b/src/grp-system/systemd-shutdown/umount.c
index 8d280c8c8c..1947b58c99 100644
--- a/src/grp-system/systemd-shutdown/umount.c
+++ b/src/grp-system/systemd-shutdown/umount.c
@@ -377,7 +377,7 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
/* If we are in a container, don't attempt to
read-only mount anything as that brings no real
benefits, but might confuse the host, as we remount
- the superblock here, not the bind mound. */
+ the superblock here, not the bind mount. */
if (detect_container() <= 0) {
_cleanup_free_ char *options = NULL;
/* MS_REMOUNT requires that the data parameter
diff --git a/src/grp-system/systemd/main.c b/src/grp-system/systemd/main.c
index 1c53f9aad3..be97cef6a1 100644
--- a/src/grp-system/systemd/main.c
+++ b/src/grp-system/systemd/main.c
@@ -48,8 +48,6 @@
#include "core/machine-id-setup.h"
#include "core/manager.h"
#include "core/mount-setup.h"
-#include "core/selinux-setup.h"
-#include "core/smack-setup.h"
#include "sd-bus/bus-error.h"
#include "sd-bus/bus-util.h"
#include "systemd-basic/alloc-util.h"
@@ -71,6 +69,15 @@
#include "systemd-basic/process-util.h"
#include "systemd-basic/raw-clone.h"
#include "systemd-basic/rlimit-util.h"
+#include "systemd-shared/conf-parser.h"
+#include "systemd-shared/fdset.h"
+#include "systemd-shared/pager.h"
+#ifdef HAVE_SECCOMP
+#include "systemd-shared/seccomp-util.h"
+#endif
+#include "core/emergency-action.h"
+#include "core/selinux-setup.h"
+#include "core/smack-setup.h"
#include "systemd-basic/selinux-util.h"
#include "systemd-basic/signal-util.h"
#include "systemd-basic/special.h"
@@ -81,9 +88,6 @@
#include "systemd-basic/umask-util.h"
#include "systemd-basic/user-util.h"
#include "systemd-basic/virt.h"
-#include "systemd-shared/conf-parser.h"
-#include "systemd-shared/fdset.h"
-#include "systemd-shared/pager.h"
#include "systemd-shared/switch-root.h"
#include "systemd-shared/watchdog.h"
@@ -92,8 +96,7 @@ static enum {
ACTION_HELP,
ACTION_VERSION,
ACTION_TEST,
- ACTION_DUMP_CONFIGURATION_ITEMS,
- ACTION_DONE
+ ACTION_DUMP_CONFIGURATION_ITEMS
} arg_action = ACTION_RUN;
static char *arg_default_unit = NULL;
static bool arg_system = false;
@@ -129,6 +132,7 @@ static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
static uint64_t arg_default_tasks_max = UINT64_MAX;
static sd_id128_t arg_machine_id = {};
+static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
noreturn static void freeze_or_reboot(void) {
@@ -200,7 +204,7 @@ noreturn static void crash(int sig) {
pid, sigchld_code_to_string(status.si_code),
status.si_status,
strna(status.si_code == CLD_EXITED
- ? exit_status_to_string(status.si_status, EXIT_STATUS_FULL)
+ ? exit_status_to_string(status.si_status, EXIT_STATUS_MINIMAL)
: signal_to_string(status.si_status)));
else
log_emergency("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
@@ -304,7 +308,7 @@ static int set_machine_id(const char *m) {
return 0;
}
-static int parse_proc_cmdline_item(const char *key, const char *value) {
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
int r;
@@ -700,6 +704,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
{ "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max },
+ { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action },
{}
};
@@ -713,7 +718,7 @@ static int parse_config_file(void) {
CONF_PATHS_NULSTR("systemd/system.conf.d") :
CONF_PATHS_NULSTR("systemd/user.conf.d");
- config_parse_many(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
+ config_parse_many_nulstr(fn, conf_dirs_nulstr, "Manager\0", config_item_table_lookup, items, false, NULL);
/* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we used USEC_INFINITY
* like everywhere else. */
@@ -994,10 +999,8 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_MACHINE_ID:
r = set_machine_id(optarg);
- if (r < 0) {
- log_error("MachineID '%s' is not valid.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
break;
case 'h':
@@ -1120,7 +1123,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
* later when transitioning from the initrd to the main
* systemd or suchlike. */
if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
- return log_error_errno(errno, "Reading RLIMIT_NOFILE failed: %m");
+ return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
/* Make sure forked processes get the default kernel setting */
if (!arg_default_rlimit[RLIMIT_NOFILE]) {
@@ -1137,7 +1140,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
nl.rlim_cur = nl.rlim_max = 64*1024;
r = setrlimit_closest(RLIMIT_NOFILE, &nl);
if (r < 0)
- return log_error_errno(r, "Setting RLIMIT_NOFILE failed: %m");
+ return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
return 0;
}
@@ -1187,6 +1190,9 @@ static int enforce_syscall_archs(Set *archs) {
void *id;
int r;
+ if (!is_seccomp_available())
+ return 0;
+
seccomp = seccomp_init(SCMP_ACT_ALLOW);
if (!seccomp)
return log_oom();
@@ -1319,7 +1325,7 @@ static int fixup_environment(void) {
return r;
if (r == 0) {
- term = strdup(default_term_for_tty("/dev/console") + 5);
+ term = strdup(default_term_for_tty("/dev/console"));
if (!term)
return -ENOMEM;
}
@@ -1415,12 +1421,12 @@ int main(int argc, char *argv[]) {
if (mac_selinux_setup(&loaded_policy) < 0) {
error_message = "Failed to load SELinux policy";
goto finish;
- } else if (ima_setup() < 0) {
- error_message = "Failed to load IMA policy";
- goto finish;
} else if (mac_smack_setup(&loaded_policy) < 0) {
error_message = "Failed to load SMACK policy";
goto finish;
+ } else if (ima_setup() < 0) {
+ error_message = "Failed to load IMA policy";
+ goto finish;
}
dual_timestamp_get(&security_finish_timestamp);
}
@@ -1506,7 +1512,8 @@ int main(int argc, char *argv[]) {
if (getpid() == 1) {
/* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour the limit)
* will process core dumps for system services by default. */
- (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY));
+ if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
+ log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
/* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
* until the systemd-coredump tool is enabled via sysctl. */
@@ -1524,15 +1531,9 @@ int main(int argc, char *argv[]) {
* need to do that for user instances since they never log
* into the console. */
log_show_color(colors_enabled());
- make_null_stdio();
- }
-
- /* Initialize default unit */
- r = free_and_strdup(&arg_default_unit, SPECIAL_DEFAULT_TARGET);
- if (r < 0) {
- log_emergency_errno(r, "Failed to set default unit %s: %m", SPECIAL_DEFAULT_TARGET);
- error_message = "Failed to set default unit";
- goto finish;
+ r = make_null_stdio();
+ if (r < 0)
+ log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
}
r = initialize_join_controllers();
@@ -1560,7 +1561,7 @@ int main(int argc, char *argv[]) {
(void) reset_all_signal_handlers();
(void) ignore_signals(SIGNALS_IGNORE, -1);
- arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */
+ arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
if (parse_config_file() < 0) {
error_message = "Failed to parse config file";
@@ -1568,7 +1569,7 @@ int main(int argc, char *argv[]) {
}
if (arg_system) {
- r = parse_proc_cmdline(parse_proc_cmdline_item);
+ r = parse_proc_cmdline(parse_proc_cmdline_item, NULL, false);
if (r < 0)
log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
}
@@ -1582,6 +1583,16 @@ int main(int argc, char *argv[]) {
goto finish;
}
+ /* Initialize default unit */
+ if (!arg_default_unit) {
+ arg_default_unit = strdup(SPECIAL_DEFAULT_TARGET);
+ if (!arg_default_unit) {
+ r = log_oom();
+ error_message = "Failed to set default unit";
+ goto finish;
+ }
+ }
+
if (arg_action == ACTION_TEST &&
geteuid() == 0) {
log_error("Don't run test mode as root.");
@@ -1602,11 +1613,10 @@ int main(int argc, char *argv[]) {
goto finish;
}
- if (arg_action == ACTION_TEST)
- skip_setup = true;
-
- if (arg_action == ACTION_TEST || arg_action == ACTION_HELP)
+ if (arg_action == ACTION_TEST || arg_action == ACTION_HELP) {
pager_open(arg_no_pager, false);
+ skip_setup = true;
+ }
if (arg_action == ACTION_HELP) {
retval = help();
@@ -1615,12 +1625,10 @@ int main(int argc, char *argv[]) {
retval = version();
goto finish;
} else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
+ pager_open(arg_no_pager, false);
unit_dump_config_items(stdout);
retval = EXIT_SUCCESS;
goto finish;
- } else if (arg_action == ACTION_DONE) {
- retval = EXIT_SUCCESS;
- goto finish;
}
if (!arg_system &&
@@ -1766,10 +1774,10 @@ int main(int argc, char *argv[]) {
log_warning_errno(errno, "Failed to make us a subreaper: %m");
if (arg_system) {
- bump_rlimit_nofile(&saved_rlimit_nofile);
+ (void) bump_rlimit_nofile(&saved_rlimit_nofile);
if (empty_etc) {
- r = unit_file_preset_all(UNIT_FILE_SYSTEM, false, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, false, NULL, 0);
+ r = unit_file_preset_all(UNIT_FILE_SYSTEM, 0, NULL, UNIT_FILE_PRESET_ENABLE_ONLY, NULL, 0);
if (r < 0)
log_full_errno(r == -EEXIST ? LOG_NOTICE : LOG_WARNING, r, "Failed to populate /etc with preset unit settings, ignoring: %m");
else
@@ -1792,6 +1800,7 @@ int main(int argc, char *argv[]) {
m->initrd_timestamp = initrd_timestamp;
m->security_start_timestamp = security_start_timestamp;
m->security_finish_timestamp = security_finish_timestamp;
+ m->cad_burst_action = arg_cad_burst_action;
manager_set_defaults(m);
manager_set_show_status(m, arg_show_status);
diff --git a/src/grp-system/systemd/org.freedesktop.systemd1.conf b/src/grp-system/systemd/org.freedesktop.systemd1.conf
index 3c64f20872..a61677e645 100644
--- a/src/grp-system/systemd/org.freedesktop.systemd1.conf
+++ b/src/grp-system/systemd/org.freedesktop.systemd1.conf
@@ -54,6 +54,10 @@
<allow send_destination="org.freedesktop.systemd1"
send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitByInvocationID"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
send_member="LoadUnit"/>
<allow send_destination="org.freedesktop.systemd1"
@@ -90,6 +94,10 @@
<allow send_destination="org.freedesktop.systemd1"
send_interface="org.freedesktop.systemd1.Manager"
+ send_member="GetUnitFileLinks"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
send_member="ListJobs"/>
<allow send_destination="org.freedesktop.systemd1"
@@ -108,6 +116,14 @@
send_interface="org.freedesktop.systemd1.Manager"
send_member="GetDefaultTarget"/>
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByName"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="LookupDynamicUserByUID"/>
+
<!-- Managed via polkit or other criteria -->
<allow send_destination="org.freedesktop.systemd1"
@@ -176,6 +192,14 @@
<allow send_destination="org.freedesktop.systemd1"
send_interface="org.freedesktop.systemd1.Manager"
+ send_member="RefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
+ send_member="UnrefUnit"/>
+
+ <allow send_destination="org.freedesktop.systemd1"
+ send_interface="org.freedesktop.systemd1.Manager"
send_member="EnableUnitFiles"/>
<allow send_destination="org.freedesktop.systemd1"
diff --git a/src/grp-system/systemd/system.conf b/src/grp-system/systemd/system.conf
index c6bb050aac..746572b7ff 100644
--- a/src/grp-system/systemd/system.conf
+++ b/src/grp-system/systemd/system.conf
@@ -21,6 +21,7 @@
#CrashChangeVT=no
#CrashShell=no
#CrashReboot=no
+#CtrlAltDelBurstAction=reboot-force
#CPUAffinity=1 2
#JoinControllers=cpu,cpuacct net_cls,net_prio
#RuntimeWatchdogSec=0
diff --git a/src/grp-system/systemd/systemd-system.conf.xml b/src/grp-system/systemd/systemd-system.conf.xml
index 1bb40fd234..e4e81f7f2e 100644
--- a/src/grp-system/systemd/systemd-system.conf.xml
+++ b/src/grp-system/systemd/systemd-system.conf.xml
@@ -106,6 +106,18 @@
</varlistentry>
<varlistentry>
+ <term><varname>CtrlAltDelBurstAction=</varname></term>
+
+ <listitem><para>Defines what action will be performed
+ if user presses Ctrl-Alt-Delete more than 7 times in 2s.
+ Can be set to <literal>reboot-force</literal>, <literal>poweroff-force</literal>,
+ <literal>reboot-immediate</literal>, <literal>poweroff-immediate</literal>
+ or disabled with <literal>none</literal>. Defaults to
+ <literal>reboot-force</literal>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>CPUAffinity=</varname></term>
<listitem><para>Configures the initial CPU affinity for the
@@ -318,7 +330,7 @@
<varname>TasksAccounting=</varname>. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details on the per-unit
- settings. <varname>DefaulTasksAccounting=</varname> defaults
+ settings. <varname>DefaultTasksAccounting=</varname> defaults
to on, the other three settings to off.</para></listitem>
</varlistentry>
diff --git a/src/grp-system/systemd/systemd.exec.xml b/src/grp-system/systemd/systemd.exec.xml
index 41ae6e76de..3c350df11f 100644
--- a/src/grp-system/systemd/systemd.exec.xml
+++ b/src/grp-system/systemd/systemd.exec.xml
@@ -74,6 +74,10 @@
execution specific configuration options are configured in the
[Service], [Socket], [Mount], or [Swap] sections, depending on the
unit type.</para>
+
+ <para>In addition, options which control resources through Linux Control Groups (cgroups) are listed in
+ <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
+ Those options complement options listed here.</para>
</refsect1>
<refsect1>
@@ -107,46 +111,76 @@
<varlistentry>
<term><varname>WorkingDirectory=</varname></term>
- <listitem><para>Takes a directory path relative to the service's root
- directory specified by <varname>RootDirectory=</varname>, or the
- special value <literal>~</literal>. Sets the working directory
- for executed processes. If set to <literal>~</literal>, the
- home directory of the user specified in
- <varname>User=</varname> is used. If not set, defaults to the
- root directory when systemd is running as a system instance
- and the respective user's home directory if run as user. If
- the setting is prefixed with the <literal>-</literal>
- character, a missing working directory is not considered
- fatal. If <varname>RootDirectory=</varname> is not set, then
- <varname>WorkingDirectory=</varname> is relative to the root of
- the system running the service manager.
- Note that setting this parameter might result in
- additional dependencies to be added to the unit (see
- above).</para></listitem>
+ <listitem><para>Takes a directory path relative to the service's root directory specified by
+ <varname>RootDirectory=</varname>, or the special value <literal>~</literal>. Sets the working directory for
+ executed processes. If set to <literal>~</literal>, the home directory of the user specified in
+ <varname>User=</varname> is used. If not set, defaults to the root directory when systemd is running as a
+ system instance and the respective user's home directory if run as user. If the setting is prefixed with the
+ <literal>-</literal> character, a missing working directory is not considered fatal. If
+ <varname>RootDirectory=</varname> is not set, then <varname>WorkingDirectory=</varname> is relative to the root
+ of the system running the service manager. Note that setting this parameter might result in additional
+ dependencies to be added to the unit (see above).</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RootDirectory=</varname></term>
- <listitem><para>Takes a directory path relative to the host's root directory
- (i.e. the root of the system running the service manager). Sets the
- root directory for executed processes, with the <citerefentry
- project='man-pages'><refentrytitle>chroot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- system call. If this is used, it must be ensured that the
- process binary and all its auxiliary files are available in
- the <function>chroot()</function> jail. Note that setting this
- parameter might result in additional dependencies to be added
- to the unit (see above).</para></listitem>
+ <listitem><para>Takes a directory path relative to the host's root directory (i.e. the root of the system
+ running the service manager). Sets the root directory for executed processes, with the <citerefentry
+ project='man-pages'><refentrytitle>chroot</refentrytitle><manvolnum>2</manvolnum></citerefentry> system
+ call. If this is used, it must be ensured that the process binary and all its auxiliary files are available in
+ the <function>chroot()</function> jail. Note that setting this parameter might result in additional
+ dependencies to be added to the unit (see above).</para>
+
+ <para>The <varname>PrivateUsers=</varname> setting is particularly useful in conjunction with
+ <varname>RootDirectory=</varname>. For details, see below.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>User=</varname></term>
<term><varname>Group=</varname></term>
- <listitem><para>Sets the Unix user or group that the processes
- are executed as, respectively. Takes a single user or group
- name or ID as argument. If no group is set, the default group
- of the user is chosen. These do not affect commands prefixed with <literal>+</literal>.</para></listitem>
+ <listitem><para>Set the UNIX user or group that the processes are executed as, respectively. Takes a single
+ user or group name, or numeric ID as argument. For system services (services run by the system service manager,
+ i.e. managed by PID 1) and for user services of the root user (services managed by root's instance of
+ <command>systemd --user</command>), the default is <literal>root</literal>, but <varname>User=</varname> may be
+ used to specify a different user. For user services of any other user, switching user identity is not
+ permitted, hence the only valid setting is the same user the user's service manager is running as. If no group
+ is set, the default group of the user is used. This setting does not affect commands whose command line is
+ prefixed with <literal>+</literal>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>DynamicUser=</varname></term>
+
+ <listitem><para>Takes a boolean parameter. If set, a UNIX user and group pair is allocated dynamically when the
+ unit is started, and released as soon as it is stopped. The user and group will not be added to
+ <filename>/etc/passwd</filename> or <filename>/etc/group</filename>, but are managed transiently during
+ runtime. The <citerefentry><refentrytitle>nss-systemd</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ glibc NSS module provides integration of these dynamic users/groups into the system's user and group
+ databases. The user and group name to use may be configured via <varname>User=</varname> and
+ <varname>Group=</varname> (see above). If these options are not used and dynamic user/group allocation is
+ enabled for a unit, the name of the dynamic user/group is implicitly derived from the unit name. If the unit
+ name without the type suffix qualifies as valid user name it is used directly, otherwise a name incorporating a
+ hash of it is used. If a statically allocated user or group of the configured name already exists, it is used
+ and no dynamic user/group is allocated. Dynamic users/groups are allocated from the UID/GID range
+ 61184…65519. It is recommended to avoid this range for regular system or login users. At any point in time
+ each UID/GID from this range is only assigned to zero or one dynamically allocated users/groups in
+ use. However, UID/GIDs are recycled after a unit is terminated. Care should be taken that any processes running
+ as part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by these
+ users/groups around, as a different unit might get the same UID/GID assigned later on, and thus gain access to
+ these files or directories. If <varname>DynamicUser=</varname> is enabled, <varname>RemoveIPC=</varname>,
+ <varname>PrivateTmp=</varname> are implied. This ensures that the lifetime of IPC objects and temporary files
+ created by the executed processes is bound to the runtime of the service, and hence the lifetime of the dynamic
+ user/group. Since <filename>/tmp</filename> and <filename>/var/tmp</filename> are usually the only
+ world-writable directories on a system this ensures that a unit making use of dynamic user/group allocation
+ cannot leave files around after unit termination. Moreover <varname>ProtectSystem=strict</varname> and
+ <varname>ProtectHome=read-only</varname> are implied, thus prohibiting the service to write to arbitrary file
+ system locations. In order to allow the service to write to certain directories, they have to be whitelisted
+ using <varname>ReadWritePaths=</varname>, but care must be taken so that UID/GID recycling doesn't
+ create security issues involving files created by the service. Use <varname>RuntimeDirectory=</varname> (see
+ below) in order to assign a writable runtime directory to a service, owned by the dynamic user/group and
+ removed automatically when the unit is terminated. Defaults to off.</para></listitem>
</varlistentry>
<varlistentry>
@@ -165,6 +199,18 @@
</varlistentry>
<varlistentry>
+ <term><varname>RemoveIPC=</varname></term>
+
+ <listitem><para>Takes a boolean parameter. If set, all System V and POSIX IPC objects owned by the user and
+ group the processes of this unit are run as are removed when the unit is stopped. This setting only has an
+ effect if at least one of <varname>User=</varname>, <varname>Group=</varname> and
+ <varname>DynamicUser=</varname> are used. It has no effect on IPC objects owned by the root user. Specifically,
+ this removes System V semaphores, as well as System V and POSIX shared memory segments and message queues. If
+ multiple units use the same user or group the IPC objects are removed when the last of these units is
+ stopped. This setting is implied if <varname>DynamicUser=</varname> is set.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>Nice=</varname></term>
<listitem><para>Sets the default nice level (scheduling
@@ -368,8 +414,9 @@
<option>null</option>,
<option>tty</option>,
<option>tty-force</option>,
- <option>tty-fail</option> or
- <option>socket</option>.</para>
+ <option>tty-fail</option>,
+ <option>socket</option> or
+ <option>fd</option>.</para>
<para>If <option>null</option> is selected, standard input
will be connected to <filename>/dev/null</filename>, i.e. all
@@ -407,6 +454,20 @@
<citerefentry project='freebsd'><refentrytitle>inetd</refentrytitle><manvolnum>8</manvolnum></citerefentry>
daemon.</para>
+ <para>The <option>fd</option> option connects
+ the input stream to a single file descriptor provided by a socket unit.
+ A custom named file descriptor can be specified as part of this option,
+ after a <literal>:</literal> (e.g. <literal>fd:<replaceable>foobar</replaceable></literal>).
+ If no name is specified, <literal>stdin</literal> is assumed
+ (i.e. <literal>fd</literal> is equivalent to <literal>fd:stdin</literal>).
+ At least one socket unit defining such name must be explicitly provided via the
+ <varname>Sockets=</varname> option, and file descriptor name may differ
+ from the name of its containing socket unit.
+ If multiple matches are found, the first one will be used.
+ See <varname>FileDescriptorName=</varname> in
+ <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for more details about named descriptors and ordering.</para>
+
<para>This setting defaults to
<option>null</option>.</para></listitem>
</varlistentry>
@@ -423,8 +484,9 @@
<option>kmsg</option>,
<option>journal+console</option>,
<option>syslog+console</option>,
- <option>kmsg+console</option> or
- <option>socket</option>.</para>
+ <option>kmsg+console</option>,
+ <option>socket</option> or
+ <option>fd</option>.</para>
<para><option>inherit</option> duplicates the file descriptor
of standard input for standard output.</para>
@@ -473,6 +535,20 @@
similar to the same option of
<varname>StandardInput=</varname>.</para>
+ <para>The <option>fd</option> option connects
+ the output stream to a single file descriptor provided by a socket unit.
+ A custom named file descriptor can be specified as part of this option,
+ after a <literal>:</literal> (e.g. <literal>fd:<replaceable>foobar</replaceable></literal>).
+ If no name is specified, <literal>stdout</literal> is assumed
+ (i.e. <literal>fd</literal> is equivalent to <literal>fd:stdout</literal>).
+ At least one socket unit defining such name must be explicitly provided via the
+ <varname>Sockets=</varname> option, and file descriptor name may differ
+ from the name of its containing socket unit.
+ If multiple matches are found, the first one will be used.
+ See <varname>FileDescriptorName=</varname> in
+ <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ for more details about named descriptors and ordering.</para>
+
<para>If the standard output (or error output, see below) of a unit is connected to the journal, syslog or the
kernel log buffer, the unit will implicitly gain a dependency of type <varname>After=</varname> on
<filename>systemd-journald.socket</filename> (also see the automatic dependencies section above).</para>
@@ -490,9 +566,13 @@
<listitem><para>Controls where file descriptor 2 (STDERR) of
the executed processes is connected to. The available options
are identical to those of <varname>StandardOutput=</varname>,
- with one exception: if set to <option>inherit</option> the
+ with some exceptions: if set to <option>inherit</option> the
file descriptor used for standard output is duplicated for
- standard error. This setting defaults to the value set with
+ standard error, while <option>fd</option> operates on the error
+ stream and will look by default for a descriptor named
+ <literal>stderr</literal>.</para>
+
+ <para>This setting defaults to the value set with
<option>DefaultStandardError=</option> in
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
which defaults to <option>inherit</option>. Note that setting
@@ -666,8 +746,19 @@
<varname>MemoryLimit=</varname> is a more powerful (and
working) replacement for <varname>LimitRSS=</varname>.</para>
+ <para>For system units these resource limits may be chosen freely. For user units however (i.e. units run by a
+ per-user instance of
+ <citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>), these limits are
+ bound by (possibly more restrictive) per-user limits enforced by the OS.</para>
+
+ <para>Resource limits not configured explicitly for a unit default to the value configured in the various
+ <varname>DefaultLimitCPU=</varname>, <varname>DefaultLimitFSIZE=</varname>, … options available in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>, and –
+ if not configured there – the kernel or per-user defaults, as defined by the OS (the latter only for user
+ services, see above).</para>
+
<table>
- <title>Limit directives and their equivalent with ulimit</title>
+ <title>Resource limit directives, their equivalent <command>ulimit</command> shell commands and the unit used</title>
<tgroup cols='3'>
<colspec colname='directive' />
@@ -676,7 +767,7 @@
<thead>
<row>
<entry>Directive</entry>
- <entry>ulimit equivalent</entry>
+ <entry><command>ulimit</command> equivalent</entry>
<entry>Unit</entry>
</row>
</thead>
@@ -784,49 +875,37 @@
<listitem><para>Controls which capabilities to include in the capability bounding set for the executed
process. See <citerefentry
project='man-pages'><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
- details. Takes a whitespace-separated list of capability names as read by <citerefentry
- project='mankier'><refentrytitle>cap_from_name</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
- e.g. <constant>CAP_SYS_ADMIN</constant>, <constant>CAP_DAC_OVERRIDE</constant>,
- <constant>CAP_SYS_PTRACE</constant>. Capabilities listed will be included in the bounding set, all others are
- removed. If the list of capabilities is prefixed with <literal>~</literal>, all but the listed capabilities
- will be included, the effect of the assignment inverted. Note that this option also affects the respective
- capabilities in the effective, permitted and inheritable capability sets. If this option is not used, the
- capability bounding set is not modified on process execution, hence no limits on the capabilities of the
- process are enforced. This option may appear more than once, in which case the bounding sets are merged. If the
- empty string is assigned to this option, the bounding set is reset to the empty capability set, and all prior
- settings have no effect. If set to <literal>~</literal> (without any further argument), the bounding set is
- reset to the full set of available capabilities, also undoing any previous settings. This does not affect
- commands prefixed with <literal>+</literal>.</para></listitem>
+ details. Takes a whitespace-separated list of capability names, e.g. <constant>CAP_SYS_ADMIN</constant>,
+ <constant>CAP_DAC_OVERRIDE</constant>, <constant>CAP_SYS_PTRACE</constant>. Capabilities listed will be
+ included in the bounding set, all others are removed. If the list of capabilities is prefixed with
+ <literal>~</literal>, all but the listed capabilities will be included, the effect of the assignment
+ inverted. Note that this option also affects the respective capabilities in the effective, permitted and
+ inheritable capability sets. If this option is not used, the capability bounding set is not modified on process
+ execution, hence no limits on the capabilities of the process are enforced. This option may appear more than
+ once, in which case the bounding sets are merged. If the empty string is assigned to this option, the bounding
+ set is reset to the empty capability set, and all prior settings have no effect. If set to
+ <literal>~</literal> (without any further argument), the bounding set is reset to the full set of available
+ capabilities, also undoing any previous settings. This does not affect commands prefixed with
+ <literal>+</literal>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>AmbientCapabilities=</varname></term>
- <listitem><para>Controls which capabilities to include in the
- ambient capability set for the executed process. Takes a
- whitespace-separated list of capability names as read by
- <citerefentry project='mankier'><refentrytitle>cap_from_name</refentrytitle><manvolnum>3</manvolnum></citerefentry>,
- e.g. <constant>CAP_SYS_ADMIN</constant>,
- <constant>CAP_DAC_OVERRIDE</constant>,
- <constant>CAP_SYS_PTRACE</constant>. This option may appear more than
- once in which case the ambient capability sets are merged.
- If the list of capabilities is prefixed with <literal>~</literal>, all
- but the listed capabilities will be included, the effect of the
- assignment inverted. If the empty string is
- assigned to this option, the ambient capability set is reset to
- the empty capability set, and all prior settings have no effect.
- If set to <literal>~</literal> (without any further argument), the
- ambient capability set is reset to the full set of available
- capabilities, also undoing any previous settings. Note that adding
- capabilities to ambient capability set adds them to the process's
- inherited capability set.
- </para><para>
- Ambient capability sets are useful if you want to execute a process
- as a non-privileged user but still want to give it some capabilities.
- Note that in this case option <constant>keep-caps</constant> is
- automatically added to <varname>SecureBits=</varname> to retain the
- capabilities over the user change. <varname>AmbientCapabilities=</varname> does not affect
- commands prefixed with <literal>+</literal>.</para></listitem>
+ <listitem><para>Controls which capabilities to include in the ambient capability set for the executed
+ process. Takes a whitespace-separated list of capability names, e.g. <constant>CAP_SYS_ADMIN</constant>,
+ <constant>CAP_DAC_OVERRIDE</constant>, <constant>CAP_SYS_PTRACE</constant>. This option may appear more than
+ once in which case the ambient capability sets are merged. If the list of capabilities is prefixed with
+ <literal>~</literal>, all but the listed capabilities will be included, the effect of the assignment
+ inverted. If the empty string is assigned to this option, the ambient capability set is reset to the empty
+ capability set, and all prior settings have no effect. If set to <literal>~</literal> (without any further
+ argument), the ambient capability set is reset to the full set of available capabilities, also undoing any
+ previous settings. Note that adding capabilities to ambient capability set adds them to the process's inherited
+ capability set. </para><para> Ambient capability sets are useful if you want to execute a process as a
+ non-privileged user but still want to give it some capabilities. Note that in this case option
+ <constant>keep-caps</constant> is automatically added to <varname>SecureBits=</varname> to retain the
+ capabilities over the user change. <varname>AmbientCapabilities=</varname> does not affect commands prefixed
+ with <literal>+</literal>.</para></listitem>
</varlistentry>
<varlistentry>
@@ -852,101 +931,75 @@
<term><varname>ReadOnlyPaths=</varname></term>
<term><varname>InaccessiblePaths=</varname></term>
- <listitem><para>Sets up a new file system namespace for
- executed processes. These options may be used to limit access
- a process might have to the main file system hierarchy. Each
- setting takes a space-separated list of paths relative to
- the host's root directory (i.e. the system running the service manager).
- Note that if entries contain symlinks, they are resolved from the host's root directory as well.
- Entries (files or directories) listed in
- <varname>ReadWritePaths=</varname> are accessible from
- within the namespace with the same access rights as from
- outside. Entries listed in
- <varname>ReadOnlyPaths=</varname> are accessible for
- reading only, writing will be refused even if the usual file
- access controls would permit this. Entries listed in
- <varname>InaccessiblePaths=</varname> will be made
- inaccessible for processes inside the namespace, and may not
- countain any other mountpoints, including those specified by
- <varname>ReadWritePaths=</varname> or
- <varname>ReadOnlyPaths=</varname>.
- Note that restricting access with these options does not extend
- to submounts of a directory that are created later on.
- Non-directory paths can be specified as well. These
- options may be specified more than once, in which case all
- paths listed will have limited access from within the
- namespace. If the empty string is assigned to this option, the
- specific list is reset, and all prior assignments have no
- effect.</para>
- <para>Paths in
- <varname>ReadOnlyPaths=</varname>
- and
- <varname>InaccessiblePaths=</varname>
- may be prefixed with
- <literal>-</literal>, in which case
- they will be ignored when they do not
- exist. Note that using this
- setting will disconnect propagation of
- mounts from the service to the host
- (propagation in the opposite direction
- continues to work). This means that
- this setting may not be used for
- services which shall be able to
- install mount points in the main mount
- namespace.</para></listitem>
+ <listitem><para>Sets up a new file system namespace for executed processes. These options may be used to limit
+ access a process might have to the file system hierarchy. Each setting takes a space-separated list of paths
+ relative to the host's root directory (i.e. the system running the service manager). Note that if paths
+ contain symlinks, they are resolved relative to the root directory set with
+ <varname>RootDirectory=</varname>.</para>
+
+ <para>Paths listed in <varname>ReadWritePaths=</varname> are accessible from within the namespace with the same
+ access modes as from outside of it. Paths listed in <varname>ReadOnlyPaths=</varname> are accessible for
+ reading only, writing will be refused even if the usual file access controls would permit this. Nest
+ <varname>ReadWritePaths=</varname> inside of <varname>ReadOnlyPaths=</varname> in order to provide writable
+ subdirectories within read-only directories. Use <varname>ReadWritePaths=</varname> in order to whitelist
+ specific paths for write access if <varname>ProtectSystem=strict</varname> is used. Paths listed in
+ <varname>InaccessiblePaths=</varname> will be made inaccessible for processes inside the namespace (along with
+ everything below them in the file system hierarchy).</para>
+
+ <para>Note that restricting access with these options does not extend to submounts of a directory that are
+ created later on. Non-directory paths may be specified as well. These options may be specified more than once,
+ in which case all paths listed will have limited access from within the namespace. If the empty string is
+ assigned to this option, the specific list is reset, and all prior assignments have no effect.</para>
+
+ <para>Paths in <varname>ReadWritePaths=</varname>, <varname>ReadOnlyPaths=</varname> and
+ <varname>InaccessiblePaths=</varname> may be prefixed with <literal>-</literal>, in which case they will be ignored
+ when they do not exist. Note that using this setting will disconnect propagation of mounts from the service to
+ the host (propagation in the opposite direction continues to work). This means that this setting may not be used
+ for services which shall be able to install mount points in the main mount namespace. Note that the effect of
+ these settings may be undone by privileged processes. In order to set up an effective sandboxed environment for
+ a unit it is thus recommended to combine these settings with either
+ <varname>CapabilityBoundingSet=~CAP_SYS_ADMIN</varname> or <varname>SystemCallFilter=~@mount</varname>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>PrivateTmp=</varname></term>
- <listitem><para>Takes a boolean argument. If true, sets up a
- new file system namespace for the executed processes and
- mounts private <filename>/tmp</filename> and
- <filename>/var/tmp</filename> directories inside it that is
- not shared by processes outside of the namespace. This is
- useful to secure access to temporary files of the process, but
- makes sharing between processes via <filename>/tmp</filename>
- or <filename>/var/tmp</filename> impossible. If this is
- enabled, all temporary files created by a service in these
- directories will be removed after the service is stopped.
- Defaults to false. It is possible to run two or more units
- within the same private <filename>/tmp</filename> and
- <filename>/var/tmp</filename> namespace by using the
+ <listitem><para>Takes a boolean argument. If true, sets up a new file system namespace for the executed
+ processes and mounts private <filename>/tmp</filename> and <filename>/var/tmp</filename> directories inside it
+ that is not shared by processes outside of the namespace. This is useful to secure access to temporary files of
+ the process, but makes sharing between processes via <filename>/tmp</filename> or <filename>/var/tmp</filename>
+ impossible. If this is enabled, all temporary files created by a service in these directories will be removed
+ after the service is stopped. Defaults to false. It is possible to run two or more units within the same
+ private <filename>/tmp</filename> and <filename>/var/tmp</filename> namespace by using the
<varname>JoinsNamespaceOf=</varname> directive, see
- <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>
- for details. Note that using this setting will disconnect
- propagation of mounts from the service to the host
- (propagation in the opposite direction continues to work).
- This means that this setting may not be used for services
- which shall be able to install mount points in the main mount
- namespace.</para></listitem>
+ <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
+ details. This setting is implied if <varname>DynamicUser=</varname> is set. For this setting the same
+ restrictions regarding mount propagation and privileges apply as for <varname>ReadOnlyPaths=</varname> and
+ related calls, see above.</para></listitem>
+
</varlistentry>
<varlistentry>
<term><varname>PrivateDevices=</varname></term>
- <listitem><para>Takes a boolean argument. If true, sets up a
- new /dev namespace for the executed processes and only adds
- API pseudo devices such as <filename>/dev/null</filename>,
- <filename>/dev/zero</filename> or
- <filename>/dev/random</filename> (as well as the pseudo TTY
- subsystem) to it, but no physical devices such as
- <filename>/dev/sda</filename>. This is useful to securely turn
- off physical device access by the executed process. Defaults
- to false. Enabling this option will also remove
- <constant>CAP_MKNOD</constant> from the capability bounding
- set for the unit (see above), and set
- <varname>DevicePolicy=closed</varname> (see
+ <listitem><para>Takes a boolean argument. If true, sets up a new /dev namespace for the executed processes and
+ only adds API pseudo devices such as <filename>/dev/null</filename>, <filename>/dev/zero</filename> or
+ <filename>/dev/random</filename> (as well as the pseudo TTY subsystem) to it, but no physical devices such as
+ <filename>/dev/sda</filename>, system memory <filename>/dev/mem</filename>, system ports
+ <filename>/dev/port</filename> and others. This is useful to securely turn off physical device access by the
+ executed process. Defaults to false. Enabling this option will install a system call filter to block low-level
+ I/O system calls that are grouped in the <varname>@raw-io</varname> set, will also remove
+ <constant>CAP_MKNOD</constant> and <constant>CAP_SYS_RAWIO</constant> from the capability bounding set for
+ the unit (see above), and set <varname>DevicePolicy=closed</varname> (see
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
- for details). Note that using this setting will disconnect
- propagation of mounts from the service to the host
- (propagation in the opposite direction continues to work).
- This means that this setting may not be used for services
- which shall be able to install mount points in the main mount
- namespace. The /dev namespace will be mounted read-only and 'noexec'.
- The latter may break old programs which try to set up executable
- memory by using <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- of <filename>/dev/zero</filename> instead of using <constant>MAP_ANON</constant>.</para></listitem>
+ for details). Note that using this setting will disconnect propagation of mounts from the service to the host
+ (propagation in the opposite direction continues to work). This means that this setting may not be used for
+ services which shall be able to install mount points in the main mount namespace. The /dev namespace will be
+ mounted read-only and 'noexec'. The latter may break old programs which try to set up executable memory by
+ using <citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry> of
+ <filename>/dev/zero</filename> instead of using <constant>MAP_ANON</constant>. This setting is implied if
+ <varname>DynamicUser=</varname> is set. For this setting the same restrictions regarding mount propagation and
+ privileges apply as for <varname>ReadOnlyPaths=</varname> and related calls, see above.</para></listitem>
</varlistentry>
<varlistentry>
@@ -971,76 +1024,107 @@
</varlistentry>
<varlistentry>
+ <term><varname>PrivateUsers=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, sets up a new user namespace for the executed processes and
+ configures a minimal user and group mapping, that maps the <literal>root</literal> user and group as well as
+ the unit's own user and group to themselves and everything else to the <literal>nobody</literal> user and
+ group. This is useful to securely detach the user and group databases used by the unit from the rest of the
+ system, and thus to create an effective sandbox environment. All files, directories, processes, IPC objects and
+ other resources owned by users/groups not equaling <literal>root</literal> or the unit's own will stay visible
+ from within the unit but appear owned by the <literal>nobody</literal> user and group. If this mode is enabled,
+ all unit processes are run without privileges in the host user namespace (regardless if the unit's own
+ user/group is <literal>root</literal> or not). Specifically this means that the process will have zero process
+ capabilities on the host's user namespace, but full capabilities within the service's user namespace. Settings
+ such as <varname>CapabilityBoundingSet=</varname> will affect only the latter, and there's no way to acquire
+ additional capabilities in the host's user namespace. Defaults to off.</para>
+
+ <para>This setting is particularly useful in conjunction with <varname>RootDirectory=</varname>, as the need to
+ synchronize the user and group databases in the root directory and on the host is reduced, as the only users
+ and groups who need to be matched are <literal>root</literal>, <literal>nobody</literal> and the unit's own
+ user and group.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>ProtectSystem=</varname></term>
- <listitem><para>Takes a boolean argument or
- <literal>full</literal>. If true, mounts the
- <filename>/usr</filename> and <filename>/boot</filename>
- directories read-only for processes invoked by this unit. If
- set to <literal>full</literal>, the <filename>/etc</filename>
- directory is mounted read-only, too. This setting ensures that
- any modification of the vendor-supplied operating system (and
- optionally its configuration) is prohibited for the service.
- It is recommended to enable this setting for all long-running
- services, unless they are involved with system updates or need
- to modify the operating system in other ways. Note however
- that processes retaining the CAP_SYS_ADMIN capability can undo
- the effect of this setting. This setting is hence particularly
- useful for daemons which have this capability removed, for
- example with <varname>CapabilityBoundingSet=</varname>.
- Defaults to off.</para></listitem>
+ <listitem><para>Takes a boolean argument or the special values <literal>full</literal> or
+ <literal>strict</literal>. If true, mounts the <filename>/usr</filename> and <filename>/boot</filename>
+ directories read-only for processes invoked by this unit. If set to <literal>full</literal>, the
+ <filename>/etc</filename> directory is mounted read-only, too. If set to <literal>strict</literal> the entire
+ file system hierarchy is mounted read-only, except for the API file system subtrees <filename>/dev</filename>,
+ <filename>/proc</filename> and <filename>/sys</filename> (protect these directories using
+ <varname>PrivateDevices=</varname>, <varname>ProtectKernelTunables=</varname>,
+ <varname>ProtectControlGroups=</varname>). This setting ensures that any modification of the vendor-supplied
+ operating system (and optionally its configuration, and local mounts) is prohibited for the service. It is
+ recommended to enable this setting for all long-running services, unless they are involved with system updates
+ or need to modify the operating system in other ways. If this option is used,
+ <varname>ReadWritePaths=</varname> may be used to exclude specific directories from being made read-only. This
+ setting is implied if <varname>DynamicUser=</varname> is set. For this setting the same restrictions regarding
+ mount propagation and privileges apply as for <varname>ReadOnlyPaths=</varname> and related calls, see
+ above. Defaults to off.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ProtectHome=</varname></term>
- <listitem><para>Takes a boolean argument or
- <literal>read-only</literal>. If true, the directories
- <filename>/home</filename>, <filename>/root</filename> and
- <filename>/run/user</filename>
- are made inaccessible and empty for processes invoked by this
- unit. If set to <literal>read-only</literal>, the three
- directories are made read-only instead. It is recommended to
- enable this setting for all long-running services (in
- particular network-facing ones), to ensure they cannot get
- access to private user data, unless the services actually
- require access to the user's private data. Note however that
- processes retaining the CAP_SYS_ADMIN capability can undo the
- effect of this setting. This setting is hence particularly
- useful for daemons which have this capability removed, for
- example with <varname>CapabilityBoundingSet=</varname>.
- Defaults to off.</para></listitem>
+ <listitem><para>Takes a boolean argument or <literal>read-only</literal>. If true, the directories
+ <filename>/home</filename>, <filename>/root</filename> and <filename>/run/user</filename> are made inaccessible
+ and empty for processes invoked by this unit. If set to <literal>read-only</literal>, the three directories are
+ made read-only instead. It is recommended to enable this setting for all long-running services (in particular
+ network-facing ones), to ensure they cannot get access to private user data, unless the services actually
+ require access to the user's private data. This setting is implied if <varname>DynamicUser=</varname> is
+ set. For this setting the same restrictions regarding mount propagation and privileges apply as for
+ <varname>ReadOnlyPaths=</varname> and related calls, see above.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>ProtectKernelTunables=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, kernel variables accessible through
+ <filename>/proc/sys</filename>, <filename>/sys</filename>, <filename>/proc/sysrq-trigger</filename>,
+ <filename>/proc/latency_stats</filename>, <filename>/proc/acpi</filename>,
+ <filename>/proc/timer_stats</filename>, <filename>/proc/fs</filename> and <filename>/proc/irq</filename> will
+ be made read-only to all processes of the unit. Usually, tunable kernel variables should only be written at
+ boot-time, with the <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ mechanism. Almost no services need to write to these at runtime; it is hence recommended to turn this on for
+ most services. For this setting the same restrictions regarding mount propagation and privileges apply as for
+ <varname>ReadOnlyPaths=</varname> and related calls, see above. Defaults to off.
+ Note that this option does not prevent kernel tuning through IPC interfaces and external programs. However
+ <varname>InaccessiblePaths=</varname> can be used to make some IPC file system objects
+ inaccessible.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>ProtectControlGroups=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, the Linux Control Groups (<citerefentry
+ project='man-pages'><refentrytitle>cgroups</refentrytitle><manvolnum>7</manvolnum></citerefentry>) hierarchies
+ accessible through <filename>/sys/fs/cgroup</filename> will be made read-only to all processes of the
+ unit. Except for container managers no services should require write access to the control groups hierarchies;
+ it is hence recommended to turn this on for most services. For this setting the same restrictions regarding
+ mount propagation and privileges apply as for <varname>ReadOnlyPaths=</varname> and related calls, see
+ above. Defaults to off.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>MountFlags=</varname></term>
- <listitem><para>Takes a mount propagation flag:
- <option>shared</option>, <option>slave</option> or
- <option>private</option>, which control whether mounts in the
- file system namespace set up for this unit's processes will
- receive or propagate mounts or unmounts. See
- <citerefentry project='man-pages'><refentrytitle>mount</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- for details. Defaults to <option>shared</option>. Use
- <option>shared</option> to ensure that mounts and unmounts are
- propagated from the host to the container and vice versa. Use
- <option>slave</option> to run processes so that none of their
- mounts and unmounts will propagate to the host. Use
- <option>private</option> to also ensure that no mounts and
- unmounts from the host will propagate into the unit processes'
- namespace. Note that <option>slave</option> means that file
- systems mounted on the host might stay mounted continuously in
- the unit's namespace, and thus keep the device busy. Note that
- the file system namespace related options
- (<varname>PrivateTmp=</varname>,
- <varname>PrivateDevices=</varname>,
- <varname>ProtectSystem=</varname>,
- <varname>ProtectHome=</varname>,
- <varname>ReadOnlyPaths=</varname>,
- <varname>InaccessiblePaths=</varname> and
- <varname>ReadWritePaths=</varname>) require that mount
- and unmount propagation from the unit's file system namespace
- is disabled, and hence downgrade <option>shared</option> to
+ <listitem><para>Takes a mount propagation flag: <option>shared</option>, <option>slave</option> or
+ <option>private</option>, which control whether mounts in the file system namespace set up for this unit's
+ processes will receive or propagate mounts or unmounts. See <citerefentry
+ project='man-pages'><refentrytitle>mount</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
+ details. Defaults to <option>shared</option>. Use <option>shared</option> to ensure that mounts and unmounts
+ are propagated from the host to the container and vice versa. Use <option>slave</option> to run processes so
+ that none of their mounts and unmounts will propagate to the host. Use <option>private</option> to also ensure
+ that no mounts and unmounts from the host will propagate into the unit processes' namespace. Note that
+ <option>slave</option> means that file systems mounted on the host might stay mounted continuously in the
+ unit's namespace, and thus keep the device busy. Note that the file system namespace related options
+ (<varname>PrivateTmp=</varname>, <varname>PrivateDevices=</varname>, <varname>ProtectSystem=</varname>,
+ <varname>ProtectHome=</varname>, <varname>ProtectKernelTunables=</varname>,
+ <varname>ProtectControlGroups=</varname>, <varname>ReadOnlyPaths=</varname>,
+ <varname>InaccessiblePaths=</varname>, <varname>ReadWritePaths=</varname>) require that mount and unmount
+ propagation from the unit's file system namespace is disabled, and hence downgrade <option>shared</option> to
<option>slave</option>. </para></listitem>
</varlistentry>
@@ -1150,42 +1234,49 @@
<varlistentry>
<term><varname>NoNewPrivileges=</varname></term>
- <listitem><para>Takes a boolean argument. If true, ensures
- that the service process and all its children can never gain
- new privileges. This option is more powerful than the
- respective secure bits flags (see above), as it also prohibits
- UID changes of any kind. This is the simplest, most effective
- way to ensure that a process and its children can never
- elevate privileges again.</para></listitem>
+ <listitem><para>Takes a boolean argument. If true, ensures that the service
+ process and all its children can never gain new privileges. This option is more
+ powerful than the respective secure bits flags (see above), as it also prohibits
+ UID changes of any kind. This is the simplest and most effective way to ensure that
+ a process and its children can never elevate privileges again. Defaults to false,
+ but in the user manager instance certain settings force
+ <varname>NoNewPrivileges=yes</varname>, ignoring the value of this setting.
+ Those is the case when <varname>SystemCallFilter=</varname>,
+ <varname>SystemCallArchitectures=</varname>,
+ <varname>RestrictAddressFamilies=</varname>,
+ <varname>PrivateDevices=</varname>,
+ <varname>ProtectKernelTunables=</varname>,
+ <varname>ProtectKernelModules=</varname>,
+ <varname>MemoryDenyWriteExecute=</varname>, or
+ <varname>RestrictRealtime=</varname> are specified.
+ </para></listitem>
</varlistentry>
<varlistentry>
<term><varname>SystemCallFilter=</varname></term>
- <listitem><para>Takes a space-separated list of system call
- names. If this setting is used, all system calls executed by
- the unit processes except for the listed ones will result in
- immediate process termination with the
- <constant>SIGSYS</constant> signal (whitelisting). If the
- first character of the list is <literal>~</literal>, the
- effect is inverted: only the listed system calls will result
- in immediate process termination (blacklisting). If running in
- user mode, or in system mode, but without the
- <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
- <varname>User=nobody</varname>),
- <varname>NoNewPrivileges=yes</varname> is implied. This
- feature makes use of the Secure Computing Mode 2 interfaces of
- the kernel ('seccomp filtering') and is useful for enforcing a
- minimal sandboxing environment. Note that the
- <function>execve</function>,
- <function>rt_sigreturn</function>,
- <function>sigreturn</function>,
- <function>exit_group</function>, <function>exit</function>
- system calls are implicitly whitelisted and do not need to be
- listed explicitly. This option may be specified more than once,
- in which case the filter masks are merged. If the empty string
- is assigned, the filter is reset, all prior assignments will
- have no effect. This does not affect commands prefixed with <literal>+</literal>.</para>
+ <listitem><para>Takes a space-separated list of system call names. If this setting is used, all system calls
+ executed by the unit processes except for the listed ones will result in immediate process termination with the
+ <constant>SIGSYS</constant> signal (whitelisting). If the first character of the list is <literal>~</literal>,
+ the effect is inverted: only the listed system calls will result in immediate process termination
+ (blacklisting). If running in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant>
+ capability (e.g. setting <varname>User=nobody</varname>), <varname>NoNewPrivileges=yes</varname> is
+ implied. This feature makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering')
+ and is useful for enforcing a minimal sandboxing environment. Note that the <function>execve</function>,
+ <function>exit</function>, <function>exit_group</function>, <function>getrlimit</function>,
+ <function>rt_sigreturn</function>, <function>sigreturn</function> system calls and the system calls for
+ querying time and sleeping are implicitly whitelisted and do not need to be listed explicitly. This option may
+ be specified more than once, in which case the filter masks are merged. If the empty string is assigned, the
+ filter is reset, all prior assignments will have no effect. This does not affect commands prefixed with
+ <literal>+</literal>.</para>
+
+ <para>Note that strict system call filters may impact execution and error handling code paths of the service
+ invocation. Specifically, access to the <function>execve</function> system call is required for the execution
+ of the service binary — if it is blocked service invocation will necessarily fail. Also, if execution of the
+ service binary fails for some reason (for example: missing service executable), the error handling logic might
+ require access to an additional set of system calls in order to process and log this failure correctly. It
+ might be necessary to temporarily disable system call filters in order to simplify debugging of such
+ failures.</para>
<para>If you specify both types of this option (i.e.
whitelisting and blacklisting), the first encountered will
@@ -1219,6 +1310,10 @@
</thead>
<tbody>
<row>
+ <entry>@basic-io</entry>
+ <entry>System calls for basic I/O: reading, writing, seeking, file descriptor duplication and closing (<citerefentry project='man-pages'><refentrytitle>read</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>write</refentrytitle><manvolnum>2</manvolnum></citerefentry>, and related calls)</entry>
+ </row>
+ <row>
<entry>@clock</entry>
<entry>System calls for changing the system clock (<citerefentry project='man-pages'><refentrytitle>adjtimex</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>settimeofday</refentrytitle><manvolnum>2</manvolnum></citerefentry>, and related calls)</entry>
</row>
@@ -1236,7 +1331,7 @@
</row>
<row>
<entry>@ipc</entry>
- <entry>SysV IPC, POSIX Message Queues or other IPC (<citerefentry project='man-pages'><refentrytitle>mq_overview</refentrytitle><manvolnum>7</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>svipc</refentrytitle><manvolnum>7</manvolnum></citerefentry>)</entry>
+ <entry>Pipes, SysV IPC, POSIX Message Queues and other IPC (<citerefentry project='man-pages'><refentrytitle>mq_overview</refentrytitle><manvolnum>7</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>svipc</refentrytitle><manvolnum>7</manvolnum></citerefentry>)</entry>
</row>
<row>
<entry>@keyring</entry>
@@ -1264,18 +1359,30 @@
</row>
<row>
<entry>@process</entry>
- <entry>Process control, execution, namespaces (<citerefentry project='man-pages'><refentrytitle>execve</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>kill</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>namespaces</refentrytitle><manvolnum>7</manvolnum></citerefentry>, …</entry>
+ <entry>Process control, execution, namespaces (<citerefentry project='man-pages'><refentrytitle>clone</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>kill</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>namespaces</refentrytitle><manvolnum>7</manvolnum></citerefentry>, …</entry>
</row>
<row>
<entry>@raw-io</entry>
- <entry>Raw I/O port access (<citerefentry project='man-pages'><refentrytitle>ioperm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>iopl</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <function>pciconfig_read()</function>, …</entry>
+ <entry>Raw I/O port access (<citerefentry project='man-pages'><refentrytitle>ioperm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>iopl</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <function>pciconfig_read()</function>, …)</entry>
+ </row>
+ <row>
+ <entry>@resources</entry>
+ <entry>System calls for changing resource limits, memory and scheduling parameters (<citerefentry project='man-pages'><refentrytitle>setrlimit</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setpriority</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
</row>
</tbody>
</tgroup>
</table>
- Note, that as new system calls are added to the kernel, additional system calls might be added to the groups
- above, so the contents of the sets may change between systemd versions.</para></listitem>
+ Note that as new system calls are added to the kernel, additional system calls might be added to the groups
+ above, so the contents of the sets may change between systemd versions.</para>
+
+ <para>It is recommended to combine the file system namespacing related options with
+ <varname>SystemCallFilter=~@mount</varname>, in order to prohibit the unit's processes to undo the
+ mappings. Specifically these are the options <varname>PrivateTmp=</varname>,
+ <varname>PrivateDevices=</varname>, <varname>ProtectSystem=</varname>, <varname>ProtectHome=</varname>,
+ <varname>ProtectKernelTunables=</varname>, <varname>ProtectControlGroups=</varname>,
+ <varname>ReadOnlyPaths=</varname>, <varname>InaccessiblePaths=</varname> and
+ <varname>ReadWritePaths=</varname>.</para></listitem>
</varlistentry>
<varlistentry>
@@ -1295,27 +1402,25 @@
<varlistentry>
<term><varname>SystemCallArchitectures=</varname></term>
- <listitem><para>Takes a space-separated list of architecture
- identifiers to include in the system call filter. The known
- architecture identifiers are <constant>x86</constant>,
- <constant>x86-64</constant>, <constant>x32</constant>,
- <constant>arm</constant> as well as the special identifier
- <constant>native</constant>. Only system calls of the
- specified architectures will be permitted to processes of this
- unit. This is an effective way to disable compatibility with
- non-native architectures for processes, for example to
- prohibit execution of 32-bit x86 binaries on 64-bit x86-64
- systems. The special <constant>native</constant> identifier
- implicitly maps to the native architecture of the system (or
- more strictly: to the architecture the system manager is
- compiled for). If running in user mode, or in system mode,
- but without the <constant>CAP_SYS_ADMIN</constant>
- capability (e.g. setting <varname>User=nobody</varname>),
- <varname>NoNewPrivileges=yes</varname> is implied. Note
- that setting this option to a non-empty list implies that
- <constant>native</constant> is included too. By default, this
- option is set to the empty list, i.e. no architecture system
- call filtering is applied.</para></listitem>
+ <listitem><para>Takes a space-separated list of architecture identifiers to
+ include in the system call filter. The known architecture identifiers are the same
+ as for <varname>ConditionArchitecture=</varname> described in
+ <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
+ as well as <constant>x32</constant>, <constant>mips64-n32</constant>,
+ <constant>mips64-le-n32</constant>, and the special identifier
+ <constant>native</constant>. Only system calls of the specified architectures will
+ be permitted to processes of this unit. This is an effective way to disable
+ compatibility with non-native architectures for processes, for example to prohibit
+ execution of 32-bit x86 binaries on 64-bit x86-64 systems. The special
+ <constant>native</constant> identifier implicitly maps to the native architecture
+ of the system (or more strictly: to the architecture the system manager is
+ compiled for). If running in user mode, or in system mode, but without the
+ <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
+ <varname>User=nobody</varname>), <varname>NoNewPrivileges=yes</varname> is
+ implied. Note that setting this option to a non-empty list implies that
+ <constant>native</constant> is included too. By default, this option is set to the
+ empty list, i.e. no architecture system call filtering is applied.
+ </para></listitem>
</varlistentry>
<varlistentry>
@@ -1358,6 +1463,26 @@
</varlistentry>
<varlistentry>
+ <term><varname>ProtectKernelModules=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, explicit module loading will
+ be denied. This allows to turn off module load and unload operations on modular
+ kernels. It is recommended to turn this on for most services that do not need special
+ file systems or extra kernel modules to work. Default to off. Enabling this option
+ removes <constant>CAP_SYS_MODULE</constant> from the capability bounding set for
+ the unit, and installs a system call filter to block module system calls,
+ also <filename>/usr/lib/modules</filename> is made inaccessible. For this
+ setting the same restrictions regarding mount propagation and privileges
+ apply as for <varname>ReadOnlyPaths=</varname> and related calls, see above.
+ Note that limited automatic module loading due to user configuration or kernel
+ mapping tables might still happen as side effect of requested user operations,
+ both privileged and unprivileged. To disable module auto-load feature please see
+ <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ <constant>kernel.modules_disabled</constant> mechanism and
+ <filename>/proc/sys/kernel/modules_disabled</filename> documentation.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>Personality=</varname></term>
<listitem><para>Controls which kernel architecture <citerefentry
@@ -1403,12 +1528,15 @@
<term><varname>MemoryDenyWriteExecute=</varname></term>
<listitem><para>Takes a boolean argument. If set, attempts to create memory mappings that are writable and
- executable at the same time, or to change existing memory mappings to become executable are prohibited.
+ executable at the same time, or to change existing memory mappings to become executable, or mapping shared memory
+ segments as executable are prohibited.
Specifically, a system call filter is added that rejects
<citerefentry><refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set
- and <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry>
- system calls with <constant>PROT_EXEC</constant> set. Note that this option is incompatible with programs
+ system calls with both <constant>PROT_EXEC</constant> and <constant>PROT_WRITE</constant> set,
+ <citerefentry><refentrytitle>mprotect</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+ system calls with <constant>PROT_EXEC</constant> set and
+ <citerefentry><refentrytitle>shmat</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+ system calls with <constant>SHM_EXEC</constant> set. Note that this option is incompatible with programs
that generate program code dynamically at runtime, such as JIT execution engines, or programs compiled making
use of the code "trampoline" feature of various C compilers. This option improves service security, as it makes
harder for software exploits to change running code dynamically.
@@ -1421,7 +1549,7 @@
<listitem><para>Takes a boolean argument. If set, any attempts to enable realtime scheduling in a process of
the unit are refused. This restricts access to realtime task scheduling policies such as
<constant>SCHED_FIFO</constant>, <constant>SCHED_RR</constant> or <constant>SCHED_DEADLINE</constant>. See
- <citerefentry><refentrytitle>sched</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details about
+ <citerefentry project='man-pages'><refentrytitle>sched</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details about
these scheduling policies. Realtime scheduling policies may be used to monopolize CPU time for longer periods
of time, and may hence be used to lock up or otherwise trigger Denial-of-Service situations on the system. It
is hence recommended to restrict access to realtime scheduling to the few programs that actually require
@@ -1478,6 +1606,16 @@
</varlistentry>
<varlistentry>
+ <term><varname>$INVOCATION_ID</varname></term>
+
+ <listitem><para>Contains a randomized, unique 128bit ID identifying each runtime cycle of the unit, formatted
+ as 32 character hexadecimal string. A new ID is assigned each time the unit changes from an inactive state into
+ an activating or active state, and may be used to identify this specific runtime cycle, in particular in data
+ stored offline, such as the journal. The same ID is passed to all processes run as part of the
+ unit.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>$XDG_RUNTIME_DIR</varname></term>
<listitem><para>The directory for volatile state. Set for the
@@ -1503,7 +1641,7 @@
<varlistentry>
<term><varname>$MAINPID</varname></term>
- <listitem><para>The PID of the units main process if it is
+ <listitem><para>The PID of the unit's main process if it is
known. This is only set for control processes as invoked by
<varname>ExecReload=</varname> and similar. </para></listitem>
</varlistentry>
@@ -1574,6 +1712,118 @@
functions) if their standard output or standard error output is connected to the journal anyway, thus enabling
delivery of structured metadata along with logged messages.</para></listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>$SERVICE_RESULT</varname></term>
+
+ <listitem><para>Only defined for the service unit type, this environment variable is passed to all
+ <varname>ExecStop=</varname> and <varname>ExecStopPost=</varname> processes, and encodes the service
+ "result". Currently, the following values are defined: <literal>timeout</literal> (in case of an operation
+ timeout), <literal>exit-code</literal> (if a service process exited with a non-zero exit code; see
+ <varname>$EXIT_CODE</varname> below for the actual exit code returned), <literal>signal</literal> (if a
+ service process was terminated abnormally by a signal; see <varname>$EXIT_CODE</varname> below for the actual
+ signal used for the termination), <literal>core-dump</literal> (if a service process terminated abnormally and
+ dumped core), <literal>watchdog</literal> (if the watchdog keep-alive ping was enabled for the service but it
+ missed the deadline), or <literal>resources</literal> (a catch-all condition in case a system operation
+ failed).</para>
+
+ <para>This environment variable is useful to monitor failure or successful termination of a service. Even
+ though this variable is available in both <varname>ExecStop=</varname> and <varname>ExecStopPost=</varname>, it
+ is usually a better choice to place monitoring tools in the latter, as the former is only invoked for services
+ that managed to start up correctly, and the latter covers both services that failed during their start-up and
+ those which failed during their runtime.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>$EXIT_CODE</varname></term>
+ <term><varname>$EXIT_STATUS</varname></term>
+
+ <listitem><para>Only defined for the service unit type, these environment variables are passed to all
+ <varname>ExecStop=</varname>, <varname>ExecStopPost=</varname> processes and contain exit status/code
+ information of the main process of the service. For the precise definition of the exit code and status, see
+ <citerefentry><refentrytitle>wait</refentrytitle><manvolnum>2</manvolnum></citerefentry>. <varname>$EXIT_CODE</varname>
+ is one of <literal>exited</literal>, <literal>killed</literal>,
+ <literal>dumped</literal>. <varname>$EXIT_STATUS</varname> contains the numeric exit code formatted as string
+ if <varname>$EXIT_CODE</varname> is <literal>exited</literal>, and the signal name in all other cases. Note
+ that these environment variables are only set if the service manager succeeded to start and identify the main
+ process of the service.</para>
+
+ <table>
+ <title>Summary of possible service result variable values</title>
+ <tgroup cols='3'>
+ <colspec colname='result' />
+ <colspec colname='status' />
+ <colspec colname='code' />
+ <thead>
+ <row>
+ <entry><varname>$SERVICE_RESULT</varname></entry>
+ <entry><varname>$EXIT_STATUS</varname></entry>
+ <entry><varname>$EXIT_CODE</varname></entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry morerows="1" valign="top"><literal>timeout</literal></entry>
+ <entry valign="top"><literal>killed</literal></entry>
+ <entry><literal>TERM</literal>, <literal>KILL</literal></entry>
+ </row>
+
+ <row>
+ <entry valign="top"><literal>exited</literal></entry>
+ <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal>, <literal
+ >3</literal>, …, <literal>255</literal></entry>
+ </row>
+
+ <row>
+ <entry valign="top"><literal>exit-code</literal></entry>
+ <entry valign="top"><literal>exited</literal></entry>
+ <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal>, <literal
+ >3</literal>, …, <literal>255</literal></entry>
+ </row>
+
+ <row>
+ <entry valign="top"><literal>signal</literal></entry>
+ <entry valign="top"><literal>killed</literal></entry>
+ <entry><literal>HUP</literal>, <literal>INT</literal>, <literal>KILL</literal>, …</entry>
+ </row>
+
+ <row>
+ <entry valign="top"><literal>core-dump</literal></entry>
+ <entry valign="top"><literal>dumped</literal></entry>
+ <entry><literal>ABRT</literal>, <literal>SEGV</literal>, <literal>QUIT</literal>, …</entry>
+ </row>
+
+ <row>
+ <entry morerows="2" valign="top"><literal>watchdog</literal></entry>
+ <entry><literal>dumped</literal></entry>
+ <entry><literal>ABRT</literal></entry>
+ </row>
+ <row>
+ <entry><literal>killed</literal></entry>
+ <entry><literal>TERM</literal>, <literal>KILL</literal></entry>
+ </row>
+ <row>
+ <entry><literal>exited</literal></entry>
+ <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal>, <literal
+ >3</literal>, …, <literal>255</literal></entry>
+ </row>
+
+ <row>
+ <entry><literal>resources</literal></entry>
+ <entry>any of the above</entry>
+ <entry>any of the above</entry>
+ </row>
+
+ <row>
+ <entry namest="results" nameend="code">Note: the process may be also terminated by a signal not sent by systemd. In particular the process may send an arbitrary signal to itself in a handler for any of the non-maskable signals. Nevertheless, in the <literal>timeout</literal> and <literal>watchdog</literal> rows above only the signals that systemd sends have been included.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </listitem>
+ </varlistentry>
</variablelist>
<para>Additional variables may be configured by the following
@@ -1609,4 +1859,5 @@
</para>
</refsect1>
+
</refentry>
diff --git a/src/grp-system/systemd/systemd.link.xml b/src/grp-system/systemd/systemd.link.xml
index d5b4d1038d..8edbe758d9 100644
--- a/src/grp-system/systemd/systemd.link.xml
+++ b/src/grp-system/systemd/systemd.link.xml
@@ -107,7 +107,7 @@
<listitem>
<para>A whitespace-separated list of shell-style globs matching
the device name, as exposed by the udev property
- "INTERFACE". This can not be used to match on names that have
+ "INTERFACE". This cannot be used to match on names that have
already been changed from userspace. Caution is advised when matching on
kernel-assigned names, as they are known to be unstable
between reboots.</para>
@@ -387,6 +387,46 @@
</variablelist>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>TCPSegmentationOffload=</varname></term>
+ <listitem>
+ <para>The TCP Segmentation Offload (TSO) when true enables
+ TCP segmentation offload. Takes a boolean value.
+ Defaults to "unset".</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>GenericSegmentationOffload=</varname></term>
+ <listitem>
+ <para>The Generic Segmentation Offload (GSO) when true enables
+ generic segmentation offload. Takes a boolean value.
+ Defaults to "unset".</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>UDPSegmentationOffload=</varname></term>
+ <listitem>
+ <para>The UDP Segmentation Offload (USO) when true enables
+ UDP segmentation offload. Takes a boolean value.
+ Defaults to "unset".</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>GenericReceiveOffload=</varname></term>
+ <listitem>
+ <para>The Generic Receive Offload (GRO) when true enables
+ generic receive offload. Takes a boolean value.
+ Defaults to "unset".</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>LargeReceiveOffload=</varname></term>
+ <listitem>
+ <para>The Large Receive Offload (LRO) when true enables
+ large receive offload. Takes a boolean value.
+ Defaults to "unset".</para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
diff --git a/src/grp-system/systemd/systemd.mount.xml b/src/grp-system/systemd/systemd.mount.xml
index 66cddd72e0..b0f156f6df 100644
--- a/src/grp-system/systemd/systemd.mount.xml
+++ b/src/grp-system/systemd/systemd.mount.xml
@@ -160,7 +160,7 @@
for details about the conversion.</para>
<para>The NFS mount option <option>bg</option> for NFS background mounts
- as documented in <citerefentry><refentrytitle>nfs</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+ as documented in <citerefentry project='man-pages'><refentrytitle>nfs</refentrytitle><manvolnum>5</manvolnum></citerefentry>
is not supported in <filename>/etc/fstab</filename> entries. The systemd mount option <option>nofail</option>
provides similar functionality and should be used instead.</para>
@@ -352,6 +352,30 @@
</varlistentry>
<varlistentry>
+ <term><varname>LazyUnmount=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, detach the
+ filesystem from the filesystem hierarchy at time of the unmount
+ operation, and clean up all references to the filesystem as
+ soon as they are not busy anymore.
+ This corresponds with
+ <citerefentry project='man-pages'><refentrytitle>umount</refentrytitle><manvolnum>8</manvolnum></citerefentry>'s
+ <parameter>-l</parameter> switch. Defaults to
+ off.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>ForceUnmount=</varname></term>
+
+ <listitem><para>Takes a boolean argument. If true, force an
+ unmount (in case of an unreachable NFS system).
+ This corresponds with
+ <citerefentry project='man-pages'><refentrytitle>umount</refentrytitle><manvolnum>8</manvolnum></citerefentry>'s
+ <parameter>-f</parameter> switch. Defaults to
+ off.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>DirectoryMode=</varname></term>
<listitem><para>Directories of mount points (and any parent
directories) are automatically created if needed. This option
@@ -373,7 +397,7 @@
Takes a unit-less value in seconds, or a time span value such
as "5min 20s". Pass 0 to disable the timeout logic. The
default value is set from the manager configuration file's
- <varname>DefaultTimeoutStart=</varname>
+ <varname>DefaultTimeoutStartSec=</varname>
variable.</para></listitem>
</varlistentry>
</variablelist>
diff --git a/src/grp-system/systemd/systemd.netdev.xml b/src/grp-system/systemd/systemd.netdev.xml
index a5c6f0fa40..ffb66e735b 100644
--- a/src/grp-system/systemd/systemd.netdev.xml
+++ b/src/grp-system/systemd/systemd.netdev.xml
@@ -58,31 +58,38 @@
<citerefentry><refentrytitle>systemd-networkd</refentrytitle><manvolnum>8</manvolnum></citerefentry>.
</para>
- <para>Virtual Network Device files must have the extension
- <filename>.netdev</filename>; other extensions are ignored.
- Virtual network devices are created as soon as networkd is
- started. If a netdev with the specified name already exists,
- networkd will use that as-is rather than create its own. Note that
- the settings of the pre-existing netdev will not be changed by
+ <para>The main Virtual Network Device file must have the extension <filename>.netdev</filename>;
+ other extensions are ignored. Virtual network devices are created as soon as networkd is
+ started. If a netdev with the specified name already exists, networkd will use that as-is rather
+ than create its own. Note that the settings of the pre-existing netdev will not be changed by
networkd.</para>
- <para>The <filename>.netdev</filename> files are read from the
- files located in the system network directory
- <filename>/usr/lib/systemd/network</filename>, the volatile
- runtime network directory
- <filename>/run/systemd/network</filename> and the local
- administration network directory
- <filename>/etc/systemd/network</filename>. All configuration files
- are collectively sorted and processed in lexical order, regardless
- of the directories in which they live. However, files with
- identical filenames replace each other. Files in
- <filename>/etc</filename> have the highest priority, files in
- <filename>/run</filename> take precedence over files with the same
- name in <filename>/usr/lib</filename>. This can be used to
- override a system-supplied configuration file with a local file if
- needed. As a special case, an empty file (file size 0) or symlink
- with the same name pointing to <filename>/dev/null</filename>
- disables the configuration file entirely (it is "masked").</para>
+ <para>The <filename>.netdev</filename> files are read from the files located in the system
+ network directory <filename>/usr/lib/systemd/network</filename>, the volatile runtime network
+ directory <filename>/run/systemd/network</filename> and the local administration network
+ directory <filename>/etc/systemd/network</filename>. All configuration files are collectively
+ sorted and processed in lexical order, regardless of the directories in which they live.
+ However, files with identical filenames replace each other. Files in <filename>/etc</filename>
+ have the highest priority, files in <filename>/run</filename> take precedence over files with
+ the same name in <filename>/usr/lib</filename>. This can be used to override a system-supplied
+ configuration file with a local file if needed. As a special case, an empty file (file size 0)
+ or symlink with the same name pointing to <filename>/dev/null</filename> disables the
+ configuration file entirely (it is "masked").</para>
+
+ <para>Along with the netdev file <filename>foo.netdev</filename>, a "drop-in" directory
+ <filename>foo.netdev.d/</filename> may exist. All files with the suffix <literal>.conf</literal>
+ from this directory will be parsed after the file itself is parsed. This is useful to alter or
+ add configuration settings, without having to modify the main configuration file. Each drop-in
+ file must have appropriate section headers.</para>
+
+ <para>In addition to <filename>/etc/systemd/network</filename>, drop-in <literal>.d</literal>
+ directories can be placed in <filename>/usr/lib/systemd/network</filename> or
+ <filename>/run/systemd/network</filename> directories. Drop-in files in
+ <filename>/etc</filename> take precedence over those in <filename>/run</filename> which in turn
+ take precedence over those in <filename>/usr/lib</filename>. Drop-in files under any of these
+ directories take precedence over the main netdev file wherever located. (Of course, since
+ <filename>/run</filename> is temporary and <filename>/usr/lib</filename> is for vendors, it is
+ unlikely drop-ins should be used in either of those places.)</para>
</refsect1>
<refsect1>
@@ -163,7 +170,10 @@
<entry>A virtual extensible LAN (vxlan), for connecting Cloud computing deployments.</entry></row>
<row><entry><varname>vrf</varname></entry>
- <entry>A Virtual Routing and Forwarding (<ulink url="https://www.kernel.org/doc/Documentation/networking/vrf.txt">VRF</ulink>) interface to create separate routing and forwarding domains.</entry></row>
+ <entry>A Virtual Routing and Forwarding (<ulink url="https://www.kernel.org/doc/Documentation/networking/vrf.txt">VRF</ulink>) interface to create separate routing and forwarding domains.</entry></row>
+
+ <row><entry><varname>vcan</varname></entry>
+ <entry>The virtual CAN driver (vcan). Similar to the network loopback devices, vcan offers a virtual local CAN interface.</entry></row>
</tbody>
</tgroup>
@@ -315,6 +325,26 @@
</listitem>
</varlistentry>
<varlistentry>
+ <term><varname>AgeingTimeSec=</varname></term>
+ <listitem>
+ <para>This specifies the number of seconds a MAC Address will be kept in
+ the forwarding database after having a packet received from this MAC Address.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>Priority=</varname></term>
+ <listitem>
+ <para>The priority of the bridge. An integer between 0 and 65535. A lower value
+ means higher priority. The bridge having the lowest priority will be elected as root bridge.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>DefaultPVID=</varname></term>
+ <listitem>
+ <para>This specifies the default port VLAN ID of a newly attached bridge port.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
<term><varname>MulticastQuerier=</varname></term>
<listitem>
<para>A boolean. This setting controls the IFLA_BR_MCAST_QUERIER option in the kernel.
@@ -343,8 +373,15 @@
</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>STP=</varname></term>
+ <listitem>
+ <para>A boolean. This enables the bridge's Spanning Tree Protocol (STP). When unset,
+ the kernel's default setting applies.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
-
</refsect1>
<refsect1>
@@ -500,7 +537,7 @@
</listitem>
</varlistentry>
<varlistentry>
- <term><varname>UDPCheckSum=</varname></term>
+ <term><varname>UDPChecksum=</varname></term>
<listitem>
<para>A boolean. When true, transmitting UDP checksums when doing VXLAN/IPv4 is turned on.</para>
</listitem>
@@ -512,11 +549,23 @@
</listitem>
</varlistentry>
<varlistentry>
- <term><varname>UDP6ZeroCheckSumRx=</varname></term>
+ <term><varname>UDP6ZeroChecksumRx=</varname></term>
<listitem>
<para>A boolean. When true, receiving zero checksums in VXLAN/IPv6 is turned on.</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>RemoteChecksumTx=</varname></term>
+ <listitem>
+ <para>A boolean. When true, remote transmit checksum offload of VXLAN is turned on.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>RemoteChecksumRx=</varname></term>
+ <listitem>
+ <para>A boolean. When true, remote receive checksum offload in VXLAN is turned on.</para>
+ </listitem>
+ </varlistentry>
<varlistentry>
<term><varname>GroupPolicyExtension=</varname></term>
<listitem>
diff --git a/src/grp-system/systemd/systemd.network.xml b/src/grp-system/systemd/systemd.network.xml
index 4541a55490..2fb4907634 100644
--- a/src/grp-system/systemd/systemd.network.xml
+++ b/src/grp-system/systemd/systemd.network.xml
@@ -58,31 +58,40 @@
<citerefentry><refentrytitle>systemd-networkd</refentrytitle><manvolnum>8</manvolnum></citerefentry>.
</para>
- <para>Network files must have the extension
- <filename>.network</filename>; other extensions are ignored.
- Networks are applied to links whenever the links appear.</para>
-
- <para>The <filename>.network</filename> files are read from the
- files located in the system network directory
- <filename>/usr/lib/systemd/network</filename>, the volatile
- runtime network directory
- <filename>/run/systemd/network</filename> and the local
- administration network directory
- <filename>/etc/systemd/network</filename>. All configuration files
- are collectively sorted and processed in lexical order, regardless
- of the directories in which they live. However, files with
- identical filenames replace each other. Files in
- <filename>/etc</filename> have the highest priority, files in
- <filename>/run</filename> take precedence over files with the same
- name in <filename>/usr/lib</filename>. This can be used to
- override a system-supplied configuration file with a local file if
- needed. As a special case, an empty file (file size 0) or symlink
- with the same name pointing to <filename>/dev/null</filename>
- disables the configuration file entirely (it is "masked").</para>
-
- <para>Note that an interface without any static IPv6 addresses configured, and neither DHCPv6 nor IPv6LL enabled,
- shall be considered to have no IPv6 support. IPv6 will be automatically disabled for that interface by writing "1"
- to <filename>/proc/sys/net/ipv6/conf/<replaceable>ifname</replaceable>/disable_ipv6</filename>.
+ <para>The main network file must have the extension <filename>.network</filename>; other
+ extensions are ignored. Networks are applied to links whenever the links appear.</para>
+
+ <para>The <filename>.network</filename> files are read from the files located in the system
+ network directory <filename>/usr/lib/systemd/network</filename>, the volatile runtime network
+ directory <filename>/run/systemd/network</filename> and the local administration network
+ directory <filename>/etc/systemd/network</filename>. All configuration files are collectively
+ sorted and processed in lexical order, regardless of the directories in which they live.
+ However, files with identical filenames replace each other. Files in <filename>/etc</filename>
+ have the highest priority, files in <filename>/run</filename> take precedence over files with
+ the same name in <filename>/usr/lib</filename>. This can be used to override a system-supplied
+ configuration file with a local file if needed. As a special case, an empty file (file size 0)
+ or symlink with the same name pointing to <filename>/dev/null</filename> disables the
+ configuration file entirely (it is "masked").</para>
+
+ <para>Along with the network file <filename>foo.network</filename>, a "drop-in" directory
+ <filename>foo.network.d/</filename> may exist. All files with the suffix
+ <literal>.conf</literal> from this directory will be parsed after the file itself is
+ parsed. This is useful to alter or add configuration settings, without having to modify the main
+ configuration file. Each drop-in file must have appropriate section headers.</para>
+
+ <para>In addition to <filename>/etc/systemd/network</filename>, drop-in <literal>.d</literal>
+ directories can be placed in <filename>/usr/lib/systemd/network</filename> or
+ <filename>/run/systemd/network</filename> directories. Drop-in files in
+ <filename>/etc</filename> take precedence over those in <filename>/run</filename> which in turn
+ take precedence over those in <filename>/usr/lib</filename>. Drop-in files under any of these
+ directories take precedence over the main netdev file wherever located. (Of course, since
+ <filename>/run</filename> is temporary and <filename>/usr/lib</filename> is for vendors, it is
+ unlikely drop-ins should be used in either of those places.)</para>
+
+ <para>Note that an interface without any static IPv6 addresses configured, and neither DHCPv6
+ nor IPv6LL enabled, shall be considered to have no IPv6 support. IPv6 will be automatically
+ disabled for that interface by writing "1" to
+ <filename>/proc/sys/net/ipv6/conf/<replaceable>ifname</replaceable>/disable_ipv6</filename>.
</para>
</refsect1>
@@ -212,6 +221,17 @@
below 1280 (the minimum MTU for IPv6) it will automatically be increased to this value.</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>ARP=</varname></term>
+ <listitem>
+ <para> A boolean. Enables or disables the ARP (low-level Address Resolution Protocol)
+ for this interface. Defaults to unset, which means that the kernel default will be used.</para>
+ <para> For example, disabling ARP is useful when creating multiple MACVLAN or VLAN virtual
+ interfaces atop a single lower-level physical interface, which will then only serve as a
+ link/"bridge" device aggregating traffic to the same physical link and not participate in
+ the network otherwise.</para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
@@ -447,24 +467,31 @@
<varlistentry>
<term><varname>Domains=</varname></term>
<listitem>
- <para>The domains used for DNS host name resolution on this link. Takes a list of DNS domain names which
- are used as search suffixes for extending single-label host names (host names containing no dots) to become
- fully qualified domain names (FQDNs). If a single-label host name is resolved on this interface, each of
- the specified search domains are appended to it in turn, converting it into a fully qualified domain name,
- until one of them may be successfully resolved.</para>
-
- <para>The specified domains are also used for routing of DNS queries: look-ups for host names ending in the
- domains specified here are preferably routed to the DNS servers configured for this interface. If a domain
- name is prefixed with <literal>~</literal>, the domain name becomes a pure "routing" domain, is used for
- DNS query routing purposes only and is not used in the described domain search logic. By specifying a
- routing domain of <literal>~.</literal> (the tilde indicating definition of a routing domain, the dot
- referring to the DNS root domain which is the implied suffix of all valid DNS names) it is possible to
- route all DNS traffic preferably to the DNS server specified for this interface. The route domain logic is
- particularly useful on multi-homed hosts with DNS servers serving particular private DNS zones on each
- interface.</para>
+ <para>A list of domains which should be resolved using the DNS servers on this link. Each item in the list
+ should be a domain name, optionally prefixed with a tilde (<literal>~</literal>). The domains with the
+ prefix are called "routing-only domains". The domains without the prefix are called "search domains" and
+ are first used as search suffixes for extending single-label host names (host names containing no dots) to
+ become fully qualified domain names (FQDNs). If a single-label host name is resolved on this interface,
+ each of the specified search domains are appended to it in turn, converting it into a fully qualified
+ domain name, until one of them may be successfully resolved.</para>
+
+ <para>Both "search" and "routing-only" domains are used for routing of DNS queries: look-ups for host names
+ ending in those domains (hence also single label names, if any "search domains" are listed), are routed to
+ the DNS servers configured for this interface. The domain routing logic is particularly useful on
+ multi-homed hosts with DNS servers serving particular private DNS zones on each interface.</para>
+
+ <para>The "routing-only" domain <literal>~.</literal> (the tilde indicating definition of a routing domain,
+ the dot referring to the DNS root domain which is the implied suffix of all valid DNS names) has special
+ effect. It causes all DNS traffic which does not match another configured domain routing entry to be routed
+ to DNS servers specified for this interface. This setting is useful to prefer a certain set of DNS servers
+ if a link on which they are connected is available.</para>
<para>This setting is read by
- <citerefentry><refentrytitle>systemd-resolved.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
+ <citerefentry><refentrytitle>systemd-resolved.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>.
+ "Search domains" correspond to the <varname>domain</varname> and <varname>search</varname> entries in
+ <citerefentry><refentrytitle>resolv.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
+ Domain name routing has no equivalent in the traditional glibc API, which has no concept of domain
+ name servers limited to a specific link.</para>
</listitem>
</varlistentry>
<varlistentry>
@@ -668,6 +695,57 @@
which is then configured to use them explicitly.</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>HomeAddress=</varname></term>
+ <listitem>
+ <para>Takes a boolean argument. Designates this address the "home address" as defined in
+ <ulink url="https://tools.ietf.org/html/rfc6275">RFC 6275</ulink>.
+ Supported only on IPv6. Defaults to false.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>DuplicateAddressDetection=</varname></term>
+ <listitem>
+ <para>Takes a boolean argument. Do not perform Duplicate Address Detection
+ <ulink url="https://tools.ietf.org/html/rfc4862">RFC 4862</ulink> when adding this address.
+ Supported only on IPv6. Defaults to false.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>ManageTemporaryAddress=</varname></term>
+ <listitem>
+ <para>Takes a boolean argument. If true the kernel manage temporary addresses created
+ from this one as template on behalf of Privacy Extensions
+ <ulink url="https://tools.ietf.org/html/rfc3041">RFC 3041</ulink>. For this to become
+ active, the use_tempaddr sysctl setting has to be set to a value greater than zero.
+ The given address needs to have a prefix length of 64. This flag allows to use privacy
+ extensions in a manually configured network, just like if stateless auto-configuration
+ was active. Defaults to false. </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>PrefixRoute=</varname></term>
+ <listitem>
+ <para>Takes a boolean argument. When adding or modifying an IPv6 address, the userspace
+ application needs a way to suppress adding a prefix route. This is for example relevant
+ together with IFA_F_MANAGERTEMPADDR, where userspace creates autoconf generated addresses,
+ but depending on on-link, no route for the prefix should be added. Defaults to false.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><varname>AutoJoin=</varname></term>
+ <listitem>
+ <para>Takes a boolean argument. Joining multicast group on ethernet level via
+ <command>ip maddr</command> command would not work if we have an Ethernet switch that does
+ IGMP snooping since the switch would not replicate multicast packets on ports that did not
+ have IGMP reports for the multicast addresses. Linux vxlan interfaces created via
+ <command>ip link add vxlan</command> or networkd's netdev kind vxlan have the group option
+ that enables then to do the required join. By extending ip address command with option
+ <literal>autojoin</literal> we can get similar functionality for openvswitch (OVS) vxlan
+ interfaces as well as other tunneling mechanisms that need to receive multicast traffic.
+ Defaults to <literal>no</literal>.</para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
@@ -897,6 +975,15 @@
DHCP server.</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>RouteTable=<replaceable>num</replaceable></varname></term>
+ <listitem>
+ <para>The table identifier for DHCP routes (a number between 1 and 4294967295, or 0 to unset).
+ The table can be retrieved using <command>ip route show table <replaceable>num</replaceable></command>.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
@@ -937,6 +1024,16 @@
project='man-pages'><refentrytitle>resolv.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><varname>RouteTable=<replaceable>num</replaceable></varname></term>
+ <listitem>
+ <para>The table identifier for the routes received in the Router Advertisement
+ (a number between 1 and 4294967295, or 0 to unset).
+ The table can be retrieved using <command>ip route show table <replaceable>num</replaceable></command>.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
diff --git a/src/grp-system/systemd/systemd.offline-updates.xml b/src/grp-system/systemd/systemd.offline-updates.xml
index ae53b8552d..07a5225512 100644
--- a/src/grp-system/systemd/systemd.offline-updates.xml
+++ b/src/grp-system/systemd/systemd.offline-updates.xml
@@ -77,7 +77,7 @@
<listitem>
<para>Very early in the new boot
- <citerefentry><refentrytitle>systemd-update-generator</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ <citerefentry><refentrytitle>systemd-system-update-generator</refentrytitle><manvolnum>8</manvolnum></citerefentry>
checks whether <filename>/system-update</filename> exists. If so, it (temporarily and for
this boot only) redirects (i.e. symlinks) <filename>default.target</filename> to
<filename>system-update.target</filename>, a special target that is pulls in the base system
@@ -143,7 +143,7 @@
<varname>FailureAction=</varname> makes sure that the specified unit is activated if your
script exits uncleanly (by non-zero error code, or signal/coredump). If your script succeeds
you should trigger the reboot in your own code, for example by invoking logind's
- <command>Reboot()</command> call or calling <command>systemct reboot</command>. See
+ <command>Reboot()</command> call or calling <command>systemctl reboot</command>. See
<ulink url="http://www.freedesktop.org/wiki/Software/systemd/logind">logind dbus API</ulink>
for details.</para>
</listitem>
@@ -162,8 +162,8 @@
<ulink url="http://www.freedesktop.org/wiki/Software/systemd/SystemUpdates/">Implementing Offline System Updates</ulink>,
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.generator</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
- <citerefentry><refentrytitle>systemd-update-generator</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
- <citerefentry><refentrytitle>dnf.plugin.system-upgrade</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ <citerefentry><refentrytitle>systemd-system-update-generator</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
+ <citerefentry project='mankier'><refentrytitle>dnf.plugin.system-upgrade</refentrytitle><manvolnum>8</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>
diff --git a/src/grp-system/systemd/systemd.resource-control.xml b/src/grp-system/systemd/systemd.resource-control.xml
index bf44a68345..02878b28a0 100644
--- a/src/grp-system/systemd/systemd.resource-control.xml
+++ b/src/grp-system/systemd/systemd.resource-control.xml
@@ -60,12 +60,10 @@
<refsect1>
<title>Description</title>
- <para>Unit configuration files for services, slices, scopes,
- sockets, mount points, and swap devices share a subset of
- configuration options for resource control of spawned
- processes. Internally, this relies on the Control Groups
- kernel concept for organizing processes in a hierarchical tree of
- named groups for the purpose of resource management.</para>
+ <para>Unit configuration files for services, slices, scopes, sockets, mount points, and swap devices share a subset
+ of configuration options for resource control of spawned processes. Internally, this relies on the Linux Control
+ Groups (cgroups) kernel concept for organizing processes in a hierarchical tree of named groups for the purpose of
+ resource management.</para>
<para>This man page lists the configuration options shared by
those six unit types. See
@@ -83,6 +81,11 @@
[Slice], [Scope], [Service], [Socket], [Mount], or [Swap]
sections, depending on the unit type.</para>
+ <para>In addition, options which control resources available to programs
+ <emphasis>executed</emphasis> by systemd are listed in
+ <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
+ Those options complement options listed here.</para>
+
<para>See the <ulink
url="http://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/">New
Control Group Interfaces</ulink> for an introduction on how to make
@@ -99,19 +102,28 @@
<refsect1>
<title>Unified and Legacy Control Group Hierarchies</title>
- <para>The unified control group hierarchy is the new version of kernel control group interface. Depending on the
- resource type, there are differences in resource control capabilities. Also, because of interface changes, some
- resource types have a separate set of options on the unified hierarchy.</para>
+ <para>The unified control group hierarchy is the new version of kernel control group interface, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>. Depending on the resource type,
+ there are differences in resource control capabilities. Also, because of interface changes, some resource types
+ have separate set of options on the unified hierarchy.</para>
<para>
<variablelist>
+
<varlistentry>
- <term><option>IO</option></term>
+ <term><option>CPU</option></term>
<listitem>
- <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname>
- prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ <para>Due to the lack of consensus in the kernel community, the CPU controller support on the unified
+ control group hierarchy requires out-of-tree kernel patches. See <ulink
+ url="https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu">cgroup-v2-cpu.txt</ulink>.</para>
+
+ <para><varname>CPUWeight=</varname> and <varname>StartupCPUWeight=</varname> replace
+ <varname>CPUShares=</varname> and <varname>StartupCPUShares=</varname>, respectively.</para>
+
+ <para>The <literal>cpuacct</literal> controller does not exist separately on the unified hierarchy.</para>
</listitem>
</varlistentry>
+
<varlistentry>
<term><option>Memory</option></term>
<listitem>
@@ -119,13 +131,29 @@
and <varname>MemoryHigh=</varname> are effective only on unified hierarchy.</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><option>IO</option></term>
+ <listitem>
+ <para><varname>IO</varname> prefixed settings are superset of and replace <varname>BlockIO</varname>
+ prefixed ones. On unified hierarchy, IO resource control also applies to buffered writes.</para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</para>
- <para>To ease the transition, there is best-effort translation between the two versions of settings. If all
- settings of a unit for a given resource type are for the other hierarchy type, the settings are translated and
- applied. If there are any valid settings for the hierarchy in use, all translations are disabled for the resource
- type. Mixing the two types of settings on a unit can lead to confusing results.</para>
+ <para>To ease the transition, there is best-effort translation between the two versions of settings. For each
+ controller, if any of the settings for the unified hierarchy are present, all settings for the legacy hierarchy are
+ ignored. If the resulting settings are for the other type of hierarchy, the configurations are translated before
+ application.</para>
+
+ <para>Legacy control group hierarchy (see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt">cgroups.txt</ulink>), also called cgroup-v1,
+ doesn't allow safe delegation of controllers to unprivileged processes. If the system uses the legacy control group
+ hierarchy, resource control is disabled for systemd user instance, see
+ <citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>.
+ </para>
</refsect1>
<refsect1>
@@ -152,30 +180,26 @@
</varlistentry>
<varlistentry>
- <term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
- <term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term>
+ <term><varname>CPUWeight=<replaceable>weight</replaceable></varname></term>
+ <term><varname>StartupCPUWeight=<replaceable>weight</replaceable></varname></term>
<listitem>
- <para>Assign the specified CPU time share weight to the
- processes executed. These options take an integer value and
- control the <literal>cpu.shares</literal> control group
- attribute. The allowed range is 2 to 262144. Defaults to
- 1024. For details about this control group attribute, see
- <ulink
+ <para>Assign the specified CPU time weight to the processes executed, if the unified control group hierarchy
+ is used on the system. These options take an integer value and control the <literal>cpu.weight</literal>
+ control group attribute. The allowed range is 1 to 10000. Defaults to 100. For details about this control
+ group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
- The available CPU time is split up among all units within
- one slice relative to their CPU time share weight.</para>
+ The available CPU time is split up among all units within one slice relative to their CPU time weight.</para>
- <para>While <varname>StartupCPUShares=</varname> only
- applies to the startup phase of the system,
- <varname>CPUShares=</varname> applies to normal runtime of
- the system, and if the former is not set also to the startup
- phase. Using <varname>StartupCPUShares=</varname> allows
- prioritizing specific services at boot-up differently than
- during normal runtime.</para>
+ <para>While <varname>StartupCPUWeight=</varname> only applies to the startup phase of the system,
+ <varname>CPUWeight=</varname> applies to normal runtime of the system, and if the former is not set also to
+ the startup phase. Using <varname>StartupCPUWeight=</varname> allows prioritizing specific services at
+ boot-up differently than during normal runtime.</para>
+
+ <para>Implies <literal>CPUAccounting=true</literal>.</para>
- <para>These options imply
- <literal>CPUAccounting=true</literal>.</para>
+ <para>These settings replace <varname>CPUShares=</varname> and <varname>StartupCPUShares=</varname>.</para>
</listitem>
</varlistentry>
@@ -183,20 +207,16 @@
<term><varname>CPUQuota=</varname></term>
<listitem>
- <para>Assign the specified CPU time quota to the processes
- executed. Takes a percentage value, suffixed with "%". The
- percentage specifies how much CPU time the unit shall get at
- maximum, relative to the total CPU time available on one
- CPU. Use values &gt; 100% for allotting CPU time on more than
- one CPU. This controls the
- <literal>cpu.cfs_quota_us</literal> control group
- attribute. For details about this control group attribute,
- see <ulink
+ <para>Assign the specified CPU time quota to the processes executed. Takes a percentage value, suffixed with
+ "%". The percentage specifies how much CPU time the unit shall get at maximum, relative to the total CPU time
+ available on one CPU. Use values &gt; 100% for allotting CPU time on more than one CPU. This controls the
+ <literal>cpu.max</literal> attribute on the unified control group hierarchy and
+ <literal>cpu.cfs_quota_us</literal> on legacy. For details about these control group attributes, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and <ulink
url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
- <para>Example: <varname>CPUQuota=20%</varname> ensures that
- the executed processes will never get more than 20% CPU time
- on one CPU.</para>
+ <para>Example: <varname>CPUQuota=20%</varname> ensures that the executed processes will never get more than
+ 20% CPU time on one CPU.</para>
<para>Implies <literal>CPUAccounting=true</literal>.</para>
</listitem>
@@ -234,7 +254,8 @@
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used.</para>
+ <para>This setting is supported only if the unified control group hierarchy is used and disables
+ <varname>MemoryLimit=</varname>.</para>
</listitem>
</varlistentry>
@@ -256,7 +277,8 @@
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used.</para>
+ <para>This setting is supported only if the unified control group hierarchy is used and disables
+ <varname>MemoryLimit=</varname>.</para>
</listitem>
</varlistentry>
@@ -278,29 +300,26 @@
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used. Use
- <varname>MemoryLimit=</varname> on systems using the legacy control group hierarchy.</para>
+ <para>This setting replaces <varname>MemoryLimit=</varname>.</para>
</listitem>
</varlistentry>
<varlistentry>
- <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
+ <term><varname>MemorySwapMax=<replaceable>bytes</replaceable></varname></term>
<listitem>
- <para>Specify the limit on maximum memory usage of the executed processes. The limit specifies how much
- process and kernel memory can be used by tasks in this unit. Takes a memory size in bytes. If the value is
- suffixed with K, M, G or T, the specified memory size is parsed as Kilobytes, Megabytes, Gigabytes, or
- Terabytes (with the base 1024), respectively. Alternatively, a percentage value may be specified, which is
- taken relative to the installed physical memory on the system. If assigned the special value
- <literal>infinity</literal>, no memory limit is applied. This controls the
- <literal>memory.limit_in_bytes</literal> control group attribute. For details about this control group
- attribute, see <ulink
- url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para>
+ <para>Specify the absolute limit on swap usage of the executed processes in this unit.</para>
+
+ <para>Takes a swap size in bytes. If the value is suffixed with K, M, G or T, the specified swap size is
+ parsed as Kilobytes, Megabytes, Gigabytes, or Terabytes (with the base 1024), respectively. If assigned the
+ special value <literal>infinity</literal>, no swap limit is applied. This controls the
+ <literal>memory.swap.max</literal> control group attribute. For details about this control group attribute,
+ see <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
<para>Implies <literal>MemoryAccounting=true</literal>.</para>
- <para>This setting is supported only if the legacy control group hierarchy is used. Use
- <varname>MemoryMax=</varname> on systems using the unified control group hierarchy.</para>
+ <para>This setting is supported only if the unified control group hierarchy is used and disables
+ <varname>MemoryLimit=</varname>.</para>
</listitem>
</varlistentry>
@@ -352,8 +371,8 @@
in
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used. Use
- <varname>BlockIOAccounting=</varname> on systems using the legacy control group hierarchy.</para>
+ <para>This setting replaces <varname>BlockIOAccounting=</varname> and disables settings prefixed with
+ <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
</listitem>
</varlistentry>
@@ -378,9 +397,8 @@
<para>Implies <literal>IOAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used. Use
- <varname>BlockIOWeight=</varname> and <varname>StartupBlockIOWeight=</varname> on systems using the legacy
- control group hierarchy.</para>
+ <para>These settings replace <varname>BlockIOWeight=</varname> and <varname>StartupBlockIOWeight=</varname>
+ and disable settings prefixed with <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
</listitem>
</varlistentry>
@@ -399,8 +417,8 @@
<para>Implies <literal>IOAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used. Use
- <varname>BlockIODeviceWeight=</varname> on systems using the legacy control group hierarchy.</para>
+ <para>This setting replaces <varname>BlockIODeviceWeight=</varname> and disables settings prefixed with
+ <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
</listitem>
</varlistentry>
@@ -424,8 +442,9 @@
<para>Implies <literal>IOAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used. Use
- <varname>BlockIOAccounting=</varname> on systems using the legacy control group hierarchy.</para>
+ <para>These settings replace <varname>BlockIOReadBandwidth=</varname> and
+ <varname>BlockIOWriteBandwidth=</varname> and disable settings prefixed with <varname>BlockIO</varname> or
+ <varname>StartupBlockIO</varname>.</para>
</listitem>
</varlistentry>
@@ -449,100 +468,8 @@
<para>Implies <literal>IOAccounting=true</literal>.</para>
- <para>This setting is supported only if the unified control group hierarchy is used.</para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
- <term><varname>BlockIOAccounting=</varname></term>
-
- <listitem>
- <para>Turn on Block I/O accounting for this unit, if the legacy control group hierarchy is used on the
- system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly
- turn it on for all units contained in the same slice and all for its parent slices and the units contained
- therein. The system default for this setting may be controlled with
- <varname>DefaultBlockIOAccounting=</varname> in
- <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
-
- <para>This setting is supported only if the legacy control group hierarchy is used. Use
- <varname>IOAccounting=</varname> on systems using the unified control group hierarchy.</para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
- <term><varname>BlockIOWeight=<replaceable>weight</replaceable></varname></term>
- <term><varname>StartupBlockIOWeight=<replaceable>weight</replaceable></varname></term>
-
- <listitem><para>Set the default overall block I/O weight for the executed processes, if the legacy control
- group hierarchy is used on the system. Takes a single weight value (between 10 and 1000) to set the default
- block I/O weight. This controls the <literal>blkio.weight</literal> control group attribute, which defaults to
- 500. For details about this control group attribute, see <ulink
- url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
- The available I/O bandwidth is split up among all units within one slice relative to their block I/O
- weight.</para>
-
- <para>While <varname>StartupBlockIOWeight=</varname> only
- applies to the startup phase of the system,
- <varname>BlockIOWeight=</varname> applies to the later runtime
- of the system, and if the former is not set also to the
- startup phase. This allows prioritizing specific services at
- boot-up differently than during runtime.</para>
-
- <para>Implies
- <literal>BlockIOAccounting=true</literal>.</para>
-
- <para>This setting is supported only if the legacy control group hierarchy is used. Use
- <varname>IOWeight=</varname> and <varname>StartupIOWeight=</varname> on systems using the unified control group
- hierarchy.</para>
-
- </listitem>
- </varlistentry>
-
- <varlistentry>
- <term><varname>BlockIODeviceWeight=<replaceable>device</replaceable> <replaceable>weight</replaceable></varname></term>
-
- <listitem>
- <para>Set the per-device overall block I/O weight for the executed processes, if the legacy control group
- hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
- the device specific weight value, between 10 and 1000. (Example: "/dev/sda 500"). The file path may be
- specified as path to a block device node or as any other file, in which case the backing block device of the
- file system of the file is determined. This controls the <literal>blkio.weight_device</literal> control group
- attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For
- details about this control group attribute, see <ulink
- url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.</para>
-
- <para>Implies
- <literal>BlockIOAccounting=true</literal>.</para>
-
- <para>This setting is supported only if the legacy control group hierarchy is used. Use
- <varname>IODeviceWeight=</varname> on systems using the unified control group hierarchy.</para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
- <term><varname>BlockIOReadBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
- <term><varname>BlockIOWriteBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
-
- <listitem>
- <para>Set the per-device overall block I/O bandwidth limit for the executed processes, if the legacy control
- group hierarchy is used on the system. Takes a space-separated pair of a file path and a bandwidth value (in
- bytes per second) to specify the device specific bandwidth. The file path may be a path to a block device
- node, or as any other file in which case the backing block device of the file system of the file is used. If
- the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is parsed as Kilobytes, Megabytes,
- Gigabytes, or Terabytes, respectively, to the base of 1000. (Example:
- "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the
- <literal>blkio.throttle.read_bps_device</literal> and <literal>blkio.throttle.write_bps_device</literal>
- control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For
- details about these control group attributes, see <ulink
- url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
- </para>
-
- <para>Implies
- <literal>BlockIOAccounting=true</literal>.</para>
-
- <para>This setting is supported only if the legacy control group hierarchy is used. Use
- <varname>IOReadBandwidthMax=</varname> and <varname>IOWriteBandwidthMax=</varname> on systems using the
- unified control group hierarchy.</para>
+ <para>These settings are supported only if the unified control group hierarchy is used and disable settings
+ prefixed with <varname>BlockIO</varname> or <varname>StartupBlockIO</varname>.</para>
</listitem>
</varlistentry>
@@ -674,6 +601,149 @@
</refsect1>
<refsect1>
+ <title>Deprecated Options</title>
+
+ <para>The following options are deprecated. Use the indicated superseding options instead:</para>
+
+ <variablelist class='unit-directives'>
+
+ <varlistentry>
+ <term><varname>CPUShares=<replaceable>weight</replaceable></varname></term>
+ <term><varname>StartupCPUShares=<replaceable>weight</replaceable></varname></term>
+
+ <listitem>
+ <para>Assign the specified CPU time share weight to the processes executed. These options take an integer
+ value and control the <literal>cpu.shares</literal> control group attribute. The allowed range is 2 to
+ 262144. Defaults to 1024. For details about this control group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.
+ The available CPU time is split up among all units within one slice relative to their CPU time share
+ weight.</para>
+
+ <para>While <varname>StartupCPUShares=</varname> only applies to the startup phase of the system,
+ <varname>CPUShares=</varname> applies to normal runtime of the system, and if the former is not set also to
+ the startup phase. Using <varname>StartupCPUShares=</varname> allows prioritizing specific services at
+ boot-up differently than during normal runtime.</para>
+
+ <para>Implies <literal>CPUAccounting=true</literal>.</para>
+
+ <para>These settings are deprecated. Use <varname>CPUWeight=</varname> and
+ <varname>StartupCPUWeight=</varname> instead.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>MemoryLimit=<replaceable>bytes</replaceable></varname></term>
+
+ <listitem>
+ <para>Specify the limit on maximum memory usage of the executed processes. The limit specifies how much
+ process and kernel memory can be used by tasks in this unit. Takes a memory size in bytes. If the value is
+ suffixed with K, M, G or T, the specified memory size is parsed as Kilobytes, Megabytes, Gigabytes, or
+ Terabytes (with the base 1024), respectively. Alternatively, a percentage value may be specified, which is
+ taken relative to the installed physical memory on the system. If assigned the special value
+ <literal>infinity</literal>, no memory limit is applied. This controls the
+ <literal>memory.limit_in_bytes</literal> control group attribute. For details about this control group
+ attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt">memory.txt</ulink>.</para>
+
+ <para>Implies <literal>MemoryAccounting=true</literal>.</para>
+
+ <para>This setting is deprecated. Use <varname>MemoryMax=</varname> instead.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>BlockIOAccounting=</varname></term>
+
+ <listitem>
+ <para>Turn on Block I/O accounting for this unit, if the legacy control group hierarchy is used on the
+ system. Takes a boolean argument. Note that turning on block I/O accounting for one unit will also implicitly
+ turn it on for all units contained in the same slice and all for its parent slices and the units contained
+ therein. The system default for this setting may be controlled with
+ <varname>DefaultBlockIOAccounting=</varname> in
+ <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+
+ <para>This setting is deprecated. Use <varname>IOAccounting=</varname> instead.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>BlockIOWeight=<replaceable>weight</replaceable></varname></term>
+ <term><varname>StartupBlockIOWeight=<replaceable>weight</replaceable></varname></term>
+
+ <listitem><para>Set the default overall block I/O weight for the executed processes, if the legacy control
+ group hierarchy is used on the system. Takes a single weight value (between 10 and 1000) to set the default
+ block I/O weight. This controls the <literal>blkio.weight</literal> control group attribute, which defaults to
+ 500. For details about this control group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
+ The available I/O bandwidth is split up among all units within one slice relative to their block I/O
+ weight.</para>
+
+ <para>While <varname>StartupBlockIOWeight=</varname> only
+ applies to the startup phase of the system,
+ <varname>BlockIOWeight=</varname> applies to the later runtime
+ of the system, and if the former is not set also to the
+ startup phase. This allows prioritizing specific services at
+ boot-up differently than during runtime.</para>
+
+ <para>Implies
+ <literal>BlockIOAccounting=true</literal>.</para>
+
+ <para>These settings are deprecated. Use <varname>IOWeight=</varname> and <varname>StartupIOWeight=</varname>
+ instead.</para>
+
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>BlockIODeviceWeight=<replaceable>device</replaceable> <replaceable>weight</replaceable></varname></term>
+
+ <listitem>
+ <para>Set the per-device overall block I/O weight for the executed processes, if the legacy control group
+ hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
+ the device specific weight value, between 10 and 1000. (Example: "/dev/sda 500"). The file path may be
+ specified as path to a block device node or as any other file, in which case the backing block device of the
+ file system of the file is determined. This controls the <literal>blkio.weight_device</literal> control group
+ attribute, which defaults to 1000. Use this option multiple times to set weights for multiple devices. For
+ details about this control group attribute, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.</para>
+
+ <para>Implies
+ <literal>BlockIOAccounting=true</literal>.</para>
+
+ <para>This setting is deprecated. Use <varname>IODeviceWeight=</varname> instead.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>BlockIOReadBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
+ <term><varname>BlockIOWriteBandwidth=<replaceable>device</replaceable> <replaceable>bytes</replaceable></varname></term>
+
+ <listitem>
+ <para>Set the per-device overall block I/O bandwidth limit for the executed processes, if the legacy control
+ group hierarchy is used on the system. Takes a space-separated pair of a file path and a bandwidth value (in
+ bytes per second) to specify the device specific bandwidth. The file path may be a path to a block device
+ node, or as any other file in which case the backing block device of the file system of the file is used. If
+ the bandwidth is suffixed with K, M, G, or T, the specified bandwidth is parsed as Kilobytes, Megabytes,
+ Gigabytes, or Terabytes, respectively, to the base of 1000. (Example:
+ "/dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0 5M"). This controls the
+ <literal>blkio.throttle.read_bps_device</literal> and <literal>blkio.throttle.write_bps_device</literal>
+ control group attributes. Use this option multiple times to set bandwidth limits for multiple devices. For
+ details about these control group attributes, see <ulink
+ url="https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt">blkio-controller.txt</ulink>.
+ </para>
+
+ <para>Implies
+ <literal>BlockIOAccounting=true</literal>.</para>
+
+ <para>These settings are deprecated. Use <varname>IOReadBandwidthMax=</varname> and
+ <varname>IOWriteBandwidthMax=</varname> instead.</para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
@@ -684,6 +754,7 @@
<citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.mount</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.swap</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.directives</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
<citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
The documentation for control groups and specific controllers in the Linux kernel:
diff --git a/src/grp-system/systemd/systemd.service.xml b/src/grp-system/systemd/systemd.service.xml
index 875d368fcf..5c65957bda 100644
--- a/src/grp-system/systemd/systemd.service.xml
+++ b/src/grp-system/systemd/systemd.service.xml
@@ -96,9 +96,12 @@
<varname>After=</varname> on
<filename>dbus.socket</filename>.</para>
- <para>Socket activated service are automatically ordered after
- their activated <filename>.socket</filename> units via an
- automatic <varname>After=</varname> dependency.</para>
+ <para>Socket activated services are automatically ordered after
+ their activating <filename>.socket</filename> units via an
+ automatic <varname>After=</varname> dependency.
+ Services also pull in all <filename>.socket</filename> units
+ listed in <varname>Sockets=</varname> via automatic
+ <varname>Wants=</varname> and <varname>After=</varname> dependencies.</para>
<para>Unless <varname>DefaultDependencies=</varname> in the <literal>[Unit]</literal> is set to
<option>false</option>, service units will implicitly have dependencies of type <varname>Requires=</varname> and
@@ -209,11 +212,11 @@
if used in combination with
<varname>PrivateNetwork=</varname><option>yes</option>.</para>
- <para>Behavior of <option>idle</option> is very similar to
- <option>simple</option>; however, actual execution of the
- service binary is delayed until all jobs are dispatched. This
- may be used to avoid interleaving of output of shell services
- with the status output on the console.</para>
+ <para>Behavior of <option>idle</option> is very similar to <option>simple</option>; however, actual execution
+ of the service binary is delayed until all active jobs are dispatched. This may be used to avoid interleaving
+ of output of shell services with the status output on the console. Note that this type is useful only to
+ improve console output, it is not useful as a general unit ordering tool, and the effect of this service type
+ is subject to a 5s time-out, after which the service binary is invoked anyway.</para>
</listitem>
</varlistentry>
@@ -276,17 +279,12 @@
below (see section "Command Lines" below).
</para>
- <para>When <varname>Type=</varname> is not
- <option>oneshot</option>, only one command may and must be
- given. When <varname>Type=oneshot</varname> is used, zero or
- more commands may be specified. This can be specified by
- providing multiple command lines in the same directive, or
- alternatively, this directive may be specified more than once
- with the same effect. If the empty string is assigned to this
- option, the list of commands to start is reset, prior
- assignments of this option will have no effect. If no
- <varname>ExecStart=</varname> is specified, then the service
- must have <varname>RemainAfterExit=yes</varname> set.</para>
+ <para>Unless <varname>Type=</varname> is <option>oneshot</option>, exactly one command must be given. When
+ <varname>Type=oneshot</varname> is used, zero or more commands may be specified. Commands may be specified by
+ providing multiple command lines in the same directive, or alternatively, this directive may be specified more
+ than once with the same effect. If the empty string is assigned to this option, the list of commands to start
+ is reset, prior assignments of this option will have no effect. If no <varname>ExecStart=</varname> is
+ specified, then the service must have <varname>RemainAfterExit=yes</varname> set.</para>
<para>For each of the specified commands, the first argument must be an absolute path to an
executable. Optionally, if this file name is prefixed with <literal>@</literal>, the second token will be
@@ -294,7 +292,7 @@
the absolute filename is prefixed with <literal>-</literal>, an exit code of the command normally considered a
failure (i.e. non-zero exit status or abnormal exit due to signal) is ignored and considered success. If the
absolute path is prefixed with <literal>+</literal> then it is executed with full
- privileges. <literal>-</literal>, <literal>@</literal>, and <literal>+</literal> may be used together and they
+ privileges. <literal>@</literal>, <literal>-</literal>, and <literal>+</literal> may be used together and they
can appear in any order.</para>
<para>If more than one command is specified, the commands are
@@ -429,7 +427,13 @@
service failed to start up correctly. Commands configured with this setting need to be able to operate even if
the service failed starting up half-way and left incompletely initialized data around. As the service's
processes have been terminated already when the commands specified with this setting are executed they should
- not attempt to communicate with them.</para></listitem>
+ not attempt to communicate with them.</para>
+
+ <para>Note that all commands that are configured with this setting are invoked with the result code of the
+ service, as well as the main process' exit code and status, set in the <varname>$SERVICE_RESULT</varname>,
+ <varname>$EXIT_CODE</varname> and <varname>$EXIT_STATUS</varname> environment variables, see
+ <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
+ details.</para></listitem>
</varlistentry>
<varlistentry>
@@ -848,13 +852,13 @@
serialized to <filename>/run</filename> and the file
descriptors passed to the service manager, to allow restarts
without losing state. Defaults to 0, i.e. no file descriptors
- may be stored in the service manager by default. All file
+ may be stored in the service manager. All file
descriptors passed to the service manager from a specific
service are passed back to the service's main process on the
next service restart. Any file descriptors passed to the
service manager are automatically closed when POLLHUP or
POLLERR is seen on them, or when the service is fully stopped
- and no job queued or being executed for it.</para></listitem>
+ and no job is queued or being executed for it.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/src/grp-system/systemd/systemd.socket.xml b/src/grp-system/systemd/systemd.socket.xml
index 5bf54d8ef3..0ce1203cfb 100644
--- a/src/grp-system/systemd/systemd.socket.xml
+++ b/src/grp-system/systemd/systemd.socket.xml
@@ -294,10 +294,10 @@
<term><varname>ListenUSBFunction=</varname></term>
<listitem><para>Specifies a <ulink
url="https://www.kernel.org/doc/Documentation/usb/functionfs.txt">USB
- FunctionFS</ulink> endpoint location to listen on, for
+ FunctionFS</ulink> endpoints location to listen on, for
implementation of USB gadget functions. This expects an
- absolute file system path as the argument. Behavior otherwise
- is very similar to the <varname>ListenFIFO=</varname>
+ absolute file system path of functionfs mount point as the argument.
+ Behavior otherwise is very similar to the <varname>ListenFIFO=</varname>
directive above. Use this to open the FunctionFS endpoint
<filename>ep0</filename>. When using this option, the
activated service has to have the
@@ -443,6 +443,14 @@
</varlistentry>
<varlistentry>
+ <term><varname>MaxConnectionsPerSource=</varname></term>
+ <listitem><para>The maximum number of connections for a service per source IP address.
+ This is very similar to the <varname>MaxConnections=</varname> directive
+ above. Disabled by default.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>KeepAlive=</varname></term>
<listitem><para>Takes a boolean argument. If true, the TCP/IP
stack will send a keep alive message after 2h (depending on
@@ -527,7 +535,7 @@
and the kernel will ignore initial ACK packets without any
data. The argument specifies the approximate amount of time
the kernel should wait for incoming data before falling back
- to the normal behavior of honouring empty ACK packets. This
+ to the normal behavior of honoring empty ACK packets. This
option is beneficial for protocols where the client sends the
data first (e.g. HTTP, in contrast to SMTP), because the
server process will not be woken up unnecessarily before it
diff --git a/src/grp-system/systemd/systemd.special.xml b/src/grp-system/systemd/systemd.special.xml
index 18ad8f92e5..d977298cd8 100644
--- a/src/grp-system/systemd/systemd.special.xml
+++ b/src/grp-system/systemd/systemd.special.xml
@@ -879,6 +879,70 @@
</refsect1>
<refsect1>
+ <title>Special Passive User Units</title>
+
+ <refsect2>
+ <title>graphical-session.target</title>
+
+ <para>This target is active whenever any graphical session is running. It
+ is used to stop user services which only apply to a graphical (X,
+ Wayland, etc.) session when the session is terminated. Such services
+ should have <literal>PartOf=graphical-session.target</literal> in their
+ <literal>[Unit]</literal> section. A target for a particular session
+ (e. g. <filename>gnome-session.target</filename>) starts and stops
+ <literal>graphical-session.target</literal> with
+ <literal>BindsTo=graphical-session.target</literal>.</para>
+
+ <para>Which services are started by a session target is determined by the
+ <literal>Wants=</literal> and <literal>Requires=</literal> dependencies.
+ For services that can be enabled independently, symlinks in
+ <literal>.wants/</literal> and <literal>.requires/</literal> should be
+ used, see
+ <citerefentry><refentrytitle>systemd.unit</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
+ Those symlinks should either be shipped in packages, or should be added
+ dynamically after installation, for example using <literal>systemctl add-wants</literal>, see
+ <citerefentry><refentrytitle>systemctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>.
+ </para>
+
+ <example>
+ <title>Nautilus as part of a GNOME session</title>
+
+ <para><literal>gnome-session.target</literal> pulls in Nautilus as
+ top-level service:</para>
+
+ <programlisting>[Unit]
+Description=User systemd services for GNOME graphical session
+Wants=nautilus.service
+BindsTo=graphical-session.target
+ </programlisting>
+
+ <para><literal>nautilus.service</literal> gets stopped when the session stops:</para>
+
+ <programlisting>[Unit]
+Description=Render the desktop icons with Nautilus
+PartOf=graphical-session.target
+
+[Service]
+...
+ </programlisting>
+ </example>
+ </refsect2>
+
+ <refsect2>
+ <title>graphical-session-pre.target</title>
+
+ <para>This target contains services which set up the environment or
+ global configuration of a graphical session, such as SSH/GPG agents
+ (which need to export an environment variable into all desktop processes)
+ or migration of obsolete d-conf keys after an OS upgrade (which needs to
+ happen before starting any process that might use them). This target must
+ be started before starting a graphical session
+ like <filename>gnome-session.target</filename>.</para>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
<title>Special Slice Units</title>
<para>There are four <literal>.slice</literal> units which form
diff --git a/src/grp-system/systemd/systemd.target.xml b/src/grp-system/systemd/systemd.target.xml
index 2e35e54fc4..b3cccd4e52 100644
--- a/src/grp-system/systemd/systemd.target.xml
+++ b/src/grp-system/systemd/systemd.target.xml
@@ -83,7 +83,7 @@
<title>Automatic Dependencies</title>
<para>Unless <varname>DefaultDependencies=</varname> is set to
- <option>no</option> in either of releated units or an explicit ordering
+ <option>no</option> in either of related units or an explicit ordering
dependency is already defined, target units will implicitly complement all
configured dependencies of type <varname>Wants=</varname> or
<varname>Requires=</varname> with dependencies of type
diff --git a/src/grp-system/systemd/systemd.time.xml b/src/grp-system/systemd/systemd.time.xml
index aae3accb6c..47229b4a4e 100644
--- a/src/grp-system/systemd/systemd.time.xml
+++ b/src/grp-system/systemd/systemd.time.xml
@@ -57,14 +57,13 @@
<refsect1>
<title>Displaying Time Spans</title>
- <para>Time spans refer to time durations. On display, systemd will
- present time spans as a space-separated series of time values each
- suffixed by a time unit.</para>
+ <para>Time spans refer to time durations. On display, systemd will present time spans as a space-separated series
+ of time values each suffixed by a time unit. Example:</para>
<programlisting>2h 30min</programlisting>
- <para>All specified time values are meant to be added up. The
- above hence refers to 150 minutes.</para>
+ <para>All specified time values are meant to be added up. The above hence refers to 150 minutes. Display is
+ locale-independent, only English names for the time units are used.</para>
</refsect1>
<refsect1>
@@ -83,13 +82,13 @@
<listitem><para>days, day, d</para></listitem>
<listitem><para>weeks, week, w</para></listitem>
<listitem><para>months, month, M (defined as 30.44 days)</para></listitem>
- <listitem><para>years, year, y (define as 365.25 days)</para></listitem>
+ <listitem><para>years, year, y (defined as 365.25 days)</para></listitem>
</itemizedlist>
- <para>If no time unit is specified, generally seconds are assumed,
- but some exceptions exist and are marked as such. In a few cases
- <literal>ns</literal>, <literal>nsec</literal> is accepted too,
- where the granularity of the time span allows for this.</para>
+ <para>If no time unit is specified, generally seconds are assumed, but some exceptions exist and are marked as
+ such. In a few cases <literal>ns</literal>, <literal>nsec</literal> is accepted too, where the granularity of the
+ time span permits this. Parsing is generally locale-independent, non-English names for the time units are not
+ accepted.</para>
<para>Examples for valid time span specifications:</para>
@@ -110,30 +109,29 @@
<programlisting>Fri 2012-11-23 23:02:15 CET</programlisting>
- <para>The weekday is printed according to the locale choice of the
- user.</para>
+ <para>The weekday is printed in the abbreviated English language form. The formatting is locale-independent.</para>
+
+ <para>In some cases timestamps are shown in the UTC timezone instead of the local timezone, which is indicated via
+ the <literal>UTC</literal> timezone specifier in the output.</para>
+
+ <para>In some cases timestamps are shown with microsecond granularity. In this case the sub-second remainder is
+ separated by a full stop from the seconds component.</para>
</refsect1>
<refsect1>
<title>Parsing Timestamps</title>
- <para>When parsing, systemd will accept a similar syntax, but
- expects no timezone specification, unless it is given as the
- literal string "UTC". In this case, the time is considered in UTC,
- otherwise in the local timezone. The weekday specification is
- optional, but when the weekday is specified, it must either be in
- the abbreviated (<literal>Wed</literal>) or non-abbreviated
- (<literal>Wednesday</literal>) English language form (case does
- not matter), and is not subject to the locale choice of the user.
- Either the date, or the time part may be omitted, in which case
- the current date or 00:00:00, respectively, is assumed. The seconds
- component of the time may also be omitted, in which case ":00" is
- assumed. Year numbers may be specified in full or may be
- abbreviated (omitting the century).</para>
-
- <para>A timestamp is considered invalid if a weekday is specified
- and the date does not actually match the specified day of the
- week.</para>
+ <para>When parsing, systemd will accept a similar syntax, but expects no timezone specification, unless it is given
+ as the literal string <literal>UTC</literal> (for the UTC timezone) or is specified to be the locally configured
+ timezone. Other timezones than the local and UTC are not supported. The weekday specification is optional, but when
+ the weekday is specified, it must either be in the abbreviated (<literal>Wed</literal>) or non-abbreviated
+ (<literal>Wednesday</literal>) English language form (case does not matter), and is not subject to the locale
+ choice of the user. Either the date, or the time part may be omitted, in which case the current date or 00:00:00,
+ respectively, is assumed. The seconds component of the time may also be omitted, in which case ":00" is
+ assumed. Year numbers may be specified in full or may be abbreviated (omitting the century).</para>
+
+ <para>A timestamp is considered invalid if a weekday is specified and the date does not match the specified day of
+ the week.</para>
<para>When parsing, systemd will also accept a few special
placeholders instead of timestamps: <literal>now</literal> may be
@@ -167,8 +165,6 @@
2012-11-23 → Fri 2012-11-23 00:00:00
12-11-23 → Fri 2012-11-23 00:00:00
11:12:13 → Fri 2012-11-23 11:12:13
- 11:12:13.9900009 → Fri 2012-11-23 11:12:13
- format_timestamp_us: Fri 2012-11-23 11:12:13.990000
11:12 → Fri 2012-11-23 11:12:00
now → Fri 2012-11-23 18:15:22
today → Fri 2012-11-23 00:00:00
@@ -176,28 +172,25 @@
yesterday → Fri 2012-11-22 00:00:00
tomorrow → Fri 2012-11-24 00:00:00
+3h30min → Fri 2012-11-23 21:45:22
- +3h30min UTC → -EINVAL
-5s → Fri 2012-11-23 18:15:17
11min ago → Fri 2012-11-23 18:04:22
- 11min ago UTC → -EINVAL
@1395716396 → Tue 2014-03-25 03:59:56</programlisting>
- <para>Note that timestamps printed by systemd will not be parsed
- correctly by systemd, as the timezone specification is not
- accepted, and printing timestamps is subject to locale settings
- for the weekday, while parsing only accepts English weekday
- names.</para>
+ <para>Note that timestamps displayed by remote systems with a non-matching timezone are usually not parsable
+ locally, as the timezone component is not understood (unless it happens to be <literal>UTC</literal>).</para>
- <para>In some cases, systemd will display a relative timestamp
- (relative to the current time, or the time of invocation of the
- command) instead or in addition to an absolute timestamp as
- described above. A relative timestamp is formatted as
- follows:</para>
+ <para>Timestamps may also be specified with microsecond granularity. The sub-second remainder is expected separated
+ by a full stop from the seconds component. Example:</para>
+
+ <programlisting>2014-03-25 03:59:56.654563</programlisting>
+
+ <para>In some cases, systemd will display a relative timestamp (relative to the current time, or the time of
+ invocation of the command) instead of or in addition to an absolute timestamp as described above. A relative
+ timestamp is formatted as follows:</para>
- <para>2 months 5 days ago</para>
+ <programlisting>2 months 5 days ago</programlisting>
- <para>Note that any relative timestamp will also parse correctly
- where a timestamp is expected. (see above)</para>
+ <para>Note that a relative timestamp is also accepted where a timestamp is expected (see above).</para>
</refsect1>
<refsect1>
@@ -239,8 +232,9 @@
second component is not specified, <literal>:00</literal> is
assumed.</para>
- <para>A timezone specification is not expected, unless it is given
- as the literal string "UTC", similarly to timestamps.</para>
+ <para>A timezone specification is not expected, unless it is given as the literal string <literal>UTC</literal>, or
+ the local timezone, similar to the supported syntax of timestamps (see above). Non-local timezones except for UTC
+ are not supported.</para>
<para>The special expressions
<literal>minutely</literal>,
@@ -263,38 +257,38 @@
<para>Examples for valid timestamps and their
normalized form:</para>
-<programlisting> Sat,Thu,Mon..Wed,Sat..Sun → Mon..Thu,Sat,Sun *-*-* 00:00:00
- Mon,Sun 12-*-* 2,1:23 → Mon,Sun 2012-*-* 01,02:23:00
- Wed *-1 → Wed *-*-01 00:00:00
+<programlisting> Sat,Thu,Mon..Wed,Sat..Sun → Mon..Thu,Sat,Sun *-*-* 00:00:00
+ Mon,Sun 12-*-* 2,1:23 → Mon,Sun 2012-*-* 01,02:23:00
+ Wed *-1 → Wed *-*-01 00:00:00
Wed..Wed,Wed *-1 → Wed *-*-01 00:00:00
- Wed, 17:48 → Wed *-*-* 17:48:00
+ Wed, 17:48 → Wed *-*-* 17:48:00
Wed..Sat,Tue 12-10-15 1:2:3 → Tue..Sat 2012-10-15 01:02:03
- *-*-7 0:0:0 → *-*-07 00:00:00
- 10-15 → *-10-15 00:00:00
- monday *-12-* 17:00 → Mon *-12-* 17:00:00
- Mon,Fri *-*-3,1,2 *:30:45 → Mon,Fri *-*-01,02,03 *:30:45
- 12,14,13,12:20,10,30 → *-*-* 12,13,14:10,20,30:00
- 12..14:10,20,30 → *-*-* 12,13,14:10,20,30:00
- mon,fri *-1/2-1,3 *:30:45 → Mon,Fri *-01/2-01,03 *:30:45
- 03-05 08:05:40 → *-03-05 08:05:40
- 08:05:40 → *-*-* 08:05:40
- 05:40 → *-*-* 05:40:00
- Sat,Sun 12-05 08:05:40 → Sat,Sun *-12-05 08:05:40
- Sat,Sun 08:05:40 → Sat,Sun *-*-* 08:05:40
- 2003-03-05 05:40 → 2003-03-05 05:40:00
-05:40:23.4200004/3.1700005 → 05:40:23.420000/3.170001
- 2003-02..04-05 → 2003-02,03,04-05 00:00:00
- 2003-03-05 05:40 UTC → 2003-03-05 05:40:00 UTC
- 2003-03-05 → 2003-03-05 00:00:00
- 03-05 → *-03-05 00:00:00
- hourly → *-*-* *:00:00
- daily → *-*-* 00:00:00
- daily UTC → *-*-* 00:00:00 UTC
- monthly → *-*-01 00:00:00
- weekly → Mon *-*-* 00:00:00
- yearly → *-01-01 00:00:00
- annually → *-01-01 00:00:00
- *:2/3 → *-*-* *:02/3:00</programlisting>
+ *-*-7 0:0:0 → *-*-07 00:00:00
+ 10-15 → *-10-15 00:00:00
+ monday *-12-* 17:00 → Mon *-12-* 17:00:00
+ Mon,Fri *-*-3,1,2 *:30:45 → Mon,Fri *-*-01,02,03 *:30:45
+ 12,14,13,12:20,10,30 → *-*-* 12,13,14:10,20,30:00
+ 12..14:10,20,30 → *-*-* 12,13,14:10,20,30:00
+ mon,fri *-1/2-1,3 *:30:45 → Mon,Fri *-01/2-01,03 *:30:45
+ 03-05 08:05:40 → *-03-05 08:05:40
+ 08:05:40 → *-*-* 08:05:40
+ 05:40 → *-*-* 05:40:00
+ Sat,Sun 12-05 08:05:40 → Sat,Sun *-12-05 08:05:40
+ Sat,Sun 08:05:40 → Sat,Sun *-*-* 08:05:40
+ 2003-03-05 05:40 → 2003-03-05 05:40:00
+ 05:40:23.4200004/3.1700005 → 05:40:23.420000/3.170001
+ 2003-02..04-05 → 2003-02,03,04-05 00:00:00
+ 2003-03-05 05:40 UTC → 2003-03-05 05:40:00 UTC
+ 2003-03-05 → 2003-03-05 00:00:00
+ 03-05 → *-03-05 00:00:00
+ hourly → *-*-* *:00:00
+ daily → *-*-* 00:00:00
+ daily UTC → *-*-* 00:00:00 UTC
+ monthly → *-*-01 00:00:00
+ weekly → Mon *-*-* 00:00:00
+ yearly → *-01-01 00:00:00
+ annually → *-01-01 00:00:00
+ *:2/3 → *-*-* *:02/3:00</programlisting>
<para>Calendar events are used by timer units, see
<citerefentry><refentrytitle>systemd.timer</refentrytitle><manvolnum>5</manvolnum></citerefentry>
diff --git a/src/grp-system/systemd/systemd.unit.xml b/src/grp-system/systemd/systemd.unit.xml
index 85a7b12d76..40c4cfd854 100644
--- a/src/grp-system/systemd/systemd.unit.xml
+++ b/src/grp-system/systemd/systemd.unit.xml
@@ -144,61 +144,69 @@
<option>false</option> and <option>off</option> are
equivalent.</para>
- <para>Time span values encoded in unit files can be written in
- various formats. A stand-alone number specifies a time in seconds.
- If suffixed with a time unit, the unit is honored. A concatenation
- of multiple values with units is supported, in which case the
- values are added up. Example: "50" refers to 50 seconds; "2min
- 200ms" refers to 2 minutes plus 200 milliseconds, i.e. 120200ms.
- The following time units are understood: s, min, h, d, w, ms, us.
- For details see
+ <para>Time span values encoded in unit files can be written in various formats. A stand-alone
+ number specifies a time in seconds. If suffixed with a time unit, the unit is honored. A
+ concatenation of multiple values with units is supported, in which case the values are added
+ up. Example: <literal>50</literal> refers to 50 seconds; <literal>2min 200ms</literal> refers to
+ 2 minutes and 200 milliseconds, i.e. 120200 ms. The following time units are understood:
+ <literal>s</literal>, <literal>min</literal>, <literal>h</literal>, <literal>d</literal>,
+ <literal>w</literal>, <literal>ms</literal>, <literal>us</literal>. For details see
<citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry>.</para>
- <para>Empty lines and lines starting with # or ; are
- ignored. This may be used for commenting. Lines ending
- in a backslash are concatenated with the following
- line while reading and the backslash is replaced by a
- space character. This may be used to wrap long lines.</para>
-
- <para>Along with a unit file <filename>foo.service</filename>, the
- directory <filename>foo.service.wants/</filename> may exist. All
- unit files symlinked from such a directory are implicitly added as
- dependencies of type <varname>Wants=</varname> to the unit. This
- is useful to hook units into the start-up of other units, without
- having to modify their unit files. For details about the semantics
- of <varname>Wants=</varname>, see below. The preferred way to
- create symlinks in the <filename>.wants/</filename> directory of a
- unit file is with the <command>enable</command> command of the
+ <para>Empty lines and lines starting with <literal>#</literal> or <literal>;</literal> are
+ ignored. This may be used for commenting. Lines ending in a backslash are concatenated with the
+ following line while reading and the backslash is replaced by a space character. This may be
+ used to wrap long lines.</para>
+
+ <para>Units can be aliased (have an alternative name), by creating a symlink from the new name
+ to the existing name in one of the unit search paths. For example,
+ <filename>systemd-networkd.service</filename> has the alias
+ <filename>dbus-org.freedesktop.network1.service</filename>, created during installation as the
+ symlink <filename>/usr/lib/systemd/system/dbus-org.freedesktop.network1.service</filename>. In
+ addition, unit files may specify aliases through the <varname>Alias=</varname> directive in the
+ [Install] section; those aliases are only effective when the unit is enabled. When the unit is
+ enabled, symlinks will be created for those names, and removed when the unit is disabled. For
+ example, <filename>reboot.target</filename> specifies
+ <varname>Alias=ctrl-alt-del.target</varname>, so when enabled it will be invoked whenever
+ CTRL+ALT+DEL is pressed. Alias names may be used in commands like <command>enable</command>,
+ <command>disable</command>, <command>start</command>, <command>stop</command>,
+ <command>status</command>, …, and in unit dependency directives <varname>Wants=</varname>,
+ <varname>Requires=</varname>, <varname>Before=</varname>, <varname>After=</varname>, …, with the
+ limitation that aliases specified through <varname>Alias=</varname> are only effective when the
+ unit is enabled. Aliases cannot be used with the <command>preset</command> command.</para>
+
+ <para>Along with a unit file <filename>foo.service</filename>, the directory
+ <filename>foo.service.wants/</filename> may exist. All unit files symlinked from such a
+ directory are implicitly added as dependencies of type <varname>Wants=</varname> to the unit.
+ This is useful to hook units into the start-up of other units, without having to modify their
+ unit files. For details about the semantics of <varname>Wants=</varname>, see below. The
+ preferred way to create symlinks in the <filename>.wants/</filename> directory of a unit file is
+ with the <command>enable</command> command of the
<citerefentry><refentrytitle>systemctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
- tool which reads information from the [Install] section of unit
- files (see below). A similar functionality exists for
- <varname>Requires=</varname> type dependencies as well, the
- directory suffix is <filename>.requires/</filename> in this
- case.</para>
+ tool which reads information from the [Install] section of unit files (see below). A similar
+ functionality exists for <varname>Requires=</varname> type dependencies as well, the directory
+ suffix is <filename>.requires/</filename> in this case.</para>
<para>Along with a unit file <filename>foo.service</filename>, a "drop-in" directory
- <filename>foo.service.d/</filename> may exist. All files with the suffix <literal>.conf</literal> from this
- directory will be parsed after the file itself is parsed. This is useful to alter or add configuration settings for
- a unit, without having to modify unit files. Each drop-in file must have appropriate section headers. Note that for
- instantiated units, this logic will first look for the instance <literal>.d/</literal> subdirectory and read its
- <literal>.conf</literal> files, followed by the template <literal>.d/</literal> subdirectory and the
- <literal>.conf</literal> files there. Also note that settings from the <literal>[Install]</literal> section are not
- honoured in drop-in unit files, and have no effect.</para>
-
- <para>In addition to <filename>/etc/systemd/system</filename>,
- the drop-in <literal>.conf</literal> files for system services
- can be placed in <filename>/usr/lib/systemd/system</filename> or
- <filename>/run/systemd/system</filename> directories. Drop-in
- files in <filename>/etc</filename> take precedence over those in
- <filename>/run</filename> which in turn take precedence over
- those in <filename>/usr/lib</filename>. Drop-in files under any of
- these directories take precedence over unit files wherever located.
- (Of course, since <filename>/run</filename> is temporary and
- <filename>/usr/lib</filename> is for vendors, it is unlikely
- drop-ins should be used in either of those places.)</para>
- <!-- Note that we do not document .include here, as we
- consider it mostly obsolete, and want people to
- use .d/ drop-ins instead. -->
+ <filename>foo.service.d/</filename> may exist. All files with the suffix
+ <literal>.conf</literal> from this directory will be parsed after the file itself is
+ parsed. This is useful to alter or add configuration settings for a unit, without having to
+ modify unit files. Each drop-in file must have appropriate section headers. Note that for
+ instantiated units, this logic will first look for the instance <literal>.d/</literal>
+ subdirectory and read its <literal>.conf</literal> files, followed by the template
+ <literal>.d/</literal> subdirectory and the <literal>.conf</literal> files there. Also note that
+ settings from the <literal>[Install]</literal> section are not honored in drop-in unit files,
+ and have no effect.</para>
+
+ <para>In addition to <filename>/etc/systemd/system</filename>, the drop-in <literal>.d</literal>
+ directories for system services can be placed in <filename>/usr/lib/systemd/system</filename> or
+ <filename>/run/systemd/system</filename> directories. Drop-in files in <filename>/etc</filename>
+ take precedence over those in <filename>/run</filename> which in turn take precedence over those
+ in <filename>/usr/lib</filename>. Drop-in files under any of these directories take precedence
+ over unit files wherever located.</para>
+
+ <!-- Note that we do not document .include here, as we consider it mostly obsolete, and want
+ people to use .d/ drop-ins instead. -->
<para>Some unit names reflect paths existing in the file system
namespace. Example: a device unit
@@ -900,7 +908,8 @@
<varname>systemd-nspawn</varname>,
<varname>docker</varname>,
<varname>rkt</varname> to test
- against a specific implementation. See
+ against a specific implementation, or
+ <varname>private-users</varname> to check whether we are running in a user namespace. See
<citerefentry><refentrytitle>systemd-detect-virt</refentrytitle><manvolnum>1</manvolnum></citerefentry>
for a full list of known virtualization technologies and their
identifiers. If multiple virtualization technologies are
@@ -1244,7 +1253,7 @@
<row>
<entry><literal>%r</literal></entry>
<entry>Control group path of the slice the unit is placed in</entry>
- <entry>This usually maps to the parent cgroup path of <literal>%c</literal>.</entry>
+ <entry>This usually maps to the parent control group path of <literal>%c</literal>.</entry>
</row>
<row>
<entry><literal>%R</literal></entry>
diff --git a/src/grp-system/systemd/systemd.xml b/src/grp-system/systemd/systemd.xml
index 4f0201fc76..79d8aedbbc 100644
--- a/src/grp-system/systemd/systemd.xml
+++ b/src/grp-system/systemd/systemd.xml
@@ -272,7 +272,7 @@
<title>Concepts</title>
<para>systemd provides a dependency system between various
- entities called "units" of 12 different types. Units encapsulate
+ entities called "units" of 11 different types. Units encapsulate
various objects that are relevant for system boot-up and
maintenance. The majority of units are configured in unit
configuration files, whose syntax and basic set of options is
@@ -837,8 +837,10 @@
<varlistentry>
<term><varname>$SYSTEMD_COLORS</varname></term>
- <listitem><para>Controls whether colorized output should be generated.
- </para></listitem>
+ <listitem><para>The value must be a boolean. Controls whether colorized output should be
+ generated. This can be specified to override the decision that <command>systemd</command>
+ makes based on <varname>$TERM</varname> and what the console is connected to.</para>
+ </listitem>
</varlistentry>
<varlistentry>
@@ -849,7 +851,7 @@
<listitem><para>Set by systemd for supervised processes during
socket-based activation. See
<citerefentry><refentrytitle>sd_listen_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>
- for more information. </para></listitem>
+ for more information.</para></listitem>
</varlistentry>
<varlistentry>
@@ -858,7 +860,7 @@
<listitem><para>Set by systemd for supervised processes for
status and start-up completion notification. See
<citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>
- for more information. </para></listitem>
+ for more information.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>