diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/automount.c | 6 | ||||
-rw-r--r-- | src/core/cgroup.c | 11 | ||||
-rw-r--r-- | src/core/cgroup.h | 1 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 35 | ||||
-rw-r--r-- | src/core/dbus-execute.c | 25 | ||||
-rw-r--r-- | src/core/dbus-scope.c | 2 | ||||
-rw-r--r-- | src/core/execute.c | 46 | ||||
-rw-r--r-- | src/core/execute.h | 10 | ||||
-rw-r--r-- | src/core/killall.c | 7 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 9 | ||||
-rw-r--r-- | src/core/load-fragment.c | 27 | ||||
-rw-r--r-- | src/core/machine-id-setup.c | 226 | ||||
-rw-r--r-- | src/core/machine-id-setup.h | 2 | ||||
-rw-r--r-- | src/core/macros.systemd.in | 8 | ||||
-rw-r--r-- | src/core/main.c | 32 | ||||
-rw-r--r-- | src/core/manager.c | 8 | ||||
-rw-r--r-- | src/core/mount-setup.c | 14 | ||||
-rw-r--r-- | src/core/namespace.c | 51 | ||||
-rw-r--r-- | src/core/namespace.h | 6 | ||||
-rw-r--r-- | src/core/scope.c | 16 | ||||
-rw-r--r-- | src/core/scope.h | 3 | ||||
-rw-r--r-- | src/core/selinux-access.c | 2 | ||||
-rw-r--r-- | src/core/selinux-setup.c | 2 | ||||
-rw-r--r-- | src/core/service.c | 2 | ||||
-rw-r--r-- | src/core/shutdown.c | 20 | ||||
-rw-r--r-- | src/core/system.conf | 2 | ||||
-rw-r--r-- | src/core/transaction.c | 7 | ||||
-rw-r--r-- | src/core/unit.c | 104 | ||||
-rw-r--r-- | src/core/unit.h | 1 |
29 files changed, 365 insertions, 320 deletions
diff --git a/src/core/automount.c b/src/core/automount.c index 85803a9c4a..4e9891569c 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -98,9 +98,6 @@ static void unmount_autofs(Automount *a) { if (a->pipe_fd < 0) return; - automount_send_ready(a, a->tokens, -EHOSTDOWN); - automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); - a->pipe_event_source = sd_event_source_unref(a->pipe_event_source); a->pipe_fd = safe_close(a->pipe_fd); @@ -109,6 +106,9 @@ static void unmount_autofs(Automount *a) { if (a->where && (UNIT(a)->manager->exit_code != MANAGER_RELOAD && UNIT(a)->manager->exit_code != MANAGER_REEXECUTE)) { + automount_send_ready(a, a->tokens, -EHOSTDOWN); + automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); + r = repeat_unmount(a->where, MNT_DETACH); if (r < 0) log_error_errno(r, "Failed to unmount: %m"); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 6e36e6b340..c19e43f571 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -800,7 +800,10 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { "/dev/random\0" "rwm\0" "/dev/urandom\0" "rwm\0" "/dev/tty\0" "rwm\0" - "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */ + "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */ + /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */ + "/run/systemd/inaccessible/chr\0" "rwm\0" + "/run/systemd/inaccessible/blk\0" "rwm\0"; const char *x, *y; @@ -1136,7 +1139,7 @@ int unit_watch_cgroup(Unit *u) { /* Only applies to the unified hierarchy */ r = cg_unified(); if (r < 0) - return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m"); + return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); if (r == 0) return 0; @@ -1658,7 +1661,7 @@ int manager_setup_cgroup(Manager *m) { /* 3. Install agent */ if (unified) { - /* In the unified hierarchy we can can get + /* In the unified hierarchy we can get * cgroup empty notifications via inotify. */ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); @@ -1705,7 +1708,7 @@ int manager_setup_cgroup(Manager *m) { /* also, move all other userspace processes remaining * in the root cgroup into that scope. */ - r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false); + r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0); if (r < 0) log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m"); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index f21409bd5d..a57403e79f 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -119,7 +119,6 @@ struct CGroupContext { bool delegate; }; -#include "cgroup-util.h" #include "unit.h" void cgroup_context_init(CGroupContext *c); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 27bbe2d26d..85b0c86a2f 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -856,7 +856,7 @@ int bus_cgroup_set_property( return 1; - } else if (STR_IN_SET(name, "MemoryLowByPhysicalMemory", "MemoryHighByPhysicalMemory", "MemoryMaxByPhysicalMemory")) { + } else if (STR_IN_SET(name, "MemoryLowScale", "MemoryHighScale", "MemoryMaxScale")) { uint32_t raw; uint64_t v; @@ -872,7 +872,7 @@ int bus_cgroup_set_property( const char *e; /* Chop off suffix */ - assert_se(e = endswith(name, "ByPhysicalMemory")); + assert_se(e = endswith(name, "Scale")); name = strndupa(name, e - name); if (streq(name, "MemoryLow")) @@ -883,7 +883,8 @@ int bus_cgroup_set_property( c->memory_max = v; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); - unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name, (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100, (uint64_t) UINT32_MAX))); + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name, + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); } return 1; @@ -909,7 +910,7 @@ int bus_cgroup_set_property( return 1; - } else if (streq(name, "MemoryLimitByPhysicalMemory")) { + } else if (streq(name, "MemoryLimitScale")) { uint64_t limit; uint32_t raw; @@ -924,7 +925,8 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->memory_limit = limit; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); - unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%", (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100, (uint64_t) UINT32_MAX))); + unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); } return 1; @@ -960,6 +962,7 @@ int bus_cgroup_set_property( while ((r = sd_bus_message_read(message, "(ss)", &path, &rwm)) > 0) { if ((!startswith(path, "/dev/") && + !startswith(path, "/run/systemd/inaccessible/") && !startswith(path, "block-") && !startswith(path, "char-")) || strpbrk(path, WHITESPACE)) @@ -1059,6 +1062,8 @@ int bus_cgroup_set_property( r = sd_bus_message_read(message, "t", &limit); if (r < 0) return r; + if (limit <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); if (mode != UNIT_CHECK) { c->tasks_max = limit; @@ -1071,6 +1076,26 @@ int bus_cgroup_set_property( } return 1; + } else if (streq(name, "TasksMaxScale")) { + uint64_t limit; + uint32_t raw; + + r = sd_bus_message_read(message, "u", &raw); + if (r < 0) + return r; + + limit = system_tasks_max_scale(raw, UINT32_MAX); + if (limit <= 0 || limit >= UINT64_MAX) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); + + if (mode != UNIT_CHECK) { + c->tasks_max = limit; + unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); + unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); + } + + return 1; } if (u->transient && u->load_state == UNIT_STUB) { diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 644b9561b5..307c3d8e7a 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -695,9 +695,12 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_dirs), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_dirs), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_dirs), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), + SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1323,8 +1326,8 @@ int bus_exec_context_set_transient_property( return 1; - } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories")) { - + } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories", + "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) { _cleanup_strv_free_ char **l = NULL; char ***dirs; char **p; @@ -1346,12 +1349,12 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { _cleanup_free_ char *joined = NULL; - if (streq(name, "ReadWriteDirectories")) - dirs = &c->read_write_dirs; - else if (streq(name, "ReadOnlyDirectories")) - dirs = &c->read_only_dirs; - else /* "InaccessibleDirectories" */ - dirs = &c->inaccessible_dirs; + if (STR_IN_SET(name, "ReadWriteDirectories", "ReadWritePaths")) + dirs = &c->read_write_paths; + else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths")) + dirs = &c->read_only_paths; + else /* "InaccessiblePaths" */ + dirs = &c->inaccessible_paths; if (strv_length(l) == 0) { *dirs = strv_free(*dirs); diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index f557eedfc3..1abaf9f658 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -225,5 +225,5 @@ int bus_scope_send_request_stop(Scope *s) { if (r < 0) return r; - return sd_bus_send_to(UNIT(s)->manager->api_bus, m, /* s->controller */ NULL, NULL); + return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL); } diff --git a/src/core/execute.c b/src/core/execute.c index 8c487b371f..7c178b97c3 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -290,10 +290,10 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) { } static int connect_logger_as( + Unit *unit, const ExecContext *context, ExecOutput output, const char *ident, - const char *unit_id, int nfd, uid_t uid, gid_t gid) { @@ -329,7 +329,7 @@ static int connect_logger_as( "%i\n" "%i\n", context->syslog_identifier ? context->syslog_identifier : ident, - unit_id, + unit->id, context->syslog_priority, !!context->syslog_level_prefix, output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE, @@ -544,7 +544,7 @@ static int setup_output( case EXEC_OUTPUT_KMSG_AND_CONSOLE: case EXEC_OUTPUT_JOURNAL: case EXEC_OUTPUT_JOURNAL_AND_CONSOLE: - r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid); + r = connect_logger_as(unit, context, o, ident, fileno, uid, gid); if (r < 0) { log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr"); r = open_null_as(O_WRONLY, fileno); @@ -1507,9 +1507,9 @@ static bool exec_needs_mount_namespace( assert(context); assert(params); - if (!strv_isempty(context->read_write_dirs) || - !strv_isempty(context->read_only_dirs) || - !strv_isempty(context->inaccessible_dirs)) + if (!strv_isempty(context->read_write_paths) || + !strv_isempty(context->read_only_paths) || + !strv_isempty(context->inaccessible_paths)) return true; if (context->mount_flags != 0) @@ -1933,9 +1933,9 @@ static int exec_child( r = setup_namespace( params->apply_chroot ? context->root_directory : NULL, - context->read_write_dirs, - context->read_only_dirs, - context->inaccessible_dirs, + context->read_write_paths, + context->read_only_paths, + context->inaccessible_paths, tmp, var, context->private_devices, @@ -2324,9 +2324,9 @@ void exec_context_done(ExecContext *c) { c->pam_name = mfree(c->pam_name); - c->read_only_dirs = strv_free(c->read_only_dirs); - c->read_write_dirs = strv_free(c->read_write_dirs); - c->inaccessible_dirs = strv_free(c->inaccessible_dirs); + c->read_only_paths = strv_free(c->read_only_paths); + c->read_write_paths = strv_free(c->read_write_paths); + c->inaccessible_paths = strv_free(c->inaccessible_paths); if (c->cpuset) CPU_FREE(c->cpuset); @@ -2732,21 +2732,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->pam_name) fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name); - if (strv_length(c->read_write_dirs) > 0) { - fprintf(f, "%sReadWriteDirs:", prefix); - strv_fprintf(f, c->read_write_dirs); + if (strv_length(c->read_write_paths) > 0) { + fprintf(f, "%sReadWritePaths:", prefix); + strv_fprintf(f, c->read_write_paths); fputs("\n", f); } - if (strv_length(c->read_only_dirs) > 0) { - fprintf(f, "%sReadOnlyDirs:", prefix); - strv_fprintf(f, c->read_only_dirs); + if (strv_length(c->read_only_paths) > 0) { + fprintf(f, "%sReadOnlyPaths:", prefix); + strv_fprintf(f, c->read_only_paths); fputs("\n", f); } - if (strv_length(c->inaccessible_dirs) > 0) { - fprintf(f, "%sInaccessibleDirs:", prefix); - strv_fprintf(f, c->inaccessible_dirs); + if (strv_length(c->inaccessible_paths) > 0) { + fprintf(f, "%sInaccessiblePaths:", prefix); + strv_fprintf(f, c->inaccessible_paths); fputs("\n", f); } @@ -2827,7 +2827,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { bool exec_context_maintains_privileges(ExecContext *c) { assert(c); - /* Returns true if the process forked off would run run under + /* Returns true if the process forked off would run under * an unchanged UID or as root. */ if (!c->user) @@ -3062,7 +3062,7 @@ int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) { return r; if (c->private_network && (*rt)->netns_storage_socket[0] < 0) { - if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0) + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0) return -errno; } diff --git a/src/core/execute.h b/src/core/execute.h index 210eea0e82..189c4d0999 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -30,6 +30,7 @@ typedef struct ExecParameters ExecParameters; #include <stdio.h> #include <sys/capability.h> +#include "cgroup-util.h" #include "fdset.h" #include "list.h" #include "missing.h" @@ -130,7 +131,7 @@ struct ExecContext { bool ignore_sigpipe; - /* Since resolving these names might might involve socket + /* Since resolving these names might involve socket * connections and we don't want to deadlock ourselves these * names are resolved on execution only and in the child * process. */ @@ -152,7 +153,7 @@ struct ExecContext { bool smack_process_label_ignore; char *smack_process_label; - char **read_write_dirs, **read_only_dirs, **inaccessible_dirs; + char **read_write_paths, **read_only_paths, **inaccessible_paths; unsigned long mount_flags; uint64_t capability_bounding_set; @@ -203,9 +204,6 @@ struct ExecContext { bool no_new_privileges_set:1; }; -#include "cgroup-util.h" -#include "cgroup.h" - struct ExecParameters { char **argv; char **environment; @@ -236,6 +234,8 @@ struct ExecParameters { int stderr_fd; }; +#include "unit.h" + int exec_spawn(Unit *unit, ExecCommand *command, const ExecContext *context, diff --git a/src/core/killall.c b/src/core/killall.c index 09378f7085..a8b814e868 100644 --- a/src/core/killall.c +++ b/src/core/killall.c @@ -23,6 +23,7 @@ #include <unistd.h> #include "alloc-util.h" +#include "def.h" #include "fd-util.h" #include "formats-util.h" #include "killall.h" @@ -33,8 +34,6 @@ #include "terminal-util.h" #include "util.h" -#define TIMEOUT_USEC (10 * USEC_PER_SEC) - static bool ignore_proc(pid_t pid, bool warn_rootfs) { _cleanup_fclose_ FILE *f = NULL; char c; @@ -80,7 +79,7 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) { get_process_comm(pid, &comm); if (r) - log_notice("Process " PID_FMT " (%s) has been been marked to be excluded from killing. It is " + log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is " "running from the root file system, and thus likely to block re-mounting of the " "root file system to read-only. Please consider moving it into an initrd file " "system instead.", pid, strna(comm)); @@ -99,7 +98,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) { if (set_isempty(pids)) return; - until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC; + until = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC; for (;;) { struct timespec ts; int k; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index fe1006830b..6a5c16a000 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -80,9 +80,12 @@ $1.LimitMSGQUEUE, config_parse_limit, RLIMIT_MSGQ $1.LimitNICE, config_parse_limit, RLIMIT_NICE, offsetof($1, exec_context.rlimit) $1.LimitRTPRIO, config_parse_limit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit) $1.LimitRTTIME, config_parse_limit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit) -$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_dirs) -$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_dirs) -$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_dirs) +$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths) +$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths) +$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) +$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths) +$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths) +$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 8295cf45a6..a36953f766 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -620,7 +620,7 @@ int config_parse_exec( ignore = true; else if (*f == '@' && !separate_argv0) separate_argv0 = true; - else if (*f == '!' && !privileged) + else if (*f == '+' && !privileged) privileged = true; else break; @@ -2429,7 +2429,7 @@ static int syscall_filter_parse_one( int id; id = seccomp_syscall_resolve_name(t); - if (id < 0) { + if (id == __NR_SCMP_ERROR) { if (warn) log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t); return 0; @@ -2823,8 +2823,8 @@ int config_parse_memory_limit( } else bytes = physical_memory_scale(r, 100U); - if (bytes < 1) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' too small. Ignoring.", rvalue); + if (bytes <= 0 || bytes >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range. Ignoring.", rvalue); return 0; } } @@ -2861,9 +2861,18 @@ int config_parse_tasks_max( return 0; } - r = safe_atou64(rvalue, &u); - if (r < 0 || u < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + r = parse_percent(rvalue); + if (r < 0) { + r = safe_atou64(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + return 0; + } + } else + u = system_tasks_max_scale(r, 100U); + + if (u <= 0 || u >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue); return 0; } @@ -3594,7 +3603,7 @@ int config_parse_protect_home( assert(data); /* Our enum shall be a superset of booleans, hence first try - * to parse as as boolean, and then as enum */ + * to parse as boolean, and then as enum */ k = parse_boolean(rvalue); if (k > 0) @@ -3637,7 +3646,7 @@ int config_parse_protect_system( assert(data); /* Our enum shall be a superset of booleans, hence first try - * to parse as as boolean, and then as enum */ + * to parse as boolean, and then as enum */ k = parse_boolean(rvalue); if (k > 0) diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c index 0145fe2894..76dfcfa6d7 100644 --- a/src/core/machine-id-setup.c +++ b/src/core/machine-id-setup.c @@ -17,11 +17,8 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -#include <errno.h> #include <fcntl.h> #include <sched.h> -#include <stdio.h> -#include <string.h> #include <sys/mount.h> #include <unistd.h> @@ -29,10 +26,8 @@ #include "alloc-util.h" #include "fd-util.h" -#include "fileio.h" #include "fs-util.h" -#include "hexdecoct.h" -#include "io-util.h" +#include "id128-util.h" #include "log.h" #include "machine-id-setup.h" #include "macro.h" @@ -46,101 +41,23 @@ #include "util.h" #include "virt.h" -static int shorten_uuid(char destination[34], const char source[36]) { - unsigned i, j; - - assert(destination); - assert(source); - - /* Converts a UUID into a machine ID, by lowercasing it and - * removing dashes. Validates everything. */ - - for (i = 0, j = 0; i < 36 && j < 32; i++) { - int t; - - t = unhexchar(source[i]); - if (t < 0) - continue; - - destination[j++] = hexchar(t); - } - - if (i != 36 || j != 32) - return -EINVAL; - - destination[32] = '\n'; - destination[33] = 0; - return 0; -} - -static int read_machine_id(int fd, char id[34]) { - char id_to_validate[34]; - int r; - - assert(fd >= 0); - assert(id); - - /* Reads a machine ID from a file, validates it, and returns - * it. The returned ID ends in a newline. */ - - r = loop_read_exact(fd, id_to_validate, 33, false); - if (r < 0) - return r; - - if (id_to_validate[32] != '\n') - return -EINVAL; - - id_to_validate[32] = 0; - - if (!id128_is_valid(id_to_validate)) - return -EINVAL; - - memcpy(id, id_to_validate, 32); - id[32] = '\n'; - id[33] = 0; - return 0; -} - -static int write_machine_id(int fd, const char id[34]) { - int r; - - assert(fd >= 0); - assert(id); - - if (lseek(fd, 0, SEEK_SET) < 0) - return -errno; - - r = loop_write(fd, id, 33, false); - if (r < 0) - return r; - - if (fsync(fd) < 0) - return -errno; - - return 0; -} - -static int generate_machine_id(char id[34], const char *root) { - int fd, r; - unsigned char *p; - sd_id128_t buf; - char *q; +static int generate_machine_id(const char *root, sd_id128_t *ret) { const char *dbus_machine_id; + _cleanup_close_ int fd = -1; + int r; - assert(id); - - dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id"); + assert(ret); /* First, try reading the D-Bus machine id, unless it is a symlink */ + dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id"); fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); if (fd >= 0) { - r = read_machine_id(fd, id); - safe_close(fd); - - if (r >= 0) { + if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) { log_info("Initializing machine ID from D-Bus machine ID."); return 0; } + + fd = safe_close(fd); } if (isempty(root)) { @@ -151,13 +68,10 @@ static int generate_machine_id(char id[34], const char *root) { if (detect_container() > 0) { _cleanup_free_ char *e = NULL; - r = getenv_for_pid(1, "container_uuid", &e); - if (r > 0) { - r = shorten_uuid(id, e); - if (r >= 0) { - log_info("Initializing machine ID from container UUID."); - return 0; - } + if (getenv_for_pid(1, "container_uuid", &e) > 0 && + sd_id128_from_string(e, ret) >= 0) { + log_info("Initializing machine ID from container UUID."); + return 0; } } else if (detect_vm() == VIRTUALIZATION_KVM) { @@ -166,51 +80,29 @@ static int generate_machine_id(char id[34], const char *root) { * running in qemu/kvm and a machine ID was passed in * via -uuid on the qemu/kvm command line */ - char uuid[36]; - - fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); - if (fd >= 0) { - r = loop_read_exact(fd, uuid, 36, false); - safe_close(fd); - - if (r >= 0) { - r = shorten_uuid(id, uuid); - if (r >= 0) { - log_info("Initializing machine ID from KVM UUID."); - return 0; - } - } + if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) { + log_info("Initializing machine ID from KVM UUID."); + return 0; } } } /* If that didn't work, generate a random machine id */ - r = sd_id128_randomize(&buf); + r = sd_id128_randomize(ret); if (r < 0) - return log_error_errno(r, "Failed to open /dev/urandom: %m"); - - for (p = buf.bytes, q = id; p < buf.bytes + sizeof(buf); p++, q += 2) { - q[0] = hexchar(*p >> 4); - q[1] = hexchar(*p & 15); - } - - id[32] = '\n'; - id[33] = 0; + return log_error_errno(r, "Failed to generate randomized : %m"); log_info("Initializing machine ID from random generator."); - return 0; } -int machine_id_setup(const char *root, sd_id128_t machine_id) { +int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) { const char *etc_machine_id, *run_machine_id; _cleanup_close_ int fd = -1; - bool writable = true; - char id[34]; /* 32 + \n + \0 */ + bool writable; int r; etc_machine_id = prefix_roota(root, "/etc/machine-id"); - run_machine_id = prefix_roota(root, "/run/machine-id"); RUN_WITH_UMASK(0000) { /* We create this 0444, to indicate that this isn't really @@ -218,7 +110,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { * will be owned by root it doesn't matter much, but maybe * people look. */ - mkdir_parents(etc_machine_id, 0755); + (void) mkdir_parents(etc_machine_id, 0755); fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); if (fd < 0) { int old_errno = errno; @@ -239,41 +131,41 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { } writable = false; - } + } else + writable = true; } - /* A machine id argument overrides all other machined-ids */ - if (!sd_id128_is_null(machine_id)) { - sd_id128_to_string(machine_id, id); - id[32] = '\n'; - id[33] = 0; - } else { - if (read_machine_id(fd, id) >= 0) - return 0; + /* A we got a valid machine ID argument, that's what counts */ + if (sd_id128_is_null(machine_id)) { - /* Hmm, so, the id currently stored is not useful, then let's - * generate one */ + /* Try to read any existing machine ID */ + if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) + return 0; - r = generate_machine_id(id, root); + /* Hmm, so, the id currently stored is not useful, then let's generate one */ + r = generate_machine_id(root, &machine_id); if (r < 0) return r; + + if (lseek(fd, 0, SEEK_SET) == (off_t) -1) + return log_error_errno(errno, "Failed to seek: %m"); } if (writable) - if (write_machine_id(fd, id) >= 0) - return 0; + if (id128_write_fd(fd, ID128_PLAIN, machine_id, true) >= 0) + goto finish; fd = safe_close(fd); - /* Hmm, we couldn't write it? So let's write it to - * /run/machine-id as a replacement */ + /* Hmm, we couldn't write it? So let's write it to /run/machine-id as a replacement */ - RUN_WITH_UMASK(0022) { - r = write_string_file(run_machine_id, id, WRITE_STRING_FILE_CREATE); - if (r < 0) { - (void) unlink(run_machine_id); - return log_error_errno(r, "Cannot write %s: %m", run_machine_id); - } + run_machine_id = prefix_roota(root, "/run/machine-id"); + + RUN_WITH_UMASK(0022) + r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false); + if (r < 0) { + (void) unlink(run_machine_id); + return log_error_errno(r, "Cannot write %s: %m", run_machine_id); } /* And now, let's mount it over */ @@ -286,7 +178,11 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { /* Mark the mount read-only */ if (mount(NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL) < 0) - log_warning_errno(errno, "Failed to make transient %s read-only: %m", etc_machine_id); + log_warning_errno(errno, "Failed to make transient %s read-only, ignoring: %m", etc_machine_id); + +finish: + if (ret) + *ret = machine_id; return 0; } @@ -294,16 +190,20 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { int machine_id_commit(const char *root) { _cleanup_close_ int fd = -1, initial_mntns_fd = -1; const char *etc_machine_id; - char id[34]; /* 32 + \n + \0 */ + sd_id128_t id; int r; + /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed + * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the + * original mount namespace, thus revealing the file that was just created. */ + etc_machine_id = prefix_roota(root, "/etc/machine-id"); r = path_is_mount_point(etc_machine_id, 0); if (r < 0) return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", etc_machine_id); if (r == 0) { - log_debug("%s is is not a mount point. Nothing to do.", etc_machine_id); + log_debug("%s is not a mount point. Nothing to do.", etc_machine_id); return 0; } @@ -312,10 +212,6 @@ int machine_id_commit(const char *root) { if (fd < 0) return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id); - r = read_machine_id(fd, id); - if (r < 0) - return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); - r = fd_is_temporary_fs(fd); if (r < 0) return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id); @@ -324,6 +220,10 @@ int machine_id_commit(const char *root) { return -EROFS; } + r = id128_read_fd(fd, ID128_PLAIN, &id); + if (r < 0) + return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); + fd = safe_close(fd); /* Store current mount namespace */ @@ -342,15 +242,9 @@ int machine_id_commit(const char *root) { return log_error_errno(errno, "Failed to unmount transient %s file in our private namespace: %m", etc_machine_id); /* Update a persistent version of etc_machine_id */ - fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); - if (fd < 0) - return log_error_errno(errno, "Cannot open for writing %s. This is mandatory to get a persistent machine-id: %m", etc_machine_id); - - r = write_machine_id(fd, id); + r = id128_write(etc_machine_id, ID128_PLAIN, id, true); if (r < 0) - return log_error_errno(r, "Cannot write %s: %m", etc_machine_id); - - fd = safe_close(fd); + return log_error_errno(r, "Cannot write %s. This is mandatory to get a persistent machine ID: %m", etc_machine_id); /* Return to initial namespace and proceed a lazy tmpfs unmount */ r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1); diff --git a/src/core/machine-id-setup.h b/src/core/machine-id-setup.h index a7e7678ed9..29f4620646 100644 --- a/src/core/machine-id-setup.h +++ b/src/core/machine-id-setup.h @@ -20,4 +20,4 @@ ***/ int machine_id_commit(const char *root); -int machine_id_setup(const char *root, sd_id128_t machine_id); +int machine_id_setup(const char *root, sd_id128_t requested, sd_id128_t *ret); diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in index 2cace3d3ba..6e8a3b3e3d 100644 --- a/src/core/macros.systemd.in +++ b/src/core/macros.systemd.in @@ -29,6 +29,8 @@ %_sysusersdir @sysusersdir@ %_sysctldir @sysctldir@ %_binfmtdir @binfmtdir@ +%_systemdgeneratordir @systemgeneratordir@ +%_systemdusergeneratordir @usergeneratordir@ %systemd_requires \ Requires(post): systemd \ @@ -36,6 +38,12 @@ Requires(preun): systemd \ Requires(postun): systemd \ %{nil} +%systemd_ordering \ +OrderWithRequires(post): systemd \ +OrderWithRequires(preun): systemd \ +OrderWithRequires(postun): systemd \ +%{nil} + %systemd_post() \ if [ $1 -eq 1 ] ; then \ # Initial installation \ diff --git a/src/core/main.c b/src/core/main.c index 3d74ef1adf..f2adca7d2b 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -127,7 +127,7 @@ static bool arg_default_io_accounting = false; static bool arg_default_blockio_accounting = false; static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; -static uint64_t arg_default_tasks_max = UINT64_C(512); +static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; noreturn static void freeze_or_reboot(void) { @@ -291,14 +291,16 @@ static int parse_crash_chvt(const char *value) { } static int set_machine_id(const char *m) { + sd_id128_t t; assert(m); - if (sd_id128_from_string(m, &arg_machine_id) < 0) + if (sd_id128_from_string(m, &t) < 0) return -EINVAL; - if (sd_id128_is_null(arg_machine_id)) + if (sd_id128_is_null(t)) return -EINVAL; + arg_machine_id = t; return 0; } @@ -1298,6 +1300,11 @@ static int fixup_environment(void) { _cleanup_free_ char *term = NULL; int r; + /* We expect the environment to be set correctly + * if run inside a container. */ + if (detect_container() > 0) + return 0; + /* When started as PID1, the kernel uses /dev/console * for our stdios and uses TERM=linux whatever the * backend device used by the console. We try to make @@ -1314,7 +1321,7 @@ static int fixup_environment(void) { if (r == 0) { term = strdup(default_term_for_tty("/dev/console") + 5); if (!term) - return -errno; + return -ENOMEM; } if (setenv("TERM", term, 1) < 0) @@ -1444,7 +1451,7 @@ int main(int argc, char *argv[]) { /* * Do a dummy very first call to seal the kernel's time warp magic. * - * Do not call this this from inside the initrd. The initrd might not + * Do not call this from inside the initrd. The initrd might not * carry /etc/adjtime with LOCAL, but the real system could be set up * that way. In such case, we need to delay the time-warp or the sealing * until we reach the real system. @@ -1508,13 +1515,10 @@ int main(int argc, char *argv[]) { } if (arg_system) { - /* We expect the environment to be set correctly - * if run inside a container. */ - if (detect_container() <= 0) - if (fixup_environment() < 0) { - error_message = "Failed to fix up PID1 environment"; - goto finish; - } + if (fixup_environment() < 0) { + error_message = "Failed to fix up PID1 environment"; + goto finish; + } /* Try to figure out if we can use colors with the console. No * need to do that for user instances since they never log @@ -1556,6 +1560,8 @@ int main(int argc, char *argv[]) { (void) reset_all_signal_handlers(); (void) ignore_signals(SIGNALS_IGNORE, -1); + arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. */ + if (parse_config_file() < 0) { error_message = "Failed to parse config file"; goto finish; @@ -1717,7 +1723,7 @@ int main(int argc, char *argv[]) { status_welcome(); hostname_setup(); - machine_id_setup(NULL, arg_machine_id); + machine_id_setup(NULL, arg_machine_id, NULL); loopback_setup(); bump_unix_max_dgram_qlen(); diff --git a/src/core/manager.c b/src/core/manager.c index 902c2a0a27..4d84a0b37e 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -64,7 +64,6 @@ #include "manager.h" #include "missing.h" #include "mkdir.h" -#include "mkdir.h" #include "parse-util.h" #include "path-lookup.h" #include "path-util.h" @@ -570,7 +569,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { m->exit_code = _MANAGER_EXIT_CODE_INVALID; m->default_timer_accuracy_usec = USEC_PER_MINUTE; m->default_tasks_accounting = true; - m->default_tasks_max = UINT64_C(512); + m->default_tasks_max = UINT64_MAX; #ifdef ENABLE_EFI if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) @@ -1729,7 +1728,10 @@ static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) { unit_unwatch_pid(u, si->si_pid); if (UNIT_VTABLE(u)->sigchld_event) { - if (set_size(u->pids) <= 1 || iteration != u->sigchldgen) { + if (set_size(u->pids) <= 1 || + iteration != u->sigchldgen || + unit_main_pid(u) == si->si_pid || + unit_control_pid(u) == si->si_pid) { UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status); u->sigchldgen = iteration; } else diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index f9c9b4a91f..5d8ab0ec70 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -28,6 +28,7 @@ #include "cgroup-util.h" #include "dev-setup.h" #include "efivars.h" +#include "fs-util.h" #include "label.h" #include "log.h" #include "macro.h" @@ -403,9 +404,16 @@ int mount_setup(bool loaded_policy) { * really needs to stay for good, otherwise software that * copied sd-daemon.c into their sources will misdetect * systemd. */ - mkdir_label("/run/systemd", 0755); - mkdir_label("/run/systemd/system", 0755); - mkdir_label("/run/systemd/inaccessible", 0000); + (void) mkdir_label("/run/systemd", 0755); + (void) mkdir_label("/run/systemd/system", 0755); + (void) mkdir_label("/run/systemd/inaccessible", 0000); + /* Set up inaccessible items */ + (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0); + (void) mkdir_label("/run/systemd/inaccessible/dir", 0000); + (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0)); + (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0)); + (void) mkfifo("/run/systemd/inaccessible/fifo", 0000); + (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0); return 0; } diff --git a/src/core/namespace.c b/src/core/namespace.c index 203d122810..52a2505d94 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -278,6 +278,7 @@ static int apply_mount( const char *what; int r; + struct stat target; assert(m); @@ -287,12 +288,21 @@ static int apply_mount( /* First, get rid of everything that is below if there * is anything... Then, overmount it with an - * inaccessible directory. */ + * inaccessible path. */ umount_recursive(m->path, 0); - what = "/run/systemd/inaccessible"; - break; + if (lstat(m->path, &target) < 0) { + if (m->ignore && errno == ENOENT) + return 0; + return -errno; + } + what = mode_to_inaccessible_node(target.st_mode); + if (!what) { + log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed"); + return -ELOOP; + } + break; case READONLY: case READWRITE: /* Nothing to mount here, we just later toggle the @@ -317,12 +327,14 @@ static int apply_mount( assert(what); r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) + if (r >= 0) { log_debug("Successfully mounted %s to %s", what, m->path); - else if (m->ignore && errno == ENOENT) - return 0; - - return r; + return r; + } else { + if (m->ignore && errno == ENOENT) + return 0; + return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path); + } } static int make_read_only(BindMount *m) { @@ -335,7 +347,8 @@ static int make_read_only(BindMount *m) { else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { r = bind_remount_recursive(m->path, false); if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ - r = mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL); + if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) + r = -errno; } else r = 0; @@ -347,9 +360,9 @@ static int make_read_only(BindMount *m) { int setup_namespace( const char* root_directory, - char** read_write_dirs, - char** read_only_dirs, - char** inaccessible_dirs, + char** read_write_paths, + char** read_only_paths, + char** inaccessible_paths, const char* tmp_dir, const char* var_tmp_dir, bool private_dev, @@ -368,9 +381,9 @@ int setup_namespace( return -errno; n = !!tmp_dir + !!var_tmp_dir + - strv_length(read_write_dirs) + - strv_length(read_only_dirs) + - strv_length(inaccessible_dirs) + + strv_length(read_write_paths) + + strv_length(read_only_paths) + + strv_length(inaccessible_paths) + private_dev + (protect_home != PROTECT_HOME_NO ? 3 : 0) + (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) + @@ -378,15 +391,15 @@ int setup_namespace( if (n > 0) { m = mounts = (BindMount *) alloca0(n * sizeof(BindMount)); - r = append_mounts(&m, read_write_dirs, READWRITE); + r = append_mounts(&m, read_write_paths, READWRITE); if (r < 0) return r; - r = append_mounts(&m, read_only_dirs, READONLY); + r = append_mounts(&m, read_only_paths, READONLY); if (r < 0) return r; - r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE); + r = append_mounts(&m, inaccessible_paths, INACCESSIBLE); if (r < 0) return r; @@ -629,7 +642,7 @@ int setup_netns(int netns_storage_socket[2]) { } fail: - lockf(netns_storage_socket[0], F_ULOCK, 0); + (void) lockf(netns_storage_socket[0], F_ULOCK, 0); return r; } diff --git a/src/core/namespace.h b/src/core/namespace.h index b54b7b47d6..1aedf5f208 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -40,9 +40,9 @@ typedef enum ProtectSystem { } ProtectSystem; int setup_namespace(const char *chroot, - char **read_write_dirs, - char **read_only_dirs, - char **inaccessible_dirs, + char **read_write_paths, + char **read_only_paths, + char **inaccessible_paths, const char *tmp_dir, const char *var_tmp_dir, bool private_dev, diff --git a/src/core/scope.c b/src/core/scope.c index decd1a1f3f..b45e238974 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -240,7 +240,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { /* If we have a controller set let's ask the controller nicely * to terminate the scope, instead of us going directly into - * SIGTERM beserk mode */ + * SIGTERM berserk mode */ if (state == SCOPE_STOP_SIGTERM) skip_signal = bus_scope_send_request_stop(s) > 0; @@ -248,7 +248,9 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { r = unit_kill_context( UNIT(s), &s->kill_context, - state != SCOPE_STOP_SIGTERM ? KILL_KILL : KILL_TERMINATE, + state != SCOPE_STOP_SIGTERM ? KILL_KILL : + s->was_abandoned ? KILL_TERMINATE_AND_LOG : + KILL_TERMINATE, -1, -1, false); if (r < 0) goto fail; @@ -369,6 +371,7 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) { assert(fds); unit_serialize_item(u, f, "state", scope_state_to_string(s->state)); + unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned)); return 0; } @@ -389,6 +392,14 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F else s->deserialized_state = state; + } else if (streq(key, "was-abandoned")) { + int k; + + k = parse_boolean(value); + if (k < 0) + log_unit_debug(u, "Failed to parse boolean value: %s", value); + else + s->was_abandoned = k; } else log_unit_debug(u, "Unknown serialization key: %s", key); @@ -474,6 +485,7 @@ int scope_abandon(Scope *s) { if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED)) return -ESTALE; + s->was_abandoned = true; s->controller = mfree(s->controller); /* The client is no longer watching the remaining processes, diff --git a/src/core/scope.h b/src/core/scope.h index 2dc86325c5..eaf8e8b447 100644 --- a/src/core/scope.h +++ b/src/core/scope.h @@ -21,7 +21,9 @@ typedef struct Scope Scope; +#include "cgroup.h" #include "kill.h" +#include "unit.h" typedef enum ScopeResult { SCOPE_SUCCESS, @@ -43,6 +45,7 @@ struct Scope { usec_t timeout_stop_usec; char *controller; + bool was_abandoned; sd_event_source *timer_event_source; }; diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c index cc287d602d..2b96a9551b 100644 --- a/src/core/selinux-access.c +++ b/src/core/selinux-access.c @@ -191,7 +191,7 @@ int mac_selinux_generic_access_check( const char *tclass = NULL, *scon = NULL; struct audit_info audit_info = {}; _cleanup_free_ char *cl = NULL; - security_context_t fcon = NULL; + char *fcon = NULL; char **cmdline = NULL; int r = 0; diff --git a/src/core/selinux-setup.c b/src/core/selinux-setup.c index 4072df58e6..527aa8add0 100644 --- a/src/core/selinux-setup.c +++ b/src/core/selinux-setup.c @@ -44,7 +44,7 @@ int mac_selinux_setup(bool *loaded_policy) { #ifdef HAVE_SELINUX int enforce = 0; usec_t before_load, after_load; - security_context_t con; + char *con; int r; union selinux_callback cb; bool initialized = false; diff --git a/src/core/service.c b/src/core/service.c index 13de671700..afb198507b 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1674,7 +1674,7 @@ static void service_kill_control_processes(Service *s) { return; p = strjoina(UNIT(s)->cgroup_path, "/control"); - cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL); + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, CGROUP_SIGCONT|CGROUP_IGNORE_SELF|CGROUP_REMOVE, NULL, NULL, NULL); } static void service_enter_start(Service *s) { diff --git a/src/core/shutdown.c b/src/core/shutdown.c index e14755d84e..a795d875bb 100644 --- a/src/core/shutdown.c +++ b/src/core/shutdown.c @@ -157,7 +157,6 @@ static int switch_root_initramfs(void) { return switch_root("/run/initramfs", "/oldroot", false, MS_BIND); } - int main(int argc, char *argv[]) { bool need_umount, need_swapoff, need_loop_detach, need_dm_detach; bool in_container, use_watchdog = false; @@ -203,20 +202,25 @@ int main(int argc, char *argv[]) { } (void) cg_get_root_path(&cgroup); + in_container = detect_container() > 0; use_watchdog = !!getenv("WATCHDOG_USEC"); - /* lock us into memory */ + /* Lock us into memory */ mlockall(MCL_CURRENT|MCL_FUTURE); + /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that + * slow IO is processed here already and the final process killing spree is not impacted by processes + * desperately trying to sync IO to disk within their timeout. */ + if (!in_container) + sync(); + log_info("Sending SIGTERM to remaining processes..."); broadcast_signal(SIGTERM, true, true); log_info("Sending SIGKILL to remaining processes..."); broadcast_signal(SIGKILL, true, false); - in_container = detect_container() > 0; - need_umount = !in_container; need_swapoff = !in_container; need_loop_detach = !in_container; @@ -345,10 +349,10 @@ int main(int argc, char *argv[]) { need_loop_detach ? " loop devices," : "", need_dm_detach ? " DM devices," : ""); - /* The kernel will automaticall flush ATA disks and suchlike - * on reboot(), but the file systems need to be synce'd - * explicitly in advance. So let's do this here, but not - * needlessly slow down containers. */ + /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be + * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we + * sync'ed things already once above, but we did some more work since then which might have caused IO, hence + * let's doit once more. */ if (!in_container) sync(); diff --git a/src/core/system.conf b/src/core/system.conf index db8b7acd78..c6bb050aac 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -42,7 +42,7 @@ #DefaultBlockIOAccounting=no #DefaultMemoryAccounting=no #DefaultTasksAccounting=yes -#DefaultTasksMax=512 +#DefaultTasksMax=15% #DefaultLimitCPU= #DefaultLimitFSIZE= #DefaultLimitDATA= diff --git a/src/core/transaction.c b/src/core/transaction.c index e06a48a2f1..8370b864fb 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -373,7 +373,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi delete = NULL; for (k = from; k; k = ((k->generation == generation && k->marker != k) ? k->marker : NULL)) { - /* logging for j not k here here to provide consistent narrative */ + /* logging for j not k here to provide consistent narrative */ log_unit_warning(j->unit, "Found dependency on %s/%s", k->unit->id, job_type_to_string(k->type)); @@ -392,7 +392,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi if (delete) { const char *status; - /* logging for j not k here here to provide consistent narrative */ + /* logging for j not k here to provide consistent narrative */ log_unit_warning(j->unit, "Breaking ordering cycle by deleting job %s/%s", delete->unit->id, job_type_to_string(delete->type)); @@ -591,6 +591,9 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) { HASHMAP_FOREACH(j, m->jobs, i) { assert(j->installed); + if (j->unit->ignore_on_isolate) + continue; + if (hashmap_get(tr->jobs, j->unit)) continue; diff --git a/src/core/unit.c b/src/core/unit.c index 5f06a7dfe7..4934a0e56f 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -1683,7 +1683,7 @@ static void unit_check_unneeded(Unit *u) { if (unit_active_or_pending(other)) return; - /* If stopping a unit fails continously we might enter a stop + /* If stopping a unit fails continuously we might enter a stop * loop here, hence stop acting on the service being * unnecessary after a while. */ if (!ratelimit_test(&u->auto_stop_ratelimit)) { @@ -1728,7 +1728,7 @@ static void unit_check_binds_to(Unit *u) { if (!stop) return; - /* If stopping a unit fails continously we might enter a stop + /* If stopping a unit fails continuously we might enter a stop * loop here, hence stop acting on the service being * unnecessary after a while. */ if (!ratelimit_test(&u->auto_stop_ratelimit)) { @@ -3144,7 +3144,7 @@ int unit_kill_common( if (!pid_set) return -ENOMEM; - q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, false, false, pid_set); + q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, NULL, NULL); if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT) r = q; else @@ -3512,6 +3512,43 @@ int unit_make_transient(Unit *u) { return 0; } +static void log_kill(pid_t pid, int sig, void *userdata) { + _cleanup_free_ char *comm = NULL; + + (void) get_process_comm(pid, &comm); + + /* Don't log about processes marked with brackets, under the assumption that these are temporary processes + only, like for example systemd's own PAM stub process. */ + if (comm && comm[0] == '(') + return; + + log_unit_notice(userdata, + "Killing process " PID_FMT " (%s) with signal SIG%s.", + pid, + strna(comm), + signal_to_string(sig)); +} + +static int operation_to_signal(KillContext *c, KillOperation k) { + assert(c); + + switch (k) { + + case KILL_TERMINATE: + case KILL_TERMINATE_AND_LOG: + return c->kill_signal; + + case KILL_KILL: + return SIGKILL; + + case KILL_ABORT: + return SIGABRT; + + default: + assert_not_reached("KillOperation unknown"); + } +} + int unit_kill_context( Unit *u, KillContext *c, @@ -3520,58 +3557,63 @@ int unit_kill_context( pid_t control_pid, bool main_pid_alien) { - bool wait_for_exit = false; + bool wait_for_exit = false, send_sighup; + cg_kill_log_func_t log_func; int sig, r; assert(u); assert(c); + /* Kill the processes belonging to this unit, in preparation for shutting the unit down. Returns > 0 if we + * killed something worth waiting for, 0 otherwise. */ + if (c->kill_mode == KILL_NONE) return 0; - switch (k) { - case KILL_KILL: - sig = SIGKILL; - break; - case KILL_ABORT: - sig = SIGABRT; - break; - case KILL_TERMINATE: - sig = c->kill_signal; - break; - default: - assert_not_reached("KillOperation unknown"); - } + sig = operation_to_signal(c, k); + + send_sighup = + c->send_sighup && + IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) && + sig != SIGHUP; + + log_func = + k != KILL_TERMINATE || + IN_SET(sig, SIGKILL, SIGABRT) ? log_kill : NULL; if (main_pid > 0) { - r = kill_and_sigcont(main_pid, sig); + if (log_func) + log_func(main_pid, sig, u); + r = kill_and_sigcont(main_pid, sig); if (r < 0 && r != -ESRCH) { _cleanup_free_ char *comm = NULL; - get_process_comm(main_pid, &comm); + (void) get_process_comm(main_pid, &comm); log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm)); } else { if (!main_pid_alien) wait_for_exit = true; - if (c->send_sighup && k == KILL_TERMINATE) + if (r != -ESRCH && send_sighup) (void) kill(main_pid, SIGHUP); } } if (control_pid > 0) { - r = kill_and_sigcont(control_pid, sig); + if (log_func) + log_func(control_pid, sig, u); + r = kill_and_sigcont(control_pid, sig); if (r < 0 && r != -ESRCH) { _cleanup_free_ char *comm = NULL; - get_process_comm(control_pid, &comm); + (void) get_process_comm(control_pid, &comm); log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm)); } else { wait_for_exit = true; - if (c->send_sighup && k == KILL_TERMINATE) + if (r != -ESRCH && send_sighup) (void) kill(control_pid, SIGHUP); } } @@ -3585,7 +3627,11 @@ int unit_kill_context( if (!pid_set) return -ENOMEM; - r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, k != KILL_TERMINATE, false, pid_set); + r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + sig, + CGROUP_SIGCONT|CGROUP_IGNORE_SELF, + pid_set, + log_func, u); if (r < 0) { if (r != -EAGAIN && r != -ESRCH && r != -ENOENT) log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path); @@ -3610,14 +3656,18 @@ int unit_kill_context( (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; - if (c->send_sighup && k != KILL_KILL) { + if (send_sighup) { set_free(pid_set); pid_set = unit_pid_set(main_pid, control_pid); if (!pid_set) return -ENOMEM; - cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, false, true, false, pid_set); + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + SIGHUP, + CGROUP_IGNORE_SELF, + pid_set, + NULL, NULL); } } } @@ -3790,7 +3840,7 @@ bool unit_is_pristine(Unit *u) { /* Check if the unit already exists or is already around, * in a number of different ways. Note that to cater for unit * types such as slice, we are generally fine with units that - * are marked UNIT_LOADED even even though nothing was + * are marked UNIT_LOADED even though nothing was * actually loaded, as those unit types don't require a file * on disk to validly load. */ diff --git a/src/core/unit.h b/src/core/unit.h index c41011ed9d..1eabfa51e2 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -36,6 +36,7 @@ typedef struct UnitStatusMessageFormats UnitStatusMessageFormats; typedef enum KillOperation { KILL_TERMINATE, + KILL_TERMINATE_AND_LOG, KILL_KILL, KILL_ABORT, _KILL_OPERATION_MAX, |