diff options
author | Lennart Poettering <lennart@poettering.net> | 2014-08-22 16:36:38 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2014-08-22 18:10:31 +0200 |
commit | 2928b0a863091f8f291fddb168988711afd389ef (patch) | |
tree | 10a0fad387c664a00b3ca173de9b521a91696eb7 /src/core | |
parent | 2de1851fe3611c59abf77127c6b5bc1b91eb7cba (diff) |
core: add support for a configurable system-wide start-up timeout
When this system-wide start-up timeout is hit we execute one of the
failure actions already implemented for services that fail.
This should not only be useful on embedded devices, but also on laptops
which have the power-button reachable when the lid is closed. These
devices, when in a backpack, might get powered on by accident due to the
easily reachable power button. We want to make sure that the system
turns itself off after a while if it starts up due to this.
When the system manages to fully start up, logind will suspend the
machine by default if the lid is closed. However, in some cases we don't
even get as far as logind, and the boot hangs much earlier, for example
because we ask for a LUKS password that nobody ever enters.
Yeah, this is a real-life problem on my Yoga 13, which has one of those
easily accessible power buttons, even if the device is closed.
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/failure-action.c | 94 | ||||
-rw-r--r-- | src/core/failure-action.h | 40 | ||||
-rw-r--r-- | src/core/main.c | 13 | ||||
-rw-r--r-- | src/core/manager.c | 43 | ||||
-rw-r--r-- | src/core/manager.h | 10 | ||||
-rw-r--r-- | src/core/service.c | 77 | ||||
-rw-r--r-- | src/core/service.h | 16 | ||||
-rw-r--r-- | src/core/system.conf | 3 |
8 files changed, 208 insertions, 88 deletions
diff --git a/src/core/failure-action.c b/src/core/failure-action.c new file mode 100644 index 0000000000..ca807b68da --- /dev/null +++ b/src/core/failure-action.c @@ -0,0 +1,94 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2014 Lennart Poettering + Copyright 2012 Michael Olbrich + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sys/reboot.h> +#include <linux/reboot.h> +#include <sys/syscall.h> + +#include "bus-util.h" +#include "bus-error.h" +#include "special.h" +#include "failure-action.h" + +int failure_action( + Manager *m, + FailureAction action, + const char *reboot_arg) { + + int r; + + assert(m); + assert(action >= 0); + assert(action < _FAILURE_ACTION_MAX); + + switch (action) { + + case FAILURE_ACTION_NONE: + break; + + case FAILURE_ACTION_REBOOT: { + _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL; + + log_warning("Rebooting as result of failure."); + + update_reboot_param_file(reboot_arg); + r = manager_add_job_by_name(m, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE, true, &error, NULL); + if (r < 0) + log_error("Failed to reboot: %s.", bus_error_message(&error, r)); + + break; + } + + case FAILURE_ACTION_REBOOT_FORCE: + log_warning("Forcibly rebooting as result of failure."); + update_reboot_param_file(reboot_arg); + m->exit_code = MANAGER_REBOOT; + break; + + 
case FAILURE_ACTION_REBOOT_IMMEDIATE: + log_warning("Rebooting immediately as result of failure."); + + sync(); + + if (reboot_arg) { + log_info("Rebooting with argument '%s'.", reboot_arg); + syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, reboot_arg); + } + + log_info("Rebooting."); + reboot(RB_AUTOBOOT); + break; + + default: + assert_not_reached("Unknown failure action"); + } + + return -ECANCELED; +} + +static const char* const failure_action_table[_FAILURE_ACTION_MAX] = { + [FAILURE_ACTION_NONE] = "none", + [FAILURE_ACTION_REBOOT] = "reboot", + [FAILURE_ACTION_REBOOT_FORCE] = "reboot-force", + [FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate" +}; +DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction); diff --git a/src/core/failure-action.h b/src/core/failure-action.h new file mode 100644 index 0000000000..5353192f31 --- /dev/null +++ b/src/core/failure-action.h @@ -0,0 +1,40 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#pragma once + +/*** + This file is part of systemd. + + Copyright 2014 Lennart Poettering + Copyright 2012 Michael Olbrich + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +typedef enum FailureAction { + FAILURE_ACTION_NONE, + FAILURE_ACTION_REBOOT, + FAILURE_ACTION_REBOOT_FORCE, + FAILURE_ACTION_REBOOT_IMMEDIATE, + _FAILURE_ACTION_MAX, + _FAILURE_ACTION_INVALID = -1 +} FailureAction; + +#include "macro.h" +#include "manager.h" + +int failure_action(Manager *m, FailureAction action, const char *reboot_arg); + +const char* failure_action_to_string(FailureAction i) _const_; +FailureAction failure_action_from_string(const char *s) _pure_; diff --git a/src/core/main.c b/src/core/main.c index 792b316c61..ed690162bf 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -116,6 +116,9 @@ static FILE* arg_serialization = NULL; static bool arg_default_cpu_accounting = false; static bool arg_default_blockio_accounting = false; static bool arg_default_memory_accounting = false; +static usec_t arg_start_timeout_usec = DEFAULT_MANAGER_START_TIMEOUT_USEC; +static FailureAction arg_start_timeout_action = FAILURE_ACTION_REBOOT_FORCE; +static char *arg_start_timeout_reboot_arg = NULL; static void nop_handler(int sig) {} @@ -669,6 +672,9 @@ static int parse_config_file(void) { { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting }, { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting }, { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting }, + { "Manager", "StartTimeoutSec", config_parse_sec, 0, &arg_start_timeout_usec }, + { "Manager", "StartTimeoutAction", config_parse_failure_action, 0, &arg_start_timeout_action }, + { "Manager", "StartTimeoutRebootArgument",config_parse_string, 0, &arg_start_timeout_reboot_arg }, {} }; @@ -1628,6 +1634,10 @@ int main(int argc, char *argv[]) { m->default_memory_accounting = arg_default_memory_accounting; m->runtime_watchdog = arg_runtime_watchdog; m->shutdown_watchdog = arg_shutdown_watchdog; + m->start_timeout_usec = arg_start_timeout_usec; + m->start_timeout_action = arg_start_timeout_action; 
+ free_and_strdup(&m->start_timeout_reboot_arg, arg_start_timeout_reboot_arg); + m->userspace_timestamp = userspace_timestamp; m->kernel_timestamp = kernel_timestamp; m->initrd_timestamp = initrd_timestamp; @@ -1816,6 +1826,9 @@ finish: set_free(arg_syscall_archs); arg_syscall_archs = NULL; + free(arg_start_timeout_reboot_arg); + arg_start_timeout_reboot_arg = NULL; + label_finish(); if (reexecute) { diff --git a/src/core/manager.c b/src/core/manager.c index 7401817844..1bb0c9025f 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -435,6 +435,8 @@ int manager_new(SystemdRunningAs running_as, bool test_run, Manager **_m) { m->running_as = running_as; m->exit_code = _MANAGER_EXIT_CODE_INVALID; m->default_timer_accuracy_usec = USEC_PER_MINUTE; + m->start_timeout_usec = DEFAULT_MANAGER_START_TIMEOUT_USEC; + m->start_timeout_action = FAILURE_ACTION_REBOOT_FORCE; m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; @@ -823,6 +825,9 @@ void manager_free(Manager *m) { manager_close_idle_pipe(m); + sd_event_source_unref(m->start_timeout_event_source); + free(m->start_timeout_reboot_arg); + udev_unref(m->udev); sd_event_unref(m->event); @@ -970,6 +975,20 @@ static int manager_distribute_fds(Manager *m, FDSet *fds) { return 0; } +static int on_start_timeout(sd_event_source *s, usec_t usec, void *userdata) { + Manager *m = userdata; + + assert(s); + assert(m); + + m->start_timeout_event_source = sd_event_source_unref(m->start_timeout_event_source); + + log_error("Startup timed out."); + + failure_action(m, m->start_timeout_action, m->start_timeout_reboot_arg); + return 0; +} + int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { int r, q; @@ -1042,6 +1061,22 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { m->send_reloading_done = true; } + /* Possibly set up a start timeout */ + if (!dual_timestamp_is_set(&m->finish_timestamp)) { + m->start_timeout_event_source = 
sd_event_source_unref(m->start_timeout_event_source); + + if (m->start_timeout_usec) { + r = sd_event_add_time( + m->event, + &m->start_timeout_event_source, + CLOCK_MONOTONIC, + now(CLOCK_MONOTONIC) + m->start_timeout_usec, 0, + on_start_timeout, m); + if (r < 0) + log_error("Failed to add start timeout event: %s", strerror(-r)); + } + } + return r; } @@ -2462,10 +2497,8 @@ void manager_check_finished(Manager *m) { if (hashmap_size(m->jobs) > 0) { - if (m->jobs_in_progress_event_source) { - sd_event_source_set_time(m->jobs_in_progress_event_source, - now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC); - } + if (m->jobs_in_progress_event_source) + sd_event_source_set_time(m->jobs_in_progress_event_source, now(CLOCK_MONOTONIC) + JOBS_IN_PROGRESS_WAIT_USEC); return; } @@ -2487,6 +2520,8 @@ void manager_check_finished(Manager *m) { dual_timestamp_get(&m->finish_timestamp); + m->start_timeout_event_source = sd_event_source_unref(m->start_timeout_event_source); + if (m->running_as == SYSTEMD_SYSTEM && detect_container(NULL) <= 0) { /* Note that m->kernel_usec.monotonic is always at 0, diff --git a/src/core/manager.h b/src/core/manager.h index 7cb76f7f00..7d26c3adea 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -33,6 +33,8 @@ /* Enforce upper limit how many names we allow */ #define MANAGER_MAX_NAMES 131072 /* 128K */ +#define DEFAULT_MANAGER_START_TIMEOUT_USEC (15*USEC_PER_MINUTE) + typedef struct Manager Manager; typedef enum ManagerState { @@ -69,6 +71,7 @@ typedef enum ManagerExitCode { #include "unit-name.h" #include "exit-status.h" #include "show-status.h" +#include "failure-action.h" struct Manager { /* Note that the set of units we know of is allowed to be @@ -152,6 +155,7 @@ struct Manager { dual_timestamp initrd_timestamp; dual_timestamp userspace_timestamp; dual_timestamp finish_timestamp; + dual_timestamp security_start_timestamp; dual_timestamp security_finish_timestamp; dual_timestamp generators_start_timestamp; @@ -279,6 +283,12 @@ struct 
Manager { /* Used for processing polkit authorization responses */ Hashmap *polkit_registry; + + /* System wide startup timeouts */ + usec_t start_timeout_usec; + sd_event_source *start_timeout_event_source; + FailureAction start_timeout_action; + char *start_timeout_reboot_arg; }; int manager_new(SystemdRunningAs running_as, bool test_run, Manager **m); diff --git a/src/core/service.c b/src/core/service.c index 1b864c4c8c..223e4b3a41 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -23,9 +23,6 @@ #include <signal.h> #include <dirent.h> #include <unistd.h> -#include <sys/reboot.h> -#include <linux/reboot.h> -#include <sys/syscall.h> #include "async.h" #include "manager.h" @@ -1052,8 +1049,6 @@ static int cgroup_good(Service *s) { return !r; } -static int service_execute_action(Service *s, FailureAction action, const char *reason, bool log_action_none); - static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) { int r; assert(s); @@ -1063,8 +1058,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) service_set_state(s, s->result != SERVICE_SUCCESS ? 
SERVICE_FAILED : SERVICE_DEAD); - if (s->result != SERVICE_SUCCESS) - service_execute_action(s, s->failure_action, "failed", false); + if (s->result != SERVICE_SUCCESS) { + log_warning_unit(UNIT(s)->id, "%s failed.", UNIT(s)->id); + failure_action(UNIT(s)->manager, s->failure_action, s->reboot_arg); + } if (allow_restart && !s->forbid_restart && @@ -1601,67 +1598,15 @@ fail: service_enter_stop(s, SERVICE_FAILURE_RESOURCES); } -static int service_execute_action(Service *s, FailureAction action, const char *reason, bool log_action_none) { - assert(s); - - if (action == SERVICE_FAILURE_ACTION_REBOOT || - action == SERVICE_FAILURE_ACTION_REBOOT_FORCE) - update_reboot_param_file(s->reboot_arg); - - switch (action) { - - case SERVICE_FAILURE_ACTION_NONE: - if (log_action_none) - log_warning_unit(UNIT(s)->id, "%s %s, refusing to start.", UNIT(s)->id, reason); - break; - - case SERVICE_FAILURE_ACTION_REBOOT: { - _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL; - int r; - - log_warning_unit(UNIT(s)->id, "%s %s, rebooting.", UNIT(s)->id, reason); - - r = manager_add_job_by_name(UNIT(s)->manager, JOB_START, SPECIAL_REBOOT_TARGET, JOB_REPLACE, true, &error, NULL); - if (r < 0) - log_error_unit(UNIT(s)->id, "Failed to reboot: %s.", bus_error_message(&error, r)); - - break; - } - - case SERVICE_FAILURE_ACTION_REBOOT_FORCE: - log_warning_unit(UNIT(s)->id, "%s %s, forcibly rebooting.", UNIT(s)->id, reason); - UNIT(s)->manager->exit_code = MANAGER_REBOOT; - break; - - case SERVICE_FAILURE_ACTION_REBOOT_IMMEDIATE: - log_warning_unit(UNIT(s)->id, "%s %s, rebooting immediately.", UNIT(s)->id, reason); - - sync(); - - if (s->reboot_arg) { - log_info("Rebooting with argument '%s'.", s->reboot_arg); - syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2, s->reboot_arg); - } - - log_info("Rebooting."); - reboot(RB_AUTOBOOT); - break; - - default: - log_error_unit(UNIT(s)->id, "failure action=%i", action); - assert_not_reached("Unknown 
FailureAction."); - } - - return -ECANCELED; -} - static int service_start_limit_test(Service *s) { assert(s); if (ratelimit_test(&s->start_limit)) return 0; - return service_execute_action(s, s->start_limit_action, "start request repeated too quickly", true); + log_warning_unit(UNIT(s)->id, "start request repeated too quickly for %s", UNIT(s)->id); + + return failure_action(UNIT(s)->manager, s->start_limit_action, s->reboot_arg); } static int service_start(Unit *u) { @@ -2908,14 +2853,6 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = { DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult); -static const char* const failure_action_table[_SERVICE_FAILURE_ACTION_MAX] = { - [SERVICE_FAILURE_ACTION_NONE] = "none", - [SERVICE_FAILURE_ACTION_REBOOT] = "reboot", - [SERVICE_FAILURE_ACTION_REBOOT_FORCE] = "reboot-force", - [SERVICE_FAILURE_ACTION_REBOOT_IMMEDIATE] = "reboot-immediate" -}; -DEFINE_STRING_TABLE_LOOKUP(failure_action, FailureAction); - const UnitVTable service_vtable = { .object_size = sizeof(Service), .exec_context_offset = offsetof(Service, exec_context), diff --git a/src/core/service.h b/src/core/service.h index 0227321d99..5bcfd14339 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -28,6 +28,7 @@ typedef struct Service Service; #include "ratelimit.h" #include "kill.h" #include "exit-status.h" +#include "failure-action.h" typedef enum ServiceState { SERVICE_DEAD, @@ -113,15 +114,6 @@ typedef enum ServiceResult { _SERVICE_RESULT_INVALID = -1 } ServiceResult; -typedef enum FailureAction { - SERVICE_FAILURE_ACTION_NONE, - SERVICE_FAILURE_ACTION_REBOOT, - SERVICE_FAILURE_ACTION_REBOOT_FORCE, - SERVICE_FAILURE_ACTION_REBOOT_IMMEDIATE, - _SERVICE_FAILURE_ACTION_MAX, - _SERVICE_FAILURE_ACTION_INVALID = -1 -} FailureAction; - struct Service { Unit meta; @@ -193,10 +185,9 @@ struct Service { char *status_text; int status_errno; - FailureAction failure_action; - RateLimit start_limit; FailureAction start_limit_action; + 
FailureAction failure_action; char *reboot_arg; UnitRef accept_socket; @@ -234,6 +225,3 @@ NotifyState notify_state_from_string(const char *s) _pure_; const char* service_result_to_string(ServiceResult i) _const_; ServiceResult service_result_from_string(const char *s) _pure_; - -const char* failure_action_to_string(FailureAction i) _const_; -FailureAction failure_action_from_string(const char *s) _pure_; diff --git a/src/core/system.conf b/src/core/system.conf index 65a35a0689..45448de328 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -23,6 +23,9 @@ #CapabilityBoundingSet= #SystemCallArchitectures= #TimerSlackNSec= +#StartTimeoutSec=15min +#StartTimeoutAction=reboot-force +#StartTimeoutRebootArgument= #DefaultTimerAccuracySec=1min #DefaultStandardOutput=journal #DefaultStandardError=inherit |