diff options
author | Lennart Poettering <lennart@poettering.net> | 2012-04-05 22:08:10 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2012-04-05 22:15:29 +0200 |
commit | e96d6be763014be75d480fde503d0b77f41194a0 (patch) | |
tree | 1bc3753e708c8b588c43ccd5b5c52c0adefc01c9 /src/manager.c | |
parent | 348e27fedfd4cdd2238ff31a46785a70b9dc6fc0 (diff) |
systemd: add hardware watchdog support
This adds minimal hardware watchdog support to PID 1. The idea is that
PID 1 supervises and watchdogs system services, while the hardware
watchdog is used to supervise PID 1.
This adds two hardware watchdog configuration options: one for the runtime
watchdog and one for a shutdown watchdog. The former is active during normal
operation; the latter is active only at reboots, to ensure that if a clean
reboot times out we reboot nonetheless.
If the runtime watchdog is enabled, PID 1 will automatically wake up at
half the configured interval and write to the watchdog device.
By default we enable the shutdown watchdog, but leave the runtime
watchdog disabled in order not to break independent hardware watchdog
daemons people might be using.
This is only the most basic hookup. If necessary we can later on hook
up the watchdog ping more closely with services deemed crucial.
Diffstat (limited to 'src/manager.c')
-rw-r--r-- | src/manager.c | 25 |
1 files changed, 21 insertions, 4 deletions
diff --git a/src/manager.c b/src/manager.c index 74bd740747..be47766a73 100644 --- a/src/manager.c +++ b/src/manager.c @@ -61,6 +61,7 @@ #include "bus-errors.h" #include "exit-status.h" #include "virt.h" +#include "watchdog.h" /* As soon as 16 units are in our GC queue, make sure to run a gc sweep */ #define GC_QUEUE_ENTRIES_MAX 16 @@ -2433,6 +2434,7 @@ static int process_event(Manager *m, struct epoll_event *ev) { int manager_loop(Manager *m) { int r; + int wait_msec = -1; RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000); @@ -2447,17 +2449,29 @@ int manager_loop(Manager *m) { /* There might still be some zombies hanging around from * before we were exec()'ed. Leat's reap them */ - if ((r = manager_dispatch_sigchld(m)) < 0) + r = manager_dispatch_sigchld(m); + if (r < 0) return r; + /* Sleep for half the watchdog time */ + if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM) { + wait_msec = (int) (m->runtime_watchdog / 2 / USEC_PER_MSEC); + if (wait_msec <= 0) + wait_msec = 1; + } + while (m->exit_code == MANAGER_RUNNING) { struct epoll_event event; int n; + if (wait_msec >= 0) + watchdog_ping(); + if (!ratelimit_test(&rl)) { /* Yay, something is going seriously wrong, pause a little */ log_warning("Looping too fast. Throttling execution a little."); sleep(1); + continue; } if (manager_dispatch_load_queue(m) > 0) @@ -2481,17 +2495,20 @@ int manager_loop(Manager *m) { if (swap_dispatch_reload(m) > 0) continue; - if ((n = epoll_wait(m->epoll_fd, &event, 1, -1)) < 0) { + n = epoll_wait(m->epoll_fd, &event, 1, wait_msec); + if (n < 0) { if (errno == EINTR) continue; return -errno; - } + } else if (n == 0) + continue; assert(n == 1); - if ((r = process_event(m, &event)) < 0) + r = process_event(m, &event); + if (r < 0) return r; } |