diff options
author | Michael Olbrich <m.olbrich@pengutronix.de> | 2012-02-08 10:10:34 +0100 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2012-02-08 17:10:38 +0100 |
commit | bb242b7b5277f5db4a01be905f26eccd382ff1e0 (patch) | |
tree | 99cb256985193d596061bd585cc71e7155b43499 | |
parent | aa704ba8c2a1adce92ba8a154f70f1bdc950be1e (diff) |
service: introduce WatchdogSec and hook up the watchdog with the existing failure logic
-rw-r--r-- | man/systemd.service.xml | 18 | ||||
-rw-r--r-- | src/dbus-service.c | 2 | ||||
-rw-r--r-- | src/load-fragment-gperf.gperf.m4 | 1 | ||||
-rw-r--r-- | src/service.c | 46 | ||||
-rw-r--r-- | src/service.h | 3 |
5 files changed, 68 insertions, 2 deletions
diff --git a/man/systemd.service.xml b/man/systemd.service.xml index 0baddd1d4f..0b5edb8560 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -460,6 +460,24 @@ </varlistentry> <varlistentry> + <term><varname>WatchdogSec=</varname></term> + <listitem><para>Configures the watchdog + timeout for a service. This is activated + when the start-up is completed. The service + must call + <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry> + regularly with "WATCHDOG=1". If the time + between two such calls is larger than + the configured time then the service + enters a failure state. By setting + <varname>Restart=</varname> + to <option>on-failure</option> or + <option>always</option> the service can + be restarted. Defaults to 0s, which + disables this feature.</para></listitem> + </varlistentry> + + <varlistentry> <term><varname>Restart=</varname></term> <listitem><para>Configures whether the main service process shall be diff --git a/src/dbus-service.c b/src/dbus-service.c index 738dc7bccd..fedfc1d523 100644 --- a/src/dbus-service.c +++ b/src/dbus-service.c @@ -43,6 +43,7 @@ " <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \ " <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"WatchdogUSec\" type=\"t\" access=\"read\"/>\n" \ " <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \ " <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \ @@ -119,6 +120,7 @@ static const BusProperty bus_service_properties[] = { { "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) }, { "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) }, { "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) }, + { "WatchdogUSec", 
bus_property_append_usec, "t", offsetof(Service, watchdog_usec) }, { "WatchdogTimestamp", bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.realtime) }, { "WatchdogTimestampMonotonic",bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.monotonic) }, BUS_EXEC_COMMAND_PROPERTY("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), true ), diff --git a/src/load-fragment-gperf.gperf.m4 b/src/load-fragment-gperf.gperf.m4 index 14c060616f..9191f90642 100644 --- a/src/load-fragment-gperf.gperf.m4 +++ b/src/load-fragment-gperf.gperf.m4 @@ -134,6 +134,7 @@ Service.ExecStop, config_parse_exec, SERVICE_EXE Service.ExecStopPost, config_parse_exec, SERVICE_EXEC_STOP_POST, offsetof(Service, exec_command) Service.RestartSec, config_parse_usec, 0, offsetof(Service, restart_usec) Service.TimeoutSec, config_parse_usec, 0, offsetof(Service, timeout_usec) +Service.WatchdogSec, config_parse_usec, 0, offsetof(Service, watchdog_usec) Service.Type, config_parse_service_type, 0, offsetof(Service, type) Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart) Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only) diff --git a/src/service.c b/src/service.c index b6bbfab3ef..1631595a28 100644 --- a/src/service.c +++ b/src/service.c @@ -112,6 +112,9 @@ static void service_init(Unit *u) { s->timeout_usec = DEFAULT_TIMEOUT_USEC; s->restart_usec = DEFAULT_RESTART_USEC; + + s->watchdog_watch.type = WATCH_INVALID; + s->timer_watch.type = WATCH_INVALID; #ifdef HAVE_SYSV_COMPAT s->sysv_start_priority = -1; @@ -208,14 +211,39 @@ static void service_connection_unref(Service *s) { static void service_stop_watchdog(Service *s) { assert(s); + unit_unwatch_timer(UNIT(s), &s->watchdog_watch); s->watchdog_timestamp.realtime = 0; s->watchdog_timestamp.monotonic = 0; } +static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart); + +static void service_handle_watchdog(Service *s) { 
+ usec_t offset; + int r; + + assert(s); + + if (s->watchdog_usec == 0) + return; + + offset = now(CLOCK_MONOTONIC) - s->watchdog_timestamp.monotonic; + if (offset >= s->watchdog_usec) { + log_error("%s watchdog timeout!", UNIT(s)->id); + service_enter_dead(s, SERVICE_FAILURE_WATCHDOG, true); + return; + } + + r = unit_watch_timer(UNIT(s), s->watchdog_usec - offset, &s->watchdog_watch); + if (r < 0) + log_warning("%s failed to install watchdog timer: %s", UNIT(s)->id, strerror(-r)); +} + static void service_reset_watchdog(Service *s) { assert(s); dual_timestamp_get(&s->watchdog_timestamp); + service_handle_watchdog(s); } static void service_done(Unit *u) { @@ -259,6 +287,8 @@ static void service_done(Unit *u) { unit_ref_unset(&s->accept_socket); + service_stop_watchdog(s); + unit_unwatch_timer(u, &s->timer_watch); } @@ -1568,9 +1598,12 @@ static int service_coldplug(Unit *u) { if ((r = unit_watch_pid(UNIT(s), s->control_pid)) < 0) return r; + if (s->deserialized_state == SERVICE_START_POST || + s->deserialized_state == SERVICE_RUNNING) + service_handle_watchdog(s); + service_set_state(s, s->deserialized_state); } - return 0; } @@ -2002,6 +2035,9 @@ static void service_enter_start_post(Service *s) { service_unwatch_control_pid(s); + if (s->watchdog_usec > 0) + service_reset_watchdog(s); + if ((s->control_command = s->exec_command[SERVICE_EXEC_START_POST])) { s->control_command_id = SERVICE_EXEC_START_POST; @@ -2922,6 +2958,11 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) { assert(s); assert(elapsed == 1); + if (w == &s->watchdog_watch) { + service_handle_watchdog(s); + return; + } + assert(w == &s->timer_watch); switch (s->state) { @@ -3611,7 +3652,8 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = { [SERVICE_FAILURE_TIMEOUT] = "timeout", [SERVICE_FAILURE_EXIT_CODE] = "exit-code", [SERVICE_FAILURE_SIGNAL] = "signal", - [SERVICE_FAILURE_CORE_DUMP] = "core-dump" + [SERVICE_FAILURE_CORE_DUMP] = "core-dump", + 
[SERVICE_FAILURE_WATCHDOG] = "watchdog" }; DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult); diff --git a/src/service.h b/src/service.h index b1e8b90470..02726efe25 100644 --- a/src/service.h +++ b/src/service.h @@ -95,6 +95,7 @@ typedef enum ServiceResult { SERVICE_FAILURE_EXIT_CODE, SERVICE_FAILURE_SIGNAL, SERVICE_FAILURE_CORE_DUMP, + SERVICE_FAILURE_WATCHDOG, _SERVICE_RESULT_MAX, _SERVICE_RESULT_INVALID = -1 } ServiceResult; @@ -112,6 +113,8 @@ struct Service { usec_t timeout_usec; dual_timestamp watchdog_timestamp; + usec_t watchdog_usec; + Watch watchdog_watch; ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX]; ExecContext exec_context; |