diff options
-rw-r--r-- | src/core/dbus-scope.c | 17 | ||||
-rw-r--r-- | src/core/manager.c | 2 | ||||
-rw-r--r-- | src/core/scope.c | 98 | ||||
-rw-r--r-- | src/core/scope.h | 5 | ||||
-rw-r--r-- | src/core/service.c | 134 | ||||
-rw-r--r-- | src/core/unit.c | 124 | ||||
-rw-r--r-- | src/core/unit.h | 9 |
7 files changed, 281 insertions, 108 deletions
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index d5a2048516..c46c972c26 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -28,6 +28,16 @@ #include "bus-util.h" #include "bus-internal.h" +static int bus_scope_abandon(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error) { + Scope *s = userdata; + + assert(bus); + assert(message); + assert(s); + + return scope_abandon(s); +} + static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, scope_result, ScopeResult); const sd_bus_vtable bus_scope_vtable[] = { @@ -36,6 +46,7 @@ const sd_bus_vtable bus_scope_vtable[] = { SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Scope, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_SIGNAL("RequestStop", NULL, 0), + SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_abandon, 0), SD_BUS_VTABLE_END }; @@ -56,10 +67,6 @@ static int bus_scope_set_transient_property( unsigned n = 0; uint32_t pid; - r = set_ensure_allocated(&s->pids, trivial_hash_func, trivial_compare_func); - if (r < 0) - return r; - r = sd_bus_message_enter_container(message, 'a', "u"); if (r < 0) return r; @@ -70,7 +77,7 @@ static int bus_scope_set_transient_property( return -EINVAL; if (mode != UNIT_CHECK) { - r = set_put(s->pids, LONG_TO_PTR(pid)); + r = unit_watch_pid(UNIT(s), pid); if (r < 0 && r != -EEXIST) return r; } diff --git a/src/core/manager.c b/src/core/manager.c index 45f5f70b2a..c0bcc79f00 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1459,7 +1459,7 @@ static int manager_dispatch_sigchld(Manager *m) { log_debug_unit(u->id, "Child %lu belongs to %s", (long unsigned) si.si_pid, u->id); - hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid)); + unit_unwatch_pid(u, si.si_pid); UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status); } diff --git a/src/core/scope.c b/src/core/scope.c index 0c1d17e68c..940e40dda7 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -35,6 +35,7 @@ static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = { [SCOPE_DEAD] = UNIT_INACTIVE, [SCOPE_RUNNING] = UNIT_ACTIVE, + [SCOPE_ABANDONED] = UNIT_ACTIVE, [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SCOPE_FAILED] = UNIT_FAILED @@ -66,9 +67,6 @@ static void scope_done(Unit *u) { free(s->controller); - set_free(s->pids); - s->pids = NULL; - s->timer_event_source = sd_event_source_unref(s->timer_event_source); } @@ -100,15 +98,14 @@ static void scope_set_state(Scope *s, ScopeState state) { old_state = s->state; s->state = state; - if (state != SCOPE_STOP_SIGTERM && - state != SCOPE_STOP_SIGKILL) + if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL)) s->timer_event_source = sd_event_source_unref(s->timer_event_source); + if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) + unit_unwatch_all_pids(UNIT(s)); + if (state != old_state) - log_debug("%s changed %s -> %s", - UNIT(s)->id, - scope_state_to_string(old_state), - scope_state_to_string(state)); + log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state)); unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true); } @@ -135,7 +132,7 @@ static int scope_verify(Scope *s) { if (UNIT(s)->load_state != UNIT_LOADED) return 0; - if (set_size(s->pids) <= 0 && UNIT(s)->manager->n_reloading <= 0) { + if (set_isempty(UNIT(s)->pids) && UNIT(s)->manager->n_reloading <= 0) { log_error_unit(UNIT(s)->id, "Scope %s has no PIDs. Refusing.", UNIT(s)->id); return -EINVAL; } @@ -181,12 +178,15 @@ static int scope_coldplug(Unit *u) { if (s->deserialized_state != s->state) { - if (s->deserialized_state == SCOPE_STOP_SIGKILL || s->deserialized_state == SCOPE_STOP_SIGTERM) { + if (IN_SET(s->deserialized_state, SCOPE_STOP_SIGKILL, SCOPE_STOP_SIGTERM)) { r = scope_arm_timer(s); if (r < 0) return r; } + if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED)) + unit_watch_all_pids(UNIT(s)); + scope_set_state(s, s->deserialized_state); } @@ -227,6 +227,8 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { if (f != SCOPE_SUCCESS) s->result = f; + unit_watch_all_pids(UNIT(s)); + /* If we have a controller set let's ask the controller nicely * to terminate the scope, instead of us going directly into * SIGTERM beserk mode */ @@ -288,13 +290,10 @@ static int scope_start(Unit *u) { return r; } - r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, s->pids); + r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, UNIT(s)->pids); if (r < 0) return r; - set_free(s->pids); - s->pids = NULL; - s->result = SCOPE_SUCCESS; scope_set_state(s, SCOPE_RUNNING); @@ -305,13 +304,13 @@ static int scope_stop(Unit *u) { Scope *s = SCOPE(u); assert(s); - assert(s->state == SCOPE_RUNNING); if (s->state == SCOPE_STOP_SIGTERM || s->state == SCOPE_STOP_SIGKILL) return 0; - assert(s->state == SCOPE_RUNNING); + assert(s->state == SCOPE_RUNNING || + s->state == SCOPE_ABANDONED); scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_SUCCESS); return 0; @@ -389,8 +388,8 @@ static bool scope_check_gc(Unit *u) { /* Never clean up scopes that still have a process around, * even if the scope is formally dead. */ - if (UNIT(s)->cgroup_path) { - r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true); + if (u->cgroup_path) { + r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true); if (r <= 0) return true; } @@ -398,6 +397,32 @@ static bool scope_check_gc(Unit *u) { return false; } +static void scope_notify_cgroup_empty_event(Unit *u) { + Scope *s = SCOPE(u); + assert(u); + + log_debug_unit(u->id, "%s: cgroup is empty", u->id); + + if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL)) + scope_enter_dead(s, SCOPE_SUCCESS); +} + +static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) { + + /* If we get a SIGCHLD event for one of the processes we were + interested in, then we look for others to watch, under the + assumption that we'll sooner or later get a SIGCHLD for + them, as the original process we watched was probably the + parent of them, and they are hence now our children. */ + + unit_tidy_watch_pids(u, 0, 0); + unit_watch_all_pids(u); + + /* If the PID set is empty now, then let's finish this off */ + if (set_isempty(u->pids)) + scope_notify_cgroup_empty_event(u); +} + static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) { Scope *s = SCOPE(userdata); @@ -429,24 +454,30 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user return 0; } -static void scope_notify_cgroup_empty_event(Unit *u) { - Scope *s = SCOPE(u); - assert(u); +int scope_abandon(Scope *s) { + assert(s); - log_debug_unit(u->id, "%s: cgroup is empty", u->id); + if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED)) + return -ESTALE; - switch (s->state) { + free(s->controller); + s->controller = NULL; - case SCOPE_RUNNING: - case SCOPE_STOP_SIGTERM: - case SCOPE_STOP_SIGKILL: - scope_enter_dead(s, SCOPE_SUCCESS); + /* The client is no longer watching the remaining processes, + * so let's step in here, under the assumption that the + * remaining processes will be sooner or later reassigned to + * us as parent. */ - break; + unit_tidy_watch_pids(UNIT(s), 0, 0); + unit_watch_all_pids(UNIT(s)); - default: - ; - } + /* If the PID set is empty now, then let's finish this off */ + if (set_isempty(UNIT(s)->pids)) + scope_notify_cgroup_empty_event(UNIT(s)); + else + scope_set_state(s, SCOPE_ABANDONED); + + return 0; } _pure_ static UnitActiveState scope_active_state(Unit *u) { @@ -464,6 +495,7 @@ _pure_ static const char *scope_sub_state_to_string(Unit *u) { static const char* const scope_state_table[_SCOPE_STATE_MAX] = { [SCOPE_DEAD] = "dead", [SCOPE_RUNNING] = "running", + [SCOPE_ABANDONED] = "abandoned", [SCOPE_STOP_SIGTERM] = "stop-sigterm", [SCOPE_STOP_SIGKILL] = "stop-sigkill", [SCOPE_FAILED] = "failed", @@ -516,6 +548,8 @@ const UnitVTable scope_vtable = { .check_gc = scope_check_gc, + .sigchld_event = scope_sigchld_event, + .reset_failed = scope_reset_failed, .notify_cgroup_empty = scope_notify_cgroup_empty_event, diff --git a/src/core/scope.h b/src/core/scope.h index 014b50c765..6c59126422 100644 --- a/src/core/scope.h +++ b/src/core/scope.h @@ -29,6 +29,7 @@ typedef struct Scope Scope; typedef enum ScopeState { SCOPE_DEAD, SCOPE_RUNNING, + SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_FAILED, @@ -57,13 +58,13 @@ struct Scope { char *controller; - Set *pids; - sd_event_source *timer_event_source; }; extern const UnitVTable scope_vtable; +int scope_abandon(Scope *s); + const char* scope_state_to_string(ScopeState i) _const_; ScopeState scope_state_from_string(const char *s) _pure_; diff --git a/src/core/service.c b/src/core/service.c index 0542eae023..8c47e24bfb 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1485,6 +1485,9 @@ static void service_set_state(Service *s, ServiceState state) { s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID; } + if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) + unit_unwatch_all_pids(UNIT(s)); + if (!IN_SET(state, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, @@ -1587,6 +1590,9 @@ static int service_coldplug(Unit *u) { return r; } + if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) + unit_watch_all_pids(UNIT(s)); + if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) service_start_watchdog(s); @@ -1895,6 +1901,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { s->result = f; service_unwatch_control_pid(s); + unit_watch_all_pids(UNIT(s)); s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST]; if (s->control_command) { @@ -1934,6 +1941,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f if (f != SERVICE_SUCCESS) s->result = f; + unit_watch_all_pids(UNIT(s)); + r = unit_kill_context( UNIT(s), &s->kill_context, @@ -1983,6 +1992,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { s->result = f; service_unwatch_control_pid(s); + unit_watch_all_pids(UNIT(s)); s->control_command = s->exec_command[SERVICE_EXEC_STOP]; if (s->control_command) { @@ -2865,6 +2875,62 @@ fail: return 0; } +static void service_notify_cgroup_empty_event(Unit *u) { + Service *s = SERVICE(u); + + assert(u); + + log_debug_unit(u->id, "%s: cgroup is empty", u->id); + + switch (s->state) { + + /* Waiting for SIGCHLD is usually more interesting, + * because it includes return codes/signals. Which is + * why we ignore the cgroup events for most cases, + * except when we don't know pid which to expect the + * SIGCHLD for. */ + + case SERVICE_START: + case SERVICE_START_POST: + /* If we were hoping for the daemon to write its PID file, + * we can give up now. */ + if (s->pid_file_pathspec) { + log_warning_unit(u->id, + "%s never wrote its PID file. Failing.", UNIT(s)->id); + service_unwatch_pid_file(s); + if (s->state == SERVICE_START) + service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES); + else + service_enter_stop(s, SERVICE_FAILURE_RESOURCES); + } + break; + + case SERVICE_RUNNING: + /* service_enter_running() will figure out what to do */ + service_enter_running(s, SERVICE_SUCCESS); + break; + + case SERVICE_STOP_SIGTERM: + case SERVICE_STOP_SIGKILL: + + if (main_pid_good(s) <= 0 && !control_pid_good(s)) + service_enter_stop_post(s, SERVICE_SUCCESS); + + break; + + case SERVICE_STOP_POST: + case SERVICE_FINAL_SIGTERM: + case SERVICE_FINAL_SIGKILL: + if (main_pid_good(s) <= 0 && !control_pid_good(s)) + service_enter_dead(s, SERVICE_SUCCESS, true); + + break; + + default: + ; + } +} + static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { Service *s = SERVICE(u); ServiceResult f; @@ -3142,6 +3208,18 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* Notify clients about changed exit status */ unit_add_to_dbus_queue(u); + + /* We got one SIGCHLD for the service, let's watch all + * processes that are now running of the service, and watch + * that. Among the PIDs we then watch will be children + * reassigned to us, which hopefully allows us to identify + * when all children are gone */ + unit_tidy_watch_pids(u, s->main_pid, s->control_pid); + unit_watch_all_pids(u); + + /* If the PID set is empty now, then let's finish this off */ + if (set_isempty(u->pids)) + service_notify_cgroup_empty_event(u); } static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) { @@ -3254,62 +3332,6 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void return 0; } -static void service_notify_cgroup_empty_event(Unit *u) { - Service *s = SERVICE(u); - - assert(u); - - log_debug_unit(u->id, "%s: cgroup is empty", u->id); - - switch (s->state) { - - /* Waiting for SIGCHLD is usually more interesting, - * because it includes return codes/signals. Which is - * why we ignore the cgroup events for most cases, - * except when we don't know pid which to expect the - * SIGCHLD for. */ - - case SERVICE_START: - case SERVICE_START_POST: - /* If we were hoping for the daemon to write its PID file, - * we can give up now. */ - if (s->pid_file_pathspec) { - log_warning_unit(u->id, - "%s never wrote its PID file. Failing.", UNIT(s)->id); - service_unwatch_pid_file(s); - if (s->state == SERVICE_START) - service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES); - else - service_enter_stop(s, SERVICE_FAILURE_RESOURCES); - } - break; - - case SERVICE_RUNNING: - /* service_enter_running() will figure out what to do */ - service_enter_running(s, SERVICE_SUCCESS); - break; - - case SERVICE_STOP_SIGTERM: - case SERVICE_STOP_SIGKILL: - - if (main_pid_good(s) <= 0 && !control_pid_good(s)) - service_enter_stop_post(s, SERVICE_SUCCESS); - - break; - - case SERVICE_STOP_POST: - case SERVICE_FINAL_SIGTERM: - case SERVICE_FINAL_SIGKILL: - if (main_pid_good(s) <= 0 && !control_pid_good(s)) - service_enter_dead(s, SERVICE_SUCCESS, true); - - break; - - default: - ; - } -} - static void service_notify_message(Unit *u, pid_t pid, char **tags) { Service *s = SERVICE(u); const char *e; diff --git a/src/core/unit.c b/src/core/unit.c index 345521a711..07eedcd637 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -482,6 +482,8 @@ void unit_free(Unit *u) { set_free_free(u->names); + unit_unwatch_all_pids(u); + condition_free_list(u->conditions); unit_ref_unset(&u->slice); @@ -1697,13 +1699,25 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su } int unit_watch_pid(Unit *u, pid_t pid) { + int q, r; + assert(u); assert(pid >= 1); + r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func); + if (r < 0) + return r; + /* Watch a specific PID. We only support one unit watching * each PID for now. */ - return hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u); + r = set_put(u->pids, LONG_TO_PTR(pid)); + + q = hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u); + if (q < 0) + return q; + + return r; } void unit_unwatch_pid(Unit *u, pid_t pid) { @@ -1711,6 +1725,102 @@ void unit_unwatch_pid(Unit *u, pid_t pid) { assert(pid >= 1); hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u); + set_remove(u->pids, LONG_TO_PTR(pid)); +} + +static int watch_pids_in_path(Unit *u, const char *path) { + _cleanup_closedir_ DIR *d = NULL; + _cleanup_fclose_ FILE *f = NULL; + int ret = 0, r; + + assert(u); + assert(path); + + /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */ + + r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f); + if (r >= 0) { + pid_t pid; + + while ((r = cg_read_pid(f, &pid)) > 0) { + r = unit_watch_pid(u, pid); + if (r < 0 && ret >= 0) + ret = r; + } + if (r < 0 && ret >= 0) + ret = r; + + } else if (ret >= 0) + ret = r; + + r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d); + if (r >= 0) { + char *fn; + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = strjoin(path, "/", fn, NULL); + free(fn); + + if (!p) + return -ENOMEM; + + r = watch_pids_in_path(u, p); + if (r < 0 && ret >= 0) + ret = r; + } + if (r < 0 && ret >= 0) + ret = r; + + } else if (ret >= 0) + ret = r; + + return ret; +} + + +int unit_watch_all_pids(Unit *u) { + assert(u); + + if (!u->cgroup_path) + return -ENOENT; + + /* Adds all PIDs from our cgroup to the set of PIDs we watch */ + + return watch_pids_in_path(u, u->cgroup_path); +} + +void unit_unwatch_all_pids(Unit *u) { + Iterator i; + void *e; + + assert(u); + + SET_FOREACH(e, u->pids, i) + hashmap_remove_value(u->manager->watch_pids, e, u); + + set_free(u->pids); + u->pids = NULL; +} + +void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) { + Iterator i; + void *e; + + assert(u); + + /* Cleans dead PIDs from our list */ + + SET_FOREACH(e, u->pids, i) { + pid_t pid = PTR_TO_LONG(e); + + if (pid == except1 || pid == except2) + continue; + + if (kill(pid, 0) < 0 && errno == ESRCH) + set_remove(u->pids, e); + } } bool unit_job_is_applicable(Unit *u, JobType j) { @@ -2979,17 +3089,7 @@ int unit_kill_context( log_warning_unit(u->id, "Failed to kill control group: %s", strerror(-r)); } else if (r > 0) { - /* FIXME: Now, we don't actually wait for any - * of the processes that are neither control - * nor main process. We should wait for them - * of course, but that's hard since the cgroup - * notification logic is so unreliable. It is - * not available at all in containers, and on - * the host it gets confused by - * subgroups. Hence, for now, let's not wait - * for these processes -- but when the kernel - * gets fixed we really should correct - * that. */ + wait_for_exit = true; if (c->send_sighup && !sigkill) { set_free(pid_set); diff --git a/src/core/unit.h b/src/core/unit.h index c686aeccef..c104a8a9d5 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -203,6 +203,11 @@ struct Unit { /* CGroup realize members queue */ LIST_FIELDS(Unit, cgroup_queue); + /* PIDs we keep an eye on. Note that a unit might have many + * more, but these are the ones we care enough about to + * process SIGCHLD for */ + Set *pids; + /* Used during GC sweeps */ unsigned gc_marker; @@ -538,6 +543,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su int unit_watch_pid(Unit *u, pid_t pid); void unit_unwatch_pid(Unit *u, pid_t pid); +int unit_watch_all_pids(Unit *u); +void unit_unwatch_all_pids(Unit *u); + +void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2); int unit_watch_bus_name(Unit *u, const char *name); void unit_unwatch_bus_name(Unit *u, const char *name); |