summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2013-04-01 22:48:40 +0200
committerLennart Poettering <lennart@poettering.net>2013-04-02 01:28:01 +0200
commitaaf7eb81be912e7bed939f31e3bc4c631b2552b3 (patch)
tree92f52fdb3635e99d1449d4e79c96872066668d38
parentceca950145f35f06938acba62af0c678f896e87d (diff)
shutdown: correctly wait for processes we killed in the killall spree
Previously we simply counted how many processes we killed and expected as many waitpid() calls to succeed. That however is incorrect to do. As we might kill processes that are not our immediate children, and as there might be left-over processes in the waitpid() queue from earlier the we might get more ore less waitpid() events that we expect. Hence: keep precise track of the processes we kill, remove the ones we get waitpid() for, and after each time we get SIGCHLD check if all others still exist. We use getpgid() to check if a PID still exists. This should fix issues with journald not setting journal files offline correctly on shutdown, because we'd too quickly proceed from SIGTERM to SIGKILL because some left-over process was in our waitpid() queue.
-rw-r--r--src/core/killall.c85
1 files changed, 58 insertions, 27 deletions
diff --git a/src/core/killall.c b/src/core/killall.c
index 1eb3766f77..7f0dbb9edf 100644
--- a/src/core/killall.c
+++ b/src/core/killall.c
@@ -22,12 +22,14 @@
#include <sys/wait.h>
#include <signal.h>
#include <errno.h>
+#include <unistd.h>
#include "util.h"
#include "def.h"
#include "killall.h"
+#include "set.h"
-#define TIMEOUT_USEC (5 * USEC_PER_SEC)
+#define TIMEOUT_USEC (10 * USEC_PER_SEC)
static bool ignore_proc(pid_t pid) {
char buf[PATH_MAX];
@@ -73,38 +75,68 @@ static bool ignore_proc(pid_t pid) {
return false;
}
-static void wait_for_children(int n_processes, sigset_t *mask) {
+static void wait_for_children(Set *pids, sigset_t *mask) {
usec_t until;
assert(mask);
+ if (set_isempty(pids))
+ return;
+
until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
for (;;) {
struct timespec ts;
int k;
usec_t n;
+ void *p;
+ Iterator i;
+ /* First, let the kernel inform us about killed
+ * children. Most processes will probably be our
+ * children, but some are not (might be our
+ * grandchildren instead...). */
for (;;) {
- pid_t pid = waitpid(-1, NULL, WNOHANG);
+ pid_t pid;
+ pid = waitpid(-1, NULL, WNOHANG);
if (pid == 0)
break;
+ if (pid < 0) {
+ if (errno == ECHILD)
+ break;
- if (pid < 0 && errno == ECHILD)
+ log_error("waitpid() failed: %m");
return;
+ }
+
+ set_remove(pids, ULONG_TO_PTR(pid));
+ }
- if (n_processes > 0)
- if (--n_processes == 0)
- return;
+ /* Now explicitly check who might be remaining, who
+ * might not be our child. */
+ SET_FOREACH(p, pids, i) {
+
+ /* We misuse getpgid as a check whether a
+ * process still exists. */
+ if (getpgid((pid_t) PTR_TO_ULONG(p)) >= 0)
+ continue;
+
+ if (errno != ESRCH)
+ continue;
+
+ set_remove(pids, p);
}
+ if (set_isempty(pids))
+ return;
+
n = now(CLOCK_MONOTONIC);
if (n >= until)
return;
timespec_store(&ts, until - n);
-
- if ((k = sigtimedwait(mask, NULL, &ts)) != SIGCHLD) {
+ k = sigtimedwait(mask, NULL, &ts);
+ if (k != SIGCHLD) {
if (k < 0 && errno != EAGAIN) {
log_error("sigtimedwait() failed: %m");
@@ -117,10 +149,9 @@ static void wait_for_children(int n_processes, sigset_t *mask) {
}
}
-static int killall(int sig) {
- DIR *dir;
+static int killall(int sig, Set *pids) {
+ _cleanup_closedir_ DIR *dir = NULL;
struct dirent *d;
- unsigned int n_processes = 0;
dir = opendir("/proc");
if (!dir)
@@ -143,23 +174,25 @@ static int killall(int sig) {
_cleanup_free_ char *s;
get_process_comm(pid, &s);
- log_notice("Sending SIGKILL to PID %lu (%s)", (unsigned long) pid, strna(s));
+ log_notice("Sending SIGKILL to PID %lu (%s).", (unsigned long) pid, strna(s));
}
- if (kill(pid, sig) >= 0)
- n_processes++;
- else if (errno != ENOENT)
+ if (kill(pid, sig) >= 0) {
+ if (pids)
+ set_put(pids, ULONG_TO_PTR((unsigned long) pid));
+ } else if (errno != ENOENT)
log_warning("Could not kill %d: %m", pid);
}
- closedir(dir);
-
- return n_processes;
+ return set_size(pids);
}
void broadcast_signal(int sig, bool wait_for_exit) {
sigset_t mask, oldmask;
- int n_processes;
+ Set *pids;
+
+ if (wait_for_exit)
+ pids = set_new(trivial_hash_func, trivial_compare_func);
assert_se(sigemptyset(&mask) == 0);
assert_se(sigaddset(&mask, SIGCHLD) == 0);
@@ -168,17 +201,15 @@ void broadcast_signal(int sig, bool wait_for_exit) {
if (kill(-1, SIGSTOP) < 0 && errno != ESRCH)
log_warning("kill(-1, SIGSTOP) failed: %m");
- n_processes = killall(sig);
+ killall(sig, pids);
if (kill(-1, SIGCONT) < 0 && errno != ESRCH)
log_warning("kill(-1, SIGCONT) failed: %m");
- if (n_processes <= 0)
- goto finish;
-
if (wait_for_exit)
- wait_for_children(n_processes, &mask);
+ wait_for_children(pids, &mask);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
-finish:
- sigprocmask(SIG_SETMASK, &oldmask, NULL);
+ set_free(pids);
}