summaryrefslogtreecommitdiff
path: root/core/udev/0001-udevd-kill-hanging-event-processes-after-30-seconds.patch
blob: 3ff89ae43631396c7eef60fc87e752a048888564 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
From e64fae5573e566ce4fd9b23c68ac8f3096603314 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Wed, 18 Jan 2012 05:06:18 +0100
Subject: [PATCH] udevd: kill hanging event processes after 30 seconds

Some broken kernel drivers load firmware synchronously in the module init
path and block modprobe until the firmware request is fulfilled.

The modprobe-generated firmware request is a direct child device of the
device which caused modprobe to run. Child device events are blocked until
the parent device is handled. This dead-locks until the kernel firmware
loading timeout of 60 seconds is reached.

The hanging modprobe event should now time-out and allow the firmware
event to run before the 60 second kernel timeout.
---
 src/udev-event.c |    2 +-
 src/udevd.c      |   62 +++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/src/udev-event.c b/src/udev-event.c
index 9bdc518..f0b9548 100644
--- a/src/udev-event.c
+++ b/src/udev-event.c
@@ -49,7 +49,7 @@ struct udev_event *udev_event_new(struct udev_device *dev)
         udev_list_init(udev, &event->run_list, false);
         event->fd_signal = -1;
         event->birth_usec = now_usec();
-        event->timeout_usec = 60 * 1000 * 1000;
+        event->timeout_usec = 30 * 1000 * 1000;
         dbg(event->udev, "allocated event %p\n", event);
         return event;
 }
diff --git a/src/udevd.c b/src/udevd.c
index 11ab19a..77a1e79 100644
--- a/src/udevd.c
+++ b/src/udevd.c
@@ -133,6 +133,7 @@ struct worker {
         struct udev_monitor *monitor;
         enum worker_state state;
         struct event *event;
+        unsigned long long event_start_usec;
 };
 
 /* passed from worker to main process */
@@ -372,6 +373,7 @@ out:
                 close(fd_inotify);
                 close(worker_watch[WRITE_END]);
                 udev_rules_unref(rules);
+                udev_builtin_exit(udev);
                 udev_monitor_unref(worker_monitor);
                 udev_unref(udev);
                 udev_log_close();
@@ -389,6 +391,7 @@ out:
                 worker->monitor = worker_monitor;
                 worker->pid = pid;
                 worker->state = WORKER_RUNNING;
+                worker->event_start_usec = now_usec();
                 worker->event = event;
                 event->state = EVENT_RUNNING;
                 udev_list_node_append(&worker->node, &worker_list);
@@ -419,6 +422,7 @@ static void event_run(struct event *event)
                 worker_ref(worker);
                 worker->event = event;
                 worker->state = WORKER_RUNNING;
+                worker->event_start_usec = now_usec();
                 event->state = EVENT_RUNNING;
                 return;
         }
@@ -610,9 +614,11 @@ static void worker_returned(int fd_worker)
                                 continue;
 
                         /* worker returned */
-                        worker->event->exitcode = msg.exitcode;
-                        event_queue_delete(worker->event, true);
-                        worker->event = NULL;
+                        if (worker->event) {
+                                worker->event->exitcode = msg.exitcode;
+                                event_queue_delete(worker->event, true);
+                                worker->event = NULL;
+                        }
                         if (worker->state != WORKER_KILLED)
                                 worker->state = WORKER_IDLE;
                         worker_unref(worker);
@@ -796,7 +802,7 @@ static void handle_signal(struct udev *udev, int signo)
                                 }
 
                                 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-                                        if (worker->event != NULL) {
+                                        if (worker->event) {
                                                 err(udev, "worker [%u] failed while handling '%s'\n",
                                                     pid, worker->event->devpath);
                                                 worker->event->exitcode = -32;
@@ -1574,25 +1580,57 @@ int main(int argc, char *argv[])
                                 break;
 
                         /* timeout at exit for workers to finish */
-                        timeout = 60 * 1000;
-                } else if (udev_list_node_is_empty(&event_list) && children > 2) {
-                        /* set timeout to kill idle workers */
-                        timeout = 3 * 1000;
-                } else {
+                        timeout = 30 * 1000;
+                } else if (udev_list_node_is_empty(&event_list) && children <= 2) {
+                        /* we are idle */
                         timeout = -1;
+                } else {
+                        /* kill idle or hanging workers */
+                        timeout = 3 * 1000;
                 }
                 fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), timeout);
                 if (fdcount < 0)
                         continue;
 
                 if (fdcount == 0) {
+                        struct udev_list_node *loop;
+
+                        /* timeout */
                         if (udev_exit) {
-                                info(udev, "timeout, giving up waiting for workers to finish\n");
+                                err(udev, "timeout, giving up waiting for workers to finish\n");
                                 break;
                         }
 
-                        /* timeout - kill idle workers */
-                        worker_kill(udev, 2);
+                        /* kill idle workers */
+                        if (udev_list_node_is_empty(&event_list)) {
+                                info(udev, "cleanup idle workers\n");
+                                worker_kill(udev, 2);
+                        }
+
+                        /* check for hanging events */
+                        udev_list_node_foreach(loop, &worker_list) {
+                                struct worker *worker = node_to_worker(loop);
+
+                                if (worker->state != WORKER_RUNNING)
+                                        continue;
+
+                                if ((now_usec() - worker->event_start_usec) > 30 * 1000 * 1000) {
+                                        err(udev, "worker [%u] %s timeout, kill it\n", worker->pid,
+                                            worker->event ? worker->event->devpath : "<idle>");
+                                        kill(worker->pid, SIGKILL);
+                                        worker->state = WORKER_KILLED;
+                                        /* drop reference taken for state 'running' */
+                                        worker_unref(worker);
+                                        if (worker->event) {
+                                                err(udev, "seq %llu '%s' killed\n",
+                                                    udev_device_get_seqnum(worker->event->dev), worker->event->devpath);
+                                                worker->event->exitcode = -64;
+                                                event_queue_delete(worker->event, true);
+                                                worker->event = NULL;
+                                        }
+                                }
+                        }
+
                 }
 
                 is_worker = is_signal = is_inotify = is_netlink = is_ctrl = false;
-- 
1.7.8.3