diff options
Diffstat (limited to 'src/libsystemd/sd-event/sd-event.c')
-rw-r--r-- | src/libsystemd/sd-event/sd-event.c | 717 |
1 files changed, 453 insertions, 264 deletions
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 76964aa0cc..9857f8b1fc 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -1,5 +1,3 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - /*** This file is part of systemd. @@ -23,19 +21,24 @@ #include <sys/timerfd.h> #include <sys/wait.h> -#include "sd-id128.h" #include "sd-daemon.h" -#include "macro.h" -#include "prioq.h" +#include "sd-event.h" +#include "sd-id128.h" + +#include "alloc-util.h" +#include "fd-util.h" #include "hashmap.h" -#include "util.h" -#include "time-util.h" +#include "list.h" +#include "macro.h" #include "missing.h" +#include "prioq.h" +#include "process-util.h" #include "set.h" -#include "list.h" #include "signal-util.h" - -#include "sd-event.h" +#include "string-table.h" +#include "string-util.h" +#include "time-util.h" +#include "util.h" #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC) @@ -56,9 +59,39 @@ typedef enum EventSourceType { _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1 } EventSourceType; +static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = { + [SOURCE_IO] = "io", + [SOURCE_TIME_REALTIME] = "realtime", + [SOURCE_TIME_BOOTTIME] = "bootime", + [SOURCE_TIME_MONOTONIC] = "monotonic", + [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm", + [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm", + [SOURCE_SIGNAL] = "signal", + [SOURCE_CHILD] = "child", + [SOURCE_DEFER] = "defer", + [SOURCE_POST] = "post", + [SOURCE_EXIT] = "exit", + [SOURCE_WATCHDOG] = "watchdog", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); + +/* All objects we use in epoll events start with this value, so that + * we know how to dispatch it */ +typedef enum WakeupType { + WAKEUP_NONE, + WAKEUP_EVENT_SOURCE, + WAKEUP_CLOCK_DATA, + WAKEUP_SIGNAL_DATA, + _WAKEUP_TYPE_MAX, + _WAKEUP_TYPE_INVALID = -1, +} WakeupType; + #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM) struct sd_event_source { + WakeupType wakeup; + unsigned n_ref; sd_event *event; @@ -76,8 +109,8 @@ struct sd_event_source { int64_t priority; unsigned pending_index; unsigned prepare_index; - unsigned pending_iteration; - unsigned prepare_iteration; + uint64_t pending_iteration; + uint64_t prepare_iteration; LIST_FIELDS(sd_event_source, sources); @@ -120,6 +153,7 @@ struct sd_event_source { }; struct clock_data { + WakeupType wakeup; int fd; /* For all clocks we maintain two priority queues each, one @@ -136,11 +170,23 @@ struct clock_data { bool needs_rearm:1; }; +struct signal_data { + WakeupType wakeup; + + /* For each priority we maintain one signal fd, so that we + * only have to dequeue a single event per priority at a + * time. */ + + int fd; + int64_t priority; + sigset_t sigset; + sd_event_source *current; +}; + struct sd_event { unsigned n_ref; int epoll_fd; - int signal_fd; int watchdog_fd; Prioq *pending; @@ -157,8 +203,8 @@ struct sd_event { usec_t perturb; - sigset_t sigset; - sd_event_source **signal_sources; + sd_event_source **signal_sources; /* indexed by signal number */ + Hashmap *signal_data; /* indexed by priority */ Hashmap *child_sources; unsigned n_enabled_child_sources; @@ -169,14 +215,14 @@ struct sd_event { pid_t original_pid; - unsigned iteration; - dual_timestamp timestamp; - usec_t timestamp_boottime; + uint64_t iteration; + triple_timestamp timestamp; int state; bool exit_requested:1; bool need_process_child:1; bool watchdog:1; + bool profile_delays:1; int exit_code; @@ -188,6 +234,9 @@ struct sd_event { unsigned n_sources; LIST_HEAD(sd_event_source, sources); + + usec_t last_run, last_log; + unsigned delays[sizeof(usec_t) * 8]; }; static void source_disconnect(sd_event_source *s); @@ -216,12 +265,6 @@ static int pending_prioq_compare(const void *a, const void *b) { if (x->pending_iteration > y->pending_iteration) return 1; - /* Stability for the rest */ - if (x < y) - return -1; - if (x > y) - return 1; - return 0; } @@ -231,6 +274,12 @@ static int prepare_prioq_compare(const void *a, const void *b) { assert(x->prepare); assert(y->prepare); + /* Enabled ones first */ + if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF) + return -1; + if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF) + return 1; + /* Move most recently prepared ones last, so that we can stop * preparing as soon as we hit one that has already been * prepared in the current iteration */ @@ -239,24 +288,12 @@ static int prepare_prioq_compare(const void *a, const void *b) { if (x->prepare_iteration > y->prepare_iteration) return 1; - /* Enabled ones first */ - if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF) - return -1; - if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF) - return 1; - /* Lower priority values first */ if (x->priority < y->priority) return -1; if (x->priority > y->priority) return 1; - /* Stability for the rest */ - if (x < y) - return -1; - if (x > y) - return 1; - return 0; } @@ -284,15 +321,13 @@ static int earliest_time_prioq_compare(const void *a, const void *b) { if (x->time.next > y->time.next) return 1; - /* Stability for the rest */ - if (x < y) - return -1; - if (x > y) - return 1; - return 0; } +static usec_t time_event_source_latest(const sd_event_source *s) { + return usec_add(s->time.next, s->time.accuracy); +} + static int latest_time_prioq_compare(const void *a, const void *b) { const sd_event_source *x = a, *y = b; @@ -312,15 +347,9 @@ static int latest_time_prioq_compare(const void *a, const void *b) { return 1; /* Order by time */ - if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy) + if (time_event_source_latest(x) < time_event_source_latest(y)) return -1; - if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy) - return 1; - - /* Stability for the rest */ - if (x < y) - return -1; - if (x > y) + if (time_event_source_latest(x) > time_event_source_latest(y)) return 1; return 0; @@ -344,17 +373,12 @@ static int exit_prioq_compare(const void *a, const void *b) { if (x->priority > y->priority) return 1; - /* Stability for the rest */ - if (x < y) - return -1; - if (x > y) - return 1; - return 0; } static void free_clock_data(struct clock_data *d) { assert(d); + assert(d->wakeup == WAKEUP_CLOCK_DATA); safe_close(d->fd); prioq_free(d->earliest); @@ -378,7 +402,6 @@ static void event_free(sd_event *e) { *(e->default_event_ptr) = NULL; safe_close(e->epoll_fd); - safe_close(e->signal_fd); safe_close(e->watchdog_fd); free_clock_data(&e->realtime); @@ -392,6 +415,7 @@ static void event_free(sd_event *e) { prioq_free(e->exit); free(e->signal_sources); + hashmap_free(e->signal_data); hashmap_free(e->child_sources); set_free(e->post_sources); @@ -409,18 +433,15 @@ _public_ int sd_event_new(sd_event** ret) { return -ENOMEM; e->n_ref = 1; - e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; + e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY; + e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA; e->original_pid = getpid(); e->perturb = USEC_INFINITY; - assert_se(sigemptyset(&e->sigset) == 0); - - e->pending = prioq_new(pending_prioq_compare); - if (!e->pending) { - r = -ENOMEM; + r = prioq_ensure_allocated(&e->pending, pending_prioq_compare); + if (r < 0) goto fail; - } e->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (e->epoll_fd < 0) { @@ -428,6 +449,11 @@ _public_ int sd_event_new(sd_event** ret) { goto fail; } + if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) { + log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s."); + e->profile_delays = true; + } + *ret = e; return 0; @@ -437,7 +463,9 @@ fail: } _public_ sd_event* sd_event_ref(sd_event *e) { - assert_return(e, NULL); + + if (!e) + return NULL; assert(e->n_ref >= 1); e->n_ref++; @@ -481,7 +509,9 @@ static void source_io_unregister(sd_event_source *s) { return; r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL); - assert_log(r >= 0); + if (r < 0) + log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m", + strna(s->description), event_source_type_to_string(s->type)); s->io.registered = false; } @@ -508,7 +538,6 @@ static int source_io_register( r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev); else r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev); - if (r < 0) return -errno; @@ -590,45 +619,173 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) { } } -static bool need_signal(sd_event *e, int signal) { - return (e->signal_sources && e->signal_sources[signal] && - e->signal_sources[signal]->enabled != SD_EVENT_OFF) - || - (signal == SIGCHLD && - e->n_enabled_child_sources > 0); -} +static int event_make_signal_data( + sd_event *e, + int sig, + struct signal_data **ret) { -static int event_update_signal_fd(sd_event *e) { struct epoll_event ev = {}; - bool add_to_epoll; + struct signal_data *d; + bool added = false; + sigset_t ss_copy; + int64_t priority; int r; assert(e); if (event_pid_changed(e)) - return 0; + return -ECHILD; - add_to_epoll = e->signal_fd < 0; + if (e->signal_sources && e->signal_sources[sig]) + priority = e->signal_sources[sig]->priority; + else + priority = 0; - r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC); - if (r < 0) - return -errno; + d = hashmap_get(e->signal_data, &priority); + if (d) { + if (sigismember(&d->sigset, sig) > 0) { + if (ret) + *ret = d; + return 0; + } + } else { + r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops); + if (r < 0) + return r; - e->signal_fd = r; + d = new0(struct signal_data, 1); + if (!d) + return -ENOMEM; - if (!add_to_epoll) + d->wakeup = WAKEUP_SIGNAL_DATA; + d->fd = -1; + d->priority = priority; + + r = hashmap_put(e->signal_data, &d->priority, d); + if (r < 0) { + free(d); + return r; + } + + added = true; + } + + ss_copy = d->sigset; + assert_se(sigaddset(&ss_copy, sig) >= 0); + + r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC); + if (r < 0) { + r = -errno; + goto fail; + } + + d->sigset = ss_copy; + + if (d->fd >= 0) { + if (ret) + *ret = d; return 0; + } + + d->fd = r; ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL); + ev.data.ptr = d; - r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev); - if (r < 0) { - e->signal_fd = safe_close(e->signal_fd); - return -errno; + r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev); + if (r < 0) { + r = -errno; + goto fail; } + if (ret) + *ret = d; + return 0; + +fail: + if (added) { + d->fd = safe_close(d->fd); + hashmap_remove(e->signal_data, &d->priority); + free(d); + } + + return r; +} + +static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) { + assert(e); + assert(d); + + /* Turns off the specified signal in the signal data + * object. If the signal mask of the object becomes empty that + * way removes it. */ + + if (sigismember(&d->sigset, sig) == 0) + return; + + assert_se(sigdelset(&d->sigset, sig) >= 0); + + if (sigisemptyset(&d->sigset)) { + + /* If all the mask is all-zero we can get rid of the structure */ + hashmap_remove(e->signal_data, &d->priority); + assert(!d->current); + safe_close(d->fd); + free(d); + return; + } + + assert(d->fd >= 0); + + if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0) + log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m"); +} + +static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) { + struct signal_data *d; + static const int64_t zero_priority = 0; + + assert(e); + + /* Rechecks if the specified signal is still something we are + * interested in. If not, we'll unmask it, and possibly drop + * the signalfd for it. */ + + if (sig == SIGCHLD && + e->n_enabled_child_sources > 0) + return; + + if (e->signal_sources && + e->signal_sources[sig] && + e->signal_sources[sig]->enabled != SD_EVENT_OFF) + return; + + /* + * The specified signal might be enabled in three different queues: + * + * 1) the one that belongs to the priority passed (if it is non-NULL) + * 2) the one that belongs to the priority of the event source of the signal (if there is one) + * 3) the 0 priority (to cover the SIGCHLD case) + * + * Hence, let's remove it from all three here. + */ + + if (priority) { + d = hashmap_get(e->signal_data, priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + if (e->signal_sources && e->signal_sources[sig]) { + d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + d = hashmap_get(e->signal_data, &zero_priority); + if (d) + event_unmask_signal_data(e, d, sig); } static void source_disconnect(sd_event_source *s) { @@ -667,17 +824,11 @@ static void source_disconnect(sd_event_source *s) { case SOURCE_SIGNAL: if (s->signal.sig > 0) { + if (s->event->signal_sources) s->event->signal_sources[s->signal.sig] = NULL; - /* If the signal was on and now it is off... */ - if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) { - assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } + event_gc_signal_data(s->event, &s->priority, s->signal.sig); } break; @@ -687,18 +838,10 @@ static void source_disconnect(sd_event_source *s) { if (s->enabled != SD_EVENT_OFF) { assert(s->event->n_enabled_child_sources > 0); s->event->n_enabled_child_sources--; - - /* We know the signal was on, if it is off now... */ - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } } - hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid)); + (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid)); + event_gc_signal_data(s->event, &s->priority, SIGCHLD); } break; @@ -777,6 +920,14 @@ static int source_set_pending(sd_event_source *s, bool b) { d->needs_rearm = true; } + if (s->type == SOURCE_SIGNAL && !b) { + struct signal_data *d; + + d = hashmap_get(s->event->signal_data, &s->priority); + if (d && d->current == s) + d->current = NULL; + } + return 0; } @@ -799,7 +950,7 @@ static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType t sd_event_ref(e); LIST_PREPEND(sources, e->sources, s); - e->n_sources ++; + e->n_sources++; return s; } @@ -816,7 +967,7 @@ _public_ int sd_event_add_io( int r; assert_return(e, -EINVAL); - assert_return(fd >= 0, -EINVAL); + assert_return(fd >= 0, -EBADF); assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); assert_return(callback, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); @@ -826,6 +977,7 @@ _public_ int sd_event_add_io( if (!s) return -ENOMEM; + s->wakeup = WAKEUP_EVENT_SOURCE; s->io.fd = fd; s->io.events = events; s->io.callback = callback; @@ -882,7 +1034,7 @@ static int event_setup_timer_fd( return -errno; ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock)); + ev.data.ptr = d; r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev); if (r < 0) { @@ -915,31 +1067,30 @@ _public_ int sd_event_add_time( int r; assert_return(e, -EINVAL); - assert_return(usec != (uint64_t) -1, -EINVAL); assert_return(accuracy != (uint64_t) -1, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(e), -ECHILD); + if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */ + return -EOPNOTSUPP; + + type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */ + if (type < 0) + return -EOPNOTSUPP; + if (!callback) callback = time_exit_callback; - type = clock_to_event_source_type(clock); - assert_return(type >= 0, -EOPNOTSUPP); - d = event_get_clock_data(e, type); assert(d); - if (!d->earliest) { - d->earliest = prioq_new(earliest_time_prioq_compare); - if (!d->earliest) - return -ENOMEM; - } + r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare); + if (r < 0) + return r; - if (!d->latest) { - d->latest = prioq_new(latest_time_prioq_compare); - if (!d->latest) - return -ENOMEM; - } + r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare); + if (r < 0) + return r; if (d->fd < 0) { r = event_setup_timer_fd(e, d, clock); @@ -992,13 +1143,12 @@ _public_ int sd_event_add_signal( void *userdata) { sd_event_source *s; + struct signal_data *d; sigset_t ss; int r; - bool previous; assert_return(e, -EINVAL); - assert_return(sig > 0, -EINVAL); - assert_return(sig < _NSIG, -EINVAL); + assert_return(SIGNAL_VALID(sig), -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(e), -ECHILD); @@ -1006,8 +1156,8 @@ _public_ int sd_event_add_signal( callback = signal_exit_callback; r = pthread_sigmask(SIG_SETMASK, NULL, &ss); - if (r < 0) - return -errno; + if (r != 0) + return -r; if (!sigismember(&ss, sig)) return -EBUSY; @@ -1019,8 +1169,6 @@ _public_ int sd_event_add_signal( } else if (e->signal_sources[sig]) return -EBUSY; - previous = need_signal(e, sig); - s = source_new(e, !ret, SOURCE_SIGNAL); if (!s) return -ENOMEM; @@ -1032,14 +1180,10 @@ _public_ int sd_event_add_signal( e->signal_sources[sig] = s; - if (!previous) { - assert_se(sigaddset(&e->sigset, sig) == 0); - - r = event_update_signal_fd(e); - if (r < 0) { - source_free(s); - return r; - } + r = event_make_signal_data(e, sig, &d); + if (r < 0) { + source_free(s); + return r; } /* Use the signal name as description for the event source by default */ @@ -1061,7 +1205,6 @@ _public_ int sd_event_add_child( sd_event_source *s; int r; - bool previous; assert_return(e, -EINVAL); assert_return(pid > 1, -EINVAL); @@ -1075,11 +1218,9 @@ _public_ int sd_event_add_child( if (r < 0) return r; - if (hashmap_contains(e->child_sources, INT_TO_PTR(pid))) + if (hashmap_contains(e->child_sources, PID_TO_PTR(pid))) return -EBUSY; - previous = need_signal(e, SIGCHLD); - s = source_new(e, !ret, SOURCE_CHILD); if (!s) return -ENOMEM; @@ -1090,22 +1231,19 @@ _public_ int sd_event_add_child( s->userdata = userdata; s->enabled = SD_EVENT_ONESHOT; - r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s); + r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s); if (r < 0) { source_free(s); return r; } - e->n_enabled_child_sources ++; - - if (!previous) { - assert_se(sigaddset(&e->sigset, SIGCHLD) == 0); + e->n_enabled_child_sources++; - r = event_update_signal_fd(e); - if (r < 0) { - source_free(s); - return r; - } + r = event_make_signal_data(e, SIGCHLD, NULL); + if (r < 0) { + e->n_enabled_child_sources--; + source_free(s); + return r; } e->need_process_child = true; @@ -1202,11 +1340,9 @@ _public_ int sd_event_add_exit( assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(e), -ECHILD); - if (!e->exit) { - e->exit = prioq_new(exit_prioq_compare); - if (!e->exit) - return -ENOMEM; - } + r = prioq_ensure_allocated(&e->exit, exit_prioq_compare); + if (r < 0) + return r; s = source_new(e, !ret, SOURCE_EXIT); if (!s) @@ -1230,7 +1366,9 @@ _public_ int sd_event_add_exit( } _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) { - assert_return(s, NULL); + + if (!s) + return NULL; assert(s->n_ref >= 1); s->n_ref++; @@ -1311,7 +1449,7 @@ _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) { int r; assert_return(s, -EINVAL); - assert_return(fd >= 0, -EINVAL); + assert_return(fd >= 0, -EBADF); assert_return(s->type == SOURCE_IO, -EDOM); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1405,6 +1543,8 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) } _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) { + int r; + assert_return(s, -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1412,7 +1552,25 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) if (s->priority == priority) return 0; - s->priority = priority; + if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) { + struct signal_data *old, *d; + + /* Move us from the signalfd belonging to the old + * priority to the signalfd of the new priority */ + + assert_se(old = hashmap_get(s->event->signal_data, &s->priority)); + + s->priority = priority; + + r = event_make_signal_data(s->event, s->signal.sig, &d); + if (r < 0) { + s->priority = old->priority; + return r; + } + + event_unmask_signal_data(s->event, old, s->signal.sig); + } else + s->priority = priority; if (s->pending) prioq_reshuffle(s->event->pending, s, &s->pending_index); @@ -1477,34 +1635,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } case SOURCE_SIGNAL: - assert(need_signal(s->event, s->signal.sig)); - s->enabled = m; - if (!need_signal(s->event, s->signal.sig)) { - assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } - + event_gc_signal_data(s->event, &s->priority, s->signal.sig); break; case SOURCE_CHILD: - assert(need_signal(s->event, SIGCHLD)); - s->enabled = m; assert(s->event->n_enabled_child_sources > 0); s->event->n_enabled_child_sources--; - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0); - - (void) event_update_signal_fd(s->event); - } - + event_gc_signal_data(s->event, &s->priority, SIGCHLD); break; case SOURCE_EXIT: @@ -1550,37 +1692,33 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } case SOURCE_SIGNAL: - /* Check status before enabling. */ - if (!need_signal(s->event, s->signal.sig)) { - assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0); - - r = event_update_signal_fd(s->event); - if (r < 0) { - s->enabled = SD_EVENT_OFF; - return r; - } - } s->enabled = m; + + r = event_make_signal_data(s->event, s->signal.sig, NULL); + if (r < 0) { + s->enabled = SD_EVENT_OFF; + event_gc_signal_data(s->event, &s->priority, s->signal.sig); + return r; + } + break; case SOURCE_CHILD: - /* Check status before enabling. */ - if (s->enabled == SD_EVENT_OFF) { - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0); - - r = event_update_signal_fd(s->event); - if (r < 0) { - s->enabled = SD_EVENT_OFF; - return r; - } - } + if (s->enabled == SD_EVENT_OFF) s->event->n_enabled_child_sources++; - } s->enabled = m; + + r = event_make_signal_data(s->event, SIGCHLD, NULL); + if (r < 0) { + s->enabled = SD_EVENT_OFF; + s->event->n_enabled_child_sources--; + event_gc_signal_data(s->event, &s->priority, SIGCHLD); + return r; + } + break; case SOURCE_EXIT: @@ -1621,7 +1759,6 @@ _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { struct clock_data *d; assert_return(s, -EINVAL); - assert_return(usec != (uint64_t) -1, -EINVAL); assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1751,6 +1888,8 @@ static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) { if (a <= 0) return 0; + if (a >= USEC_INFINITY) + return USEC_INFINITY; if (b <= a + 1) return a; @@ -1840,7 +1979,7 @@ static int event_arm_timer( d->needs_rearm = false; a = prioq_peek(d->earliest); - if (!a || a->enabled == SD_EVENT_OFF) { + if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) { if (d->fd < 0) return 0; @@ -1860,7 +1999,7 @@ static int event_arm_timer( b = prioq_peek(d->latest); assert_se(b && b->enabled != SD_EVENT_OFF); - t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy); + t = sleep_between(e, a->time.next, time_event_source_latest(b)); if (d->next == t) return 0; @@ -2024,20 +2163,35 @@ static int process_child(sd_event *e) { return 0; } -static int process_signal(sd_event *e, uint32_t events) { +static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) { bool read_one = false; int r; assert(e); - assert_return(events == EPOLLIN, -EIO); + /* If there's a signal queued on this priority and SIGCHLD is + on this priority too, then make sure to recheck the + children we watch. This is because we only ever dequeue + the first signal per priority, and if we dequeue one, and + SIGCHLD might be enqueued later we wouldn't know, but we + might have higher priority children we care about hence we + need to check that explicitly. */ + + if (sigismember(&d->sigset, SIGCHLD)) + e->need_process_child = true; + + /* If there's already an event source pending for this + * priority we don't read another */ + if (d->current) + return 0; + for (;;) { struct signalfd_siginfo si; ssize_t n; sd_event_source *s = NULL; - n = read(e->signal_fd, &si, sizeof(si)); + n = read(d->fd, &si, sizeof(si)); if (n < 0) { if (errno == EAGAIN || errno == EINTR) return read_one; @@ -2048,28 +2202,25 @@ static int process_signal(sd_event *e, uint32_t events) { if (_unlikely_(n != sizeof(si))) return -EIO; - assert(si.ssi_signo < _NSIG); + assert(SIGNAL_VALID(si.ssi_signo)); read_one = true; - if (si.ssi_signo == SIGCHLD) { - r = process_child(e); - if (r < 0) - return r; - if (r > 0) - continue; - } - if (e->signal_sources) s = e->signal_sources[si.ssi_signo]; - if (!s) continue; + if (s->pending) + continue; s->signal.siginfo = si; + d->current = s; + r = source_set_pending(s, true); if (r < 0) return r; + + return 1; } } @@ -2164,12 +2315,9 @@ static int source_dispatch(sd_event_source *s) { s->dispatching = false; - if (r < 0) { - if (s->description) - log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description); - else - log_debug_errno(r, "Event source %p returned error, disabling: %m", s); - } + if (r < 0) + log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m", + strna(s->description), event_source_type_to_string(s->type)); if (s->n_ref == 0) source_free(s); @@ -2202,12 +2350,9 @@ static int event_prepare(sd_event *e) { r = s->prepare(s, s->userdata); s->dispatching = false; - if (r < 0) { - if (s->description) - log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description); - else - log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s); - } + if (r < 0) + log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m", + strna(s->description), event_source_type_to_string(s->type)); if (s->n_ref == 0) source_free(s); @@ -2312,7 +2457,9 @@ _public_ int sd_event_prepare(sd_event *e) { e->iteration++; + e->state = SD_EVENT_PREPARING; r = event_prepare(e); + e->state = SD_EVENT_INITIAL; if (r < 0) return r; @@ -2382,28 +2529,35 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { goto finish; } - dual_timestamp_get(&e->timestamp); - e->timestamp_boottime = now(CLOCK_BOOTTIME); + triple_timestamp_get(&e->timestamp); for (i = 0; i < m; i++) { - if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME)) - r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME)) - r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC)) - r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM)) - r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM)) - r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL)) - r = process_signal(e, ev_queue[i].events); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) + if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL); - else - r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); + else { + WakeupType *t = ev_queue[i].data.ptr; + + switch (*t) { + case WAKEUP_EVENT_SOURCE: + r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + + case WAKEUP_CLOCK_DATA: { + struct clock_data *d = ev_queue[i].data.ptr; + r = flush_timer(e, d->fd, ev_queue[i].events, &d->next); + break; + } + + case WAKEUP_SIGNAL_DATA: + r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + + default: + assert_not_reached("Invalid wake-up pointer"); + } + } if (r < 0) goto finish; } @@ -2416,7 +2570,7 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { if (r < 0) goto finish; - r = process_timer(e, e->timestamp_boottime, &e->boottime); + r = process_timer(e, e->timestamp.boottime, &e->boottime); if (r < 0) goto finish; @@ -2428,7 +2582,7 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { if (r < 0) goto finish; - r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm); + r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm); if (r < 0) goto finish; @@ -2482,6 +2636,18 @@ _public_ int sd_event_dispatch(sd_event *e) { return 1; } +static void event_log_delays(sd_event *e) { + char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1]; + unsigned i; + int o; + + for (i = o = 0; i < ELEMENTSOF(e->delays); i++) { + o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]); + e->delays[i] = 0; + } + log_debug("Event loop iterations: %.*s", o, b); +} + _public_ int sd_event_run(sd_event *e, uint64_t timeout) { int r; @@ -2490,11 +2656,30 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) { assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); + if (e->profile_delays && e->last_run) { + usec_t this_run; + unsigned l; + + this_run = now(CLOCK_MONOTONIC); + + l = u64log2(this_run - e->last_run); + assert(l < sizeof(e->delays)); + e->delays[l]++; + + if (this_run - e->last_log >= 5*USEC_PER_SEC) { + event_log_delays(e); + e->last_log = this_run; + } + } + r = sd_event_prepare(e); if (r == 0) /* There was nothing? Then wait... */ r = sd_event_wait(e, timeout); + if (e->profile_delays) + e->last_run = now(CLOCK_MONOTONIC); + if (r > 0) { /* There's something now, then let's dispatch it */ r = sd_event_dispatch(e); @@ -2572,27 +2757,23 @@ _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) { assert_return(usec, -EINVAL); assert_return(!event_pid_changed(e), -ECHILD); - /* If we haven't run yet, just get the actual time */ - if (!dual_timestamp_is_set(&e->timestamp)) - return -ENODATA; - - switch (clock) { + if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock)) + return -EOPNOTSUPP; - case CLOCK_REALTIME: - case CLOCK_REALTIME_ALARM: - *usec = e->timestamp.realtime; - break; + /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here, + * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for + * the purpose of getting the time this doesn't matter. */ + if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported()) + return -EOPNOTSUPP; - case CLOCK_MONOTONIC: - *usec = e->timestamp.monotonic; - break; - - case CLOCK_BOOTTIME: - case CLOCK_BOOTTIME_ALARM: - *usec = e->timestamp_boottime; - break; + if (!triple_timestamp_is_set(&e->timestamp)) { + /* Implicitly fall back to now() if we never ran + * before and thus have no cached time. */ + *usec = now(clock); + return 1; } + *usec = triple_timestamp_by_clock(&e->timestamp, clock); return 0; } @@ -2693,3 +2874,11 @@ _public_ int sd_event_get_watchdog(sd_event *e) { return e->watchdog; } + +_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) { + assert_return(e, -EINVAL); + assert_return(!event_pid_changed(e), -ECHILD); + + *ret = e->iteration; + return 0; +} |