diff options
author | Vito Caputo <vito.caputo@coreos.com> | 2015-12-10 22:42:22 -0800 |
---|---|---|
committer | Vito Caputo <vito.caputo@coreos.com> | 2016-01-14 16:36:07 -0800 |
commit | 7a24f3bf2fb181243a1957a0cdd54cd919396793 (patch) | |
tree | bd5ab2a1cb566ff68ba316dafdd155ef5dcddd78 | |
parent | c57d67f718077aadee4e2d0940fb87f513b98671 (diff) |
journal: coalesce ftruncate()s in 250ms windows
Prior to this change every journal append causes an ftruncate() for the
sake of inotify propagation of the mmap-based writes.
With this change the notification is deferred up to ~250ms, coalescing
any repeated journal writes during the deferred period into a single
ftruncate(). The ftruncate() call isn't free and doing it on every
append adds unnecessary overhead and latency in the journald event loop.
Introduces journal_file_enable_post_change_timer() which manages a
timer on the provided sd-event instance for scheduling coalesced
ftruncates. The ftruncate() behavior is unchanged unless
journal_file_enable_post_change_timer() is called on the JournalFile.
While not a tremendous improvement, profiling systemd-journald event loop
latencies using instrumentation as introduced by 34b8751 it was observed that
coalescing the ftruncates was low-hanging fruit worth pursuing.
Note orders 12 and 13 shifting left into order 11 and order 6 dipping into
order 5:
Unmodified:
log2(us) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
-----------------------------------------------------------
[10685.414572] 0 0 0 0 38 602 61 2 290 60 1643 2554 13 1 4 1 0 0 1
[10690.415114] 0 0 0 0 0 646 54 7 309 44 2073 2148 17 1 3 0 0 0 1
[10695.415509] 0 0 0 0 1 650 73 3 324 37 2071 2270 9 0 0 1 0 1 0
[10700.416297] 0 0 0 0 0 659 50 4 318 38 2111 2152 6 0 1 0 0 1 1
[10705.417136] 0 0 0 0 2 660 48 4 320 38 2129 2146 12 1 1 0 0 1 1
[10710.489114] 0 0 0 0 0 673 38 3 321 37 1925 2339 7 0 0 0 0 1 1
[10715.489613] 0 0 0 0 3 656 64 8 317 48 2365 2007 7 0 0 0 0 0 1
Coalesced:
log2(us) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
-----------------------------------------------------------
[ 6169.161360] 0 0 0 1 24 786 54 11 389 24 4192 771 6 4 0 0 1 0 1
[ 6174.161705] 0 0 0 1 18 800 35 6 380 27 3977 893 3 1 0 0 1 0 1
[ 6179.162741] 0 0 0 1 28 768 51 4 391 16 3998 831 5 3 0 0 0 0 2
[ 6184.162856] 0 0 0 0 19 770 60 2 376 26 3795 1004 9 5 1 0 1 0 1
[ 6189.163279] 0 0 0 0 28 761 49 7 372 27 3729 1056 3 2 0 0 1 0 1
[ 6194.164255] 0 0 0 0 25 785 49 7 394 19 3996 908 6 3 2 0 0 0 1
[ 6199.164658] 0 0 0 0 29 797 35 5 389 18 3995 898 3 4 1 1 1 0 1
The remaining high-order delays are a result of the synchronous fsyncs in
systemd-journald, beyond the scope of this commit.
-rw-r--r-- | src/journal/journal-file.c | 96 | ||||
-rw-r--r-- | src/journal/journal-file.h | 5 | ||||
-rw-r--r-- | src/journal/journald-server.c | 43 |
3 files changed, 139 insertions, 5 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 6f09301521..1ed46df284 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -39,6 +39,7 @@ #include "lookup3.h" #include "parse-util.h" #include "random-util.h" +#include "sd-event.h" #include "string-util.h" #include "xattr-util.h" @@ -149,6 +150,17 @@ JournalFile* journal_file_close(JournalFile *f) { journal_file_append_tag(f); #endif + if (f->post_change_timer) { + int enabled; + + if (sd_event_source_get_enabled(f->post_change_timer, &enabled) >= 0) + if (enabled == SD_EVENT_ONESHOT) + journal_file_post_change(f); + + sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_OFF); + sd_event_source_unref(f->post_change_timer); + } + journal_file_set_offline(f); if (f->mmap && f->fd >= 0) @@ -1400,6 +1412,77 @@ void journal_file_post_change(JournalFile *f) { log_error_errno(errno, "Failed to truncate file to its own size: %m"); } +static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) { + assert(userdata); + + journal_file_post_change(userdata); + + return 1; +} + +static void schedule_post_change(JournalFile *f) { + sd_event_source *timer; + int enabled, r; + uint64_t now; + + assert(f); + assert(f->post_change_timer); + + timer = f->post_change_timer; + + r = sd_event_source_get_enabled(timer, &enabled); + if (r < 0) { + log_error_errno(-r, "Failed to get ftruncate timer state: %m"); + return; + } + + if (enabled == SD_EVENT_ONESHOT) + return; + + r = sd_event_now(sd_event_source_get_event(timer), CLOCK_MONOTONIC, &now); + if (r < 0) { + log_error_errno(-r, "Failed to get clock's now for scheduling ftruncate: %m"); + return; + } + + r = sd_event_source_set_time(timer, now+f->post_change_timer_period); + if (r < 0) { + log_error_errno(-r, "Failed to set time for scheduling ftruncate: %m"); + return; + } + + r = sd_event_source_set_enabled(timer, SD_EVENT_ONESHOT); + if (r < 0) { + log_error_errno(-r, "Failed to enable scheduled ftruncate: %m"); + return; + } +} + +/* Enable coalesced change posting in a timer on the provided sd_event instance */ +int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) { + _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL; + int r; + + assert(f); + assert_return(!f->post_change_timer, -EINVAL); + assert(e); + assert(t); + + r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f); + if (r < 0) + return r; + + r = sd_event_source_set_enabled(timer, SD_EVENT_OFF); + if (r < 0) + return r; + + f->post_change_timer = timer; + timer = NULL; + f->post_change_timer_period = t; + + return r; +} + static int entry_item_cmp(const void *_a, const void *_b) { const EntryItem *a = _a, *b = _b; @@ -1465,7 +1548,10 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st if (mmap_cache_got_sigbus(f->mmap, f->fd)) r = -EIO; - journal_file_post_change(f); + if (f->post_change_timer) + schedule_post_change(f); + else + journal_file_post_change(f); return r; } @@ -2767,6 +2853,14 @@ int journal_file_open( goto fail; } + if (template && template->post_change_timer) { + sd_event *e = sd_event_source_get_event(template->post_change_timer); + + r = journal_file_enable_post_change_timer(f, e, template->post_change_timer_period); + if (r < 0) + goto fail; + } + *ret = f; return 0; diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 46c1f3278e..7970ebe738 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -33,6 +33,7 @@ #include "journal-def.h" #include "macro.h" #include "mmap-cache.h" +#include "sd-event.h" #include "sparse-endian.h" typedef struct JournalMetrics { @@ -101,6 +102,9 @@ typedef struct JournalFile { JournalMetrics metrics; MMapCache *mmap; + sd_event_source *post_change_timer; + usec_t post_change_timer_period; + OrderedHashmap *chain_cache; #if defined(HAVE_XZ) || defined(HAVE_LZ4) @@ -224,6 +228,7 @@ void journal_file_print_header(JournalFile *f); int journal_file_rotate(JournalFile **f, bool compress, bool seal); void journal_file_post_change(JournalFile *f); +int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t); void journal_reset_metrics(JournalMetrics *m); void journal_default_metrics(JournalMetrics *m, int fd); diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index a8a9b72080..67a275ec76 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -82,6 +82,9 @@ #define NOTIFY_SNDBUF_SIZE (8*1024*1024) +/* The period to insert between posting changes for coalescing */ +#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC) + static int determine_space_for( Server *s, JournalMetrics *metrics, @@ -220,6 +223,38 @@ static void server_add_acls(JournalFile *f, uid_t uid) { #endif } +static int open_journal( + Server *s, + bool reliably, + const char *fname, + int flags, + bool seal, + JournalMetrics *metrics, + JournalFile *template, + JournalFile **ret) { + int r; + + assert(s); + assert(fname); + assert(ret); + + if (reliably) + r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, ret); + else + r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, ret); + + if (r < 0) + return r; + + r = journal_file_enable_post_change_timer(*ret, s->event, POST_CHANGE_TIMER_INTERVAL_USEC); + if (r < 0) { + *ret = journal_file_close(*ret); + return r; + } + + return r; +} + static JournalFile* find_journal(Server *s, uid_t uid) { _cleanup_free_ char *p = NULL; int r; @@ -258,7 +293,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) { journal_file_close(f); } - r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f); + r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &f); if (r < 0) return s->system_journal; @@ -930,7 +965,7 @@ static int system_journal_open(Server *s, bool flush_requested) { (void) mkdir(fn, 0755); fn = strjoina(fn, "/system.journal"); - r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal); + r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &s->system_journal); if (r >= 0) { server_add_acls(s->system_journal, 0); (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL); @@ -953,7 +988,7 @@ static int system_journal_open(Server *s, bool flush_requested) { * if it already exists, so that we can flush * it into the system journal */ - r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal); + r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, NULL, &s->runtime_journal); if (r < 0) { if (r != -ENOENT) log_warning_errno(r, "Failed to open runtime journal: %m"); @@ -970,7 +1005,7 @@ static int system_journal_open(Server *s, bool flush_requested) { (void) mkdir("/run/log/journal", 0755); (void) mkdir_parents(fn, 0750); - r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal); + r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, NULL, &s->runtime_journal); if (r < 0) return log_error_errno(r, "Failed to open runtime journal: %m"); } |