summaryrefslogtreecommitdiff
path: root/src/journal
diff options
context:
space:
mode:
authorVito Caputo <vito.caputo@coreos.com>2015-12-10 22:42:22 -0800
committerVito Caputo <vito.caputo@coreos.com>2016-01-14 16:36:07 -0800
commit7a24f3bf2fb181243a1957a0cdd54cd919396793 (patch)
treebd5ab2a1cb566ff68ba316dafdd155ef5dcddd78 /src/journal
parentc57d67f718077aadee4e2d0940fb87f513b98671 (diff)
journal: coalesce ftruncate()s in 250ms windows
Prior to this change every journal append causes an ftruncate() for the sake of inotify propagation of the mmap-based writes. With this change the notification is deferred up to ~250ms, coalescing any repeated journal writes during the deferred period into a single ftruncate(). The ftruncate() call isn't free and doing it on every append adds unnecessary overhead and latency in the journald event loop. Introduces journal_file_enable_post_change_timer() which manages a timer on the provided sd-event instance for scheduling coalesced ftruncates. The ftruncate() behavior is unchanged unless journal_file_enable_post_change_timer() is called on the JournalFile. While not a tremendous improvement, profiling systemd-journald event loop latencies using instrumentation as introduced by 34b8751 it was observed that coalescing the ftruncates was low-hanging fruit worth pursuing. Note orders 12 and 13 shifting left into order 11 and order 6 dipping into order 5: Unmodified: log2(us) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ----------------------------------------------------------- [10685.414572] 0 0 0 0 38 602 61 2 290 60 1643 2554 13 1 4 1 0 0 1 [10690.415114] 0 0 0 0 0 646 54 7 309 44 2073 2148 17 1 3 0 0 0 1 [10695.415509] 0 0 0 0 1 650 73 3 324 37 2071 2270 9 0 0 1 0 1 0 [10700.416297] 0 0 0 0 0 659 50 4 318 38 2111 2152 6 0 1 0 0 1 1 [10705.417136] 0 0 0 0 2 660 48 4 320 38 2129 2146 12 1 1 0 0 1 1 [10710.489114] 0 0 0 0 0 673 38 3 321 37 1925 2339 7 0 0 0 0 1 1 [10715.489613] 0 0 0 0 3 656 64 8 317 48 2365 2007 7 0 0 0 0 0 1 Coalesced: log2(us) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ----------------------------------------------------------- [ 6169.161360] 0 0 0 1 24 786 54 11 389 24 4192 771 6 4 0 0 1 0 1 [ 6174.161705] 0 0 0 1 18 800 35 6 380 27 3977 893 3 1 0 0 1 0 1 [ 6179.162741] 0 0 0 1 28 768 51 4 391 16 3998 831 5 3 0 0 0 0 2 [ 6184.162856] 0 0 0 0 19 770 60 2 376 26 3795 1004 9 5 1 0 1 0 1 [ 6189.163279] 0 0 0 0 28 761 49 7 372 27 3729 1056 3 2 0 0 1 0 1 [ 6194.164255] 0 0 0 0 25 785 49 7 394 19 3996 908 6 3 2 0 0 0 1 [ 6199.164658] 0 0 0 0 29 797 35 5 389 18 3995 898 3 4 1 1 1 0 1 The remaining high-order delays are a result of the synchronous fsyncs in systemd-journald, beyond the scope of this commit.
Diffstat (limited to 'src/journal')
-rw-r--r--src/journal/journal-file.c96
-rw-r--r--src/journal/journal-file.h5
-rw-r--r--src/journal/journald-server.c43
3 files changed, 139 insertions, 5 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 6f09301521..1ed46df284 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -39,6 +39,7 @@
#include "lookup3.h"
#include "parse-util.h"
#include "random-util.h"
+#include "sd-event.h"
#include "string-util.h"
#include "xattr-util.h"
@@ -149,6 +150,17 @@ JournalFile* journal_file_close(JournalFile *f) {
journal_file_append_tag(f);
#endif
+ if (f->post_change_timer) {
+ int enabled;
+
+ if (sd_event_source_get_enabled(f->post_change_timer, &enabled) >= 0)
+ if (enabled == SD_EVENT_ONESHOT)
+ journal_file_post_change(f);
+
+ sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_OFF);
+ sd_event_source_unref(f->post_change_timer);
+ }
+
journal_file_set_offline(f);
if (f->mmap && f->fd >= 0)
@@ -1400,6 +1412,77 @@ void journal_file_post_change(JournalFile *f) {
log_error_errno(errno, "Failed to truncate file to its own size: %m");
}
+static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) {
+ assert(userdata);
+
+ journal_file_post_change(userdata);
+
+ return 1;
+}
+
+static void schedule_post_change(JournalFile *f) {
+ sd_event_source *timer;
+ int enabled, r;
+ uint64_t now;
+
+ assert(f);
+ assert(f->post_change_timer);
+
+ timer = f->post_change_timer;
+
+ r = sd_event_source_get_enabled(timer, &enabled);
+ if (r < 0) {
+ log_error_errno(-r, "Failed to get ftruncate timer state: %m");
+ return;
+ }
+
+ if (enabled == SD_EVENT_ONESHOT)
+ return;
+
+ r = sd_event_now(sd_event_source_get_event(timer), CLOCK_MONOTONIC, &now);
+ if (r < 0) {
+ log_error_errno(-r, "Failed to get clock's now for scheduling ftruncate: %m");
+ return;
+ }
+
+ r = sd_event_source_set_time(timer, now+f->post_change_timer_period);
+ if (r < 0) {
+ log_error_errno(-r, "Failed to set time for scheduling ftruncate: %m");
+ return;
+ }
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_error_errno(-r, "Failed to enable scheduled ftruncate: %m");
+ return;
+ }
+}
+
+/* Enable coalesced change posting in a timer on the provided sd_event instance */
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL;
+ int r;
+
+ assert(f);
+ assert_return(!f->post_change_timer, -EINVAL);
+ assert(e);
+ assert(t);
+
+ r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ f->post_change_timer = timer;
+ timer = NULL;
+ f->post_change_timer_period = t;
+
+ return r;
+}
+
static int entry_item_cmp(const void *_a, const void *_b) {
const EntryItem *a = _a, *b = _b;
@@ -1465,7 +1548,10 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
if (mmap_cache_got_sigbus(f->mmap, f->fd))
r = -EIO;
- journal_file_post_change(f);
+ if (f->post_change_timer)
+ schedule_post_change(f);
+ else
+ journal_file_post_change(f);
return r;
}
@@ -2767,6 +2853,14 @@ int journal_file_open(
goto fail;
}
+ if (template && template->post_change_timer) {
+ sd_event *e = sd_event_source_get_event(template->post_change_timer);
+
+ r = journal_file_enable_post_change_timer(f, e, template->post_change_timer_period);
+ if (r < 0)
+ goto fail;
+ }
+
*ret = f;
return 0;
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index 46c1f3278e..7970ebe738 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -33,6 +33,7 @@
#include "journal-def.h"
#include "macro.h"
#include "mmap-cache.h"
+#include "sd-event.h"
#include "sparse-endian.h"
typedef struct JournalMetrics {
@@ -101,6 +102,9 @@ typedef struct JournalFile {
JournalMetrics metrics;
MMapCache *mmap;
+ sd_event_source *post_change_timer;
+ usec_t post_change_timer_period;
+
OrderedHashmap *chain_cache;
#if defined(HAVE_XZ) || defined(HAVE_LZ4)
@@ -224,6 +228,7 @@ void journal_file_print_header(JournalFile *f);
int journal_file_rotate(JournalFile **f, bool compress, bool seal);
void journal_file_post_change(JournalFile *f);
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t);
void journal_reset_metrics(JournalMetrics *m);
void journal_default_metrics(JournalMetrics *m, int fd);
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index a8a9b72080..67a275ec76 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -82,6 +82,9 @@
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
+/* The period to insert between posting changes for coalescing */
+#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
+
static int determine_space_for(
Server *s,
JournalMetrics *metrics,
@@ -220,6 +223,38 @@ static void server_add_acls(JournalFile *f, uid_t uid) {
#endif
}
+static int open_journal(
+ Server *s,
+ bool reliably,
+ const char *fname,
+ int flags,
+ bool seal,
+ JournalMetrics *metrics,
+ JournalFile *template,
+ JournalFile **ret) {
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(ret);
+
+ if (reliably)
+ r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, ret);
+ else
+ r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, ret);
+
+ if (r < 0)
+ return r;
+
+ r = journal_file_enable_post_change_timer(*ret, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
+ if (r < 0) {
+ *ret = journal_file_close(*ret);
+ return r;
+ }
+
+ return r;
+}
+
static JournalFile* find_journal(Server *s, uid_t uid) {
_cleanup_free_ char *p = NULL;
int r;
@@ -258,7 +293,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) {
journal_file_close(f);
}
- r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
+ r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &f);
if (r < 0)
return s->system_journal;
@@ -930,7 +965,7 @@ static int system_journal_open(Server *s, bool flush_requested) {
(void) mkdir(fn, 0755);
fn = strjoina(fn, "/system.journal");
- r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &s->system_journal);
if (r >= 0) {
server_add_acls(s->system_journal, 0);
(void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
@@ -953,7 +988,7 @@ static int system_journal_open(Server *s, bool flush_requested) {
* if it already exists, so that we can flush
* it into the system journal */
- r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
+ r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, NULL, &s->runtime_journal);
if (r < 0) {
if (r != -ENOENT)
log_warning_errno(r, "Failed to open runtime journal: %m");
@@ -970,7 +1005,7 @@ static int system_journal_open(Server *s, bool flush_requested) {
(void) mkdir("/run/log/journal", 0755);
(void) mkdir_parents(fn, 0750);
- r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, NULL, &s->runtime_journal);
if (r < 0)
return log_error_errno(r, "Failed to open runtime journal: %m");
}