From 0f972d66d439789afacbbcfba9a786965dd9e4b3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2016 18:46:07 +0200 Subject: journald: use the event loop dispatch timestamp for journal entries Let's use the earliest linearized event timestamp for journal entries we have: the event dispatch timestamp from the event loop, instead of requerying the timestamp at the time of writing. This makes the time a bit more accurate, allows us to query the kernel time one time less per event loop, and also makes sure we always use the same timestamp for both attempts to write an entry to a journal file. --- src/journal/journald-server.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src/journal/journald-server.c') diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index f01cf1d937..7227c80c86 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -627,14 +627,21 @@ static bool shall_try_append_again(JournalFile *f, int r) { } static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) { - JournalFile *f; + struct dual_timestamp ts; bool vacuumed = false; + JournalFile *f; int r; assert(s); assert(iovec); assert(n > 0); + /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use + * the source time, and not even the time the event was originally seen, but instead simply the time we started + * processing it, as we want strictly linear ordering in what we write out.) */ + assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0); + assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0); + f = find_journal(s, uid); if (!f) return; @@ -650,7 +657,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; } - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r >= 0) { server_schedule_sync(s, priority); return; @@ -669,7 +676,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; log_debug("Retrying write."); - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r < 0) log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n)); else -- cgit v1.2.3-54-g00ecf From 7c07001711ee1f0aa7a3db7b63b354a4800cadcc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2016 18:49:51 +0200 Subject: journald: automatically rotate journal files when the clock jumps backwards As soon as we notice that the clock jumps backwards, rotate journal files. This is beneficial, as this makes sure that the entries in journal files remain strictly ordered internally, and thus the bisection algorithm applied on it is not confused. This should help avoiding borked wallclock-based bisection on journal files as witnessed in #4278. --- src/journal/journald-server.c | 29 +++++++++++++++++++++++------ src/journal/journald-server.h | 2 ++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'src/journal/journald-server.c') diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index 7227c80c86..28aea35d18 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -627,8 +627,8 @@ static bool shall_try_append_again(JournalFile *f, int r) { } static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) { + bool vacuumed = false, rotate = false; struct dual_timestamp ts; - bool vacuumed = false; JournalFile *f; int r; @@ -642,12 +642,27 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0); assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0); - f = find_journal(s, uid); - if (!f) - return; + if (ts.realtime < s->last_realtime_clock) { + /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during + * regular operation. However, when it does happen, then we should make sure that we start fresh files + * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure + * bisection works correctly. */ - if (journal_file_rotate_suggested(f, s->max_file_usec)) { - log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + log_debug("Time jumped backwards, rotating."); + rotate = true; + } else { + + f = find_journal(s, uid); + if (!f) + return; + + if (journal_file_rotate_suggested(f, s->max_file_usec)) { + log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + rotate = true; + } + } + + if (rotate) { server_rotate(s); server_vacuum(s, false, false); vacuumed = true; @@ -657,6 +672,8 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; } + s->last_realtime_clock = ts.realtime; + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r >= 0) { server_schedule_sync(s, priority); diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index dfb5724794..cc68a0a690 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -149,6 +149,8 @@ struct Server { char *cgroup_root; usec_t watchdog_usec; + + usec_t last_realtime_clock; }; #define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID=")) -- cgit v1.2.3-54-g00ecf From ae739cc1edc7cfa9d1afb4b7087c434aadf61a7a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2016 18:53:35 +0200 Subject: journal: refuse opening journal files from the future for writing Never permit that we write to journal files that have newer timestamps than our local wallclock has. If we'd accept that, then the entries in the file might end up not being ordered strictly. Let's refuse this with ETXTBSY, and then immediately rotate to use a new file, so that each file remains strictly ordered also be wallclock internally. --- src/journal/journal-file.c | 13 +++++++++++-- src/journal/journald-server.c | 12 ++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'src/journal/journald-server.c') diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index e26c8223d8..49199b269f 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -568,8 +568,8 @@ static int journal_file_verify_header(JournalFile *f) { return -ENODATA; if (f->writable) { - uint8_t state; sd_id128_t machine_id; + uint8_t state; int r; r = sd_id128_get_machine(&machine_id); @@ -590,6 +590,14 @@ static int journal_file_verify_header(JournalFile *f) { log_debug("Journal file %s has unknown state %i.", f->path, state); return -EBUSY; } + + /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't + * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks + * bisection. */ + if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) { + log_debug("Journal file %s is from the future, refusing to append new data to it that'd be older.", f->path); + return -ETXTBSY; + } } f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header); @@ -3330,7 +3338,8 @@ int journal_file_open_reliably( -EBUSY, /* unclean shutdown */ -ESHUTDOWN, /* already archived */ -EIO, /* IO error, including SIGBUS on mmap */ - -EIDRM /* File has been deleted */)) + -EIDRM, /* File has been deleted */ + -ETXTBSY)) /* File is from the future */ return r; if ((flags & O_ACCMODE) == O_RDONLY) diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index 28aea35d18..3224bdbf5f 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -595,32 +595,44 @@ static void server_cache_hostname(Server *s) { static bool shall_try_append_again(JournalFile *f, int r) { switch(r) { + case -E2BIG: /* Hit configured limit */ case -EFBIG: /* Hit fs limit */ case -EDQUOT: /* Quota limit hit */ case -ENOSPC: /* Disk full */ log_debug("%s: Allocation limit reached, rotating.", f->path); return true; + case -EIO: /* I/O error of some kind (mmap) */ log_warning("%s: IO error, rotating.", f->path); return true; + case -EHOSTDOWN: /* Other machine */ log_info("%s: Journal file from other machine, rotating.", f->path); return true; + case -EBUSY: /* Unclean shutdown */ log_info("%s: Unclean shutdown, rotating.", f->path); return true; + case -EPROTONOSUPPORT: /* Unsupported feature */ log_info("%s: Unsupported feature, rotating.", f->path); return true; + case -EBADMSG: /* Corrupted */ case -ENODATA: /* Truncated */ case -ESHUTDOWN: /* Already archived */ log_warning("%s: Journal file corrupted, rotating.", f->path); return true; + case -EIDRM: /* Journal file has been deleted */ log_warning("%s: Journal file has been deleted, rotating.", f->path); return true; + + case -ETXTBSY: /* Journal file is from the future */ + log_warning("%s: Journal file is from the future, rotateing.", f->path); + return true; + default: return false; } -- cgit v1.2.3-54-g00ecf