diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2016-10-13 07:42:39 -0400 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2016-10-13 07:45:09 -0400 |
commit | c1a9199ec4361bc4ce3924034d12d7ff06fb759f (patch) | |
tree | 06208bd6b28677b2c4fc896bddae456fa1703e8c | |
parent | 6612379adf2eceb7275cec26b5441328f56407c5 (diff) | |
parent | da597d2b5279f1a09943182075e430cf9a88a226 (diff) |
Merge pull request #4362 from poettering/journalbootlistfix
-rw-r--r-- | TODO | 9 | ||||
-rw-r--r-- | src/journal/journal-file.c | 166 | ||||
-rw-r--r-- | src/journal/journal-vacuum.c | 2 | ||||
-rw-r--r-- | src/journal/journalctl.c | 6 | ||||
-rw-r--r-- | src/journal/journald-server.c | 52 | ||||
-rw-r--r-- | src/journal/journald-server.h | 2 |
6 files changed, 168 insertions, 69 deletions
@@ -40,14 +40,6 @@ Features: * switch to ProtectSystem=strict for all our long-running services where that's possible -* introduce an "invocation ID" for units, that is randomly generated, and - identifies each runtime-cycle of a unit. It should be set freshly each time - we traverse inactive → activating/active, and should be the primary key to - map offline data (stored in the journal) with online bus objects. Let's pass - this as $SYSTEMD_INVOCATION_ID to services, as well as set this as xattr on - the cgroup of a services. The former is accessible without privileges, the - latter ensures the ID cannot be faked. - * If RootDirectory= is used, mount /proc, /sys, /dev into it, if not mounted yet * Permit masking specific netlink APIs with RestrictAddressFamily= @@ -498,7 +490,6 @@ Features: message that works, but alraedy after a short tiemout - check if we can make journalctl by default use --follow mode inside of less if called without args? - maybe add API to send pairs of iovecs via sd_journal_send - - journal: when writing journal auto-rotate if time jumps backwards - journal: add a setgid "systemd-journal" utility to invoke from libsystemd-journal, which passes fds via STDOUT and does PK access - journactl: support negative filtering, i.e. FOOBAR!="waldo", and !FOOBAR for events without FOOBAR. diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 349ef74e81..49199b269f 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -568,8 +568,8 @@ static int journal_file_verify_header(JournalFile *f) { return -ENODATA; if (f->writable) { - uint8_t state; sd_id128_t machine_id; + uint8_t state; int r; r = sd_id128_get_machine(&machine_id); @@ -590,6 +590,14 @@ static int journal_file_verify_header(JournalFile *f) { log_debug("Journal file %s has unknown state %i.", f->path, state); return -EBUSY; } + + /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't + * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks + * bisection. */ + if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) { + log_debug("Journal file %s is from the future, refusing to append new data to it that'd be older.", f->path); + return -ETXTBSY; + } } f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header); @@ -747,12 +755,16 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset assert(ret); /* Objects may only be located at multiple of 64 bit */ - if (!VALID64(offset)) + if (!VALID64(offset)) { + log_debug("Attempt to move to object at non-64bit boundary: %" PRIu64, offset); return -EBADMSG; + } /* Object may not be located in the file header */ - if (offset < le64toh(f->header->header_size)) + if (offset < le64toh(f->header->header_size)) { + log_debug("Attempt to move to object located in file header: %" PRIu64, offset); return -EBADMSG; + } r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t); if (r < 0) @@ -761,17 +773,29 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset o = (Object*) t; s = le64toh(o->object.size); - if (s < sizeof(ObjectHeader)) + if (s == 0) { + log_debug("Attempt to move to uninitialized object: %" PRIu64, offset); + return -EBADMSG; + } + if (s < sizeof(ObjectHeader)) { + log_debug("Attempt to move to overly short object: %" PRIu64, offset); return -EBADMSG; + } - if (o->object.type <= OBJECT_UNUSED) + if (o->object.type <= OBJECT_UNUSED) { + log_debug("Attempt to move to object with invalid type: %" PRIu64, offset); return -EBADMSG; + } - if (s < minimum_header_size(o)) + if (s < minimum_header_size(o)) { + log_debug("Attempt to move to truncated object: %" PRIu64, offset); return -EBADMSG; + } - if (type > OBJECT_UNUSED && o->object.type != type) + if (type > OBJECT_UNUSED && o->object.type != type) { + log_debug("Attempt to move to object of unexpected type: %" PRIu64, offset); return -EBADMSG; + } if (s > sizeof(ObjectHeader)) { r = journal_file_move_to(f, type, false, offset, s, &t); @@ -2472,6 +2496,37 @@ int journal_file_compare_locations(JournalFile *af, JournalFile *bf) { return 0; } +static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) { + + /* Increase or decrease the specified index, in the right direction. */ + + if (direction == DIRECTION_DOWN) { + if (*i >= n - 1) + return 0; + + (*i) ++; + } else { + if (*i <= 0) + return 0; + + (*i) --; + } + + return 1; +} + +static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) { + + /* Consider it an error if any of the two offsets is uninitialized */ + if (old_offset == 0 || new_offset == 0) + return false; + + /* If we go down, the new offset must be larger than the old one. */ + return direction == DIRECTION_DOWN ? + new_offset > old_offset : + new_offset < old_offset; +} + int journal_file_next_entry( JournalFile *f, uint64_t p, @@ -2502,36 +2557,34 @@ int journal_file_next_entry( if (r <= 0) return r; - if (direction == DIRECTION_DOWN) { - if (i >= n - 1) - return 0; - - i++; - } else { - if (i <= 0) - return 0; - - i--; - } + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; } /* And jump to it */ - r = generic_array_get(f, - le64toh(f->header->entry_array_offset), - i, - ret, &ofs); - if (r == -EBADMSG && direction == DIRECTION_DOWN) { - /* Special case: when we iterate throught the journal file linearly, and hit an entry we can't read, - * consider this the end of the journal file. */ - log_debug_errno(r, "Encountered entry we can't read while iterating through journal file. Considering this the end of the file."); - return 0; + for (;;) { + r = generic_array_get(f, + le64toh(f->header->entry_array_offset), + i, + ret, &ofs); + if (r > 0) + break; + if (r != -EBADMSG) + return r; + + /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if + * the next one might work for us instead. */ + log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i); + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; } - if (r <= 0) - return r; - if (p > 0 && - (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) { - log_debug("%s: entry array corrupted at entry %" PRIu64, f->path, i); + /* Ensure our array is properly ordered. */ + if (p > 0 && !check_properly_ordered(ofs, p, direction)) { + log_debug("%s: entry array not properly ordered at entry %" PRIu64, f->path, i); return -EBADMSG; } @@ -2548,9 +2601,9 @@ int journal_file_next_entry_for_data( direction_t direction, Object **ret, uint64_t *offset) { - uint64_t n, i; - int r; + uint64_t i, n, ofs; Object *d; + int r; assert(f); assert(p > 0 || !o); @@ -2582,25 +2635,39 @@ int journal_file_next_entry_for_data( if (r <= 0) return r; - if (direction == DIRECTION_DOWN) { - if (i >= n - 1) - return 0; + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + } - i++; - } else { - if (i <= 0) - return 0; + for (;;) { + r = generic_array_get_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + i, + ret, &ofs); + if (r > 0) + break; + if (r != -EBADMSG) + return r; - i--; - } + log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i); + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + } + /* Ensure our array is properly ordered. */ + if (p > 0 && check_properly_ordered(ofs, p, direction)) { + log_debug("%s data entry array not properly ordered at entry %" PRIu64, f->path, i); + return -EBADMSG; } - return generic_array_get_plus_one(f, - le64toh(d->data.entry_offset), - le64toh(d->data.entry_array_offset), - i, - ret, offset); + if (offset) + *offset = ofs; + + return 1; } int journal_file_move_to_entry_by_offset_for_data( @@ -3271,7 +3338,8 @@ int journal_file_open_reliably( -EBUSY, /* unclean shutdown */ -ESHUTDOWN, /* already archived */ -EIO, /* IO error, including SIGBUS on mmap */ - -EIDRM /* File has been deleted */)) + -EIDRM, /* File has been deleted */ + -ETXTBSY)) /* File is from the future */ return r; if ((flags & O_ACCMODE) == O_RDONLY) diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c index f09dc66e03..12ce2fd56c 100644 --- a/src/journal/journal-vacuum.c +++ b/src/journal/journal-vacuum.c @@ -343,7 +343,7 @@ finish: free(list[i].filename); free(list); - log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals on disk.", format_bytes(sbytes, sizeof(sbytes), freed)); + log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", format_bytes(sbytes, sizeof(sbytes), freed), directory); return r; } diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 13e3b44f06..7f997487b4 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -1091,8 +1091,10 @@ static int discover_next_boot(sd_journal *j, r = sd_journal_previous(j); if (r < 0) return r; - else if (r == 0) + else if (r == 0) { + log_debug("Whoopsie! We found a boot ID but can't read its last entry."); return -ENODATA; /* This shouldn't happen. We just came from this very boot ID. */ + } r = sd_journal_get_realtime_usec(j, &next_boot->last); if (r < 0) @@ -2264,7 +2266,7 @@ int main(int argc, char *argv[]) { if (r < 0) goto finish; - printf("Archived and active journals take up %s on disk.\n", + printf("Archived and active journals take up %s in the file system.\n", format_bytes(sbytes, sizeof(sbytes), bytes)); goto finish; } diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index f01cf1d937..381182fa2c 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -595,52 +595,86 @@ static void server_cache_hostname(Server *s) { static bool shall_try_append_again(JournalFile *f, int r) { switch(r) { + case -E2BIG: /* Hit configured limit */ case -EFBIG: /* Hit fs limit */ case -EDQUOT: /* Quota limit hit */ case -ENOSPC: /* Disk full */ log_debug("%s: Allocation limit reached, rotating.", f->path); return true; + case -EIO: /* I/O error of some kind (mmap) */ log_warning("%s: IO error, rotating.", f->path); return true; + case -EHOSTDOWN: /* Other machine */ log_info("%s: Journal file from other machine, rotating.", f->path); return true; + case -EBUSY: /* Unclean shutdown */ log_info("%s: Unclean shutdown, rotating.", f->path); return true; + case -EPROTONOSUPPORT: /* Unsupported feature */ log_info("%s: Unsupported feature, rotating.", f->path); return true; + case -EBADMSG: /* Corrupted */ case -ENODATA: /* Truncated */ case -ESHUTDOWN: /* Already archived */ log_warning("%s: Journal file corrupted, rotating.", f->path); return true; + case -EIDRM: /* Journal file has been deleted */ log_warning("%s: Journal file has been deleted, rotating.", f->path); return true; + + case -ETXTBSY: /* Journal file is from the future */ + log_warning("%s: Journal file is from the future, rotating.", f->path); + return true; + default: return false; } } static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) { + bool vacuumed = false, rotate = false; + struct dual_timestamp ts; JournalFile *f; - bool vacuumed = false; int r; assert(s); assert(iovec); assert(n > 0); - f = find_journal(s, uid); - if (!f) - return; + /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use + * the source time, and not even the time the event was originally seen, but instead simply the time we started + * processing it, as we want strictly linear ordering in what we write out.) */ + assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0); + assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0); + + if (ts.realtime < s->last_realtime_clock) { + /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during + * regular operation. However, when it does happen, then we should make sure that we start fresh files + * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure + * bisection works correctly. */ + + log_debug("Time jumped backwards, rotating."); + rotate = true; + } else { + + f = find_journal(s, uid); + if (!f) + return; + + if (journal_file_rotate_suggested(f, s->max_file_usec)) { + log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + rotate = true; + } + } - if (journal_file_rotate_suggested(f, s->max_file_usec)) { - log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + if (rotate) { server_rotate(s); server_vacuum(s, false, false); vacuumed = true; @@ -650,7 +684,9 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; } - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + s->last_realtime_clock = ts.realtime; + + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r >= 0) { server_schedule_sync(s, priority); return; @@ -669,7 +705,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; log_debug("Retrying write."); - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r < 0) log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n)); else diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index dfb5724794..cc68a0a690 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -149,6 +149,8 @@ struct Server { char *cgroup_root; usec_t watchdog_usec; + + usec_t last_realtime_clock; }; #define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID=")) |