summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--TODO9
-rw-r--r--src/journal/journal-file.c166
-rw-r--r--src/journal/journal-vacuum.c2
-rw-r--r--src/journal/journalctl.c6
-rw-r--r--src/journal/journald-server.c52
-rw-r--r--src/journal/journald-server.h2
6 files changed, 168 insertions, 69 deletions
diff --git a/TODO b/TODO
index b4443ce585..1f1c2739c5 100644
--- a/TODO
+++ b/TODO
@@ -40,14 +40,6 @@ Features:
* switch to ProtectSystem=strict for all our long-running services where that's possible
-* introduce an "invocation ID" for units, that is randomly generated, and
- identifies each runtime-cycle of a unit. It should be set freshly each time
- we traverse inactive → activating/active, and should be the primary key to
- map offline data (stored in the journal) with online bus objects. Let's pass
- this as $SYSTEMD_INVOCATION_ID to services, as well as set this as xattr on
- the cgroup of a services. The former is accessible without privileges, the
- latter ensures the ID cannot be faked.
-
* If RootDirectory= is used, mount /proc, /sys, /dev into it, if not mounted yet
* Permit masking specific netlink APIs with RestrictAddressFamily=
@@ -498,7 +490,6 @@ Features:
message that works, but alraedy after a short tiemout
- check if we can make journalctl by default use --follow mode inside of less if called without args?
- maybe add API to send pairs of iovecs via sd_journal_send
- - journal: when writing journal auto-rotate if time jumps backwards
- journal: add a setgid "systemd-journal" utility to invoke from libsystemd-journal, which passes fds via STDOUT and does PK access
- journactl: support negative filtering, i.e. FOOBAR!="waldo",
and !FOOBAR for events without FOOBAR.
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 349ef74e81..49199b269f 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -568,8 +568,8 @@ static int journal_file_verify_header(JournalFile *f) {
return -ENODATA;
if (f->writable) {
- uint8_t state;
sd_id128_t machine_id;
+ uint8_t state;
int r;
r = sd_id128_get_machine(&machine_id);
@@ -590,6 +590,14 @@ static int journal_file_verify_header(JournalFile *f) {
log_debug("Journal file %s has unknown state %i.", f->path, state);
return -EBUSY;
}
+
+ /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't
+ * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks
+ * bisection. */
+ if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) {
+ log_debug("Journal file %s is from the future, refusing to append new data to it that'd be older.", f->path);
+ return -ETXTBSY;
+ }
}
f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
@@ -747,12 +755,16 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
assert(ret);
/* Objects may only be located at multiple of 64 bit */
- if (!VALID64(offset))
+ if (!VALID64(offset)) {
+ log_debug("Attempt to move to object at non-64bit boundary: %" PRIu64, offset);
return -EBADMSG;
+ }
/* Object may not be located in the file header */
- if (offset < le64toh(f->header->header_size))
+ if (offset < le64toh(f->header->header_size)) {
+ log_debug("Attempt to move to object located in file header: %" PRIu64, offset);
return -EBADMSG;
+ }
r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t);
if (r < 0)
@@ -761,17 +773,29 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
o = (Object*) t;
s = le64toh(o->object.size);
- if (s < sizeof(ObjectHeader))
+ if (s == 0) {
+ log_debug("Attempt to move to uninitialized object: %" PRIu64, offset);
+ return -EBADMSG;
+ }
+ if (s < sizeof(ObjectHeader)) {
+ log_debug("Attempt to move to overly short object: %" PRIu64, offset);
return -EBADMSG;
+ }
- if (o->object.type <= OBJECT_UNUSED)
+ if (o->object.type <= OBJECT_UNUSED) {
+ log_debug("Attempt to move to object with invalid type: %" PRIu64, offset);
return -EBADMSG;
+ }
- if (s < minimum_header_size(o))
+ if (s < minimum_header_size(o)) {
+ log_debug("Attempt to move to truncated object: %" PRIu64, offset);
return -EBADMSG;
+ }
- if (type > OBJECT_UNUSED && o->object.type != type)
+ if (type > OBJECT_UNUSED && o->object.type != type) {
+ log_debug("Attempt to move to object of unexpected type: %" PRIu64, offset);
return -EBADMSG;
+ }
if (s > sizeof(ObjectHeader)) {
r = journal_file_move_to(f, type, false, offset, s, &t);
@@ -2472,6 +2496,37 @@ int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
return 0;
}
+static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) {
+
+ /* Increase or decrease the specified index, in the right direction. */
+
+ if (direction == DIRECTION_DOWN) {
+ if (*i >= n - 1)
+ return 0;
+
+ (*i) ++;
+ } else {
+ if (*i <= 0)
+ return 0;
+
+ (*i) --;
+ }
+
+ return 1;
+}
+
+static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) {
+
+ /* Consider it an error if any of the two offsets is uninitialized */
+ if (old_offset == 0 || new_offset == 0)
+ return false;
+
+ /* If we go down, the new offset must be larger than the old one. */
+ return direction == DIRECTION_DOWN ?
+ new_offset > old_offset :
+ new_offset < old_offset;
+}
+
int journal_file_next_entry(
JournalFile *f,
uint64_t p,
@@ -2502,36 +2557,34 @@ int journal_file_next_entry(
if (r <= 0)
return r;
- if (direction == DIRECTION_DOWN) {
- if (i >= n - 1)
- return 0;
-
- i++;
- } else {
- if (i <= 0)
- return 0;
-
- i--;
- }
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
}
/* And jump to it */
- r = generic_array_get(f,
- le64toh(f->header->entry_array_offset),
- i,
- ret, &ofs);
- if (r == -EBADMSG && direction == DIRECTION_DOWN) {
- /* Special case: when we iterate throught the journal file linearly, and hit an entry we can't read,
- * consider this the end of the journal file. */
- log_debug_errno(r, "Encountered entry we can't read while iterating through journal file. Considering this the end of the file.");
- return 0;
+ for (;;) {
+ r = generic_array_get(f,
+ le64toh(f->header->entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
+
+ /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if
+ * the next one might work for us instead. */
+ log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
}
- if (r <= 0)
- return r;
- if (p > 0 &&
- (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
- log_debug("%s: entry array corrupted at entry %" PRIu64, f->path, i);
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && !check_properly_ordered(ofs, p, direction)) {
+ log_debug("%s: entry array not properly ordered at entry %" PRIu64, f->path, i);
return -EBADMSG;
}
@@ -2548,9 +2601,9 @@ int journal_file_next_entry_for_data(
direction_t direction,
Object **ret, uint64_t *offset) {
- uint64_t n, i;
- int r;
+ uint64_t i, n, ofs;
Object *d;
+ int r;
assert(f);
assert(p > 0 || !o);
@@ -2582,25 +2635,39 @@ int journal_file_next_entry_for_data(
if (r <= 0)
return r;
- if (direction == DIRECTION_DOWN) {
- if (i >= n - 1)
- return 0;
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
- i++;
- } else {
- if (i <= 0)
- return 0;
+ for (;;) {
+ r = generic_array_get_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
- i--;
- }
+ log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && check_properly_ordered(ofs, p, direction)) {
+ log_debug("%s data entry array not properly ordered at entry %" PRIu64, f->path, i);
+ return -EBADMSG;
}
- return generic_array_get_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- i,
- ret, offset);
+ if (offset)
+ *offset = ofs;
+
+ return 1;
}
int journal_file_move_to_entry_by_offset_for_data(
@@ -3271,7 +3338,8 @@ int journal_file_open_reliably(
-EBUSY, /* unclean shutdown */
-ESHUTDOWN, /* already archived */
-EIO, /* IO error, including SIGBUS on mmap */
- -EIDRM /* File has been deleted */))
+ -EIDRM, /* File has been deleted */
+ -ETXTBSY)) /* File is from the future */
return r;
if ((flags & O_ACCMODE) == O_RDONLY)
diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c
index f09dc66e03..12ce2fd56c 100644
--- a/src/journal/journal-vacuum.c
+++ b/src/journal/journal-vacuum.c
@@ -343,7 +343,7 @@ finish:
free(list[i].filename);
free(list);
- log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals on disk.", format_bytes(sbytes, sizeof(sbytes), freed));
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", format_bytes(sbytes, sizeof(sbytes), freed), directory);
return r;
}
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
index 13e3b44f06..7f997487b4 100644
--- a/src/journal/journalctl.c
+++ b/src/journal/journalctl.c
@@ -1091,8 +1091,10 @@ static int discover_next_boot(sd_journal *j,
r = sd_journal_previous(j);
if (r < 0)
return r;
- else if (r == 0)
+ else if (r == 0) {
+ log_debug("Whoopsie! We found a boot ID but can't read its last entry.");
return -ENODATA; /* This shouldn't happen. We just came from this very boot ID. */
+ }
r = sd_journal_get_realtime_usec(j, &next_boot->last);
if (r < 0)
@@ -2264,7 +2266,7 @@ int main(int argc, char *argv[]) {
if (r < 0)
goto finish;
- printf("Archived and active journals take up %s on disk.\n",
+ printf("Archived and active journals take up %s in the file system.\n",
format_bytes(sbytes, sizeof(sbytes), bytes));
goto finish;
}
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index f01cf1d937..381182fa2c 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -595,52 +595,86 @@ static void server_cache_hostname(Server *s) {
static bool shall_try_append_again(JournalFile *f, int r) {
switch(r) {
+
case -E2BIG: /* Hit configured limit */
case -EFBIG: /* Hit fs limit */
case -EDQUOT: /* Quota limit hit */
case -ENOSPC: /* Disk full */
log_debug("%s: Allocation limit reached, rotating.", f->path);
return true;
+
case -EIO: /* I/O error of some kind (mmap) */
log_warning("%s: IO error, rotating.", f->path);
return true;
+
case -EHOSTDOWN: /* Other machine */
log_info("%s: Journal file from other machine, rotating.", f->path);
return true;
+
case -EBUSY: /* Unclean shutdown */
log_info("%s: Unclean shutdown, rotating.", f->path);
return true;
+
case -EPROTONOSUPPORT: /* Unsupported feature */
log_info("%s: Unsupported feature, rotating.", f->path);
return true;
+
case -EBADMSG: /* Corrupted */
case -ENODATA: /* Truncated */
case -ESHUTDOWN: /* Already archived */
log_warning("%s: Journal file corrupted, rotating.", f->path);
return true;
+
case -EIDRM: /* Journal file has been deleted */
log_warning("%s: Journal file has been deleted, rotating.", f->path);
return true;
+
+ case -ETXTBSY: /* Journal file is from the future */
+ log_warning("%s: Journal file is from the future, rotating.", f->path);
+ return true;
+
default:
return false;
}
}
static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
+ bool vacuumed = false, rotate = false;
+ struct dual_timestamp ts;
JournalFile *f;
- bool vacuumed = false;
int r;
assert(s);
assert(iovec);
assert(n > 0);
- f = find_journal(s, uid);
- if (!f)
- return;
+ /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
+ * the source time, and not even the time the event was originally seen, but instead simply the time we started
+ * processing it, as we want strictly linear ordering in what we write out.) */
+ assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
+ assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
+
+ if (ts.realtime < s->last_realtime_clock) {
+ /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
+ * regular operation. However, when it does happen, then we should make sure that we start fresh files
+ * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
+ * bisection works correctly. */
+
+ log_debug("Time jumped backwards, rotating.");
+ rotate = true;
+ } else {
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ if (journal_file_rotate_suggested(f, s->max_file_usec)) {
+ log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+ rotate = true;
+ }
+ }
- if (journal_file_rotate_suggested(f, s->max_file_usec)) {
- log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+ if (rotate) {
server_rotate(s);
server_vacuum(s, false, false);
vacuumed = true;
@@ -650,7 +684,9 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
return;
}
- r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ s->last_realtime_clock = ts.realtime;
+
+ r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
if (r >= 0) {
server_schedule_sync(s, priority);
return;
@@ -669,7 +705,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
return;
log_debug("Retrying write.");
- r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
if (r < 0)
log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
else
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index dfb5724794..cc68a0a690 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -149,6 +149,8 @@ struct Server {
char *cgroup_root;
usec_t watchdog_usec;
+
+ usec_t last_realtime_clock;
};
#define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID="))