summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/journal/journal-file.c 190
-rw-r--r-- src/journal/journald.c 1
-rw-r--r-- src/journal/mmap-cache.c 178
-rw-r--r-- src/journal/mmap-cache.h 1
4 files changed, 311 insertions, 59 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 709e15eceb..7beedb4a25 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -299,6 +299,8 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
if (r != 0)
return -r;
+ mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
+
if (fstat(f->fd, &f->last_stat) < 0)
return -errno;
@@ -2335,18 +2337,86 @@ static void flush_progress(void) {
fflush(stdout);
}
+/* Appends the raw, host-endian bytes of p to fd with a single write().
+ * Returns 0 on success, -errno if write() fails, and -EIO if fewer than
+ * sizeof(p) bytes were written. */
+static int write_uint64(int fd, uint64_t p) {
+        ssize_t n_written = write(fd, &p, sizeof(p));
+
+        if (n_written < 0)
+                return -errno;
+
+        /* A short write is reported as an I/O error; no retry is attempted */
+        return n_written == sizeof(p) ? 0 : -EIO;
+}
+
+/* Looks up the value p in an array of n uint64_t values stored at the
+ * beginning of fd, reading the elements through the mmap cache m
+ * (context 0, PROT_READ).
+ *
+ * The lookup is a bisection, so the array must be sorted in ascending
+ * order for the result to be meaningful.
+ *
+ * Returns 1 if p was found, 0 if it was not, and the negative value
+ * returned by mmap_cache_get() if an element could not be mapped. Note
+ * that a negative return is non-zero, so a caller that only tests
+ * !contains_uint64(...) will not notice errors. */
+static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
+        uint64_t a, b;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+
+        /* Bisection over the half-open index range [a, b) */
+
+        a = 0; b = n;
+        while (a < b) {
+                uint64_t c, *z;
+
+                /* Midpoint computed without overflowing a + b */
+                c = a + (b - a) / 2;
+
+                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
+                if (r < 0)
+                        return r;
+
+                if (*z == p)
+                        return 1;
+
+                if (p < *z)
+                        b = c;
+                else
+                        /* Element c is already ruled out. Skipping past it
+                         * guarantees progress; with "a = c" the loop never
+                         * terminates once b == a + 1 and p is not present. */
+                        a = c + 1;
+        }
+
+        return 0;
+}
+
int journal_file_verify(JournalFile *f, const char *key) {
int r;
Object *o;
- uint64_t p = 0, q = 0, e;
+ uint64_t p = 0;
uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
sd_id128_t entry_boot_id;
bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
- uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0;
+ uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
usec_t last_usec = 0;
+ int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
+ char data_path[] = "/var/tmp/journal-data-XXXXXX",
+ entry_path[] = "/var/tmp/journal-entry-XXXXXX",
+ entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
assert(f);
+ data_fd = mkostemp(data_path, O_CLOEXEC);
+ if (data_fd < 0) {
+ log_error("Failed to create data file: %m");
+ goto fail;
+ }
+ unlink(data_path);
+
+ entry_fd = mkostemp(entry_path, O_CLOEXEC);
+ if (entry_fd < 0) {
+ log_error("Failed to create entry file: %m");
+ goto fail;
+ }
+ unlink(entry_path);
+
+ entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
+ if (entry_array_fd < 0) {
+ log_error("Failed to create entry array file: %m");
+ goto fail;
+ }
+ unlink(entry_array_path);
+
/* First iteration: we go through all objects, verify the
* superficial structure, headers, hashes. */
@@ -2358,7 +2428,7 @@ int journal_file_verify(JournalFile *f, const char *key) {
p = le64toh(f->header->header_size);
while (p != 0) {
- draw_progress((65535ULL * p / le64toh(f->header->tail_object_offset)), &last_usec);
+ draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
r = journal_file_move_to_object(f, -1, p, &o);
if (r < 0) {
@@ -2416,6 +2486,10 @@ int journal_file_verify(JournalFile *f, const char *key) {
} else if (o->object.type == OBJECT_ENTRY) {
+ r = write_uint64(entry_fd, p);
+ if (r < 0)
+ goto fail;
+
if (!entry_seqnum_set &&
le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
log_error("Head entry sequence number incorrect");
@@ -2458,6 +2532,10 @@ int journal_file_verify(JournalFile *f, const char *key) {
n_entries ++;
} else if (o->object.type == OBJECT_ENTRY_ARRAY) {
+ r = write_uint64(entry_array_fd, p);
+ if (r < 0)
+ goto fail;
+
if (p == le64toh(f->header->entry_array_offset)) {
if (found_main_entry_array) {
log_error("More than one main entry array at %llu", (unsigned long long) p);
@@ -2468,9 +2546,17 @@ int journal_file_verify(JournalFile *f, const char *key) {
found_main_entry_array = true;
}
- } else if (o->object.type == OBJECT_DATA)
+ n_entry_arrays++;
+
+ } else if (o->object.type == OBJECT_DATA) {
+
+ r = write_uint64(data_fd, p);
+ if (r < 0)
+ goto fail;
+
n_data++;
- else if (o->object.type == OBJECT_FIELD)
+
+ } else if (o->object.type == OBJECT_FIELD)
n_fields++;
else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
n_data_hash_tables++;
@@ -2502,14 +2588,8 @@ int journal_file_verify(JournalFile *f, const char *key) {
r = -EBADMSG;
goto fail;
}
- }
-
- if (o->object.type >= _OBJECT_TYPE_MAX)
+ } else if (o->object.type >= _OBJECT_TYPE_MAX)
n_weird ++;
- else {
- /* Write address to file... */
-
- }
if (p == le64toh(f->header->tail_object_offset))
p = 0;
@@ -2592,36 +2672,86 @@ int journal_file_verify(JournalFile *f, const char *key) {
/* Second iteration: we go through all objects again, this
* time verify all pointers. */
- /* q = le64toh(f->header->header_size); */
- /* while (q != 0) { */
- /* r = journal_file_move_to_object(f, -1, q, &o); */
- /* if (r < 0) { */
- /* log_error("Invalid object at %llu", (unsigned long long) q); */
- /* goto fail; */
- /* } */
+ p = le64toh(f->header->header_size);
+ while (p != 0) {
+ draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
+
+ r = journal_file_move_to_object(f, -1, p, &o);
+ if (r < 0) {
+ log_error("Invalid object at %llu", (unsigned long long) p);
+ goto fail;
+ }
+
+ if (o->object.type == OBJECT_ENTRY_ARRAY) {
+ uint64_t i = 0, n;
+
+ if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
+ !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
+ log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n = journal_file_entry_array_n_items(o);
+ for (i = 0; i < n; i++) {
+ if (le64toh(o->entry_array.items[i]) != 0 &&
+ !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
+
+ log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
+ r = -EBADMSG;
+ goto fail;
+ }
+ }
+
+ }
- /* if (q == le64toh(f->header->tail_object_offset)) */
- /* q = 0; */
- /* else */
- /* q = q + ALIGN64(le64toh(o->object.size)); */
- /* } */
+ r = journal_file_move_to_object(f, -1, p, &o);
+ if (r < 0) {
+ log_error("Invalid object at %llu", (unsigned long long) p);
+ goto fail;
+ }
+
+ if (p == le64toh(f->header->tail_object_offset))
+ p = 0;
+ else
+ p = p + ALIGN64(le64toh(o->object.size));
+ }
flush_progress();
+ mmap_cache_close_fd(f->mmap, data_fd);
+ mmap_cache_close_fd(f->mmap, entry_fd);
+ mmap_cache_close_fd(f->mmap, entry_array_fd);
+
+ close_nointr_nofail(data_fd);
+ close_nointr_nofail(entry_fd);
+ close_nointr_nofail(entry_array_fd);
+
return 0;
fail:
- e = p <= 0 ? q :
- q <= 0 ? p :
- MIN(p, q);
-
flush_progress();
log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
f->path,
- (unsigned long long) e,
+ (unsigned long long) p,
(unsigned long long) f->last_stat.st_size,
- (unsigned long long) (100 * e / f->last_stat.st_size));
+ (unsigned long long) (100 * p / f->last_stat.st_size));
+
+ if (data_fd >= 0) {
+ mmap_cache_close_fd(f->mmap, data_fd);
+ close_nointr_nofail(data_fd);
+ }
+
+ if (entry_fd >= 0) {
+ mmap_cache_close_fd(f->mmap, entry_fd);
+ close_nointr_nofail(entry_fd);
+ }
+
+ if (entry_array_fd >= 0) {
+ mmap_cache_close_fd(f->mmap, entry_array_fd);
+ close_nointr_nofail(entry_array_fd);
+ }
return r;
}
diff --git a/src/journal/journald.c b/src/journal/journald.c
index 145663bf5b..384ed90568 100644
--- a/src/journal/journald.c
+++ b/src/journal/journald.c
@@ -2944,6 +2944,7 @@ int main(int argc, char *argv[]) {
log_set_target(LOG_TARGET_SAFE);
log_set_facility(LOG_SYSLOG);
+ log_set_max_level(LOG_DEBUG);
log_parse_environment();
log_open();
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
index 77710ff9bf..9782139f5a 100644
--- a/src/journal/mmap-cache.c
+++ b/src/journal/mmap-cache.c
@@ -68,6 +68,8 @@ struct MMapCache {
FileDescriptor *by_fd;
};
+static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index);
+
static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
Window *v;
@@ -89,6 +91,13 @@ static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
assert(w < m->n_windows);
v = m->windows + w;
+ assert(v->n_ref == 0);
+
+ if (m->lru_last != (unsigned) -1) {
+ assert(m->windows[m->lru_last].lru_next == (unsigned) -1);
+ m->windows[m->lru_last].lru_next = w;
+ }
+
v->lru_prev = m->lru_last;
v->lru_next = (unsigned) -1;
@@ -105,15 +114,21 @@ static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
v = m->windows + w;
- if (v->lru_prev == (unsigned) -1)
+ if (v->lru_prev == (unsigned) -1) {
+ assert(m->lru_first == w);
m->lru_first = v->lru_next;
- else
+ } else {
+ assert(m->windows[v->lru_prev].lru_next == w);
m->windows[v->lru_prev].lru_next = v->lru_next;
+ }
- if (v->lru_next == (unsigned) -1)
+ if (v->lru_next == (unsigned) -1) {
+ assert(m->lru_last == w);
m->lru_last = v->lru_prev;
- else
+ } else {
+ assert(m->windows[v->lru_next].lru_prev == w);
m->windows[v->lru_next].lru_prev = v->lru_prev;
+ }
}
static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
@@ -123,6 +138,13 @@ static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
assert(fd_index < m->n_fds);
v = m->windows + w;
+ assert(m->by_fd[fd_index].fd == v->fd);
+
+ if (m->by_fd[fd_index].windows != (unsigned) -1) {
+ assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1);
+ m->windows[m->by_fd[fd_index].windows].by_fd_prev = w;
+ }
+
v->by_fd_next = m->by_fd[fd_index].windows;
v->by_fd_prev = (unsigned) -1;
@@ -136,13 +158,22 @@ static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
assert(fd_index < m->n_fds);
v = m->windows + w;
- if (v->by_fd_prev == (unsigned) -1)
+ assert(m->by_fd[fd_index].fd == v->fd);
+ assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd);
+ assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd);
+
+ if (v->by_fd_prev == (unsigned) -1) {
+ assert(m->by_fd[fd_index].windows == w);
m->by_fd[fd_index].windows = v->by_fd_next;
- else
+ } else {
+ assert(m->windows[v->by_fd_prev].by_fd_next == w);
m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
+ }
- if (v->by_fd_next != (unsigned) -1)
+ if (v->by_fd_next != (unsigned) -1) {
+ assert(m->windows[v->by_fd_next].by_fd_prev == w);
m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
+ }
}
static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
@@ -182,6 +213,7 @@ static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
v = m->windows + w;
v->n_ref ++;
+
if (v->n_ref == 1)
mmap_cache_window_remove_lru(m, w);
}
@@ -264,6 +296,9 @@ MMapCache* mmap_cache_unref(MMapCache *m) {
}
static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
+ Window *v;
+ unsigned fd_index;
+
assert(m);
assert(w);
@@ -276,7 +311,16 @@ static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
return -E2BIG;
*w = m->lru_first;
+ v = m->windows + *w;
+ assert(v->n_ref == 0);
+
mmap_cache_window_unmap(m, *w);
+
+ if (v->fd >= 0) {
+ assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0);
+ mmap_cache_fd_remove(m, fd_index, *w);
+ }
+
mmap_cache_window_remove_lru(m, *w);
return 0;
@@ -370,8 +414,7 @@ static int mmap_cache_put(
v->size = wsize;
v->n_ref = 0;
- v->lru_prev = v->lru_next = (unsigned) -1;
-
+ mmap_cache_window_add_lru(m, w);
mmap_cache_fd_add(m, fd_index, w);
mmap_cache_context_set(m, context, w);
@@ -390,28 +433,48 @@ static int fd_cmp(const void *_a, const void *_b) {
return 0;
}
+/* Looks up fd in the cache's by_fd table without registering it.
+ *
+ * Returns 1 and stores the table index in *fd_index if fd is known,
+ * or 0 (leaving *fd_index untouched) if it is not. The lookup uses
+ * bsearch() with fd_cmp, so by_fd has to be kept sorted accordingly. */
+static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
+        FileDescriptor *hit;
+        unsigned i;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(fd_index);
+
+        /* Consistency check: the first window chained to each fd entry
+         * (if any) must belong to that very fd */
+        for (i = 0; i < m->n_fds; i++)
+                assert(m->by_fd[i].windows == (unsigned) -1 ||
+                       m->windows[m->by_fd[i].windows].fd == m->by_fd[i].fd);
+
+        hit = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
+        if (hit) {
+                *fd_index = (unsigned) (hit - m->by_fd);
+                return 1;
+        }
+
+        return 0;
+}
+
static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
FileDescriptor *j;
+ int r;
assert(m);
assert(fd >= 0);
assert(fd_index);
- j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
- if (!j) {
- if (m->n_fds >= m->fds_max)
- return -E2BIG;
+ r = mmap_cache_peek_fd_index(m, fd, fd_index);
+ if (r != 0)
+ return r;
- j = m->by_fd + m->n_fds ++;
- j->fd = fd;
- j->windows = (unsigned) -1;
+ if (m->n_fds >= m->fds_max)
+ return -E2BIG;
- qsort(m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
- j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
- }
+ j = m->by_fd + m->n_fds ++;
+ j->fd = fd;
+ j->windows = (unsigned) -1;
- *fd_index = (unsigned) (j - m->by_fd);
- return 0;
+ qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
+
+ return mmap_cache_peek_fd_index(m, fd, fd_index);
}
static bool mmap_cache_test_window(
@@ -466,6 +529,7 @@ static int mmap_cache_current(
static int mmap_cache_find(
MMapCache *m,
+ int fd,
unsigned fd_index,
unsigned context,
uint64_t offset,
@@ -476,6 +540,7 @@ static int mmap_cache_find(
unsigned w;
assert(m);
+ assert(fd >= 0);
assert(fd_index < m->n_fds);
assert(context < m->contexts_max);
assert(size > 0);
@@ -483,10 +548,13 @@ static int mmap_cache_find(
w = m->by_fd[fd_index].windows;
while (w != (unsigned) -1) {
+ v = m->windows + w;
+ assert(v->fd == fd);
+
if (mmap_cache_test_window(m, w, offset, size))
break;
- w = m->windows[w].by_fd_next;
+ w = v->by_fd_next;
}
if (w == (unsigned) -1)
@@ -494,7 +562,6 @@ static int mmap_cache_find(
mmap_cache_context_set(m, context, w);
- v = m->windows + w;
*ret = (uint8_t*) v->ptr + (offset - v->offset);
return 1;
}
@@ -523,13 +590,17 @@ int mmap_cache_get(
if (r != 0)
return r;
+ /* Hmm, drop the reference to the current one, since it wasn't
+ * good enough */
+ mmap_cache_context_unset(m, context);
+
/* OK, let's find the chain for this FD */
r = mmap_cache_get_fd_index(m, fd, &fd_index);
if (r < 0)
return r;
/* And let's look through the available mmaps */
- r = mmap_cache_find(m, fd_index, context, offset, size, ret);
+ r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret);
if (r != 0)
return r;
@@ -538,16 +609,15 @@ int mmap_cache_get(
}
void mmap_cache_close_fd(MMapCache *m, int fd) {
- FileDescriptor *j;
unsigned fd_index, c, w;
+ int r;
assert(m);
assert(fd > 0);
- j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
- if (!j)
+ r = mmap_cache_peek_fd_index(m, fd, &fd_index);
+ if (r <= 0)
return;
- fd_index = (unsigned) (j - m->by_fd);
for (c = 0; c < m->contexts_max; c++) {
w = m->by_context[c];
@@ -560,9 +630,14 @@ void mmap_cache_close_fd(MMapCache *m, int fd) {
w = m->by_fd[fd_index].windows;
while (w != (unsigned) -1) {
+ Window *v;
+
+ v = m->windows + w;
+ assert(v->fd == fd);
- mmap_cache_fd_remove(m, fd_index, w);
mmap_cache_window_unmap(m, w);
+ mmap_cache_fd_remove(m, fd_index, w);
+ v->fd = -1;
w = m->by_fd[fd_index].windows;
}
@@ -571,6 +646,51 @@ void mmap_cache_close_fd(MMapCache *m, int fd) {
m->n_fds --;
}
+/* Drops every cached window of fd that covers any byte at or beyond
+ * offset p, and detaches all contexts currently pointing at a window
+ * of fd (regardless of the window's position).
+ *
+ * This is useful to ensure that after the file size is extended we
+ * drop our mappings of the end and create them anew, since otherwise
+ * it is undefined whether the mapping will continue to work as
+ * intended. Does nothing if fd is not known to the cache. */
+void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) {
+        unsigned fd_index, context, cur;
+
+        assert(m);
+        assert(fd > 0);
+
+        if (mmap_cache_peek_fd_index(m, fd, &fd_index) <= 0)
+                return;
+
+        /* First release all contexts that reference one of this fd's windows */
+        for (context = 0; context < m->contexts_max; context++) {
+                unsigned pinned = m->by_context[context];
+
+                if (pinned == (unsigned) -1)
+                        continue;
+
+                if (m->windows[pinned].fd != fd)
+                        continue;
+
+                mmap_cache_context_unset(m, context);
+        }
+
+        /* Then walk the fd's window chain and unmap everything reaching past p */
+        cur = m->by_fd[fd_index].windows;
+        while (cur != (unsigned) -1) {
+                Window *win = m->windows + cur;
+
+                assert(win->fd == fd);
+                assert(win->by_fd_next == (unsigned) -1 ||
+                       m->windows[win->by_fd_next].fd == fd);
+
+                if (win->offset + win->size <= p) {
+                        /* Entirely left of the cut-off, keep it */
+                        cur = win->by_fd_next;
+                        continue;
+                }
+
+                mmap_cache_window_unmap(m, cur);
+                mmap_cache_fd_remove(m, fd_index, cur);
+                win->fd = -1;
+
+                /* The chain was modified, start over from its head */
+                cur = m->by_fd[fd_index].windows;
+        }
+}
+
void mmap_cache_close_context(MMapCache *m, unsigned context) {
mmap_cache_context_unset(m, context);
}
diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h
index 0a88fc584f..984b759960 100644
--- a/src/journal/mmap-cache.h
+++ b/src/journal/mmap-cache.h
@@ -31,4 +31,5 @@ MMapCache* mmap_cache_unref(MMapCache *m);
int mmap_cache_get(MMapCache *m, int fd, int prot, unsigned context, uint64_t offset, uint64_t size, void **ret);
void mmap_cache_close_fd(MMapCache *m, int fd);
+void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t range);
void mmap_cache_close_context(MMapCache *m, unsigned context);