diff options
-rw-r--r-- | src/journal/journal-file.c | 190 | ||||
-rw-r--r-- | src/journal/journald.c | 1 | ||||
-rw-r--r-- | src/journal/mmap-cache.c | 178 | ||||
-rw-r--r-- | src/journal/mmap-cache.h | 1 |
4 files changed, 311 insertions, 59 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 709e15eceb..7beedb4a25 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -299,6 +299,8 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) if (r != 0) return -r; + mmap_cache_close_fd_range(f->mmap, f->fd, old_size); + if (fstat(f->fd, &f->last_stat) < 0) return -errno; @@ -2335,18 +2337,86 @@ static void flush_progress(void) { fflush(stdout); } +static int write_uint64(int fd, uint64_t p) { + ssize_t k; + + k = write(fd, &p, sizeof(p)); + if (k < 0) + return -errno; + if (k != sizeof(p)) + return -EIO; + + return 0; +} + +static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) { + uint64_t a, b; + int r; + + assert(m); + assert(fd >= 0); + + /* Bisection ... */ + + a = 0; b = n; + while (a < b) { + uint64_t c, *z; + + c = (a + b) / 2; + + r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z); + if (r < 0) + return r; + + if (*z == p) + return 1; + + if (p < *z) + b = c; + else + a = c; + } + + return 0; +} + int journal_file_verify(JournalFile *f, const char *key) { int r; Object *o; - uint64_t p = 0, q = 0, e; + uint64_t p = 0; uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0; sd_id128_t entry_boot_id; bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false; - uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0; + uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0; usec_t last_usec = 0; + int data_fd = -1, entry_fd = -1, entry_array_fd = -1; + char data_path[] = "/var/tmp/journal-data-XXXXXX", + entry_path[] = "/var/tmp/journal-entry-XXXXXX", + entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX"; assert(f); + data_fd = mkostemp(data_path, O_CLOEXEC); + if (data_fd < 0) { + log_error("Failed to create data file: %m"); + goto fail; + } + unlink(data_path); + + entry_fd = mkostemp(entry_path, O_CLOEXEC); + if (entry_fd < 0) { + log_error("Failed to create entry file: %m"); + goto fail; + } + unlink(entry_path); + + entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC); + if (entry_array_fd < 0) { + log_error("Failed to create entry array file: %m"); + goto fail; + } + unlink(entry_array_path); + /* First iteration: we go through all objects, verify the * superficial structure, headers, hashes. */ @@ -2358,7 +2428,7 @@ int journal_file_verify(JournalFile *f, const char *key) { p = le64toh(f->header->header_size); while (p != 0) { - draw_progress((65535ULL * p / le64toh(f->header->tail_object_offset)), &last_usec); + draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec); r = journal_file_move_to_object(f, -1, p, &o); if (r < 0) { @@ -2416,6 +2486,10 @@ int journal_file_verify(JournalFile *f, const char *key) { } else if (o->object.type == OBJECT_ENTRY) { + r = write_uint64(entry_fd, p); + if (r < 0) + goto fail; + if (!entry_seqnum_set && le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) { log_error("Head entry sequence number incorrect"); @@ -2458,6 +2532,10 @@ int journal_file_verify(JournalFile *f, const char *key) { n_entries ++; } else if (o->object.type == OBJECT_ENTRY_ARRAY) { + r = write_uint64(entry_array_fd, p); + if (r < 0) + goto fail; + if (p == le64toh(f->header->entry_array_offset)) { if (found_main_entry_array) { log_error("More than one main entry array at %llu", (unsigned long long) p); @@ -2468,9 +2546,17 @@ int journal_file_verify(JournalFile *f, const char *key) { found_main_entry_array = true; } - } else if (o->object.type == OBJECT_DATA) + n_entry_arrays++; + + } else if (o->object.type == OBJECT_DATA) { + + r = write_uint64(data_fd, p); + if (r < 0) + goto fail; + n_data++; - else if (o->object.type == OBJECT_FIELD) + + } else if (o->object.type == OBJECT_FIELD) n_fields++; else if (o->object.type == OBJECT_DATA_HASH_TABLE) { n_data_hash_tables++; @@ -2502,14 +2588,8 @@ int journal_file_verify(JournalFile *f, const char *key) { r = -EBADMSG; goto fail; } - } - - if (o->object.type >= _OBJECT_TYPE_MAX) + } else if (o->object.type >= _OBJECT_TYPE_MAX) n_weird ++; - else { - /* Write address to file... */ - - } if (p == le64toh(f->header->tail_object_offset)) p = 0; @@ -2592,36 +2672,86 @@ int journal_file_verify(JournalFile *f, const char *key) { /* Second iteration: we go through all objects again, this * time verify all pointers. */ - /* q = le64toh(f->header->header_size); */ - /* while (q != 0) { */ - /* r = journal_file_move_to_object(f, -1, q, &o); */ - /* if (r < 0) { */ - /* log_error("Invalid object at %llu", (unsigned long long) q); */ - /* goto fail; */ - /* } */ + p = le64toh(f->header->header_size); + while (p != 0) { + draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec); + + r = journal_file_move_to_object(f, -1, p, &o); + if (r < 0) { + log_error("Invalid object at %llu", (unsigned long long) p); + goto fail; + } + + if (o->object.type == OBJECT_ENTRY_ARRAY) { + uint64_t i = 0, n; + + if (le64toh(o->entry_array.next_entry_array_offset) != 0 && + !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) { + log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p); + r = -EBADMSG; + goto fail; + } + + n = journal_file_entry_array_n_items(o); + for (i = 0; i < n; i++) { + if (le64toh(o->entry_array.items[i]) != 0 && + !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) { + + log_error("Entry array points to invalid next array at %llu", (unsigned long long) p); + r = -EBADMSG; + goto fail; + } + } + + } - /* if (q == le64toh(f->header->tail_object_offset)) */ - /* q = 0; */ - /* else */ - /* q = q + ALIGN64(le64toh(o->object.size)); */ - /* } */ + r = journal_file_move_to_object(f, -1, p, &o); + if (r < 0) { + log_error("Invalid object at %llu", (unsigned long long) p); + goto fail; + } + + if (p == le64toh(f->header->tail_object_offset)) + p = 0; + else + p = p + ALIGN64(le64toh(o->object.size)); + } flush_progress(); + mmap_cache_close_fd(f->mmap, data_fd); + mmap_cache_close_fd(f->mmap, entry_fd); + mmap_cache_close_fd(f->mmap, entry_array_fd); + + close_nointr_nofail(data_fd); + close_nointr_nofail(entry_fd); + close_nointr_nofail(entry_array_fd); + return 0; fail: - e = p <= 0 ? q : - q <= 0 ? p : - MIN(p, q); - flush_progress(); log_error("File corruption detected at %s:%llu (of %llu, %llu%%).", f->path, - (unsigned long long) e, + (unsigned long long) p, (unsigned long long) f->last_stat.st_size, - (unsigned long long) (100 * e / f->last_stat.st_size)); + (unsigned long long) (100 * p / f->last_stat.st_size)); + + if (data_fd >= 0) { + mmap_cache_close_fd(f->mmap, data_fd); + close_nointr_nofail(data_fd); + } + + if (entry_fd >= 0) { + mmap_cache_close_fd(f->mmap, entry_fd); + close_nointr_nofail(entry_fd); + } + + if (entry_array_fd >= 0) { + mmap_cache_close_fd(f->mmap, entry_array_fd); + close_nointr_nofail(entry_array_fd); + } return r; } diff --git a/src/journal/journald.c b/src/journal/journald.c index 145663bf5b..384ed90568 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -2944,6 +2944,7 @@ int main(int argc, char *argv[]) { log_set_target(LOG_TARGET_SAFE); log_set_facility(LOG_SYSLOG); + log_set_max_level(LOG_DEBUG); log_parse_environment(); log_open(); diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 77710ff9bf..9782139f5a 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -68,6 +68,8 @@ struct MMapCache { FileDescriptor *by_fd; }; +static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index); + static void mmap_cache_window_unmap(MMapCache *m, unsigned w) { Window *v; @@ -89,6 +91,13 @@ static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) { assert(w < m->n_windows); v = m->windows + w; + assert(v->n_ref == 0); + + if (m->lru_last != (unsigned) -1) { + assert(m->windows[m->lru_last].lru_next == (unsigned) -1); + m->windows[m->lru_last].lru_next = w; + } + v->lru_prev = m->lru_last; v->lru_next = (unsigned) -1; @@ -105,15 +114,21 @@ static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) { v = m->windows + w; - if (v->lru_prev == (unsigned) -1) + if (v->lru_prev == (unsigned) -1) { + assert(m->lru_first == w); m->lru_first = v->lru_next; - else + } else { + assert(m->windows[v->lru_prev].lru_next == w); m->windows[v->lru_prev].lru_next = v->lru_next; + } - if (v->lru_next == (unsigned) -1) + if (v->lru_next == (unsigned) -1) { + assert(m->lru_last == w); m->lru_last = v->lru_prev; - else + } else { + assert(m->windows[v->lru_next].lru_prev == w); m->windows[v->lru_next].lru_prev = v->lru_prev; + } } static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) { @@ -123,6 +138,13 @@ static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) { assert(fd_index < m->n_fds); v = m->windows + w; + assert(m->by_fd[fd_index].fd == v->fd); + + if (m->by_fd[fd_index].windows != (unsigned) -1) { + assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1); + m->windows[m->by_fd[fd_index].windows].by_fd_prev = w; + } + v->by_fd_next = m->by_fd[fd_index].windows; v->by_fd_prev = (unsigned) -1; @@ -136,13 +158,22 @@ static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) { assert(fd_index < m->n_fds); v = m->windows + w; - if (v->by_fd_prev == (unsigned) -1) + assert(m->by_fd[fd_index].fd == v->fd); + assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd); + assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd); + + if (v->by_fd_prev == (unsigned) -1) { + assert(m->by_fd[fd_index].windows == w); m->by_fd[fd_index].windows = v->by_fd_next; - else + } else { + assert(m->windows[v->by_fd_prev].by_fd_next == w); m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next; + } - if (v->by_fd_next != (unsigned) -1) + if (v->by_fd_next != (unsigned) -1) { + assert(m->windows[v->by_fd_next].by_fd_prev == w); m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev; + } } static void mmap_cache_context_unset(MMapCache *m, unsigned c) { @@ -182,6 +213,7 @@ static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) { v = m->windows + w; v->n_ref ++; + if (v->n_ref == 1) mmap_cache_window_remove_lru(m, w); } @@ -264,6 +296,9 @@ MMapCache* mmap_cache_unref(MMapCache *m) { } static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) { + Window *v; + unsigned fd_index; + assert(m); assert(w); @@ -276,7 +311,16 @@ static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) { return -E2BIG; *w = m->lru_first; + v = m->windows + *w; + assert(v->n_ref == 0); + mmap_cache_window_unmap(m, *w); + + if (v->fd >= 0) { + assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0); + mmap_cache_fd_remove(m, fd_index, *w); + } + mmap_cache_window_remove_lru(m, *w); return 0; @@ -370,8 +414,7 @@ static int mmap_cache_put( v->size = wsize; v->n_ref = 0; - v->lru_prev = v->lru_next = (unsigned) -1; - + mmap_cache_window_add_lru(m, w); mmap_cache_fd_add(m, fd_index, w); mmap_cache_context_set(m, context, w); @@ -390,28 +433,48 @@ static int fd_cmp(const void *_a, const void *_b) { return 0; } +static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) { + FileDescriptor *j; + unsigned r; + + assert(m); + assert(fd >= 0); + assert(fd_index); + + for (r = 0; r < m->n_fds; r++) + assert(m->by_fd[r].windows == (unsigned) -1 || + m->windows[m->by_fd[r].windows].fd == m->by_fd[r].fd); + + j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp); + if (!j) + return 0; + + *fd_index = (unsigned) (j - m->by_fd); + return 1; +} + static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) { FileDescriptor *j; + int r; assert(m); assert(fd >= 0); assert(fd_index); - j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - if (!j) { - if (m->n_fds >= m->fds_max) - return -E2BIG; + r = mmap_cache_peek_fd_index(m, fd, fd_index); + if (r != 0) + return r; - j = m->by_fd + m->n_fds ++; - j->fd = fd; - j->windows = (unsigned) -1; + if (m->n_fds >= m->fds_max) + return -E2BIG; - qsort(m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - } + j = m->by_fd + m->n_fds ++; + j->fd = fd; + j->windows = (unsigned) -1; - *fd_index = (unsigned) (j - m->by_fd); - return 0; + qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp); + + return mmap_cache_peek_fd_index(m, fd, fd_index); } static bool mmap_cache_test_window( @@ -466,6 +529,7 @@ static int mmap_cache_current( static int mmap_cache_find( MMapCache *m, + int fd, unsigned fd_index, unsigned context, uint64_t offset, @@ -476,6 +540,7 @@ static int mmap_cache_find( unsigned w; assert(m); + assert(fd >= 0); assert(fd_index < m->n_fds); assert(context < m->contexts_max); assert(size > 0); @@ -483,10 +548,13 @@ static int mmap_cache_find( w = m->by_fd[fd_index].windows; while (w != (unsigned) -1) { + v = m->windows + w; + assert(v->fd == fd); + if (mmap_cache_test_window(m, w, offset, size)) break; - w = m->windows[w].by_fd_next; + w = v->by_fd_next; } if (w == (unsigned) -1) @@ -494,7 +562,6 @@ static int mmap_cache_find( mmap_cache_context_set(m, context, w); - v = m->windows + w; *ret = (uint8_t*) v->ptr + (offset - v->offset); return 1; } @@ -523,13 +590,17 @@ int mmap_cache_get( if (r != 0) return r; + /* Hmm, drop the reference to the current one, since it wasn't + * good enough */ + mmap_cache_context_unset(m, context); + /* OK, let's find the chain for this FD */ r = mmap_cache_get_fd_index(m, fd, &fd_index); if (r < 0) return r; /* And let's look through the available mmaps */ - r = mmap_cache_find(m, fd_index, context, offset, size, ret); + r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret); if (r != 0) return r; @@ -538,16 +609,15 @@ int mmap_cache_get( } void mmap_cache_close_fd(MMapCache *m, int fd) { - FileDescriptor *j; unsigned fd_index, c, w; + int r; assert(m); assert(fd > 0); - j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp); - if (!j) + r = mmap_cache_peek_fd_index(m, fd, &fd_index); + if (r <= 0) return; - fd_index = (unsigned) (j - m->by_fd); for (c = 0; c < m->contexts_max; c++) { w = m->by_context[c]; @@ -560,9 +630,14 @@ void mmap_cache_close_fd(MMapCache *m, int fd) { w = m->by_fd[fd_index].windows; while (w != (unsigned) -1) { + Window *v; + + v = m->windows + w; + assert(v->fd == fd); - mmap_cache_fd_remove(m, fd_index, w); mmap_cache_window_unmap(m, w); + mmap_cache_fd_remove(m, fd_index, w); + v->fd = -1; w = m->by_fd[fd_index].windows; } @@ -571,6 +646,51 @@ void mmap_cache_close_fd(MMapCache *m, int fd) { m->n_fds --; } +void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) { + unsigned fd_index, c, w; + int r; + + assert(m); + assert(fd > 0); + + /* This drops all windows that include space right of the + * specified offset. This is useful to ensure that after the + * file size is extended we drop our mappings of the end and + * create it anew, since otherwise it is undefined whether + * mapping will continue to work as intended. */ + + r = mmap_cache_peek_fd_index(m, fd, &fd_index); + if (r <= 0) + return; + + for (c = 0; c < m->contexts_max; c++) { + w = m->by_context[c]; + + if (w != (unsigned) -1 && m->windows[w].fd == fd) + mmap_cache_context_unset(m, c); + } + + w = m->by_fd[fd_index].windows; + while (w != (unsigned) -1) { + Window *v; + + v = m->windows + w; + assert(v->fd == fd); + assert(v->by_fd_next == (unsigned) -1 || + m->windows[v->by_fd_next].fd == fd); + + if (v->offset + v->size > p) { + + mmap_cache_window_unmap(m, w); + mmap_cache_fd_remove(m, fd_index, w); + v->fd = -1; + + w = m->by_fd[fd_index].windows; + } else + w = v->by_fd_next; + } +} + void mmap_cache_close_context(MMapCache *m, unsigned context) { mmap_cache_context_unset(m, context); } diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h index 0a88fc584f..984b759960 100644 --- a/src/journal/mmap-cache.h +++ b/src/journal/mmap-cache.h @@ -31,4 +31,5 @@ MMapCache* mmap_cache_unref(MMapCache *m); int mmap_cache_get(MMapCache *m, int fd, int prot, unsigned context, uint64_t offset, uint64_t size, void **ret); void mmap_cache_close_fd(MMapCache *m, int fd); +void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t range); void mmap_cache_close_context(MMapCache *m, unsigned context); |