diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile.am | 11 | ||||
-rw-r--r-- | src/journal/journal-file.c | 67 | ||||
-rw-r--r-- | src/journal/journal-file.h | 2 | ||||
-rw-r--r-- | src/journal/journald-server.c | 3 | ||||
-rw-r--r-- | src/journal/journald.c | 4 | ||||
-rw-r--r-- | src/journal/mmap-cache.c | 126 | ||||
-rw-r--r-- | src/journal/mmap-cache.h | 5 | ||||
-rw-r--r-- | src/shared/sigbus.c | 152 | ||||
-rw-r--r-- | src/shared/sigbus.h | 25 | ||||
-rw-r--r-- | src/test/test-sigbus.c | 62 |
11 files changed, 430 insertions, 28 deletions
diff --git a/.gitignore b/.gitignore index 078fd9ad7f..70ee4f63dd 100644 --- a/.gitignore +++ b/.gitignore @@ -233,6 +233,7 @@ /test-rtnl-manual /test-sched-prio /test-set +/test-sigbus /test-sleep /test-socket-util /test-ssd diff --git a/Makefile.am b/Makefile.am index 73e911f6bf..10fc8a9c84 100644 --- a/Makefile.am +++ b/Makefile.am @@ -901,6 +901,8 @@ libsystemd_shared_la_SOURCES = \ src/shared/verbs.h \ src/shared/machine-image.c \ src/shared/machine-image.h \ + src/shared/sigbus.c \ + src/shared/sigbus.h \ src/shared/build.h if HAVE_UTMP @@ -1386,7 +1388,8 @@ tests += \ test-locale-util \ test-execute \ test-copy \ - test-cap-list + test-cap-list \ + test-sigbus EXTRA_DIST += \ test/a.service \ @@ -1580,6 +1583,12 @@ test_copy_SOURCES = \ test_copy_LDADD = \ libsystemd-shared.la +test_sigbus_SOURCES = \ + src/test/test-sigbus.c + +test_sigbus_LDADD = \ + libsystemd-shared.la + test_condition_SOURCES = \ src/test/test-condition.c diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 48c27ee627..44a96928e0 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -67,6 +67,9 @@ /* How much to increase the journal file size at once each time we allocate something new. */ #define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */ +/* The mmap context to use for the header we pick as one above the last defined typed */ +#define CONTEXT_HEADER _OBJECT_TYPE_MAX + static int journal_file_set_online(JournalFile *f) { assert(f); @@ -76,6 +79,9 @@ static int journal_file_set_online(JournalFile *f) { if (!(f->fd >= 0 && f->header)) return -EINVAL; + if (mmap_cache_got_sigbus(f->mmap, f->fd)) + return -EIO; + switch(f->header->state) { case STATE_ONLINE: return 0; @@ -104,8 +110,14 @@ int journal_file_set_offline(JournalFile *f) { fsync(f->fd); + if (mmap_cache_got_sigbus(f->mmap, f->fd)) + return -EIO; + f->header->state = STATE_OFFLINE; + if (mmap_cache_got_sigbus(f->mmap, f->fd)) + return -EIO; + fsync(f->fd); return 0; @@ -120,14 +132,10 @@ void journal_file_close(JournalFile *f) { journal_file_append_tag(f); #endif - /* Sync everything to disk, before we mark the file offline */ - if (f->mmap && f->fd >= 0) - mmap_cache_close_fd(f->mmap, f->fd); - journal_file_set_offline(f); - if (f->header) - munmap(f->header, PAGE_ALIGN(sizeof(Header))); + if (f->mmap && f->fd >= 0) + mmap_cache_close_fd(f->mmap, f->fd); safe_close(f->fd); free(f->path); @@ -194,8 +202,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) { } static int journal_file_refresh_header(JournalFile *f) { - int r; sd_id128_t boot_id; + int r; assert(f); @@ -212,12 +220,12 @@ static int journal_file_refresh_header(JournalFile *f) { f->header->boot_id = boot_id; - journal_file_set_online(f); + r = journal_file_set_online(f); /* Sync the online state to disk */ fsync(f->fd); - return 0; + return r; } static int journal_file_verify_header(JournalFile *f) { @@ -321,6 +329,9 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) * for sure, since we always call posix_fallocate() * ourselves */ + if (mmap_cache_got_sigbus(f->mmap, f->fd)) + return -EIO; + old_size = le64toh(f->header->header_size) + le64toh(f->header->arena_size); @@ -376,6 +387,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) static unsigned type_to_context(ObjectType type) { /* One context for each type, plus one catch-all for the rest */ assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS); + assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS); return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0; } @@ -1357,6 +1369,14 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset); + /* If the memory mapping triggered a SIGBUS then we return an + * IO error and ignore the error code passed down to us, since + * it is very likely just an effect of a nullified replacement + * mapping page */ + + if (mmap_cache_got_sigbus(f->mmap, f->fd)) + r = -EIO; + journal_file_post_change(f); return r; @@ -1712,7 +1732,6 @@ found: return 1; } - static int generic_array_bisect_plus_one( JournalFile *f, uint64_t extra, @@ -2457,9 +2476,10 @@ int journal_file_open( JournalFile *template, JournalFile **ret) { + bool newly_created = false; JournalFile *f; + void *h; int r; - bool newly_created = false; assert(fname); assert(ret); @@ -2564,13 +2584,14 @@ int journal_file_open( goto fail; } - f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); - if (f->header == MAP_FAILED) { - f->header = NULL; + r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h); + if (r < 0) { r = -errno; goto fail; } + f->header = h; + if (!newly_created) { r = journal_file_verify_header(f); if (r < 0) @@ -2627,10 +2648,18 @@ int journal_file_open( if (r < 0) goto fail; + if (mmap_cache_got_sigbus(f->mmap, f->fd)) { + r = -EIO; + goto fail; + } + *ret = f; return 0; fail: + if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd)) + r = -EIO; + journal_file_close(f); return r; @@ -2697,7 +2726,8 @@ int journal_file_open_reliably( r != -EHOSTDOWN && /* other machine */ r != -EPROTONOSUPPORT && /* incompatible feature */ r != -EBUSY && /* unclean shutdown */ - r != -ESHUTDOWN /* already archived */) + r != -ESHUTDOWN && /* already archived */ + r != -EIO /* IO error, including SIGBUS on mmap */) return r; if ((flags & O_ACCMODE) == O_RDONLY) @@ -2804,7 +2834,12 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6 return r; } - return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset); + r = journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset); + + if (mmap_cache_got_sigbus(to->mmap, to->fd)) + return -EIO; + + return r; } void journal_default_metrics(JournalMetrics *m, int fd) { diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 01bb4e038a..19fd7257f4 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -27,7 +27,7 @@ #include <gcrypt.h> #endif -#include "systemd/sd-id128.h" +#include "sd-id128.h" #include "sparse-endian.h" #include "journal-def.h" diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index a2a2e197c0..6d037cfec4 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -452,6 +452,7 @@ bool shall_try_append_again(JournalFile *f, int r) { -EFBIG Hit fs limit -EDQUOT Quota limit hit -ENOSPC Disk full + -EIO I/O error of some kind (mmap) -EHOSTDOWN Other machine -EBUSY Unclean shutdown -EPROTONOSUPPORT Unsupported feature @@ -469,6 +470,8 @@ bool shall_try_append_again(JournalFile *f, int r) { log_info("%s: Unsupported feature, rotating.", f->path); else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN) log_warning("%s: Journal file corrupted, rotating.", f->path); + else if (r == -EIO) + log_warning("%s: IO error, rotating.", f->path); else return false; diff --git a/src/journal/journald.c b/src/journal/journald.c index 604c8617bb..80f4634f67 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -33,6 +33,8 @@ #include "journald-kmsg.h" #include "journald-syslog.h" +#include "sigbus.h" + int main(int argc, char *argv[]) { Server server; int r; @@ -49,6 +51,8 @@ int main(int argc, char *argv[]) { umask(0022); + sigbus_install(); + r = server_init(&server); if (r < 0) goto finish; diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 4c940aaa24..ab21cdc288 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -29,6 +29,7 @@ #include "log.h" #include "util.h" #include "macro.h" +#include "sigbus.h" #include "mmap-cache.h" typedef struct Window Window; @@ -38,6 +39,7 @@ typedef struct FileDescriptor FileDescriptor; struct Window { MMapCache *cache; + bool invalidated; bool keep_always; bool in_unused; @@ -65,6 +67,7 @@ struct Context { struct FileDescriptor { MMapCache *cache; int fd; + bool sigbus; LIST_HEAD(Window, windows); }; @@ -134,6 +137,21 @@ static void window_unlink(Window *w) { } } +static void window_invalidate(Window *w) { + assert(w); + + if (w->invalidated) + return; + + /* Replace the window with anonymous pages. This is useful + * when we hit a SIGBUS and want to make sure the file cannot + * trigger any further SIGBUS, possibly overrunning the sigbus + * queue. */ + + assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr); + w->invalidated = true; +} + static void window_free(Window *w) { assert(w); @@ -383,6 +401,9 @@ static int try_context( return 0; } + if (c->window->fd->sigbus) + return -EIO; + c->window->keep_always |= keep_always; *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset); @@ -414,6 +435,9 @@ static int find_mmap( assert(f->fd == fd); + if (f->sigbus) + return -EIO; + LIST_FOREACH(by_fd, w, f->windows) if (window_matches(w, fd, prot, offset, size)) break; @@ -572,27 +596,111 @@ int mmap_cache_get( return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret); } -void mmap_cache_close_fd(MMapCache *m, int fd) { +unsigned mmap_cache_get_hit(MMapCache *m) { + assert(m); + + return m->n_hit; +} + +unsigned mmap_cache_get_missed(MMapCache *m) { + assert(m); + + return m->n_missed; +} + +static void mmap_cache_process_sigbus(MMapCache *m) { + bool found = false; FileDescriptor *f; + Iterator i; + int r; assert(m); - assert(fd >= 0); - f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); - if (!f) + /* Iterate through all triggered pages and mark their files as + * invalidated */ + for (;;) { + bool ours; + void *addr; + + r = sigbus_pop(&addr); + if (_likely_(r == 0)) + break; + if (r < 0) { + log_error_errno(r, "SIGBUS handling failed: %m"); + abort(); + } + + ours = false; + HASHMAP_FOREACH(f, m->fds, i) { + Window *w; + + LIST_FOREACH(by_fd, w, f->windows) { + if ((uint8_t*) addr >= (uint8_t*) w->ptr && + (uint8_t*) addr < (uint8_t*) w->ptr + w->size) { + found = ours = f->sigbus = true; + break; + } + } + + if (ours) + break; + } + + /* Didn't find a matching window, give up */ + if (!ours) { + log_error("Unknown SIGBUS page, aborting."); + abort(); + } + } + + /* The list of triggered pages is now empty. Now, let's remap + * all windows of the triggered file to anonymous maps, so + * that no page of the file in question is triggered again, so + * that we can be sure not to hit the queue size limit. */ + if (_likely_(!found)) return; - fd_free(f); + HASHMAP_FOREACH(f, m->fds, i) { + Window *w; + + if (!f->sigbus) + continue; + + LIST_FOREACH(by_fd, w, f->windows) + window_invalidate(w); + } } -unsigned mmap_cache_get_hit(MMapCache *m) { +bool mmap_cache_got_sigbus(MMapCache *m, int fd) { + FileDescriptor *f; + assert(m); + assert(fd >= 0); - return m->n_hit; + mmap_cache_process_sigbus(m); + + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (!f) + return false; + + return f->sigbus; } -unsigned mmap_cache_get_missed(MMapCache *m) { +void mmap_cache_close_fd(MMapCache *m, int fd) { + FileDescriptor *f; + assert(m); + assert(fd >= 0); - return m->n_missed; + /* Make sure that any queued SIGBUS are first dispatched, so + * that we don't end up with a SIGBUS entry we cannot relate + * to any existing memory map */ + + mmap_cache_process_sigbus(m); + + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (!f) + return; + + fd_free(f); } diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h index fe2c83d751..a85c2b6063 100644 --- a/src/journal/mmap-cache.h +++ b/src/journal/mmap-cache.h @@ -25,7 +25,8 @@ #include <stdbool.h> #include <sys/stat.h> -#define MMAP_CACHE_MAX_CONTEXTS 8 +/* One context per object type, plus one of the header, plus one "additional" one */ +#define MMAP_CACHE_MAX_CONTEXTS 9 typedef struct MMapCache MMapCache; @@ -47,3 +48,5 @@ void mmap_cache_close_fd(MMapCache *m, int fd); unsigned mmap_cache_get_hit(MMapCache *m); unsigned mmap_cache_get_missed(MMapCache *m); + +bool mmap_cache_got_sigbus(MMapCache *m, int fd); diff --git a/src/shared/sigbus.c b/src/shared/sigbus.c new file mode 100644 index 0000000000..0108603fe8 --- /dev/null +++ b/src/shared/sigbus.c @@ -0,0 +1,152 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2014 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <signal.h> +#include <sys/mman.h> + +#include "macro.h" +#include "util.h" +#include "sigbus.h" + +#define SIGBUS_QUEUE_MAX 64 + +static struct sigaction old_sigaction; +static unsigned n_installed = 0; + +/* We maintain a fixed size list of page addresses that triggered a + SIGBUS. We access with list with atomic operations, so that we + don't have to deal with locks between signal handler and main + programs in possibly multiple threads. */ + +static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX]; +static volatile sig_atomic_t n_sigbus_queue = 0; + +static void sigbus_push(void *addr) { + unsigned u; + + assert(addr); + + /* Find a free place, increase the number of entries and leave, if we can */ + for (u = 0; u < SIGBUS_QUEUE_MAX; u++) + if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) { + __sync_fetch_and_add(&n_sigbus_queue, 1); + return; + } + + /* If we can't, make sure the queue size is out of bounds, to + * mark it as overflow */ + for (;;) { + unsigned c; + + __sync_synchronize(); + c = n_sigbus_queue; + + if (c > SIGBUS_QUEUE_MAX) /* already overflow */ + return; + + if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX)) + return; + } +} + +int sigbus_pop(void **ret) { + assert(ret); + + for (;;) { + unsigned u, c; + + __sync_synchronize(); + c = n_sigbus_queue; + + if (_likely_(c == 0)) + return 0; + + if (_unlikely_(c >= SIGBUS_QUEUE_MAX)) + return -EOVERFLOW; + + for (u = 0; u < SIGBUS_QUEUE_MAX; u++) { + void *addr; + + addr = sigbus_queue[u]; + if (!addr) + continue; + + if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) { + __sync_fetch_and_sub(&n_sigbus_queue, 1); + *ret = addr; + return 1; + } + } + } +} + +static void sigbus_handler(int sn, siginfo_t *si, void *data) { + unsigned long ul; + void *aligned; + + assert(sn == SIGBUS); + assert(si); + + if (si->si_code != BUS_ADRERR || !si->si_addr) { + assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + raise(SIGBUS); + return; + } + + ul = (unsigned long) si->si_addr; + ul = ul / page_size(); + ul = ul * page_size(); + aligned = (void*) ul; + + /* Let's remember which address failed */ + sigbus_push(aligned); + + /* Replace mapping with an anonymous page, so that the + * execution can continue, however with a zeroed out page */ + assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned); +} + +void sigbus_install(void) { + struct sigaction sa = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, + }; + + n_installed++; + + if (n_installed == 1) + assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0); + + return; +} + +void sigbus_reset(void) { + + if (n_installed <= 0) + return; + + n_installed--; + + if (n_installed == 0) + assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + + return; +} diff --git a/src/shared/sigbus.h b/src/shared/sigbus.h new file mode 100644 index 0000000000..25593af2d4 --- /dev/null +++ b/src/shared/sigbus.h @@ -0,0 +1,25 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2014 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +void sigbus_install(void); +void sigbus_reset(void); + +int sigbus_pop(void **ret); diff --git a/src/test/test-sigbus.c b/src/test/test-sigbus.c new file mode 100644 index 0000000000..39d0fec894 --- /dev/null +++ b/src/test/test-sigbus.c @@ -0,0 +1,62 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2014 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sys/mman.h> + +#include "util.h" +#include "sigbus.h" + +int main(int argc, char *argv[]) { + _cleanup_close_ int fd = -1; + char template[] = "/tmp/sigbus-test-XXXXXX"; + void *addr = NULL; + uint8_t *p; + + sigbus_install(); + + assert(sigbus_pop(&addr) == 0); + + assert_se((fd = mkostemp(template, O_RDWR|O_CREAT|O_EXCL)) >= 0); + assert_se(unlink(template) >= 0); + assert_se(fallocate(fd, 0, 0, page_size() * 8) >= 0); + + p = mmap(NULL, page_size() * 16, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + assert_se(p != MAP_FAILED); + + assert_se(sigbus_pop(&addr) == 0); + + p[0] = 0xFF; + assert_se(sigbus_pop(&addr) == 0); + + p[page_size()] = 0xFF; + assert_se(sigbus_pop(&addr) == 0); + + p[page_size()*8] = 0xFF; + p[page_size()*8+1] = 0xFF; + p[page_size()*10] = 0xFF; + assert_se(sigbus_pop(&addr) > 0); + assert_se(addr == p + page_size() * 8); + assert_se(sigbus_pop(&addr) > 0); + assert_se(addr == p + page_size() * 10); + assert_se(sigbus_pop(&addr) == 0); + + sigbus_reset(); +} |