diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/journal/journal-file.c | 67 | ||||
| -rw-r--r-- | src/journal/journal-file.h | 2 | ||||
| -rw-r--r-- | src/journal/journald-server.c | 3 | ||||
| -rw-r--r-- | src/journal/journald.c | 4 | ||||
| -rw-r--r-- | src/journal/mmap-cache.c | 126 | ||||
| -rw-r--r-- | src/journal/mmap-cache.h | 5 | ||||
| -rw-r--r-- | src/shared/sigbus.c | 152 | ||||
| -rw-r--r-- | src/shared/sigbus.h | 25 | ||||
| -rw-r--r-- | src/test/test-sigbus.c | 62 | 
9 files changed, 419 insertions, 27 deletions
| diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 48c27ee627..44a96928e0 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -67,6 +67,9 @@  /* How much to increase the journal file size at once each time we allocate something new. */  #define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8MB */ +/* The mmap context to use for the header we pick as one above the last defined typed */ +#define CONTEXT_HEADER _OBJECT_TYPE_MAX +  static int journal_file_set_online(JournalFile *f) {          assert(f); @@ -76,6 +79,9 @@ static int journal_file_set_online(JournalFile *f) {          if (!(f->fd >= 0 && f->header))                  return -EINVAL; +        if (mmap_cache_got_sigbus(f->mmap, f->fd)) +                return -EIO; +          switch(f->header->state) {                  case STATE_ONLINE:                          return 0; @@ -104,8 +110,14 @@ int journal_file_set_offline(JournalFile *f) {          fsync(f->fd); +        if (mmap_cache_got_sigbus(f->mmap, f->fd)) +                return -EIO; +          f->header->state = STATE_OFFLINE; +        if (mmap_cache_got_sigbus(f->mmap, f->fd)) +                return -EIO; +          fsync(f->fd);          return 0; @@ -120,14 +132,10 @@ void journal_file_close(JournalFile *f) {                  journal_file_append_tag(f);  #endif -        /* Sync everything to disk, before we mark the file offline */ -        if (f->mmap && f->fd >= 0) -                mmap_cache_close_fd(f->mmap, f->fd); -          journal_file_set_offline(f); -        if (f->header) -                munmap(f->header, PAGE_ALIGN(sizeof(Header))); +        if (f->mmap && f->fd >= 0) +                mmap_cache_close_fd(f->mmap, f->fd);          safe_close(f->fd);          free(f->path); @@ -194,8 +202,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {  }  static int journal_file_refresh_header(JournalFile *f) { -        int r;          sd_id128_t boot_id; +        int r;          assert(f); @@ -212,12 +220,12 @@ static int journal_file_refresh_header(JournalFile *f) {          f->header->boot_id = boot_id; -        journal_file_set_online(f); +        r = journal_file_set_online(f);          /* Sync the online state to disk */          fsync(f->fd); -        return 0; +        return r;  }  static int journal_file_verify_header(JournalFile *f) { @@ -321,6 +329,9 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)           * for sure, since we always call posix_fallocate()           * ourselves */ +        if (mmap_cache_got_sigbus(f->mmap, f->fd)) +                return -EIO; +          old_size =                  le64toh(f->header->header_size) +                  le64toh(f->header->arena_size); @@ -376,6 +387,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)  static unsigned type_to_context(ObjectType type) {          /* One context for each type, plus one catch-all for the rest */          assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS); +        assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);          return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;  } @@ -1357,6 +1369,14 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st          r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset); +        /* If the memory mapping triggered a SIGBUS then we return an +         * IO error and ignore the error code passed down to us, since +         * it is very likely just an effect of a nullified replacement +         * mapping page */ + +        if (mmap_cache_got_sigbus(f->mmap, f->fd)) +                r = -EIO; +          journal_file_post_change(f);          return r; @@ -1712,7 +1732,6 @@ found:          return 1;  } -  static int generic_array_bisect_plus_one(                  JournalFile *f,                  uint64_t extra, @@ -2457,9 +2476,10 @@ int journal_file_open(                  JournalFile *template,                  JournalFile **ret) { +        bool newly_created = false;          JournalFile *f; +        void *h;          int r; -        bool newly_created = false;          assert(fname);          assert(ret); @@ -2564,13 +2584,14 @@ int journal_file_open(                  goto fail;          } -        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); -        if (f->header == MAP_FAILED) { -                f->header = NULL; +        r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h); +        if (r < 0) {                  r = -errno;                  goto fail;          } +        f->header = h; +          if (!newly_created) {                  r = journal_file_verify_header(f);                  if (r < 0) @@ -2627,10 +2648,18 @@ int journal_file_open(          if (r < 0)                  goto fail; +        if (mmap_cache_got_sigbus(f->mmap, f->fd)) { +                r = -EIO; +                goto fail; +        } +          *ret = f;          return 0;  fail: +        if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd)) +                r = -EIO; +          journal_file_close(f);          return r; @@ -2697,7 +2726,8 @@ int journal_file_open_reliably(              r != -EHOSTDOWN && /* other machine */              r != -EPROTONOSUPPORT && /* incompatible feature */              r != -EBUSY && /* unclean shutdown */ -            r != -ESHUTDOWN /* already archived */) +            r != -ESHUTDOWN && /* already archived */ +            r != -EIO /* IO error, including SIGBUS on mmap */)                  return r;          if ((flags & O_ACCMODE) == O_RDONLY) @@ -2804,7 +2834,12 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6                          return r;          } -        return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset); +        r = journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset); + +        if (mmap_cache_got_sigbus(to->mmap, to->fd)) +                return -EIO; + +        return r;  }  void journal_default_metrics(JournalMetrics *m, int fd) { diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 01bb4e038a..19fd7257f4 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -27,7 +27,7 @@  #include <gcrypt.h>  #endif -#include "systemd/sd-id128.h" +#include "sd-id128.h"  #include "sparse-endian.h"  #include "journal-def.h" diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index a2a2e197c0..6d037cfec4 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -452,6 +452,7 @@ bool shall_try_append_again(JournalFile *f, int r) {             -EFBIG            Hit fs limit             -EDQUOT           Quota limit hit             -ENOSPC           Disk full +           -EIO              I/O error of some kind (mmap)             -EHOSTDOWN        Other machine             -EBUSY            Unclean shutdown             -EPROTONOSUPPORT  Unsupported feature @@ -469,6 +470,8 @@ bool shall_try_append_again(JournalFile *f, int r) {                  log_info("%s: Unsupported feature, rotating.", f->path);          else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)                  log_warning("%s: Journal file corrupted, rotating.", f->path); +        else if (r == -EIO) +                log_warning("%s: IO error, rotating.", f->path);          else                  return false; diff --git a/src/journal/journald.c b/src/journal/journald.c index 604c8617bb..80f4634f67 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -33,6 +33,8 @@  #include "journald-kmsg.h"  #include "journald-syslog.h" +#include "sigbus.h" +  int main(int argc, char *argv[]) {          Server server;          int r; @@ -49,6 +51,8 @@ int main(int argc, char *argv[]) {          umask(0022); +        sigbus_install(); +          r = server_init(&server);          if (r < 0)                  goto finish; diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 4c940aaa24..ab21cdc288 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -29,6 +29,7 @@  #include "log.h"  #include "util.h"  #include "macro.h" +#include "sigbus.h"  #include "mmap-cache.h"  typedef struct Window Window; @@ -38,6 +39,7 @@ typedef struct FileDescriptor FileDescriptor;  struct Window {          MMapCache *cache; +        bool invalidated;          bool keep_always;          bool in_unused; @@ -65,6 +67,7 @@ struct Context {  struct FileDescriptor {          MMapCache *cache;          int fd; +        bool sigbus;          LIST_HEAD(Window, windows);  }; @@ -134,6 +137,21 @@ static void window_unlink(Window *w) {          }  } +static void window_invalidate(Window *w) { +        assert(w); + +        if (w->invalidated) +                return; + +        /* Replace the window with anonymous pages. This is useful +         * when we hit a SIGBUS and want to make sure the file cannot +         * trigger any further SIGBUS, possibly overrunning the sigbus +         * queue. */ + +        assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr); +        w->invalidated = true; +} +  static void window_free(Window *w) {          assert(w); @@ -383,6 +401,9 @@ static int try_context(                  return 0;          } +        if (c->window->fd->sigbus) +                return -EIO; +          c->window->keep_always |= keep_always;          *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset); @@ -414,6 +435,9 @@ static int find_mmap(          assert(f->fd == fd); +        if (f->sigbus) +                return -EIO; +          LIST_FOREACH(by_fd, w, f->windows)                  if (window_matches(w, fd, prot, offset, size))                          break; @@ -572,27 +596,111 @@ int mmap_cache_get(          return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);  } -void mmap_cache_close_fd(MMapCache *m, int fd) { +unsigned mmap_cache_get_hit(MMapCache *m) { +        assert(m); + +        return m->n_hit; +} + +unsigned mmap_cache_get_missed(MMapCache *m) { +        assert(m); + +        return m->n_missed; +} + +static void mmap_cache_process_sigbus(MMapCache *m) { +        bool found = false;          FileDescriptor *f; +        Iterator i; +        int r;          assert(m); -        assert(fd >= 0); -        f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); -        if (!f) +        /* Iterate through all triggered pages and mark their files as +         * invalidated */ +        for (;;) { +                bool ours; +                void *addr; + +                r = sigbus_pop(&addr); +                if (_likely_(r == 0)) +                        break; +                if (r < 0) { +                        log_error_errno(r, "SIGBUS handling failed: %m"); +                        abort(); +                } + +                ours = false; +                HASHMAP_FOREACH(f, m->fds, i) { +                        Window *w; + +                        LIST_FOREACH(by_fd, w, f->windows) { +                                if ((uint8_t*) addr >= (uint8_t*) w->ptr && +                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) { +                                        found = ours = f->sigbus = true; +                                        break; +                                } +                        } + +                        if (ours) +                                break; +                } + +                /* Didn't find a matching window, give up */ +                if (!ours) { +                        log_error("Unknown SIGBUS page, aborting."); +                        abort(); +                } +        } + +        /* The list of triggered pages is now empty. Now, let's remap +         * all windows of the triggered file to anonymous maps, so +         * that no page of the file in question is triggered again, so +         * that we can be sure not to hit the queue size limit. */ +        if (_likely_(!found))                  return; -        fd_free(f); +        HASHMAP_FOREACH(f, m->fds, i) { +                Window *w; + +                if (!f->sigbus) +                        continue; + +                LIST_FOREACH(by_fd, w, f->windows) +                        window_invalidate(w); +        }  } -unsigned mmap_cache_get_hit(MMapCache *m) { +bool mmap_cache_got_sigbus(MMapCache *m, int fd) { +        FileDescriptor *f; +          assert(m); +        assert(fd >= 0); -        return m->n_hit; +        mmap_cache_process_sigbus(m); + +        f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); +        if (!f) +                return false; + +        return f->sigbus;  } -unsigned mmap_cache_get_missed(MMapCache *m) { +void mmap_cache_close_fd(MMapCache *m, int fd) { +        FileDescriptor *f; +          assert(m); +        assert(fd >= 0); -        return m->n_missed; +        /* Make sure that any queued SIGBUS are first dispatched, so +         * that we don't end up with a SIGBUS entry we cannot relate +         * to any existing memory map */ + +        mmap_cache_process_sigbus(m); + +        f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); +        if (!f) +                return; + +        fd_free(f);  } diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h index fe2c83d751..a85c2b6063 100644 --- a/src/journal/mmap-cache.h +++ b/src/journal/mmap-cache.h @@ -25,7 +25,8 @@  #include <stdbool.h>  #include <sys/stat.h> -#define MMAP_CACHE_MAX_CONTEXTS 8 +/* One context per object type, plus one of the header, plus one "additional" one */ +#define MMAP_CACHE_MAX_CONTEXTS 9  typedef struct MMapCache MMapCache; @@ -47,3 +48,5 @@ void mmap_cache_close_fd(MMapCache *m, int fd);  unsigned mmap_cache_get_hit(MMapCache *m);  unsigned mmap_cache_get_missed(MMapCache *m); + +bool mmap_cache_got_sigbus(MMapCache *m, int fd); diff --git a/src/shared/sigbus.c b/src/shared/sigbus.c new file mode 100644 index 0000000000..0108603fe8 --- /dev/null +++ b/src/shared/sigbus.c @@ -0,0 +1,152 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** +  This file is part of systemd. + +  Copyright 2014 Lennart Poettering + +  systemd is free software; you can redistribute it and/or modify it +  under the terms of the GNU Lesser General Public License as published by +  the Free Software Foundation; either version 2.1 of the License, or +  (at your option) any later version. + +  systemd is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  Lesser General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <signal.h> +#include <sys/mman.h> + +#include "macro.h" +#include "util.h" +#include "sigbus.h" + +#define SIGBUS_QUEUE_MAX 64 + +static struct sigaction old_sigaction; +static unsigned n_installed = 0; + +/* We maintain a fixed size list of page addresses that triggered a +   SIGBUS. We access with list with atomic operations, so that we +   don't have to deal with locks between signal handler and main +   programs in possibly multiple threads. */ + +static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX]; +static volatile sig_atomic_t n_sigbus_queue = 0; + +static void sigbus_push(void *addr) { +        unsigned u; + +        assert(addr); + +        /* Find a free place, increase the number of entries and leave, if we can */ +        for (u = 0; u < SIGBUS_QUEUE_MAX; u++) +                if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) { +                        __sync_fetch_and_add(&n_sigbus_queue, 1); +                        return; +                } + +        /* If we can't, make sure the queue size is out of bounds, to +         * mark it as overflow */ +        for (;;) { +                unsigned c; + +                __sync_synchronize(); +                c = n_sigbus_queue; + +                if (c > SIGBUS_QUEUE_MAX) /* already overflow */ +                        return; + +                if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX)) +                        return; +        } +} + +int sigbus_pop(void **ret) { +        assert(ret); + +        for (;;) { +                unsigned u, c; + +                __sync_synchronize(); +                c = n_sigbus_queue; + +                if (_likely_(c == 0)) +                        return 0; + +                if (_unlikely_(c >= SIGBUS_QUEUE_MAX)) +                        return -EOVERFLOW; + +                for (u = 0; u < SIGBUS_QUEUE_MAX; u++) { +                        void *addr; + +                        addr = sigbus_queue[u]; +                        if (!addr) +                                continue; + +                        if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) { +                                __sync_fetch_and_sub(&n_sigbus_queue, 1); +                                *ret = addr; +                                return 1; +                        } +                } +        } +} + +static void sigbus_handler(int sn, siginfo_t *si, void *data) { +        unsigned long ul; +        void *aligned; + +        assert(sn == SIGBUS); +        assert(si); + +        if (si->si_code != BUS_ADRERR || !si->si_addr) { +                assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); +                raise(SIGBUS); +                return; +        } + +        ul = (unsigned long) si->si_addr; +        ul = ul / page_size(); +        ul = ul * page_size(); +        aligned = (void*) ul; + +        /* Let's remember which address failed */ +        sigbus_push(aligned); + +        /* Replace mapping with an anonymous page, so that the +         * execution can continue, however with a zeroed out page */ +        assert_se(mmap(aligned, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == aligned); +} + +void sigbus_install(void) { +        struct sigaction sa = { +                .sa_sigaction = sigbus_handler, +                .sa_flags = SA_SIGINFO, +        }; + +        n_installed++; + +        if (n_installed == 1) +                assert_se(sigaction(SIGBUS, &sa, &old_sigaction) == 0); + +        return; +} + +void sigbus_reset(void) { + +        if (n_installed <= 0) +                return; + +        n_installed--; + +        if (n_installed == 0) +                assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0); + +        return; +} diff --git a/src/shared/sigbus.h b/src/shared/sigbus.h new file mode 100644 index 0000000000..25593af2d4 --- /dev/null +++ b/src/shared/sigbus.h @@ -0,0 +1,25 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** +  This file is part of systemd. + +  Copyright 2014 Lennart Poettering + +  systemd is free software; you can redistribute it and/or modify it +  under the terms of the GNU Lesser General Public License as published by +  the Free Software Foundation; either version 2.1 of the License, or +  (at your option) any later version. + +  systemd is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  Lesser General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +void sigbus_install(void); +void sigbus_reset(void); + +int sigbus_pop(void **ret); diff --git a/src/test/test-sigbus.c b/src/test/test-sigbus.c new file mode 100644 index 0000000000..39d0fec894 --- /dev/null +++ b/src/test/test-sigbus.c @@ -0,0 +1,62 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** +  This file is part of systemd. + +  Copyright 2014 Lennart Poettering + +  systemd is free software; you can redistribute it and/or modify it +  under the terms of the GNU Lesser General Public License as published by +  the Free Software Foundation; either version 2.1 of the License, or +  (at your option) any later version. + +  systemd is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  Lesser General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <sys/mman.h> + +#include "util.h" +#include "sigbus.h" + +int main(int argc, char *argv[]) { +        _cleanup_close_ int fd = -1; +        char template[] = "/tmp/sigbus-test-XXXXXX"; +        void *addr = NULL; +        uint8_t *p; + +        sigbus_install(); + +        assert(sigbus_pop(&addr) == 0); + +        assert_se((fd = mkostemp(template, O_RDWR|O_CREAT|O_EXCL)) >= 0); +        assert_se(unlink(template) >= 0); +        assert_se(fallocate(fd, 0, 0, page_size() * 8) >= 0); + +        p = mmap(NULL, page_size() * 16, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); +        assert_se(p != MAP_FAILED); + +        assert_se(sigbus_pop(&addr) == 0); + +        p[0] = 0xFF; +        assert_se(sigbus_pop(&addr) == 0); + +        p[page_size()] = 0xFF; +        assert_se(sigbus_pop(&addr) == 0); + +        p[page_size()*8] = 0xFF; +        p[page_size()*8+1] = 0xFF; +        p[page_size()*10] = 0xFF; +        assert_se(sigbus_pop(&addr) > 0); +        assert_se(addr == p + page_size() * 8); +        assert_se(sigbus_pop(&addr) > 0); +        assert_se(addr == p + page_size() * 10); +        assert_se(sigbus_pop(&addr) == 0); + +        sigbus_reset(); +} | 
