summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/journal/journal-file.c67
-rw-r--r--src/journal/journal-file.h2
-rw-r--r--src/journal/journald-server.c3
-rw-r--r--src/journal/journald.c4
-rw-r--r--src/journal/mmap-cache.c126
-rw-r--r--src/journal/mmap-cache.h5
-rw-r--r--src/shared/sigbus.c152
-rw-r--r--src/shared/sigbus.h25
-rw-r--r--src/test/test-sigbus.c62
9 files changed, 419 insertions, 27 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 48c27ee627..44a96928e0 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -67,6 +67,9 @@
/* How much to increase the journal file size at once each time we allocate something new. */
#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
+/* For the header's mmap context we pick the value one above the last defined object type */
+#define CONTEXT_HEADER _OBJECT_TYPE_MAX
+
static int journal_file_set_online(JournalFile *f) {
assert(f);
@@ -76,6 +79,9 @@ static int journal_file_set_online(JournalFile *f) {
if (!(f->fd >= 0 && f->header))
return -EINVAL;
+ if (mmap_cache_got_sigbus(f->mmap, f->fd))
+ return -EIO;
+
switch(f->header->state) {
case STATE_ONLINE:
return 0;
@@ -104,8 +110,14 @@ int journal_file_set_offline(JournalFile *f) {
fsync(f->fd);
+ if (mmap_cache_got_sigbus(f->mmap, f->fd))
+ return -EIO;
+
f->header->state = STATE_OFFLINE;
+ if (mmap_cache_got_sigbus(f->mmap, f->fd))
+ return -EIO;
+
fsync(f->fd);
return 0;
@@ -120,14 +132,10 @@ void journal_file_close(JournalFile *f) {
journal_file_append_tag(f);
#endif
- /* Sync everything to disk, before we mark the file offline */
- if (f->mmap && f->fd >= 0)
- mmap_cache_close_fd(f->mmap, f->fd);
-
journal_file_set_offline(f);
- if (f->header)
- munmap(f->header, PAGE_ALIGN(sizeof(Header)));
+ if (f->mmap && f->fd >= 0)
+ mmap_cache_close_fd(f->mmap, f->fd);
safe_close(f->fd);
free(f->path);
@@ -194,8 +202,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
}
static int journal_file_refresh_header(JournalFile *f) {
- int r;
sd_id128_t boot_id;
+ int r;
assert(f);
@@ -212,12 +220,12 @@ static int journal_file_refresh_header(JournalFile *f) {
f->header->boot_id = boot_id;
- journal_file_set_online(f);
+ r = journal_file_set_online(f);
/* Sync the online state to disk */
fsync(f->fd);
- return 0;
+ return r;
}
static int journal_file_verify_header(JournalFile *f) {
@@ -321,6 +329,9 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
* for sure, since we always call posix_fallocate()
* ourselves */
+ if (mmap_cache_got_sigbus(f->mmap, f->fd))
+ return -EIO;
+
old_size =
le64toh(f->header->header_size) +
le64toh(f->header->arena_size);
@@ -376,6 +387,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
static unsigned type_to_context(ObjectType type) {
/* One context for each type, plus one catch-all for the rest */
assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
+ assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
}
@@ -1357,6 +1369,14 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
+ /* If the memory mapping triggered a SIGBUS then we return an
+ * IO error and ignore the error code passed down to us, since
+ * it is very likely just an effect of a nullified replacement
+ * mapping page */
+
+ if (mmap_cache_got_sigbus(f->mmap, f->fd))
+ r = -EIO;
+
journal_file_post_change(f);
return r;
@@ -1712,7 +1732,6 @@ found:
return 1;
}
-
static int generic_array_bisect_plus_one(
JournalFile *f,
uint64_t extra,
@@ -2457,9 +2476,10 @@ int journal_file_open(
JournalFile *template,
JournalFile **ret) {
+ bool newly_created = false;
JournalFile *f;
+ void *h;
int r;
- bool newly_created = false;
assert(fname);
assert(ret);
@@ -2564,13 +2584,14 @@ int journal_file_open(
goto fail;
}
- f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
- if (f->header == MAP_FAILED) {
- f->header = NULL;
+ r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h);
+ if (r < 0) {
r = -errno;
goto fail;
}
+ f->header = h;
+
if (!newly_created) {
r = journal_file_verify_header(f);
if (r < 0)
@@ -2627,10 +2648,18 @@ int journal_file_open(
if (r < 0)
goto fail;
+ if (mmap_cache_got_sigbus(f->mmap, f->fd)) {
+ r = -EIO;
+ goto fail;
+ }
+
*ret = f;
return 0;
fail:
+ if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd))
+ r = -EIO;
+
journal_file_close(f);
return r;
@@ -2697,7 +2726,8 @@ int journal_file_open_reliably(
r != -EHOSTDOWN && /* other machine */
r != -EPROTONOSUPPORT && /* incompatible feature */
r != -EBUSY && /* unclean shutdown */
- r != -ESHUTDOWN /* already archived */)
+ r != -ESHUTDOWN && /* already archived */
+ r != -EIO /* IO error, including SIGBUS on mmap */)
return r;
if ((flags & O_ACCMODE) == O_RDONLY)
@@ -2804,7 +2834,12 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
return r;
}
- return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
+ r = journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
+
+ if (mmap_cache_got_sigbus(to->mmap, to->fd))
+ return -EIO;
+
+ return r;
}
void journal_default_metrics(JournalMetrics *m, int fd) {
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index 01bb4e038a..19fd7257f4 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -27,7 +27,7 @@
#include <gcrypt.h>
#endif
-#include "systemd/sd-id128.h"
+#include "sd-id128.h"
#include "sparse-endian.h"
#include "journal-def.h"
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index a2a2e197c0..6d037cfec4 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -452,6 +452,7 @@ bool shall_try_append_again(JournalFile *f, int r) {
-EFBIG Hit fs limit
-EDQUOT Quota limit hit
-ENOSPC Disk full
+ -EIO I/O error of some kind (mmap)
-EHOSTDOWN Other machine
-EBUSY Unclean shutdown
-EPROTONOSUPPORT Unsupported feature
@@ -469,6 +470,8 @@ bool shall_try_append_again(JournalFile *f, int r) {
log_info("%s: Unsupported feature, rotating.", f->path);
else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
log_warning("%s: Journal file corrupted, rotating.", f->path);
+ else if (r == -EIO)
+ log_warning("%s: IO error, rotating.", f->path);
else
return false;
diff --git a/src/journal/journald.c b/src/journal/journald.c
index 604c8617bb..80f4634f67 100644
--- a/src/journal/journald.c
+++ b/src/journal/journald.c
@@ -33,6 +33,8 @@
#include "journald-kmsg.h"
#include "journald-syslog.h"
+#include "sigbus.h"
+
int main(int argc, char *argv[]) {
Server server;
int r;
@@ -49,6 +51,8 @@ int main(int argc, char *argv[]) {
umask(0022);
+ sigbus_install();
+
r = server_init(&server);
if (r < 0)
goto finish;
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
index 4c940aaa24..ab21cdc288 100644
--- a/src/journal/mmap-cache.c
+++ b/src/journal/mmap-cache.c
@@ -29,6 +29,7 @@
#include "log.h"
#include "util.h"
#include "macro.h"
+#include "sigbus.h"
#include "mmap-cache.h"
typedef struct Window Window;
@@ -38,6 +39,7 @@ typedef struct FileDescriptor FileDescriptor;
struct Window {
MMapCache *cache;
+ bool invalidated;
bool keep_always;
bool in_unused;
@@ -65,6 +67,7 @@ struct Context {
struct FileDescriptor {
MMapCache *cache;
int fd;
+ bool sigbus;
LIST_HEAD(Window, windows);
};
@@ -134,6 +137,21 @@ static void window_unlink(Window *w) {
}
}
+/* Swaps the file-backed pages of a window for private anonymous
+ * ones. Does nothing for a window that was already invalidated. */
+static void window_invalidate(Window *w) {
+        assert(w);
+
+        if (!w->invalidated) {
+                /* MAP_FIXED puts the anonymous mapping over the very
+                 * same range, with the window's protection bits. This
+                 * is useful when we hit a SIGBUS and want to make
+                 * sure the file cannot trigger any further SIGBUS,
+                 * possibly overrunning the sigbus queue. */
+                assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
+                w->invalidated = true;
+        }
+}
+
static void window_free(Window *w) {
assert(w);
@@ -383,6 +401,9 @@ static int try_context(
return 0;
}
+ if (c->window->fd->sigbus)
+ return -EIO;
+
c->window->keep_always |= keep_always;
*ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
@@ -414,6 +435,9 @@ static int find_mmap(
assert(f->fd == fd);
+ if (f->sigbus)
+ return -EIO;
+
LIST_FOREACH(by_fd, w, f->windows)
if (window_matches(w, fd, prot, offset, size))
break;
@@ -572,27 +596,111 @@ int mmap_cache_get(
return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
}
-void mmap_cache_close_fd(MMapCache *m, int fd) {
+unsigned mmap_cache_get_hit(MMapCache *m) {
+ assert(m);
+
+ return m->n_hit;
+}
+
+unsigned mmap_cache_get_missed(MMapCache *m) {
+ assert(m);
+
+ return m->n_missed;
+}
+
+static void mmap_cache_process_sigbus(MMapCache *m) {
+ bool found = false;
FileDescriptor *f;
+ Iterator i;
+ int r;
assert(m);
- assert(fd >= 0);
- f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
- if (!f)
+ /* Iterate through all triggered pages and mark their files as
+ * invalidated */
+ for (;;) {
+ bool ours;
+ void *addr;
+
+ r = sigbus_pop(&addr);
+ if (_likely_(r == 0))
+ break;
+ if (r < 0) {
+ log_error_errno(r, "SIGBUS handling failed: %m");
+ abort();
+ }
+
+ ours = false;
+ HASHMAP_FOREACH(f, m->fds, i) {
+ Window *w;
+
+ LIST_FOREACH(by_fd, w, f->windows) {
+ if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
+ (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
+ found = ours = f->sigbus = true;
+ break;
+ }
+ }
+
+ if (ours)
+ break;
+ }
+
+ /* Didn't find a matching window, give up */
+ if (!ours) {
+ log_error("Unknown SIGBUS page, aborting.");
+ abort();
+ }
+ }
+
+ /* The list of triggered pages is now empty. Now, let's remap
+ * all windows of the triggered file to anonymous maps, so
+ * that no page of the file in question is triggered again, so
+ * that we can be sure not to hit the queue size limit. */
+ if (_likely_(!found))
return;
- fd_free(f);
+ HASHMAP_FOREACH(f, m->fds, i) {
+ Window *w;
+
+ if (!f->sigbus)
+ continue;
+
+ LIST_FOREACH(by_fd, w, f->windows)
+ window_invalidate(w);
+ }
}
-unsigned mmap_cache_get_hit(MMapCache *m) {
+bool mmap_cache_got_sigbus(MMapCache *m, int fd) {
+ FileDescriptor *f;
+
assert(m);
+ assert(fd >= 0);
- return m->n_hit;
+ mmap_cache_process_sigbus(m);
+
+ f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
+ if (!f)
+ return false;
+
+ return f->sigbus;
}
-unsigned mmap_cache_get_missed(MMapCache *m) {
+void mmap_cache_close_fd(MMapCache *m, int fd) {
+ FileDescriptor *f;
+
assert(m);
+ assert(fd >= 0);
- return m->n_missed;
+ /* Make sure that any queued SIGBUS are first dispatched, so
+ * that we don't end up with a SIGBUS entry we cannot relate
+ * to any existing memory map */
+
+ mmap_cache_process_sigbus(m);
+
+ f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
+ if (!f)
+ return;
+
+ fd_free(f);
}
diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h
index fe2c83d751..a85c2b6063 100644
--- a/src/journal/mmap-cache.h
+++ b/src/journal/mmap-cache.h
@@ -25,7 +25,8 @@
#include <stdbool.h>
#include <sys/stat.h>
-#define MMAP_CACHE_MAX_CONTEXTS 8
+/* One context per object type, plus one for the header, plus one "additional" one */
+#define MMAP_CACHE_MAX_CONTEXTS 9
typedef struct MMapCache MMapCache;
@@ -47,3 +48,5 @@ void mmap_cache_close_fd(MMapCache *m, int fd);
unsigned mmap_cache_get_hit(MMapCache *m);
unsigned mmap_cache_get_missed(MMapCache *m);
+
+bool mmap_cache_got_sigbus(MMapCache *m, int fd);
diff --git a/src/shared/sigbus.c b/src/shared/sigbus.c
new file mode 100644
index 0000000000..0108603fe8
--- /dev/null
+++ b/src/shared/sigbus.c
@@ -0,0 +1,152 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <signal.h>
+#include <sys/mman.h>
+
+#include "macro.h"
+#include "util.h"
+#include "sigbus.h"
+
+#define SIGBUS_QUEUE_MAX 64
+
+static struct sigaction old_sigaction;
+static unsigned n_installed = 0;
+
+/* We maintain a fixed size list of page addresses that triggered a
+   SIGBUS. We access this list with atomic operations, so that we
+   don't have to deal with locks between the signal handler and the
+   main program in possibly multiple threads. */
+
+static void* volatile sigbus_queue[SIGBUS_QUEUE_MAX];
+static volatile sig_atomic_t n_sigbus_queue = 0;
+
+/* Records the address of a faulting page in the first free slot of
+ * sigbus_queue. Called from sigbus_handler(); only atomic builtins
+ * are used, no locks.
+ *
+ * A NULL entry marks a free slot, hence addr must not be NULL. If no
+ * slot is free the entry counter is pushed out of bounds instead,
+ * which sigbus_pop() reports as -EOVERFLOW. */
+static void sigbus_push(void *addr) {
+ unsigned u;
+
+ assert(addr);
+
+ /* Find a free place, increase the number of entries and leave, if we can */
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++)
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], NULL, addr)) {
+ __sync_fetch_and_add(&n_sigbus_queue, 1);
+ return;
+ }
+
+ /* If we can't, make sure the queue size is out of bounds, to
+ * mark it as overflow */
+ for (;;) {
+ unsigned c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (c > SIGBUS_QUEUE_MAX) /* already overflow */
+ return;
+
+ /* The swap only succeeds if the counter still holds the
+ * value read above; otherwise read it again and retry */
+ if (__sync_bool_compare_and_swap(&n_sigbus_queue, c, c + SIGBUS_QUEUE_MAX))
+ return;
+ }
+}
+
+/* Takes one queued SIGBUS page address off sigbus_queue.
+ *
+ * Returns 0 if the entry counter is zero (*ret is left untouched), 1
+ * if an address was removed from the queue and stored in *ret, and
+ * -EOVERFLOW if the entry counter is at or beyond SIGBUS_QUEUE_MAX. */
+int sigbus_pop(void **ret) {
+ assert(ret);
+
+ /* Start over with a fresh counter value whenever a full scan of
+ * the slots did not yield an entry */
+ for (;;) {
+ unsigned u, c;
+
+ __sync_synchronize();
+ c = n_sigbus_queue;
+
+ if (_likely_(c == 0))
+ return 0;
+
+ if (_unlikely_(c >= SIGBUS_QUEUE_MAX))
+ return -EOVERFLOW;
+
+ for (u = 0; u < SIGBUS_QUEUE_MAX; u++) {
+ void *addr;
+
+ addr = sigbus_queue[u];
+ if (!addr)
+ continue;
+
+ /* Claim the entry by swapping the slot back to NULL; if
+ * the slot changed in the meantime, try the next one */
+ if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) {
+ __sync_fetch_and_sub(&n_sigbus_queue, 1);
+ *ret = addr;
+ return 1;
+ }
+ }
+ }
+}
+
+/* SIGBUS handler, installed with SA_SIGINFO by sigbus_install().
+ *
+ * For a BUS_ADRERR fault with a known address the start of the
+ * faulting page is queued and the page is replaced by anonymous
+ * memory. Any other SIGBUS is passed on to the previously installed
+ * signal action. The data parameter is not used. */
+static void sigbus_handler(int sn, siginfo_t *si, void *data) {
+        unsigned long fault;
+        void *page;
+
+        assert(sn == SIGBUS);
+        assert(si);
+
+        if (si->si_code == BUS_ADRERR && si->si_addr) {
+                /* Round the faulting address down to the start of its page */
+                fault = (unsigned long) si->si_addr;
+                page = (void*) (fault - fault % page_size());
+
+                /* Remember which page failed, so that it can be
+                 * retrieved later on with sigbus_pop() */
+                sigbus_push(page);
+
+                /* Put an anonymous page in place of the failing one,
+                 * so that the execution can continue, however with a
+                 * zeroed out page */
+                assert_se(mmap(page, page_size(), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == page);
+                return;
+        }
+
+        /* Not a fault we know how to deal with: put the old signal
+         * action back in place and raise the signal again */
+        assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+        raise(SIGBUS);
+}
+
+/* Counts one more user of the SIGBUS handler. Only the call that
+ * brings the count to one installs sigbus_handler(), saving the
+ * previous signal action in old_sigaction. */
+void sigbus_install(void) {
+        struct sigaction action = {
+                .sa_sigaction = sigbus_handler,
+                .sa_flags = SA_SIGINFO,
+        };
+
+        /* Already installed by an earlier call? */
+        if (++n_installed != 1)
+                return;
+
+        assert_se(sigaction(SIGBUS, &action, &old_sigaction) == 0);
+}
+
+/* Drops one user of the SIGBUS handler. Does nothing if the count is
+ * already zero; once it drops to zero the signal action saved in
+ * old_sigaction is put back in place. */
+void sigbus_reset(void) {
+        if (n_installed == 0)
+                return;
+
+        /* Other users remain, keep the handler installed */
+        if (--n_installed > 0)
+                return;
+
+        assert_se(sigaction(SIGBUS, &old_sigaction, NULL) == 0);
+}
diff --git a/src/shared/sigbus.h b/src/shared/sigbus.h
new file mode 100644
index 0000000000..25593af2d4
--- /dev/null
+++ b/src/shared/sigbus.h
@@ -0,0 +1,25 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/* Installs the SIGBUS handler. Calls are counted; only the first one
+ * changes the signal action. */
+void sigbus_install(void);
+/* Drops one installation count. The previous SIGBUS action is
+ * restored when the count reaches zero. */
+void sigbus_reset(void);
+
+/* Retrieves one queued faulting page address. Returns 0 if nothing is
+ * queued, 1 with the address stored in *ret, or -EOVERFLOW if the
+ * queue counter is at or beyond its limit. */
+int sigbus_pop(void **ret);
diff --git a/src/test/test-sigbus.c b/src/test/test-sigbus.c
new file mode 100644
index 0000000000..39d0fec894
--- /dev/null
+++ b/src/test/test-sigbus.c
@@ -0,0 +1,62 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mman.h>
+
+#include "util.h"
+#include "sigbus.h"
+
+/* Exercises the SIGBUS handling: a 16-page shared mapping is placed
+ * over a file that has only 8 pages allocated, pages inside and
+ * beyond the file are written to, and the queue of faulting page
+ * addresses is checked after each step. */
+int main(int argc, char *argv[]) {
+        _cleanup_close_ int fd = -1;
+        char template[] = "/tmp/sigbus-test-XXXXXX";
+        void *addr = NULL;
+        uint8_t *p;
+
+        sigbus_install();
+
+        /* Nothing has faulted yet. This uses assert_se() like every
+         * other check in this test, so that the sigbus_pop() call is
+         * kept even in builds where assert() is compiled out. */
+        assert_se(sigbus_pop(&addr) == 0);
+
+        /* An already unlinked temporary file with 8 pages allocated */
+        assert_se((fd = mkostemp(template, O_RDWR|O_CREAT|O_EXCL)) >= 0);
+        assert_se(unlink(template) >= 0);
+        assert_se(fallocate(fd, 0, 0, page_size() * 8) >= 0);
+
+        /* Map twice as many pages as were allocated */
+        p = mmap(NULL, page_size() * 16, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+        assert_se(p != MAP_FAILED);
+
+        assert_se(sigbus_pop(&addr) == 0);
+
+        /* Writes to the first two pages must not queue anything */
+        p[0] = 0xFF;
+        assert_se(sigbus_pop(&addr) == 0);
+
+        p[page_size()] = 0xFF;
+        assert_se(sigbus_pop(&addr) == 0);
+
+        /* Two writes to page 8 and one to page 10, all beyond the
+         * allocated part: exactly one queue entry per page is
+         * expected, page 8 first */
+        p[page_size()*8] = 0xFF;
+        p[page_size()*8+1] = 0xFF;
+        p[page_size()*10] = 0xFF;
+        assert_se(sigbus_pop(&addr) > 0);
+        assert_se(addr == p + page_size() * 8);
+        assert_se(sigbus_pop(&addr) > 0);
+        assert_se(addr == p + page_size() * 10);
+        assert_se(sigbus_pop(&addr) == 0);
+
+        sigbus_reset();
+
+        return 0;
+}