summaryrefslogtreecommitdiff
path: root/src/journal/journal-file.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/journal/journal-file.c')
-rw-r--r--src/journal/journal-file.c259
1 files changed, 231 insertions, 28 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 994d1ec5d8..f5e2952c99 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -20,6 +20,7 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/fs.h>
+#include <pthread.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/statvfs.h>
@@ -38,6 +39,7 @@
#include "parse-util.h"
#include "random-util.h"
#include "sd-event.h"
+#include "set.h"
#include "string-util.h"
#include "xattr-util.h"
@@ -86,33 +88,127 @@
/* The mmap context to use for the header we pick as one above the last defined typed */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX
-static int journal_file_set_online(JournalFile *f) {
+/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync().
+ * As a result we use atomic operations on f->offline_state for inter-thread communications with
+ * journal_file_set_offline() and journal_file_set_online(). */
+static void journal_file_set_offline_internal(JournalFile *f) {
assert(f);
+ assert(f->fd >= 0);
+ assert(f->header);
- if (!f->writable)
- return -EPERM;
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
+ continue;
+ return;
- if (!(f->fd >= 0 && f->header))
- return -EINVAL;
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_SYNCING:
+ (void) fsync(f->fd);
+
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
+ continue;
+
+ f->header->state = STATE_OFFLINE;
+ (void) fsync(f->fd);
+ break;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
+ continue;
+ /* fall through */
+
+ case OFFLINE_DONE:
+ return;
+
+ case OFFLINE_JOINED:
+ log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()");
+ return;
+ }
+ }
+}
+
+static void * journal_file_set_offline_thread(void *arg) {
+ JournalFile *f = arg;
+
+ journal_file_set_offline_internal(f);
+
+ return NULL;
+}
+
+static int journal_file_set_offline_thread_join(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (f->offline_state == OFFLINE_JOINED)
+ return 0;
+
+ r = pthread_join(f->offline_thread, NULL);
+ if (r)
+ return -r;
+
+ f->offline_state = OFFLINE_JOINED;
if (mmap_cache_got_sigbus(f->mmap, f->fd))
return -EIO;
- switch (f->header->state) {
- case STATE_ONLINE:
- return 0;
+ return 0;
+}
- case STATE_OFFLINE:
- f->header->state = STATE_ONLINE;
- fsync(f->fd);
- return 0;
+/* Trigger a restart if the offline thread is mid-flight in a restartable state. */
+static bool journal_file_set_offline_try_restart(JournalFile *f) {
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ return true;
+
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
+ continue;
+ return true;
default:
- return -EINVAL;
+ return false;
+ }
}
}
-int journal_file_set_offline(JournalFile *f) {
+/* Sets a journal offline.
+ *
+ * If wait is false then an offline is dispatched in a separate thread for a
+ * subsequent journal_file_set_offline() or journal_file_set_online() of the
+ * same journal to synchronize with.
+ *
+ * If wait is true, then either an existing offline thread will be restarted
+ * and joined, or if none exists the offline is simply performed in this
+ * context without involving another thread.
+ */
+int journal_file_set_offline(JournalFile *f, bool wait) {
+ bool restarted;
+ int r;
+
assert(f);
if (!f->writable)
@@ -124,19 +220,107 @@ int journal_file_set_offline(JournalFile *f) {
if (f->header->state != STATE_ONLINE)
return 0;
- fsync(f->fd);
+ /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
+ restarted = journal_file_set_offline_try_restart(f);
+ if ((restarted && wait) || !restarted) {
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+ }
+
+ if (restarted)
+ return 0;
+
+ /* Initiate a new offline. */
+ f->offline_state = OFFLINE_SYNCING;
- if (mmap_cache_got_sigbus(f->mmap, f->fd))
- return -EIO;
+ if (wait) /* Without using a thread if waiting. */
+ journal_file_set_offline_internal(f);
+ else {
+ r = pthread_create(&f->offline_thread, NULL, journal_file_set_offline_thread, f);
+ if (r > 0)
+ return -r;
+ }
+
+ return 0;
+}
+
+static int journal_file_set_online(JournalFile *f) {
+ bool joined = false;
- f->header->state = STATE_OFFLINE;
+ assert(f);
+
+ if (!f->writable)
+ return -EPERM;
+
+ if (!(f->fd >= 0 && f->header))
+ return -EINVAL;
+
+ while (!joined) {
+ switch (f->offline_state) {
+ case OFFLINE_JOINED:
+ /* No offline thread, no need to wait. */
+ joined = true;
+ break;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled syncing prior to offlining, no need to wait. */
+ break;
+
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from syncing, no need to wait. */
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from offlining, must wait for offlining to complete however. */
+
+ /* fall through to wait */
+ default: {
+ int r;
+
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+
+ joined = true;
+ break;
+ }
+ }
+ }
if (mmap_cache_got_sigbus(f->mmap, f->fd))
return -EIO;
- fsync(f->fd);
+ switch (f->header->state) {
+ case STATE_ONLINE:
+ return 0;
- return 0;
+ case STATE_OFFLINE:
+ f->header->state = STATE_ONLINE;
+ (void) fsync(f->fd);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+bool journal_file_is_offlining(JournalFile *f) {
+ assert(f);
+
+ __sync_synchronize();
+
+ if (f->offline_state == OFFLINE_DONE ||
+ f->offline_state == OFFLINE_JOINED)
+ return false;
+
+ return true;
}
JournalFile* journal_file_close(JournalFile *f) {
@@ -159,7 +343,7 @@ JournalFile* journal_file_close(JournalFile *f) {
sd_event_source_unref(f->post_change_timer);
}
- journal_file_set_offline(f);
+ journal_file_set_offline(f, true);
if (f->mmap && f->fd >= 0)
mmap_cache_close_fd(f->mmap, f->fd);
@@ -203,6 +387,15 @@ JournalFile* journal_file_close(JournalFile *f) {
return NULL;
}
+void journal_file_close_set(Set *s) {
+ JournalFile *f;
+
+ assert(s);
+
+ while ((f = set_steal_first(s)))
+ (void) journal_file_close(f);
+}
+
static int journal_file_init_header(JournalFile *f, JournalFile *template) {
Header h = {};
ssize_t k;
@@ -263,7 +456,7 @@ static int journal_file_refresh_header(JournalFile *f) {
r = journal_file_set_online(f);
/* Sync the online state to disk */
- fsync(f->fd);
+ (void) fsync(f->fd);
return r;
}
@@ -2710,6 +2903,7 @@ int journal_file_open(
bool seal,
JournalMetrics *metrics,
MMapCache *mmap_cache,
+ Set *deferred_closes,
JournalFile *template,
JournalFile **ret) {
@@ -2829,6 +3023,9 @@ int journal_file_open(
f->header = h;
if (!newly_created) {
+ if (deferred_closes)
+ journal_file_close_set(deferred_closes);
+
r = journal_file_verify_header(f);
if (r < 0)
goto fail;
@@ -2898,12 +3095,12 @@ fail:
if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd))
r = -EIO;
- journal_file_close(f);
+ (void) journal_file_close(f);
return r;
}
-int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
+int journal_file_rotate(JournalFile **f, bool compress, bool seal, Set *deferred_closes) {
_cleanup_free_ char *p = NULL;
size_t l;
JournalFile *old_file, *new_file = NULL;
@@ -2943,8 +3140,13 @@ int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
* we archive them */
old_file->defrag_on_close = true;
- r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
- journal_file_close(old_file);
+ r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, deferred_closes, old_file, &new_file);
+
+ if (deferred_closes &&
+ set_put(deferred_closes, old_file) >= 0)
+ (void) journal_file_set_offline(old_file, false);
+ else
+ (void) journal_file_close(old_file);
*f = new_file;
return r;
@@ -2958,6 +3160,7 @@ int journal_file_open_reliably(
bool seal,
JournalMetrics *metrics,
MMapCache *mmap_cache,
+ Set *deferred_closes,
JournalFile *template,
JournalFile **ret) {
@@ -2965,7 +3168,7 @@ int journal_file_open_reliably(
size_t l;
_cleanup_free_ char *p = NULL;
- r = journal_file_open(fname, flags, mode, compress, seal, metrics, mmap_cache, template, ret);
+ r = journal_file_open(fname, flags, mode, compress, seal, metrics, mmap_cache, deferred_closes, template, ret);
if (!IN_SET(r,
-EBADMSG, /* corrupted */
-ENODATA, /* truncated */
@@ -3006,7 +3209,7 @@ int journal_file_open_reliably(
log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
- return journal_file_open(fname, flags, mode, compress, seal, metrics, mmap_cache, template, ret);
+ return journal_file_open(fname, flags, mode, compress, seal, metrics, mmap_cache, deferred_closes, template, ret);
}
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {