summaryrefslogtreecommitdiff
path: root/src/journal/journal-file.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/journal/journal-file.c')
-rw-r--r--src/journal/journal-file.c905
1 files changed, 724 insertions, 181 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index be6a5522fa..49199b269f 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -1,5 +1,3 @@
-/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
-
/***
This file is part of systemd.
@@ -19,22 +17,32 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-#include <sys/mman.h>
#include <errno.h>
-#include <sys/uio.h>
-#include <unistd.h>
-#include <sys/statvfs.h>
#include <fcntl.h>
-#include <stddef.h>
#include <linux/fs.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "alloc-util.h"
#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "journal-authenticate.h"
#include "journal-def.h"
#include "journal-file.h"
-#include "journal-authenticate.h"
#include "lookup3.h"
-#include "compress.h"
+#include "parse-util.h"
+#include "path-util.h"
#include "random-util.h"
+#include "sd-event.h"
+#include "set.h"
+#include "string-util.h"
+#include "xattr-util.h"
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
@@ -42,13 +50,16 @@
#define COMPRESSION_SIZE_THRESHOLD (512ULL)
/* This is the minimum journal file size */
-#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL) /* 4 MiB */
+#define JOURNAL_FILE_SIZE_MIN (512ULL*1024ULL) /* 512 KiB */
/* These are the lower and upper bounds if we deduce the max_use value
* from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+/* This is the default minimal use limit, how much we'll use even if keep_free suggests otherwise. */
+#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL) /* 1 MiB */
+
/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
@@ -60,6 +71,9 @@
* size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
+/* This is the default maximum number of journal files to keep around. */
+#define DEFAULT_N_MAX_FILES (100)
+
/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
@@ -75,33 +89,127 @@
/* The mmap context to use for the header we pick as one above the last defined typed */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX
-static int journal_file_set_online(JournalFile *f) {
+/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync().
+ * As a result we use atomic operations on f->offline_state for inter-thread communications with
+ * journal_file_set_offline() and journal_file_set_online(). */
+static void journal_file_set_offline_internal(JournalFile *f) {
assert(f);
+ assert(f->fd >= 0);
+ assert(f->header);
- if (!f->writable)
- return -EPERM;
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
+ continue;
+ return;
- if (!(f->fd >= 0 && f->header))
- return -EINVAL;
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
+ continue;
+ break;
+
+ case OFFLINE_SYNCING:
+ (void) fsync(f->fd);
+
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
+ continue;
+
+ f->header->state = f->archive ? STATE_ARCHIVED : STATE_OFFLINE;
+ (void) fsync(f->fd);
+ break;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
+ continue;
+ /* fall through */
+
+ case OFFLINE_DONE:
+ return;
+
+ case OFFLINE_JOINED:
+ log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()");
+ return;
+ }
+ }
+}
+
+static void * journal_file_set_offline_thread(void *arg) {
+ JournalFile *f = arg;
+
+ journal_file_set_offline_internal(f);
+
+ return NULL;
+}
+
+static int journal_file_set_offline_thread_join(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (f->offline_state == OFFLINE_JOINED)
+ return 0;
+
+ r = pthread_join(f->offline_thread, NULL);
+ if (r)
+ return -r;
+
+ f->offline_state = OFFLINE_JOINED;
if (mmap_cache_got_sigbus(f->mmap, f->fd))
return -EIO;
- switch(f->header->state) {
- case STATE_ONLINE:
- return 0;
+ return 0;
+}
- case STATE_OFFLINE:
- f->header->state = STATE_ONLINE;
- fsync(f->fd);
- return 0;
+/* Trigger a restart if the offline thread is mid-flight in a restartable state. */
+static bool journal_file_set_offline_try_restart(JournalFile *f) {
+ for (;;) {
+ switch (f->offline_state) {
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ return true;
+
+ case OFFLINE_CANCEL:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
+ continue;
+ return true;
+
+ case OFFLINE_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
+ continue;
+ return true;
default:
- return -EINVAL;
+ return false;
+ }
}
}
-int journal_file_set_offline(JournalFile *f) {
+/* Sets a journal offline.
+ *
+ * If wait is false then an offline is dispatched in a separate thread for a
+ * subsequent journal_file_set_offline() or journal_file_set_online() of the
+ * same journal to synchronize with.
+ *
+ * If wait is true, then either an existing offline thread will be restarted
+ * and joined, or if none exists the offline is simply performed in this
+ * context without involving another thread.
+ */
+int journal_file_set_offline(JournalFile *f, bool wait) {
+ bool restarted;
+ int r;
+
assert(f);
if (!f->writable)
@@ -110,34 +218,142 @@ int journal_file_set_offline(JournalFile *f) {
if (!(f->fd >= 0 && f->header))
return -EINVAL;
- if (f->header->state != STATE_ONLINE)
+ /* An offlining journal is implicitly online and may modify f->header->state,
+ * we must also join any potentially lingering offline thread when not online. */
+ if (!journal_file_is_offlining(f) && f->header->state != STATE_ONLINE)
+ return journal_file_set_offline_thread_join(f);
+
+ /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
+ restarted = journal_file_set_offline_try_restart(f);
+ if ((restarted && wait) || !restarted) {
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+ }
+
+ if (restarted)
return 0;
- fsync(f->fd);
+ /* Initiate a new offline. */
+ f->offline_state = OFFLINE_SYNCING;
- if (mmap_cache_got_sigbus(f->mmap, f->fd))
- return -EIO;
+ if (wait) /* Without using a thread if waiting. */
+ journal_file_set_offline_internal(f);
+ else {
+ r = pthread_create(&f->offline_thread, NULL, journal_file_set_offline_thread, f);
+ if (r > 0) {
+ f->offline_state = OFFLINE_JOINED;
+ return -r;
+ }
+ }
+
+ return 0;
+}
- f->header->state = STATE_OFFLINE;
+static int journal_file_set_online(JournalFile *f) {
+ bool joined = false;
+
+ assert(f);
+
+ if (!f->writable)
+ return -EPERM;
+
+ if (!(f->fd >= 0 && f->header))
+ return -EINVAL;
+
+ while (!joined) {
+ switch (f->offline_state) {
+ case OFFLINE_JOINED:
+ /* No offline thread, no need to wait. */
+ joined = true;
+ break;
+
+ case OFFLINE_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled syncing prior to offlining, no need to wait. */
+ break;
+
+ case OFFLINE_AGAIN_FROM_SYNCING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from syncing, no need to wait. */
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING:
+ if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_CANCEL))
+ continue;
+ /* Canceled restart from offlining, must wait for offlining to complete however. */
+
+ /* fall through to wait */
+ default: {
+ int r;
+
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+
+ joined = true;
+ break;
+ }
+ }
+ }
if (mmap_cache_got_sigbus(f->mmap, f->fd))
return -EIO;
- fsync(f->fd);
+ switch (f->header->state) {
+ case STATE_ONLINE:
+ return 0;
- return 0;
+ case STATE_OFFLINE:
+ f->header->state = STATE_ONLINE;
+ (void) fsync(f->fd);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+bool journal_file_is_offlining(JournalFile *f) {
+ assert(f);
+
+ __sync_synchronize();
+
+ if (f->offline_state == OFFLINE_DONE ||
+ f->offline_state == OFFLINE_JOINED)
+ return false;
+
+ return true;
}
-void journal_file_close(JournalFile *f) {
+JournalFile* journal_file_close(JournalFile *f) {
assert(f);
#ifdef HAVE_GCRYPT
/* Write the final tag */
- if (f->seal && f->writable)
- journal_file_append_tag(f);
+ if (f->seal && f->writable) {
+ int r;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ log_error_errno(r, "Failed to append tag when closing journal: %m");
+ }
#endif
- journal_file_set_offline(f);
+ if (f->post_change_timer) {
+ int enabled;
+
+ if (sd_event_source_get_enabled(f->post_change_timer, &enabled) >= 0)
+ if (enabled == SD_EVENT_ONESHOT)
+ journal_file_post_change(f);
+
+ (void) sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_OFF);
+ sd_event_source_unref(f->post_change_timer);
+ }
+
+ journal_file_set_offline(f, true);
if (f->mmap && f->fd >= 0)
mmap_cache_close_fd(f->mmap, f->fd);
@@ -154,11 +370,11 @@ void journal_file_close(JournalFile *f) {
(void) btrfs_defrag_fd(f->fd);
}
- safe_close(f->fd);
+ if (f->close_fd)
+ safe_close(f->fd);
free(f->path);
- if (f->mmap)
- mmap_cache_unref(f->mmap);
+ mmap_cache_unref(f->mmap);
ordered_hashmap_free_free(f->chain_cache);
@@ -169,7 +385,7 @@ void journal_file_close(JournalFile *f) {
#ifdef HAVE_GCRYPT
if (f->fss_file)
munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
- else if (f->fsprg_state)
+ else
free(f->fsprg_state);
free(f->fsprg_seed);
@@ -179,6 +395,16 @@ void journal_file_close(JournalFile *f) {
#endif
free(f);
+ return NULL;
+}
+
+void journal_file_close_set(Set *s) {
+ JournalFile *f;
+
+ assert(s);
+
+ while ((f = set_steal_first(s)))
+ (void) journal_file_close(f);
}
static int journal_file_init_header(JournalFile *f, JournalFile *template) {
@@ -218,11 +444,45 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
return 0;
}
+static int fsync_directory_of_file(int fd) {
+ _cleanup_free_ char *path = NULL, *dn = NULL;
+ _cleanup_close_ int dfd = -1;
+ struct stat st;
+ int r;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISREG(st.st_mode))
+ return -EBADFD;
+
+ r = fd_get_path(fd, &path);
+ if (r < 0)
+ return r;
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ dn = dirname_malloc(path);
+ if (!dn)
+ return -ENOMEM;
+
+ dfd = open(dn, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
+ if (dfd < 0)
+ return -errno;
+
+ if (fsync(dfd) < 0)
+ return -errno;
+
+ return 0;
+}
+
static int journal_file_refresh_header(JournalFile *f) {
sd_id128_t boot_id;
int r;
assert(f);
+ assert(f->header);
r = sd_id128_get_machine(&f->header->machine_id);
if (r < 0)
@@ -240,7 +500,10 @@ static int journal_file_refresh_header(JournalFile *f) {
r = journal_file_set_online(f);
/* Sync the online state to disk */
- fsync(f->fd);
+ (void) fsync(f->fd);
+
+ /* We likely just created a new file, also sync the directory this file is located in. */
+ (void) fsync_directory_of_file(f->fd);
return r;
}
@@ -249,6 +512,7 @@ static int journal_file_verify_header(JournalFile *f) {
uint32_t flags;
assert(f);
+ assert(f->header);
if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
return -EBADMSG;
@@ -304,8 +568,8 @@ static int journal_file_verify_header(JournalFile *f) {
return -ENODATA;
if (f->writable) {
- uint8_t state;
sd_id128_t machine_id;
+ uint8_t state;
int r;
r = sd_id128_get_machine(&machine_id);
@@ -326,6 +590,14 @@ static int journal_file_verify_header(JournalFile *f) {
log_debug("Journal file %s has unknown state %i.", f->path, state);
return -EBUSY;
}
+
+ /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't
+ * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks
+ * bisection. */
+ if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) {
+ log_debug("Journal file %s is from the future, refusing to append new data to it that'd be older.", f->path);
+ return -ETXTBSY;
+ }
}
f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
@@ -357,6 +629,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
int r;
assert(f);
+ assert(f->header);
/* We assume that this file is not sparse, and we know that
* for sure, since we always call posix_fallocate()
@@ -398,12 +671,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
if (fstatvfs(f->fd, &svfs) >= 0) {
uint64_t available;
- available = svfs.f_bfree * svfs.f_bsize;
-
- if (available >= f->metrics.keep_free)
- available -= f->metrics.keep_free;
- else
- available = 0;
+ available = LESS_BY((uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize, f->metrics.keep_free);
if (new_size - old_size > available)
return -E2BIG;
@@ -487,8 +755,16 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
assert(ret);
/* Objects may only be located at multiple of 64 bit */
- if (!VALID64(offset))
- return -EFAULT;
+ if (!VALID64(offset)) {
+ log_debug("Attempt to move to object at non-64bit boundary: %" PRIu64, offset);
+ return -EBADMSG;
+ }
+
+ /* Object may not be located in the file header */
+ if (offset < le64toh(f->header->header_size)) {
+ log_debug("Attempt to move to object located in file header: %" PRIu64, offset);
+ return -EBADMSG;
+ }
r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t);
if (r < 0)
@@ -497,17 +773,29 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
o = (Object*) t;
s = le64toh(o->object.size);
- if (s < sizeof(ObjectHeader))
+ if (s == 0) {
+ log_debug("Attempt to move to uninitialized object: %" PRIu64, offset);
+ return -EBADMSG;
+ }
+ if (s < sizeof(ObjectHeader)) {
+ log_debug("Attempt to move to overly short object: %" PRIu64, offset);
return -EBADMSG;
+ }
- if (o->object.type <= OBJECT_UNUSED)
+ if (o->object.type <= OBJECT_UNUSED) {
+ log_debug("Attempt to move to object with invalid type: %" PRIu64, offset);
return -EBADMSG;
+ }
- if (s < minimum_header_size(o))
+ if (s < minimum_header_size(o)) {
+ log_debug("Attempt to move to truncated object: %" PRIu64, offset);
return -EBADMSG;
+ }
- if (type > OBJECT_UNUSED && o->object.type != type)
+ if (type > OBJECT_UNUSED && o->object.type != type) {
+ log_debug("Attempt to move to object of unexpected type: %" PRIu64, offset);
return -EBADMSG;
+ }
if (s > sizeof(ObjectHeader)) {
r = journal_file_move_to(f, type, false, offset, s, &t);
@@ -525,6 +813,7 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
uint64_t r;
assert(f);
+ assert(f->header);
r = le64toh(f->header->tail_entry_seqnum) + 1;
@@ -554,6 +843,7 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
void *t;
assert(f);
+ assert(f->header);
assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
assert(size >= sizeof(ObjectHeader));
assert(offset);
@@ -603,11 +893,12 @@ static int journal_file_setup_data_hash_table(JournalFile *f) {
int r;
assert(f);
+ assert(f->header);
- /* We estimate that we need 1 hash table entry per 768 of
- journal file and we want to make sure we never get beyond
- 75% fill level. Calculate the hash table size for the
- maximum file size based on these metrics. */
+ /* We estimate that we need 1 hash table entry per 768 bytes
+ of journal file and we want to make sure we never get
+ beyond 75% fill level. Calculate the hash table size for
+ the maximum file size based on these metrics. */
s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
@@ -636,6 +927,7 @@ static int journal_file_setup_field_hash_table(JournalFile *f) {
int r;
assert(f);
+ assert(f->header);
/* We use a fixed size hash table for the fields as this
* number should grow very slowly only */
@@ -656,12 +948,16 @@ static int journal_file_setup_field_hash_table(JournalFile *f) {
return 0;
}
-static int journal_file_map_data_hash_table(JournalFile *f) {
+int journal_file_map_data_hash_table(JournalFile *f) {
uint64_t s, p;
void *t;
int r;
assert(f);
+ assert(f->header);
+
+ if (f->data_hash_table)
+ return 0;
p = le64toh(f->header->data_hash_table_offset);
s = le64toh(f->header->data_hash_table_size);
@@ -678,12 +974,16 @@ static int journal_file_map_data_hash_table(JournalFile *f) {
return 0;
}
-static int journal_file_map_field_hash_table(JournalFile *f) {
+int journal_file_map_field_hash_table(JournalFile *f) {
uint64_t s, p;
void *t;
int r;
assert(f);
+ assert(f->header);
+
+ if (f->field_hash_table)
+ return 0;
p = le64toh(f->header->field_hash_table_offset);
s = le64toh(f->header->field_hash_table_size);
@@ -710,6 +1010,8 @@ static int journal_file_link_field(
int r;
assert(f);
+ assert(f->header);
+ assert(f->field_hash_table);
assert(o);
assert(offset > 0);
@@ -753,6 +1055,8 @@ static int journal_file_link_data(
int r;
assert(f);
+ assert(f->header);
+ assert(f->data_hash_table);
assert(o);
assert(offset > 0);
@@ -801,12 +1105,21 @@ int journal_file_find_field_object_with_hash(
int r;
assert(f);
+ assert(f->header);
assert(field && size > 0);
+ /* If the field hash table is empty, we can't find anything */
+ if (le64toh(f->header->field_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the field hash table, if it isn't mapped yet. */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
osize = offsetof(Object, field.payload) + size;
m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
-
if (m <= 0)
return -EBADMSG;
@@ -864,8 +1177,18 @@ int journal_file_find_data_object_with_hash(
int r;
assert(f);
+ assert(f->header);
assert(data || size == 0);
+ /* If there's no data hash table, then there's no entry. */
+ if (le64toh(f->header->data_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the data hash table, if it isn't mapped yet. */
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return r;
+
osize = offsetof(Object, data.payload) + size;
m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
@@ -1032,7 +1355,7 @@ static int journal_file_append_data(
r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
if (r < 0)
return r;
- else if (r > 0) {
+ if (r > 0) {
if (ret)
*ret = o;
@@ -1051,29 +1374,36 @@ static int journal_file_append_data(
o->data.hash = htole64(hash);
#if defined(HAVE_XZ) || defined(HAVE_LZ4)
- if (f->compress_xz &&
- size >= COMPRESSION_SIZE_THRESHOLD) {
+ if (JOURNAL_FILE_COMPRESS(f) && size >= COMPRESSION_SIZE_THRESHOLD) {
size_t rsize = 0;
- compression = compress_blob(data, size, o->data.payload, &rsize);
+ compression = compress_blob(data, size, o->data.payload, size - 1, &rsize);
- if (compression) {
+ if (compression >= 0) {
o->object.size = htole64(offsetof(Object, data.payload) + rsize);
o->object.flags |= compression;
log_debug("Compressed data object %"PRIu64" -> %zu using %s",
size, rsize, object_compressed_to_string(compression));
- }
+ } else
+ /* Compression didn't work, we don't really care why, let's continue without compression */
+ compression = 0;
}
#endif
- if (!compression && size > 0)
- memcpy(o->data.payload, data, size);
+ if (compression == 0)
+ memcpy_safe(o->data.payload, data, size);
r = journal_file_link_data(f, o, p, hash);
if (r < 0)
return r;
+#ifdef HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
+ if (r < 0)
+ return r;
+#endif
+
/* The linking might have altered the window, so let's
* refresh our pointer */
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
@@ -1098,12 +1428,6 @@ static int journal_file_append_data(
fo->field.head_data_offset = le64toh(p);
}
-#ifdef HAVE_GCRYPT
- r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
- if (r < 0)
- return r;
-#endif
-
if (ret)
*ret = o;
@@ -1150,6 +1474,7 @@ static int link_entry_into_array(JournalFile *f,
Object *o;
assert(f);
+ assert(f->header);
assert(first);
assert(idx);
assert(p > 0);
@@ -1270,6 +1595,7 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
int r;
assert(f);
+ assert(f->header);
assert(o);
assert(offset > 0);
@@ -1320,6 +1646,7 @@ static int journal_file_append_entry_internal(
int r;
assert(f);
+ assert(f->header);
assert(items || n_items == 0);
assert(ts);
@@ -1330,7 +1657,7 @@ static int journal_file_append_entry_internal(
return r;
o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
- memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
+ memcpy_safe(o->entry.items, items, n_items * sizeof(EntryItem));
o->entry.realtime = htole64(ts->realtime);
o->entry.monotonic = htole64(ts->monotonic);
o->entry.xor_hash = htole64(xor_hash);
@@ -1366,7 +1693,84 @@ void journal_file_post_change(JournalFile *f) {
__sync_synchronize();
if (ftruncate(f->fd, f->last_stat.st_size) < 0)
- log_error_errno(errno, "Failed to truncate file to its own size: %m");
+ log_debug_errno(errno, "Failed to truncate file to its own size: %m");
+}
+
+static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) {
+ assert(userdata);
+
+ journal_file_post_change(userdata);
+
+ return 1;
+}
+
+static void schedule_post_change(JournalFile *f) {
+ sd_event_source *timer;
+ int enabled, r;
+ uint64_t now;
+
+ assert(f);
+ assert(f->post_change_timer);
+
+ timer = f->post_change_timer;
+
+ r = sd_event_source_get_enabled(timer, &enabled);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get ftruncate timer state: %m");
+ goto fail;
+ }
+
+ if (enabled == SD_EVENT_ONESHOT)
+ return;
+
+ r = sd_event_now(sd_event_source_get_event(timer), CLOCK_MONOTONIC, &now);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get clock's now for scheduling ftruncate: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_time(timer, now+f->post_change_timer_period);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to enable scheduled ftruncate: %m");
+ goto fail;
+ }
+
+ return;
+
+fail:
+ /* On failure, let's simply post the change immediately. */
+ journal_file_post_change(f);
+}
+
+/* Enable coalesced change posting in a timer on the provided sd_event instance */
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL;
+ int r;
+
+ assert(f);
+ assert_return(!f->post_change_timer, -EINVAL);
+ assert(e);
+ assert(t);
+
+ r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ f->post_change_timer = timer;
+ timer = NULL;
+ f->post_change_timer_period = t;
+
+ return r;
}
static int entry_item_cmp(const void *_a, const void *_b) {
@@ -1387,6 +1791,7 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
struct dual_timestamp _ts;
assert(f);
+ assert(f->header);
assert(iovec || n_iovec == 0);
if (!ts) {
@@ -1394,10 +1799,6 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
ts = &_ts;
}
- if (f->tail_entry_monotonic_valid &&
- ts->monotonic < le64toh(f->header->tail_entry_monotonic))
- return -EINVAL;
-
#ifdef HAVE_GCRYPT
r = journal_file_maybe_append_tag(f, ts->realtime);
if (r < 0)
@@ -1434,7 +1835,10 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
if (mmap_cache_got_sigbus(f->mmap, f->fd))
r = -EIO;
- journal_file_post_change(f);
+ if (f->post_change_timer)
+ schedule_post_change(f);
+ else
+ journal_file_post_change(f);
return r;
}
@@ -1645,9 +2049,14 @@ static int generic_array_bisect(
i = right - 1;
lp = p = le64toh(array->entry_array.items[i]);
if (p <= 0)
- return -EBADMSG;
-
- r = test_object(f, p, needle);
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (1)");
+ n = i;
+ continue;
+ }
if (r < 0)
return r;
@@ -1723,9 +2132,14 @@ static int generic_array_bisect(
p = le64toh(array->entry_array.items[i]);
if (p <= 0)
- return -EBADMSG;
-
- r = test_object(f, p, needle);
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (2)");
+ right = n = i;
+ continue;
+ }
if (r < 0)
return r;
@@ -1841,7 +2255,7 @@ static int generic_array_bisect_plus_one(
goto found;
if (r > 0 && idx)
- (*idx) ++;
+ (*idx)++;
return r;
@@ -1899,6 +2313,8 @@ int journal_file_move_to_entry_by_seqnum(
direction_t direction,
Object **ret,
uint64_t *offset) {
+ assert(f);
+ assert(f->header);
return generic_array_bisect(f,
le64toh(f->header->entry_array_offset),
@@ -1934,6 +2350,8 @@ int journal_file_move_to_entry_by_realtime(
direction_t direction,
Object **ret,
uint64_t *offset) {
+ assert(f);
+ assert(f->header);
return generic_array_bisect(f,
le64toh(f->header->entry_array_offset),
@@ -2026,7 +2444,9 @@ void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
assert(af);
+ assert(af->header);
assert(bf);
+ assert(bf->header);
assert(af->location_type == LOCATION_SEEK);
assert(bf->location_type == LOCATION_SEEK);
@@ -2076,6 +2496,37 @@ int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
return 0;
}
+static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) {
+
+ /* Increase or decrease the specified index, in the right direction. */
+
+ if (direction == DIRECTION_DOWN) {
+ if (*i >= n - 1)
+ return 0;
+
+ (*i) ++;
+ } else {
+ if (*i <= 0)
+ return 0;
+
+ (*i) --;
+ }
+
+ return 1;
+}
+
+static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) {
+
+ /* Consider it an error if any of the two offsets is uninitialized */
+ if (old_offset == 0 || new_offset == 0)
+ return false;
+
+ /* If we go down, the new offset must be larger than the old one. */
+ return direction == DIRECTION_DOWN ?
+ new_offset > old_offset :
+ new_offset < old_offset;
+}
+
int journal_file_next_entry(
JournalFile *f,
uint64_t p,
@@ -2086,6 +2537,7 @@ int journal_file_next_entry(
int r;
assert(f);
+ assert(f->header);
n = le64toh(f->header->n_entries);
if (n <= 0)
@@ -2105,31 +2557,34 @@ int journal_file_next_entry(
if (r <= 0)
return r;
- if (direction == DIRECTION_DOWN) {
- if (i >= n - 1)
- return 0;
-
- i++;
- } else {
- if (i <= 0)
- return 0;
-
- i--;
- }
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
}
/* And jump to it */
- r = generic_array_get(f,
- le64toh(f->header->entry_array_offset),
- i,
- ret, &ofs);
- if (r <= 0)
- return r;
+ for (;;) {
+ r = generic_array_get(f,
+ le64toh(f->header->entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
+
+ /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if
+ * the next one might work for us instead. */
+ log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);
- if (p > 0 &&
- (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
- log_debug("%s: entry array corrupted at entry %"PRIu64,
- f->path, i);
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && !check_properly_ordered(ofs, p, direction)) {
+ log_debug("%s: entry array not properly ordered at entry %" PRIu64, f->path, i);
return -EBADMSG;
}
@@ -2146,9 +2601,9 @@ int journal_file_next_entry_for_data(
direction_t direction,
Object **ret, uint64_t *offset) {
- uint64_t n, i;
- int r;
+ uint64_t i, n, ofs;
Object *d;
+ int r;
assert(f);
assert(p > 0 || !o);
@@ -2180,25 +2635,39 @@ int journal_file_next_entry_for_data(
if (r <= 0)
return r;
- if (direction == DIRECTION_DOWN) {
- if (i >= n - 1)
- return 0;
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+
+ for (;;) {
+ r = generic_array_get_plus_one(f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ i,
+ ret, &ofs);
+ if (r > 0)
+ break;
+ if (r != -EBADMSG)
+ return r;
- i++;
- } else {
- if (i <= 0)
- return 0;
+ log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i);
- i--;
- }
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+ }
+ /* Ensure our array is properly ordered. */
+ if (p > 0 && check_properly_ordered(ofs, p, direction)) {
+ log_debug("%s data entry array not properly ordered at entry %" PRIu64, f->path, i);
+ return -EBADMSG;
}
- return generic_array_get_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- i,
- ret, offset);
+ if (offset)
+ *offset = ofs;
+
+ return 1;
}
int journal_file_move_to_entry_by_offset_for_data(
@@ -2368,6 +2837,7 @@ void journal_file_dump(JournalFile *f) {
uint64_t p;
assert(f);
+ assert(f->header);
journal_file_print_header(f);
@@ -2452,6 +2922,7 @@ void journal_file_print_header(JournalFile *f) {
char bytes[FORMAT_BYTES_MAX];
assert(f);
+ assert(f->header);
printf("File Path: %s\n"
"File ID: %s\n"
@@ -2466,11 +2937,11 @@ void journal_file_print_header(JournalFile *f) {
"Data Hash Table Size: %"PRIu64"\n"
"Field Hash Table Size: %"PRIu64"\n"
"Rotate Suggested: %s\n"
- "Head Sequential Number: %"PRIu64"\n"
- "Tail Sequential Number: %"PRIu64"\n"
- "Head Realtime Timestamp: %s\n"
- "Tail Realtime Timestamp: %s\n"
- "Tail Monotonic Timestamp: %s\n"
+ "Head Sequential Number: %"PRIu64" (%"PRIx64")\n"
+ "Tail Sequential Number: %"PRIu64" (%"PRIx64")\n"
+ "Head Realtime Timestamp: %s (%"PRIx64")\n"
+ "Tail Realtime Timestamp: %s (%"PRIx64")\n"
+ "Tail Monotonic Timestamp: %s (%"PRIx64")\n"
"Objects: %"PRIu64"\n"
"Entry Objects: %"PRIu64"\n",
f->path,
@@ -2491,11 +2962,11 @@ void journal_file_print_header(JournalFile *f) {
le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
yes_no(journal_file_rotate_suggested(f, 0)),
- le64toh(f->header->head_entry_seqnum),
- le64toh(f->header->tail_entry_seqnum),
- format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
- format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
- format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
+ le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum),
+ format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime),
+ format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime),
+ format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic),
le64toh(f->header->n_objects),
le64toh(f->header->n_entries));
@@ -2519,7 +2990,7 @@ void journal_file_print_header(JournalFile *f) {
le64toh(f->header->n_entry_arrays));
if (fstat(f->fd, &st) >= 0)
- printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
+ printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
}
static int journal_file_warn_btrfs(JournalFile *f) {
@@ -2558,6 +3029,7 @@ static int journal_file_warn_btrfs(JournalFile *f) {
}
int journal_file_open(
+ int fd,
const char *fname,
int flags,
mode_t mode,
@@ -2565,6 +3037,7 @@ int journal_file_open(
bool seal,
JournalMetrics *metrics,
MMapCache *mmap_cache,
+ Set *deferred_closes,
JournalFile *template,
JournalFile **ret) {
@@ -2573,22 +3046,24 @@ int journal_file_open(
void *h;
int r;
- assert(fname);
assert(ret);
+ assert(fd >= 0 || fname);
if ((flags & O_ACCMODE) != O_RDONLY &&
(flags & O_ACCMODE) != O_RDWR)
return -EINVAL;
- if (!endswith(fname, ".journal") &&
- !endswith(fname, ".journal~"))
- return -EINVAL;
+ if (fname) {
+ if (!endswith(fname, ".journal") &&
+ !endswith(fname, ".journal~"))
+ return -EINVAL;
+ }
f = new0(JournalFile, 1);
if (!f)
return -ENOMEM;
- f->fd = -1;
+ f->fd = fd;
f->mode = mode;
f->flags = flags;
@@ -2613,7 +3088,10 @@ int journal_file_open(
}
}
- f->path = strdup(fname);
+ if (fname)
+ f->path = strdup(fname);
+ else /* If we don't know the path, fill in something explanatory and vaguely useful */
+ asprintf(&f->path, "/proc/self/%i", fd);
if (!f->path) {
r = -ENOMEM;
goto fail;
@@ -2625,10 +3103,15 @@ int journal_file_open(
goto fail;
}
- f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
if (f->fd < 0) {
- r = -errno;
- goto fail;
+ f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
+ if (f->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ /* fds we opened here by us should also be closed by us. */
+ f->close_fd = true;
}
r = journal_file_fstat(f);
@@ -2673,7 +3156,7 @@ int journal_file_open(
}
if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
- r = -EIO;
+ r = -ENODATA;
goto fail;
}
@@ -2684,6 +3167,9 @@ int journal_file_open(
f->header = h;
if (!newly_created) {
+ if (deferred_closes)
+ journal_file_close_set(deferred_closes);
+
r = journal_file_verify_header(f);
if (r < 0)
goto fail;
@@ -2731,19 +3217,24 @@ int journal_file_open(
#endif
}
- r = journal_file_map_field_hash_table(f);
- if (r < 0)
- goto fail;
-
- r = journal_file_map_data_hash_table(f);
- if (r < 0)
- goto fail;
-
if (mmap_cache_got_sigbus(f->mmap, f->fd)) {
r = -EIO;
goto fail;
}
+ if (template && template->post_change_timer) {
+ r = journal_file_enable_post_change_timer(
+ f,
+ sd_event_source_get_event(template->post_change_timer),
+ template->post_change_timer_period);
+
+ if (r < 0)
+ goto fail;
+ }
+
+ /* The file is opened now successfully, thus we take possession of any passed in fd. */
+ f->close_fd = true;
+
*ret = f;
return 0;
@@ -2751,12 +3242,12 @@ fail:
if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd))
r = -EIO;
- journal_file_close(f);
+ (void) journal_file_close(f);
return r;
}
-int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
+int journal_file_rotate(JournalFile **f, bool compress, bool seal, Set *deferred_closes) {
_cleanup_free_ char *p = NULL;
size_t l;
JournalFile *old_file, *new_file = NULL;
@@ -2770,6 +3261,11 @@ int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
if (!old_file->writable)
return -EINVAL;
+ /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it, and we refuse
+ * rotation, since we don't know the actual path, and couldn't rename the file hence.*/
+ if (path_startswith(old_file->path, "/proc/self/fd"))
+ return -EINVAL;
+
if (!endswith(old_file->path, ".journal"))
return -EINVAL;
@@ -2789,15 +3285,29 @@ int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
if (r < 0 && errno != ENOENT)
return -errno;
- old_file->header->state = STATE_ARCHIVED;
+ /* Sync the rename to disk */
+ (void) fsync_directory_of_file(old_file->fd);
+
+ /* Set as archive so offlining commits w/state=STATE_ARCHIVED.
+ * Previously we would set old_file->header->state to STATE_ARCHIVED directly here,
+ * but journal_file_set_offline() short-circuits when state != STATE_ONLINE, which
+ * would result in the rotated journal never getting fsync() called before closing.
+ * Now we simply queue the archive state by setting an archive bit, leaving the state
+ * as STATE_ONLINE so proper offlining occurs. */
+ old_file->archive = true;
/* Currently, btrfs is not very good with out write patterns
* and fragments heavily. Let's defrag our journal files when
* we archive them */
old_file->defrag_on_close = true;
- r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
- journal_file_close(old_file);
+ r = journal_file_open(-1, old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, deferred_closes, old_file, &new_file);
+
+ if (deferred_closes &&
+ set_put(deferred_closes, old_file) >= 0)
+ (void) journal_file_set_offline(old_file, false);
+ else
+ (void) journal_file_close(old_file);
*f = new_file;
return r;
@@ -2811,6 +3321,7 @@ int journal_file_open_reliably(
bool seal,
JournalMetrics *metrics,
MMapCache *mmap_cache,
+ Set *deferred_closes,
JournalFile *template,
JournalFile **ret) {
@@ -2818,8 +3329,7 @@ int journal_file_open_reliably(
size_t l;
_cleanup_free_ char *p = NULL;
- r = journal_file_open(fname, flags, mode, compress, seal,
- metrics, mmap_cache, template, ret);
+ r = journal_file_open(-1, fname, flags, mode, compress, seal, metrics, mmap_cache, deferred_closes, template, ret);
if (!IN_SET(r,
-EBADMSG, /* corrupted */
-ENODATA, /* truncated */
@@ -2828,7 +3338,8 @@ int journal_file_open_reliably(
-EBUSY, /* unclean shutdown */
-ESHUTDOWN, /* already archived */
-EIO, /* IO error, including SIGBUS on mmap */
- -EIDRM /* File has been deleted */))
+ -EIDRM, /* File has been deleted */
+ -ETXTBSY)) /* File is from the future */
return r;
if ((flags & O_ACCMODE) == O_RDONLY)
@@ -2849,20 +3360,18 @@ int journal_file_open_reliably(
random_u64()) < 0)
return -ENOMEM;
- r = rename(fname, p);
- if (r < 0)
+ if (rename(fname, p) < 0)
return -errno;
/* btrfs doesn't cope well with our write pattern and
* fragments heavily. Let's defrag all files we rotate */
- (void) chattr_path(p, false, FS_NOCOW_FL);
+ (void) chattr_path(p, 0, FS_NOCOW_FL);
(void) btrfs_defrag(p);
- log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
+ log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
- return journal_file_open(fname, flags, mode, compress, seal,
- metrics, mmap_cache, template, ret);
+ return journal_file_open(-1, fname, flags, mode, compress, seal, metrics, mmap_cache, deferred_closes, template, ret);
}
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
@@ -2949,16 +3458,35 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
return r;
}
+void journal_reset_metrics(JournalMetrics *m) {
+ assert(m);
+
+ /* Set everything to "pick automatic values". */
+
+ *m = (JournalMetrics) {
+ .min_use = (uint64_t) -1,
+ .max_use = (uint64_t) -1,
+ .min_size = (uint64_t) -1,
+ .max_size = (uint64_t) -1,
+ .keep_free = (uint64_t) -1,
+ .n_max_files = (uint64_t) -1,
+ };
+}
+
void journal_default_metrics(JournalMetrics *m, int fd) {
- uint64_t fs_size = 0;
+ char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
struct statvfs ss;
- char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
+ uint64_t fs_size;
assert(m);
assert(fd >= 0);
if (fstatvfs(fd, &ss) >= 0)
fs_size = ss.f_frsize * ss.f_blocks;
+ else {
+ log_debug_errno(errno, "Failed to detremine disk size: %m");
+ fs_size = 0;
+ }
if (m->max_use == (uint64_t) -1) {
@@ -2975,10 +3503,16 @@ void journal_default_metrics(JournalMetrics *m, int fd) {
} else {
m->max_use = PAGE_ALIGN(m->max_use);
- if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
+ if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2)
m->max_use = JOURNAL_FILE_SIZE_MIN*2;
}
+ if (m->min_use == (uint64_t) -1)
+ m->min_use = DEFAULT_MIN_USE;
+
+ if (m->min_use > m->max_use)
+ m->min_use = m->max_use;
+
if (m->max_size == (uint64_t) -1) {
m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
@@ -2987,11 +3521,13 @@ void journal_default_metrics(JournalMetrics *m, int fd) {
} else
m->max_size = PAGE_ALIGN(m->max_size);
- if (m->max_size < JOURNAL_FILE_SIZE_MIN)
- m->max_size = JOURNAL_FILE_SIZE_MIN;
+ if (m->max_size != 0) {
+ if (m->max_size < JOURNAL_FILE_SIZE_MIN)
+ m->max_size = JOURNAL_FILE_SIZE_MIN;
- if (m->max_size*2 > m->max_use)
- m->max_use = m->max_size*2;
+ if (m->max_use != 0 && m->max_size*2 > m->max_use)
+ m->max_use = m->max_size*2;
+ }
if (m->min_size == (uint64_t) -1)
m->min_size = JOURNAL_FILE_SIZE_MIN;
@@ -3001,7 +3537,7 @@ void journal_default_metrics(JournalMetrics *m, int fd) {
if (m->min_size < JOURNAL_FILE_SIZE_MIN)
m->min_size = JOURNAL_FILE_SIZE_MIN;
- if (m->min_size > m->max_size)
+ if (m->max_size != 0 && m->min_size > m->max_size)
m->max_size = m->min_size;
}
@@ -3017,15 +3553,21 @@ void journal_default_metrics(JournalMetrics *m, int fd) {
m->keep_free = DEFAULT_KEEP_FREE;
}
- log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
- format_bytes(a, sizeof(a), m->max_use),
- format_bytes(b, sizeof(b), m->max_size),
- format_bytes(c, sizeof(c), m->min_size),
- format_bytes(d, sizeof(d), m->keep_free));
+ if (m->n_max_files == (uint64_t) -1)
+ m->n_max_files = DEFAULT_N_MAX_FILES;
+
+ log_debug("Fixed min_use=%s max_use=%s max_size=%s min_size=%s keep_free=%s n_max_files=%" PRIu64,
+ format_bytes(a, sizeof(a), m->min_use),
+ format_bytes(b, sizeof(b), m->max_use),
+ format_bytes(c, sizeof(c), m->max_size),
+ format_bytes(d, sizeof(d), m->min_size),
+ format_bytes(e, sizeof(e), m->keep_free),
+ m->n_max_files);
}
int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
assert(f);
+ assert(f->header);
assert(from || to);
if (from) {
@@ -3089,6 +3631,7 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, u
bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
assert(f);
+ assert(f->header);
/* If we gained new header fields we gained new features,
* hence suggest a rotation */