diff options
Diffstat (limited to 'src/libsystemd/libsystemd-journal-internal')
13 files changed, 723 insertions, 243 deletions
diff --git a/src/libsystemd/libsystemd-journal-internal/Makefile b/src/libsystemd/libsystemd-journal-internal/Makefile index 7b7f313077..1c8547f798 100644 --- a/src/libsystemd/libsystemd-journal-internal/Makefile +++ b/src/libsystemd/libsystemd-journal-internal/Makefile @@ -60,7 +60,9 @@ libsystemd_journal_internal_la_SOURCES = \ src/journal/mmap-cache.h \ src/journal/compress.c \ src/journal/audit-type.h \ - src/journal/audit-type.c + src/journal/audit-type.c \ + src/shared/gcrypt-util.h \ + src/shared/gcrypt-util.c nodist_libsystemd_journal_internal_la_SOURCES = \ src/journal/audit_type-to-name.h diff --git a/src/libsystemd/libsystemd-journal-internal/catalog.c b/src/libsystemd/libsystemd-journal-internal/catalog.c index 962ca238b9..70838d958c 100644 --- a/src/libsystemd/libsystemd-journal-internal/catalog.c +++ b/src/libsystemd/libsystemd-journal-internal/catalog.c @@ -164,14 +164,14 @@ static int finish_item( Hashmap *h, sd_id128_t id, const char *language, - char *payload) { + char *payload, size_t payload_size) { _cleanup_free_ CatalogItem *i = NULL; - _cleanup_free_ char *combined = NULL, *prev = NULL; - int r; + _cleanup_free_ char *prev = NULL, *combined = NULL; assert(h); assert(payload); + assert(payload_size > 0); i = new0(CatalogItem, 1); if (!i) @@ -184,23 +184,25 @@ static int finish_item( } prev = hashmap_get(h, i); - - /* Already have such an item, combine them */ if (prev) { + /* Already have such an item, combine them */ combined = combine_entries(payload, prev); if (!combined) return log_oom(); - r = hashmap_update(h, i, combined); - if (r < 0) - return r; - combined = NULL; - /* A new item */ + if (hashmap_update(h, i, combined) < 0) + return log_oom(); + combined = NULL; } else { - r = hashmap_put(h, i, payload); - if (r < 0) - return r; + /* A new item */ + combined = memdup(payload, payload_size + 1); + if (!combined) + return log_oom(); + + if (hashmap_put(h, i, combined) < 0) + return log_oom(); i = NULL; + combined = NULL; } return 0; @@ -215,7 +217,7 @@ int catalog_file_lang(const char* filename, char **lang) { beg = end - 1; while (beg > filename && *beg != '.' && *beg != '/' && end - beg < 32) - beg --; + beg--; if (*beg != '.' || end <= beg + 1) return 0; @@ -262,6 +264,7 @@ static int catalog_entry_lang(const char* filename, int line, int catalog_import_file(Hashmap *h, const char *path) { _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *payload = NULL; + size_t payload_size = 0, payload_allocated = 0; unsigned n = 0; sd_id128_t id; _cleanup_free_ char *deflang = NULL, *lang = NULL; @@ -283,8 +286,7 @@ int catalog_import_file(Hashmap *h, const char *path) { for (;;) { char line[LINE_MAX]; - size_t a, b, c; - char *t; + size_t line_len; if (!fgets(line, sizeof(line), f)) { if (feof(f)) @@ -323,17 +325,23 @@ int catalog_import_file(Hashmap *h, const char *path) { if (sd_id128_from_string(line + 2 + 1, &jd) >= 0) { if (got_id) { - r = finish_item(h, id, lang ?: deflang, payload); + if (payload_size == 0) { + log_error("[%s:%u] No payload text.", path, n); + return -EINVAL; + } + + r = finish_item(h, id, lang ?: deflang, payload, payload_size); if (r < 0) return r; - payload = NULL; lang = mfree(lang); + payload_size = 0; } if (with_language) { - t = strstrip(line + 2 + 1 + 32 + 1); + char *t; + t = strstrip(line + 2 + 1 + 32 + 1); r = catalog_entry_lang(path, n, t, deflang, &lang); if (r < 0) return r; @@ -343,9 +351,6 @@ int catalog_import_file(Hashmap *h, const char *path) { empty_line = false; id = jd; - if (payload) - payload[0] = '\0'; - continue; } } @@ -356,34 +361,30 @@ int catalog_import_file(Hashmap *h, const char *path) { return -EINVAL; } - a = payload ? strlen(payload) : 0; - b = strlen(line); - - c = a + (empty_line ? 1 : 0) + b + 1 + 1; - t = realloc(payload, c); - if (!t) + line_len = strlen(line); + if (!GREEDY_REALLOC(payload, payload_allocated, + payload_size + (empty_line ? 1 : 0) + line_len + 1 + 1)) return log_oom(); - if (empty_line) { - t[a] = '\n'; - memcpy(t + a + 1, line, b); - t[a+b+1] = '\n'; - t[a+b+2] = 0; - } else { - memcpy(t + a, line, b); - t[a+b] = '\n'; - t[a+b+1] = 0; - } + if (empty_line) + payload[payload_size++] = '\n'; + memcpy(payload + payload_size, line, line_len); + payload_size += line_len; + payload[payload_size++] = '\n'; + payload[payload_size] = '\0'; - payload = t; empty_line = false; } if (got_id) { - r = finish_item(h, id, lang ?: deflang, payload); + if (payload_size == 0) { + log_error("[%s:%u] No payload text.", path, n); + return -EINVAL; + } + + r = finish_item(h, id, lang ?: deflang, payload, payload_size); if (r < 0) return r; - payload = NULL; } return 0; diff --git a/src/libsystemd/libsystemd-journal-internal/compress.c b/src/libsystemd/libsystemd-journal-internal/compress.c index 1933b87b00..ba734b5561 100644 --- a/src/libsystemd/libsystemd-journal-internal/compress.c +++ b/src/libsystemd/libsystemd-journal-internal/compress.c @@ -17,6 +17,7 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ +#include <inttypes.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> @@ -111,7 +112,7 @@ int compress_blob_lz4(const void *src, uint64_t src_size, if (src_size < 9) return -ENOBUFS; - r = LZ4_compress_limitedOutput(src, dst + 8, src_size, (int) dst_alloc_size - 8); + r = LZ4_compress_limitedOutput(src, (char*)dst + 8, src_size, (int) dst_alloc_size - 8); if (r <= 0) return -ENOBUFS; @@ -175,7 +176,7 @@ int decompress_blob_xz(const void *src, uint64_t src_size, return -ENOMEM; s.avail_out = space - used; - s.next_out = *dst + used; + s.next_out = *(uint8_t**)dst + used; } *dst_size = space - s.avail_out; @@ -214,7 +215,7 @@ int decompress_blob_lz4(const void *src, uint64_t src_size, } else out = *dst; - r = LZ4_decompress_safe(src + 8, out, src_size - 8, size); + r = LZ4_decompress_safe((char*)src + 8, out, src_size - 8, size); if (r < 0 || r != size) return -EBADMSG; @@ -290,7 +291,7 @@ int decompress_startswith_xz(const void *src, uint64_t src_size, if (!(greedy_realloc(buffer, buffer_size, *buffer_size * 2, 1))) return -ENOMEM; - s.next_out = *buffer + *buffer_size - s.avail_out; + s.next_out = *(uint8_t**)buffer + *buffer_size - s.avail_out; } #else @@ -323,7 +324,7 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size, if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1))) return -ENOMEM; - r = LZ4_decompress_safe_partial(src + 8, *buffer, src_size - 8, + r = LZ4_decompress_safe_partial((char*)src + 8, *buffer, src_size - 8, prefix_len + 1, *buffer_size); if (r >= 0) size = (unsigned) r; @@ -498,7 +499,7 @@ int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { total_out += n; if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) { - log_debug("Compressed stream longer than %zd bytes", max_bytes); + log_debug("Compressed stream longer than %"PRIu64" bytes", max_bytes); return -EFBIG; } @@ -649,7 +650,7 @@ int decompress_stream_lz4(int in, int out, uint64_t max_bytes) { total_out += produced; if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) { - log_debug("Decompressed stream longer than %zd bytes", max_bytes); + log_debug("Decompressed stream longer than %"PRIu64" bytes", max_bytes); r = -EFBIG; goto cleanup; } diff --git a/src/libsystemd/libsystemd-journal-internal/fsprg.c b/src/libsystemd/libsystemd-journal-internal/fsprg.c index 8f7e137e74..612b10f3a9 100644 --- a/src/libsystemd/libsystemd-journal-internal/fsprg.c +++ b/src/libsystemd/libsystemd-journal-internal/fsprg.c @@ -30,6 +30,7 @@ #include <string.h> #include "fsprg.h" +#include "gcrypt-util.h" #define ISVALID_SECPAR(secpar) (((secpar) % 16 == 0) && ((secpar) >= 16) && ((secpar) <= 16384)) #define VALIDATE_SECPAR(secpar) assert(ISVALID_SECPAR(secpar)); @@ -57,7 +58,7 @@ static gcry_mpi_t mpi_import(const void *buf, size_t buflen) { gcry_mpi_t h; unsigned len; - gcry_mpi_scan(&h, GCRYMPI_FMT_USG, buf, buflen, NULL); + assert_se(gcry_mpi_scan(&h, GCRYMPI_FMT_USG, buf, buflen, NULL) == 0); len = (gcry_mpi_get_nbits(h) + 7) / 8; assert(len <= buflen); assert(gcry_mpi_cmp_ui(h, 0) >= 0); @@ -206,20 +207,6 @@ static void CRT_compose(gcry_mpi_t *x, const gcry_mpi_t xp, const gcry_mpi_t xq, gcry_mpi_release(u); } -static void initialize_libgcrypt(void) { - const char *p; - if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P)) - return; - - p = gcry_check_version("1.4.5"); - assert(p); - - /* Turn off "secmem". Clients which whish to make use of this - * feature should initialize the library manually */ - gcry_control(GCRYCTL_DISABLE_SECMEM); - gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); -} - /******************************************************************************/ size_t FSPRG_mskinbytes(unsigned _secpar) { @@ -259,7 +246,7 @@ void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigne VALIDATE_SECPAR(_secpar); secpar = _secpar; - initialize_libgcrypt(); + initialize_libgcrypt(false); if (!seed) { gcry_randomize(iseed, FSPRG_RECOMMENDED_SEEDLEN, GCRY_STRONG_RANDOM); @@ -295,7 +282,7 @@ void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seed gcry_mpi_t n, x; uint16_t secpar; - initialize_libgcrypt(); + initialize_libgcrypt(false); secpar = read_secpar(mpk + 0); n = mpi_import(mpk + 2, secpar / 8); @@ -314,7 +301,7 @@ void FSPRG_Evolve(void *state) { uint16_t secpar; uint64_t epoch; - initialize_libgcrypt(); + initialize_libgcrypt(false); secpar = read_secpar(state + 0); n = mpi_import(state + 2 + 0 * secpar / 8, secpar / 8); @@ -341,7 +328,7 @@ void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, gcry_mpi_t p, q, n, x, xp, xq, kp, kq, xm; uint16_t secpar; - initialize_libgcrypt(); + initialize_libgcrypt(false); secpar = read_secpar(msk + 0); p = mpi_import(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8); @@ -380,7 +367,7 @@ void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx) { uint16_t secpar; - initialize_libgcrypt(); + initialize_libgcrypt(false); secpar = read_secpar(state + 0); det_randomize(key, keylen, state + 2, 2 * secpar / 8 + 8, idx); diff --git a/src/libsystemd/libsystemd-journal-internal/journal-authenticate.c b/src/libsystemd/libsystemd-journal-internal/journal-authenticate.c index 49f3c8f0f4..d8af113d3f 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-authenticate.c +++ b/src/libsystemd/libsystemd-journal-internal/journal-authenticate.c @@ -22,6 +22,7 @@ #include "fd-util.h" #include "fsprg.h" +#include "gcrypt-util.h" #include "hexdecoct.h" #include "journal-authenticate.h" #include "journal-def.h" @@ -424,25 +425,13 @@ finish: return r; } -static void initialize_libgcrypt(void) { - const char *p; - - if (gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P)) - return; - - p = gcry_check_version("1.4.5"); - assert(p); - - gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); -} - int journal_file_hmac_setup(JournalFile *f) { gcry_error_t e; if (!f->seal) return 0; - initialize_libgcrypt(); + initialize_libgcrypt(true); e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC); if (e != 0) diff --git a/src/libsystemd/libsystemd-journal-internal/journal-file.c b/src/libsystemd/libsystemd-journal-internal/journal-file.c index 4ea7013a53..13db5c53de 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-file.c +++ b/src/libsystemd/libsystemd-journal-internal/journal-file.c @@ -20,6 +20,7 @@ #include <errno.h> #include <fcntl.h> #include <linux/fs.h> +#include <pthread.h> #include <stddef.h> #include <sys/mman.h> #include <sys/statvfs.h> @@ -36,8 +37,10 @@ #include "journal-file.h" #include "lookup3.h" #include "parse-util.h" +#include "path-util.h" #include "random-util.h" #include <systemd/sd-event.h> +#include "set.h" #include "string-util.h" #include "xattr-util.h" @@ -86,33 +89,127 @@ /* The mmap context to use for the header we pick as one above the last defined typed */ #define CONTEXT_HEADER _OBJECT_TYPE_MAX -static int journal_file_set_online(JournalFile *f) { +/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync(). + * As a result we use atomic operations on f->offline_state for inter-thread communications with + * journal_file_set_offline() and journal_file_set_online(). */ +static void journal_file_set_offline_internal(JournalFile *f) { assert(f); + assert(f->fd >= 0); + assert(f->header); - if (!f->writable) - return -EPERM; + for (;;) { + switch (f->offline_state) { + case OFFLINE_CANCEL: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_DONE)) + continue; + return; - if (!(f->fd >= 0 && f->header)) - return -EINVAL; + case OFFLINE_AGAIN_FROM_SYNCING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING)) + continue; + break; + + case OFFLINE_AGAIN_FROM_OFFLINING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING)) + continue; + break; + + case OFFLINE_SYNCING: + (void) fsync(f->fd); + + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING)) + continue; + + f->header->state = f->archive ? STATE_ARCHIVED : STATE_OFFLINE; + (void) fsync(f->fd); + break; + + case OFFLINE_OFFLINING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE)) + continue; + /* fall through */ + + case OFFLINE_DONE: + return; + + case OFFLINE_JOINED: + log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()"); + return; + } + } +} + +static void * journal_file_set_offline_thread(void *arg) { + JournalFile *f = arg; + + journal_file_set_offline_internal(f); + + return NULL; +} + +static int journal_file_set_offline_thread_join(JournalFile *f) { + int r; + + assert(f); + + if (f->offline_state == OFFLINE_JOINED) + return 0; + + r = pthread_join(f->offline_thread, NULL); + if (r) + return -r; + + f->offline_state = OFFLINE_JOINED; if (mmap_cache_got_sigbus(f->mmap, f->fd)) return -EIO; - switch (f->header->state) { - case STATE_ONLINE: - return 0; + return 0; +} - case STATE_OFFLINE: - f->header->state = STATE_ONLINE; - fsync(f->fd); - return 0; +/* Trigger a restart if the offline thread is mid-flight in a restartable state. */ +static bool journal_file_set_offline_try_restart(JournalFile *f) { + for (;;) { + switch (f->offline_state) { + case OFFLINE_AGAIN_FROM_SYNCING: + case OFFLINE_AGAIN_FROM_OFFLINING: + return true; + + case OFFLINE_CANCEL: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING)) + continue; + return true; + + case OFFLINE_SYNCING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING)) + continue; + return true; + + case OFFLINE_OFFLINING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING)) + continue; + return true; default: - return -EINVAL; + return false; + } } } -int journal_file_set_offline(JournalFile *f) { +/* Sets a journal offline. + * + * If wait is false then an offline is dispatched in a separate thread for a + * subsequent journal_file_set_offline() or journal_file_set_online() of the + * same journal to synchronize with. + * + * If wait is true, then either an existing offline thread will be restarted + * and joined, or if none exists the offline is simply performed in this + * context without involving another thread. + */ +int journal_file_set_offline(JournalFile *f, bool wait) { + bool restarted; + int r; + assert(f); if (!f->writable) @@ -121,22 +218,114 @@ int journal_file_set_offline(JournalFile *f) { if (!(f->fd >= 0 && f->header)) return -EINVAL; - if (f->header->state != STATE_ONLINE) + /* An offlining journal is implicitly online and may modify f->header->state, + * we must also join any potentially lingering offline thread when not online. */ + if (!journal_file_is_offlining(f) && f->header->state != STATE_ONLINE) + return journal_file_set_offline_thread_join(f); + + /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */ + restarted = journal_file_set_offline_try_restart(f); + if ((restarted && wait) || !restarted) { + r = journal_file_set_offline_thread_join(f); + if (r < 0) + return r; + } + + if (restarted) return 0; - fsync(f->fd); + /* Initiate a new offline. */ + f->offline_state = OFFLINE_SYNCING; - if (mmap_cache_got_sigbus(f->mmap, f->fd)) - return -EIO; + if (wait) /* Without using a thread if waiting. */ + journal_file_set_offline_internal(f); + else { + r = pthread_create(&f->offline_thread, NULL, journal_file_set_offline_thread, f); + if (r > 0) { + f->offline_state = OFFLINE_JOINED; + return -r; + } + } - f->header->state = STATE_OFFLINE; + return 0; +} + +static int journal_file_set_online(JournalFile *f) { + bool joined = false; + + assert(f); + + if (!f->writable) + return -EPERM; + + if (!(f->fd >= 0 && f->header)) + return -EINVAL; + + while (!joined) { + switch (f->offline_state) { + case OFFLINE_JOINED: + /* No offline thread, no need to wait. */ + joined = true; + break; + + case OFFLINE_SYNCING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_CANCEL)) + continue; + /* Canceled syncing prior to offlining, no need to wait. */ + break; + + case OFFLINE_AGAIN_FROM_SYNCING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_CANCEL)) + continue; + /* Canceled restart from syncing, no need to wait. */ + break; + + case OFFLINE_AGAIN_FROM_OFFLINING: + if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_CANCEL)) + continue; + /* Canceled restart from offlining, must wait for offlining to complete however. */ + + /* fall through to wait */ + default: { + int r; + + r = journal_file_set_offline_thread_join(f); + if (r < 0) + return r; + + joined = true; + break; + } + } + } if (mmap_cache_got_sigbus(f->mmap, f->fd)) return -EIO; - fsync(f->fd); + switch (f->header->state) { + case STATE_ONLINE: + return 0; - return 0; + case STATE_OFFLINE: + f->header->state = STATE_ONLINE; + (void) fsync(f->fd); + return 0; + + default: + return -EINVAL; + } +} + +bool journal_file_is_offlining(JournalFile *f) { + assert(f); + + __sync_synchronize(); + + if (f->offline_state == OFFLINE_DONE || + f->offline_state == OFFLINE_JOINED) + return false; + + return true; } JournalFile* journal_file_close(JournalFile *f) { @@ -159,7 +348,7 @@ JournalFile* journal_file_close(JournalFile *f) { sd_event_source_unref(f->post_change_timer); } - journal_file_set_offline(f); + journal_file_set_offline(f, true); if (f->mmap && f->fd >= 0) mmap_cache_close_fd(f->mmap, f->fd); @@ -176,7 +365,8 @@ JournalFile* journal_file_close(JournalFile *f) { (void) btrfs_defrag_fd(f->fd); } - safe_close(f->fd); + if (f->close_fd) + safe_close(f->fd); free(f->path); mmap_cache_unref(f->mmap); @@ -203,6 +393,15 @@ JournalFile* journal_file_close(JournalFile *f) { return NULL; } +void journal_file_close_set(Set *s) { + JournalFile *f; + + assert(s); + + while ((f = set_steal_first(s))) + (void) journal_file_close(f); +} + static int journal_file_init_header(JournalFile *f, JournalFile *template) { Header h = {}; ssize_t k; @@ -240,6 +439,39 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) { return 0; } +static int fsync_directory_of_file(int fd) { + _cleanup_free_ char *path = NULL, *dn = NULL; + _cleanup_close_ int dfd = -1; + struct stat st; + int r; + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISREG(st.st_mode)) + return -EBADFD; + + r = fd_get_path(fd, &path); + if (r < 0) + return r; + + if (!path_is_absolute(path)) + return -EINVAL; + + dn = dirname_malloc(path); + if (!dn) + return -ENOMEM; + + dfd = open(dn, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (dfd < 0) + return -errno; + + if (fsync(dfd) < 0) + return -errno; + + return 0; +} + static int journal_file_refresh_header(JournalFile *f) { sd_id128_t boot_id; int r; @@ -263,7 +495,10 @@ static int journal_file_refresh_header(JournalFile *f) { r = journal_file_set_online(f); /* Sync the online state to disk */ - fsync(f->fd); + (void) fsync(f->fd); + + /* We likely just created a new file, also sync the directory this file is located in. */ + (void) fsync_directory_of_file(f->fd); return r; } @@ -508,7 +743,11 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset /* Objects may only be located at multiple of 64 bit */ if (!VALID64(offset)) - return -EFAULT; + return -EBADMSG; + + /* Object may not be located in the file header */ + if (offset < le64toh(f->header->header_size)) + return -EBADMSG; r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t); if (r < 0) @@ -1123,8 +1362,8 @@ static int journal_file_append_data( } #endif - if (compression == 0 && size > 0) - memcpy(o->data.payload, data, size); + if (compression == 0) + memcpy_safe(o->data.payload, data, size); r = journal_file_link_data(f, o, p, hash); if (r < 0) @@ -1389,7 +1628,7 @@ static int journal_file_append_entry_internal( return r; o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum)); - memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); + memcpy_safe(o->entry.items, items, n_items * sizeof(EntryItem)); o->entry.realtime = htole64(ts->realtime); o->entry.monotonic = htole64(ts->monotonic); o->entry.xor_hash = htole64(xor_hash); @@ -1781,9 +2020,14 @@ static int generic_array_bisect( i = right - 1; lp = p = le64toh(array->entry_array.items[i]); if (p <= 0) - return -EBADMSG; - - r = test_object(f, p, needle); + r = -EBADMSG; + else + r = test_object(f, p, needle); + if (r == -EBADMSG) { + log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (1)"); + n = i; + continue; + } if (r < 0) return r; @@ -1859,9 +2103,14 @@ static int generic_array_bisect( p = le64toh(array->entry_array.items[i]); if (p <= 0) - return -EBADMSG; - - r = test_object(f, p, needle); + r = -EBADMSG; + else + r = test_object(f, p, needle); + if (r == -EBADMSG) { + log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short. (2)"); + right = n = i; + continue; + } if (r < 0) return r; @@ -1977,7 +2226,7 @@ static int generic_array_bisect_plus_one( goto found; if (r > 0 && idx) - (*idx) ++; + (*idx)++; return r; @@ -2266,13 +2515,18 @@ int journal_file_next_entry( le64toh(f->header->entry_array_offset), i, ret, &ofs); + if (r == -EBADMSG && direction == DIRECTION_DOWN) { + /* Special case: when we iterate throught the journal file linearly, and hit an entry we can't read, + * consider this the end of the journal file. */ + log_debug_errno(r, "Encountered entry we can't read while iterating through journal file. Considering this the end of the file."); + return 0; + } if (r <= 0) return r; if (p > 0 && (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) { - log_debug("%s: entry array corrupted at entry %"PRIu64, - f->path, i); + log_debug("%s: entry array corrupted at entry %" PRIu64, f->path, i); return -EBADMSG; } @@ -2611,11 +2865,11 @@ void journal_file_print_header(JournalFile *f) { "Data Hash Table Size: %"PRIu64"\n" "Field Hash Table Size: %"PRIu64"\n" "Rotate Suggested: %s\n" - "Head Sequential Number: %"PRIu64"\n" - "Tail Sequential Number: %"PRIu64"\n" - "Head Realtime Timestamp: %s\n" - "Tail Realtime Timestamp: %s\n" - "Tail Monotonic Timestamp: %s\n" + "Head Sequential Number: %"PRIu64" (%"PRIx64")\n" + "Tail Sequential Number: %"PRIu64" (%"PRIx64")\n" + "Head Realtime Timestamp: %s (%"PRIx64")\n" + "Tail Realtime Timestamp: %s (%"PRIx64")\n" + "Tail Monotonic Timestamp: %s (%"PRIx64")\n" "Objects: %"PRIu64"\n" "Entry Objects: %"PRIu64"\n", f->path, @@ -2636,11 +2890,11 @@ void journal_file_print_header(JournalFile *f) { le64toh(f->header->data_hash_table_size) / sizeof(HashItem), le64toh(f->header->field_hash_table_size) / sizeof(HashItem), yes_no(journal_file_rotate_suggested(f, 0)), - le64toh(f->header->head_entry_seqnum), - le64toh(f->header->tail_entry_seqnum), - format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)), - format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)), - format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), + le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum), + le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum), + format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime), + format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime), + format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic), le64toh(f->header->n_objects), le64toh(f->header->n_entries)); @@ -2703,6 +2957,7 @@ static int journal_file_warn_btrfs(JournalFile *f) { } int journal_file_open( + int fd, const char *fname, int flags, mode_t mode, @@ -2710,6 +2965,7 @@ int journal_file_open( bool seal, JournalMetrics *metrics, MMapCache *mmap_cache, + Set *deferred_closes, JournalFile *template, JournalFile **ret) { @@ -2718,22 +2974,24 @@ int journal_file_open( void *h; int r; - assert(fname); assert(ret); + assert(fd >= 0 || fname); if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) return -EINVAL; - if (!endswith(fname, ".journal") && - !endswith(fname, ".journal~")) - return -EINVAL; + if (fname) { + if (!endswith(fname, ".journal") && + !endswith(fname, ".journal~")) + return -EINVAL; + } f = new0(JournalFile, 1); if (!f) return -ENOMEM; - f->fd = -1; + f->fd = fd; f->mode = mode; f->flags = flags; @@ -2758,7 +3016,10 @@ int journal_file_open( } } - f->path = strdup(fname); + if (fname) + f->path = strdup(fname); + else /* If we don't know the path, fill in something explanatory and vaguely useful */ + asprintf(&f->path, "/proc/self/%i", fd); if (!f->path) { r = -ENOMEM; goto fail; @@ -2770,10 +3031,15 @@ int journal_file_open( goto fail; } - f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode); if (f->fd < 0) { - r = -errno; - goto fail; + f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode); + if (f->fd < 0) { + r = -errno; + goto fail; + } + + /* fds we opened here by us should also be closed by us. */ + f->close_fd = true; } r = journal_file_fstat(f); @@ -2829,6 +3095,9 @@ int journal_file_open( f->header = h; if (!newly_created) { + if (deferred_closes) + journal_file_close_set(deferred_closes); + r = journal_file_verify_header(f); if (r < 0) goto fail; @@ -2891,6 +3160,9 @@ int journal_file_open( goto fail; } + /* The file is opened now successfully, thus we take possession of any passed in fd. */ + f->close_fd = true; + *ret = f; return 0; @@ -2898,12 +3170,12 @@ fail: if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd)) r = -EIO; - journal_file_close(f); + (void) journal_file_close(f); return r; } -int journal_file_rotate(JournalFile **f, bool compress, bool seal) { +int journal_file_rotate(JournalFile **f, bool compress, bool seal, Set *deferred_closes) { _cleanup_free_ char *p = NULL; size_t l; JournalFile *old_file, *new_file = NULL; @@ -2917,6 +3189,11 @@ int journal_file_rotate(JournalFile **f, bool compress, bool seal) { if (!old_file->writable) return -EINVAL; + /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it, and we refuse + * rotation, since we don't know the actual path, and couldn't rename the file hence.*/ + if (path_startswith(old_file->path, "/proc/self/fd")) + return -EINVAL; + if (!endswith(old_file->path, ".journal")) return -EINVAL; @@ -2936,15 +3213,29 @@ int journal_file_rotate(JournalFile **f, bool compress, bool seal) { if (r < 0 && errno != ENOENT) return -errno; - old_file->header->state = STATE_ARCHIVED; + /* Sync the rename to disk */ + (void) fsync_directory_of_file(old_file->fd); + + /* Set as archive so offlining commits w/state=STATE_ARCHIVED. + * Previously we would set old_file->header->state to STATE_ARCHIVED directly here, + * but journal_file_set_offline() short-circuits when state != STATE_ONLINE, which + * would result in the rotated journal never getting fsync() called before closing. + * Now we simply queue the archive state by setting an archive bit, leaving the state + * as STATE_ONLINE so proper offlining occurs. */ + old_file->archive = true; /* Currently, btrfs is not very good with out write patterns * and fragments heavily. Let's defrag our journal files when * we archive them */ old_file->defrag_on_close = true; - r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file); - journal_file_close(old_file); + r = journal_file_open(-1, old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, deferred_closes, old_file, &new_file); + + if (deferred_closes && + set_put(deferred_closes, old_file) >= 0) + (void) journal_file_set_offline(old_file, false); + else + (void) journal_file_close(old_file); *f = new_file; return r; @@ -2958,6 +3249,7 @@ int journal_file_open_reliably( bool seal, JournalMetrics *metrics, MMapCache *mmap_cache, + Set *deferred_closes, JournalFile *template, JournalFile **ret) { @@ -2965,7 +3257,7 @@ int journal_file_open_reliably( size_t l; _cleanup_free_ char *p = NULL; - r = journal_file_open(fname, flags, mode, compress, seal, metrics, mmap_cache, template, ret); + r = journal_file_open(-1, fname, flags, mode, compress, seal, metrics, mmap_cache, deferred_closes, template, ret); if (!IN_SET(r, -EBADMSG, /* corrupted */ -ENODATA, /* truncated */ @@ -3001,12 +3293,12 @@ int journal_file_open_reliably( /* btrfs doesn't cope well with our write pattern and * fragments heavily. Let's defrag all files we rotate */ - (void) chattr_path(p, false, FS_NOCOW_FL); + (void) chattr_path(p, 0, FS_NOCOW_FL); (void) btrfs_defrag(p); log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname); - return journal_file_open(fname, flags, mode, compress, seal, metrics, mmap_cache, template, ret); + return journal_file_open(-1, fname, flags, mode, compress, seal, metrics, mmap_cache, deferred_closes, template, ret); } int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) { diff --git a/src/libsystemd/libsystemd-journal-internal/journal-file.h b/src/libsystemd/libsystemd-journal-internal/journal-file.h index 0414117d98..e48e98f424 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-file.h +++ b/src/libsystemd/libsystemd-journal-internal/journal-file.h @@ -63,6 +63,16 @@ typedef enum LocationType { LOCATION_SEEK } LocationType; +typedef enum OfflineState { + OFFLINE_JOINED, + OFFLINE_SYNCING, + OFFLINE_OFFLINING, + OFFLINE_CANCEL, + OFFLINE_AGAIN_FROM_SYNCING, + OFFLINE_AGAIN_FROM_OFFLINING, + OFFLINE_DONE +} OfflineState; + typedef struct JournalFile { int fd; @@ -75,6 +85,8 @@ typedef struct JournalFile { bool compress_lz4:1; bool seal:1; bool defrag_on_close:1; + bool close_fd:1; + bool archive:1; bool tail_entry_monotonic_valid:1; @@ -105,6 +117,9 @@ typedef struct JournalFile { OrderedHashmap *chain_cache; + pthread_t offline_thread; + volatile OfflineState offline_state; + #if defined(HAVE_XZ) || defined(HAVE_LZ4) void *compress_buffer; size_t compress_buffer_size; @@ -129,6 +144,7 @@ typedef struct JournalFile { } JournalFile; int journal_file_open( + int fd, const char *fname, int flags, mode_t mode, @@ -136,11 +152,14 @@ int journal_file_open( bool seal, JournalMetrics *metrics, MMapCache *mmap_cache, + Set *deferred_closes, JournalFile *template, JournalFile **ret); -int journal_file_set_offline(JournalFile *f); +int journal_file_set_offline(JournalFile *f, bool wait); +bool journal_file_is_offlining(JournalFile *f); JournalFile* journal_file_close(JournalFile *j); +void journal_file_close_set(Set *s); int journal_file_open_reliably( const char *fname, @@ -150,6 +169,7 @@ int journal_file_open_reliably( bool seal, JournalMetrics *metrics, MMapCache *mmap_cache, + Set *deferred_closes, JournalFile *template, JournalFile **ret); @@ -223,7 +243,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6 void journal_file_dump(JournalFile *f); void journal_file_print_header(JournalFile *f); -int journal_file_rotate(JournalFile **f, bool compress, bool seal); +int journal_file_rotate(JournalFile **f, bool compress, bool seal, Set *deferred_closes); void journal_file_post_change(JournalFile *f); int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t); diff --git a/src/libsystemd/libsystemd-journal-internal/journal-internal.h b/src/libsystemd/libsystemd-journal-internal/journal-internal.h index 8f21544ae1..d8eb11ad06 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-internal.h +++ b/src/libsystemd/libsystemd-journal-internal/journal-internal.h @@ -82,6 +82,8 @@ struct Directory { }; struct sd_journal { + int toplevel_fd; + char *path; char *prefix; @@ -117,6 +119,7 @@ struct sd_journal { bool on_network:1; bool no_new_files:1; + bool no_inotify:1; bool unique_file_lost:1; /* File we were iterating over got removed, and there were no more files, so sd_j_enumerate_unique diff --git a/src/libsystemd/libsystemd-journal-internal/journal-send.c b/src/libsystemd/libsystemd-journal-internal/journal-send.c index b85d9d3c61..5a49211f85 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-send.c +++ b/src/libsystemd/libsystemd-journal-internal/journal-send.c @@ -50,7 +50,7 @@ *_f = alloca(_fl + 10); \ memcpy(*_f, "CODE_FUNC=", 10); \ memcpy(*_f + 10, _func, _fl); \ - } while(false) + } while (false) /* We open a single fd, and we'll share it with the current process, * all its threads, and all its subprocesses. This means we need to @@ -208,13 +208,13 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) { struct iovec *w; uint64_t *l; int i, j = 0; - struct sockaddr_un sa = { - .sun_family = AF_UNIX, - .sun_path = "/run/systemd/journal/socket", + static const union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/run/systemd/journal/socket", }; struct msghdr mh = { - .msg_name = &sa, - .msg_namelen = offsetof(struct sockaddr_un, sun_path) + strlen(sa.sun_path), + .msg_name = (struct sockaddr*) &sa.sa, + .msg_namelen = SOCKADDR_UN_LEN(sa.un), }; ssize_t k; bool have_syslog_identifier = false; @@ -316,7 +316,7 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) { buffer_fd = memfd_new(NULL); if (buffer_fd < 0) { if (buffer_fd == -ENOSYS) { - buffer_fd = open_tmpfile("/dev/shm", O_RDWR | O_CLOEXEC); + buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC); if (buffer_fd < 0) return buffer_fd; @@ -392,7 +392,7 @@ _public_ int sd_journal_perror(const char *message) { } _public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) { - union sockaddr_union sa = { + static const union sockaddr_union sa = { .un.sun_family = AF_UNIX, .un.sun_path = "/run/systemd/journal/stdout", }; @@ -408,7 +408,7 @@ _public_ int sd_journal_stream_fd(const char *identifier, int priority, int leve if (fd < 0) return -errno; - r = connect(fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path)); + r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)); if (r < 0) return -errno; diff --git a/src/libsystemd/libsystemd-journal-internal/journal-vacuum.c b/src/libsystemd/libsystemd-journal-internal/journal-vacuum.c index 3650ab3865..cd2676ab63 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-vacuum.c +++ b/src/libsystemd/libsystemd-journal-internal/journal-vacuum.c @@ -239,13 +239,13 @@ int journal_directory_vacuum( /* Vacuum corrupted files */ if (q < 1 + 16 + 1 + 16 + 8 + 1) { - n_active_files ++; + n_active_files++; continue; } if (de->d_name[q-1-8-16-1] != '-' || de->d_name[q-1-8-16-1-16-1] != '@') { - n_active_files ++; + n_active_files++; continue; } @@ -256,7 +256,7 @@ int journal_directory_vacuum( } if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) { - n_active_files ++; + n_active_files++; continue; } @@ -302,7 +302,7 @@ int journal_directory_vacuum( list[n_list].realtime = realtime; list[n_list].seqnum_id = seqnum_id; list[n_list].have_seqnum = have_seqnum; - n_list ++; + n_list++; p = NULL; sum += size; diff --git a/src/libsystemd/libsystemd-journal-internal/journal-verify.c b/src/libsystemd/libsystemd-journal-internal/journal-verify.c index b968e89bb8..26572ddd76 100644 --- a/src/libsystemd/libsystemd-journal-internal/journal-verify.c +++ b/src/libsystemd/libsystemd-journal-internal/journal-verify.c @@ -97,20 +97,20 @@ static void flush_progress(void) { fflush(stdout); } -#define debug(_offset, _fmt, ...) do{ \ +#define debug(_offset, _fmt, ...) do { \ flush_progress(); \ log_debug(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \ - } while(0) + } while (0) -#define warning(_offset, _fmt, ...) do{ \ +#define warning(_offset, _fmt, ...) do { \ flush_progress(); \ log_warning(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \ - } while(0) + } while (0) -#define error(_offset, _fmt, ...) do{ \ +#define error(_offset, _fmt, ...) do { \ flush_progress(); \ log_error(OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \ - } while(0) + } while (0) static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) { uint64_t i; @@ -838,19 +838,19 @@ int journal_file_verify( } else if (f->seal) return -ENOKEY; - data_fd = open_tmpfile("/var/tmp", O_RDWR | O_CLOEXEC); + data_fd = open_tmpfile_unlinkable("/var/tmp", O_RDWR | O_CLOEXEC); if (data_fd < 0) { r = log_error_errno(data_fd, "Failed to create data file: %m"); goto fail; } - entry_fd = open_tmpfile("/var/tmp", O_RDWR | O_CLOEXEC); + entry_fd = open_tmpfile_unlinkable("/var/tmp", O_RDWR | O_CLOEXEC); if (entry_fd < 0) { r = log_error_errno(entry_fd, "Failed to create entry file: %m"); goto fail; } - entry_array_fd = open_tmpfile("/var/tmp", O_RDWR | O_CLOEXEC); + entry_array_fd = open_tmpfile_unlinkable("/var/tmp", O_RDWR | O_CLOEXEC); if (entry_array_fd < 0) { r = log_error_errno(entry_array_fd, "Failed to create entry array file: %m"); @@ -894,7 +894,7 @@ int journal_file_verify( goto fail; } - n_objects ++; + n_objects++; r = journal_file_object_verify(f, p, o); if (r < 0) { @@ -991,7 +991,7 @@ int journal_file_verify( entry_realtime = le64toh(o->entry.realtime); entry_realtime_set = true; - n_entries ++; + n_entries++; break; case OBJECT_DATA_HASH_TABLE: @@ -1131,11 +1131,11 @@ int journal_file_verify( last_epoch = le64toh(o->tag.epoch); - n_tags ++; + n_tags++; break; default: - n_weird ++; + n_weird++; } if (p == le64toh(f->header->tail_object_offset)) { diff --git a/src/libsystemd/libsystemd-journal-internal/mmap-cache.c b/src/libsystemd/libsystemd-journal-internal/mmap-cache.c index 9c0ce8ccbf..6bcd9b6ac8 100644 --- a/src/libsystemd/libsystemd-journal-internal/mmap-cache.c +++ b/src/libsystemd/libsystemd-journal-internal/mmap-cache.c @@ -107,7 +107,7 @@ MMapCache* mmap_cache_ref(MMapCache *m) { assert(m); assert(m->n_ref > 0); - m->n_ref ++; + m->n_ref++; return m; } @@ -361,7 +361,7 @@ MMapCache* mmap_cache_unref(MMapCache *m) { assert(m->n_ref > 0); - m->n_ref --; + m->n_ref--; if (m->n_ref == 0) mmap_cache_free(m); @@ -598,14 +598,14 @@ int mmap_cache_get( /* Check whether the current context is the right one already */ r = try_context(m, fd, prot, context, keep_always, offset, size, ret); if (r != 0) { - m->n_hit ++; + m->n_hit++; return r; } /* Search for a matching mmap */ r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret); if (r != 0) { - m->n_hit ++; + m->n_hit++; return r; } diff --git a/src/libsystemd/libsystemd-journal-internal/sd-journal.c b/src/libsystemd/libsystemd-journal-internal/sd-journal.c index 1493f0348d..930486d65f 100644 --- a/src/libsystemd/libsystemd-journal-internal/sd-journal.c +++ b/src/libsystemd/libsystemd-journal-internal/sd-journal.c @@ -19,6 +19,7 @@ #include <errno.h> #include <fcntl.h> +#include <inttypes.h> #include <linux/magic.h> #include <poll.h> #include <stddef.h> @@ -1063,7 +1064,7 @@ _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) { if (r < 0) return r; - for(;;) { + for (;;) { _cleanup_free_ char *item = NULL; unsigned long long ll; sd_id128_t id; @@ -1232,14 +1233,37 @@ static bool file_type_wanted(int flags, const char *filename) { return false; } -static int add_any_file(sd_journal *j, const char *path) { +static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) { + assert(j); + assert(path); + assert(prefix); + + if (j->toplevel_fd >= 0) + return false; + + return path_startswith(path, prefix); +} + +static const char *skip_slash(const char *p) { + + if (!p) + return NULL; + + while (*p == '/') + p++; + + return p; +} + +static int add_any_file(sd_journal *j, int fd, const char *path) { JournalFile *f = NULL; + bool close_fd = false; int r, k; assert(j); - assert(path); + assert(fd >= 0 || path); - if (ordered_hashmap_get(j->files, path)) + if (path && ordered_hashmap_get(j->files, path)) return 0; if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) { @@ -1248,8 +1272,24 @@ static int add_any_file(sd_journal *j, const char *path) { goto fail; } - r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f); + if (fd < 0 && j->toplevel_fd >= 0) { + + /* If there's a top-level fd defined, open the file relative to this now. (Make the path relative, + * explicitly, since otherwise openat() ignores the first argument.) */ + + fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC); + if (fd < 0) { + r = log_debug_errno(errno, "Failed to open journal file %s: %m", path); + goto fail; + } + + close_fd = true; + } + + r = journal_file_open(fd, path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, NULL, &f); if (r < 0) { + if (close_fd) + safe_close(fd); log_debug_errno(r, "Failed to open journal file %s: %m", path); goto fail; } @@ -1258,15 +1298,21 @@ static int add_any_file(sd_journal *j, const char *path) { r = ordered_hashmap_put(j->files, f->path, f); if (r < 0) { - journal_file_close(f); + f->close_fd = close_fd; + (void) journal_file_close(f); goto fail; } + if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run")) + j->has_runtime_files = true; + else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var")) + j->has_persistent_files = true; + log_debug("File %s added.", f->path); check_network(j, f->fd); - j->current_invalidate_counter ++; + j->current_invalidate_counter++; return 0; @@ -1285,18 +1331,14 @@ static int add_file(sd_journal *j, const char *prefix, const char *filename) { assert(prefix); assert(filename); - if (j->no_new_files || - !file_type_wanted(j->flags, filename)) + if (j->no_new_files) return 0; - path = strjoina(prefix, "/", filename); - - if (!j->has_runtime_files && path_startswith(path, "/run/log/journal")) - j->has_runtime_files = true; - else if (!j->has_persistent_files && path_startswith(path, "/var/log/journal")) - j->has_persistent_files = true; + if (!file_type_wanted(j->flags, filename)) + return 0; - return add_any_file(j, path); + path = strjoina(prefix, "/", filename); + return add_any_file(j, -1, path); } static void remove_file(sd_journal *j, const char *prefix, const char *filename) { @@ -1343,9 +1385,9 @@ static void remove_file_real(sd_journal *j, JournalFile *f) { j->fields_file_lost = true; } - journal_file_close(f); + (void) journal_file_close(f); - j->current_invalidate_counter ++; + j->current_invalidate_counter++; } static int dirname_is_machine_id(const char *fn) { @@ -1372,21 +1414,33 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname) assert(j); assert(prefix); - assert(dirname); - log_debug("Considering %s/%s.", prefix, dirname); - - if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && - !(dirname_is_machine_id(dirname) > 0 || path_startswith(prefix, "/run"))) - return 0; + /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch + * and reenumerates directory contents */ - path = strjoin(prefix, "/", dirname, NULL); + if (dirname) + path = strjoin(prefix, "/", dirname, NULL); + else + path = strdup(prefix); if (!path) { r = -ENOMEM; goto fail; } - d = opendir(path); + log_debug("Considering directory %s.", path); + + /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */ + if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && + !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run"))) + return 0; + + + if (j->toplevel_fd < 0) + d = opendir(path); + else + /* Open the specified directory relative to the the toplevel fd. Enforce that the path specified is + * relative, by dropping the initial slash */ + d = xopendirat(j->toplevel_fd, skip_slash(path), 0); if (!d) { r = log_debug_errno(errno, "Failed to open directory %s: %m", path); goto fail; @@ -1410,7 +1464,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname) } path = NULL; /* avoid freeing in cleanup */ - j->current_invalidate_counter ++; + j->current_invalidate_counter++; log_debug("Directory %s added.", m->path); @@ -1418,17 +1472,18 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname) return 0; if (m->wd <= 0 && j->inotify_fd >= 0) { + /* Watch this directory, if it not being watched yet. */ - m->wd = inotify_add_watch(j->inotify_fd, m->path, - IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| - IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM| - IN_ONLYDIR); + m->wd = inotify_add_watch_fd(j->inotify_fd, dirfd(d), + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM| + IN_ONLYDIR); if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0) inotify_rm_watch(j->inotify_fd, m->wd); } - FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) { + FOREACH_DIRENT_ALL(de, d, r = log_debug_errno(errno, "Failed to read directory %s: %m", m->path); goto fail) { if (dirent_is_file_with_suffix(de, ".journal") || dirent_is_file_with_suffix(de, ".journal~")) @@ -1440,7 +1495,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname) return 0; fail: - k = journal_put_error(j, r, path ?: dirname); + k = journal_put_error(j, r, path ?: prefix); if (k < 0) return k; @@ -1448,28 +1503,62 @@ fail: } static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { + _cleanup_closedir_ DIR *d = NULL; struct dirent *de; Directory *m; int r, k; assert(j); - assert(p); - if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) && - !path_startswith(p, "/run")) - return -EINVAL; + /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we + * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially + * populate the set, as well as to update it later. */ - if (j->prefix) - p = strjoina(j->prefix, p); + if (p) { + /* If there's a path specified, use it. */ - d = opendir(p); - if (!d) { - if (errno == ENOENT && missing_ok) - return 0; + if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) && + !path_has_prefix(j, p, "/run")) + return -EINVAL; - r = log_debug_errno(errno, "Failed to open root directory %s: %m", p); - goto fail; + if (j->prefix) + p = strjoina(j->prefix, p); + + if (j->toplevel_fd < 0) + d = opendir(p); + else + d = xopendirat(j->toplevel_fd, skip_slash(p), 0); + + if (!d) { + if (errno == ENOENT && missing_ok) + return 0; + + r = log_debug_errno(errno, "Failed to open root directory %s: %m", p); + goto fail; + } + } else { + int dfd; + + /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since + * opendir() will take possession of the fd, and close it, which we don't want. */ + + p = "."; /* store this as "." in the directories hashmap */ + + dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3); + if (dfd < 0) { + r = -errno; + goto fail; + } + + d = fdopendir(dfd); + if (!d) { + r = -errno; + safe_close(dfd); + goto fail; + } + + rewinddir(d); } m = hashmap_get(j->directories_by_path, p); @@ -1481,6 +1570,7 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { } m->is_root = true; + m->path = strdup(p); if (!m->path) { free(m); @@ -1495,7 +1585,7 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { goto fail; } - j->current_invalidate_counter ++; + j->current_invalidate_counter++; log_debug("Root directory %s added.", m->path); @@ -1504,7 +1594,7 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { if (m->wd <= 0 && j->inotify_fd >= 0) { - m->wd = inotify_add_watch(j->inotify_fd, m->path, + m->wd = inotify_add_watch_fd(j->inotify_fd, dirfd(d), IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| IN_ONLYDIR); @@ -1515,7 +1605,7 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { if (j->no_new_files) return 0; - FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) { + FOREACH_DIRENT_ALL(de, d, r = log_debug_errno(errno, "Failed to read directory %s: %m", m->path); goto fail) { sd_id128_t id; if (dirent_is_file_with_suffix(de, ".journal") || @@ -1584,8 +1674,7 @@ static int add_current_paths(sd_journal *j) { assert(j); assert(j->no_new_files); - /* Simply adds all directories for files we have open as - * "root" directories. We don't expect errors here, so we + /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we * treat them as fatal. */ ORDERED_HASHMAP_FOREACH(f, j->files, i) { @@ -1596,7 +1685,7 @@ static int add_current_paths(sd_journal *j) { if (!dir) return -ENOMEM; - r = add_root_directory(j, dir, true); + r = add_directory(j, dir, NULL); if (r < 0) return r; } @@ -1613,13 +1702,7 @@ static int allocate_inotify(sd_journal *j) { return -errno; } - if (!j->directories_by_wd) { - j->directories_by_wd = hashmap_new(NULL); - if (!j->directories_by_wd) - return -ENOMEM; - } - - return 0; + return hashmap_ensure_allocated(&j->directories_by_wd, NULL); } static sd_journal *journal_new(int flags, const char *path) { @@ -1630,6 +1713,7 @@ static sd_journal *journal_new(int flags, const char *path) { return NULL; j->original_pid = getpid(); + j->toplevel_fd = -1; j->inotify_fd = -1; j->flags = flags; j->data_threshold = DEFAULT_DATA_THRESHOLD; @@ -1683,6 +1767,9 @@ _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, in char *p; int r; + /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in + * combination with sd_journal_open_directory_fd(). */ + assert_return(machine, -EINVAL); assert_return(ret, -EINVAL); assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM)) == 0, -EINVAL); @@ -1725,13 +1812,16 @@ _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int f assert_return(ret, -EINVAL); assert_return(path, -EINVAL); - assert_return(flags == 0, -EINVAL); + assert_return((flags & ~SD_JOURNAL_OS_ROOT) == 0, -EINVAL); j = journal_new(flags, path); if (!j) return -ENOMEM; - r = add_root_directory(j, path, false); + if (flags & SD_JOURNAL_OS_ROOT) + r = add_search_paths(j); + else + r = add_root_directory(j, path, false); if (r < 0) goto fail; @@ -1740,7 +1830,6 @@ _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int f fail: sd_journal_close(j); - return r; } @@ -1757,7 +1846,7 @@ _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int fla return -ENOMEM; STRV_FOREACH(path, paths) { - r = add_any_file(j, *path); + r = add_any_file(j, -1, *path); if (r < 0) goto fail; } @@ -1769,7 +1858,96 @@ _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int fla fail: sd_journal_close(j); + return r; +} + +_public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) { + sd_journal *j; + struct stat st; + int r; + + assert_return(ret, -EINVAL); + assert_return(fd >= 0, -EBADF); + assert_return((flags & ~SD_JOURNAL_OS_ROOT) == 0, -EINVAL); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -EBADFD; + + j = journal_new(flags, NULL); + if (!j) + return -ENOMEM; + + j->toplevel_fd = fd; + + if (flags & SD_JOURNAL_OS_ROOT) + r = add_search_paths(j); + else + r = add_root_directory(j, NULL, false); + if (r < 0) + goto fail; + *ret = j; + return 0; + +fail: + sd_journal_close(j); + return r; +} + +_public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) { + Iterator iterator; + JournalFile *f; + sd_journal *j; + unsigned i; + int r; + + assert_return(ret, -EINVAL); + assert_return(n_fds > 0, -EBADF); + assert_return(flags == 0, -EINVAL); + + j = journal_new(flags, NULL); + if (!j) + return -ENOMEM; + + for (i = 0; i < n_fds; i++) { + struct stat st; + + if (fds[i] < 0) { + r = -EBADF; + goto fail; + } + + if (fstat(fds[i], &st) < 0) { + r = -errno; + goto fail; + } + + if (!S_ISREG(st.st_mode)) { + r = -EBADFD; + goto fail; + } + + r = add_any_file(j, fds[i], NULL); + if (r < 0) + goto fail; + } + + j->no_new_files = true; + j->no_inotify = true; + + *ret = j; + return 0; + +fail: + /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they + * remain open */ + ORDERED_HASHMAP_FOREACH(f, j->files, iterator) + f->close_fd = false; + + sd_journal_close(j); return r; } @@ -1784,7 +1962,7 @@ _public_ void sd_journal_close(sd_journal *j) { sd_journal_flush_matches(j); while ((f = ordered_hashmap_steal_first(j->files))) - journal_file_close(f); + (void) journal_file_close(f); ordered_hashmap_free(j->files); @@ -1957,7 +2135,7 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void ** &f->compress_buffer, &f->compress_buffer_size, field, field_length, '='); if (r < 0) - log_debug_errno(r, "Cannot decompress %s object of length %zu at offset "OFSfmt": %m", + log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m", object_compressed_to_string(compression), l, p); else if (r > 0) { @@ -2078,7 +2256,7 @@ _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t if (r < 0) return r; - j->current_field ++; + j->current_field++; return 1; } @@ -2096,6 +2274,9 @@ _public_ int sd_journal_get_fd(sd_journal *j) { assert_return(j, -EINVAL); assert_return(!journal_pid_changed(j), -ECHILD); + if (j->no_inotify) + return -EMEDIUMTYPE; + if (j->inotify_fd >= 0) return j->inotify_fd; @@ -2103,10 +2284,14 @@ _public_ int sd_journal_get_fd(sd_journal *j) { if (r < 0) return r; + log_debug("Reiterating files to get inotify watches established"); + /* Iterate through all dirs again, to add them to the * inotify */ if (j->no_new_files) r = add_current_paths(j); + else if (j->toplevel_fd >= 0) + r = add_root_directory(j, NULL, false); else if (j->path) r = add_root_directory(j, j->path, true); else |