diff options
author | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-12-17 02:47:02 -0500 |
---|---|---|
committer | Luke Shumaker <lukeshu@sbcglobal.net> | 2016-12-17 02:47:02 -0500 |
commit | a4d083550a7273b895b44aac8d2ff7e2fdb1f7d5 (patch) | |
tree | 6f148433641f8c92d6f1eddcb2199a78dbd111a0 /src/journal | |
parent | b6d071f1df46eb841ba3f88cdb2b248eaf5f35f8 (diff) | |
parent | 86e9bb69ae74bd960e1fd427258f41d54240d6d1 (diff) |
Merge branch 'systemd/parabola' into notsystemd/premove
# Conflicts:
# Makefile.amp
Diffstat (limited to 'src/journal')
-rw-r--r-- | src/journal/journal-file.c | 190 | ||||
-rw-r--r-- | src/journal/journal-vacuum.c | 2 | ||||
-rw-r--r-- | src/journal/journal-verify.c | 11 | ||||
-rw-r--r-- | src/journal/journalctl.c | 31 | ||||
-rw-r--r-- | src/journal/journald-console.c | 5 | ||||
-rw-r--r-- | src/journal/journald-gperf.gperf | 16 | ||||
-rw-r--r-- | src/journal/journald-rate-limit.c | 4 | ||||
-rw-r--r-- | src/journal/journald-server.c | 708 | ||||
-rw-r--r-- | src/journal/journald-server.h | 35 | ||||
-rw-r--r-- | src/journal/journald-stream.c | 3 | ||||
-rw-r--r-- | src/journal/journald.c | 9 | ||||
-rw-r--r-- | src/journal/mmap-cache.c | 6 | ||||
-rw-r--r-- | src/journal/sd-journal.c | 49 | ||||
-rw-r--r-- | src/journal/test-catalog.c | 4 | ||||
-rw-r--r-- | src/journal/test-compress.c | 11 | ||||
-rw-r--r-- | src/journal/test-journal-interleaving.c | 7 | ||||
-rw-r--r-- | src/journal/test-mmap-cache.c | 6 |
17 files changed, 693 insertions, 404 deletions
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 7504326bff..d3e0214731 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -333,8 +333,13 @@ JournalFile* journal_file_close(JournalFile *f) { #ifdef HAVE_GCRYPT /* Write the final tag */ - if (f->seal && f->writable) - journal_file_append_tag(f); + if (f->seal && f->writable) { + int r; + + r = journal_file_append_tag(f); + if (r < 0) + log_error_errno(r, "Failed to append tag when closing journal: %m"); + } #endif if (f->post_change_timer) { @@ -389,8 +394,7 @@ JournalFile* journal_file_close(JournalFile *f) { gcry_md_close(f->hmac); #endif - free(f); - return NULL; + return mfree(f); } void journal_file_close_set(Set *s) { @@ -563,8 +567,8 @@ static int journal_file_verify_header(JournalFile *f) { return -ENODATA; if (f->writable) { - uint8_t state; sd_id128_t machine_id; + uint8_t state; int r; r = sd_id128_get_machine(&machine_id); @@ -585,6 +589,14 @@ static int journal_file_verify_header(JournalFile *f) { log_debug("Journal file %s has unknown state %i.", f->path, state); return -EBUSY; } + + /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't + * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks + * bisection. */ + if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) { + log_debug("Journal file %s is from the future, refusing to append new data to it that'd be older.", f->path); + return -ETXTBSY; + } } f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header); @@ -742,12 +754,16 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset assert(ret); /* Objects may only be located at multiple of 64 bit */ - if (!VALID64(offset)) + if (!VALID64(offset)) { + log_debug("Attempt to move to object at non-64bit boundary: %" PRIu64, offset); return -EBADMSG; + } /* Object may not be located in the file header */ - if (offset < le64toh(f->header->header_size)) + if (offset < le64toh(f->header->header_size)) { + log_debug("Attempt to move to object located in file header: %" PRIu64, offset); return -EBADMSG; + } r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t); if (r < 0) @@ -756,17 +772,29 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset o = (Object*) t; s = le64toh(o->object.size); - if (s < sizeof(ObjectHeader)) + if (s == 0) { + log_debug("Attempt to move to uninitialized object: %" PRIu64, offset); return -EBADMSG; + } + if (s < sizeof(ObjectHeader)) { + log_debug("Attempt to move to overly short object: %" PRIu64, offset); + return -EBADMSG; + } - if (o->object.type <= OBJECT_UNUSED) + if (o->object.type <= OBJECT_UNUSED) { + log_debug("Attempt to move to object with invalid type: %" PRIu64, offset); return -EBADMSG; + } - if (s < minimum_header_size(o)) + if (s < minimum_header_size(o)) { + log_debug("Attempt to move to truncated object: %" PRIu64, offset); return -EBADMSG; + } - if (type > OBJECT_UNUSED && o->object.type != type) + if (type > OBJECT_UNUSED && o->object.type != type) { + log_debug("Attempt to move to object of unexpected type: %" PRIu64, offset); return -EBADMSG; + } if (s > sizeof(ObjectHeader)) { r = journal_file_move_to(f, type, false, offset, s, &t); @@ -1369,6 +1397,12 @@ static int journal_file_append_data( if (r < 0) return r; +#ifdef HAVE_GCRYPT + r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p); + if (r < 0) + return r; +#endif + /* The linking might have altered the window, so let's * refresh our pointer */ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); @@ -1393,12 +1427,6 @@ static int journal_file_append_data( fo->field.head_data_offset = le64toh(p); } -#ifdef HAVE_GCRYPT - r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p); - if (r < 0) - return r; -#endif - if (ret) *ret = o; @@ -2467,6 +2495,37 @@ int journal_file_compare_locations(JournalFile *af, JournalFile *bf) { return 0; } +static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) { + + /* Increase or decrease the specified index, in the right direction. */ + + if (direction == DIRECTION_DOWN) { + if (*i >= n - 1) + return 0; + + (*i) ++; + } else { + if (*i <= 0) + return 0; + + (*i) --; + } + + return 1; +} + +static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) { + + /* Consider it an error if any of the two offsets is uninitialized */ + if (old_offset == 0 || new_offset == 0) + return false; + + /* If we go down, the new offset must be larger than the old one. */ + return direction == DIRECTION_DOWN ? + new_offset > old_offset : + new_offset < old_offset; +} + int journal_file_next_entry( JournalFile *f, uint64_t p, @@ -2497,36 +2556,34 @@ int journal_file_next_entry( if (r <= 0) return r; - if (direction == DIRECTION_DOWN) { - if (i >= n - 1) - return 0; - - i++; - } else { - if (i <= 0) - return 0; - - i--; - } + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; } /* And jump to it */ - r = generic_array_get(f, - le64toh(f->header->entry_array_offset), - i, - ret, &ofs); - if (r == -EBADMSG && direction == DIRECTION_DOWN) { - /* Special case: when we iterate throught the journal file linearly, and hit an entry we can't read, - * consider this the end of the journal file. */ - log_debug_errno(r, "Encountered entry we can't read while iterating through journal file. Considering this the end of the file."); - return 0; + for (;;) { + r = generic_array_get(f, + le64toh(f->header->entry_array_offset), + i, + ret, &ofs); + if (r > 0) + break; + if (r != -EBADMSG) + return r; + + /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if + * the next one might work for us instead. */ + log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i); + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; } - if (r <= 0) - return r; - if (p > 0 && - (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) { - log_debug("%s: entry array corrupted at entry %" PRIu64, f->path, i); + /* Ensure our array is properly ordered. */ + if (p > 0 && !check_properly_ordered(ofs, p, direction)) { + log_debug("%s: entry array not properly ordered at entry %" PRIu64, f->path, i); return -EBADMSG; } @@ -2543,9 +2600,9 @@ int journal_file_next_entry_for_data( direction_t direction, Object **ret, uint64_t *offset) { - uint64_t n, i; - int r; + uint64_t i, n, ofs; Object *d; + int r; assert(f); assert(p > 0 || !o); @@ -2577,25 +2634,39 @@ int journal_file_next_entry_for_data( if (r <= 0) return r; - if (direction == DIRECTION_DOWN) { - if (i >= n - 1) - return 0; + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + } - i++; - } else { - if (i <= 0) - return 0; + for (;;) { + r = generic_array_get_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + i, + ret, &ofs); + if (r > 0) + break; + if (r != -EBADMSG) + return r; - i--; - } + log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i); + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + } + /* Ensure our array is properly ordered. */ + if (p > 0 && check_properly_ordered(ofs, p, direction)) { + log_debug("%s data entry array not properly ordered at entry %" PRIu64, f->path, i); + return -EBADMSG; } - return generic_array_get_plus_one(f, - le64toh(d->data.entry_offset), - le64toh(d->data.entry_array_offset), - i, - ret, offset); + if (offset) + *offset = ofs; + + return 1; } int journal_file_move_to_entry_by_offset_for_data( @@ -3266,7 +3337,8 @@ int journal_file_open_reliably( -EBUSY, /* unclean shutdown */ -ESHUTDOWN, /* already archived */ -EIO, /* IO error, including SIGBUS on mmap */ - -EIDRM /* File has been deleted */)) + -EIDRM, /* File has been deleted */ + -ETXTBSY)) /* File is from the future */ return r; if ((flags & O_ACCMODE) == O_RDONLY) diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c index f09dc66e03..12ce2fd56c 100644 --- a/src/journal/journal-vacuum.c +++ b/src/journal/journal-vacuum.c @@ -343,7 +343,7 @@ finish: free(list[i].filename); free(list); - log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals on disk.", format_bytes(sbytes, sizeof(sbytes), freed)); + log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", format_bytes(sbytes, sizeof(sbytes), freed), directory); return r; } diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c index 4105abfccc..9e4d8a28a5 100644 --- a/src/journal/journal-verify.c +++ b/src/journal/journal-verify.c @@ -118,6 +118,11 @@ static void flush_progress(void) { log_error(OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \ } while (0) +#define error_errno(_offset, error, _fmt, ...) do { \ + flush_progress(); \ + log_error_errno(error, OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \ + } while (0) + static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) { uint64_t i; @@ -168,8 +173,8 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o le64toh(o->object.size) - offsetof(Object, data.payload), &b, &alloc, &b_size, 0); if (r < 0) { - error(offset, "%s decompression failed: %s", - object_compressed_to_string(compression), strerror(-r)); + error_errno(offset, r, "%s decompression failed: %m", + object_compressed_to_string(compression)); return r; } @@ -912,7 +917,7 @@ int journal_file_verify( r = journal_file_object_verify(f, p, o); if (r < 0) { - error(p, "Invalid object contents: %s", strerror(-r)); + error_errno(p, r, "Invalid object contents: %m"); goto fail; } diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 6f841efb69..7f997487b4 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -310,7 +310,7 @@ static void help(void) { " -m --merge Show entries from all available journals\n" " -D --directory=PATH Show journal files from directory\n" " --file=PATH Show journal file\n" - " --root=ROOT Operate on catalog files below a root directory\n" + " --root=ROOT Operate on files below a root directory\n" #ifdef HAVE_GCRYPT " --interval=TIME Time interval for changing the FSS sealing key\n" " --verify-key=KEY Specify FSS verification key\n" @@ -848,8 +848,8 @@ static int parse_argv(int argc, char *argv[]) { if (arg_follow && !arg_no_tail && !arg_since && arg_lines == ARG_LINES_DEFAULT) arg_lines = 10; - if (!!arg_directory + !!arg_file + !!arg_machine > 1) { - log_error("Please specify either -D/--directory= or --file= or -M/--machine=, not more than one."); + if (!!arg_directory + !!arg_file + !!arg_machine + !!arg_root > 1) { + log_error("Please specify at most one of -D/--directory=, --file=, -M/--machine=, --root."); return -EINVAL; } @@ -1091,8 +1091,10 @@ static int discover_next_boot(sd_journal *j, r = sd_journal_previous(j); if (r < 0) return r; - else if (r == 0) + else if (r == 0) { + log_debug("Whoopsie! We found a boot ID but can't read its last entry."); return -ENODATA; /* This shouldn't happen. We just came from this very boot ID. */ + } r = sd_journal_get_realtime_usec(j, &next_boot->last); if (r < 0) @@ -1112,7 +1114,7 @@ static int get_boots( bool skip_once; int r, count = 0; - BootId *head = NULL, *tail = NULL; + BootId *head = NULL, *tail = NULL, *id; const bool advance_older = boot_id && offset <= 0; sd_id128_t previous_boot_id; @@ -1203,6 +1205,13 @@ static int get_boots( break; } } else { + LIST_FOREACH(boot_list, id, head) { + if (sd_id128_equal(id->id, current->id)) { + /* boot id already stored, something wrong with the journal files */ + /* exiting as otherwise this problem would cause forever loop */ + goto finish; + } + } LIST_INSERT_AFTER(boot_list, head, tail, current); tail = current; current = NULL; @@ -1267,7 +1276,7 @@ static int add_boot(sd_journal *j) { * We can do this only when we logs are coming from the current machine, * so take the slow path if log location is specified. */ if (arg_boot_offset == 0 && sd_id128_is_null(arg_boot_id) && - !arg_directory && !arg_file) + !arg_directory && !arg_file && !arg_root) return add_match_this_boot(j, arg_machine); @@ -1632,7 +1641,7 @@ static int setup_keys(void) { n /= arg_interval; safe_close(fd); - fd = mkostemp_safe(k, O_WRONLY|O_CLOEXEC); + fd = mkostemp_safe(k); if (fd < 0) { r = log_error_errno(fd, "Failed to open %s: %m", k); goto finish; @@ -1684,9 +1693,9 @@ static int setup_keys(void) { "at a safe location and should not be saved locally on disk.\n" "\n\t%s", ansi_highlight(), ansi_normal(), + p, ansi_highlight(), ansi_normal(), - ansi_highlight_red(), - p); + ansi_highlight_red()); fflush(stderr); } for (i = 0; i < seed_size; i++) { @@ -2161,6 +2170,8 @@ int main(int argc, char *argv[]) { if (arg_directory) r = sd_journal_open_directory(&j, arg_directory, arg_journal_type); + else if (arg_root) + r = sd_journal_open_directory(&j, arg_root, arg_journal_type | SD_JOURNAL_OS_ROOT); else if (arg_file_stdin) { int ifd = STDIN_FILENO; r = sd_journal_open_files_fd(&j, &ifd, 1, 0); @@ -2255,7 +2266,7 @@ int main(int argc, char *argv[]) { if (r < 0) goto finish; - printf("Archived and active journals take up %s on disk.\n", + printf("Archived and active journals take up %s in the file system.\n", format_bytes(sbytes, sizeof(sbytes), bytes)); goto finish; } diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c index fcc9f25814..3a9fba42a3 100644 --- a/src/journal/journald-console.c +++ b/src/journal/journald-console.c @@ -102,6 +102,11 @@ void server_forward_console( tty = s->tty_path ? s->tty_path : "/dev/console"; + /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a + * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely, + * but minimizes the time window the kernel might end up killing journald due to SAK). It also makes things + * easier for us so that we don't have to recover from hangups and suchlike triggered on the console. */ + fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC); if (fd < 0) { log_debug_errno(fd, "Failed to open %s for logging: %m", tty); diff --git a/src/journal/journald-gperf.gperf b/src/journal/journald-gperf.gperf index 7fecd7a964..654fd76a4b 100644 --- a/src/journal/journald-gperf.gperf +++ b/src/journal/journald-gperf.gperf @@ -23,14 +23,14 @@ Journal.SyncIntervalSec, config_parse_sec, 0, offsetof(Server, sync_in Journal.RateLimitInterval, config_parse_sec, 0, offsetof(Server, rate_limit_interval) Journal.RateLimitIntervalSec,config_parse_sec, 0, offsetof(Server, rate_limit_interval) Journal.RateLimitBurst, config_parse_unsigned, 0, offsetof(Server, rate_limit_burst) -Journal.SystemMaxUse, config_parse_iec_uint64, 0, offsetof(Server, system_metrics.max_use) -Journal.SystemMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, system_metrics.max_size) -Journal.SystemKeepFree, config_parse_iec_uint64, 0, offsetof(Server, system_metrics.keep_free) -Journal.SystemMaxFiles, config_parse_uint64, 0, offsetof(Server, system_metrics.n_max_files) -Journal.RuntimeMaxUse, config_parse_iec_uint64, 0, offsetof(Server, runtime_metrics.max_use) -Journal.RuntimeMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, runtime_metrics.max_size) -Journal.RuntimeKeepFree, config_parse_iec_uint64, 0, offsetof(Server, runtime_metrics.keep_free) -Journal.RuntimeMaxFiles, config_parse_uint64, 0, offsetof(Server, runtime_metrics.n_max_files) +Journal.SystemMaxUse, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_use) +Journal.SystemMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_size) +Journal.SystemKeepFree, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.keep_free) +Journal.SystemMaxFiles, config_parse_uint64, 0, offsetof(Server, system_storage.metrics.n_max_files) +Journal.RuntimeMaxUse, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_use) +Journal.RuntimeMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_size) +Journal.RuntimeKeepFree, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.keep_free) +Journal.RuntimeMaxFiles, config_parse_uint64, 0, offsetof(Server, runtime_storage.metrics.n_max_files) Journal.MaxRetentionSec, config_parse_sec, 0, offsetof(Server, max_retention_usec) Journal.MaxFileSec, config_parse_sec, 0, offsetof(Server, max_file_usec) Journal.ForwardToSyslog, config_parse_bool, 0, offsetof(Server, forward_to_syslog) diff --git a/src/journal/journald-rate-limit.c b/src/journal/journald-rate-limit.c index fce799a6ce..f48639cf58 100644 --- a/src/journal/journald-rate-limit.c +++ b/src/journal/journald-rate-limit.c @@ -190,7 +190,7 @@ static unsigned burst_modulate(unsigned burst, uint64_t available) { if (k <= 20) return burst; - burst = (burst * (k-20)) / 4; + burst = (burst * (k-16)) / 4; /* * Example: @@ -261,7 +261,7 @@ int journal_rate_limit_test(JournalRateLimit *r, const char *id, int priority, u return 1 + s; } - if (p->num <= burst) { + if (p->num < burst) { p->num++; return 1; } diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index 587c343b31..50bd83fc6e 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -44,6 +44,7 @@ #include "fs-util.h" #include "hashmap.h" #include "hostname-util.h" +#include "id128-util.h" #include "io-util.h" #include "journal-authenticate.h" #include "journal-file.h" @@ -56,6 +57,7 @@ #include "journald-server.h" #include "journald-stream.h" #include "journald-syslog.h" +#include "log.h" #include "missing.h" #include "mkdir.h" #include "parse-util.h" @@ -69,7 +71,7 @@ #include "string-table.h" #include "string-util.h" #include "user-util.h" -#include "log.h" +#include "syslog-util.h" #define USER_JOURNALS_MAX 1024 @@ -85,48 +87,24 @@ /* The period to insert between posting changes for coalescing */ #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC) -static int determine_space_for( - Server *s, - JournalMetrics *metrics, - const char *path, - const char *name, - bool verbose, - bool patch_min_use, - uint64_t *available, - uint64_t *limit) { - - uint64_t sum = 0, ss_avail, avail; +static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) { _cleanup_closedir_ DIR *d = NULL; struct dirent *de; struct statvfs ss; - const char *p; - usec_t ts; - - assert(s); - assert(metrics); - assert(path); - assert(name); - - ts = now(CLOCK_MONOTONIC); - - if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) { - if (available) - *available = s->cached_space_available; - if (limit) - *limit = s->cached_space_limit; - - return 0; - } + assert(ret_used); + assert(ret_free); - p = strjoina(path, SERVER_MACHINE_ID(s)); - d = opendir(p); + d = opendir(path); if (!d) - return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p); + return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, + errno, "Failed to open %s: %m", path); if (fstatvfs(dirfd(d), &ss) < 0) - return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p); + return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path); + *ret_free = ss.f_bsize * ss.f_bavail; + *ret_used = 0; FOREACH_DIRENT_ALL(de, d, break) { struct stat st; @@ -135,88 +113,125 @@ static int determine_space_for( continue; if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { - log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name); + log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name); continue; } if (!S_ISREG(st.st_mode)) continue; - sum += (uint64_t) st.st_blocks * 512UL; + *ret_used += (uint64_t) st.st_blocks * 512UL; } - /* If requested, then let's bump the min_use limit to the - * current usage on disk. We do this when starting up and - * first opening the journal files. This way sudden spikes in - * disk usage will not cause journald to vacuum files without - * bounds. Note that this means that only a restart of - * journald will make it reset this value. */ - - if (patch_min_use) - metrics->min_use = MAX(metrics->min_use, sum); - - ss_avail = ss.f_bsize * ss.f_bavail; - avail = LESS_BY(ss_avail, metrics->keep_free); - - s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use); - s->cached_space_available = LESS_BY(s->cached_space_limit, sum); - s->cached_space_timestamp = ts; - - if (verbose) { - char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX], - fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX]; - format_bytes(fb1, sizeof(fb1), sum); - format_bytes(fb2, sizeof(fb2), metrics->max_use); - format_bytes(fb3, sizeof(fb3), metrics->keep_free); - format_bytes(fb4, sizeof(fb4), ss_avail); - format_bytes(fb5, sizeof(fb5), s->cached_space_limit); - format_bytes(fb6, sizeof(fb6), s->cached_space_available); - - server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE, - LOG_MESSAGE("%s (%s) is %s, max %s, %s free.", - name, path, fb1, fb5, fb6), - "JOURNAL_NAME=%s", name, - "JOURNAL_PATH=%s", path, - "CURRENT_USE=%"PRIu64, sum, - "CURRENT_USE_PRETTY=%s", fb1, - "MAX_USE=%"PRIu64, metrics->max_use, - "MAX_USE_PRETTY=%s", fb2, - "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free, - "DISK_KEEP_FREE_PRETTY=%s", fb3, - "DISK_AVAILABLE=%"PRIu64, ss_avail, - "DISK_AVAILABLE_PRETTY=%s", fb4, - "LIMIT=%"PRIu64, s->cached_space_limit, - "LIMIT_PRETTY=%s", fb5, - "AVAILABLE=%"PRIu64, s->cached_space_available, - "AVAILABLE_PRETTY=%s", fb6, - NULL); - } + return 0; +} - if (available) - *available = s->cached_space_available; - if (limit) - *limit = s->cached_space_limit; +static void cache_space_invalidate(JournalStorageSpace *space) { + memset(space, 0, sizeof(*space)); +} +static int cache_space_refresh(Server *s, JournalStorage *storage) { + JournalStorageSpace *space; + JournalMetrics *metrics; + uint64_t vfs_used, vfs_avail, avail; + usec_t ts; + int r; + + assert(s); + + metrics = &storage->metrics; + space = &storage->space; + + ts = now(CLOCK_MONOTONIC); + + if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts) + return 0; + + r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail); + if (r < 0) + return r; + + space->vfs_used = vfs_used; + space->vfs_available = vfs_avail; + + avail = LESS_BY(vfs_avail, metrics->keep_free); + + space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use); + space->available = LESS_BY(space->limit, vfs_used); + space->timestamp = ts; return 1; } -static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) { - JournalMetrics *metrics; - const char *path, *name; +static void patch_min_use(JournalStorage *storage) { + assert(storage); + + /* Let's bump the min_use limit to the current usage on disk. We do + * this when starting up and first opening the journal files. This way + * sudden spikes in disk usage will not cause journald to vacuum files + * without bounds. Note that this means that only a restart of journald + * will make it reset this value. */ + + storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used); +} + + +static int determine_space(Server *s, uint64_t *available, uint64_t *limit) { + JournalStorage *js; + int r; assert(s); - if (s->system_journal) { - path = "/var/log/journal/"; - metrics = &s->system_metrics; - name = "System journal"; - } else { - path = "/run/log/journal/"; - metrics = &s->runtime_metrics; - name = "Runtime journal"; + js = s->system_journal ? &s->system_storage : &s->runtime_storage; + + r = cache_space_refresh(s, js); + if (r >= 0) { + if (available) + *available = js->space.available; + if (limit) + *limit = js->space.limit; } + return r; +} + +void server_space_usage_message(Server *s, JournalStorage *storage) { + char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX], + fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX]; + JournalMetrics *metrics; + + assert(s); + + if (!storage) + storage = s->system_journal ? &s->system_storage : &s->runtime_storage; + + if (cache_space_refresh(s, storage) < 0) + return; - return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit); + metrics = &storage->metrics; + format_bytes(fb1, sizeof(fb1), storage->space.vfs_used); + format_bytes(fb2, sizeof(fb2), metrics->max_use); + format_bytes(fb3, sizeof(fb3), metrics->keep_free); + format_bytes(fb4, sizeof(fb4), storage->space.vfs_available); + format_bytes(fb5, sizeof(fb5), storage->space.limit); + format_bytes(fb6, sizeof(fb6), storage->space.available); + + server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE, + LOG_MESSAGE("%s (%s) is %s, max %s, %s free.", + storage->name, storage->path, fb1, fb5, fb6), + "JOURNAL_NAME=%s", storage->name, + "JOURNAL_PATH=%s", storage->path, + "CURRENT_USE=%"PRIu64, storage->space.vfs_used, + "CURRENT_USE_PRETTY=%s", fb1, + "MAX_USE=%"PRIu64, metrics->max_use, + "MAX_USE_PRETTY=%s", fb2, + "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free, + "DISK_KEEP_FREE_PRETTY=%s", fb3, + "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available, + "DISK_AVAILABLE_PRETTY=%s", fb4, + "LIMIT=%"PRIu64, storage->space.limit, + "LIMIT_PRETTY=%s", fb5, + "AVAILABLE=%"PRIu64, storage->space.available, + "AVAILABLE_PRETTY=%s", fb6, + NULL); } static void server_add_acls(JournalFile *f, uid_t uid) { @@ -267,6 +282,97 @@ static int open_journal( return r; } +static bool flushed_flag_is_set(void) { + return (access("/run/systemd/journal/flushed", F_OK) >= 0); +} + +static int system_journal_open(Server *s, bool flush_requested) { + bool flushed = false; + const char *fn; + int r = 0; + + if (!s->system_journal && + (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) && + (flush_requested || (flushed = flushed_flag_is_set()))) { + + /* If in auto mode: first try to create the machine + * path, but not the prefix. + * + * If in persistent mode: create /var/log/journal and + * the machine path */ + + if (s->storage == STORAGE_PERSISTENT) + (void) mkdir_p("/var/log/journal/", 0755); + + (void) mkdir(s->system_storage.path, 0755); + + fn = strjoina(s->system_storage.path, "/system.journal"); + r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal); + if (r >= 0) { + server_add_acls(s->system_journal, 0); + (void) cache_space_refresh(s, &s->system_storage); + patch_min_use(&s->system_storage); + } else if (r < 0) { + if (r != -ENOENT && r != -EROFS) + log_warning_errno(r, "Failed to open system journal: %m"); + + r = 0; + } + + /* If the runtime journal is open, and we're post-flush, we're + * recovering from a failed system journal rotate (ENOSPC) + * for which the runtime journal was reopened. + * + * Perform an implicit flush to var, leaving the runtime + * journal closed, now that the system journal is back. + */ + if (s->runtime_journal && flushed) + (void) server_flush_to_var(s); + } + + if (!s->runtime_journal && + (s->storage != STORAGE_NONE)) { + + fn = strjoina(s->runtime_storage.path, "/system.journal"); + + if (s->system_journal) { + + /* Try to open the runtime journal, but only + * if it already exists, so that we can flush + * it into the system journal */ + + r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal); + if (r < 0) { + if (r != -ENOENT) + log_warning_errno(r, "Failed to open runtime journal: %m"); + + r = 0; + } + + } else { + + /* OK, we really need the runtime journal, so create + * it if necessary. */ + + (void) mkdir("/run/log", 0755); + (void) mkdir("/run/log/journal", 0755); + (void) mkdir_parents(fn, 0750); + + r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal); + if (r < 0) + return log_error_errno(r, "Failed to open runtime journal: %m"); + } + + if (s->runtime_journal) { + server_add_acls(s->runtime_journal, 0); + (void) cache_space_refresh(s, &s->runtime_storage); + patch_min_use(&s->runtime_storage); + } + } + + return r; +} + static JournalFile* find_journal(Server *s, uid_t uid) { _cleanup_free_ char *p = NULL; int r; @@ -275,6 +381,17 @@ static JournalFile* find_journal(Server *s, uid_t uid) { assert(s); + /* A rotate that fails to create the new journal (ENOSPC) leaves the + * rotated journal as NULL. Unless we revisit opening, even after + * space is made available we'll continue to return NULL indefinitely. + * + * system_journal_open() is a noop if the journals are already open, so + * we can just call it here to recover from failed rotates (or anything + * else that's left the journals as NULL). + * + * Fixes https://github.com/systemd/systemd/issues/3968 */ + (void) system_journal_open(s, false); + /* We split up user logs only on /var, not on /run. If the * runtime file is open, we write to it exclusively, in order * to guarantee proper order as soon as we flush /run to @@ -283,7 +400,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) { if (s->runtime_journal) return s->runtime_journal; - if (uid <= SYSTEM_UID_MAX) + if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid)) return s->system_journal; r = sd_id128_get_machine(&machine); @@ -305,7 +422,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) { (void) journal_file_close(f); } - r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &f); + r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f); if (r < 0) return s->system_journal; @@ -399,50 +516,38 @@ void server_sync(Server *s) { s->sync_scheduled = false; } -static void do_vacuum( - Server *s, - JournalFile *f, - JournalMetrics *metrics, - const char *path, - const char *name, - bool verbose, - bool patch_min_use) { +static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) { - const char *p; - uint64_t limit; int r; assert(s); - assert(metrics); - assert(path); - assert(name); + assert(storage); - if (!f) - return; - - p = strjoina(path, SERVER_MACHINE_ID(s)); + (void) cache_space_refresh(s, storage); - limit = metrics->max_use; - (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit); + if (verbose) + server_space_usage_message(s, storage); - r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose); + r = journal_directory_vacuum(storage->path, storage->space.limit, + storage->metrics.n_max_files, s->max_retention_usec, + &s->oldest_file_usec, verbose); if (r < 0 && r != -ENOENT) - log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p); + log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path); + + cache_space_invalidate(&storage->space); } -int server_vacuum(Server *s, bool verbose, bool patch_min_use) { +int server_vacuum(Server *s, bool verbose) { assert(s); log_debug("Vacuuming..."); s->oldest_file_usec = 0; - do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use); - do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use); - - s->cached_space_limit = 0; - s->cached_space_available = 0; - s->cached_space_timestamp = 0; + if (s->system_journal) + do_vacuum(s, &s->system_storage, verbose); + if (s->runtime_journal) + do_vacuum(s, &s->runtime_storage, verbose); return 0; } @@ -493,54 +598,88 @@ static void server_cache_hostname(Server *s) { static bool shall_try_append_again(JournalFile *f, int r) { switch(r) { + case -E2BIG: /* Hit configured limit */ case -EFBIG: /* Hit fs limit */ case -EDQUOT: /* Quota limit hit */ case -ENOSPC: /* Disk full */ log_debug("%s: Allocation limit reached, rotating.", f->path); return true; + case -EIO: /* I/O error of some kind (mmap) */ log_warning("%s: IO error, rotating.", f->path); return true; + case -EHOSTDOWN: /* Other machine */ log_info("%s: Journal file from other machine, rotating.", f->path); return true; + case -EBUSY: /* Unclean shutdown */ log_info("%s: Unclean shutdown, rotating.", f->path); return true; + case -EPROTONOSUPPORT: /* Unsupported feature */ log_info("%s: Unsupported feature, rotating.", f->path); return true; + case -EBADMSG: /* Corrupted */ case -ENODATA: /* Truncated */ case -ESHUTDOWN: /* Already archived */ log_warning("%s: Journal file corrupted, rotating.", f->path); return true; + case -EIDRM: /* Journal file has been deleted */ log_warning("%s: Journal file has been deleted, rotating.", f->path); return true; + + case -ETXTBSY: /* Journal file is from the future */ + log_warning("%s: Journal file is from the future, rotating.", f->path); + return true; + default: return false; } } static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) { + bool vacuumed = false, rotate = false; + struct dual_timestamp ts; JournalFile *f; - bool vacuumed = false; int r; assert(s); assert(iovec); assert(n > 0); - f = find_journal(s, uid); - if (!f) - return; + /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use + * the source time, and not even the time the event was originally seen, but instead simply the time we started + * processing it, as we want strictly linear ordering in what we write out.) */ + assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0); + assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0); + + if (ts.realtime < s->last_realtime_clock) { + /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during + * regular operation. However, when it does happen, then we should make sure that we start fresh files + * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure + * bisection works correctly. */ + + log_debug("Time jumped backwards, rotating."); + rotate = true; + } else { + + f = find_journal(s, uid); + if (!f) + return; + + if (journal_file_rotate_suggested(f, s->max_file_usec)) { + log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + rotate = true; + } + } - if (journal_file_rotate_suggested(f, s->max_file_usec)) { - log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + if (rotate) { server_rotate(s); - server_vacuum(s, false, false); + server_vacuum(s, false); vacuumed = true; f = find_journal(s, uid); @@ -548,7 +687,9 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; } - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + s->last_realtime_clock = ts.realtime; + + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r >= 0) { server_schedule_sync(s, priority); return; @@ -560,20 +701,58 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned } server_rotate(s); - server_vacuum(s, false, false); + server_vacuum(s, false); f = find_journal(s, uid); if (!f) return; log_debug("Retrying write."); - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r < 0) log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n)); else server_schedule_sync(s, priority); } +static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) { + _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL; + char *copy, ids[SD_ID128_STRING_MAX]; + int r; + + /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute + * on the cgroup path. */ + + r = cg_slice_to_path(slice, &slice_path); + if (r < 0) + return r; + + escaped = cg_escape(unit); + if (!escaped) + return -ENOMEM; + + p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL); + if (!p) + return -ENOMEM; + + r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32); + if (r < 0) + return r; + if (r != 32) + return -EINVAL; + ids[32] = 0; + + if (!id128_is_valid(ids)) + return -EINVAL; + + copy = strdup(ids); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; +} + static void dispatch_message_real( Server *s, struct iovec *iovec, unsigned n, unsigned m, @@ -612,7 +791,7 @@ static void dispatch_message_real( assert(s); assert(iovec); assert(n > 0); - assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m); + assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m); if (ucred) { realuid = ucred->uid; @@ -670,6 +849,7 @@ static void dispatch_message_real( r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c); if (r >= 0) { + _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL; char *session = NULL; x = strjoina("_SYSTEMD_CGROUP=", c); @@ -689,9 +869,8 @@ static void dispatch_message_real( IOVEC_SET_STRING(iovec[n++], owner_uid); } - if (cg_path_get_unit(c, &t) >= 0) { - x = strjoina("_SYSTEMD_UNIT=", t); - free(t); + if (cg_path_get_unit(c, &raw_unit) >= 0) { + x = strjoina("_SYSTEMD_UNIT=", raw_unit); IOVEC_SET_STRING(iovec[n++], x); } else if (unit_id && !session) { x = strjoina("_SYSTEMD_UNIT=", unit_id); @@ -707,12 +886,25 @@ static void dispatch_message_real( IOVEC_SET_STRING(iovec[n++], x); } - if (cg_path_get_slice(c, &t) >= 0) { - x = strjoina("_SYSTEMD_SLICE=", t); + if (cg_path_get_slice(c, &raw_slice) >= 0) { + x = strjoina("_SYSTEMD_SLICE=", raw_slice); + IOVEC_SET_STRING(iovec[n++], x); + } + + if (cg_path_get_user_slice(c, &t) >= 0) { + x = strjoina("_SYSTEMD_USER_SLICE=", t); free(t); IOVEC_SET_STRING(iovec[n++], x); } + if (raw_slice && raw_unit) { + if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) { + x = strjoina("_SYSTEMD_INVOCATION_ID=", t); + free(t); + IOVEC_SET_STRING(iovec[n++], x); + } + } + free(c); } else if (unit_id) { x = strjoina("_SYSTEMD_UNIT=", unit_id); @@ -818,13 +1010,25 @@ static void dispatch_message_real( IOVEC_SET_STRING(iovec[n++], x); } + if (cg_path_get_slice(c, &t) >= 0) { + x = strjoina("OBJECT_SYSTEMD_SLICE=", t); + free(t); + IOVEC_SET_STRING(iovec[n++], x); + } + + if (cg_path_get_user_slice(c, &t) >= 0) { + x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t); + free(t); + IOVEC_SET_STRING(iovec[n++], x); + } + free(c); } } assert(n <= m); if (tv) { - sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv)); + sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv)); IOVEC_SET_STRING(iovec[n++], source_time); } @@ -964,7 +1168,7 @@ void server_dispatch_message( } } - (void) determine_space(s, false, false, &available, NULL); + (void) determine_space(s, &available, NULL); rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available); if (rl == 0) return; @@ -979,83 +1183,6 @@ finish: dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid); } - -static int system_journal_open(Server *s, bool flush_requested) { - const char *fn; - int r = 0; - - if (!s->system_journal && - (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) && - (flush_requested - || access("/run/systemd/journal/flushed", F_OK) >= 0)) { - - /* If in auto mode: first try to create the machine - * path, but not the prefix. - * - * If in persistent mode: create /var/log/journal and - * the machine path */ - - if (s->storage == STORAGE_PERSISTENT) - (void) mkdir_p("/var/log/journal/", 0755); - - fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s)); - (void) mkdir(fn, 0755); - - fn = strjoina(fn, "/system.journal"); - r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, &s->system_journal); - if (r >= 0) { - server_add_acls(s->system_journal, 0); - (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL); - } else if (r < 0) { - if (r != -ENOENT && r != -EROFS) - log_warning_errno(r, "Failed to open system journal: %m"); - - r = 0; - } - } - - if (!s->runtime_journal && - (s->storage != STORAGE_NONE)) { - - fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal"); - - if (s->system_journal) { - - /* Try to open the runtime journal, but only - * if it already exists, so that we can flush - * it into the system journal */ - - r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, &s->runtime_journal); - if (r < 0) { - if (r != -ENOENT) - log_warning_errno(r, "Failed to open runtime journal: %m"); - - r = 0; - } - - } else { - - /* OK, we really need the runtime journal, so create - * it if necessary. */ - - (void) mkdir("/run/log", 0755); - (void) mkdir("/run/log/journal", 0755); - (void) mkdir_parents(fn, 0750); - - r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, &s->runtime_journal); - if (r < 0) - return log_error_errno(r, "Failed to open runtime journal: %m"); - } - - if (s->runtime_journal) { - server_add_acls(s->runtime_journal, 0); - (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL); - } - } - - return r; -} - int server_flush_to_var(Server *s) { sd_id128_t machine; sd_journal *j = NULL; @@ -1117,7 +1244,7 @@ int server_flush_to_var(Server *s) { } server_rotate(s); - server_vacuum(s, false, false); + server_vacuum(s, false); if (!s->system_journal) { log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful."); @@ -1284,14 +1411,15 @@ static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo * log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid); - server_flush_to_var(s); + (void) server_flush_to_var(s); server_sync(s); - server_vacuum(s, false, false); + server_vacuum(s, false); r = touch("/run/systemd/journal/flushed"); if (r < 0) log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m"); + server_space_usage_message(s, NULL); return 0; } @@ -1303,7 +1431,12 @@ static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo * log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid); server_rotate(s); - server_vacuum(s, true, true); + server_vacuum(s, true); + + if (s->system_journal) + patch_min_use(&s->system_storage); + if (s->runtime_journal) + patch_min_use(&s->runtime_storage); /* Let clients know when the most recent rotation happened. */ r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC)); @@ -1393,55 +1526,68 @@ static int setup_signals(Server *s) { return 0; } -static int server_parse_proc_cmdline(Server *s) { - _cleanup_free_ char *line = NULL; - const char *p; +static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + Server *s = data; int r; - r = proc_cmdline(&line); - if (r < 0) { - log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m"); - return 0; - } - - p = line; - for (;;) { - _cleanup_free_ char *word = NULL; + assert(s); - r = extract_first_word(&p, &word, NULL, 0); + if (streq(key, "systemd.journald.forward_to_syslog")) { + r = value ? parse_boolean(value) : true; if (r < 0) - return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line); - - if (r == 0) - break; - - if (startswith(word, "systemd.journald.forward_to_syslog=")) { - r = parse_boolean(word + 35); - if (r < 0) - log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35); - else - s->forward_to_syslog = r; - } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) { - r = parse_boolean(word + 33); - if (r < 0) - log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33); - else - s->forward_to_kmsg = r; - } else if (startswith(word, "systemd.journald.forward_to_console=")) { - r = parse_boolean(word + 36); - if (r < 0) - log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36); - else - s->forward_to_console = r; - } else if (startswith(word, "systemd.journald.forward_to_wall=")) { - r = parse_boolean(word + 33); - if (r < 0) - log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33); - else - s->forward_to_wall = r; - } else if (startswith(word, "systemd.journald")) - log_warning("Invalid systemd.journald parameter. Ignoring."); - } + log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value); + else + s->forward_to_syslog = r; + } else if (streq(key, "systemd.journald.forward_to_kmsg")) { + r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value); + else + s->forward_to_kmsg = r; + } else if (streq(key, "systemd.journald.forward_to_console")) { + r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value); + else + s->forward_to_console = r; + } else if (streq(key, "systemd.journald.forward_to_wall")) { + r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value); + else + s->forward_to_wall = r; + } else if (streq(key, "systemd.journald.max_level_console") && value) { + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level console value \"%s\". Ignoring.", value); + else + s->max_level_console = r; + } else if (streq(key, "systemd.journald.max_level_store") && value) { + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level store value \"%s\". Ignoring.", value); + else + s->max_level_store = r; + } else if (streq(key, "systemd.journald.max_level_syslog") && value) { + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value); + else + s->max_level_syslog = r; + } else if (streq(key, "systemd.journald.max_level_kmsg") && value) { + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value); + else + s->max_level_kmsg = r; + } else if (streq(key, "systemd.journald.max_level_wall") && value) { + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value); + else + s->max_level_wall = r; + } else if (startswith(key, "systemd.journald")) + log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key); /* do not warn about state here, since probably systemd already did */ return 0; @@ -1450,7 +1596,7 @@ static int server_parse_proc_cmdline(Server *s) { static int server_parse_config_file(Server *s) { assert(s); - return config_parse_many(PKGSYSCONFDIR "/journald.conf", + return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf", CONF_PATHS_NULSTR("systemd/journald.conf.d"), "Journal\0", config_item_perf_lookup, journald_gperf_lookup, @@ -1563,7 +1709,7 @@ static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, assert(s->notify_fd == fd); /* The $NOTIFY_SOCKET is writable again, now send exactly one - * message on it. Either it's the wtachdog event, the initial + * message on it. Either it's the watchdog event, the initial * READY=1 event or an stdout stream event. If there's nothing * to write anymore, turn our event source off. The next time * there's something to send it will be turned on again. */ @@ -1748,11 +1894,11 @@ int server_init(Server *s) { s->max_level_console = LOG_INFO; s->max_level_wall = LOG_EMERG; - journal_reset_metrics(&s->system_metrics); - journal_reset_metrics(&s->runtime_metrics); + journal_reset_metrics(&s->system_storage.metrics); + journal_reset_metrics(&s->runtime_storage.metrics); server_parse_config_file(s); - server_parse_proc_cmdline(s); + parse_proc_cmdline(parse_proc_cmdline_item, s, true); if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) { log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0", @@ -1902,6 +2048,14 @@ int server_init(Server *s) { server_cache_boot_id(s); server_cache_machine_id(s); + s->runtime_storage.name = "Runtime journal"; + s->system_storage.name = "System journal"; + + s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), NULL); + s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), NULL); + if (!s->runtime_storage.path || !s->system_storage.path) + return -ENOMEM; + (void) server_connect_notify(s); return system_journal_open(s, false); diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index d2a32ab422..99d91496be 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -49,6 +49,24 @@ typedef enum SplitMode { _SPLIT_INVALID = -1 } SplitMode; +typedef struct JournalStorageSpace { + usec_t timestamp; + + uint64_t available; + uint64_t limit; + + uint64_t vfs_used; /* space used by journal files */ + uint64_t vfs_available; +} JournalStorageSpace; + +typedef struct JournalStorage { + const char *name; + const char *path; + + JournalMetrics metrics; + JournalStorageSpace space; +} JournalStorage; + struct Server { int syslog_fd; int native_fd; @@ -89,8 +107,8 @@ struct Server { usec_t rate_limit_interval; unsigned rate_limit_burst; - JournalMetrics runtime_metrics; - JournalMetrics system_metrics; + JournalStorage runtime_storage; + JournalStorage system_storage; bool compress; bool seal; @@ -103,10 +121,6 @@ struct Server { unsigned n_forward_syslog_missed; usec_t last_warn_forward_syslog_missed; - uint64_t cached_space_available; - uint64_t cached_space_limit; - usec_t cached_space_timestamp; - uint64_t var_available_timestamp; usec_t max_retention_usec; @@ -149,14 +163,16 @@ struct Server { char *cgroup_root; usec_t watchdog_usec; + + usec_t last_realtime_clock; }; #define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID=")) -#define N_IOVEC_META_FIELDS 20 +#define N_IOVEC_META_FIELDS 22 #define N_IOVEC_KERNEL_FIELDS 64 #define N_IOVEC_UDEV_FIELDS 32 -#define N_IOVEC_OBJECT_FIELDS 12 +#define N_IOVEC_OBJECT_FIELDS 14 #define N_IOVEC_PAYLOAD_FIELDS 15 void server_dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigned m, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len, const char *unit_id, int priority, pid_t object_pid); @@ -178,9 +194,10 @@ SplitMode split_mode_from_string(const char *s) _pure_; int server_init(Server *s); void server_done(Server *s); void server_sync(Server *s); -int server_vacuum(Server *s, bool verbose, bool patch_min_use); +int server_vacuum(Server *s, bool verbose); void server_rotate(Server *s); int server_schedule_sync(Server *s, int priority); int server_flush_to_var(Server *s); void server_maybe_append_tags(Server *s); int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata); +void server_space_usage_message(Server *s, JournalStorage *storage); diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c index 4ad16ee41c..bc092f3c12 100644 --- a/src/journal/journald-stream.c +++ b/src/journal/journald-stream.c @@ -393,6 +393,9 @@ static int stdout_stream_scan(StdoutStream *s, bool force_flush) { p = s->buffer; remaining = s->length; + + /* XXX: This function does nothing if (s->length == 0) */ + for (;;) { char *end; size_t skip; diff --git a/src/journal/journald.c b/src/journal/journald.c index 272acb71c4..7f47ca22dd 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) { if (r < 0) goto finish; - server_vacuum(&server, false, false); + server_vacuum(&server, false); server_flush_to_var(&server); server_flush_dev_kmsg(&server); @@ -60,6 +60,11 @@ int main(int argc, char *argv[]) { LOG_MESSAGE("Journal started"), NULL); + /* Make sure to send the usage message *after* flushing the + * journal so entries from the runtime journals are ordered + * before this message. See #4190 for some details. */ + server_space_usage_message(&server, NULL); + for (;;) { usec_t t = USEC_INFINITY, n; @@ -77,7 +82,7 @@ int main(int argc, char *argv[]) { if (server.oldest_file_usec + server.max_retention_usec < n) { log_info("Retention time reached."); server_rotate(&server); - server_vacuum(&server, false, false); + server_vacuum(&server, false); continue; } diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 293d27053a..d91247b524 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -325,10 +325,8 @@ static FileDescriptor* fd_add(MMapCache *m, int fd) { f->fd = fd; r = hashmap_put(m->fds, FD_TO_PTR(fd), f); - if (r < 0) { - free(f); - return NULL; - } + if (r < 0) + return mfree(f); return f; } diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 2a3824d0e8..f2f8546086 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -387,7 +387,7 @@ _public_ int sd_journal_add_disjunction(sd_journal *j) { } static char *match_make_string(Match *m) { - char *p, *r; + char *p = NULL, *r; Match *i; bool enclose = false; @@ -397,15 +397,12 @@ static char *match_make_string(Match *m) { if (m->type == MATCH_DISCRETE) return strndup(m->data, m->size); - p = NULL; LIST_FOREACH(matches, i, m->matches) { char *t, *k; t = match_make_string(i); - if (!t) { - free(p); - return NULL; - } + if (!t) + return mfree(p); if (p) { k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t, NULL); @@ -1719,9 +1716,16 @@ static sd_journal *journal_new(int flags, const char *path) { j->data_threshold = DEFAULT_DATA_THRESHOLD; if (path) { - j->path = strdup(path); - if (!j->path) + char *t; + + t = strdup(path); + if (!t) goto fail; + + if (flags & SD_JOURNAL_OS_ROOT) + j->prefix = t; + else + j->path = t; } j->files = ordered_hashmap_new(&string_hash_ops); @@ -1737,12 +1741,17 @@ fail: return NULL; } +#define OPEN_ALLOWED_FLAGS \ + (SD_JOURNAL_LOCAL_ONLY | \ + SD_JOURNAL_RUNTIME_ONLY | \ + SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER) + _public_ int sd_journal_open(sd_journal **ret, int flags) { sd_journal *j; int r; assert_return(ret, -EINVAL); - assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_RUNTIME_ONLY|SD_JOURNAL_SYSTEM|SD_JOURNAL_CURRENT_USER)) == 0, -EINVAL); + assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL); j = journal_new(flags, NULL); if (!j) @@ -1761,6 +1770,9 @@ fail: return r; } +#define OPEN_CONTAINER_ALLOWED_FLAGS \ + (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM) + _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) { _cleanup_free_ char *root = NULL, *class = NULL; sd_journal *j; @@ -1772,7 +1784,7 @@ _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, in assert_return(machine, -EINVAL); assert_return(ret, -EINVAL); - assert_return((flags & ~(SD_JOURNAL_LOCAL_ONLY|SD_JOURNAL_SYSTEM)) == 0, -EINVAL); + assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL); assert_return(machine_name_is_valid(machine), -EINVAL); p = strjoina("/run/systemd/machines/", machine); @@ -1787,13 +1799,10 @@ _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, in if (!streq_ptr(class, "container")) return -EIO; - j = journal_new(flags, NULL); + j = journal_new(flags, root); if (!j) return -ENOMEM; - j->prefix = root; - root = NULL; - r = add_search_paths(j); if (r < 0) goto fail; @@ -1806,13 +1815,17 @@ fail: return r; } +#define OPEN_DIRECTORY_ALLOWED_FLAGS \ + (SD_JOURNAL_OS_ROOT | \ + SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER ) + _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) { sd_journal *j; int r; assert_return(ret, -EINVAL); assert_return(path, -EINVAL); - assert_return((flags & ~SD_JOURNAL_OS_ROOT) == 0, -EINVAL); + assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL); j = journal_new(flags, path); if (!j) @@ -1861,6 +1874,10 @@ fail: return r; } +#define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \ + (SD_JOURNAL_OS_ROOT | \ + SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER ) + _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) { sd_journal *j; struct stat st; @@ -1868,7 +1885,7 @@ _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) { assert_return(ret, -EINVAL); assert_return(fd >= 0, -EBADF); - assert_return((flags & ~SD_JOURNAL_OS_ROOT) == 0, -EINVAL); + assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL); if (fstat(fd, &st) < 0) return -errno; diff --git a/src/journal/test-catalog.c b/src/journal/test-catalog.c index 898c876450..b7d9e7bffa 100644 --- a/src/journal/test-catalog.c +++ b/src/journal/test-catalog.c @@ -55,7 +55,7 @@ static Hashmap * test_import(const char* contents, ssize_t size, int code) { assert_se(h = hashmap_new(&catalog_hash_ops)); - fd = mkostemp_safe(name, O_RDWR|O_CLOEXEC); + fd = mkostemp_safe(name); assert_se(fd >= 0); assert_se(write(fd, contents, size) == size); @@ -182,7 +182,7 @@ static void test_catalog_update(void) { static char name[] = "/tmp/test-catalog.XXXXXX"; int r; - r = mkostemp_safe(name, O_RDWR|O_CLOEXEC); + r = mkostemp_safe(name); assert_se(r >= 0); database = name; diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c index 68c9a4d76c..72cadf1771 100644 --- a/src/journal/test-compress.c +++ b/src/journal/test-compress.c @@ -167,7 +167,7 @@ static void test_compress_stream(int compression, log_debug("/* test compression */"); - assert_se((dst = mkostemp_safe(pattern, O_RDWR|O_CLOEXEC)) >= 0); + assert_se((dst = mkostemp_safe(pattern)) >= 0); assert_se(compress(src, dst, -1) == 0); @@ -178,7 +178,7 @@ static void test_compress_stream(int compression, log_debug("/* test decompression */"); - assert_se((dst2 = mkostemp_safe(pattern2, O_RDWR|O_CLOEXEC)) >= 0); + assert_se((dst2 = mkostemp_safe(pattern2)) >= 0); assert_se(stat(srcfile, &st) == 0); @@ -247,6 +247,9 @@ int main(int argc, char *argv[]) { "text\0foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF" "foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF"; + /* The file to test compression on can be specified as the first argument */ + const char *srcfile = argc > 1 ? argv[1] : argv[0]; + char data[512] = "random\0"; char huge[4096*1024]; @@ -275,7 +278,7 @@ int main(int argc, char *argv[]) { huge, sizeof(huge), true); test_compress_stream(OBJECT_COMPRESSED_XZ, "xzcat", - compress_stream_xz, decompress_stream_xz, argv[0]); + compress_stream_xz, decompress_stream_xz, srcfile); #else log_info("/* XZ test skipped */"); #endif @@ -297,7 +300,7 @@ int main(int argc, char *argv[]) { huge, sizeof(huge), true); test_compress_stream(OBJECT_COMPRESSED_LZ4, "lz4cat", - compress_stream_lz4, decompress_stream_lz4, argv[0]); + compress_stream_lz4, decompress_stream_lz4, srcfile); test_lz4_decompress_partial(); #else diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c index 5e063f4d04..35cae23bf8 100644 --- a/src/journal/test-journal-interleaving.c +++ b/src/journal/test-journal-interleaving.c @@ -36,10 +36,9 @@ static bool arg_keep = false; -noreturn static void log_assert_errno(const char *text, int eno, const char *file, int line, const char *func) { - log_internal(LOG_CRIT, 0, file, line, func, - "'%s' failed at %s:%u (%s): %s.", - text, file, line, func, strerror(eno)); +noreturn static void log_assert_errno(const char *text, int error, const char *file, int line, const char *func) { + log_internal(LOG_CRIT, error, file, line, func, + "'%s' failed at %s:%u (%s): %m", text, file, line, func); abort(); } diff --git a/src/journal/test-mmap-cache.c b/src/journal/test-mmap-cache.c index 009aabf55e..0ad49aeb5f 100644 --- a/src/journal/test-mmap-cache.c +++ b/src/journal/test-mmap-cache.c @@ -36,15 +36,15 @@ int main(int argc, char *argv[]) { assert_se(m = mmap_cache_new()); - x = mkostemp_safe(px, O_RDWR|O_CLOEXEC); + x = mkostemp_safe(px); assert_se(x >= 0); unlink(px); - y = mkostemp_safe(py, O_RDWR|O_CLOEXEC); + y = mkostemp_safe(py); assert_se(y >= 0); unlink(py); - z = mkostemp_safe(pz, O_RDWR|O_CLOEXEC); + z = mkostemp_safe(pz); assert_se(z >= 0); unlink(pz); |