From eb86030ec0e53ef3834d1b230440e88fdf020e9d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 27 Jan 2016 18:59:29 +0100 Subject: sd-journal: add an API to enumerate known field names of the journal This adds two new calls to get the list of all journal fields names currently in use. This is the low-level support to implement the feature requested in #2176 in a more optimized way. --- src/journal/journal-internal.h | 21 ++++-- src/journal/sd-journal.c | 156 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+), 6 deletions(-) (limited to 'src/journal') diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h index fa5ca11636..a55d1bcc47 100644 --- a/src/journal/journal-internal.h +++ b/src/journal/journal-internal.h @@ -103,18 +103,27 @@ struct sd_journal { unsigned current_invalidate_counter, last_invalidate_counter; usec_t last_process_usec; + /* Iterating through unique fields and their data values */ char *unique_field; JournalFile *unique_file; uint64_t unique_offset; + /* Iterating through known fields */ + JournalFile *fields_file; + uint64_t fields_offset; + uint64_t fields_hash_table_index; + char *fields_buffer; + size_t fields_buffer_allocated; + int flags; - bool on_network; - bool no_new_files; - bool unique_file_lost; /* File we were iterating over got - removed, and there were no more - files, so sd_j_enumerate_unique - will return a value equal to 0. */ + bool on_network:1; + bool no_new_files:1; + bool unique_file_lost:1; /* File we were iterating over got + removed, and there were no more + files, so sd_j_enumerate_unique + will return a value equal to 0. */ + bool fields_file_lost:1; bool has_runtime_files:1; bool has_persistent_files:1; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 74a5e262f8..85d9bbe04f 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -1338,6 +1338,13 @@ static void remove_file_real(sd_journal *j, JournalFile *f) { j->unique_file_lost = true; } + if (j->fields_file == f) { + j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path); + j->fields_offset = 0; + if (!j->fields_file) + j->fields_file_lost = true; + } + journal_file_close(f); j->current_invalidate_counter ++; @@ -1806,6 +1813,7 @@ _public_ void sd_journal_close(sd_journal *j) { free(j->path); free(j->prefix); free(j->unique_field); + free(j->fields_buffer); free(j); } @@ -2552,6 +2560,154 @@ _public_ void sd_journal_restart_unique(sd_journal *j) { j->unique_file_lost = false; } +_public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) { + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_pid_changed(j), -ECHILD); + assert_return(field, -EINVAL); + + if (!j->fields_file) { + if (j->fields_file_lost) + return 0; + + j->fields_file = ordered_hashmap_first(j->files); + if (!j->fields_file) + return 0; + + j->fields_hash_table_index = 0; + j->fields_offset = 0; + } + + for (;;) { + JournalFile *f, *of; + Iterator i; + uint64_t m; + Object *o; + size_t sz; + bool found; + + f = j->fields_file; + + if (j->fields_offset == 0) { + bool eof = false; + + /* We are not yet positioned at any field. Let's pick the first one */ + r = journal_file_map_field_hash_table(f); + if (r < 0) + return r; + + m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem); + for (;;) { + if (j->fields_hash_table_index >= m) { + /* Reached the end of the hash table, go to the next file. 
*/ + eof = true; + break; + } + + j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset); + + if (j->fields_offset != 0) + break; + + /* Empty hash table bucket, go to next one */ + j->fields_hash_table_index++; + } + + if (eof) { + /* Proceed with next file */ + j->fields_file = ordered_hashmap_next(j->files, f->path); + if (!j->fields_file) { + *field = NULL; + return 0; + } + + j->fields_offset = 0; + j->fields_hash_table_index = 0; + continue; + } + + } else { + /* We are already positioned at a field. If so, let's figure out the next field from it */ + + r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o); + if (r < 0) + return r; + + j->fields_offset = le64toh(o->field.next_hash_offset); + if (j->fields_offset == 0) { + /* Reached the end of the hash table chain */ + j->fields_hash_table_index++; + continue; + } + } + + /* We use OBJECT_UNUSED here, so that the iteator below doesn't remove our mmap window */ + r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o); + if (r < 0) + return r; + + /* Because we used OBJECT_UNUSED above, we need to do our type check manually */ + if (o->object.type != OBJECT_FIELD) { + log_debug("%s:offset " OFSfmt ": object has type %i, expected %i", f->path, j->fields_offset, o->object.type, OBJECT_FIELD); + return -EBADMSG; + } + + sz = le64toh(o->object.size) - offsetof(Object, field.payload); + + /* Let's see if we already returned this field name before. */ + found = false; + ORDERED_HASHMAP_FOREACH(of, j->files, i) { + if (of == f) + break; + + /* Skip this file it didn't have any fields indexed */ + if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0) + continue; + + r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL); + if (r < 0) + return r; + if (r > 0) { + found = true; + break; + } + } + + if (found) + continue; + + /* Check if this is really a valid string containing no NUL byte */ + if (memchr(o->field.payload, 0, sz)) + return -EBADMSG; + + if (sz > j->data_threshold) + sz = j->data_threshold; + + if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1)) + return -ENOMEM; + + memcpy(j->fields_buffer, o->field.payload, sz); + j->fields_buffer[sz] = 0; + + if (!field_is_valid(j->fields_buffer)) + return -EBADMSG; + + *field = j->fields_buffer; + return 1; + } +} + +_public_ void sd_journal_restart_fields(sd_journal *j) { + if (!j) + return; + + j->fields_file = NULL; + j->fields_hash_table_index = 0; + j->fields_offset = 0; + j->fields_file_lost = false; +} + _public_ int sd_journal_reliable_fd(sd_journal *j) { assert_return(j, -EINVAL); assert_return(!journal_pid_changed(j), -ECHILD); -- cgit v1.2.3-54-g00ecf From 69e714f3d8784fb0207ac9112ed1a48d4fd5e9fe Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 27 Jan 2016 19:01:42 +0100 Subject: journalctl: add new --fields switch to dump all currently used field names Fixes #2176 --- man/journalctl.xml | 7 +++++++ src/journal/journalctl.c | 30 ++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'src/journal') diff --git a/man/journalctl.xml b/man/journalctl.xml index b57afb6ebf..5af98c67cb 100644 --- a/man/journalctl.xml +++ b/man/journalctl.xml @@ -571,6 +571,13 @@ field can take in all entries of the journal. + + + + + Print all field names currently used in all entries of the journal. 
+ + diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 1686f38c4e..cda24c4d18 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -136,6 +136,8 @@ static enum { ACTION_SYNC, ACTION_ROTATE, ACTION_VACUUM, + ACTION_LIST_FIELDS, + ACTION_LIST_FIELD_NAMES, } arg_action = ACTION_SHOW; typedef struct BootId { @@ -244,6 +246,7 @@ static void help(void) { "\nCommands:\n" " -h --help Show this help text\n" " --version Show package version\n" + " -N --fields List all field names currently used\n" " -F --field=FIELD List all values that a specified field takes\n" " --disk-usage Show total disk usage of all journal files\n" " --vacuum-size=BYTES Reduce disk usage below specified size\n" @@ -340,6 +343,7 @@ static int parse_argv(int argc, char *argv[]) { { "unit", required_argument, NULL, 'u' }, { "user-unit", required_argument, NULL, ARG_USER_UNIT }, { "field", required_argument, NULL, 'F' }, + { "fields", no_argument, NULL, 'N' }, { "catalog", no_argument, NULL, 'x' }, { "list-catalog", no_argument, NULL, ARG_LIST_CATALOG }, { "dump-catalog", no_argument, NULL, ARG_DUMP_CATALOG }, @@ -361,7 +365,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "hefo:aln::qmb::kD:p:c:S:U:t:u:F:xrM:", options, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "hefo:aln::qmb::kD:p:c:S:U:t:u:NF:xrM:", options, NULL)) >= 0) switch (c) { @@ -698,9 +702,14 @@ static int parse_argv(int argc, char *argv[]) { break; case 'F': + arg_action = ACTION_LIST_FIELDS; arg_field = optarg; break; + case 'N': + arg_action = ACTION_LIST_FIELD_NAMES; + break; + case 'x': arg_catalog = true; break; @@ -2003,6 +2012,8 @@ int main(int argc, char *argv[]) { case ACTION_DISK_USAGE: case ACTION_LIST_BOOTS: case ACTION_VACUUM: + case ACTION_LIST_FIELDS: + case ACTION_LIST_FIELD_NAMES: /* These ones require access to the journal files, continue below. */ break; @@ -2085,7 +2096,20 @@ int main(int argc, char *argv[]) { goto finish; } + case ACTION_LIST_FIELD_NAMES: { + const char *field; + + SD_JOURNAL_FOREACH_FIELD(j, field) { + printf("%s\n", field); + n_shown ++; + } + + r = 0; + goto finish; + } + case ACTION_SHOW: + case ACTION_LIST_FIELDS: break; default: @@ -2139,10 +2163,12 @@ int main(int argc, char *argv[]) { log_debug("Journal filter: %s", filter); } - if (arg_field) { + if (arg_action == ACTION_LIST_FIELDS) { const void *data; size_t size; + assert(arg_field); + r = sd_journal_set_data_threshold(j, 0); if (r < 0) { log_error_errno(r, "Failed to unset data size threshold: %m"); -- cgit v1.2.3-54-g00ecf From ed71f95662af903f0c5eba32c439e77c5cec4e7b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 27 Jan 2016 19:02:10 +0100 Subject: sd-journal: minor optimization No need to store the object and offset data if we don't actually need it ever. --- src/journal/sd-journal.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'src/journal') diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 85d9bbe04f..7a3aaf0cab 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -2520,24 +2520,20 @@ _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_ * traversed files. 
*/ found = false; ORDERED_HASHMAP_FOREACH(of, j->files, i) { - Object *oo; - uint64_t op; - if (of == j->unique_file) break; - /* Skip this file it didn't have any fields - * indexed */ - if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && - le64toh(of->header->n_fields) <= 0) + /* Skip this file it didn't have any fields indexed */ + if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0) continue; - r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), &oo, &op); + r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL); if (r < 0) return r; - - if (r > 0) + if (r > 0) { found = true; + break; + } } if (found) -- cgit v1.2.3-54-g00ecf
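
Taken together, the three patches above add sd_journal_enumerate_fields()/sd_journal_restart_fields() to sd-journal and expose them through the new journalctl --fields (-N) switch. The short program below is a minimal sketch of how a client might consume the new API. It assumes the SD_JOURNAL_FOREACH_FIELD convenience macro is exported from the public <systemd/sd-journal.h> header as part of this series (the header change is outside the src/journal-limited diffs shown here), exactly as journalctl uses it in the second patch; the file name and build line are illustrative only.

/* Minimal sketch: print every field name currently used in the local
 * journal, mirroring what "journalctl --fields" does in the second patch.
 * Assumes SD_JOURNAL_FOREACH_FIELD is available from <systemd/sd-journal.h>
 * as part of this series; error handling is deliberately terse. */
#include <stdio.h>
#include <string.h>
#include <systemd/sd-journal.h>

int main(void) {
        sd_journal *j;
        const char *field;
        int r;

        r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
        if (r < 0) {
                fprintf(stderr, "Failed to open journal: %s\n", strerror(-r));
                return 1;
        }

        /* Iterates by calling sd_journal_enumerate_fields() until it
         * returns 0; duplicate names across journal files are suppressed
         * by the library, so each field name is printed exactly once. */
        SD_JOURNAL_FOREACH_FIELD(j, field)
                printf("%s\n", field);

        sd_journal_close(j);
        return 0;
}

Build with something like "cc -o list-fields list-fields.c $(pkg-config --cflags --libs libsystemd)" and compare the output against "journalctl --fields". The per-file lookup in the first patch (journal_file_find_field_object_with_hash() against already-traversed files) is what makes this safe across multiple journal files: a field name indexed in several files is still reported only once.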