From 87d2c1ff6a7375f03476767e6f59454bcc5cd04b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 7 Oct 2011 21:06:39 +0200 Subject: journal: add preliminary incomplete implementation --- .gitignore | 6 +- Makefile.am | 63 +- po/.gitignore | 1 + src/automount.c | 2 +- src/cgroup-show.c | 2 +- src/journal/Makefile | 1 + src/journal/journal-def.h | 139 +++++ src/journal/journal-private.h | 53 ++ src/journal/journalctl.c | 91 +++ src/journal/journald.c | 460 ++++++++++++++ src/journal/lookup3.c | 1003 +++++++++++++++++++++++++++++++ src/journal/lookup3.h | 25 + src/journal/sd-journal.c | 1333 +++++++++++++++++++++++++++++++++++++++++ src/journal/sd-journal.h | 74 +++ src/journal/test-journal.c | 93 +++ src/journal/wjournal.c | 57 ++ src/journal/wjournal.h | 39 ++ src/kmsg-syslogd.c | 216 ++----- src/loginctl.c | 2 +- src/machine-id-setup.c | 40 +- src/manager.c | 4 +- src/pam-module.c | 34 +- src/sd-id128.c | 210 +++++++ src/sd-id128.h | 56 ++ src/stdout-syslog-bridge.c | 3 +- src/systemctl.c | 4 +- src/test-id128.c | 49 ++ src/util.c | 276 +++++++-- src/util.h | 14 +- tmpfiles.d/Makefile | 1 + 30 files changed, 4076 insertions(+), 275 deletions(-) create mode 120000 src/journal/Makefile create mode 100644 src/journal/journal-def.h create mode 100644 src/journal/journal-private.h create mode 100644 src/journal/journalctl.c create mode 100644 src/journal/journald.c create mode 100644 src/journal/lookup3.c create mode 100644 src/journal/lookup3.h create mode 100644 src/journal/sd-journal.c create mode 100644 src/journal/sd-journal.h create mode 100644 src/journal/test-journal.c create mode 100644 src/journal/wjournal.c create mode 100644 src/journal/wjournal.h create mode 100644 src/sd-id128.c create mode 100644 src/sd-id128.h create mode 100644 src/test-id128.c create mode 120000 tmpfiles.d/Makefile diff --git a/.gitignore b/.gitignore index f4a8a45b4a..265801ff5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +systemd-journalctl +systemd-journald 
+test-id128 +test-journal test-install org.freedesktop.hostname1.xml org.freedesktop.locale1.xml @@ -91,7 +95,7 @@ install-sh missing stamp-* *.stamp -Makefile +/Makefile ltmain.sh *.tar.bz2 *.tar.gz diff --git a/Makefile.am b/Makefile.am index f4a17aa7f5..9bf92ad7ac 100644 --- a/Makefile.am +++ b/Makefile.am @@ -134,7 +134,8 @@ rootbin_PROGRAMS = \ systemd-ask-password \ systemd-tty-ask-password-agent \ systemd-tmpfiles \ - systemd-machine-id-setup + systemd-machine-id-setup \ + systemd-journalctl bin_PROGRAMS = \ systemd-cgls \ @@ -173,7 +174,8 @@ rootlibexec_PROGRAMS = \ systemd-detect-virt \ systemd-sysctl \ systemd-logind \ - systemd-uaccess + systemd-uaccess \ + systemd-journald if ENABLE_BINFMT rootlibexec_PROGRAMS += \ @@ -225,7 +227,9 @@ noinst_PROGRAMS = \ test-env-replace \ test-strv \ test-login \ - test-install + test-install \ + test-id128 \ + test-journal if HAVE_PAM pamlib_LTLIBRARIES = \ @@ -685,7 +689,8 @@ libsystemd_core_la_SOURCES = \ src/dbus-common.c \ src/sd-daemon.c \ src/install.c \ - src/cgroup-attr.c + src/cgroup-attr.c \ + src/sd-id128.c nodist_libsystemd_core_la_SOURCES = \ src/load-fragment-gperf.c \ @@ -947,6 +952,53 @@ test_install_CFLAGS = \ test_install_LDADD = \ libsystemd-basic.la +test_id128_SOURCES = \ + src/test-id128.c \ + src/sd-id128.c + +test_id128_CFLAGS = \ + $(AM_CFLAGS) + +test_id128_LDADD = \ + libsystemd-basic.la + +test_journal_SOURCES = \ + src/journal/test-journal.c \ + src/journal/sd-journal.c \ + src/journal/lookup3.c \ + src/sd-id128.c + +test_journal_CFLAGS = \ + $(AM_CFLAGS) + +test_journal_LDADD = \ + libsystemd-basic.la + +systemd_journald_SOURCES = \ + src/journal/journald.c \ + src/journal/sd-journal.c \ + src/journal/lookup3.c \ + src/sd-id128.c + +systemd_journald_CFLAGS = \ + $(AM_CFLAGS) + +systemd_journald_LDADD = \ + libsystemd-basic.la \ + libsystemd-daemon.la + +systemd_journalctl_SOURCES = \ + src/journal/journalctl.c \ + src/journal/sd-journal.c \ + src/journal/lookup3.c \ + src/sd-id128.c + 
+systemd_journalctl_CFLAGS = \ + $(AM_CFLAGS) + +systemd_journalctl_LDADD = \ + libsystemd-basic.la + systemd_stdout_syslog_bridge_SOURCES = \ src/stdout-syslog-bridge.c \ src/tcpwrap.c @@ -1142,7 +1194,8 @@ systemd_tmpfiles_LDADD = \ systemd_machine_id_setup_SOURCES = \ src/machine-id-setup.c \ - src/machine-id-main.c + src/machine-id-main.c \ + src/sd-id128.c systemd_machine_id_setup_CFLAGS = \ $(AM_CFLAGS) diff --git a/po/.gitignore b/po/.gitignore index 251edd4c81..ee1215654d 100644 --- a/po/.gitignore +++ b/po/.gitignore @@ -1,3 +1,4 @@ POTFILES Makefile.in.in .intltool-merge-cache +Makefile diff --git a/src/automount.c b/src/automount.c index 29b807de51..6cf3c311b3 100644 --- a/src/automount.c +++ b/src/automount.c @@ -770,7 +770,7 @@ static void automount_fd_event(Unit *u, int fd, uint32_t events, Watch *w) { if (packet.v5_packet.pid > 0) { char *p = NULL; - get_process_name(packet.v5_packet.pid, &p); + get_process_comm(packet.v5_packet.pid, &p); log_debug("Got direct mount request for %s, triggered by %lu (%s)", packet.v5_packet.name, (unsigned long) packet.v5_packet.pid, strna(p)); free(p); diff --git a/src/cgroup-show.c b/src/cgroup-show.c index bc9c216329..03c942c66e 100644 --- a/src/cgroup-show.c +++ b/src/cgroup-show.c @@ -133,7 +133,7 @@ static int show_cgroup_one_by_path(const char *path, const char *prefix, unsigne for (i = 0; i < n; i++) { char *t = NULL; - get_process_cmdline(pids[i], n_columns, &t); + get_process_cmdline(pids[i], n_columns, true, &t); printf("%s%s %*lu %s\n", prefix, diff --git a/src/journal/Makefile b/src/journal/Makefile new file mode 120000 index 0000000000..d0b0e8e008 --- /dev/null +++ b/src/journal/Makefile @@ -0,0 +1 @@ +../Makefile \ No newline at end of file diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h new file mode 100644 index 0000000000..0d865ae2a2 --- /dev/null +++ b/src/journal/journal-def.h @@ -0,0 +1,139 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef 
foojournaldefhfoo +#define foojournaldefhfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include + +#include "macro.h" +#include "sd-id128.h" + +typedef struct Header Header; +typedef struct ObjectHeader ObjectHeader; +typedef union Object Object; +typedef struct DataObject DataObject; +typedef struct EntryObject EntryObject; +typedef struct HashTableObject HashTableObject; +typedef struct BisectTableObject BisectTableObject; +typedef struct EntryItem EntryItem; +typedef struct HashItem HashItem; + +/* Object types */ +enum { + OBJECT_UNUSED, + OBJECT_DATA, + OBJECT_ENTRY, + OBJECT_HASH_TABLE, + OBJECT_BISECT_TABLE +}; + +_packed_ struct ObjectHeader { + uint8_t type; + uint8_t reserved[3]; + uint64_t size; + uint8_t payload[]; +}; + +_packed_ struct DataObject { + ObjectHeader object; + uint64_t hash; + uint64_t head_entry_offset; + uint64_t tail_entry_offset; + uint64_t prev_hash_offset; + uint64_t next_hash_offset; + uint8_t payload[]; +}; + +_packed_ struct EntryItem { + uint64_t object_offset; + uint64_t prev_entry_offset; + uint64_t next_entry_offset; +}; + +_packed_ struct EntryObject { + ObjectHeader object; + uint64_t seqnum; + uint64_t realtime; + uint64_t monotonic; + uint64_t prev_entry_offset; + uint64_t next_entry_offset; + EntryItem items[]; +}; + +_packed_ struct HashItem { + uint64_t head_hash_offset; + uint64_t 
tail_hash_offset; +}; + +_packed_ struct HashTableObject { + ObjectHeader object; + HashItem table[]; +}; + +_packed_ struct BisectTableObject { + ObjectHeader object; + uint64_t table[]; +}; + +union Object { + ObjectHeader object; + DataObject data; + EntryObject entry; + HashTableObject hash_table; + BisectTableObject bisect_table; +}; + +enum { + STATE_OFFLINE, + STATE_ONLINE, + STATE_ARCHIVED +}; + +_packed_ struct Header { + uint8_t signature[8]; /* "LPKSHHRH" */ + uint32_t compatible_flags; + uint32_t incompatible_flags; + uint32_t state; + uint8_t reserved[4]; + sd_id128_t file_id; + sd_id128_t machine_id; + sd_id128_t boot_id; + uint64_t arena_offset; + uint64_t arena_size; + uint64_t arena_max_size; + uint64_t arena_min_size; + uint64_t arena_keep_free; + uint64_t hash_table_offset; /* for looking up data objects */ + uint64_t hash_table_size; + uint64_t bisect_table_offset; /* for looking up entry objects */ + uint64_t bisect_table_size; + uint64_t head_object_offset; + uint64_t tail_object_offset; + uint64_t head_entry_offset; + uint64_t tail_entry_offset; + uint64_t last_bisect_offset; + uint64_t n_objects; + uint64_t seqnum_base; + uint64_t seqnum; +}; + +#endif diff --git a/src/journal/journal-private.h b/src/journal/journal-private.h new file mode 100644 index 0000000000..863a39893b --- /dev/null +++ b/src/journal/journal-private.h @@ -0,0 +1,53 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foojournalprivatehfoo +#define foojournalprivatehfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include + +#include "sd-journal.h" +#include "journal-def.h" +#include "util.h" + +typedef struct JournalFile JournalFile; + +int journal_file_open(sd_journal *j, const char *fname, int flags, mode_t mode, JournalFile **ret); + +void journal_file_close(JournalFile *j); + +int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret); + +uint64_t journal_file_entry_n_items(Object *o); + +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset); + +int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset); + +int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); +int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); + +int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); +int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); + +void journal_file_dump(JournalFile *f); + +#endif diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c new file mode 100644 index 0000000000..838e8436e4 --- /dev/null +++ b/src/journal/journalctl.c @@ -0,0 +1,91 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. 
+ + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include + +#include "journal-private.h" + +/* Dumps the system journal to stdout: one "entry: SEQNUM" line per entry, + * followed by the payload of each data object the entry references, in + * brackets. Opens /var/log/journal/system.journal and falls back to + * /run/log/journal/system.journal if the former does not exist. Returns + * EXIT_SUCCESS once all entries were printed, EXIT_FAILURE on any error. */ +int main(int argc, char *argv[]) { + int r; + JournalFile *f; + Object *o = NULL; + + log_parse_environment(); + log_open(); + + r = journal_file_open(NULL, "/var/log/journal/system.journal", O_RDONLY, 0644, &f); + if (r == -ENOENT) + r = journal_file_open(NULL, "/run/log/journal/system.journal", O_RDONLY, 0644, &f); + + if (r < 0) { + log_error("Failed to open journal: %s", strerror(-r)); + return EXIT_FAILURE; + } + + for (;;) { + uint64_t offset; + uint64_t n, i; + + r = journal_file_next_entry(f, o, &o, &offset); + if (r < 0) { + log_error("Failed to read journal: %s", strerror(-r)); + goto finish; + } + + if (r == 0) + break; + + printf("entry: %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); + + n = journal_file_entry_n_items(o); + for (i = 0; i < n; i++) { + uint64_t p, l; + + p = le64toh(o->entry.items[i].object_offset); + r = journal_file_move_to_object(f, p, &o); + if (r < 0) { + log_error("Failed to move to data: %s", strerror(-r)); + goto finish; + } + + /* type is a single byte and needs no byte swapping; size is + * converted like the other on-disk 64bit fields read here. + * A size smaller than the data header would underflow below. */ + l = le64toh(o->object.size); + if (o->object.type != OBJECT_DATA || l < offsetof(Object, data.payload)) { + log_error("Invalid file"); + r = -EBADMSG; + goto finish; + } + + l -= offsetof(Object, data.payload); + printf("\t[%.*s]\n", (int) l, o->data.payload); + + /* o now points at the data object, go back to the entry we are iterating */ + r = journal_file_move_to_object(f, offset, &o); + if (r < 0) { + log_error("Failed to move back to entry: 
%s", strerror(-r)); + goto finish; + } + } + } + +finish: + journal_file_close(f); + + /* r is negative on every error path above and >= 0 after a complete dump */ + return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/journal/journald.c b/src/journal/journald.c new file mode 100644 index 0000000000..9297ca6fb7 --- /dev/null +++ b/src/journal/journald.c @@ -0,0 +1,460 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include +#include +#include + +#include "hashmap.h" +#include "journal-private.h" +#include "sd-daemon.h" +#include "socket-util.h" + +typedef struct Server { + int syslog_fd; + int epoll_fd; + int signal_fd; + + JournalFile *system_journal; + Hashmap *user_journals; +} Server; + +/* Turns one received syslog message into a journal entry and appends it to + * s->system_journal. A syslog priority prefix and date are parsed off buf + * first; the remainder becomes MESSAGE=. ucred and tv may be NULL; when set + * they add the sender's credentials and process metadata, and the timestamp + * in tv as SOURCE_REALTIME_TIMESTAMP=. Each field is best-effort: a field + * that cannot be built is left out of the entry. */ +static void process_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) { + char *message = NULL, *pid = NULL, *uid = NULL, *gid = NULL, + *source_time = NULL, *boot_id = NULL, *machine_id = NULL, + *comm = NULL, *cmdline = NULL, *hostname = NULL, + *audit_session = NULL, *audit_loginuid = NULL, + *syslog_priority = NULL, *syslog_facility = NULL, + *exe = NULL; + dual_timestamp ts; + struct iovec iovec[15]; /* one slot per field set below, keep in sync */ + unsigned n = 0; + char idbuf[33]; + sd_id128_t id; + int r; + char *t; + int priority = LOG_USER | LOG_INFO; + + dual_timestamp_get(&ts); + + parse_syslog_priority((char**) &buf, &priority); + skip_syslog_date((char**) &buf); + + if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0) + IOVEC_SET_STRING(iovec[n++], syslog_priority); + + if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0) + IOVEC_SET_STRING(iovec[n++], syslog_facility); + + message = strappend("MESSAGE=", buf); + if (message) + IOVEC_SET_STRING(iovec[n++], message); + + if (ucred) { + uint32_t session; + uid_t loginuid; + + if (asprintf(&pid, "PID=%lu", (unsigned long) ucred->pid) >= 0) + IOVEC_SET_STRING(iovec[n++], pid); + + if (asprintf(&uid, "UID=%lu", (unsigned long) ucred->uid) >= 0) + IOVEC_SET_STRING(iovec[n++], uid); + + if (asprintf(&gid, "GID=%lu", (unsigned long) ucred->gid) >= 0) + IOVEC_SET_STRING(iovec[n++], gid); + + r = get_process_comm(ucred->pid, &t); + if (r >= 0) { + comm = strappend("COMM=", t); + if (comm) + IOVEC_SET_STRING(iovec[n++], comm); + free(t); + } + + r = get_process_exe(ucred->pid, &t); + if (r >= 0) { + exe = strappend("EXE=", t); + /* must test exe, not comm: a NULL exe must never reach the iovec */ + if (exe) + IOVEC_SET_STRING(iovec[n++], 
exe); + free(t); + } + + r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t); + if (r >= 0) { + cmdline = strappend("CMDLINE=", t); + if (cmdline) + IOVEC_SET_STRING(iovec[n++], cmdline); + free(t); + } + + r = audit_session_from_pid(ucred->pid, &session); + if (r >= 0) + if (asprintf(&audit_session, "AUDIT_SESSION=%lu", (unsigned long) session) >= 0) + IOVEC_SET_STRING(iovec[n++], audit_session); + + r = audit_loginuid_from_pid(ucred->pid, &loginuid); + if (r >= 0) + if (asprintf(&audit_loginuid, "AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0) + IOVEC_SET_STRING(iovec[n++], audit_loginuid); + } + + if (tv) { + if (asprintf(&source_time, "SOURCE_REALTIME_TIMESTAMP=%llu", + (unsigned long long) timeval_load(tv)) >= 0) + IOVEC_SET_STRING(iovec[n++], source_time); + } + + r = sd_id128_get_boot(&id); + if (r >= 0) + if (asprintf(&boot_id, "BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) + IOVEC_SET_STRING(iovec[n++], boot_id); + + r = sd_id128_get_machine(&id); + if (r >= 0) + if (asprintf(&machine_id, "MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) + IOVEC_SET_STRING(iovec[n++], machine_id); + + t = gethostname_malloc(); + if (t) { + hostname = strappend("HOSTNAME=", t); + if (hostname) + IOVEC_SET_STRING(iovec[n++], hostname); + free(t); + } + + r = journal_file_append_entry(s->system_journal, &ts, iovec, n, NULL, NULL); + if (r < 0) + log_error("Failed to write entry: %s", strerror(-r)); + + /* the iovec only borrows these strings, release all of them */ + free(message); + free(pid); + free(uid); + free(gid); + free(comm); + free(exe); + free(cmdline); + free(source_time); + free(boot_id); + free(machine_id); + free(hostname); + free(audit_session); + free(audit_loginuid); + free(syslog_facility); + free(syslog_priority); +} + +static int process_event(Server *s, struct epoll_event *ev) { + assert(s); + + if (ev->events != EPOLLIN) { + log_info("Got invalid event from epoll."); + return -EIO; + } + + if (ev->data.fd == s->signal_fd) { + struct signalfd_siginfo sfsi; + ssize_t n; + + n = read(s->signal_fd, &sfsi, 
sizeof(sfsi)); + if (n != sizeof(sfsi)) { + + if (n >= 0) + return -EIO; + + if (errno == EINTR || errno == EAGAIN) + return 0; + + return -errno; + } + + log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo)); + return 0; + + } else { + for (;;) { + char buf[LINE_MAX+1]; + struct msghdr msghdr; + struct iovec iovec; + struct ucred *ucred = NULL; + struct timeval *tv = NULL; + struct cmsghdr *cmsg; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(struct timeval))]; + } control; + ssize_t n; + char *e; + + zero(iovec); + iovec.iov_base = buf; + iovec.iov_len = sizeof(buf)-1; + + zero(control); + zero(msghdr); + msghdr.msg_iov = &iovec; + msghdr.msg_iovlen = 1; + msghdr.msg_control = &control; + msghdr.msg_controllen = sizeof(control); + + n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT); + if (n < 0) { + + if (errno == EINTR || errno == EAGAIN) + return 1; + + log_error("recvmsg() failed: %m"); + return -errno; + } + + for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) { + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS && + cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) + ucred = (struct ucred*) CMSG_DATA(cmsg); + else if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SO_TIMESTAMP && + cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval))) + tv = (struct timeval*) CMSG_DATA(cmsg); + } + + e = memchr(buf, '\n', n); + if (e) + *e = 0; + else + buf[n] = 0; + + process_message(s, strstrip(buf), ucred, tv); + } + } + + return 1; +} + + +static int open_system_journal(JournalFile **f) { + int r; + + r = journal_file_open(NULL, "/var/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); + if (r == -ENOENT) { + mkdir_p("/run/log/journal", 0755); + + r = journal_file_open(NULL, "/run/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); + } + + return r; +} + +static int server_init(Server *s) { + int n, one, r; + struct epoll_event ev; + sigset_t 
mask; + + assert(s); + + zero(*s); + s->syslog_fd = s->signal_fd = -1; + + s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (s->epoll_fd < 0) { + log_error("Failed to create epoll object: %m"); + return -errno; + } + + n = sd_listen_fds(true); + if (n < 0) { + log_error("Failed to read listening file descriptors from environment: %s", strerror(-n)); + return n; + } + + if (n > 1) { + log_error("Too many file descriptors passed."); + return -EINVAL; + } + + if (n == 1) + s->syslog_fd = SD_LISTEN_FDS_START; + else { + union sockaddr_union sa; + + s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (s->syslog_fd < 0) { + log_error("socket() failed: %m"); + return -errno; + } + + zero(sa); + sa.un.sun_family = AF_UNIX; + strncpy(sa.un.sun_path, "/run/systemd/syslog", sizeof(sa.un.sun_path)); + + unlink(sa.un.sun_path); + + r = bind(s->syslog_fd, &sa.sa, sizeof(sa.un)); + if (r < 0) { + log_error("bind() failed: %m"); + return -errno; + } + + chmod(sa.un.sun_path, 0666); + } + + one = 1; + r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)); + if (r < 0) { + log_error("SO_PASSCRED failed: %m"); + return -errno; + } + + one = 1; + r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one)); + if (r < 0) { + log_error("SO_TIMESTAMP failed: %m"); + return -errno; + } + + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->syslog_fd; + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) { + log_error("Failed to add server fd to epoll object: %m"); + return -errno; + } + + s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func); + if (!s->user_journals) { + log_error("Out of memory."); + return -ENOMEM; + } + + r = open_system_journal(&s->system_journal); + if (r < 0) { + log_error("Failed to open journal: %s", strerror(-r)); + return r; + } + + assert_se(sigemptyset(&mask) == 0); + sigset_add_many(&mask, SIGINT, SIGTERM, -1); + assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); + + s->signal_fd 
= signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); + if (s->signal_fd < 0) { + log_error("signalfd(): %m"); + return -errno; + } + + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->signal_fd; + + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) { + log_error("epoll_ctl(): %m"); + return -errno; + } + + return 0; +} + +static void server_done(Server *s) { + JournalFile *f; + assert(s); + + if (s->system_journal) + journal_file_close(s->system_journal); + + while ((f = hashmap_steal_first(s->user_journals))) + journal_file_close(f); + + hashmap_free(s->user_journals); + + if (s->epoll_fd >= 0) + close_nointr_nofail(s->epoll_fd); + + if (s->signal_fd >= 0) + close_nointr_nofail(s->signal_fd); + + if (s->syslog_fd >= 0) + close_nointr_nofail(s->syslog_fd); +} + +int main(int argc, char *argv[]) { + Server server; + int r; + + /* if (getppid() != 1) { */ + /* log_error("This program should be invoked by init only."); */ + /* return EXIT_FAILURE; */ + /* } */ + + if (argc > 1) { + log_error("This program does not take arguments."); + return EXIT_FAILURE; + } + + log_set_target(LOG_TARGET_AUTO); + log_parse_environment(); + log_open(); + + umask(0022); + + r = server_init(&server); + if (r < 0) + goto finish; + + log_debug("systemd-journald running as pid %lu", (unsigned long) getpid()); + + sd_notify(false, + "READY=1\n" + "STATUS=Processing messages..."); + + for (;;) { + struct epoll_event event; + + r = epoll_wait(server.epoll_fd, &event, 1, -1); + if (r < 0) { + + if (errno == EINTR) + continue; + + log_error("epoll_wait() failed: %m"); + r = -errno; + goto finish; + } else if (r == 0) + break; + + r = process_event(&server, &event); + if (r < 0) + goto finish; + else if (r == 0) + break; + } + +finish: + sd_notify(false, + "STATUS=Shutting down..."); + + server_done(&server); + + return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/journal/lookup3.c b/src/journal/lookup3.c new file mode 100644 index 0000000000..b90093a5e2 --- /dev/null +++ b/src/journal/lookup3.c @@ -0,0 +1,1003 @@ +/* Slightly modified by Lennart Poettering, to avoid name clashes, and + * unexport a few functions. */ + +#include "lookup3.h" + +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. 
+------------------------------------------------------------------------------- +*/ +/* #define SELF_TEST 1 */ + +#include /* defines printf for tests */ +#include /* defines time_t for timings in the test */ +#include /* defines uint32_t etc */ +#include /* attempt to define endianness */ +#ifdef linux +# include /* attempt to define endianness */ +#endif + +/* + * My best guess at if you are big-endian or little-endian. This may + * need adjustment. + */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. 
For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. 
This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hashword() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hashword() only because + hashlittle() has to dance around fitting the key bytes into registers. 
+-------------------------------------------------------------------- +*/ +uint32_t jenkins_hashword( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + + +/* +-------------------------------------------------------------------- +hashword2() -- same as hashword(), but take two seeds and return two +32-bit values. pc and pb must both be nonnull, and *pc and *pb must +both be initialized with seeds. If you pass in (*pb)==0, the output +(*pc) will be the same as the return value from hashword(). 
+-------------------------------------------------------------------- +*/ +void jenkins_hashword2 ( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t *pc, /* IN: seed OUT: primary hash value */ +uint32_t *pb) /* IN: more seed OUT: secondary hash value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; + c += *pb; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + *pc=c; *pb=b; +} + + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. 
+ +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return 
c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + 
switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + + +/* + * hashlittle2: return 2 32-bit hash values + * + * This is identical to hashlittle(), except it returns two 32-bit hash + * values instead of just one. This is good enough for hash table + * lookup with 2^^64 buckets, or if you want a second hash if you're not + * happy with the first, or if you want a probably-unique 64-bit ID for + * the key. *pc is better mixed than *pb, so use *pc first. If you want + * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". + */ +void jenkins_hashlittle2( + const void *key, /* the key to hash */ + size_t length, /* length of the key */ + uint32_t *pc, /* IN: primary initval, OUT: primary hash */ + uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; + c += *pb; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. 
Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + 
(((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : 
b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + } + + final(a,b,c); + *pc=c; *pb=b; +} + + + +/* + * hashbig(): + * This is the same as hashword() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. + */ +uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k 
+= 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + + +#ifdef SELF_TEST + +/* used for timings */ +void driver1() +{ + uint8_t buf[256]; + uint32_t i; + uint32_t h=0; + time_t a,z; + + time(&a); + for (i=0; i<256; ++i) buf[i] = 'x'; + for (i=0; i<1; ++i) + { + h = hashlittle(&buf[0],1,h); + } + time(&z); + if (z-a > 0) printf("time %d %.8x\n", z-a, h); +} + +/* check that every input bit changes every output bit half the time */ +#define HASHSTATE 1 +#define HASHLEN 1 +#define MAXPAIR 60 +#define MAXLEN 70 +void driver2() +{ + uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1]; + uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z; + uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE]; + uint32_t x[HASHSTATE],y[HASHSTATE]; + uint32_t hlen; + + printf("No more than %d trials should ever be needed \n",MAXPAIR/2); + for (hlen=0; hlen < MAXLEN; ++hlen) + { + z=0; + for (i=0; i>(8-j)); + c[0] = hashlittle(a, hlen, m); + b[i] ^= ((k+1)<>(8-j)); + d[0] = hashlittle(b, hlen, m); + /* check every bit is 1, 0, set, and not set at least once */ + for (l=0; lz) z=k; + if (k==MAXPAIR) + { + printf("Some bit didn't change: "); + printf("%.8x %.8x %.8x %.8x %.8x %.8x ", + e[0],f[0],g[0],h[0],x[0],y[0]); + printf("i %d j %d m %d len %d\n", i, j, m, hlen); + } + if (z==MAXPAIR) goto done; + } + } + } + done: + if (z < MAXPAIR) + { + printf("Mix success %2d bytes %2d initvals ",i,m); + printf("required %d 
trials\n", z/2); + } + } + printf("\n"); +} + +/* Check for reading beyond the end of the buffer and alignment problems */ +void driver3() +{ + uint8_t buf[MAXLEN+20], *b; + uint32_t len; + uint8_t q[] = "This is the time for all good men to come to the aid of their country..."; + uint32_t h; + uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country..."; + uint32_t i; + uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country..."; + uint32_t j; + uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country..."; + uint32_t ref,x,y; + uint8_t *p; + + printf("Endianness. These lines should all be the same (for values filled in):\n"); + printf("%.8x %.8x %.8x\n", + hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13), + hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13), + hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13)); + p = q; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qq[1]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqq[2]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, 
sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqqq[3]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + printf("\n"); + + /* check that hashlittle2 and hashlittle produce the same results */ + i=47; j=0; + hashlittle2(q, sizeof(q), &i, &j); + if (hashlittle(q, sizeof(q), 47) != i) + printf("hashlittle2 and hashlittle mismatch\n"); + + /* check that hashword2 and hashword produce the same results */ + len = 0xdeadbeef; + i=47, j=0; + hashword2(&len, 1, &i, &j); + if (hashword(&len, 1, 47) != i) + printf("hashword2 and hashword mismatch %x %x\n", + i, hashword(&len, 1, 47)); + + /* check hashlittle doesn't read before or after the ends of the string */ + for (h=0, b=buf+1; h<8; ++h, ++b) + { + for (i=0; i +#include + +uint32_t jenkins_hashword(const uint32_t *k, size_t length, uint32_t initval); +void jenkins_hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb); + +uint32_t jenkins_hashlittle(const void *key, size_t length, uint32_t initval); +void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb); + +uint32_t jenkins_hashbig(const void *key, size_t length, uint32_t initval); + +static inline uint64_t hash64(const void *data, size_t length) { + uint32_t a = 0, b 
= 0; + + jenkins_hashlittle2(data, length, &a, &b); + + return ((uint64_t) a << 32ULL) | (uint64_t) b; +} + +#endif diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c new file mode 100644 index 0000000000..f1dd92927c --- /dev/null +++ b/src/journal/sd-journal.c @@ -0,0 +1,1333 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include +#include +#include +#include + +#include "sd-journal.h" +#include "journal-def.h" +#include "journal-private.h" +#include "lookup3.h" +#include "list.h" + +#define DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) +#define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) +#define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) + +#define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) +#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) + +#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) + +struct JournalFile { + sd_journal *journal; + + int fd; + char *path; + struct stat last_stat; + int prot; + bool writable; + + Header *header; + + HashItem *hash_table; + void *hash_table_window; + uint64_t hash_table_window_size; + + uint64_t *bisect_table; + void *bisect_table_window; + uint64_t bisect_table_window_size; + + void *window; + uint64_t window_offset; + uint64_t window_size; + + Object *current; + uint64_t current_offset; + + LIST_FIELDS(JournalFile, files); +}; + +struct sd_journal { + LIST_HEAD(JournalFile, files); +}; + +static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; + +#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) + +void journal_file_close(JournalFile *f) { + assert(f); + + if (f->journal) + LIST_REMOVE(JournalFile, files, f->journal->files, f); + + if (f->fd >= 0) + close_nointr_nofail(f->fd); + + if (f->header) + munmap(f->header, PAGE_ALIGN(sizeof(Header))); + + if (f->hash_table_window) + munmap(f->hash_table_window, f->hash_table_window_size); + + if (f->bisect_table_window) + munmap(f->bisect_table_window, f->bisect_table_window_size); + + if (f->window) + munmap(f->window, f->window_size); + + free(f->path); + free(f); +} + +static int journal_file_init_header(JournalFile *f) { + Header h; + ssize_t k; + int r; + + assert(f); + + zero(h); + memcpy(h.signature, signature, 8); + h.arena_offset = htole64(ALIGN64(sizeof(h))); + h.arena_max_size = 
htole64(DEFAULT_ARENA_MAX_SIZE); + h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); + h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); + + r = sd_id128_randomize(&h.file_id); + if (r < 0) + return r; + + k = pwrite(f->fd, &h, sizeof(h), 0); + if (k < 0) + return -errno; + + if (k != sizeof(h)) + return -EIO; + + return 0; +} + +static int journal_file_refresh_header(JournalFile *f) { + int r; + + assert(f); + + r = sd_id128_get_machine(&f->header->machine_id); + if (r < 0) + return r; + + r = sd_id128_get_boot(&f->header->boot_id); + if (r < 0) + return r; + + f->header->state = htole32(STATE_ONLINE); + return 0; +} + +static int journal_file_verify_header(JournalFile *f) { + assert(f); + + if (memcmp(f->header, signature, 8)) + return -EBADMSG; + + if (f->header->incompatible_flags != 0) + return -EPROTONOSUPPORT; + + if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) + return -ENODATA; + + if (f->writable) { + uint32_t state; + sd_id128_t machine_id; + int r; + + r = sd_id128_get_machine(&machine_id); + if (r < 0) + return r; + + if (!sd_id128_equal(machine_id, f->header->machine_id)) + return -EHOSTDOWN; + + state = le32toh(f->header->state); + + if (state == STATE_ONLINE) + log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); + else if (state == STATE_ARCHIVED) + return -ESHUTDOWN; + else if (state != STATE_OFFLINE) + log_debug("Journal file %s has unknown state %u. 
Ignoring.", f->path, state); + } + + return 0; +} + +static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { + uint64_t asize; + uint64_t old_size, new_size; + + assert(f); + + if (offset < le64toh(f->header->arena_offset)) + return -EINVAL; + + new_size = PAGE_ALIGN(offset + size); + + /* We assume that this file is not sparse, and we know that + * for sure, since we alway call posix_fallocate() + * ourselves */ + + old_size = + le64toh(f->header->arena_offset) + + le64toh(f->header->arena_size); + + if (old_size >= new_size) + return 0; + + asize = new_size - le64toh(f->header->arena_offset); + + if (asize > le64toh(f->header->arena_min_size)) { + struct statvfs svfs; + + if (fstatvfs(f->fd, &svfs) >= 0) { + uint64_t available; + + available = svfs.f_bfree * svfs.f_bsize; + + if (available >= f->header->arena_keep_free) + available -= f->header->arena_keep_free; + else + available = 0; + + if (new_size - old_size > available) + return -E2BIG; + } + } + + if (asize > le64toh(f->header->arena_max_size)) + return -E2BIG; + + if (posix_fallocate(f->fd, 0, new_size) < 0) + return -errno; + + if (fstat(f->fd, &f->last_stat) < 0) + return -errno; + + f->header->arena_size = htole64(asize); + + return 0; +} + +static int journal_file_map( + JournalFile *f, + uint64_t offset, + uint64_t size, + void **_window, + uint64_t *_woffset, + uint64_t *_wsize, + void **ret) { + + uint64_t woffset, wsize; + void *window; + + assert(f); + assert(size > 0); + assert(ret); + + woffset = offset & ~((uint64_t) page_size() - 1ULL); + wsize = size + (offset - woffset); + wsize = PAGE_ALIGN(wsize); + + window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); + if (window == MAP_FAILED) + return -errno; + + if (_window) + *_window = window; + + if (_woffset) + *_woffset = woffset; + + if (_wsize) + *_wsize = wsize; + + *ret = (uint8_t*) window + (offset - woffset); + + return 0; +} + +static int journal_file_move_to(JournalFile *f, uint64_t offset, 
uint64_t size, void **ret) { + void *p; + uint64_t delta; + int r; + + assert(f); + assert(ret); + + if (_likely_(f->window && + f->window_offset <= offset && + f->window_offset+f->window_size >= offset + size)) { + + *ret = (uint8_t*) f->window + (offset - f->window_offset); + return 0; + } + + if (f->window) { + if (munmap(f->window, f->window_size) < 0) + return -errno; + + f->window = NULL; + f->window_size = f->window_offset = 0; + } + + if (size < DEFAULT_WINDOW_SIZE) { + /* If the default window size is larger then what was + * asked for extend the mapping a bit in the hope to + * minimize needed remappings later on. We add half + * the window space before and half behind the + * requested mapping */ + + delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); + + if (offset < delta) + delta = offset; + + offset -= delta; + size += (DEFAULT_WINDOW_SIZE - delta); + } else + delta = 0; + + r = journal_file_map(f, + offset, size, + &f->window, &f->window_offset, &f->window_size, + & p); + + if (r < 0) + return r; + + *ret = (uint8_t*) p + delta; + return 0; +} + +static bool verify_hash(Object *o) { + uint64_t t; + + assert(o); + + t = le64toh(o->object.type); + if (t == OBJECT_DATA) { + uint64_t s, h1, h2; + + s = le64toh(o->object.size); + + h1 = le64toh(o->data.hash); + h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); + + return h1 == h2; + } + + return true; +} + +int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret) { + int r; + void *t; + Object *o; + uint64_t s; + + assert(f); + assert(ret); + + r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); + if (r < 0) + return r; + + o = (Object*) t; + s = le64toh(o->object.size); + + if (s < sizeof(ObjectHeader)) + return -EBADMSG; + + if (s > sizeof(ObjectHeader)) { + r = journal_file_move_to(f, offset, s, &t); + if (r < 0) + return r; + + o = (Object*) t; + } + + if (!verify_hash(o)) + return -EBADMSG; + + *ret = o; + return 0; +} + +static uint64_t 
journal_file_seqnum(JournalFile *f) { + uint64_t r; + + assert(f); + + r = le64toh(f->header->seqnum) + 1; + f->header->seqnum = htole64(r); + + return r; +} + +static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { + int r; + uint64_t p; + Object *tail, *o; + void *t; + + assert(f); + assert(size >= sizeof(ObjectHeader)); + assert(offset); + assert(ret); + + p = le64toh(f->header->tail_object_offset); + + if (p == 0) + p = le64toh(f->header->arena_offset); + else { + r = journal_file_move_to_object(f, p, &tail); + if (r < 0) + return r; + + p += ALIGN64(le64toh(tail->object.size)); + } + + r = journal_file_allocate(f, p, size); + if (r < 0) + return r; + + r = journal_file_move_to(f, p, size, &t); + if (r < 0) + return r; + + o = (Object*) t; + + zero(o->object); + o->object.type = htole64(OBJECT_UNUSED); + zero(o->object.reserved); + o->object.size = htole64(size); + + f->header->tail_object_offset = htole64(p); + if (f->header->head_object_offset == 0) + f->header->head_object_offset = htole64(p); + + f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); + + *ret = o; + *offset = p; + + return 0; +} + +static int journal_file_setup_hash_table(JournalFile *f) { + uint64_t s, p; + Object *o; + int r; + + assert(f); + + s = DEFAULT_HASH_TABLE_SIZE; + r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_HASH_TABLE); + memset(o->hash_table.table, 0, s); + + f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); + f->header->hash_table_size = htole64(s); + + return 0; +} + +static int journal_file_setup_bisect_table(JournalFile *f) { + uint64_t s, p; + Object *o; + int r; + + assert(f); + + s = DEFAULT_BISECT_TABLE_SIZE; + r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_BISECT_TABLE); + 
memset(o->bisect_table.table, 0, s); + + f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); + f->header->bisect_table_size = htole64(s); + + return 0; +} + +static int journal_file_map_hash_table(JournalFile *f) { + uint64_t s, p; + void *t; + int r; + + assert(f); + + p = le64toh(f->header->hash_table_offset); + s = le64toh(f->header->hash_table_size); + + r = journal_file_map(f, + p, s, + &f->hash_table_window, NULL, &f->hash_table_window_size, + &t); + if (r < 0) + return r; + + f->hash_table = t; + return 0; +} + +static int journal_file_map_bisect_table(JournalFile *f) { + uint64_t s, p; + void *t; + int r; + + assert(f); + + p = le64toh(f->header->bisect_table_offset); + s = le64toh(f->header->bisect_table_size); + + r = journal_file_map(f, + p, s, + &f->bisect_table_window, NULL, &f->bisect_table_window_size, + &t); + + if (r < 0) + return r; + + f->bisect_table = t; + return 0; +} + +static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { + uint64_t p; + int r; + + assert(f); + assert(o); + assert(offset > 0); + assert(o->object.type == htole64(OBJECT_DATA)); + + o->data.head_entry_offset = o->data.tail_entry_offset = 0; + o->data.next_hash_offset = 0; + + p = le64toh(f->hash_table[hash_index].tail_hash_offset); + if (p == 0) { + /* Only entry in the hash table is easy */ + + o->data.prev_hash_offset = 0; + f->hash_table[hash_index].head_hash_offset = htole64(offset); + } else { + o->data.prev_hash_offset = htole64(p); + + /* Temporarily move back to the previous data object, + * to patch in pointer */ + + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + o->data.next_hash_offset = offset; + + r = journal_file_move_to_object(f, offset, &o); + if (r < 0) + return r; + } + + f->hash_table[hash_index].tail_hash_offset = htole64(offset); + + return 0; +} + +static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t 
*offset) { /* Returns the existing data object with an identical payload, or appends a new one and links it into its hash bucket; 0 on success, negative errno on failure */ + uint64_t hash, h, p, np; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(data || size == 0); + + osize = offsetof(Object, data.payload) + size; + + hash = hash64(data, size); + h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); /* bucket index: hash modulo number of HashItem slots */ + p = le64toh(f->hash_table[h].head_hash_offset); + + while (p != 0) { + /* Look for this data object in the hash table */ + + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_DATA) + return -EBADMSG; + + if (le64toh(o->object.size) == osize && + memcmp(o->data.payload, data, size) == 0) { + + if (le64toh(o->data.hash) != hash) /* same payload but different stored hash: file is inconsistent */ + return -EBADMSG; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 0; + } + + p = le64toh(o->data.next_hash_offset); + } + + r = journal_file_append_object(f, osize, &o, &np); /* not found in the bucket: create a new object */ + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_DATA); + o->data.hash = htole64(hash); + memcpy(o->data.payload, data, size); + + r = journal_file_link_data(f, o, np, h); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 0; +} + +uint64_t journal_file_entry_n_items(Object *o) { /* Number of EntryItem slots, derived from the entry object's stored size */ + assert(o); + assert(o->object.type == htole64(OBJECT_ENTRY)); + + return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); +} + +static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { /* Links item 'i' of the entry at 'offset' to the tail of the entry list of the data object it references */ + uint64_t p, q; + int r; + assert(f); + assert(o); + assert(offset > 0); + + p = le64toh(o->entry.items[i].object_offset); + if (p == 0) + return -EINVAL; + + o->entry.items[i].next_entry_offset = 0; + + /* Move to the data object */ + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (o->object.type != htole64(OBJECT_DATA)) + return -EBADMSG; + + q = le64toh(o->data.tail_entry_offset); /* previous tail entry of this data object, 0 if none */ + o->data.tail_entry_offset = htole64(offset); + + if (q == 0) + o->data.head_entry_offset = htole64(offset); + else { + uint64_t 
n, j; + + /* Move to previous entry */ + r = journal_file_move_to_object(f, q, &o); + if (r < 0) + return r; + + if (o->object.type != htole64(OBJECT_ENTRY)) + return -EBADMSG; + + n = journal_file_entry_n_items(o); + for (j = 0; j < n; j++) /* find the item of the previous entry that references the same data object */ + if (le64toh(o->entry.items[j].object_offset) == p) + break; + + if (j >= n) + return -EBADMSG; + + o->entry.items[j].next_entry_offset = htole64(offset); /* on-disk field: store little-endian */ + } + + /* Move back to original entry */ + r = journal_file_move_to_object(f, offset, &o); + if (r < 0) + return r; + + o->entry.items[i].prev_entry_offset = htole64(q); /* q is host order (from le64toh above); convert back for storage */ + return 0; +} + +static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { /* Chains the entry at 'offset' after the file's tail entry, links each of its items and records it in the bisect table */ + uint64_t p, i, n, k, a, b; + int r; + + assert(f); + assert(o); + assert(offset > 0); + assert(o->object.type == htole64(OBJECT_ENTRY)); + + /* Link up the entry itself */ + p = le64toh(f->header->tail_entry_offset); + + o->entry.prev_entry_offset = f->header->tail_entry_offset; + o->entry.next_entry_offset = 0; + + if (p == 0) + f->header->head_entry_offset = htole64(offset); + else { + /* Temporarily move back to the previous entry, to + * patch in pointer */ + + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + o->entry.next_entry_offset = htole64(offset); + + r = journal_file_move_to_object(f, offset, &o); + if (r < 0) + return r; + } + + f->header->tail_entry_offset = htole64(offset); + + /* Link up the items */ + n = journal_file_entry_n_items(o); + for (i = 0; i < n; i++) { + r = journal_file_link_entry_item(f, o, offset, i); + if (r < 0) + return r; + } + + /* Link up the entry in the bisect table */ + n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); + k = le64toh(f->header->arena_max_size) / n; /* bytes of arena covered by one bisect slot */ + + a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; + b = offset / k; + + for (; a <= b; a++) + f->bisect_table[a] = htole64(offset); + + f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); + + return 0; +} + +static int 
journal_file_append_entry_internal(JournalFile *f, const dual_timestamp *ts, const EntryItem items[], unsigned n_items, Object **ret, uint64_t *offset) { /* Appends an entry object holding the given, already resolved items and links it into the file; 0 on success, negative errno on failure */ + uint64_t np; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(items || n_items == 0); + + osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); + + r = journal_file_append_object(f, osize, &o, &np); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_ENTRY); + o->entry.seqnum = htole64(journal_file_seqnum(f)); + memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); + o->entry.realtime = htole64(ts->realtime); + o->entry.monotonic = htole64(ts->monotonic); + + r = journal_file_link_entry(f, o, np); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 0; +} + +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { /* Stores each iovec as a (deduplicated) data object, then appends one entry referencing them all */ + unsigned i; + EntryItem *items; + int r; + + assert(f); + + items = new(EntryItem, n_iovec); + if (!items) + return -ENOMEM; + + for (i = 0; i < n_iovec; i++) { + uint64_t p; + + r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, NULL, &p); + if (r < 0) + goto finish; + + items[i].object_offset = htole64(p); /* only object_offset is set here; the link fields are filled in while linking the entry */ + } + + r = journal_file_append_entry_internal(f, ts, items, n_iovec, ret, offset); + +finish: + free(items); + + return r; +} + +int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { /* Looks up the entry with the given sequence number: 1 if found, 0 if not, negative errno on error */ + Object *o; + uint64_t lower, upper, p, n, k; + int r; + + assert(f); + + n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); + k = le64toh(f->header->arena_max_size) / n; + + lower = 0; + upper = le64toh(f->header->last_bisect_offset)/k+1; + + while (lower < upper) { + k = (upper + lower) / 2; /* k is reused from here on as the midpoint slot index */ + p = le64toh(f->bisect_table[k]); + + if (p == 0) { + upper = k; + continue; + } + + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if 
(o->object.type != htole64(OBJECT_ENTRY)) + return -EBADMSG; + + if (o->entry.seqnum == seqnum) { + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } else if (seqnum < o->entry.seqnum) + upper = k; + else if (seqnum > o->entry.seqnum) + lower = k+1; + } + + assert(lower == upper); + + if (lower <= 0) + return 0; + + /* The object we are looking for is between + * bisect_table[lower-1] and bisect_table[lower] */ + + p = le64toh(f->bisect_table[lower-1]); + + for (;;) { + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (o->entry.seqnum == seqnum) { + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + + } if (seqnum < o->entry.seqnum) + return 0; + + if (o->entry.next_entry_offset == 0) + return 0; + + p = le64toh(o->entry.next_entry_offset); + } + + return 0; +} + +int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { + uint64_t np; + int r; + + assert(f); + + if (!o) + np = le64toh(f->header->head_entry_offset); + else { + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EINVAL; + + np = le64toh(o->entry.next_entry_offset); + } + + if (np == 0) + return 0; + + r = journal_file_move_to_object(f, np, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EBADMSG; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 1; +} + +int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { + uint64_t np; + int r; + + assert(f); + + if (!o) + np = le64toh(f->header->tail_entry_offset); + else { + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EINVAL; + + np = le64toh(o->entry.prev_entry_offset); + } + + if (np == 0) + return 0; + + r = journal_file_move_to_object(f, np, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EBADMSG; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 1; +} + +int journal_file_find_first_entry(JournalFile 
*f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { /* Finds the data object whose payload equals 'data' and returns the head of its entry list: 1 if found, 0 if not, negative errno on error */ + uint64_t p, osize, hash, h; + int r; + + assert(f); + assert(data || size == 0); + + osize = offsetof(Object, data.payload) + size; + + hash = hash64(data, size); + h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); + p = le64toh(f->hash_table[h].head_hash_offset); + + while (p != 0) { /* walk the hash bucket from its head */ + Object *o; + + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_DATA) + return -EBADMSG; + + if (le64toh(o->object.size) == osize && + memcmp(o->data.payload, data, size) == 0) { + + if (le64toh(o->data.hash) != hash) + return -EBADMSG; + + if (o->data.head_entry_offset == 0) /* data object exists but no entry references it */ + return 0; + + p = le64toh(o->data.head_entry_offset); + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EBADMSG; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } + + p = le64toh(o->data.next_hash_offset); + } + + return 0; +} + +int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { /* Same lookup, but walks the bucket from its tail and returns the tail of the data object's entry list */ + uint64_t p, osize, hash, h; + int r; + + assert(f); + assert(data || size == 0); + + osize = offsetof(Object, data.payload) + size; + + hash = hash64(data, size); + h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); + p = le64toh(f->hash_table[h].tail_hash_offset); + + while (p != 0) { + Object *o; + + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_DATA) + return -EBADMSG; + + if (le64toh(o->object.size) == osize && + memcmp(o->data.payload, data, size) == 0) { + + if (le64toh(o->data.hash) != hash) + return -EBADMSG; + + if (o->data.tail_entry_offset == 0) + return 0; + + p = le64toh(o->data.tail_entry_offset); + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + return r; + + if (le64toh(o->object.type) != OBJECT_ENTRY) + return 
-EBADMSG; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } + + p = le64toh(o->data.prev_hash_offset); + } + + return 0; +} + +void journal_file_dump(JournalFile *f) { /* Prints the header IDs and arena size, then one line per object from head to tail object */ + char a[33], b[33], c[33]; + Object *o; + int r; + uint64_t p; + + assert(f); + + printf("File ID: %s\n" + "Machine ID: %s\n" + "Boot ID: %s\n" + "Arena size: %llu\n", + sd_id128_to_string(f->header->file_id, a), + sd_id128_to_string(f->header->machine_id, b), + sd_id128_to_string(f->header->boot_id, c), + (unsigned long long) le64toh(f->header->arena_size)); + + p = le64toh(f->header->head_object_offset); + while (p != 0) { + r = journal_file_move_to_object(f, p, &o); + if (r < 0) + goto fail; + + switch (le64toh(o->object.type)) { /* type is stored little-endian; convert before matching host-order constants */ + + case OBJECT_UNUSED: + printf("Type: OBJECT_UNUSED\n"); + break; + + case OBJECT_DATA: + printf("Type: OBJECT_DATA\n"); + break; + + case OBJECT_ENTRY: + printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); + break; + + case OBJECT_HASH_TABLE: + printf("Type: OBJECT_HASH_TABLE\n"); + break; + + case OBJECT_BISECT_TABLE: + printf("Type: OBJECT_BISECT_TABLE\n"); + break; + } + + if (p == le64toh(f->header->tail_object_offset)) + p = 0; + else + p = p + ALIGN64(le64toh(o->object.size)); /* objects are laid out back to back, 64-bit aligned */ + } + + return; +fail: + log_error("File corrupt"); +} + +int journal_file_open( + sd_journal *j, + const char *fname, + int flags, + mode_t mode, + JournalFile **ret) { + + JournalFile *f; + int r; + bool newly_created = false; + + assert(fname); + + if ((flags & O_ACCMODE) != O_RDONLY && + (flags & O_ACCMODE) != O_RDWR) + return -EINVAL; + + f = new0(JournalFile, 1); + if (!f) + return -ENOMEM; + + f->writable = (flags & O_ACCMODE) != O_RDONLY; + f->prot = prot_from_flags(flags); + + f->fd = open(fname, flags|O_CLOEXEC, mode); + if (f->fd < 0) { + r = -errno; + goto fail; + } + + f->path = strdup(fname); + if (!f->path) { + r = -ENOMEM; + goto fail; + } + + if (fstat(f->fd, &f->last_stat) < 0) { + r = -errno; + goto fail; + } + + if 
(f->last_stat.st_size == 0 && f->writable) { + newly_created = true; + + r = journal_file_init_header(f); + if (r < 0) + goto fail; + + if (fstat(f->fd, &f->last_stat) < 0) { + r = -errno; + goto fail; + } + } + + if (f->last_stat.st_size < (off_t) sizeof(Header)) { + r = -EIO; + goto fail; + } + + f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); + if (f->header == MAP_FAILED) { + f->header = NULL; + r = -errno; + goto fail; + } + + if (!newly_created) { + r = journal_file_verify_header(f); + if (r < 0) + goto fail; + } + + if (f->writable) { + r = journal_file_refresh_header(f); + if (r < 0) + goto fail; + } + + if (newly_created) { + + r = journal_file_setup_hash_table(f); + if (r < 0) + goto fail; + + r = journal_file_setup_bisect_table(f); + if (r < 0) + goto fail; + } + + r = journal_file_map_hash_table(f); + if (r < 0) + goto fail; + + r = journal_file_map_bisect_table(f); + if (r < 0) + goto fail; + + if (j) { + LIST_PREPEND(JournalFile, files, j->files, f); + f->journal = j; + } + + if (ret) + *ret = f; + + return 0; + +fail: + journal_file_close(f); + + return r; +} + +int sd_journal_open(sd_journal **ret) { /* Opens every *.journal file found in the search paths read-only; succeeds if at least one file could be opened */ + sd_journal *j; + char *fn; + const char *p; + int r = 0; + const char search_paths[] = + "/run/log/journal\0" + "/var/log/journal\0"; + + assert(ret); + + j = new0(sd_journal, 1); + if (!j) + return -ENOMEM; + + NULSTR_FOREACH(p, search_paths) { + DIR *d; + + d = opendir(p); + if (!d) { + if (errno != ENOENT && r == 0) + r = -errno; + + continue; + } + + for (;;) { + struct dirent buf, *de; + int k; + + k = readdir_r(d, &buf, &de); + if (k != 0) { + if (r == 0) + r = -k; /* readdir_r() returns a positive errno value */ + + break; + } + + if (!de) + break; + + if (!dirent_is_file_with_suffix(de, ".journal")) + continue; + + fn = join(p, "/", de->d_name, NULL); + if (!fn) { + r = -ENOMEM; + closedir(d); + goto fail; + } + + k = journal_file_open(j, fn, O_RDONLY, 0, NULL); + if (k < 0 && r == 0) + r = k; /* journal_file_open() already returns a negative errno; do not negate it again */ + free(fn); + } + closedir(d); /* release the directory stream once the scan of this path ends */ + } + + if (!j->files) { + if 
(r >= 0) + r = -ENOENT; + + goto fail; + } + + *ret = j; + return 0; + +fail: + sd_journal_close(j); + + return r; +} + +void sd_journal_close(sd_journal *j) { /* Closes every journal file still linked in j->files, then frees the context */ + assert(j); + + while (j->files) + journal_file_close(j->files); + + free(j); +} diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h new file mode 100644 index 0000000000..8170dea87c --- /dev/null +++ b/src/journal/sd-journal.h @@ -0,0 +1,74 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foojournalhfoo +#define foojournalhfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include <inttypes.h> +#include <sys/types.h> + +#include "sd-id128.h" + +/* TODO: + * + * - implement rotation + * - check LE/BE conversion for 8bit, 16bit, 32bit values + * - implement parallel traversal + * - implement audit gateway + * - implement native gateway + * - extend hash table/bisect table as we go + */ + +typedef struct sd_journal sd_journal; /* opaque journal context */ + +int sd_journal_open(sd_journal **ret); +void sd_journal_close(sd_journal *j); + +int sd_journal_previous(sd_journal *j); +int sd_journal_next(sd_journal *j); + +void* sd_journal_get(sd_journal *j, const char *field, size_t *size); +uint64_t sd_journal_get_seqnum(sd_journal *j); +uint64_t sd_journal_get_realtime_usec(sd_journal *j); +uint64_t sd_journal_get_monotonic_usec(sd_journal *j); + +int sd_journal_add_match(sd_journal *j, const char *item, size_t *size); + +int sd_journal_seek_head(sd_journal *j); +int sd_journal_seek_tail(sd_journal *j); + +int sd_journal_seek_seqnum(sd_journal *j, uint64_t seqnum); +int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); +int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); + +uint64_t sd_journal_get_max_size(sd_journal *j); +uint64_t sd_journal_get_min_size(sd_journal *j); +uint64_t sd_journal_get_keep_free(sd_journal *j); + +int sd_journal_set_max_size(sd_journal *j, uint64_t size); +int sd_journal_set_min_size(sd_journal *j, uint64_t size); +int sd_journal_set_keep_free(sd_journal *j, uint64_t size); + +sd_id128_t sd_journal_get_file_id(sd_journal *j); +sd_id128_t sd_journal_get_machine_id(sd_journal *j); +sd_id128_t sd_journal_get_boot_id(sd_journal *j); + +#endif diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c new file mode 100644 index 0000000000..92bef5f3ef --- /dev/null +++ b/src/journal/test-journal.c @@ -0,0 +1,93 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. 
+ + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <fcntl.h> + +#include "journal-private.h" +#include "log.h" + +int main(int argc, char *argv[]) { /* Appends three entries, then checks sequential, by-data and by-seqnum lookups; assert_se() keeps the calls alive under NDEBUG */ + dual_timestamp ts; + JournalFile *f; + struct iovec iovec; + static const char test[] = "test", test2[] = "test2"; + Object *o; + + log_set_max_level(LOG_DEBUG); + + assert_se(journal_file_open(NULL, "test", O_RDWR|O_CREAT, 0666, &f) == 0); + + dual_timestamp_get(&ts); + + iovec.iov_base = (void*) test; + iovec.iov_len = strlen(test); + assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL) == 0); + + iovec.iov_base = (void*) test2; + iovec.iov_len = strlen(test2); + assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL) == 0); + + iovec.iov_base = (void*) test; + iovec.iov_len = strlen(test); + assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL) == 0); + + journal_file_dump(f); + + assert_se(journal_file_next_entry(f, NULL, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_next_entry(f, o, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_next_entry(f, o, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + + assert_se(journal_file_next_entry(f, o, &o, NULL) == 0); + + assert_se(journal_file_find_first_entry(f, test, strlen(test), &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_find_last_entry(f, test, 
strlen(test), &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); /* assert_se() rather than assert(): these checks must still run when NDEBUG is defined */ + + assert_se(journal_file_find_last_entry(f, test2, strlen(test2), &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_find_first_entry(f, test2, strlen(test2), &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_find_first_entry(f, "quux", 4, &o, NULL) == 0); + + assert_se(journal_file_move_to_entry(f, 1, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_move_to_entry(f, 3, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + + assert_se(journal_file_move_to_entry(f, 2, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_move_to_entry(f, 10, &o, NULL) == 0); + + journal_file_close(f); + + return 0; +} diff --git a/src/journal/wjournal.c b/src/journal/wjournal.c new file mode 100644 index 0000000000..3122aa054e --- /dev/null +++ b/src/journal/wjournal.c @@ -0,0 +1,57 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include <errno.h> +#include "wjournal.h" +#include "journal-def.h" + +struct WJournal { + int fd; + Header *header; + HashItem *hash_table; + uint64_t *bisect_table; +}; + +int wjournal_open(const char *fn, WJournal **ret) { + assert(fn); + assert(ret); + return -ENOSYS; /* stub: not implemented yet; previously fell off the end without returning a value */ +} + +void wjournal_close(WJournal *j) { /* Closes the descriptor, unmaps the header page if mapped, and frees the object */ + assert(j); + + if (j->fd >= 0) + close_nointr_nofail(j->fd); + + if (j->header) { + munmap(j->header, PAGE_ALIGN(sizeof(Header))); + } + + free(j); +} + +int wjournal_write_object_begin(WJournal *j, uint64_t type, uint64_t size, Object **ret); +int wjournal_write_object_finish(WJournal *j, Object *ret); + +int wjournal_write_field(WJournal *j, const char *buffer, uint64_t size, Object **ret); +int wjournal_write_entry(WJournal *j, const Field *fields, unsigned n_fields, Object **ret); +int wjournal_write_eof(WJournal *j); diff --git a/src/journal/wjournal.h b/src/journal/wjournal.h new file mode 100644 index 0000000000..b0250d0fe2 --- /dev/null +++ b/src/journal/wjournal.h @@ -0,0 +1,39 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foowjournalhfoo +#define foowjournalhfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include + +typedef struct WJournal WJournal; + +int wjournal_open(const char *fn, WJournal **ret); +void wjournal_close(WJournal *j); + +int wjournal_write_object_begin(WJournal *j, uint64_t type, uint64_t size, Object **ret); +int wjournal_write_object_finish(WJournal *j, Object *ret); + +int wjournal_write_field(WJournal *j, const char *buffer, uint64_t size, Object **ret); +int wjournal_write_entry(WJournal *j, const Field *fields, unsigned n_fields, Object **ret); +int wjournal_write_eof(WJournal *j); + +#endif diff --git a/src/kmsg-syslogd.c b/src/kmsg-syslogd.c index 0901a0e49b..70cc0730ee 100644 --- a/src/kmsg-syslogd.c +++ b/src/kmsg-syslogd.c @@ -65,45 +65,53 @@ static void server_done(Server *s) { fdset_free(s->syslog_fds); } -static int server_init(Server *s, unsigned n_sockets) { - int r; - unsigned i; +static int server_init(Server *s) { + int i, r, n; struct epoll_event ev; sigset_t mask; assert(s); - assert(n_sockets > 0); zero(*s); - s->kmsg_fd = s->signal_fd = -1; - if ((s->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0) { - r = -errno; - log_error("Failed to create epoll object: %s", strerror(errno)); - goto fail; + s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (s->epoll_fd < 0) { + log_error("Failed to create epoll object: %m"); + return -errno; + } + + s->syslog_fds = fdset_new(); + if (!s->syslog_fds) { + log_error("Failed to allocate file descriptor set: %s", strerror(ENOMEM)); + return -ENOMEM; } - if (!(s->syslog_fds = fdset_new())) { - r = -ENOMEM; - log_error("Failed to allocate file descriptor set: %s", strerror(errno)); - goto fail; + n = sd_listen_fds(true); + if (n < 0) { + log_error("Failed to read listening file descriptors from environment: %s", strerror(-n)); + return n; + } + + if (n <= 0 || n > SERVER_FD_MAX) { + log_error("No or too many file descriptors passed."); + return -EINVAL; } - for (i = 0; i < n_sockets; i++) { + for (i = 0; i < n; i++) { int fd, one = 1; fd = SD_LISTEN_FDS_START+i; - if ((r = 
sd_is_socket(fd, AF_UNSPEC, SOCK_DGRAM, -1)) < 0) { + r = sd_is_socket(fd, AF_UNSPEC, SOCK_DGRAM, -1); + if (r < 0) { log_error("Failed to determine file descriptor type: %s", strerror(-r)); - goto fail; + return r; } if (!r) { log_error("Wrong file descriptor type."); - r = -EINVAL; - goto fail; + return -EINVAL; } if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0) @@ -113,18 +121,19 @@ static int server_init(Server *s, unsigned n_sockets) { ev.events = EPOLLIN; ev.data.fd = fd; if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) { - r = -errno; - log_error("Failed to add server fd to epoll object: %s", strerror(errno)); - goto fail; + log_error("Failed to add server fd to epoll object: %m"); + return -errno; } - if ((r = fdset_put(s->syslog_fds, fd)) < 0) { + r = fdset_put(s->syslog_fds, fd); + if (r < 0) { log_error("Failed to store file descriptor in set: %s", strerror(-r)); - goto fail; + return r; } } - if ((s->kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC)) < 0) { + s->kmsg_fd = open("/dev/kmsg", O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (s->kmsg_fd < 0) { log_error("Failed to open /dev/kmsg for logging: %m"); return -errno; } @@ -133,7 +142,8 @@ static int server_init(Server *s, unsigned n_sockets) { sigset_add_many(&mask, SIGINT, SIGTERM, -1); assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); - if ((s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) { + s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); + if (s->signal_fd < 0) { log_error("signalfd(): %m"); return -errno; } @@ -148,80 +158,6 @@ static int server_init(Server *s, unsigned n_sockets) { } return 0; - -fail: - server_done(s); - return r; -} - -static void skip_date(const char **buf) { - enum { - LETTER, - SPACE, - NUMBER, - SPACE_OR_NUMBER, - COLON - } sequence[] = { - LETTER, LETTER, LETTER, - SPACE, - SPACE_OR_NUMBER, NUMBER, - SPACE, - SPACE_OR_NUMBER, NUMBER, - COLON, - SPACE_OR_NUMBER, NUMBER, - COLON, - SPACE_OR_NUMBER, NUMBER, - 
SPACE - }; - - const char *p; - unsigned i; - - assert(buf); - assert(*buf); - - p = *buf; - - for (i = 0; i < ELEMENTSOF(sequence); i++, p++) { - - if (!*p) - return; - - switch (sequence[i]) { - - case SPACE: - if (*p != ' ') - return; - break; - - case SPACE_OR_NUMBER: - if (*p == ' ') - break; - - /* fall through */ - - case NUMBER: - if (*p < '0' || *p > '9') - return; - - break; - - case LETTER: - if (!(*p >= 'A' && *p <= 'Z') && - !(*p >= 'a' && *p <= 'z')) - return; - - break; - - case COLON: - if (*p != ':') - return; - break; - - } - } - - *buf = p; } static int read_process(const char **buf, struct iovec *iovec) { @@ -266,28 +202,6 @@ static int read_process(const char **buf, struct iovec *iovec) { return 1; } -static void skip_pid(const char **buf) { - const char *p; - - assert(buf); - assert(*buf); - - p = *buf; - - if (*p != '[') - return; - - p++; - p += strspn(p, "0123456789"); - - if (*p != ']') - return; - - p++; - - *buf = p; -} - static int write_message(Server *s, const char *buf, struct ucred *ucred) { ssize_t k; char priority[6], pid[16]; @@ -314,14 +228,14 @@ static int write_message(Server *s, const char *buf, struct ucred *ucred) { IOVEC_SET_STRING(iovec[i++], priority); /* Second, skip date */ - skip_date(&buf); + skip_syslog_date((char**) &buf); /* Then, add process if set */ if (read_process(&buf, &iovec[i]) > 0) i++; else if (ucred && ucred->pid > 0 && - get_process_name(ucred->pid, &process) >= 0) + get_process_comm(ucred->pid, &process) >= 0) IOVEC_SET_STRING(iovec[i++], process); /* Skip the stored PID if we have a better one */ @@ -330,7 +244,7 @@ static int write_message(Server *s, const char *buf, struct ucred *ucred) { char_array_0(pid); IOVEC_SET_STRING(iovec[i++], pid); - skip_pid(&buf); + skip_syslog_pid((char**) &buf); if (*buf == ':') buf++; @@ -368,7 +282,8 @@ static int process_event(Server *s, struct epoll_event *ev) { struct signalfd_siginfo sfsi; ssize_t n; - if ((n = read(s->signal_fd, &sfsi, sizeof(sfsi))) != 
sizeof(sfsi)) { + n = read(s->signal_fd, &sfsi, sizeof(sfsi)); + if (n != sizeof(sfsi)) { if (n >= 0) return -EIO; @@ -407,7 +322,8 @@ static int process_event(Server *s, struct epoll_event *ev) { msghdr.msg_control = &control; msghdr.msg_controllen = sizeof(control); - if ((n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT)) < 0) { + n = recvmsg(ev->data.fd, &msghdr, MSG_DONTWAIT); + if (n < 0) { if (errno == EINTR || errno == EAGAIN) return 1; @@ -424,12 +340,14 @@ static int process_event(Server *s, struct epoll_event *ev) { else ucred = NULL; - if ((e = memchr(buf, '\n', n))) + e = memchr(buf, '\n', n); + if (e) *e = 0; else buf[n] = 0; - if ((k = write_message(s, strstrip(buf), ucred)) < 0) + k = write_message(s, strstrip(buf), ucred); + if (k < 0) return k; } } @@ -439,7 +357,7 @@ static int process_event(Server *s, struct epoll_event *ev) { int main(int argc, char *argv[]) { Server server; - int r = EXIT_FAILURE, n; + int r; if (getppid() != 1) { log_error("This program should be invoked by init only."); @@ -457,18 +375,9 @@ int main(int argc, char *argv[]) { umask(0022); - if ((n = sd_listen_fds(true)) < 0) { - log_error("Failed to read listening file descriptors from environment: %s", strerror(-r)); - return EXIT_FAILURE; - } - - if (n <= 0 || n > SERVER_FD_MAX) { - log_error("No or too many file descriptors passed."); - return EXIT_FAILURE; - } - - if (server_init(&server, (unsigned) n) < 0) - return EXIT_FAILURE; + r = server_init(&server); + if (r < 0) + goto finish; log_debug("systemd-kmsg-syslogd running as pid %lu", (unsigned long) getpid()); @@ -478,36 +387,33 @@ int main(int argc, char *argv[]) { for (;;) { struct epoll_event event; - int k; - if ((k = epoll_wait(server.epoll_fd, &event, 1, -1)) < 0) { + r = epoll_wait(server.epoll_fd, &event, 1, -1); + if (r < 0) { if (errno == EINTR) continue; log_error("epoll_wait() failed: %m"); - goto fail; - } - - if (k <= 0) + r = -errno; + goto finish; + } else if (r == 0) break; - if ((k = 
process_event(&server, &event)) < 0) - goto fail; - - if (k == 0) + r = process_event(&server, &event); + if (r < 0) + goto finish; + else if (r == 0) break; } - r = EXIT_SUCCESS; - log_debug("systemd-kmsg-syslogd stopped as pid %lu", (unsigned long) getpid()); -fail: +finish: sd_notify(false, "STATUS=Shutting down..."); server_done(&server); - return r; + return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/loginctl.c b/src/loginctl.c index 89762b66b0..1be47c8dde 100644 --- a/src/loginctl.c +++ b/src/loginctl.c @@ -393,7 +393,7 @@ static void print_session_status_info(SessionStatusInfo *i) { printf("\t Leader: %u", (unsigned) i->leader); - get_process_name(i->leader, &t); + get_process_comm(i->leader, &t); if (t) { printf(" (%s)", t); free(t); diff --git a/src/machine-id-setup.c b/src/machine-id-setup.c index 519521fe67..9b25b10438 100644 --- a/src/machine-id-setup.c +++ b/src/machine-id-setup.c @@ -31,21 +31,12 @@ #include "macro.h" #include "util.h" #include "log.h" - -static void make_v4_uuid(unsigned char *id) { - /* Stolen from generate_random_uuid() of drivers/char/random.c - * in the kernel sources */ - - /* Set UUID version to 4 --- truly random generation */ - id[6] = (id[6] & 0x0F) | 0x40; - - /* Set the UUID variant to DCE */ - id[8] = (id[8] & 0x3F) | 0x80; -} +#include "sd-id128.h" static int generate(char id[34]) { - int fd; - unsigned char buf[16], *p; + int fd, r; + unsigned char *p; + sd_id128_t buf; char *q; ssize_t k; @@ -68,26 +59,13 @@ static int generate(char id[34]) { } /* If that didn't work, generate a random machine id */ - fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); - if (fd < 0) { - log_error("Failed to open /dev/urandom: %m"); - return -errno; - } - - k = loop_read(fd, buf, sizeof(buf), false); - close_nointr_nofail(fd); - - if (k != sizeof(buf)) { - log_error("Failed to read /dev/urandom: %s", strerror(k < 0 ? -k : EIO)); - return k < 0 ? 
(int) k : -EIO; + r = sd_id128_randomize(&buf); + if (r < 0) { + log_error("Failed to open /dev/urandom: %s", strerror(-r)); + return r; } - /* Turn this into a valid v4 UUID, to be nice. Note that we - * only guarantee this for newly generated UUIDs, not for - * pre-existing ones.*/ - make_v4_uuid(buf); - - for (p = buf, q = id; p < buf + sizeof(buf); p++, q += 2) { + for (p = buf.bytes, q = id; p < buf.bytes + sizeof(buf); p++, q += 2) { q[0] = hexchar(*p >> 4); q[1] = hexchar(*p & 15); } diff --git a/src/manager.c b/src/manager.c index e626347dec..ac5bbef1a8 100644 --- a/src/manager.c +++ b/src/manager.c @@ -2024,7 +2024,7 @@ static int manager_dispatch_sigchld(Manager *m) { if (si.si_code == CLD_EXITED || si.si_code == CLD_KILLED || si.si_code == CLD_DUMPED) { char *name = NULL; - get_process_name(si.si_pid, &name); + get_process_comm(si.si_pid, &name); log_debug("Got SIGCHLD for process %lu (%s)", (unsigned long) si.si_pid, strna(name)); free(name); } @@ -2109,7 +2109,7 @@ static int manager_process_signal_fd(Manager *m) { if (sfsi.ssi_pid > 0) { char *p = NULL; - get_process_name(sfsi.ssi_pid, &p); + get_process_comm(sfsi.ssi_pid, &p); log_debug("Received SIG%s from PID %lu (%s).", signal_to_string(sfsi.ssi_signo), diff --git a/src/pam-module.c b/src/pam-module.c index dd05f93d42..78f9b30d5b 100644 --- a/src/pam-module.c +++ b/src/pam-module.c @@ -163,42 +163,24 @@ static int get_user_data( const char *username = NULL; struct passwd *pw = NULL; + uid_t uid; int r; - bool have_loginuid = false; - char *s; assert(handle); assert(ret_username); assert(ret_pw); - if (have_effective_cap(CAP_AUDIT_CONTROL) > 0) { - /* Only use audit login uid if we are executed with - * sufficient capabilities so that pam_loginuid could - * do its job. If we are lacking the CAP_AUDIT_CONTROL - * capabality we most likely are being run in a - * container and /proc/self/loginuid is useless since - * it probably contains a uid of the host system. 
*/ - - if (read_one_line_file("/proc/self/loginuid", &s) >= 0) { - uid_t uid; - - r = parse_uid(s, &uid); - free(s); - - if (r >= 0 && uid != (uint32_t) -1) { - have_loginuid = true; - pw = pam_modutil_getpwuid(handle, uid); - } - } - } - - if (!have_loginuid) { - if ((r = pam_get_user(handle, &username, NULL)) != PAM_SUCCESS) { + r = audit_loginuid_from_pid(0, &uid); + if (r >= 0) + pw = pam_modutil_getpwuid(handle, uid); + else { + r = pam_get_user(handle, &username, NULL); + if (r != PAM_SUCCESS) { pam_syslog(handle, LOG_ERR, "Failed to get user name."); return r; } - if (!username || !*username) { + if (isempty(username)) { pam_syslog(handle, LOG_ERR, "User name not valid."); return PAM_AUTH_ERR; } diff --git a/src/sd-id128.c b/src/sd-id128.c new file mode 100644 index 0000000000..f5e0432a3f --- /dev/null +++ b/src/sd-id128.c @@ -0,0 +1,210 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include + +#include "sd-id128.h" +#include "util.h" +#include "macro.h" + +char *sd_id128_to_string(sd_id128_t id, char s[33]) { + unsigned n; + + assert(s); + + for (n = 0; n < 16; n++) { + s[n*2] = hexchar(id.bytes[n] >> 4); + s[n*2+1] = hexchar(id.bytes[n] & 0xF); + } + + s[32] = 0; + + return s; +} + +int sd_id128_from_string(const char s[33], sd_id128_t *ret) { + unsigned n; + sd_id128_t t; + + assert(s); + assert(ret); + + for (n = 0; n < 16; n++) { + int a, b; + + a = unhexchar(s[n*2]); + if (a < 0) + return -EINVAL; + + b = unhexchar(s[n*2+1]); + if (b < 0) + return -EINVAL; + + t.bytes[n] = (a << 4) | b; + } + + if (s[32] != 0) + return -EINVAL; + + *ret = t; + return 0; +} + +sd_id128_t sd_id128_make_v4_uuid(sd_id128_t id) { + /* Stolen from generate_random_uuid() of drivers/char/random.c + * in the kernel sources */ + + /* Set UUID version to 4 --- truly random generation */ + id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40; + + /* Set the UUID variant to DCE */ + id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80; + + return id; +} + +int sd_id128_get_machine(sd_id128_t *ret) { + static __thread sd_id128_t saved_machine_id; + static __thread bool saved_machine_id_valid = false; + int fd; + char buf[32]; + ssize_t k; + unsigned j; + sd_id128_t t; + + if (saved_machine_id_valid) { + *ret = saved_machine_id; + return 0; + } + + fd = open("/etc/machine-id", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return -errno; + + k = loop_read(fd, buf, 32, false); + close_nointr_nofail(fd); + + if (k < 0) + return (int) k; + + if (k < 32) + return -EIO; + + for (j = 0; j < 16; j++) { + int a, b; + + a = unhexchar(buf[j*2]); + b = unhexchar(buf[j*2+1]); + + if (a < 0 || b < 0) + return -EIO; + + t.bytes[j] = a << 4 | b; + } + + saved_machine_id = t; + saved_machine_id_valid = true; + + *ret = t; + return 0; +} + +int sd_id128_get_boot(sd_id128_t *ret) { + static __thread sd_id128_t saved_boot_id; + static __thread bool saved_boot_id_valid = false; + 
int fd; + char buf[36]; + ssize_t k; + unsigned j; + sd_id128_t t; + char *p; + + if (saved_boot_id_valid) { + *ret = saved_boot_id; + return 0; + } + + fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return -errno; + + k = loop_read(fd, buf, 36, false); + close_nointr_nofail(fd); + + if (k < 0) + return (int) k; + + if (k < 36) + return -EIO; + + for (j = 0, p = buf; j < 16; j++) { + int a, b; + + if (*p == '-') + p++; + + a = unhexchar(p[0]); + b = unhexchar(p[1]); + + if (a < 0 || b < 0) + return -EIO; + + t.bytes[j] = a << 4 | b; + + p += 2; + } + + saved_boot_id = t; + saved_boot_id_valid = true; + + *ret = t; + return 0; +} + +int sd_id128_randomize(sd_id128_t *ret) { + int fd; + ssize_t k; + sd_id128_t t; + + assert(ret); + + fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return -errno; + + k = loop_read(fd, &t, 16, false); + close_nointr_nofail(fd); + + if (k < 0) + return (int) k; + + if (k < 16) + return -EIO; + + /* Turn this into a valid v4 UUID, to be nice. Note that we + * only guarantee this for newly generated UUIDs, not for + * pre-existing ones.*/ + + *ret = sd_id128_make_v4_uuid(t); + return 0; +} diff --git a/src/sd-id128.h b/src/sd-id128.h new file mode 100644 index 0000000000..bfae78b97d --- /dev/null +++ b/src/sd-id128.h @@ -0,0 +1,56 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef fooid128hfoo +#define fooid128hfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include + +typedef union sd_id128 sd_id128_t; + +union sd_id128 { + uint8_t bytes[16]; + uint64_t qwords[2]; +}; + +char *sd_id128_to_string(sd_id128_t id, char s[33]); + +int sd_id128_from_string(const char s[33], sd_id128_t *ret); + +int sd_id128_randomize(sd_id128_t *ret); + +sd_id128_t sd_id128_make_v4_uuid(sd_id128_t id); + +int sd_id128_get_machine(sd_id128_t *ret); + +int sd_id128_get_boot(sd_id128_t *ret); + +#define SD_ID128_MAKE(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) \ + ((sd_id128_t) { .bytes = { 0x##v0, 0x##v1, 0x##v2, 0x##v3, 0x##v4, 0x##v5, 0x##v6, 0x##v7, \ + 0x##v8, 0x##v9, 0x##v10, 0x##v11, 0x##v12, 0x##v13, 0x##v14, 0x##v15 }}) + +static inline bool sd_id128_equal(sd_id128_t a, sd_id128_t b) { + return memcmp(&a, &b, 16) == 0; +} + +#endif diff --git a/src/stdout-syslog-bridge.c b/src/stdout-syslog-bridge.c index 9a0408819e..d50df22c88 100644 --- a/src/stdout-syslog-bridge.c +++ b/src/stdout-syslog-bridge.c @@ -649,7 +649,8 @@ int main(int argc, char *argv[]) { umask(0022); - if ((n = sd_listen_fds(true)) < 0) { + n = sd_listen_fds(true); + if (n < 0) { log_error("Failed to read listening file descriptors from environment: %s", strerror(-r)); return EXIT_FAILURE; } diff --git a/src/systemctl.c b/src/systemctl.c index 0de2444d43..a423fdbf93 100644 --- a/src/systemctl.c +++ b/src/systemctl.c @@ -2182,7 +2182,7 @@ static void print_status_info(UnitStatusInfo *i) { if (i->running) { char *t = NULL; - get_process_name(i->main_pid, &t); + get_process_comm(i->main_pid, &t); if (t) { printf(" (%s)", t); free(t); @@ -2216,7 +2216,7 @@ static void print_status_info(UnitStatusInfo *i) { printf(" Control: %u", (unsigned) i->control_pid); - get_process_name(i->control_pid, &t); + get_process_comm(i->control_pid, &t); if (t) { printf(" (%s)", 
t); free(t); diff --git a/src/test-id128.c b/src/test-id128.c new file mode 100644 index 0000000000..6c3928d2e9 --- /dev/null +++ b/src/test-id128.c @@ -0,0 +1,49 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include + +#include "sd-id128.h" +#include "util.h" +#include "macro.h" + +#define ID128_WALDI SD_ID128_MAKE(01, 02, 03, 04, 05, 06, 07, 08, 09, 0a, 0b, 0c, 0d, 0e, 0f, 10) + +int main(int argc, char *argv[]) { + sd_id128_t id, id2; + char t[33]; + + assert_se(sd_id128_randomize(&id) == 0); + printf("random: %s\n", sd_id128_to_string(id, t)); + + assert_se(sd_id128_from_string(t, &id2) == 0); + assert_se(sd_id128_equal(id, id2)); + + assert_se(sd_id128_get_machine(&id) == 0); + printf("machine: %s\n", sd_id128_to_string(id, t)); + + assert_se(sd_id128_get_boot(&id) == 0); + printf("boot: %s\n", sd_id128_to_string(id, t)); + + printf("waldi: %s\n", sd_id128_to_string(ID128_WALDI, t)); + + return 0; +} diff --git a/src/util.c b/src/util.c index 7977ee46c5..99737e4e63 100644 --- a/src/util.c +++ b/src/util.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "macro.h" #include "util.h" @@ -73,7 +74,7 @@ size_t page_size(void) { static __thread size_t pgsz = 0; long r; - if (_likely_(pgsz)) + if (_likely_(pgsz > 0)) return pgsz; assert_se((r = sysconf(_SC_PAGESIZE)) > 
0); @@ -993,46 +994,51 @@ char *truncate_nl(char *s) { return s; } -int get_process_name(pid_t pid, char **name) { - char *p; +int get_process_comm(pid_t pid, char **name) { int r; - assert(pid >= 1); assert(name); - if (asprintf(&p, "/proc/%lu/comm", (unsigned long) pid) < 0) - return -ENOMEM; - - r = read_one_line_file(p, name); - free(p); + if (pid == 0) + r = read_one_line_file("/proc/self/comm", name); + else { + char *p; + if (asprintf(&p, "/proc/%lu/comm", (unsigned long) pid) < 0) + return -ENOMEM; - if (r < 0) - return r; + r = read_one_line_file(p, name); + free(p); + } - return 0; + return r; } -int get_process_cmdline(pid_t pid, size_t max_length, char **line) { - char *p, *r, *k; +int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) { + char *r, *k; int c; bool space = false; size_t left; FILE *f; - assert(pid >= 1); assert(max_length > 0); assert(line); - if (asprintf(&p, "/proc/%lu/cmdline", (unsigned long) pid) < 0) - return -ENOMEM; + if (pid == 0) + f = fopen("/proc/self/cmdline", "re"); + else { + char *p; + if (asprintf(&p, "/proc/%lu/cmdline", (unsigned long) pid) < 0) + return -ENOMEM; - f = fopen(p, "re"); - free(p); + f = fopen(p, "re"); + free(p); + } if (!f) return -errno; - if (!(r = new(char, max_length))) { + r = new(char, max_length); + if (!r) { fclose(f); return -ENOMEM; } @@ -1076,13 +1082,17 @@ int get_process_cmdline(pid_t pid, size_t max_length, char **line) { free(r); - if ((h = get_process_name(pid, &t)) < 0) + if (!comm_fallback) + return -ENOENT; + + h = get_process_comm(pid, &t); + if (h < 0) return h; - h = asprintf(&r, "[%s]", t); + r = join("[", t, "]", NULL); free(t); - if (h < 0) + if (!r) return -ENOMEM; } @@ -1090,6 +1100,25 @@ int get_process_cmdline(pid_t pid, size_t max_length, char **line) { return 0; } +int get_process_exe(pid_t pid, char **name) { + int r; + + assert(name); + + if (pid == 0) + r = readlink_malloc("/proc/self/exe", name); + else { + char *p; + if (asprintf(&p, 
"/proc/%lu/exe", (unsigned long) pid) < 0) + return -ENOMEM; + + r = readlink_malloc(p, name); + free(p); + } + + return r; +} + char *strnappend(const char *s, const char *suffix, size_t b) { size_t a; char *r; @@ -4267,7 +4296,7 @@ const char *default_term_for_tty(const char *tty) { return term; } -bool dirent_is_file(struct dirent *de) { +bool dirent_is_file(const struct dirent *de) { assert(de); if (ignore_file(de->d_name)) @@ -4281,6 +4310,15 @@ bool dirent_is_file(struct dirent *de) { return true; } +bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) { + assert(de); + + if (!dirent_is_file(de)) + return false; + + return endswith(de->d_name, suffix); +} + void execute_directory(const char *directory, DIR *d, char *argv[]) { DIR *_d = NULL; struct dirent *de; @@ -4453,6 +4491,98 @@ void parse_syslog_priority(char **p, int *priority) { *p += k; } +void skip_syslog_pid(char **buf) { + char *p; + + assert(buf); + assert(*buf); + + p = *buf; + + if (*p != '[') + return; + + p++; + p += strspn(p, "0123456789"); + + if (*p != ']') + return; + + p++; + + *buf = p; +} + +void skip_syslog_date(char **buf) { + enum { + LETTER, + SPACE, + NUMBER, + SPACE_OR_NUMBER, + COLON + } sequence[] = { + LETTER, LETTER, LETTER, + SPACE, + SPACE_OR_NUMBER, NUMBER, + SPACE, + SPACE_OR_NUMBER, NUMBER, + COLON, + SPACE_OR_NUMBER, NUMBER, + COLON, + SPACE_OR_NUMBER, NUMBER, + SPACE + }; + + char *p; + unsigned i; + + assert(buf); + assert(*buf); + + p = *buf; + + for (i = 0; i < ELEMENTSOF(sequence); i++, p++) { + + if (!*p) + return; + + switch (sequence[i]) { + + case SPACE: + if (*p != ' ') + return; + break; + + case SPACE_OR_NUMBER: + if (*p == ' ') + break; + + /* fall through */ + + case NUMBER: + if (*p < '0' || *p > '9') + return; + + break; + + case LETTER: + if (!(*p >= 'A' && *p <= 'Z') && + !(*p >= 'a' && *p <= 'z')) + return; + + break; + + case COLON: + if (*p != ':') + return; + break; + + } + } + + *buf = p; +} + int have_effective_cap(int 
value) { cap_t cap; cap_flag_value_t fv; @@ -4672,21 +4802,6 @@ int vt_disallocate(const char *name) { return 0; } - -static int file_is_conf(const struct dirent *d, const char *suffix) { - assert(d); - - if (ignore_file(d->d_name)) - return 0; - - if (d->d_type != DT_REG && - d->d_type != DT_LNK && - d->d_type != DT_UNKNOWN) - return 0; - - return endswith(d->d_name, suffix); -} - static int files_add(Hashmap *h, const char *path, const char *suffix) { DIR *dir; struct dirent buffer, *de; @@ -4712,7 +4827,7 @@ static int files_add(Hashmap *h, const char *path, const char *suffix) { if (!de) break; - if (!file_is_conf(de, suffix)) + if (!dirent_is_file_with_suffix(de, suffix)) continue; if (asprintf(&p, "%s/%s", path, de->d_name) < 0) { @@ -5063,21 +5178,27 @@ int symlink_or_copy_atomic(const char *from, const char *to) { } int audit_session_from_pid(pid_t pid, uint32_t *id) { - char *p, *s; + char *s; uint32_t u; int r; - assert(pid >= 1); assert(id); if (have_effective_cap(CAP_AUDIT_CONTROL) <= 0) return -ENOENT; - if (asprintf(&p, "/proc/%lu/sessionid", (unsigned long) pid) < 0) - return -ENOMEM; + if (pid == 0) + r = read_one_line_file("/proc/self/sessionid", &s); + else { + char *p; + + if (asprintf(&p, "/proc/%lu/sessionid", (unsigned long) pid) < 0) + return -ENOMEM; + + r = read_one_line_file(p, &s); + free(p); + } - r = read_one_line_file(p, &s); - free(p); if (r < 0) return r; @@ -5094,6 +5215,51 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) { return 0; } +int audit_loginuid_from_pid(pid_t pid, uid_t *uid) { + char *s; + uid_t u; + int r; + + assert(uid); + + /* Only use audit login uid if we are executed with sufficient + * capabilities so that pam_loginuid could do its job. If we + * are lacking the CAP_AUDIT_CONTROL capabality we most likely + * are being run in a container and /proc/self/loginuid is + * useless since it probably contains a uid of the host + * system. 
*/ + + if (have_effective_cap(CAP_AUDIT_CONTROL) <= 0) + return -ENOENT; + + if (pid == 0) + r = read_one_line_file("/proc/self/loginuid", &s); + else { + char *p; + + if (asprintf(&p, "/proc/%lu/loginuid", (unsigned long) pid) < 0) + return -ENOMEM; + + r = read_one_line_file(p, &s); + free(p); + } + + if (r < 0) + return r; + + r = parse_uid(s, &u); + free(s); + + if (r < 0) + return r; + + if (u == (uid_t) -1) + return -ENOENT; + + *uid = (uid_t) u; + return 0; +} + bool display_is_local(const char *display) { assert(display); @@ -5700,3 +5866,21 @@ int strdup_or_null(const char *a, char **b) { *b = c; return 0; } + +int prot_from_flags(int flags) { + + switch (flags & O_ACCMODE) { + + case O_RDONLY: + return PROT_READ; + + case O_WRONLY: + return PROT_WRITE; + + case O_RDWR: + return PROT_READ|PROT_WRITE; + + default: + return -EINVAL; + } +} diff --git a/src/util.h b/src/util.h index ccbe8a3efa..1a2dd5825d 100644 --- a/src/util.h +++ b/src/util.h @@ -248,8 +248,9 @@ int parent_of_path(const char *path, char **parent); int rmdir_parents(const char *path, const char *stop); -int get_process_name(pid_t pid, char **name); -int get_process_cmdline(pid_t pid, size_t max_length, char **line); +int get_process_comm(pid_t pid, char **name); +int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line); +int get_process_exe(pid_t pid, char **name); char hexchar(int x); int unhexchar(char c); @@ -274,7 +275,9 @@ bool path_equal(const char *a, const char *b); char *ascii_strlower(char *path); -bool dirent_is_file(struct dirent *de); +bool dirent_is_file(const struct dirent *de); +bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix); + bool ignore_file(const char *filename); bool chars_intersect(const char *a, const char *b); @@ -415,6 +418,8 @@ bool nulstr_contains(const char*nulstr, const char *needle); bool plymouth_running(void); void parse_syslog_priority(char **p, int *priority); +void skip_syslog_pid(char **buf); 
+void skip_syslog_date(char **buf); int have_effective_cap(int value); @@ -443,6 +448,7 @@ int hwclock_get_time(struct tm *tm); int hwclock_set_time(const struct tm *tm); int audit_session_from_pid(pid_t pid, uint32_t *id); +int audit_loginuid_from_pid(pid_t pid, uid_t *uid); bool display_is_local(const char *display); int socket_from_display(const char *display, char **path); @@ -506,4 +512,6 @@ extern char **saved_argv; bool kexec_loaded(void); +int prot_from_flags(int flags); + #endif diff --git a/tmpfiles.d/Makefile b/tmpfiles.d/Makefile new file mode 120000 index 0000000000..bd1047548b --- /dev/null +++ b/tmpfiles.d/Makefile @@ -0,0 +1 @@ +../src/Makefile \ No newline at end of file -- cgit v1.2.3-54-g00ecf From dad503169b2665ecfd3f5bfb3c936897e44ecca7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 7 Oct 2011 21:56:11 +0200 Subject: journal: store XOR combination of entry data object hashes to identify hash lines --- src/journal/journal-def.h | 1 + src/journal/sd-journal.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 0d865ae2a2..2a519fe0db 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -74,6 +74,7 @@ _packed_ struct EntryObject { uint64_t seqnum; uint64_t realtime; uint64_t monotonic; + uint64_t xor_hash; uint64_t prev_entry_offset; uint64_t next_entry_offset; EntryItem items[]; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index f1dd92927c..d49f717915 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -758,7 +758,12 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { return 0; } -static int journal_file_append_entry_internal(JournalFile *f, const dual_timestamp *ts, const EntryItem items[], unsigned n_items, Object **ret, uint64_t *offset) { +static int journal_file_append_entry_internal( + JournalFile *f, + const dual_timestamp *ts, + uint64_t 
xor_hash, + const EntryItem items[], unsigned n_items, + Object **ret, uint64_t *offset) { uint64_t np; uint64_t osize; Object *o; @@ -776,8 +781,9 @@ static int journal_file_append_entry_internal(JournalFile *f, const dual_timesta o->object.type = htole64(OBJECT_ENTRY); o->entry.seqnum = htole64(journal_file_seqnum(f)); memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = htole64(ts->realtime); - o->entry.monotonic = htole64(ts->monotonic); + o->entry.realtime = ts ? htole64(ts->realtime) : 0; + o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; + o->entry.xor_hash = htole64(xor_hash); r = journal_file_link_entry(f, o, np); if (r < 0) @@ -796,8 +802,10 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st unsigned i; EntryItem *items; int r; + uint64_t xor_hash = 0; assert(f); + assert(iovec || n_iovec == 0); items = new(EntryItem, n_iovec); if (!items) @@ -805,15 +813,17 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st for (i = 0; i < n_iovec; i++) { uint64_t p; + Object *o; - r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, NULL, &p); + r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); if (r < 0) goto finish; + xor_hash ^= le64toh(o->data.hash); items[i].object_offset = htole64(p); } - r = journal_file_append_entry_internal(f, ts, items, n_iovec, ret, offset); + r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); finish: free(items); -- cgit v1.2.3-54-g00ecf From 260a2be45522f03ce8d8aca38e471d7b0882ff05 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 7 Oct 2011 22:00:05 +0200 Subject: journal: replace linked list by hashmap when merging files --- src/journal/journal-private.h | 12 +++++++++- src/journal/journalctl.c | 4 ++-- src/journal/journald.c | 4 ++-- src/journal/sd-journal.c | 54 ++++++++++++++++++++++++------------------- src/journal/test-journal.c | 2 +- 5 files 
changed, 46 insertions(+), 30 deletions(-) diff --git a/src/journal/journal-private.h b/src/journal/journal-private.h index 863a39893b..914b73a40b 100644 --- a/src/journal/journal-private.h +++ b/src/journal/journal-private.h @@ -27,10 +27,20 @@ #include "sd-journal.h" #include "journal-def.h" #include "util.h" +#include "sd-id128.h" + +typedef struct JournalCoursor { + sd_id128_t file_id; + sd_id128_t boot_id; + uint64_t seqnum; + uint64_t monotonic; + uint64_t realtime; + uint64_t xor_hash; +} JournalCoursor; typedef struct JournalFile JournalFile; -int journal_file_open(sd_journal *j, const char *fname, int flags, mode_t mode, JournalFile **ret); +int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile **ret); void journal_file_close(JournalFile *j); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 838e8436e4..7bcd842f6d 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -33,9 +33,9 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); - r = journal_file_open(NULL, "/var/log/journal/system.journal", O_RDONLY, 0644, &f); + r = journal_file_open("/var/log/journal/system.journal", O_RDONLY, 0644, &f); if (r == -ENOENT) - r = journal_file_open(NULL, "/run/log/journal/system.journal", O_RDONLY, 0644, &f); + r = journal_file_open("/run/log/journal/system.journal", O_RDONLY, 0644, &f); if (r < 0) { log_error("Failed to open journal: %s", strerror(-r)); diff --git a/src/journal/journald.c b/src/journal/journald.c index 9297ca6fb7..818e146f94 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -257,11 +257,11 @@ static int process_event(Server *s, struct epoll_event *ev) { static int open_system_journal(JournalFile **f) { int r; - r = journal_file_open(NULL, "/var/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); + r = journal_file_open("/var/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); if (r == -ENOENT) { mkdir_p("/run/log/journal", 0755); - r = 
journal_file_open(NULL, "/run/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); + r = journal_file_open("/run/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); } return r; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index d49f717915..8bca300f93 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -32,6 +32,7 @@ #include "journal-private.h" #include "lookup3.h" #include "list.h" +#include "hashmap.h" #define DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) #define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) @@ -43,8 +44,6 @@ #define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) struct JournalFile { - sd_journal *journal; - int fd; char *path; struct stat last_stat; @@ -72,7 +71,7 @@ struct JournalFile { }; struct sd_journal { - LIST_HEAD(JournalFile, files); + Hashmap *files; }; static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; @@ -82,9 +81,6 @@ static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; void journal_file_close(JournalFile *f) { assert(f); - if (f->journal) - LIST_REMOVE(JournalFile, files, f->journal->files, f); - if (f->fd >= 0) close_nointr_nofail(f->fd); @@ -1146,7 +1142,6 @@ fail: } int journal_file_open( - sd_journal *j, const char *fname, int flags, mode_t mode, @@ -1242,11 +1237,6 @@ int journal_file_open( if (r < 0) goto fail; - if (j) { - LIST_PREPEND(JournalFile, files, j->files, f); - f->journal = j; - } - if (ret) *ret = f; @@ -1273,6 +1263,10 @@ int sd_journal_open(sd_journal **ret) { if (!j) return -ENOMEM; + j->files = hashmap_new(string_hash_func, string_compare_func); + if (!j->files) + goto fail; + NULSTR_FOREACH(p, search_paths) { DIR *d; @@ -1287,6 +1281,7 @@ int sd_journal_open(sd_journal **ret) { for (;;) { struct dirent buf, *de; int k; + JournalFile *f; k = readdir_r(d, &buf, &de); if (k != 0) { @@ -1309,19 +1304,24 @@ int sd_journal_open(sd_journal **ret) { goto fail; } - k = journal_file_open(j, fn, O_RDONLY, 0, NULL); - if (k < 0 
&& r == 0) - r = -k; - + k = journal_file_open(fn, O_RDONLY, 0, &f); free(fn); - } - } - if (!j->files) { - if (r >= 0) - r = -ENOENT; + if (k < 0) { - goto fail; + if (r == 0) + r = -k; + } else { + k = hashmap_put(j->files, f->path, f); + if (k < 0) { + journal_file_close(f); + closedir(d); + + r = k; + goto fail; + } + } + } } *ret = j; @@ -1336,8 +1336,14 @@ fail: void sd_journal_close(sd_journal *j) { assert(j); - while (j->files) - journal_file_close(j->files); + if (j->files) { + JournalFile *f; + + while ((f = hashmap_steal_first(j->files))) + journal_file_close(f); + + hashmap_free(j->files); + } free(j); } diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index 92bef5f3ef..e0aedc7b83 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -33,7 +33,7 @@ int main(int argc, char *argv[]) { log_set_max_level(LOG_DEBUG); - assert_se(journal_file_open(NULL, "test", O_RDWR|O_CREAT, 0666, &f) == 0); + assert_se(journal_file_open("test", O_RDWR|O_CREAT, 0666, &f) == 0); dual_timestamp_get(&ts); -- cgit v1.2.3-54-g00ecf From f4b4781191e8edfb5690e4447166e3ba7bcb48f5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 7 Oct 2011 23:03:07 +0200 Subject: journal: split user logs into their own journal files --- Makefile.am | 15 +++-- src/acl-util.c | 68 +++++++++++++++++++++ src/acl-util.h | 27 +++++++++ src/journal/journal-private.h | 27 ++++++++- src/journal/journald.c | 138 ++++++++++++++++++++++++++++++++++-------- src/journal/sd-journal.c | 27 --------- src/logind-acl.c | 45 +------------- src/util.c | 16 +++++ src/util.h | 1 + 9 files changed, 264 insertions(+), 100 deletions(-) create mode 100644 src/acl-util.c create mode 100644 src/acl-util.h diff --git a/Makefile.am b/Makefile.am index 9bf92ad7ac..d43da3c47b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -978,14 +978,17 @@ systemd_journald_SOURCES = \ src/journal/journald.c \ src/journal/sd-journal.c \ src/journal/lookup3.c \ - src/sd-id128.c + src/sd-id128.c \ 
+ src/acl-util.c systemd_journald_CFLAGS = \ - $(AM_CFLAGS) + $(AM_CFLAGS) \ + $(ACL_CFLAGS) systemd_journald_LDADD = \ libsystemd-basic.la \ - libsystemd-daemon.la + libsystemd-daemon.la \ + $(ACL_LIBS) systemd_journalctl_SOURCES = \ src/journal/journalctl.c \ @@ -1143,10 +1146,12 @@ systemd_uaccess_SOURCES = \ if HAVE_ACL systemd_logind_SOURCES += \ - src/logind-acl.c + src/logind-acl.c \ + src/acl-util.c systemd_uaccess_SOURCES += \ - src/logind-acl.c + src/logind-acl.c \ + src/acl-util.c endif systemd_uaccess_CFLAGS = \ diff --git a/src/acl-util.c b/src/acl-util.c new file mode 100644 index 0000000000..a2a9f9a22b --- /dev/null +++ b/src/acl-util.c @@ -0,0 +1,68 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include +#include + +#include "acl-util.h" + +int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry) { + acl_entry_t i; + int found; + + assert(acl); + assert(entry); + + for (found = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + found > 0; + found = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { + + acl_tag_t tag; + uid_t *u; + bool b; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag != ACL_USER) + continue; + + u = acl_get_qualifier(i); + if (!u) + return -errno; + + b = *u == uid; + acl_free(u); + + if (b) { + *entry = i; + return 1; + } + } + + if (found < 0) + return -errno; + + return 0; +} diff --git a/src/acl-util.h b/src/acl-util.h new file mode 100644 index 0000000000..798ce43364 --- /dev/null +++ b/src/acl-util.h @@ -0,0 +1,27 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef fooaclutilhfoo +#define fooaclutilhfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry); + +#endif diff --git a/src/journal/journal-private.h b/src/journal/journal-private.h index 914b73a40b..3277d29542 100644 --- a/src/journal/journal-private.h +++ b/src/journal/journal-private.h @@ -29,6 +29,31 @@ #include "util.h" #include "sd-id128.h" +typedef struct JournalFile { + int fd; + char *path; + struct stat last_stat; + int prot; + bool writable; + + Header *header; + + HashItem *hash_table; + void *hash_table_window; + uint64_t hash_table_window_size; + + uint64_t *bisect_table; + void *bisect_table_window; + uint64_t bisect_table_window_size; + + void *window; + uint64_t window_offset; + uint64_t window_size; + + Object *current; + uint64_t current_offset; +} JournalFile; + typedef struct JournalCoursor { sd_id128_t file_id; sd_id128_t boot_id; @@ -38,8 +63,6 @@ typedef struct JournalCoursor { uint64_t xor_hash; } JournalCoursor; -typedef struct JournalFile JournalFile; - int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile **ret); void journal_file_close(JournalFile *j); diff --git a/src/journal/journald.c b/src/journal/journald.c index 818e146f94..e9ac3a832e 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -25,21 +25,109 @@ #include #include #include +#include +#include #include "hashmap.h" #include "journal-private.h" #include "sd-daemon.h" #include "socket-util.h" +#include "acl-util.h" typedef struct Server { int syslog_fd; int epoll_fd; int signal_fd; + JournalFile *runtime_journal; JournalFile *system_journal; Hashmap *user_journals; } Server; +static void fix_perms(JournalFile *f, uid_t uid) { + acl_t acl; + acl_entry_t entry; + acl_permset_t permset; + int r; + + assert(f); + + r = fchmod_and_fchown(f->fd, 0640, 0, 0); + if (r < 0) + log_warning("Failed to fix access mode/rights on %s, ignoring: %s", f->path, strerror(-r)); + + if (uid <= 0) + return; + + acl = acl_get_fd(f->fd); + if (!acl) { + log_warning("Failed to read ACL on %s, 
ignoring: %m", f->path); + return; + } + + r = acl_find_uid(acl, uid, &entry); + if (r <= 0) { + + if (acl_create_entry(&acl, &entry) < 0 || + acl_set_tag_type(entry, ACL_USER) < 0 || + acl_set_qualifier(entry, &uid) < 0) { + log_warning("Failed to patch ACL on %s, ignoring: %m", f->path); + goto finish; + } + } + + if (acl_get_permset(entry, &permset) < 0 || + acl_add_perm(permset, ACL_READ) < 0 || + acl_calc_mask(&acl) < 0) { + log_warning("Failed to patch ACL on %s, ignoring: %m", f->path); + goto finish; + } + + if (acl_set_fd(f->fd, acl) < 0) + log_warning("Failed to set ACL on %s, ignoring: %m", f->path); + +finish: + acl_free(acl); +} + +static JournalFile* find_journal(Server *s, uid_t uid) { + char *p; + int r; + JournalFile *f; + + assert(s); + + /* We split up user logs only on /var, not on /run */ + if (!s->system_journal) + return s->runtime_journal; + + if (uid <= 0) + return s->system_journal; + + f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid)); + if (f) + return f; + + if (asprintf(&p, "/var/log/journal/%lu.journal", (unsigned long) uid) < 0) + return s->system_journal; + + r = journal_file_open(p, O_RDWR|O_CREAT, 0640, &f); + free(p); + + if (r < 0) + return s->system_journal; + + fix_perms(f, uid); + + r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f); + if (r < 0) { + journal_file_close(f); + return s->system_journal; + } + + return f; +} + static void process_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) { char *message = NULL, *pid = NULL, *uid = NULL, *gid = NULL, *source_time = NULL, *boot_id = NULL, *machine_id = NULL, @@ -47,7 +135,6 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str *audit_session = NULL, *audit_loginuid = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *exe = NULL; - dual_timestamp ts; struct iovec iovec[15]; unsigned n = 0; char idbuf[33]; @@ -55,8 +142,8 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str 
int r; char *t; int priority = LOG_USER | LOG_INFO; - - dual_timestamp_get(&ts); + uid_t loginuid = 0; + JournalFile *f; parse_syslog_priority((char**) &buf, &priority); skip_syslog_date((char**) &buf); @@ -73,7 +160,6 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str if (ucred) { uint32_t session; - uid_t loginuid; if (asprintf(&pid, "PID=%lu", (unsigned long) ucred->pid) >= 0) IOVEC_SET_STRING(iovec[n++], pid); @@ -143,10 +229,15 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str free(t); } - r = journal_file_append_entry(s->system_journal, &ts, iovec, n, NULL, NULL); - if (r < 0) - log_error("Failed to write entry: %s", strerror(-r)); + f = find_journal(s, loginuid); + if (!f) + log_warning("Dropping message, as we can't find a place to store the data."); + else { + r = journal_file_append_entry(f, NULL, iovec, n, NULL, NULL); + if (r < 0) + log_error("Failed to write entry, ignoring: %s", strerror(-r)); + } free(message); free(pid); @@ -253,20 +344,6 @@ static int process_event(Server *s, struct epoll_event *ev) { return 1; } - -static int open_system_journal(JournalFile **f) { - int r; - - r = journal_file_open("/var/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); - if (r == -ENOENT) { - mkdir_p("/run/log/journal", 0755); - - r = journal_file_open("/run/log/journal/system.journal", O_RDWR|O_CREAT, 0644, f); - } - - return r; -} - static int server_init(Server *s) { int n, one, r; struct epoll_event ev; @@ -348,8 +425,18 @@ static int server_init(Server *s) { return -ENOMEM; } - r = open_system_journal(&s->system_journal); - if (r < 0) { + r = journal_file_open("/var/log/journal/system.journal", O_RDWR|O_CREAT, 0640, &s->system_journal); + if (r >= 0) + fix_perms(s->system_journal, 0); + else if (r == -ENOENT) { + mkdir_p("/run/log/journal", 0755); + + r = journal_file_open("/run/log/journal/system.journal", O_RDWR|O_CREAT, 0640, &s->runtime_journal); + if (r >= 0) + 
fix_perms(s->runtime_journal, 0); + } + + if (r < 0 && r != -ENOENT) { log_error("Failed to open journal: %s", strerror(-r)); return r; } @@ -383,6 +470,9 @@ static void server_done(Server *s) { if (s->system_journal) journal_file_close(s->system_journal); + if (s->runtime_journal) + journal_file_close(s->runtime_journal); + while ((f = hashmap_steal_first(s->user_journals))) journal_file_close(f); @@ -412,7 +502,7 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - log_set_target(LOG_TARGET_AUTO); + log_set_target(LOG_TARGET_CONSOLE); log_parse_environment(); log_open(); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 8bca300f93..89bf545837 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -43,33 +43,6 @@ #define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) -struct JournalFile { - int fd; - char *path; - struct stat last_stat; - int prot; - bool writable; - - Header *header; - - HashItem *hash_table; - void *hash_table_window; - uint64_t hash_table_window_size; - - uint64_t *bisect_table; - void *bisect_table_window; - uint64_t bisect_table_window_size; - - void *window; - uint64_t window_offset; - uint64_t window_size; - - Object *current; - uint64_t current_offset; - - LIST_FIELDS(JournalFile, files); -}; - struct sd_journal { Hashmap *files; }; diff --git a/src/logind-acl.c b/src/logind-acl.c index 7a06b501d4..eb8a48d191 100644 --- a/src/logind-acl.c +++ b/src/logind-acl.c @@ -27,46 +27,7 @@ #include "logind-acl.h" #include "util.h" - -static int find_acl(acl_t acl, uid_t uid, acl_entry_t *entry) { - acl_entry_t i; - int found; - - assert(acl); - assert(entry); - - for (found = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); - found > 0; - found = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { - - acl_tag_t tag; - uid_t *u; - bool b; - - if (acl_get_tag_type(i, &tag) < 0) - return -errno; - - if (tag != ACL_USER) - continue; - - u = acl_get_qualifier(i); - if (!u) - return -errno; - - b = *u == uid; - acl_free(u); - - 
if (b) { - *entry = i; - return 1; - } - } - - if (found < 0) - return -errno; - - return 0; -} +#include "acl-util.h" static int flush_acl(acl_t acl) { acl_entry_t i; @@ -125,7 +86,7 @@ int devnode_acl(const char *path, } else if (del && old_uid > 0) { acl_entry_t entry; - r = find_acl(acl, old_uid, &entry); + r = acl_find_uid(acl, old_uid, &entry); if (r < 0) goto finish; @@ -144,7 +105,7 @@ int devnode_acl(const char *path, acl_permset_t permset; int rd, wt; - r = find_acl(acl, new_uid, &entry); + r = acl_find_uid(acl, new_uid, &entry); if (r < 0) goto finish; diff --git a/src/util.c b/src/util.c index 99737e4e63..a3cfe864b6 100644 --- a/src/util.c +++ b/src/util.c @@ -3529,6 +3529,22 @@ int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) { return 0; } +int fchmod_and_fchown(int fd, mode_t mode, uid_t uid, gid_t gid) { + assert(fd >= 0); + + /* Under the assumption that we are running privileged we + * first change the access mode and only then hand out + * ownership to avoid a window where access is too open. 
*/ + + if (fchmod(fd, mode) < 0) + return -errno; + + if (fchown(fd, uid, gid) < 0) + return -errno; + + return 0; +} + cpu_set_t* cpu_set_malloc(unsigned *ncpus) { cpu_set_t *r; unsigned n = 1024; diff --git a/src/util.h b/src/util.h index 1a2dd5825d..89a7bec612 100644 --- a/src/util.h +++ b/src/util.h @@ -366,6 +366,7 @@ int get_ctty_devnr(pid_t pid, dev_t *d); int get_ctty(pid_t, dev_t *_devnr, char **r); int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid); +int fchmod_and_fchown(int fd, mode_t mode, uid_t uid, gid_t gid); int rm_rf(const char *path, bool only_dirs, bool delete_root, bool honour_sticky); -- cgit v1.2.3-54-g00ecf From cec736d21ff86c4ac81b4d306ddba2120333818c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 8 Oct 2011 02:20:44 +0200 Subject: journal: implement parallel traversal in client --- Makefile.am | 3 + src/journal/journal-def.h | 5 +- src/journal/journal-file.c | 1191 +++++++++++++++++++++++++++++++++++++++ src/journal/journal-file.h | 86 +++ src/journal/journal-private.h | 86 --- src/journal/journalctl.c | 11 +- src/journal/journald.c | 11 +- src/journal/sd-journal.c | 1241 +++++------------------------------------ src/journal/sd-journal.h | 27 +- src/journal/test-journal.c | 2 +- 10 files changed, 1434 insertions(+), 1229 deletions(-) create mode 100644 src/journal/journal-file.c create mode 100644 src/journal/journal-file.h delete mode 100644 src/journal/journal-private.h diff --git a/Makefile.am b/Makefile.am index d43da3c47b..892072318f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -965,6 +965,7 @@ test_id128_LDADD = \ test_journal_SOURCES = \ src/journal/test-journal.c \ src/journal/sd-journal.c \ + src/journal/journal-file.c \ src/journal/lookup3.c \ src/sd-id128.c @@ -977,6 +978,7 @@ test_journal_LDADD = \ systemd_journald_SOURCES = \ src/journal/journald.c \ src/journal/sd-journal.c \ + src/journal/journal-file.c \ src/journal/lookup3.c \ src/sd-id128.c \ src/acl-util.c @@ -993,6 +995,7 @@ 
systemd_journald_LDADD = \ systemd_journalctl_SOURCES = \ src/journal/journalctl.c \ src/journal/sd-journal.c \ + src/journal/journal-file.c \ src/journal/lookup3.c \ src/sd-id128.c diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 2a519fe0db..b3fa1e524f 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -48,7 +48,7 @@ enum { _packed_ struct ObjectHeader { uint8_t type; - uint8_t reserved[3]; + uint8_t reserved[7]; uint64_t size; uint8_t payload[]; }; @@ -74,6 +74,7 @@ _packed_ struct EntryObject { uint64_t seqnum; uint64_t realtime; uint64_t monotonic; + sd_id128_t boot_id; uint64_t xor_hash; uint64_t prev_entry_offset; uint64_t next_entry_offset; @@ -118,6 +119,7 @@ _packed_ struct Header { sd_id128_t file_id; sd_id128_t machine_id; sd_id128_t boot_id; + sd_id128_t seqnum_id; uint64_t arena_offset; uint64_t arena_size; uint64_t arena_max_size; @@ -133,7 +135,6 @@ _packed_ struct Header { uint64_t tail_entry_offset; uint64_t last_bisect_offset; uint64_t n_objects; - uint64_t seqnum_base; uint64_t seqnum; }; diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c new file mode 100644 index 0000000000..37e2e37eb1 --- /dev/null +++ b/src/journal/journal-file.c @@ -0,0 +1,1191 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include +#include +#include +#include + +#include "journal-def.h" +#include "journal-file.h" +#include "lookup3.h" + +#define DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) +#define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) +#define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) + +#define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) +#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) + +#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) + +static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; + +#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) + +void journal_file_close(JournalFile *f) { + assert(f); + + if (f->fd >= 0) + close_nointr_nofail(f->fd); + + if (f->header) + munmap(f->header, PAGE_ALIGN(sizeof(Header))); + + if (f->hash_table_window) + munmap(f->hash_table_window, f->hash_table_window_size); + + if (f->bisect_table_window) + munmap(f->bisect_table_window, f->bisect_table_window_size); + + if (f->window) + munmap(f->window, f->window_size); + + free(f->path); + free(f); +} + +static int journal_file_init_header(JournalFile *f) { + Header h; + ssize_t k; + int r; + + assert(f); + + zero(h); + memcpy(h.signature, signature, 8); + h.arena_offset = htole64(ALIGN64(sizeof(h))); + h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); + h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); + h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); + + r = sd_id128_randomize(&h.file_id); + if (r < 0) + return r; + + h.seqnum_id = h.file_id; + + k = pwrite(f->fd, &h, sizeof(h), 0); + if (k < 0) + return -errno; + + if (k != sizeof(h)) + return -EIO; + + return 0; +} + +static int journal_file_refresh_header(JournalFile *f) { + int r; + + assert(f); + + r = sd_id128_get_machine(&f->header->machine_id); + if (r < 0) + return r; + + r = sd_id128_get_boot(&f->header->boot_id); + if (r < 0) + return r; + + f->header->state = htole32(STATE_ONLINE); + return 0; +} + +static int 
journal_file_verify_header(JournalFile *f) { + assert(f); + + if (memcmp(f->header, signature, 8)) + return -EBADMSG; + + if (f->header->incompatible_flags != 0) + return -EPROTONOSUPPORT; + + if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) + return -ENODATA; + + if (f->writable) { + uint32_t state; + sd_id128_t machine_id; + int r; + + r = sd_id128_get_machine(&machine_id); + if (r < 0) + return r; + + if (!sd_id128_equal(machine_id, f->header->machine_id)) + return -EHOSTDOWN; + + state = le32toh(f->header->state); + + if (state == STATE_ONLINE) + log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); + else if (state == STATE_ARCHIVED) + return -ESHUTDOWN; + else if (state != STATE_OFFLINE) + log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state); + } + + return 0; +} + +static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { + uint64_t asize; + uint64_t old_size, new_size; + + assert(f); + + if (offset < le64toh(f->header->arena_offset)) + return -EINVAL; + + new_size = PAGE_ALIGN(offset + size); + + /* We assume that this file is not sparse, and we know that + * for sure, since we alway call posix_fallocate() + * ourselves */ + + old_size = + le64toh(f->header->arena_offset) + + le64toh(f->header->arena_size); + + if (old_size >= new_size) + return 0; + + asize = new_size - le64toh(f->header->arena_offset); + + if (asize > le64toh(f->header->arena_min_size)) { + struct statvfs svfs; + + if (fstatvfs(f->fd, &svfs) >= 0) { + uint64_t available; + + available = svfs.f_bfree * svfs.f_bsize; + + if (available >= f->header->arena_keep_free) + available -= f->header->arena_keep_free; + else + available = 0; + + if (new_size - old_size > available) + return -E2BIG; + } + } + + if (asize > le64toh(f->header->arena_max_size)) + return -E2BIG; + + if (posix_fallocate(f->fd, 0, new_size) < 0) + return -errno; + + if 
(fstat(f->fd, &f->last_stat) < 0) + return -errno; + + f->header->arena_size = htole64(asize); + + return 0; +} + +static int journal_file_map( + JournalFile *f, + uint64_t offset, + uint64_t size, + void **_window, + uint64_t *_woffset, + uint64_t *_wsize, + void **ret) { + + uint64_t woffset, wsize; + void *window; + + assert(f); + assert(size > 0); + assert(ret); + + woffset = offset & ~((uint64_t) page_size() - 1ULL); + wsize = size + (offset - woffset); + wsize = PAGE_ALIGN(wsize); + + window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); + if (window == MAP_FAILED) + return -errno; + + if (_window) + *_window = window; + + if (_woffset) + *_woffset = woffset; + + if (_wsize) + *_wsize = wsize; + + *ret = (uint8_t*) window + (offset - woffset); + + return 0; +} + +static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) { + void *p; + uint64_t delta; + int r; + + assert(f); + assert(ret); + + if (_likely_(f->window && + f->window_offset <= offset && + f->window_offset+f->window_size >= offset + size)) { + + *ret = (uint8_t*) f->window + (offset - f->window_offset); + return 0; + } + + if (f->window) { + if (munmap(f->window, f->window_size) < 0) + return -errno; + + f->window = NULL; + f->window_size = f->window_offset = 0; + } + + if (size < DEFAULT_WINDOW_SIZE) { + /* If the default window size is larger then what was + * asked for extend the mapping a bit in the hope to + * minimize needed remappings later on. 
We add half + * the window space before and half behind the + * requested mapping */ + + delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); + + if (offset < delta) + delta = offset; + + offset -= delta; + size += (DEFAULT_WINDOW_SIZE - delta); + } else + delta = 0; + + r = journal_file_map(f, + offset, size, + &f->window, &f->window_offset, &f->window_size, + & p); + + if (r < 0) + return r; + + *ret = (uint8_t*) p + delta; + return 0; +} + +static bool verify_hash(Object *o) { + uint64_t t; + + assert(o); + + t = le64toh(o->object.type); + if (t == OBJECT_DATA) { + uint64_t s, h1, h2; + + s = le64toh(o->object.size); + + h1 = le64toh(o->data.hash); + h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); + + return h1 == h2; + } + + return true; +} + +int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Object **ret) { + int r; + void *t; + Object *o; + uint64_t s; + + assert(f); + assert(ret); + + r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); + if (r < 0) + return r; + + o = (Object*) t; + s = le64toh(o->object.size); + + if (s < sizeof(ObjectHeader)) + return -EBADMSG; + + if (type >= 0 && le64toh(o->object.type) != type) + return -EBADMSG; + + if (s > sizeof(ObjectHeader)) { + r = journal_file_move_to(f, offset, s, &t); + if (r < 0) + return r; + + o = (Object*) t; + } + + if (!verify_hash(o)) + return -EBADMSG; + + *ret = o; + return 0; +} + +static uint64_t journal_file_seqnum(JournalFile *f) { + uint64_t r; + + assert(f); + + r = le64toh(f->header->seqnum) + 1; + f->header->seqnum = htole64(r); + + return r; +} + +static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { + int r; + uint64_t p; + Object *tail, *o; + void *t; + + assert(f); + assert(size >= sizeof(ObjectHeader)); + assert(offset); + assert(ret); + + p = le64toh(f->header->tail_object_offset); + + if (p == 0) + p = le64toh(f->header->arena_offset); + else { + r = journal_file_move_to_object(f, p, 
-1, &tail); + if (r < 0) + return r; + + p += ALIGN64(le64toh(tail->object.size)); + } + + r = journal_file_allocate(f, p, size); + if (r < 0) + return r; + + r = journal_file_move_to(f, p, size, &t); + if (r < 0) + return r; + + o = (Object*) t; + + zero(o->object); + o->object.type = htole64(OBJECT_UNUSED); + zero(o->object.reserved); + o->object.size = htole64(size); + + f->header->tail_object_offset = htole64(p); + if (f->header->head_object_offset == 0) + f->header->head_object_offset = htole64(p); + + f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); + + *ret = o; + *offset = p; + + return 0; +} + +static int journal_file_setup_hash_table(JournalFile *f) { + uint64_t s, p; + Object *o; + int r; + + assert(f); + + s = DEFAULT_HASH_TABLE_SIZE; + r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_HASH_TABLE); + memset(o->hash_table.table, 0, s); + + f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); + f->header->hash_table_size = htole64(s); + + return 0; +} + +static int journal_file_setup_bisect_table(JournalFile *f) { + uint64_t s, p; + Object *o; + int r; + + assert(f); + + s = DEFAULT_BISECT_TABLE_SIZE; + r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_BISECT_TABLE); + memset(o->bisect_table.table, 0, s); + + f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); + f->header->bisect_table_size = htole64(s); + + return 0; +} + +static int journal_file_map_hash_table(JournalFile *f) { + uint64_t s, p; + void *t; + int r; + + assert(f); + + p = le64toh(f->header->hash_table_offset); + s = le64toh(f->header->hash_table_size); + + r = journal_file_map(f, + p, s, + &f->hash_table_window, NULL, &f->hash_table_window_size, + &t); + if (r < 0) + return r; + + f->hash_table = t; + return 0; +} + +static 
int journal_file_map_bisect_table(JournalFile *f) { + uint64_t s, p; + void *t; + int r; + + assert(f); + + p = le64toh(f->header->bisect_table_offset); + s = le64toh(f->header->bisect_table_size); + + r = journal_file_map(f, + p, s, + &f->bisect_table_window, NULL, &f->bisect_table_window_size, + &t); + + if (r < 0) + return r; + + f->bisect_table = t; + return 0; +} + +static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { + uint64_t p; + int r; + + assert(f); + assert(o); + assert(offset > 0); + assert(o->object.type == htole64(OBJECT_DATA)); + + o->data.head_entry_offset = o->data.tail_entry_offset = 0; + o->data.next_hash_offset = 0; + + p = le64toh(f->hash_table[hash_index].tail_hash_offset); + if (p == 0) { + /* Only entry in the hash table is easy */ + + o->data.prev_hash_offset = 0; + f->hash_table[hash_index].head_hash_offset = htole64(offset); + } else { + o->data.prev_hash_offset = htole64(p); + + /* Temporarily move back to the previous data object, + * to patch in pointer */ + + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r < 0) + return r; + + o->data.next_hash_offset = offset; + + r = journal_file_move_to_object(f, offset, OBJECT_DATA, &o); + if (r < 0) + return r; + } + + f->hash_table[hash_index].tail_hash_offset = htole64(offset); + + return 0; +} + +static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { + uint64_t hash, h, p, np; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(data || size == 0); + + osize = offsetof(Object, data.payload) + size; + + hash = hash64(data, size); + h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); + p = le64toh(f->hash_table[h].head_hash_offset); + + while (p != 0) { + /* Look for this data object in the hash table */ + + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r < 0) + return r; + + if (le64toh(o->object.size) == osize && + 
memcmp(o->data.payload, data, size) == 0) { + + if (le64toh(o->data.hash) != hash) + return -EBADMSG; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 0; + } + + p = le64toh(o->data.next_hash_offset); + } + + r = journal_file_append_object(f, osize, &o, &np); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_DATA); + o->data.hash = htole64(hash); + memcpy(o->data.payload, data, size); + + r = journal_file_link_data(f, o, np, h); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 0; +} + +uint64_t journal_file_entry_n_items(Object *o) { + assert(o); + assert(o->object.type == htole64(OBJECT_ENTRY)); + + return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); +} + +static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { + uint64_t p, q; + int r; + assert(f); + assert(o); + assert(offset > 0); + + p = le64toh(o->entry.items[i].object_offset); + if (p == 0) + return -EINVAL; + + o->entry.items[i].next_entry_offset = 0; + + /* Move to the data object */ + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r < 0) + return r; + + q = le64toh(o->data.tail_entry_offset); + o->data.tail_entry_offset = htole64(offset); + + if (q == 0) + o->data.head_entry_offset = htole64(offset); + else { + uint64_t n, j; + + /* Move to previous entry */ + r = journal_file_move_to_object(f, q, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + n = journal_file_entry_n_items(o); + for (j = 0; j < n; j++) + if (le64toh(o->entry.items[j].object_offset) == p) + break; + + if (j >= n) + return -EBADMSG; + + o->entry.items[j].next_entry_offset = offset; + } + + /* Move back to original entry */ + r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + o->entry.items[i].prev_entry_offset = q; + return 0; +} + +static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { + uint64_t p, i, n, k, a, 
b; + int r; + + assert(f); + assert(o); + assert(offset > 0); + assert(o->object.type == htole64(OBJECT_ENTRY)); + + /* Link up the entry itself */ + p = le64toh(f->header->tail_entry_offset); + + o->entry.prev_entry_offset = f->header->tail_entry_offset; + o->entry.next_entry_offset = 0; + + if (p == 0) + f->header->head_entry_offset = htole64(offset); + else { + /* Temporarily move back to the previous entry, to + * patch in pointer */ + + r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + o->entry.next_entry_offset = htole64(offset); + + r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + } + + f->header->tail_entry_offset = htole64(offset); + + /* Link up the items */ + n = journal_file_entry_n_items(o); + for (i = 0; i < n; i++) { + r = journal_file_link_entry_item(f, o, offset, i); + if (r < 0) + return r; + } + + /* Link up the entry in the bisect table */ + n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); + k = le64toh(f->header->arena_max_size) / n; + + a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; + b = offset / k; + + for (; a <= b; a++) + f->bisect_table[a] = htole64(offset); + + f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); + + return 0; +} + +static int journal_file_append_entry_internal( + JournalFile *f, + const dual_timestamp *ts, + uint64_t xor_hash, + const EntryItem items[], unsigned n_items, + Object **ret, uint64_t *offset) { + uint64_t np; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(items || n_items == 0); + + osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); + + r = journal_file_append_object(f, osize, &o, &np); + if (r < 0) + return r; + + o->object.type = htole64(OBJECT_ENTRY); + o->entry.seqnum = htole64(journal_file_seqnum(f)); + memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); + o->entry.realtime = ts ? htole64(ts->realtime) : 0; + o->entry.monotonic = ts ? 
htole64(ts->monotonic) : 0; + o->entry.xor_hash = htole64(xor_hash); + o->entry.boot_id = f->header->boot_id; + + r = journal_file_link_entry(f, o, np); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 0; +} + +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { + unsigned i; + EntryItem *items; + int r; + uint64_t xor_hash = 0; + + assert(f); + assert(iovec || n_iovec == 0); + + items = new(EntryItem, n_iovec); + if (!items) + return -ENOMEM; + + for (i = 0; i < n_iovec; i++) { + uint64_t p; + Object *o; + + r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); + if (r < 0) + goto finish; + + xor_hash ^= le64toh(o->data.hash); + items[i].object_offset = htole64(p); + } + + r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); + +finish: + free(items); + + return r; +} + +int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { + Object *o; + uint64_t lower, upper, p, n, k; + int r; + + assert(f); + + n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); + k = le64toh(f->header->arena_max_size) / n; + + lower = 0; + upper = le64toh(f->header->last_bisect_offset)/k+1; + + while (lower < upper) { + k = (upper + lower) / 2; + p = le64toh(f->bisect_table[k]); + + if (p == 0) { + upper = k; + continue; + } + + r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (o->entry.seqnum == seqnum) { + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } else if (seqnum < o->entry.seqnum) + upper = k; + else if (seqnum > o->entry.seqnum) + lower = k+1; + } + + assert(lower == upper); + + if (lower <= 0) + return 0; + + /* The object we are looking for is between + * bisect_table[lower-1] and bisect_table[lower] */ + + p = le64toh(f->bisect_table[lower-1]); + + for (;;) { + r = 
journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (o->entry.seqnum == seqnum) { + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + + } if (seqnum < o->entry.seqnum) + return 0; + + if (o->entry.next_entry_offset == 0) + return 0; + + p = le64toh(o->entry.next_entry_offset); + } + + return 0; +} + +int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { + uint64_t np; + int r; + + assert(f); + + if (!o) + np = le64toh(f->header->head_entry_offset); + else { + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EINVAL; + + np = le64toh(o->entry.next_entry_offset); + } + + if (np == 0) + return 0; + + r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 1; +} + +int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { + uint64_t np; + int r; + + assert(f); + + if (!o) + np = le64toh(f->header->tail_entry_offset); + else { + if (le64toh(o->object.type) != OBJECT_ENTRY) + return -EINVAL; + + np = le64toh(o->entry.prev_entry_offset); + } + + if (np == 0) + return 0; + + r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = np; + + return 1; +} + +int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { + uint64_t p, osize, hash, h; + int r; + + assert(f); + assert(data || size == 0); + + osize = offsetof(Object, data.payload) + size; + + hash = hash64(data, size); + h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); + p = le64toh(f->hash_table[h].head_hash_offset); + + while (p != 0) { + Object *o; + + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r < 0) + return r; + + if (le64toh(o->object.size) == osize && + memcmp(o->data.payload, data, size) == 0) { + + if 
(le64toh(o->data.hash) != hash) + return -EBADMSG; + + if (o->data.head_entry_offset == 0) + return 0; + + p = le64toh(o->data.head_entry_offset); + r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } + + p = le64toh(o->data.next_hash_offset); + } + + return 0; +} + +int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { + uint64_t p, osize, hash, h; + int r; + + assert(f); + assert(data || size == 0); + + osize = offsetof(Object, data.payload) + size; + + hash = hash64(data, size); + h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); + p = le64toh(f->hash_table[h].tail_hash_offset); + + while (p != 0) { + Object *o; + + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r < 0) + return r; + + if (le64toh(o->object.size) == osize && + memcmp(o->data.payload, data, size) == 0) { + + if (le64toh(o->data.hash) != hash) + return -EBADMSG; + + if (o->data.tail_entry_offset == 0) + return 0; + + p = le64toh(o->data.tail_entry_offset); + r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } + + p = le64toh(o->data.prev_hash_offset); + } + + return 0; +} + +void journal_file_dump(JournalFile *f) { + char a[33], b[33], c[33]; + Object *o; + int r; + uint64_t p; + + assert(f); + + printf("File ID: %s\n" + "Machine ID: %s\n" + "Boot ID: %s\n" + "Arena size: %llu\n", + sd_id128_to_string(f->header->file_id, a), + sd_id128_to_string(f->header->machine_id, b), + sd_id128_to_string(f->header->boot_id, c), + (unsigned long long) le64toh(f->header->arena_size)); + + p = le64toh(f->header->head_object_offset); + while (p != 0) { + r = journal_file_move_to_object(f, p, -1, &o); + if (r < 0) + goto fail; + + switch (le64toh(o->object.type)) { /* type is stored little-endian */ + + case OBJECT_UNUSED: + printf("Type: OBJECT_UNUSED\n"); + 
break; + + case OBJECT_DATA: + printf("Type: OBJECT_DATA\n"); + break; + + case OBJECT_ENTRY: + printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); + break; + + case OBJECT_HASH_TABLE: + printf("Type: OBJECT_HASH_TABLE\n"); + break; + + case OBJECT_BISECT_TABLE: + printf("Type: OBJECT_BISECT_TABLE\n"); + break; + } + + if (p == le64toh(f->header->tail_object_offset)) + p = 0; + else + p = p + ALIGN64(le64toh(o->object.size)); + } + + return; +fail: + log_error("File corrupt"); +} + +int journal_file_open( + const char *fname, + int flags, + mode_t mode, + JournalFile **ret) { + + JournalFile *f; + int r; + bool newly_created = false; + + assert(fname); + + if ((flags & O_ACCMODE) != O_RDONLY && + (flags & O_ACCMODE) != O_RDWR) + return -EINVAL; + + f = new0(JournalFile, 1); + if (!f) + return -ENOMEM; + + f->writable = (flags & O_ACCMODE) != O_RDONLY; + f->prot = prot_from_flags(flags); + + f->fd = open(fname, flags|O_CLOEXEC, mode); + if (f->fd < 0) { + r = -errno; + goto fail; + } + + f->path = strdup(fname); + if (!f->path) { + r = -ENOMEM; + goto fail; + } + + if (fstat(f->fd, &f->last_stat) < 0) { + r = -errno; + goto fail; + } + + if (f->last_stat.st_size == 0 && f->writable) { + newly_created = true; + + r = journal_file_init_header(f); + if (r < 0) + goto fail; + + if (fstat(f->fd, &f->last_stat) < 0) { + r = -errno; + goto fail; + } + } + + if (f->last_stat.st_size < (off_t) sizeof(Header)) { + r = -EIO; + goto fail; + } + + f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); + if (f->header == MAP_FAILED) { + f->header = NULL; + r = -errno; + goto fail; + } + + if (!newly_created) { + r = journal_file_verify_header(f); + if (r < 0) + goto fail; + } + + if (f->writable) { + r = journal_file_refresh_header(f); + if (r < 0) + goto fail; + } + + if (newly_created) { + + r = journal_file_setup_hash_table(f); + if (r < 0) + goto fail; + + r = journal_file_setup_bisect_table(f); + if 
(r < 0) + goto fail; + } + + r = journal_file_map_hash_table(f); + if (r < 0) + goto fail; + + r = journal_file_map_bisect_table(f); + if (r < 0) + goto fail; + + if (ret) + *ret = f; + + return 0; + +fail: + journal_file_close(f); + + return r; +} diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h new file mode 100644 index 0000000000..55cc7153af --- /dev/null +++ b/src/journal/journal-file.h @@ -0,0 +1,86 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foojournalfilehfoo +#define foojournalfilehfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include + +#include "journal-def.h" +#include "util.h" +#include "sd-id128.h" + +typedef struct JournalFile { + int fd; + char *path; + struct stat last_stat; + int prot; + bool writable; + + Header *header; + + HashItem *hash_table; + void *hash_table_window; + uint64_t hash_table_window_size; + + uint64_t *bisect_table; + void *bisect_table_window; + uint64_t bisect_table_window_size; + + void *window; + uint64_t window_offset; + uint64_t window_size; + + uint64_t current_offset; +} JournalFile; + +typedef struct JournalCursor { + uint8_t version; + uint8_t reserved[7]; + uint64_t seqnum; + sd_id128_t seqnum_id; + sd_id128_t boot_id; + uint64_t monotonic; + uint64_t realtime; + uint64_t xor_hash; +} JournalCursor; + +int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile **ret); + +void journal_file_close(JournalFile *j); + +int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Object **ret); + +uint64_t journal_file_entry_n_items(Object *o); + +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset); + +int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset); + +int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); +int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); + +int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); +int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); + +void journal_file_dump(JournalFile *f); + +#endif diff --git a/src/journal/journal-private.h b/src/journal/journal-private.h deleted file mode 100644 index 3277d29542..0000000000 --- a/src/journal/journal-private.h +++ /dev/null @@ -1,86 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - 
-#ifndef foojournalprivatehfoo -#define foojournalprivatehfoo - -/*** - This file is part of systemd. - - Copyright 2011 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with systemd; If not, see . -***/ - -#include - -#include "sd-journal.h" -#include "journal-def.h" -#include "util.h" -#include "sd-id128.h" - -typedef struct JournalFile { - int fd; - char *path; - struct stat last_stat; - int prot; - bool writable; - - Header *header; - - HashItem *hash_table; - void *hash_table_window; - uint64_t hash_table_window_size; - - uint64_t *bisect_table; - void *bisect_table_window; - uint64_t bisect_table_window_size; - - void *window; - uint64_t window_offset; - uint64_t window_size; - - Object *current; - uint64_t current_offset; -} JournalFile; - -typedef struct JournalCoursor { - sd_id128_t file_id; - sd_id128_t boot_id; - uint64_t seqnum; - uint64_t monotonic; - uint64_t realtime; - uint64_t xor_hash; -} JournalCoursor; - -int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile **ret); - -void journal_file_close(JournalFile *j); - -int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret); - -uint64_t journal_file_entry_n_items(Object *o); - -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset); - -int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset); - -int 
journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); -int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); - -int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); -int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); - -void journal_file_dump(JournalFile *f); - -#endif diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 7bcd842f6d..5f17f45cac 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -23,7 +23,7 @@ #include #include -#include "journal-private.h" +#include "journal-file.h" int main(int argc, char *argv[]) { int r; @@ -62,21 +62,16 @@ int main(int argc, char *argv[]) { uint64_t p, l; p = le64toh(o->entry.items[i].object_offset); - r = journal_file_move_to_object(f, p, &o); + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); if (r < 0) { log_error("Failed to move to data: %s", strerror(-r)); goto finish; } - if (le64toh(o->object.type) != OBJECT_DATA) { - log_error("Invalid file"); - goto finish; - } - l = o->object.size - offsetof(Object, data.payload); printf("\t[%.*s]\n", (int) l, o->data.payload); - r = journal_file_move_to_object(f, offset, &o); + r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); if (r < 0) { log_error("Failed to move back to entry: %s", strerror(-r)); goto finish; diff --git a/src/journal/journald.c b/src/journal/journald.c index e9ac3a832e..d65451df58 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -29,7 +29,7 @@ #include #include "hashmap.h" -#include "journal-private.h" +#include "journal-file.h" #include "sd-daemon.h" #include "socket-util.h" #include "acl-util.h" @@ -282,7 +282,9 @@ static int process_event(Server *s, struct epoll_event *ev) { log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo)); return 0; - } else { + } + + if (ev->data.fd == s->syslog_fd) { 
for (;;) { char buf[LINE_MAX+1]; struct msghdr msghdr; @@ -339,9 +341,12 @@ static int process_event(Server *s, struct epoll_event *ev) { process_message(s, strstrip(buf), ucred, tv); } + + return 1; } - return 1; + log_error("Unknown event."); + return 0; } static int server_init(Server *s) { diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 89bf545837..8426b3bf9e 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -19,1206 +19,223 @@ along with systemd; If not, see . ***/ -#include #include -#include -#include -#include #include -#include #include "sd-journal.h" #include "journal-def.h" -#include "journal-private.h" -#include "lookup3.h" -#include "list.h" +#include "journal-file.h" #include "hashmap.h" +#include "list.h" -#define DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) -#define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) -#define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) +typedef struct Match Match; -#define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) -#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) +struct Match { + char *data; + size_t size; + uint64_t hash; -#define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) + LIST_FIELDS(Match, matches); +}; struct sd_journal { Hashmap *files; -}; - -static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; - -#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) - -void journal_file_close(JournalFile *f) { - assert(f); - - if (f->fd >= 0) - close_nointr_nofail(f->fd); - - if (f->header) - munmap(f->header, PAGE_ALIGN(sizeof(Header))); - - if (f->hash_table_window) - munmap(f->hash_table_window, f->hash_table_window_size); - - if (f->bisect_table_window) - munmap(f->bisect_table_window, f->bisect_table_window_size); - - if (f->window) - munmap(f->window, f->window_size); - - free(f->path); - free(f); -} - -static int journal_file_init_header(JournalFile *f) { - Header h; - ssize_t k; - int r; - - assert(f); - - zero(h); - 
memcpy(h.signature, signature, 8); - h.arena_offset = htole64(ALIGN64(sizeof(h))); - h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); - h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); - h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); - - r = sd_id128_randomize(&h.file_id); - if (r < 0) - return r; - - k = pwrite(f->fd, &h, sizeof(h), 0); - if (k < 0) - return -errno; - - if (k != sizeof(h)) - return -EIO; - - return 0; -} - -static int journal_file_refresh_header(JournalFile *f) { - int r; - - assert(f); - - r = sd_id128_get_machine(&f->header->machine_id); - if (r < 0) - return r; - - r = sd_id128_get_boot(&f->header->boot_id); - if (r < 0) - return r; - - f->header->state = htole32(STATE_ONLINE); - return 0; -} - -static int journal_file_verify_header(JournalFile *f) { - assert(f); - - if (memcmp(f->header, signature, 8)) - return -EBADMSG; - - if (f->header->incompatible_flags != 0) - return -EPROTONOSUPPORT; - - if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) - return -ENODATA; - - if (f->writable) { - uint32_t state; - sd_id128_t machine_id; - int r; - - r = sd_id128_get_machine(&machine_id); - if (r < 0) - return r; - - if (!sd_id128_equal(machine_id, f->header->machine_id)) - return -EHOSTDOWN; - - state = le32toh(f->header->state); - - if (state == STATE_ONLINE) - log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); - else if (state == STATE_ARCHIVED) - return -ESHUTDOWN; - else if (state != STATE_OFFLINE) - log_debug("Journal file %s has unknown state %u. 
Ignoring.", f->path, state); - } - - return 0; -} - -static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { - uint64_t asize; - uint64_t old_size, new_size; - - assert(f); - - if (offset < le64toh(f->header->arena_offset)) - return -EINVAL; - - new_size = PAGE_ALIGN(offset + size); - - /* We assume that this file is not sparse, and we know that - * for sure, since we alway call posix_fallocate() - * ourselves */ - - old_size = - le64toh(f->header->arena_offset) + - le64toh(f->header->arena_size); - - if (old_size >= new_size) - return 0; - - asize = new_size - le64toh(f->header->arena_offset); - if (asize > le64toh(f->header->arena_min_size)) { - struct statvfs svfs; - - if (fstatvfs(f->fd, &svfs) >= 0) { - uint64_t available; - - available = svfs.f_bfree * svfs.f_bsize; - - if (available >= f->header->arena_keep_free) - available -= f->header->arena_keep_free; - else - available = 0; - - if (new_size - old_size > available) - return -E2BIG; - } - } - - if (asize > le64toh(f->header->arena_max_size)) - return -E2BIG; - - if (posix_fallocate(f->fd, 0, new_size) < 0) - return -errno; - - if (fstat(f->fd, &f->last_stat) < 0) - return -errno; - - f->header->arena_size = htole64(asize); - - return 0; -} - -static int journal_file_map( - JournalFile *f, - uint64_t offset, - uint64_t size, - void **_window, - uint64_t *_woffset, - uint64_t *_wsize, - void **ret) { - - uint64_t woffset, wsize; - void *window; - - assert(f); - assert(size > 0); - assert(ret); - - woffset = offset & ~((uint64_t) page_size() - 1ULL); - wsize = size + (offset - woffset); - wsize = PAGE_ALIGN(wsize); - - window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); - if (window == MAP_FAILED) - return -errno; - - if (_window) - *_window = window; - - if (_woffset) - *_woffset = woffset; - - if (_wsize) - *_wsize = wsize; - - *ret = (uint8_t*) window + (offset - woffset); - - return 0; -} - -static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t 
size, void **ret) { - void *p; - uint64_t delta; - int r; - - assert(f); - assert(ret); - - if (_likely_(f->window && - f->window_offset <= offset && - f->window_offset+f->window_size >= offset + size)) { - - *ret = (uint8_t*) f->window + (offset - f->window_offset); - return 0; - } - - if (f->window) { - if (munmap(f->window, f->window_size) < 0) - return -errno; - - f->window = NULL; - f->window_size = f->window_offset = 0; - } - - if (size < DEFAULT_WINDOW_SIZE) { - /* If the default window size is larger then what was - * asked for extend the mapping a bit in the hope to - * minimize needed remappings later on. We add half - * the window space before and half behind the - * requested mapping */ - - delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); - - if (offset < delta) - delta = offset; - - offset -= delta; - size += (DEFAULT_WINDOW_SIZE - delta); - } else - delta = 0; - - r = journal_file_map(f, - offset, size, - &f->window, &f->window_offset, &f->window_size, - & p); - - if (r < 0) - return r; - - *ret = (uint8_t*) p + delta; - return 0; -} - -static bool verify_hash(Object *o) { - uint64_t t; - - assert(o); - - t = le64toh(o->object.type); - if (t == OBJECT_DATA) { - uint64_t s, h1, h2; - - s = le64toh(o->object.size); - - h1 = le64toh(o->data.hash); - h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); - - return h1 == h2; - } - - return true; -} - -int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret) { - int r; - void *t; - Object *o; - uint64_t s; - - assert(f); - assert(ret); + JournalFile *current_file; - r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); - if (r < 0) - return r; + LIST_HEAD(Match, matches); +}; - o = (Object*) t; - s = le64toh(o->object.size); +int sd_journal_add_match(sd_journal *j, const char *field, const void *data, size_t size) { + Match *m; + char *e; - if (s < sizeof(ObjectHeader)) - return -EBADMSG; + assert(j); + assert(field); + assert(data || size == 0); - if (s > 
sizeof(ObjectHeader)) { - r = journal_file_move_to(f, offset, s, &t); - if (r < 0) - return r; + m = new0(Match, 1); + if (!m) + return -ENOMEM; - o = (Object*) t; + m->size = strlen(field) + 1 + size; + m->data = malloc(m->size); + if (!m->data) { + free(m); + return -ENOMEM; } - if (!verify_hash(o)) - return -EBADMSG; + e = stpcpy(m->data, field); + *(e++) = '='; + memcpy(e, data, size); - *ret = o; + LIST_PREPEND(Match, matches, j->matches, m); return 0; } -static uint64_t journal_file_seqnum(JournalFile *f) { - uint64_t r; - - assert(f); - - r = le64toh(f->header->seqnum) + 1; - f->header->seqnum = htole64(r); - - return r; -} - -static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { - int r; - uint64_t p; - Object *tail, *o; - void *t; - - assert(f); - assert(size >= sizeof(ObjectHeader)); - assert(offset); - assert(ret); - - p = le64toh(f->header->tail_object_offset); +void sd_journal_flush_matches(sd_journal *j) { + assert(j); - if (p == 0) - p = le64toh(f->header->arena_offset); - else { - r = journal_file_move_to_object(f, p, &tail); - if (r < 0) - return r; + while (j->matches) { + Match *m = j->matches; - p += ALIGN64(le64toh(tail->object.size)); + LIST_REMOVE(Match, matches, j->matches, m); + free(m->data); + free(m); } - - r = journal_file_allocate(f, p, size); - if (r < 0) - return r; - - r = journal_file_move_to(f, p, size, &t); - if (r < 0) - return r; - - o = (Object*) t; - - zero(o->object); - o->object.type = htole64(OBJECT_UNUSED); - zero(o->object.reserved); - o->object.size = htole64(size); - - f->header->tail_object_offset = htole64(p); - if (f->header->head_object_offset == 0) - f->header->head_object_offset = htole64(p); - - f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); - - *ret = o; - *offset = p; - - return 0; -} - -static int journal_file_setup_hash_table(JournalFile *f) { - uint64_t s, p; - Object *o; - int r; - - assert(f); - - s = DEFAULT_HASH_TABLE_SIZE; - r = 
journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); - if (r < 0) - return r; - - o->object.type = htole64(OBJECT_HASH_TABLE); - memset(o->hash_table.table, 0, s); - - f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); - f->header->hash_table_size = htole64(s); - - return 0; -} - -static int journal_file_setup_bisect_table(JournalFile *f) { - uint64_t s, p; - Object *o; - int r; - - assert(f); - - s = DEFAULT_BISECT_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); - if (r < 0) - return r; - - o->object.type = htole64(OBJECT_BISECT_TABLE); - memset(o->bisect_table.table, 0, s); - - f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); - f->header->bisect_table_size = htole64(s); - - return 0; -} - -static int journal_file_map_hash_table(JournalFile *f) { - uint64_t s, p; - void *t; - int r; - - assert(f); - - p = le64toh(f->header->hash_table_offset); - s = le64toh(f->header->hash_table_size); - - r = journal_file_map(f, - p, s, - &f->hash_table_window, NULL, &f->hash_table_window_size, - &t); - if (r < 0) - return r; - - f->hash_table = t; - return 0; } -static int journal_file_map_bisect_table(JournalFile *f) { - uint64_t s, p; - void *t; - int r; - - assert(f); - - p = le64toh(f->header->bisect_table_offset); - s = le64toh(f->header->bisect_table_size); +static int compare_order(JournalFile *af, Object *ao, uint64_t ap, + JournalFile *bf, Object *bo, uint64_t bp) { - r = journal_file_map(f, - p, s, - &f->bisect_table_window, NULL, &f->bisect_table_window_size, - &t); - - if (r < 0) - return r; + uint64_t a, b; - f->bisect_table = t; - return 0; -} + if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) { -static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { - uint64_t p; - int r; + /* If this is from the same seqnum source, compare + * seqnums */ + a = 
le64toh(ao->entry.seqnum); + b = le64toh(bo->entry.seqnum); - assert(f); - assert(o); - assert(offset > 0); - assert(o->object.type == htole64(OBJECT_DATA)); - o->data.head_entry_offset = o->data.tail_entry_offset = 0; - o->data.next_hash_offset = 0; + } else if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) { - p = le64toh(f->hash_table[hash_index].tail_hash_offset); - if (p == 0) { - /* Only entry in the hash table is easy */ + /* If the boot id matches compare monotonic time */ + a = le64toh(ao->entry.monotonic); + b = le64toh(bo->entry.monotonic); - o->data.prev_hash_offset = 0; - f->hash_table[hash_index].head_hash_offset = htole64(offset); } else { - o->data.prev_hash_offset = htole64(p); - /* Temporarily move back to the previous data object, - * to patch in pointer */ - - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - o->data.next_hash_offset = offset; - - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; + /* Otherwise compare UTC time */ + a = le64toh(ao->entry.realtime); + b = le64toh(bo->entry.realtime); /* second operand must come from bo, not ao */ } - f->hash_table[hash_index].tail_hash_offset = htole64(offset); - - return 0; + return + a < b ? -1 : + a > b ? 
+1 : 0; } -static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t hash, h, p, np; - uint64_t osize; - Object *o; +int sd_journal_next(sd_journal *j) { + JournalFile *f, *new_current = NULL; + Iterator i; int r; + uint64_t new_offset = 0; + Object *new_entry = NULL; - assert(f); - assert(data || size == 0); - - osize = offsetof(Object, data.payload) + size; + assert(j); - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + HASHMAP_FOREACH(f, j->files, i) { + Object *o; + uint64_t p; - while (p != 0) { - /* Look for this data object in the hash table */ + if (f->current_offset > 0) { + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + } else + o = NULL; - r = journal_file_move_to_object(f, p, &o); + r = journal_file_next_entry(f, o, &o, &p); if (r < 0) return r; + else if (r == 0) + continue; - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; - - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { - - if (le64toh(o->data.hash) != hash) - return -EBADMSG; - - if (ret) - *ret = o; - - if (offset) - *offset = p; - - return 0; + if (!new_current || compare_order(new_current, new_entry, new_offset, f, o, p) > 0) { + new_current = f; + new_entry = o; + new_offset = p; } - - p = le64toh(o->data.next_hash_offset); } - r = journal_file_append_object(f, osize, &o, &np); - if (r < 0) - return r; - - o->object.type = htole64(OBJECT_DATA); - o->data.hash = htole64(hash); - memcpy(o->data.payload, data, size); - - r = journal_file_link_data(f, o, np, h); - if (r < 0) - return r; - - if (ret) - *ret = o; - - if (offset) - *offset = np; - - return 0; -} - -uint64_t journal_file_entry_n_items(Object *o) { - assert(o); - assert(o->object.type == htole64(OBJECT_ENTRY)); - - return (le64toh(o->object.size) - 
offsetof(Object, entry.items)) / sizeof(EntryItem); -} - -static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { - uint64_t p, q; - int r; - assert(f); - assert(o); - assert(offset > 0); - - p = le64toh(o->entry.items[i].object_offset); - if (p == 0) - return -EINVAL; - - o->entry.items[i].next_entry_offset = 0; - - /* Move to the data object */ - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - if (o->object.type != htole64(OBJECT_DATA)) - return -EBADMSG; - - q = le64toh(o->data.tail_entry_offset); - o->data.tail_entry_offset = htole64(offset); - - if (q == 0) - o->data.head_entry_offset = htole64(offset); - else { - uint64_t n, j; - - /* Move to previous entry */ - r = journal_file_move_to_object(f, q, &o); - if (r < 0) - return r; - - if (o->object.type != htole64(OBJECT_ENTRY)) - return -EBADMSG; - - n = journal_file_entry_n_items(o); - for (j = 0; j < n; j++) - if (le64toh(o->entry.items[j].object_offset) == p) - break; - - if (j >= n) - return -EBADMSG; - - o->entry.items[j].next_entry_offset = offset; - } - - /* Move back to original entry */ - r = journal_file_move_to_object(f, offset, &o); - if (r < 0) - return r; - - o->entry.items[i].prev_entry_offset = q; - return 0; -} - -static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { - uint64_t p, i, n, k, a, b; - int r; - - assert(f); - assert(o); - assert(offset > 0); - assert(o->object.type == htole64(OBJECT_ENTRY)); - - /* Link up the entry itself */ - p = le64toh(f->header->tail_entry_offset); - - o->entry.prev_entry_offset = f->header->tail_entry_offset; - o->entry.next_entry_offset = 0; - - if (p == 0) - f->header->head_entry_offset = htole64(offset); - else { - /* Temporarily move back to the previous entry, to - * patch in pointer */ - - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - o->entry.next_entry_offset = htole64(offset); - - r = journal_file_move_to_object(f, offset, &o); - if 
(r < 0) - return r; - } - - f->header->tail_entry_offset = htole64(offset); - - /* Link up the items */ - n = journal_file_entry_n_items(o); - for (i = 0; i < n; i++) { - r = journal_file_link_entry_item(f, o, offset, i); - if (r < 0) - return r; + if (new_current) { + j->current_file = new_current; + new_current->current_offset = new_offset; /* record position in the selected file, not the loop cursor f */ + return 1; } - /* Link up the entry in the bisect table */ - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; - - a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; - b = offset / k; - - for (; a <= b; a++) - f->bisect_table[a] = htole64(offset); - - f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); - return 0; } -static int journal_file_append_entry_internal( - JournalFile *f, - const dual_timestamp *ts, - uint64_t xor_hash, - const EntryItem items[], unsigned n_items, - Object **ret, uint64_t *offset) { - uint64_t np; - uint64_t osize; - Object *o; +int sd_journal_previous(sd_journal *j) { + JournalFile *f, *new_current = NULL; + Iterator i; int r; + uint64_t new_offset = 0; + Object *new_entry = NULL; - assert(f); - assert(items || n_items == 0); - - osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); - - r = journal_file_append_object(f, osize, &o, &np); - if (r < 0) - return r; - - o->object.type = htole64(OBJECT_ENTRY); - o->entry.seqnum = htole64(journal_file_seqnum(f)); - memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = ts ? htole64(ts->realtime) : 0; - o->entry.monotonic = ts ? 
htole64(ts->monotonic) : 0; - o->entry.xor_hash = htole64(xor_hash); - - r = journal_file_link_entry(f, o, np); - if (r < 0) - return r; - - if (ret) - *ret = o; - - if (offset) - *offset = np; - - return 0; -} - -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { - unsigned i; - EntryItem *items; - int r; - uint64_t xor_hash = 0; - - assert(f); - assert(iovec || n_iovec == 0); - - items = new(EntryItem, n_iovec); - if (!items) - return -ENOMEM; + assert(j); - for (i = 0; i < n_iovec; i++) { - uint64_t p; + HASHMAP_FOREACH(f, j->files, i) { Object *o; + uint64_t p; - r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); - if (r < 0) - goto finish; - - xor_hash ^= le64toh(o->data.hash); - items[i].object_offset = htole64(p); - } - - r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); - -finish: - free(items); - - return r; -} - -int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { - Object *o; - uint64_t lower, upper, p, n, k; - int r; - - assert(f); - - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; - - lower = 0; - upper = le64toh(f->header->last_bisect_offset)/k+1; - - while (lower < upper) { - k = (upper + lower) / 2; - p = le64toh(f->bisect_table[k]); - - if (p == 0) { - upper = k; - continue; - } + if (f->current_offset > 0) { + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + } else + o = NULL; - r = journal_file_move_to_object(f, p, &o); + r = journal_file_prev_entry(f, o, &o, &p); if (r < 0) return r; + else if (r == 0) + continue; - if (o->object.type != htole64(OBJECT_ENTRY)) - return -EBADMSG; - - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; - - if (offset) - *offset = p; - - return 1; - } else if (seqnum < o->entry.seqnum) - upper = 
k; - else if (seqnum > o->entry.seqnum) - lower = k+1; + if (!new_current || compare_order(new_current, new_entry, new_offset, f, o, p) < 0) { /* stepping backwards: keep the newest candidate */ + new_current = f; + new_entry = o; + new_offset = p; + } } - assert(lower == upper); - - if (lower <= 0) - return 0; - - /* The object we are looking for is between - * bisect_table[lower-1] and bisect_table[lower] */ - - p = le64toh(f->bisect_table[lower-1]); - - for (;;) { - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; - - if (offset) - *offset = p; - - return 1; - - } if (seqnum < o->entry.seqnum) - return 0; - - if (o->entry.next_entry_offset == 0) - return 0; - - p = le64toh(o->entry.next_entry_offset); + if (new_current) { + j->current_file = new_current; + new_current->current_offset = new_offset; /* record position in the selected file, not the loop cursor f */ + return 1; } return 0; } -int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; - int r; - - assert(f); - - if (!o) - np = le64toh(f->header->head_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; - - np = le64toh(o->entry.next_entry_offset); - } - - if (np == 0) - return 0; - - r = journal_file_move_to_object(f, np, &o); - if (r < 0) - return r; - - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; - - if (ret) - *ret = o; - - if (offset) - *offset = np; - - return 1; -} - -int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; +int sd_journal_get_cursor(sd_journal *j, void **cursor, size_t *size) { + JournalCursor *c; + Object *o; int r; - assert(f); - - if (!o) - np = le64toh(f->header->tail_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; - - np = le64toh(o->entry.prev_entry_offset); - } + assert(j); + assert(cursor); + assert(size); - if (np == 0) + if (!j->current_file || j->current_file->current_offset == 0) /* no current entry to build a cursor from */ return 0; - r = journal_file_move_to_object(f, 
np, &o); + r = journal_file_move_to_object(j->current_file, j->current_file->current_offset, OBJECT_ENTRY, &o); if (r < 0) return r; - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; + c = new0(JournalCursor, 1); + if (!c) + return -ENOMEM; - if (ret) - *ret = o; + c->version = 1; + c->seqnum = o->entry.seqnum; + c->seqnum_id = j->current_file->header->seqnum_id; + c->boot_id = o->entry.boot_id; + c->monotonic = o->entry.monotonic; + c->realtime = o->entry.realtime; + c->xor_hash = o->entry.xor_hash; - if (offset) - *offset = np; + *cursor = c; + *size = sizeof(JournalCursor); return 1; } -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; - int r; - - assert(f); - assert(data || size == 0); - - osize = offsetof(Object, data.payload) + size; - - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); - - while (p != 0) { - Object *o; - - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; - - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { - - if (le64toh(o->data.hash) != hash) - return -EBADMSG; - - if (o->data.head_entry_offset == 0) - return 0; - - p = le64toh(o->data.head_entry_offset); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; - - if (ret) - *ret = o; - - if (offset) - *offset = p; - - return 1; - } - - p = le64toh(o->data.next_hash_offset); - } - - return 0; -} - -int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; - int r; - - assert(f); - assert(data || size == 0); - - osize = offsetof(Object, data.payload) + size; - - hash = hash64(data, size); - h = 
hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].tail_hash_offset); - - while (p != 0) { - Object *o; - - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - if (le64toh(o->object.type) != OBJECT_DATA) - return -EBADMSG; - - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { - - if (le64toh(o->data.hash) != hash) - return -EBADMSG; - - if (o->data.tail_entry_offset == 0) - return 0; - - p = le64toh(o->data.tail_entry_offset); - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - return r; - - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EBADMSG; - - if (ret) - *ret = o; - - if (offset) - *offset = p; - - return 1; - } - - p = le64toh(o->data.prev_hash_offset); - } - - return 0; -} - -void journal_file_dump(JournalFile *f) { - char a[33], b[33], c[33]; - Object *o; - int r; - uint64_t p; - - assert(f); - - printf("File ID: %s\n" - "Machine ID: %s\n" - "Boot ID: %s\n" - "Arena size: %llu\n", - sd_id128_to_string(f->header->file_id, a), - sd_id128_to_string(f->header->machine_id, b), - sd_id128_to_string(f->header->boot_id, c), - (unsigned long long) le64toh(f->header->arena_size)); - - p = le64toh(f->header->head_object_offset); - while (p != 0) { - r = journal_file_move_to_object(f, p, &o); - if (r < 0) - goto fail; - - switch (o->object.type) { - - case OBJECT_UNUSED: - printf("Type: OBJECT_UNUSED\n"); - break; - - case OBJECT_DATA: - printf("Type: OBJECT_DATA\n"); - break; - - case OBJECT_ENTRY: - printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); - break; - - case OBJECT_HASH_TABLE: - printf("Type: OBJECT_HASH_TABLE\n"); - break; - - case OBJECT_BISECT_TABLE: - printf("Type: OBJECT_BISECT_TABLE\n"); - break; - } - - if (p == le64toh(f->header->tail_object_offset)) - p = 0; - else - p = p + ALIGN64(le64toh(o->object.size)); - } - - return; -fail: - log_error("File corrupt"); -} - -int journal_file_open( - const char 
*fname, - int flags, - mode_t mode, - JournalFile **ret) { - - JournalFile *f; - int r; - bool newly_created = false; - - assert(fname); - - if ((flags & O_ACCMODE) != O_RDONLY && - (flags & O_ACCMODE) != O_RDWR) - return -EINVAL; - - f = new0(JournalFile, 1); - if (!f) - return -ENOMEM; - - f->writable = (flags & O_ACCMODE) != O_RDONLY; - f->prot = prot_from_flags(flags); - - f->fd = open(fname, flags|O_CLOEXEC, mode); - if (f->fd < 0) { - r = -errno; - goto fail; - } - - f->path = strdup(fname); - if (!f->path) { - r = -ENOMEM; - goto fail; - } - - if (fstat(f->fd, &f->last_stat) < 0) { - r = -errno; - goto fail; - } - - if (f->last_stat.st_size == 0 && f->writable) { - newly_created = true; - - r = journal_file_init_header(f); - if (r < 0) - goto fail; - - if (fstat(f->fd, &f->last_stat) < 0) { - r = -errno; - goto fail; - } - } - - if (f->last_stat.st_size < (off_t) sizeof(Header)) { - r = -EIO; - goto fail; - } - - f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); - if (f->header == MAP_FAILED) { - f->header = NULL; - r = -errno; - goto fail; - } - - if (!newly_created) { - r = journal_file_verify_header(f); - if (r < 0) - goto fail; - } - - if (f->writable) { - r = journal_file_refresh_header(f); - if (r < 0) - goto fail; - } - - if (newly_created) { - - r = journal_file_setup_hash_table(f); - if (r < 0) - goto fail; - - r = journal_file_setup_bisect_table(f); - if (r < 0) - goto fail; - } - - r = journal_file_map_hash_table(f); - if (r < 0) - goto fail; - - r = journal_file_map_bisect_table(f); - if (r < 0) - goto fail; - - if (ret) - *ret = f; - - return 0; - -fail: - journal_file_close(f); - - return r; +int sd_journal_set_cursor(sd_journal *j, const void *cursor, size_t size) { + return -EINVAL; } int sd_journal_open(sd_journal **ret) { diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 8170dea87c..55e58601fe 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -25,13 
+25,12 @@ #include #include -#include "sd-id128.h" - /* TODO: * * - implement rotation * - check LE/BE conversion for 8bit, 16bit, 32bit values * - implement parallel traversal + * - implement inotify usage on client * - implement audit gateway * - implement native gateway * - extend hash table/bisect table as we go @@ -45,12 +44,13 @@ void sd_journal_close(sd_journal *j); int sd_journal_previous(sd_journal *j); int sd_journal_next(sd_journal *j); -void* sd_journal_get(sd_journal *j, const char *field, size_t *size); -uint64_t sd_journal_get_seqnum(sd_journal *j); -uint64_t sd_journal_get_realtime_usec(sd_journal *j); -uint64_t sd_journal_get_monotonic_usec(sd_journal *j); +int sd_journal_get(sd_journal *j, const char *field, const void **data, size_t *size); +int sd_journal_get_seqnum(sd_journal *j, uint64_t *ret); +int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret); +int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret); -int sd_journal_add_match(sd_journal *j, const char *item, size_t *size); +int sd_journal_add_match(sd_journal *j, const char *field, const void *data, size_t size); +void sd_journal_flush_matches(sd_journal *j); int sd_journal_seek_head(sd_journal *j); int sd_journal_seek_tail(sd_journal *j); @@ -59,16 +59,9 @@ int sd_journal_seek_seqnum(sd_journal *j, uint64_t seqnum); int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); -uint64_t sd_journal_get_max_size(sd_journal *j); -uint64_t sd_journal_get_min_size(sd_journal *j); -uint64_t sd_journal_get_keep_free(sd_journal *j); - -int sd_journal_set_max_size(sd_journal *j, uint64_t size); -int sd_journal_set_min_size(sd_journal *j, uint64_t size); -int sd_journal_set_keep_free(sd_journal *j, uint64_t size); +int sd_journal_get_cursor(sd_journal *j, void **cursor, size_t *size); +int sd_journal_set_cursor(sd_journal *j, const void *cursor, size_t size); -sd_id128_t sd_journal_get_file_id(sd_journal *j); 
-sd_id128_t sd_journal_get_machine_id(sd_journal *j); -sd_id128_t sd_journal_get_boot_id(sd_journal *j); +int sd_journal_get_fd(sd_journal *j); #endif diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index e0aedc7b83..7028f11f7c 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -21,7 +21,7 @@ #include -#include "journal-private.h" +#include "journal-file.h" #include "log.h" int main(int argc, char *argv[]) { -- cgit v1.2.3-54-g00ecf From 38ac38b298a91c358285b7330aa66679338af874 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2011 04:41:27 +0200 Subject: journal: only fallocate() what we really need to avoid slowness on file systems which do not support fallocate natively --- src/journal/journal-file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 37e2e37eb1..45cc0d1d8e 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -162,7 +162,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) new_size = PAGE_ALIGN(offset + size); /* We assume that this file is not sparse, and we know that - * for sure, since we alway call posix_fallocate() + * for sure, since we always call posix_fallocate() * ourselves */ old_size = @@ -195,7 +195,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) if (asize > le64toh(f->header->arena_max_size)) return -E2BIG; - if (posix_fallocate(f->fd, 0, new_size) < 0) + if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0) return -errno; if (fstat(f->fd, &f->last_stat) < 0) -- cgit v1.2.3-54-g00ecf From 7c99e0c1fc05c0b9cd82dc529b52b3d14d511baa Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2011 04:42:38 +0200 Subject: util: fix build --- src/util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util.c b/src/util.c index 26c2f22ff0..e5b5e53f7e 100644 --- a/src/util.c +++ 
b/src/util.c @@ -5902,6 +5902,7 @@ int prot_from_flags(int flags) { default: return -EINVAL; } +} unsigned long cap_last_cap(void) { static __thread unsigned long saved; -- cgit v1.2.3-54-g00ecf From ed49ef3f349bcd4f0483ba8254a2537fe8e9cd17 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2011 05:06:09 +0200 Subject: journal: place machin journals in machine specific subdirectories --- src/journal/journald.c | 75 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index d65451df58..d6db9b1c55 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -211,6 +211,9 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str IOVEC_SET_STRING(iovec[n++], source_time); } + /* Note that strictly speaking storing the boot id here is + * redundant since the entry includes this in-line + * anyway. However, we need this indexed, too. */ r = sd_id128_get_boot(&id); if (r >= 0) if (asprintf(&boot_id, "BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) @@ -349,6 +352,62 @@ static int process_event(Server *s, struct epoll_event *ev) { return 0; } +static int system_journal_open(Server *s) { + int r; + char *fn; + sd_id128_t machine; + char ids[33]; + + r = sd_id128_get_machine(&machine); + if (r < 0) + return r; + + /* First try to create the machine path, but not the prefix */ + fn = join("/var/log/journal/", sd_id128_to_string(machine, ids), NULL); + if (!fn) + return -ENOMEM; + (void) mkdir(fn, 0755); + free(fn); + + /* The create the system journal file */ + fn = join("/var/log/journal/", ids, "/system.journal", NULL); + if (!fn) + return -ENOMEM; + + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->system_journal); + free(fn); + + if (r >= 0) + fix_perms(s->system_journal, 0); + else if (r == -ENOENT) { + + /* /var didn't work, so try /run, but this time we + * create the prefix too */ + fn = 
join("/run/log/journal/", ids, NULL); + if (!fn) + return -ENOMEM; + (void) mkdir_p(fn, 0755); + free(fn); + + /* Then create the runtime journal file */ + fn = join("/run/log/journal/", ids, "/system.journal", NULL); + if (!fn) + return -ENOMEM; + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->runtime_journal); + free(fn); + + if (r >= 0) + fix_perms(s->runtime_journal, 0); + } + + if (r < 0 && r != -ENOENT) { + log_error("Failed to open journal: %s", strerror(-r)); + return r; + } + + return 0; +} + static int server_init(Server *s) { int n, one, r; struct epoll_event ev; @@ -430,21 +489,9 @@ static int server_init(Server *s) { return -ENOMEM; } - r = journal_file_open("/var/log/journal/system.journal", O_RDWR|O_CREAT, 0640, &s->system_journal); - if (r >= 0) - fix_perms(s->system_journal, 0); - else if (r == -ENOENT) { - mkdir_p("/run/log/journal", 0755); - - r = journal_file_open("/run/log/journal/system.journal", O_RDWR|O_CREAT, 0640, &s->runtime_journal); - if (r >= 0) - fix_perms(s->runtime_journal, 0); - } - - if (r < 0 && r != -ENOENT) { - log_error("Failed to open journal: %s", strerror(-r)); + r = system_journal_open(s); + if (r < 0) return r; - } assert_se(sigemptyset(&mask) == 0); sigset_add_many(&mask, SIGINT, SIGTERM, -1); -- cgit v1.2.3-54-g00ecf From 69e5d42db09dfb638bc74055c33bb2645f81563d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2011 05:28:39 +0200 Subject: journal: add cgroup path to entries --- Makefile.am | 3 ++- src/journal/journald.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Makefile.am b/Makefile.am index b26c613d4f..e08785a8a3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -983,7 +983,8 @@ systemd_journald_SOURCES = \ src/journal/journal-file.c \ src/journal/lookup3.c \ src/sd-id128.c \ - src/acl-util.c + src/acl-util.c \ + src/cgroup-util.c systemd_journald_CFLAGS = \ $(AM_CFLAGS) \ diff --git a/src/journal/journald.c b/src/journal/journald.c index 
d6db9b1c55..b8a9fc3adf 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -33,6 +33,7 @@ #include "sd-daemon.h" #include "socket-util.h" #include "acl-util.h" +#include "cgroup-util.h" typedef struct Server { int syslog_fd; @@ -134,8 +135,8 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str *comm = NULL, *cmdline = NULL, *hostname = NULL, *audit_session = NULL, *audit_loginuid = NULL, *syslog_priority = NULL, *syslog_facility = NULL, - *exe = NULL; - struct iovec iovec[15]; + *exe = NULL, *cgroup = NULL; + struct iovec iovec[16]; unsigned n = 0; char idbuf[33]; sd_id128_t id; @@ -160,6 +161,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str if (ucred) { uint32_t session; + char *path; if (asprintf(&pid, "PID=%lu", (unsigned long) ucred->pid) >= 0) IOVEC_SET_STRING(iovec[n++], pid); @@ -203,6 +205,14 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str if (r >= 0) if (asprintf(&audit_loginuid, "AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0) IOVEC_SET_STRING(iovec[n++], audit_loginuid); + + r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, ucred->pid, &path); + if (r >= 0) { + cgroup = strappend("SYSTEMD_CGROUP=", path); + if (cgroup) + IOVEC_SET_STRING(iovec[n++], cgroup); + free(path); + } } if (tv) { @@ -247,6 +257,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str free(uid); free(gid); free(comm); + free(exe); free(cmdline); free(source_time); free(boot_id); @@ -256,6 +267,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str free(audit_loginuid); free(syslog_facility); free(syslog_priority); + free(cgroup); } static int process_event(Server *s, struct epoll_event *ev) { -- cgit v1.2.3-54-g00ecf From 250d54b5bee6a46fe1c1626211e3a7e238eda628 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Oct 2011 05:29:08 +0200 Subject: journalctl: find journal files in right path 
--- src/journal/journalctl.c | 47 ++++++++++++++++++++++++++++++++++++++++++---- src/journal/journald.c | 49 +++++++++++++++++++++++++----------------------- 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 5f17f45cac..ac376eaf4f 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -25,6 +25,48 @@ #include "journal-file.h" +static int system_journal_open(JournalFile **f) { + int r; + char *fn; + sd_id128_t machine; + char ids[33]; + + assert(f); + + r = sd_id128_get_machine(&machine); + if (r < 0) + return r; + + fn = join("/var/log/journal/", sd_id128_to_string(machine, ids), "/system.journal", NULL); + if (!fn) + return -ENOMEM; + + r = journal_file_open(fn, O_RDONLY, 0640, f); + free(fn); + + if (r >= 0) + return r; + + if (r < 0 && r != -ENOENT) { + log_error("Failed to open system journal: %s", strerror(-r)); + return r; + } + + fn = join("/run/log/journal/", ids, "/system.journal", NULL); + if (!fn) + return -ENOMEM; + + r = journal_file_open(fn, O_RDONLY, 0640, f); + free(fn); + + if (r < 0) { + log_error("Failed to open system journal: %s", strerror(-r)); + return r; + } + + return r; +} + int main(int argc, char *argv[]) { int r; JournalFile *f; @@ -33,10 +75,7 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); - r = journal_file_open("/var/log/journal/system.journal", O_RDONLY, 0644, &f); - if (r == -ENOENT) - r = journal_file_open("/run/log/journal/system.journal", O_RDONLY, 0644, &f); - + r = system_journal_open(&f); if (r < 0) { log_error("Failed to open journal: %s", strerror(-r)); return EXIT_FAILURE; diff --git a/src/journal/journald.c b/src/journal/journald.c index b8a9fc3adf..94261f6763 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -375,7 +375,7 @@ static int system_journal_open(Server *s) { return r; /* First try to create the machine path, but not the prefix */ - fn = join("/var/log/journal/", 
sd_id128_to_string(machine, ids), NULL); + fn = strappend("/var/log/journal/", sd_id128_to_string(machine, ids)); if (!fn) return -ENOMEM; (void) mkdir(fn, 0755); @@ -389,35 +389,38 @@ static int system_journal_open(Server *s) { r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->system_journal); free(fn); - if (r >= 0) + if (r >= 0) { fix_perms(s->system_journal, 0); - else if (r == -ENOENT) { - - /* /var didn't work, so try /run, but this time we - * create the prefix too */ - fn = join("/run/log/journal/", ids, NULL); - if (!fn) - return -ENOMEM; - (void) mkdir_p(fn, 0755); - free(fn); - - /* Then create the runtime journal file */ - fn = join("/run/log/journal/", ids, "/system.journal", NULL); - if (!fn) - return -ENOMEM; - r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->runtime_journal); - free(fn); - - if (r >= 0) - fix_perms(s->runtime_journal, 0); + return r; } if (r < 0 && r != -ENOENT) { - log_error("Failed to open journal: %s", strerror(-r)); + log_error("Failed to open system journal: %s", strerror(-r)); return r; } - return 0; + /* /var didn't work, so try /run, but this time we + * create the prefix too */ + fn = strappend("/run/log/journal/", ids); + if (!fn) + return -ENOMEM; + (void) mkdir_p(fn, 0755); + free(fn); + + /* Then create the runtime journal file */ + fn = join("/run/log/journal/", ids, "/system.journal", NULL); + if (!fn) + return -ENOMEM; + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->runtime_journal); + free(fn); + + if (r < 0) { + log_error("Failed to open runtime journal: %s", strerror(-r)); + return r; + } + + fix_perms(s->runtime_journal, 0); + return r; } static int server_init(Server *s) { -- cgit v1.2.3-54-g00ecf From 0ac38b707212e9aa40e25d65ffbae648cc9116f5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 13 Oct 2011 05:19:35 +0200 Subject: journal: implementation rotation --- src/journal/journal-def.h | 2 + src/journal/journal-file.c | 247 ++++++++++++++++++++++++++++++++++++++++++--- 
src/journal/journal-file.h | 9 +- src/journal/journalctl.c | 4 +- src/journal/journald.c | 10 +- src/journal/sd-journal.c | 2 +- src/journal/test-journal.c | 10 +- 7 files changed, 260 insertions(+), 24 deletions(-) diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index b3fa1e524f..d44b070fd0 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -136,6 +136,8 @@ _packed_ struct Header { uint64_t last_bisect_offset; uint64_t n_objects; uint64_t seqnum; + uint64_t head_entry_realtime; + uint64_t tail_entry_realtime; }; #endif diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 45cc0d1d8e..934c043aff 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -35,6 +35,8 @@ #define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) #define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) +#define DEFAULT_MAX_USE (16ULL*1024ULL*1024ULL*16ULL) + #define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) #define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) @@ -47,11 +49,12 @@ static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; void journal_file_close(JournalFile *f) { assert(f); - if (f->fd >= 0) - close_nointr_nofail(f->fd); + if (f->header) { + if (f->writable && f->header->state == htole32(STATE_ONLINE)) + f->header->state = htole32(STATE_OFFLINE); - if (f->header) munmap(f->header, PAGE_ALIGN(sizeof(Header))); + } if (f->hash_table_window) munmap(f->hash_table_window, f->hash_table_window_size); @@ -62,11 +65,14 @@ void journal_file_close(JournalFile *f) { if (f->window) munmap(f->window, f->window_size); + if (f->fd >= 0) + close_nointr_nofail(f->fd); + free(f->path); free(f); } -static int journal_file_init_header(JournalFile *f) { +static int journal_file_init_header(JournalFile *f, JournalFile *template) { Header h; ssize_t k; int r; @@ -84,7 +90,11 @@ static int journal_file_init_header(JournalFile *f) { if (r < 0) return r; - h.seqnum_id = h.file_id; + if 
(template) { + h.seqnum_id = template->header->seqnum_id; + h.seqnum = template->header->seqnum; + } else + h.seqnum_id = h.file_id; k = pwrite(f->fd, &h, sizeof(h), 0); if (k < 0) @@ -674,9 +684,10 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { o->entry.prev_entry_offset = f->header->tail_entry_offset; o->entry.next_entry_offset = 0; - if (p == 0) + if (p == 0) { f->header->head_entry_offset = htole64(offset); - else { + f->header->head_entry_realtime = o->entry.realtime; + } else { /* Temporarily move back to the previous entry, to * patch in pointer */ @@ -692,6 +703,7 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { } f->header->tail_entry_offset = htole64(offset); + f->header->tail_entry_realtime = o->entry.realtime; /* Link up the items */ n = journal_file_entry_n_items(o); @@ -1087,6 +1099,7 @@ int journal_file_open( const char *fname, int flags, mode_t mode, + JournalFile *template, JournalFile **ret) { JournalFile *f; @@ -1103,21 +1116,24 @@ int journal_file_open( if (!f) return -ENOMEM; + f->fd = -1; + f->flags = flags; + f->mode = mode; f->writable = (flags & O_ACCMODE) != O_RDONLY; f->prot = prot_from_flags(flags); - f->fd = open(fname, flags|O_CLOEXEC, mode); - if (f->fd < 0) { - r = -errno; - goto fail; - } - f->path = strdup(fname); if (!f->path) { r = -ENOMEM; goto fail; } + f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode); + if (f->fd < 0) { + r = -errno; + goto fail; + } + if (fstat(f->fd, &f->last_stat) < 0) { r = -errno; goto fail; @@ -1126,7 +1142,7 @@ int journal_file_open( if (f->last_stat.st_size == 0 && f->writable) { newly_created = true; - r = journal_file_init_header(f); + r = journal_file_init_header(f, template); if (r < 0) goto fail; @@ -1189,3 +1205,206 @@ fail: return r; } + +int journal_file_rotate(JournalFile **f) { + char *p; + size_t l; + JournalFile *old_file, *new_file = NULL; + int r; + + assert(f); + assert(*f); + + old_file = *f; + + if 
(!old_file->writable) + return -EINVAL; + + if (!endswith(old_file->path, ".journal")) + return -EINVAL; + + l = strlen(old_file->path); + + p = new(char, l + 1 + 16 + 1 + 32 + 1 + 16 + 1); + if (!p) + return -ENOMEM; + + memcpy(p, old_file->path, l - 8); + p[l-8] = '@'; + sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1); + snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1, + "-%016llx-%016llx.journal", + (unsigned long long) le64toh((*f)->header->seqnum), + (unsigned long long) le64toh((*f)->header->tail_entry_realtime)); + + r = rename(old_file->path, p); + free(p); + + if (r < 0) + return -errno; + + old_file->header->state = le32toh(STATE_ARCHIVED); + + r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file); + journal_file_close(old_file); + + *f = new_file; + return r; +} + +struct vacuum_info { + off_t usage; + char *filename; + + uint64_t realtime; + sd_id128_t seqnum_id; + uint64_t seqnum; +}; + +static int vacuum_compare(const void *_a, const void *_b) { + const struct vacuum_info *a, *b; + + a = _a; + b = _b; + + if (sd_id128_equal(a->seqnum_id, b->seqnum_id)) { + if (a->seqnum < b->seqnum) + return -1; + else if (a->seqnum > b->seqnum) + return 1; + else + return 0; + } + + if (a->realtime < b->realtime) + return -1; + else if (a->realtime > b->realtime) + return 1; + else + return memcmp(&a->seqnum_id, &b->seqnum_id, 16); +} + +int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) { + DIR *d; + int r = 0; + struct vacuum_info *list = NULL; + unsigned n_list = 0, n_allocated = 0, i; + uint64_t sum = 0; + + assert(directory); + + if (max_use <= 0) + max_use = DEFAULT_MAX_USE; + + d = opendir(directory); + if (!d) + return -errno; + + for (;;) { + int k; + struct dirent buf, *de; + size_t q; + struct stat st; + char *p; + unsigned long long seqnum, realtime; + sd_id128_t seqnum_id; + + k = readdir_r(d, &buf, &de); + if (k != 0) { + r = -k; + goto finish; + } + + if 
(!de) + break; + + if (!dirent_is_file_with_suffix(de, ".journal")) + continue; + + q = strlen(de->d_name); + + if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8) + continue; + + if (de->d_name[q-8-16-1] != '-' || + de->d_name[q-8-16-1-16-1] != '-' || + de->d_name[q-8-16-1-16-1-32-1] != '@') + continue; + + if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) + continue; + + if (!S_ISREG(st.st_mode)) + continue; + + p = strdup(de->d_name); + if (!p) { + r = -ENOMEM; + goto finish; + } + + de->d_name[q-8-16-1-16-1] = 0; + if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) { + free(p); + continue; + } + + if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) { + free(p); + continue; + } + + if (n_list >= n_allocated) { + struct vacuum_info *j; + + n_allocated = MAX(n_allocated * 2U, 8U); + j = realloc(list, n_allocated * sizeof(struct vacuum_info)); + if (!j) { + free(p); + r = -ENOMEM; + goto finish; + } + + list = j; + } + + list[n_list].filename = p; + list[n_list].usage = (uint64_t) st.st_blksize * (uint64_t) st.st_blocks; + list[n_list].seqnum = seqnum; + list[n_list].realtime = realtime; + list[n_list].seqnum_id = seqnum_id; + + sum += list[n_list].usage; + + n_list ++; + } + + qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare); + + for(i = 0; i < n_list; i++) { + struct statvfs ss; + + if (fstatvfs(dirfd(d), &ss) < 0) { + r = -errno; + goto finish; + } + + if (sum <= max_use && + (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free) + break; + + if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) { + log_debug("Deleted archived journal %s/%s.", directory, list[i].filename); + sum -= list[i].usage; + } else if (errno != ENOENT) + log_warning("Failed to delete %s/%s: %m", directory, list[i].filename); + } + +finish: + for (i = 0; i < n_list; i++) + free(list[i].filename); + + free(list); + + return r; +} diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 
55cc7153af..4665f4da38 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -32,6 +32,8 @@ typedef struct JournalFile { int fd; char *path; struct stat last_stat; + mode_t mode; + int flags; int prot; bool writable; @@ -63,7 +65,7 @@ typedef struct JournalCursor { uint64_t xor_hash; } JournalCursor; -int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile **ret); +int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile *template, JournalFile **ret); void journal_file_close(JournalFile *j); @@ -83,4 +85,9 @@ int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *o void journal_file_dump(JournalFile *f); +int journal_file_rotate(JournalFile **f); + +int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free); + + #endif diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index ac376eaf4f..4645f9ebb0 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -41,7 +41,7 @@ static int system_journal_open(JournalFile **f) { if (!fn) return -ENOMEM; - r = journal_file_open(fn, O_RDONLY, 0640, f); + r = journal_file_open(fn, O_RDONLY, 0640, NULL, f); free(fn); if (r >= 0) @@ -56,7 +56,7 @@ static int system_journal_open(JournalFile **f) { if (!fn) return -ENOMEM; - r = journal_file_open(fn, O_RDONLY, 0640, f); + r = journal_file_open(fn, O_RDONLY, 0640, NULL, f); free(fn); if (r < 0) { diff --git a/src/journal/journald.c b/src/journal/journald.c index 94261f6763..7a2b50b017 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -109,10 +109,10 @@ static JournalFile* find_journal(Server *s, uid_t uid) { if (f) return f; - if (asprintf(&p, "/var/log/journal/%lu.journal", (unsigned long) uid) < 0) + if (asprintf(&p, "/var/log/journal/user-%lu.journal", (unsigned long) uid) < 0) return s->system_journal; - r = journal_file_open(p, O_RDWR|O_CREAT, 0640, &f); + r = journal_file_open(p, O_RDWR|O_CREAT, 0640, NULL, &f); 
free(p); if (r < 0) @@ -386,7 +386,7 @@ static int system_journal_open(Server *s) { if (!fn) return -ENOMEM; - r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->system_journal); + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal); free(fn); if (r >= 0) { @@ -411,7 +411,7 @@ static int system_journal_open(Server *s) { fn = join("/run/log/journal/", ids, "/system.journal", NULL); if (!fn) return -ENOMEM; - r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, &s->runtime_journal); + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal); free(fn); if (r < 0) { @@ -584,7 +584,7 @@ int main(int argc, char *argv[]) { sd_notify(false, "READY=1\n" "STATUS=Processing messages..."); - +# for (;;) { struct epoll_event event; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 8426b3bf9e..9f5f1e858b 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -294,7 +294,7 @@ int sd_journal_open(sd_journal **ret) { goto fail; } - k = journal_file_open(fn, O_RDONLY, 0, &f); + k = journal_file_open(fn, O_RDONLY, 0, NULL, &f); free(fn); if (k < 0) { diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index 7028f11f7c..8dd26bbc32 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -20,6 +20,7 @@ ***/ #include +#include #include "journal-file.h" #include "log.h" @@ -33,7 +34,9 @@ int main(int argc, char *argv[]) { log_set_max_level(LOG_DEBUG); - assert_se(journal_file_open("test", O_RDWR|O_CREAT, 0666, &f) == 0); + unlink("test.journal"); + + assert_se(journal_file_open("test.journal", O_RDWR|O_CREAT, 0666, NULL, &f) == 0); dual_timestamp_get(&ts); @@ -87,7 +90,12 @@ int main(int argc, char *argv[]) { assert(journal_file_move_to_entry(f, 10, &o, NULL) == 0); + journal_file_rotate(&f); + journal_file_rotate(&f); + journal_file_close(f); + journal_directory_vacuum(".", 3000000, 0); + return 0; } -- cgit v1.2.3-54-g00ecf From 3fbf9cbb02690e40cd65802e777519f3f3c8d88a Mon Sep 
17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Oct 2011 04:44:50 +0200 Subject: journal: implement parallel file traversal --- src/journal/journal-file.c | 9 +- src/journal/journal-file.h | 12 +- src/journal/journalctl.c | 103 ++++---------- src/journal/journald.c | 17 +-- src/journal/sd-journal.c | 346 +++++++++++++++++++++++++++++++++++++-------- src/journal/sd-journal.h | 20 ++- 6 files changed, 340 insertions(+), 167 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 934c043aff..6c8d712dbd 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -751,8 +751,8 @@ static int journal_file_append_entry_internal( o->object.type = htole64(OBJECT_ENTRY); o->entry.seqnum = htole64(journal_file_seqnum(f)); memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = ts ? htole64(ts->realtime) : 0; - o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; + o->entry.realtime = htole64(ts ? ts->realtime : now(CLOCK_REALTIME)); + o->entry.monotonic = htole64(ts ? 
ts->monotonic : now(CLOCK_MONOTONIC)); o->entry.xor_hash = htole64(xor_hash); o->entry.boot_id = f->header->boot_id; @@ -1072,7 +1072,10 @@ void journal_file_dump(JournalFile *f) { break; case OBJECT_ENTRY: - printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); + printf("Type: OBJECT_ENTRY %llu %llu %llu\n", + (unsigned long long) le64toh(o->entry.seqnum), + (unsigned long long) le64toh(o->entry.monotonic), + (unsigned long long) le64toh(o->entry.realtime)); break; case OBJECT_HASH_TABLE: diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 4665f4da38..c51504d11b 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -52,19 +52,9 @@ typedef struct JournalFile { uint64_t window_size; uint64_t current_offset; + uint64_t current_field; } JournalFile; -typedef struct JournalCursor { - uint8_t version; - uint8_t reserved[7]; - uint64_t seqnum; - sd_id128_t seqnum_id; - sd_id128_t boot_id; - uint64_t monotonic; - uint64_t realtime; - uint64_t xor_hash; -} JournalCursor; - int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile *template, JournalFile **ret); void journal_file_close(JournalFile *j); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 4645f9ebb0..bb1f18a21f 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -22,104 +22,51 @@ #include #include #include +#include +#include +#include +#include -#include "journal-file.h" - -static int system_journal_open(JournalFile **f) { - int r; - char *fn; - sd_id128_t machine; - char ids[33]; - - assert(f); - - r = sd_id128_get_machine(&machine); - if (r < 0) - return r; - - fn = join("/var/log/journal/", sd_id128_to_string(machine, ids), "/system.journal", NULL); - if (!fn) - return -ENOMEM; - - r = journal_file_open(fn, O_RDONLY, 0640, NULL, f); - free(fn); - - if (r >= 0) - return r; - - if (r < 0 && r != -ENOENT) { - log_error("Failed to open system journal: %s", strerror(-r)); - return 
r; - } - - fn = join("/run/log/journal/", ids, "/system.journal", NULL); - if (!fn) - return -ENOMEM; - - r = journal_file_open(fn, O_RDONLY, 0640, NULL, f); - free(fn); - - if (r < 0) { - log_error("Failed to open system journal: %s", strerror(-r)); - return r; - } - - return r; -} +#include "sd-journal.h" +#include "log.h" int main(int argc, char *argv[]) { int r; - JournalFile *f; - Object *o = NULL; + sd_journal *j = NULL; + + log_set_max_level(LOG_DEBUG); + log_set_target(LOG_TARGET_CONSOLE); log_parse_environment(); log_open(); - r = system_journal_open(&f); + r = sd_journal_open(&j); if (r < 0) { log_error("Failed to open journal: %s", strerror(-r)); - return EXIT_FAILURE; + goto finish; } - for (;;) { - uint64_t offset; - uint64_t n, i; + SD_JOURNAL_FOREACH(j) { + const void *data; + size_t length; + char *cursor; - r = journal_file_next_entry(f, o, &o, &offset); + r = sd_journal_get_cursor(j, &cursor); if (r < 0) { - log_error("Failed to read journal: %s", strerror(-r)); + log_error("Failed to get cursor: %s", strerror(-r)); goto finish; } - if (r == 0) - break; + printf("entry: %s\n", cursor); + free(cursor); - printf("entry: %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); - - n = journal_file_entry_n_items(o); - for (i = 0; i < n; i++) { - uint64_t p, l; - - p = le64toh(o->entry.items[i].object_offset); - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); - if (r < 0) { - log_error("Failed to move to data: %s", strerror(-r)); - goto finish; - } - - l = o->object.size - offsetof(Object, data.payload); - printf("\t[%.*s]\n", (int) l, o->data.payload); - - r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); - if (r < 0) { - log_error("Failed to move back to entry: %s", strerror(-r)); - goto finish; - } - } + SD_JOURNAL_FOREACH_FIELD(j, data, length) + printf("\t%.*s\n", (int) length, (const char*) data); } finish: - journal_file_close(f); + if (j) + sd_journal_close(j); - return 0; + return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/journal/journald.c b/src/journal/journald.c index 7a2b50b017..1143d81abc 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -95,6 +95,8 @@ static JournalFile* find_journal(Server *s, uid_t uid) { char *p; int r; JournalFile *f; + char ids[33]; + sd_id128_t machine; assert(s); @@ -105,11 +107,15 @@ static JournalFile* find_journal(Server *s, uid_t uid) { if (uid <= 0) return s->system_journal; + r = sd_id128_get_machine(&machine); + if (r < 0) + return s->system_journal; + f = hashmap_get(s->user_journals, UINT32_TO_PTR(uid)); if (f) return f; - if (asprintf(&p, "/var/log/journal/user-%lu.journal", (unsigned long) uid) < 0) + if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0) return s->system_journal; r = journal_file_open(p, O_RDWR|O_CREAT, 0640, NULL, &f); @@ -401,16 +407,11 @@ static int system_journal_open(Server *s) { /* /var didn't work, so try /run, but this time we * create the prefix too */ - fn = strappend("/run/log/journal/", ids); - if (!fn) - return -ENOMEM; - (void) mkdir_p(fn, 0755); - free(fn); - - /* Then create the runtime journal file */ fn = join("/run/log/journal/", ids, "/system.journal", NULL); if (!fn) return -ENOMEM; + + (void) mkdir_parents(fn, 0755); r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal); free(fn); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 9f5f1e858b..6f47520217 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -21,6 +21,7 @@ #include #include +#include #include "sd-journal.h" #include "journal-def.h" @@ -151,7 +152,8 @@ int sd_journal_next(sd_journal *j) { if (new_current) { j->current_file = new_current; - f->current_offset = new_offset; + j->current_file->current_offset = new_offset; + j->current_file->current_field = 0; return 1; } @@ -193,59 +195,134 @@ int sd_journal_previous(sd_journal *j) { if (new_current) { 
j->current_file = new_current; - f->current_offset = new_offset; + j->current_file->current_offset = new_offset; + j->current_file->current_field = 0; return 1; } return 0; } -int sd_journal_get_cursor(sd_journal *j, void **cursor, size_t *size) { - JournalCursor *c; +int sd_journal_get_cursor(sd_journal *j, char **cursor) { Object *o; int r; + char bid[33], sid[33]; assert(j); assert(cursor); - assert(size); - if (!j->current_file || !j->current_file->current_offset <= 0) - return 0; + if (!j->current_file || j->current_file->current_offset <= 0) + return -EADDRNOTAVAIL; r = journal_file_move_to_object(j->current_file, j->current_file->current_offset, OBJECT_ENTRY, &o); if (r < 0) return r; - c = new0(JournalCursor, 1); - if (!c) - return -ENOMEM; + sd_id128_to_string(j->current_file->header->seqnum_id, sid); + sd_id128_to_string(o->entry.boot_id, bid); - c->version = 1; - c->seqnum = o->entry.seqnum; - c->seqnum_id = j->current_file->header->seqnum_id; - c->boot_id = o->entry.boot_id; - c->monotonic = o->entry.monotonic; - c->realtime = o->entry.realtime; - c->xor_hash = o->entry.xor_hash; - - *cursor = c; - *size = sizeof(JournalCursor); + if (asprintf(cursor, + "s=%s;i=%llx;b=%s;m=%llx;t=%llx;x=%llx;p=%s", + sid, (unsigned long long) le64toh(o->entry.seqnum), + bid, (unsigned long long) le64toh(o->entry.monotonic), + (unsigned long long) le64toh(o->entry.realtime), + (unsigned long long) le64toh(o->entry.xor_hash), + file_name_from_path(j->current_file->path)) < 0) + return -ENOMEM; return 1; } -int sd_journal_set_cursor(sd_journal *j, const void *cursor, size_t size) { +int sd_journal_set_cursor(sd_journal *j, const char *cursor) { return -EINVAL; } +static int add_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) { + char *fn; + int r; + JournalFile *f; + + assert(j); + assert(prefix); + assert(filename); + + if (dir) + fn = join(prefix, "/", dir, "/", filename, NULL); + else + fn = join(prefix, "/", filename, NULL); + + if (!fn) 
+ return -ENOMEM; + + r = journal_file_open(fn, O_RDONLY, 0, NULL, &f); + free(fn); + + if (r < 0) { + if (errno == ENOENT) + return 0; + + return r; + } + + r = hashmap_put(j->files, f->path, f); + if (r < 0) { + journal_file_close(f); + return r; + } + + return 0; +} + +static int add_directory(sd_journal *j, const char *prefix, const char *dir) { + char *fn; + int r; + DIR *d; + + assert(j); + assert(prefix); + assert(dir); + + fn = join(prefix, "/", dir, NULL); + if (!fn) + return -ENOMEM; + + d = opendir(fn); + free(fn); + + if (!d) { + if (errno == ENOENT) + return 0; + + return -errno; + } + + for (;;) { + struct dirent buf, *de; + + r = readdir_r(d, &buf, &de); + if (r != 0 || !de) + break; + + if (!dirent_is_file_with_suffix(de, ".journal")) + continue; + + r = add_file(j, prefix, dir, de->d_name); + if (r < 0) + log_debug("Failed to add file %s/%s/%s: %s", prefix, dir, de->d_name, strerror(-r)); + } + + closedir(d); + + return 0; +} + int sd_journal_open(sd_journal **ret) { sd_journal *j; - char *fn; const char *p; - int r = 0; const char search_paths[] = "/run/log/journal\0" "/var/log/journal\0"; + int r; assert(ret); @@ -254,64 +331,47 @@ int sd_journal_open(sd_journal **ret) { return -ENOMEM; j->files = hashmap_new(string_hash_func, string_compare_func); - if (!j->files) + if (!j->files) { + r = -ENOMEM; goto fail; + } + + /* We ignore most errors here, since the idea is to only open + * what's actually accessible, and ignore the rest. 
*/ NULSTR_FOREACH(p, search_paths) { DIR *d; d = opendir(p); if (!d) { - if (errno != ENOENT && r == 0) - r = -errno; - + if (errno != ENOENT) + log_debug("Failed to open %s: %m", p); continue; } for (;;) { struct dirent buf, *de; - int k; - JournalFile *f; - - k = readdir_r(d, &buf, &de); - if (k != 0) { - if (r == 0) - r = -k; - - break; - } + sd_id128_t id; - if (!de) + r = readdir_r(d, &buf, &de); + if (r != 0 || !de) break; - if (!dirent_is_file_with_suffix(de, ".journal")) - continue; + if (dirent_is_file_with_suffix(de, ".journal")) { + r = add_file(j, p, NULL, de->d_name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", p, de->d_name, strerror(-r)); - fn = join(p, "/", de->d_name, NULL); - if (!fn) { - r = -ENOMEM; - closedir(d); - goto fail; - } - - k = journal_file_open(fn, O_RDONLY, 0, NULL, &f); - free(fn); - - if (k < 0) { + } else if ((de->d_type == DT_DIR || de->d_type == DT_UNKNOWN) && + sd_id128_from_string(de->d_name, &id) >= 0) { - if (r == 0) - r = -k; - } else { - k = hashmap_put(j->files, f->path, f); - if (k < 0) { - journal_file_close(f); - closedir(d); - - r = k; - goto fail; - } + r = add_directory(j, p, de->d_name); + if (r < 0) + log_debug("Failed to add directory %s/%s: %s", p, de->d_name, strerror(-r)); } } + + closedir(d); } *ret = j; @@ -337,3 +397,165 @@ void sd_journal_close(sd_journal *j) { free(j); } + +int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) { + Object *o; + JournalFile *f; + int r; + + assert(j); + assert(ret); + + f = j->current_file; + if (!f) + return 0; + + if (f->current_offset <= 0) + return 0; + + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + *ret = le64toh(o->entry.realtime); + return 1; +} + +int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret) { + Object *o; + JournalFile *f; + int r; + sd_id128_t id; + + assert(j); + assert(ret); + + f = j->current_file; + if (!f) + return 0; + + if (f->current_offset <= 0) + return 0; 
+ + r = sd_id128_get_machine(&id); + if (r < 0) + return r; + + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + if (!sd_id128_equal(id, o->entry.boot_id)) + return 0; + + *ret = le64toh(o->entry.monotonic); + return 1; + +} + +int sd_journal_get_field(sd_journal *j, const char *field, const void **data, size_t *size) { + JournalFile *f; + uint64_t i, n; + size_t field_length; + int r; + Object *o; + + assert(j); + assert(field); + assert(data); + assert(size); + + if (isempty(field) || strchr(field, '=')) + return -EINVAL; + + f = j->current_file; + if (!f) + return 0; + + if (f->current_offset <= 0) + return 0; + + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + field_length = strlen(field); + + n = journal_file_entry_n_items(o); + for (i = 0; i < n; i++) { + uint64_t p, l; + size_t t; + + p = le64toh(o->entry.items[i].object_offset); + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r < 0) + return r; + + l = le64toh(o->object.size) - offsetof(Object, data.payload); + + if (field_length+1 > l) + continue; + + if (memcmp(o->data.payload, field, field_length) || + o->data.payload[field_length] != '=') + continue; + + t = (size_t) l; + + if ((uint64_t) t != l) + return -E2BIG; + + *data = o->data.payload; + *size = t; + + return 1; + } + + return 0; +} + +int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { + JournalFile *f; + uint64_t p, l, n; + size_t t; + int r; + Object *o; + + assert(j); + assert(data); + assert(size); + + f = j->current_file; + if (!f) + return 0; + + if (f->current_offset <= 0) + return 0; + + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + + n = journal_file_entry_n_items(o); + if (f->current_field >= n) + return 0; + + p = le64toh(o->entry.items[f->current_field].object_offset); + r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + if (r 
< 0) + return r; + + l = le64toh(o->object.size) - offsetof(Object, data.payload); + t = (size_t) l; + + /* We can't read objects larger than 4G on a 32bit machine */ + if ((uint64_t) t != l) + return -E2BIG; + + *data = o->data.payload; + *size = t; + + f->current_field ++; + + return 1; +} diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 55e58601fe..bbfcda6f2a 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -27,13 +27,14 @@ /* TODO: * - * - implement rotation * - check LE/BE conversion for 8bit, 16bit, 32bit values * - implement parallel traversal * - implement inotify usage on client * - implement audit gateway * - implement native gateway + * - implement stdout gateway * - extend hash table/bisect table as we go + * - accelerate looking for "all hostnames" and suchlike. */ typedef struct sd_journal sd_journal; @@ -44,10 +45,10 @@ void sd_journal_close(sd_journal *j); int sd_journal_previous(sd_journal *j); int sd_journal_next(sd_journal *j); -int sd_journal_get(sd_journal *j, const char *field, const void **data, size_t *size); -int sd_journal_get_seqnum(sd_journal *j, uint64_t *ret); int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret); int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret); +int sd_journal_get_field(sd_journal *j, const char *field, const void **data, size_t *l); +int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *l); int sd_journal_add_match(sd_journal *j, const char *field, const void *data, size_t size); void sd_journal_flush_matches(sd_journal *j); @@ -59,9 +60,18 @@ int sd_journal_seek_seqnum(sd_journal *j, uint64_t seqnum); int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); -int sd_journal_get_cursor(sd_journal *j, void **cursor, size_t *size); -int sd_journal_set_cursor(sd_journal *j, const void *cursor, size_t size); +int sd_journal_get_cursor(sd_journal *j, char 
**cursor); +int sd_journal_set_cursor(sd_journal *j, const char *cursor); int sd_journal_get_fd(sd_journal *j); +#define SD_JOURNAL_FOREACH(j) \ + while (sd_journal_next(j) > 0) + +#define SD_JOURNAL_FOREACH_BACKWARDS(j) \ + while (sd_journal_previous(j) > 0) + +#define SD_JOURNAL_FOREACH_FIELD(j, data, l) \ + while (sd_journal_iterate_fields((j), &(data), &(l)) > 0) + #endif -- cgit v1.2.3-54-g00ecf From 161e54f8719c4a11440d762276cbccbeb1736f8c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Oct 2011 04:52:56 +0200 Subject: journal: fix field retrieval by name --- src/journal/sd-journal.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 6f47520217..1f4ad0ff64 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -493,22 +493,24 @@ int sd_journal_get_field(sd_journal *j, const char *field, const void **data, si l = le64toh(o->object.size) - offsetof(Object, data.payload); - if (field_length+1 > l) - continue; + if (l >= field_length+1 && + memcmp(o->data.payload, field, field_length) == 0 && + o->data.payload[field_length] == '=') { - if (memcmp(o->data.payload, field, field_length) || - o->data.payload[field_length] != '=') - continue; + t = (size_t) l; - t = (size_t) l; + if ((uint64_t) t != l) + return -E2BIG; - if ((uint64_t) t != l) - return -E2BIG; + *data = o->data.payload; + *size = t; - *data = o->data.payload; - *size = t; + return 1; + } - return 1; + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; } return 0; -- cgit v1.2.3-54-g00ecf From c2373f848dddcc1827cf715c5ef778dc8d475761 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Oct 2011 05:12:58 +0200 Subject: journal: synchronize seqnum across files --- src/journal/journal-file.c | 21 +++++++++++++++++---- src/journal/journal-file.h | 2 +- src/journal/journalctl.c | 8 ++++++++ src/journal/journald.c | 6 
++++-- src/journal/sd-journal.c | 10 ++++++++++ src/journal/sd-journal.h | 1 - src/journal/test-journal.c | 6 +++--- 7 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 6c8d712dbd..537978137b 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -364,12 +364,24 @@ int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Objec return 0; } -static uint64_t journal_file_seqnum(JournalFile *f) { +static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) { uint64_t r; assert(f); r = le64toh(f->header->seqnum) + 1; + + if (seqnum) { + /* If an external seqno counter was passed, we update + * both the local and the external one, and set it to + * the maximum of both */ + + if (*seqnum + 1 > r) + r = *seqnum + 1; + + *seqnum = r; + } + f->header->seqnum = htole64(r); return r; @@ -733,6 +745,7 @@ static int journal_file_append_entry_internal( const dual_timestamp *ts, uint64_t xor_hash, const EntryItem items[], unsigned n_items, + uint64_t *seqno, Object **ret, uint64_t *offset) { uint64_t np; uint64_t osize; @@ -749,7 +762,7 @@ static int journal_file_append_entry_internal( return r; o->object.type = htole64(OBJECT_ENTRY); - o->entry.seqnum = htole64(journal_file_seqnum(f)); + o->entry.seqnum = htole64(journal_file_seqnum(f, seqno)); memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); o->entry.realtime = htole64(ts ? ts->realtime : now(CLOCK_REALTIME)); o->entry.monotonic = htole64(ts ? 
ts->monotonic : now(CLOCK_MONOTONIC)); @@ -769,7 +782,7 @@ static int journal_file_append_entry_internal( return 0; } -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset) { unsigned i; EntryItem *items; int r; @@ -794,7 +807,7 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st items[i].object_offset = htole64(p); } - r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); + r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqno, ret, offset); finish: free(items); diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index c51504d11b..92f671a756 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -63,7 +63,7 @@ int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Objec uint64_t journal_file_entry_n_items(Object *o); -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset); +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset); int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index bb1f18a21f..a6b6e0fbd6 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -50,6 +50,7 @@ int main(int argc, char *argv[]) { const void *data; size_t length; char *cursor; + uint64_t realtime = 0, monotonic = 0; r = sd_journal_get_cursor(j, &cursor); if (r < 0) { @@ -60,6 +61,13 @@ int main(int argc, char *argv[]) { printf("entry: %s\n", cursor); 
free(cursor); + sd_journal_get_realtime_usec(j, &realtime); + sd_journal_get_monotonic_usec(j, &monotonic); + printf("realtime: %llu\n" + "monotonic: %llu\n", + (unsigned long long) realtime, + (unsigned long long) monotonic); + SD_JOURNAL_FOREACH_FIELD(j, data, length) printf("\t%.*s\n", (int) length, (const char*) data); } diff --git a/src/journal/journald.c b/src/journal/journald.c index 1143d81abc..ede314a55f 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -43,6 +43,8 @@ typedef struct Server { JournalFile *runtime_journal; JournalFile *system_journal; Hashmap *user_journals; + + uint64_t seqnum; } Server; static void fix_perms(JournalFile *f, uid_t uid) { @@ -118,7 +120,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) { if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0) return s->system_journal; - r = journal_file_open(p, O_RDWR|O_CREAT, 0640, NULL, &f); + r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f); free(p); if (r < 0) @@ -252,7 +254,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str if (!f) log_warning("Dropping message, as we can't find a place to store the data."); else { - r = journal_file_append_entry(f, NULL, iovec, n, NULL, NULL); + r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); if (r < 0) log_error("Failed to write entry, ignoring: %s", strerror(-r)); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 1f4ad0ff64..1614bbf27f 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -561,3 +561,13 @@ int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { return 1; } + +int sd_journal_seek_head(sd_journal *j) { + assert(j); + return -EINVAL; +} + +int sd_journal_seek_tail(sd_journal *j) { + assert(j); + return -EINVAL; +} diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index bbfcda6f2a..13b5f891d7 
100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -56,7 +56,6 @@ void sd_journal_flush_matches(sd_journal *j); int sd_journal_seek_head(sd_journal *j); int sd_journal_seek_tail(sd_journal *j); -int sd_journal_seek_seqnum(sd_journal *j, uint64_t seqnum); int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index 8dd26bbc32..3b67f1aa9f 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -42,15 +42,15 @@ int main(int argc, char *argv[]) { iovec.iov_base = (void*) test; iovec.iov_len = strlen(test); - assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL) == 0); + assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL, NULL) == 0); iovec.iov_base = (void*) test2; iovec.iov_len = strlen(test2); - assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL) == 0); + assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL, NULL) == 0); iovec.iov_base = (void*) test; iovec.iov_len = strlen(test); - assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL) == 0); + assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL, NULL) == 0); journal_file_dump(f); -- cgit v1.2.3-54-g00ecf From ae2cc8efc1659dcc6219dfcd07287288666aa303 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Oct 2011 05:34:19 +0200 Subject: journal: when the same entry is in two files, skip over them in sync --- src/journal/sd-journal.c | 75 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 1614bbf27f..35f911f2ba 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -87,10 +87,13 @@ void sd_journal_flush_matches(sd_journal *j) { } static int compare_order(JournalFile *af, Object *ao, uint64_t ap, - JournalFile 
*bf, Object *bo, uint64_t bp) { + JournalFile *bf, Object *bo, uint64_t bp) { uint64_t a, b; + /* We operate on two different files here, hence we can access + * two objects at the same time, which we normally can't */ + if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) { /* If this is from the same seqnum source, compare @@ -98,23 +101,43 @@ static int compare_order(JournalFile *af, Object *ao, uint64_t ap, a = le64toh(ao->entry.seqnum); b = le64toh(bo->entry.seqnum); + if (a < b) + return -1; + if (a > b) + return 1; + } - } else if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) { + if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) { /* If the boot id matches compare monotonic time */ a = le64toh(ao->entry.monotonic); b = le64toh(bo->entry.monotonic); - } else { - - /* Otherwise compare UTC time */ - a = le64toh(ao->entry.realtime); - b = le64toh(ao->entry.realtime); + if (a < b) + return -1; + if (a > b) + return 1; } - return - a < b ? -1 : - a > b ? +1 : 0; + /* Otherwise compare UTC time */ + a = le64toh(ao->entry.realtime); + b = le64toh(ao->entry.realtime); + + if (a < b) + return -1; + if (a > b) + return 1; + + /* Finally, compare by contents */ + a = le64toh(ao->entry.xor_hash); + b = le64toh(ao->entry.xor_hash); + + if (a < b) + return -1; + if (a > b) + return 1; + + return 0; } int sd_journal_next(sd_journal *j) { @@ -143,7 +166,8 @@ int sd_journal_next(sd_journal *j) { else if (r == 0) continue; - if (!new_current || compare_order(new_current, new_entry, new_offset, f, o, p) > 0) { + if (!new_current || + compare_order(new_current, new_entry, new_offset, f, o, p) > 0) { new_current = f; new_entry = o; new_offset = p; @@ -154,6 +178,35 @@ int sd_journal_next(sd_journal *j) { j->current_file = new_current; j->current_file->current_offset = new_offset; j->current_file->current_field = 0; + + /* Skip over any identical entries in the other files too */ + + HASHMAP_FOREACH(f, j->files, i) { + Object *o; + uint64_t p; + + if 
(j->current_file == f) + continue; + + if (f->current_offset > 0) { + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + } else + o = NULL; + + r = journal_file_next_entry(f, o, &o, &p); + if (r < 0) + return r; + else if (r == 0) + continue; + + if (compare_order(new_current, new_entry, new_offset, f, o, p) == 0) { + f->current_offset = p; + f->current_field = 0; + } + } + return 1; } -- cgit v1.2.3-54-g00ecf From 7210bfb3706f8cbb2b35403f916dace1824c516c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Oct 2011 05:38:40 +0200 Subject: journal: move field index from file into journal object --- src/journal/journal-file.h | 1 - src/journal/sd-journal.c | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 92f671a756..0294555cab 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -52,7 +52,6 @@ typedef struct JournalFile { uint64_t window_size; uint64_t current_offset; - uint64_t current_field; } JournalFile; int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile *template, JournalFile **ret); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 35f911f2ba..74abac88af 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -43,6 +43,7 @@ struct sd_journal { Hashmap *files; JournalFile *current_file; + uint64_t current_field; LIST_HEAD(Match, matches); }; @@ -177,7 +178,7 @@ int sd_journal_next(sd_journal *j) { if (new_current) { j->current_file = new_current; j->current_file->current_offset = new_offset; - j->current_file->current_field = 0; + j->current_field = 0; /* Skip over any identical entries in the other files too */ @@ -201,10 +202,8 @@ int sd_journal_next(sd_journal *j) { else if (r == 0) continue; - if (compare_order(new_current, new_entry, new_offset, f, o, p) == 0) { + if 
(compare_order(new_current, new_entry, new_offset, f, o, p) == 0) f->current_offset = p; - f->current_field = 0; - } } return 1; @@ -249,7 +248,34 @@ int sd_journal_previous(sd_journal *j) { if (new_current) { j->current_file = new_current; j->current_file->current_offset = new_offset; - j->current_file->current_field = 0; + j->current_field = 0; + + /* Skip over any identical entries in the other files too */ + + HASHMAP_FOREACH(f, j->files, i) { + Object *o; + uint64_t p; + + if (j->current_file == f) + continue; + + if (f->current_offset > 0) { + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + if (r < 0) + return r; + } else + o = NULL; + + r = journal_file_prev_entry(f, o, &o, &p); + if (r < 0) + return r; + else if (r == 0) + continue; + + if (compare_order(new_current, new_entry, new_offset, f, o, p) == 0) + f->current_offset = p; + } + return 1; } @@ -592,10 +618,10 @@ int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { return r; n = journal_file_entry_n_items(o); - if (f->current_field >= n) + if (j->current_field >= n) return 0; - p = le64toh(o->entry.items[f->current_field].object_offset); + p = le64toh(o->entry.items[j->current_field].object_offset); r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); if (r < 0) return r; @@ -610,7 +636,7 @@ int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { *data = o->data.payload; *size = t; - f->current_field ++; + j->current_field ++; return 1; } -- cgit v1.2.3-54-g00ecf From 1cc101f1e59ca2f5e87dea48ac9ed98c6f077b3b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Oct 2011 16:52:42 +0200 Subject: journal: if two entries match with everything but seqnums, they are still identical --- src/journal/sd-journal.c | 34 +++++++++++++++++++++++++--------- src/journal/sd-journal.h | 12 +++++++++++- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 
74abac88af..d580b8e768 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -48,28 +48,29 @@ struct sd_journal { LIST_HEAD(Match, matches); }; -int sd_journal_add_match(sd_journal *j, const char *field, const void *data, size_t size) { +int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { Match *m; - char *e; assert(j); - assert(field); - assert(data || size == 0); + + if (size <= 0) + return -EINVAL; + + assert(data); m = new0(Match, 1); if (!m) return -ENOMEM; - m->size = strlen(field) + 1 + size; + m->size = size; + m->data = malloc(m->size); if (!m->data) { free(m); return -ENOMEM; } - e = stpcpy(m->data, field); - *(e++) = '='; - memcpy(e, data, size); + memcpy(m->data, data, size); LIST_PREPEND(Match, matches, j->matches, m); return 0; @@ -93,7 +94,16 @@ static int compare_order(JournalFile *af, Object *ao, uint64_t ap, uint64_t a, b; /* We operate on two different files here, hence we can access - * two objects at the same time, which we normally can't */ + * two objects at the same time, which we normally can't. + * + * If contents and timestamps match, these entries are + * identical, even if the seqnum does not match */ + + if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id) && + ao->entry.monotonic == bo->entry.monotonic && + ao->entry.realtime == bo->entry.realtime && + ao->entry.xor_hash == bo->entry.xor_hash) + return 0; if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) { @@ -106,6 +116,10 @@ static int compare_order(JournalFile *af, Object *ao, uint64_t ap, return -1; if (a > b) return 1; + + /* Wow! This is weird, different data but the same + * seqnums? Something is borked, but let's make the + * best of it and compare by time. 
*/ } if (sd_id128_equal(ao->entry.boot_id, bo->entry.boot_id)) { @@ -474,6 +488,8 @@ void sd_journal_close(sd_journal *j) { hashmap_free(j->files); } + sd_journal_flush_matches(j); + free(j); } diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 13b5f891d7..6b451b5765 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -35,6 +35,8 @@ * - implement stdout gateway * - extend hash table/bisect table as we go * - accelerate looking for "all hostnames" and suchlike. + * - throttling + * - enforce limit on open journal files in journald and journalctl */ typedef struct sd_journal sd_journal; @@ -50,7 +52,7 @@ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret); int sd_journal_get_field(sd_journal *j, const char *field, const void **data, size_t *l); int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *l); -int sd_journal_add_match(sd_journal *j, const char *field, const void *data, size_t size); +int sd_journal_add_match(sd_journal *j, const void *data, size_t size); void sd_journal_flush_matches(sd_journal *j); int sd_journal_seek_head(sd_journal *j); @@ -64,6 +66,14 @@ int sd_journal_set_cursor(sd_journal *j, const char *cursor); int sd_journal_get_fd(sd_journal *j); +enum { + SD_JOURNAL_NOP, + SD_JOURNAL_APPEND, + SD_JOURNAL_DROP +}; + +int sd_journal_process(sd_journal *j); + #define SD_JOURNAL_FOREACH(j) \ while (sd_journal_next(j) > 0) -- cgit v1.2.3-54-g00ecf From de7b95cdc3228131498021c2fdcf6647004c3920 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 15 Oct 2011 01:13:37 +0200 Subject: journal: properly implement matching with multiple matches --- src/journal/journal-def.h | 1 + src/journal/journal-file.c | 1 + src/journal/journalctl.c | 10 +++- src/journal/sd-journal.c | 138 ++++++++++++++++++++++++++++++++++++++------- 4 files changed, 129 insertions(+), 21 deletions(-) diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index d44b070fd0..5cb1e6d9c8 100644 --- 
a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -65,6 +65,7 @@ _packed_ struct DataObject { _packed_ struct EntryItem { uint64_t object_offset; + uint64_t hash; uint64_t prev_entry_offset; uint64_t next_entry_offset; }; diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 537978137b..5557028147 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -805,6 +805,7 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st xor_hash ^= le64toh(o->data.hash); items[i].object_offset = htole64(p); + items[i].hash = o->data.hash; } r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqno, ret, offset); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index a6b6e0fbd6..3475b9d43c 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -31,7 +31,7 @@ #include "log.h" int main(int argc, char *argv[]) { - int r; + int r, i; sd_journal *j = NULL; log_set_max_level(LOG_DEBUG); @@ -46,6 +46,14 @@ int main(int argc, char *argv[]) { goto finish; } + for (i = 1; i < argc; i++) { + r = sd_journal_add_match(j, argv[i], strlen(argv[i])); + if (r < 0) { + log_error("Failed to add match: %s", strerror(-r)); + goto finish; + } + } + SD_JOURNAL_FOREACH(j) { const void *data; size_t length; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index d580b8e768..bc6d255662 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -28,13 +28,14 @@ #include "journal-file.h" #include "hashmap.h" #include "list.h" +#include "lookup3.h" typedef struct Match Match; struct Match { char *data; size_t size; - uint64_t hash; + uint64_t le_hash; LIST_FIELDS(Match, matches); }; @@ -46,6 +47,7 @@ struct sd_journal { uint64_t current_field; LIST_HEAD(Match, matches); + unsigned n_matches; }; int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { @@ -71,8 +73,11 @@ int sd_journal_add_match(sd_journal *j, const void *data, size_t 
size) { } memcpy(m->data, data, size); + m->le_hash = hash64(m->data, size); LIST_PREPEND(Match, matches, j->matches, m); + j->n_matches ++; + return 0; } @@ -86,6 +91,8 @@ void sd_journal_flush_matches(sd_journal *j) { free(m->data); free(m); } + + j->n_matches = 0; } static int compare_order(JournalFile *af, Object *ao, uint64_t ap, @@ -155,6 +162,103 @@ static int compare_order(JournalFile *af, Object *ao, uint64_t ap, return 0; } +static int move_to_next_with_matches(sd_journal *j, JournalFile *f, Object **o, uint64_t *p) { + int r; + uint64_t cp; + Object *c; + + assert(j); + assert(f); + assert(o); + assert(p); + + if (!j->matches) { + /* No matches is easy, just go on to the next entry */ + + if (f->current_offset > 0) { + r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &c); + if (r < 0) + return r; + } else + c = NULL; + + return journal_file_next_entry(f, c, o, p); + } + + /* So there are matches we have to adhere to, let's find the + * first entry that matches all of them */ + + if (f->current_offset > 0) + cp = f->current_offset; + else { + r = journal_file_find_first_entry(f, j->matches->data, j->matches->size, &c, &cp); + if (r <= 0) + return r; + + /* We can shortcut this if there's only one match */ + if (j->n_matches == 1) { + *o = c; + *p = cp; + return r; + } + } + + for (;;) { + uint64_t np, n; + bool found; + Match *m; + + r = journal_file_move_to_object(f, cp, OBJECT_ENTRY, &c); + if (r < 0) + return r; + + n = journal_file_entry_n_items(c); + + /* Make sure we don't match the entry we are starting + * from. */ + found = f->current_offset != cp; + + np = 0; + LIST_FOREACH(matches, m, j->matches) { + uint64_t q, k; + + for (k = 0; k < n; k++) + if (c->entry.items[k].hash == m->le_hash) + break; + + if (k >= n) { + /* Hmm, didn't find any field that matched, so ignore + * this match. 
Go on with next match */ + + found = false; + continue; + } + + /* Hmm, so, this field matched, let's remember + * where we'd have to try next, in case the other + * matches are not OK */ + q = le64toh(c->entry.items[k].next_entry_offset); + if (q > np) + np = q; + } + + /* Did this entry match against all matches? */ + if (found) { + *o = c; + *p = cp; + return 1; + } + + /* Did we find a subsequent entry? */ + if (np == 0) + return 0; + + /* Hmm, ok, this entry only matched partially, so + * let's try another one */ + cp = np; + } +} + int sd_journal_next(sd_journal *j) { JournalFile *f, *new_current = NULL; Iterator i; @@ -168,14 +272,7 @@ int sd_journal_next(sd_journal *j) { Object *o; uint64_t p; - if (f->current_offset > 0) { - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); - if (r < 0) - return r; - } else - o = NULL; - - r = journal_file_next_entry(f, o, &o, &p); + r = move_to_next_with_matches(j, f, &o, &p); if (r < 0) return r; else if (r == 0) @@ -203,14 +300,7 @@ int sd_journal_next(sd_journal *j) { if (j->current_file == f) continue; - if (f->current_offset > 0) { - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); - if (r < 0) - return r; - } else - o = NULL; - - r = journal_file_next_entry(f, o, &o, &p); + r = move_to_next_with_matches(j, f, &o, &p); if (r < 0) return r; else if (r == 0) @@ -532,7 +622,7 @@ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret) { if (f->current_offset <= 0) return 0; - r = sd_id128_get_machine(&id); + r = sd_id128_get_boot(&id); if (r < 0) return r; @@ -578,14 +668,18 @@ int sd_journal_get_field(sd_journal *j, const char *field, const void **data, si n = journal_file_entry_n_items(o); for (i = 0; i < n; i++) { - uint64_t p, l; + uint64_t p, l, h; size_t t; p = le64toh(o->entry.items[i].object_offset); + h = o->entry.items[j->current_field].hash; r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); if (r < 0) return r; + if (h != o->data.hash) + return 
-EBADMSG; + l = le64toh(o->object.size) - offsetof(Object, data.payload); if (l >= field_length+1 && @@ -613,7 +707,7 @@ int sd_journal_get_field(sd_journal *j, const char *field, const void **data, si int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { JournalFile *f; - uint64_t p, l, n; + uint64_t p, l, n, h; size_t t; int r; Object *o; @@ -638,10 +732,14 @@ int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { return 0; p = le64toh(o->entry.items[j->current_field].object_offset); + h = o->entry.items[j->current_field].hash; r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); if (r < 0) return r; + if (h != o->data.hash) + return -EBADMSG; + l = le64toh(o->object.size) - offsetof(Object, data.payload); t = (size_t) l; -- cgit v1.2.3-54-g00ecf From e892bd1797e99c8afaa6afdbe413fda78937c9c3 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 15 Oct 2011 01:38:44 +0200 Subject: journal: unify code for up and for down traversal --- src/journal/journal-file.c | 68 ++++++------------------------- src/journal/journal-file.h | 11 ++++-- src/journal/sd-journal.c | 99 +++++++++++----------------------------------- src/journal/test-journal.c | 18 ++++----- 4 files changed, 52 insertions(+), 144 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 5557028147..ab45132f0f 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -892,19 +892,23 @@ int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, ui return 0; } -int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { +int journal_file_next_entry(JournalFile *f, Object *o, direction_t direction, Object **ret, uint64_t *offset) { uint64_t np; int r; assert(f); if (!o) - np = le64toh(f->header->head_entry_offset); + np = le64toh(direction == DIRECTION_DOWN ? 
+ f->header->head_entry_offset : + f->header->tail_entry_offset); else { if (le64toh(o->object.type) != OBJECT_ENTRY) return -EINVAL; - np = le64toh(o->entry.next_entry_offset); + np = le64toh(direction == DIRECTION_DOWN ? + o->entry.next_entry_offset : + o->entry.prev_entry_offset); } if (np == 0) @@ -954,7 +958,7 @@ int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *o return 1; } -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { +int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, direction_t direction, Object **ret, uint64_t *offset) { uint64_t p, osize, hash, h; int r; @@ -980,59 +984,13 @@ int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t siz if (le64toh(o->data.hash) != hash) return -EBADMSG; - if (o->data.head_entry_offset == 0) - return 0; - - p = le64toh(o->data.head_entry_offset); - r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); - if (r < 0) - return r; - - if (ret) - *ret = o; - - if (offset) - *offset = p; - - return 1; - } - - p = le64toh(o->data.next_hash_offset); - } - - return 0; -} - -int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; - int r; + p = le64toh(direction == DIRECTION_DOWN ? 
+ o->data.head_entry_offset : + o->data.tail_entry_offset); - assert(f); - assert(data || size == 0); - - osize = offsetof(Object, data.payload) + size; - - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].tail_hash_offset); - - while (p != 0) { - Object *o; - - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); - if (r < 0) - return r; - - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { - - if (le64toh(o->data.hash) != hash) - return -EBADMSG; - - if (o->data.tail_entry_offset == 0) + if (p == 0) return 0; - p = le64toh(o->data.tail_entry_offset); r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); if (r < 0) return r; @@ -1046,7 +1004,7 @@ int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size return 1; } - p = le64toh(o->data.prev_hash_offset); + p = le64toh(o->data.next_hash_offset); } return 0; diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 0294555cab..795a446dc8 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -54,6 +54,11 @@ typedef struct JournalFile { uint64_t current_offset; } JournalFile; +typedef enum direction { + DIRECTION_UP, + DIRECTION_DOWN +} direction_t; + int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile *template, JournalFile **ret); void journal_file_close(JournalFile *j); @@ -66,11 +71,9 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset); -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); -int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); +int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, direction_t direction, Object 
**ret, uint64_t *offset); -int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); -int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset); +int journal_file_next_entry(JournalFile *f, Object *o, direction_t direction, Object **ret, uint64_t *offset); void journal_file_dump(JournalFile *f); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index bc6d255662..985e3210ce 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -162,7 +162,7 @@ static int compare_order(JournalFile *af, Object *ao, uint64_t ap, return 0; } -static int move_to_next_with_matches(sd_journal *j, JournalFile *f, Object **o, uint64_t *p) { +static int move_to_next_with_matches(sd_journal *j, JournalFile *f, direction_t direction, Object **o, uint64_t *p) { int r; uint64_t cp; Object *c; @@ -182,7 +182,7 @@ static int move_to_next_with_matches(sd_journal *j, JournalFile *f, Object **o, } else c = NULL; - return journal_file_next_entry(f, c, o, p); + return journal_file_next_entry(f, c, direction, o, p); } /* So there are matches we have to adhere to, let's find the @@ -191,7 +191,7 @@ static int move_to_next_with_matches(sd_journal *j, JournalFile *f, Object **o, if (f->current_offset > 0) cp = f->current_offset; else { - r = journal_file_find_first_entry(f, j->matches->data, j->matches->size, &c, &cp); + r = journal_file_find_first_entry(f, j->matches->data, j->matches->size, direction, &c, &cp); if (r <= 0) return r; @@ -237,9 +237,18 @@ static int move_to_next_with_matches(sd_journal *j, JournalFile *f, Object **o, /* Hmm, so, this field matched, let's remember * where we'd have to try next, in case the other * matches are not OK */ - q = le64toh(c->entry.items[k].next_entry_offset); - if (q > np) - np = q; + + if (direction == DIRECTION_DOWN) { + q = le64toh(c->entry.items[k].next_entry_offset); + + if (q > np) + np = q; + } else { + q = le64toh(c->entry.items[k].prev_entry_offset); + + if (q != 0 
&& (np == 0 || q < np)) + np = q; + } } /* Did this entry match against all matches? */ @@ -259,7 +268,7 @@ static int move_to_next_with_matches(sd_journal *j, JournalFile *f, Object **o, } } -int sd_journal_next(sd_journal *j) { +static int real_journal_next(sd_journal *j, direction_t direction) { JournalFile *f, *new_current = NULL; Iterator i; int r; @@ -272,7 +281,7 @@ int sd_journal_next(sd_journal *j) { Object *o; uint64_t p; - r = move_to_next_with_matches(j, f, &o, &p); + r = move_to_next_with_matches(j, f, direction, &o, &p); if (r < 0) return r; else if (r == 0) @@ -300,7 +309,7 @@ int sd_journal_next(sd_journal *j) { if (j->current_file == f) continue; - r = move_to_next_with_matches(j, f, &o, &p); + r = move_to_next_with_matches(j, f, direction, &o, &p); if (r < 0) return r; else if (r == 0) @@ -316,74 +325,12 @@ int sd_journal_next(sd_journal *j) { return 0; } -int sd_journal_previous(sd_journal *j) { - JournalFile *f, *new_current = NULL; - Iterator i; - int r; - uint64_t new_offset = 0; - Object *new_entry = NULL; - - assert(j); - - HASHMAP_FOREACH(f, j->files, i) { - Object *o; - uint64_t p; - - if (f->current_offset > 0) { - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); - if (r < 0) - return r; - } else - o = NULL; - - r = journal_file_prev_entry(f, o, &o, &p); - if (r < 0) - return r; - else if (r == 0) - continue; - - if (!new_current || compare_order(new_current, new_entry, new_offset, f, o, p) > 0) { - new_current = f; - new_entry = o; - new_offset = p; - } - } - - if (new_current) { - j->current_file = new_current; - j->current_file->current_offset = new_offset; - j->current_field = 0; - - /* Skip over any identical entries in the other files too */ - - HASHMAP_FOREACH(f, j->files, i) { - Object *o; - uint64_t p; - - if (j->current_file == f) - continue; - - if (f->current_offset > 0) { - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); - if (r < 0) - return r; - } else - o = NULL; - - r = 
journal_file_prev_entry(f, o, &o, &p); - if (r < 0) - return r; - else if (r == 0) - continue; - - if (compare_order(new_current, new_entry, new_offset, f, o, p) == 0) - f->current_offset = p; - } - - return 1; - } +int sd_journal_next(sd_journal *j) { + return real_journal_next(j, DIRECTION_DOWN); +} - return 0; +int sd_journal_previous(sd_journal *j) { + return real_journal_next(j, DIRECTION_UP); } int sd_journal_get_cursor(sd_journal *j, char **cursor) { diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index 3b67f1aa9f..93e2b4dab4 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -54,30 +54,30 @@ int main(int argc, char *argv[]) { journal_file_dump(f); - assert(journal_file_next_entry(f, NULL, &o, NULL) == 1); + assert(journal_file_next_entry(f, NULL, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 1); - assert(journal_file_next_entry(f, o, &o, NULL) == 1); + assert(journal_file_next_entry(f, o, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 2); - assert(journal_file_next_entry(f, o, &o, NULL) == 1); + assert(journal_file_next_entry(f, o, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 3); - assert(journal_file_next_entry(f, o, &o, NULL) == 0); + assert(journal_file_next_entry(f, o, DIRECTION_DOWN, &o, NULL) == 0); - assert(journal_file_find_first_entry(f, test, strlen(test), &o, NULL) == 1); + assert(journal_file_find_first_entry(f, test, strlen(test), DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 1); - assert(journal_file_find_last_entry(f, test, strlen(test), &o, NULL) == 1); + assert(journal_file_find_first_entry(f, test, strlen(test), DIRECTION_UP, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 3); - assert(journal_file_find_last_entry(f, test2, strlen(test2), &o, NULL) == 1); + assert(journal_file_find_first_entry(f, test2, strlen(test2), DIRECTION_UP, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 2); - 
assert(journal_file_find_first_entry(f, test2, strlen(test2), &o, NULL) == 1); + assert(journal_file_find_first_entry(f, test2, strlen(test2), DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 2); - assert(journal_file_find_first_entry(f, "quux", 4, &o, NULL) == 0); + assert(journal_file_find_first_entry(f, "quux", 4, DIRECTION_DOWN, &o, NULL) == 0); assert(journal_file_move_to_entry(f, 1, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 1); -- cgit v1.2.3-54-g00ecf From 8725d60ae4f7a8471aa8a0207fa105e335d069a6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 15 Oct 2011 02:45:58 +0200 Subject: journal: implement seek to head/tail --- src/journal/journalctl.c | 8 +++++--- src/journal/sd-journal.c | 31 +++++++++++++++++++++++++------ src/journal/sd-journal.h | 43 +++++++++++++++++++++++++++++++------------ 3 files changed, 61 insertions(+), 21 deletions(-) diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 3475b9d43c..3e5ee1af96 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -54,7 +54,8 @@ int main(int argc, char *argv[]) { } } - SD_JOURNAL_FOREACH(j) { + SD_JOURNAL_FOREACH_BEGIN(j) { + const void *data; size_t length; char *cursor; @@ -76,9 +77,10 @@ int main(int argc, char *argv[]) { (unsigned long long) realtime, (unsigned long long) monotonic); - SD_JOURNAL_FOREACH_FIELD(j, data, length) + SD_JOURNAL_FOREACH_DATA(j, data, length) printf("\t%.*s\n", (int) length, (const char*) data); - } + + } SD_JOURNAL_FOREACH_END(j); finish: if (j) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 985e3210ce..6a68275e9e 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -585,7 +585,7 @@ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret) { } -int sd_journal_get_field(sd_journal *j, const char *field, const void **data, size_t *size) { +int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) { JournalFile *f; 
uint64_t i, n; size_t field_length; @@ -652,7 +652,7 @@ int sd_journal_get_field(sd_journal *j, const char *field, const void **data, si return 0; } -int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { +int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { JournalFile *f; uint64_t p, l, n, h; size_t t; @@ -702,12 +702,31 @@ int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *size) { return 1; } -int sd_journal_seek_head(sd_journal *j) { +void sd_journal_start_data(sd_journal *j) { assert(j); - return -EINVAL; + + j->current_field = 0; } -int sd_journal_seek_tail(sd_journal *j) { +static int real_journal_seek_head(sd_journal *j, direction_t direction) { + Iterator i; + JournalFile *f; + assert(j); - return -EINVAL; + + j->current_file = NULL; + j->current_field = 0; + + HASHMAP_FOREACH(f, j->files, i) + f->current_offset = 0; + + return real_journal_next(j, direction); +} + +int sd_journal_seek_head(sd_journal *j) { + return real_journal_seek_head(j, DIRECTION_DOWN); +} + +int sd_journal_seek_tail(sd_journal *j) { + return real_journal_seek_head(j, DIRECTION_UP); } diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 6b451b5765..bf6673453d 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -49,8 +49,9 @@ int sd_journal_next(sd_journal *j); int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret); int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret); -int sd_journal_get_field(sd_journal *j, const char *field, const void **data, size_t *l); -int sd_journal_iterate_fields(sd_journal *j, const void **data, size_t *l); +int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *l); +int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *l); +void sd_journal_start_data(sd_journal *j); int sd_journal_add_match(sd_journal *j, const void *data, size_t size); void sd_journal_flush_matches(sd_journal *j); 
@@ -58,13 +59,16 @@ void sd_journal_flush_matches(sd_journal *j); int sd_journal_seek_head(sd_journal *j); int sd_journal_seek_tail(sd_journal *j); -int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); -int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); +int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); /* missing */ +int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); /* missing */ int sd_journal_get_cursor(sd_journal *j, char **cursor); -int sd_journal_set_cursor(sd_journal *j, const char *cursor); +int sd_journal_set_cursor(sd_journal *j, const char *cursor); /* missing */ -int sd_journal_get_fd(sd_journal *j); +int sd_journal_unique_seek(sd_journal *j, const char *field); /* missing */ +int sd_journal_unique_enumerate(sd_journal *j, const void **data, size_t *l); /* missing */ + +int sd_journal_get_fd(sd_journal *j); /* missing */ enum { SD_JOURNAL_NOP, @@ -74,13 +78,28 @@ enum { int sd_journal_process(sd_journal *j); -#define SD_JOURNAL_FOREACH(j) \ - while (sd_journal_next(j) > 0) +#define SD_JOURNAL_FOREACH_BEGIN(j) \ + if (sd_journal_seek_head(j) > 0) do { + +#define SD_JOURNAL_FOREACH_END(j) \ + } while (sd_journal_next(j) > 0) + +#define SD_JOURNAL_FOREACH_CONTINUE(j) \ + do { + +#define SD_JOURNAL_FOREACH_BACKWARDS_BEGIN(j) \ + if (sd_journal_seek_tail(j) > 0) do { + +#define SD_JOURNAL_FOREACH_BACKWARDS_END(j) \ + } while (sd_journal_previous(j) > 0) + +#define SD_JOURNAL_FOREACH_BACKWARDS_CONTINUE(j) \ + do { -#define SD_JOURNAL_FOREACH_BACKWARDS(j) \ - while (sd_journal_previous(j) > 0) +#define SD_JOURNAL_FOREACH_DATA(j, data, l) \ + for (sd_journal_start_data(j); sd_journal_enumerate_data((j), &(data), &(l)) > 0; ) -#define SD_JOURNAL_FOREACH_FIELD(j, data, l) \ - while (sd_journal_iterate_fields((j), &(data), &(l)) > 0) +#define SD_JOURNAL_FOREACH_UNIQUE(j, data, l) \ + while (sd_journal_enumerate_unique_data((j), &(data), &(l)) > 0) #endif -- cgit v1.2.3-54-g00ecf From 
8f9b6cd9eb049b00b1e9e669d0e35aa415dc8fb0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 15 Oct 2011 02:53:04 +0200 Subject: journal: automaticall reset location when the set of matches changes --- src/journal/sd-journal.c | 40 +++++++++++++++++++++++++--------------- src/journal/sd-journal.h | 2 +- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 6a68275e9e..5d518a3870 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -50,6 +50,19 @@ struct sd_journal { unsigned n_matches; }; +static void reset_location(sd_journal *j) { + Iterator i; + JournalFile *f; + + assert(j); + + j->current_file = NULL; + j->current_field = 0; + + HASHMAP_FOREACH(f, j->files, i) + f->current_offset = 0; +} + int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { Match *m; @@ -78,6 +91,8 @@ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { LIST_PREPEND(Match, matches, j->matches, m); j->n_matches ++; + reset_location(j); + return 0; } @@ -93,6 +108,8 @@ void sd_journal_flush_matches(sd_journal *j) { } j->n_matches = 0; + + reset_location(j); } static int compare_order(JournalFile *af, Object *ao, uint64_t ap, @@ -708,25 +725,18 @@ void sd_journal_start_data(sd_journal *j) { j->current_field = 0; } -static int real_journal_seek_head(sd_journal *j, direction_t direction) { - Iterator i; - JournalFile *f; - +int sd_journal_seek_head(sd_journal *j) { assert(j); - j->current_file = NULL; - j->current_field = 0; - - HASHMAP_FOREACH(f, j->files, i) - f->current_offset = 0; + reset_location(j); - return real_journal_next(j, direction); -} - -int sd_journal_seek_head(sd_journal *j) { - return real_journal_seek_head(j, DIRECTION_DOWN); + return real_journal_next(j, DIRECTION_DOWN); } int sd_journal_seek_tail(sd_journal *j) { - return real_journal_seek_head(j, DIRECTION_UP); + assert(j); + + reset_location(j); + + return real_journal_next(j, DIRECTION_UP); 
} diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index bf6673453d..9978ca9ac0 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -76,7 +76,7 @@ enum { SD_JOURNAL_DROP }; -int sd_journal_process(sd_journal *j); +int sd_journal_process(sd_journal *j); /* missing */ #define SD_JOURNAL_FOREACH_BEGIN(j) \ if (sd_journal_seek_head(j) > 0) do { -- cgit v1.2.3-54-g00ecf From 9b3c575ed90bb1165a192dfae2fb2330baab583c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 8 Nov 2011 18:18:48 +0100 Subject: macro: fix ALIGN_TO macro definition --- src/macro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macro.h b/src/macro.h index e7a4d2cde1..3f30aa7892 100644 --- a/src/macro.h +++ b/src/macro.h @@ -149,7 +149,7 @@ static inline size_t ALIGN_TO(size_t l, size_t ali) { char *_s = (char *)(s); \ _i->iov_base = _s; \ _i->iov_len = strlen(_s); \ - } while(false); + } while(false) static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, unsigned n) { unsigned j; -- cgit v1.2.3-54-g00ecf From de190aef08bb267b645205a747762df573b36834 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 8 Nov 2011 18:20:03 +0100 Subject: journal: implement multiple field matches --- src/journal/journal-def.h | 66 +-- src/journal/journal-file.c | 1134 ++++++++++++++++++++++++++++++++------------ src/journal/journal-file.h | 51 +- src/journal/journalctl.c | 6 +- src/journal/journald.c | 34 +- src/journal/sd-journal.c | 801 +++++++++++++++++++++++++------ src/journal/sd-journal.h | 61 ++- src/journal/test-journal.c | 41 +- src/journal/wjournal.c | 57 --- src/journal/wjournal.h | 39 -- 10 files changed, 1642 insertions(+), 648 deletions(-) delete mode 100644 src/journal/wjournal.c delete mode 100644 src/journal/wjournal.h diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 5cb1e6d9c8..1a63ca1f8d 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -31,9 +31,10 @@ typedef struct 
Header Header; typedef struct ObjectHeader ObjectHeader; typedef union Object Object; typedef struct DataObject DataObject; +typedef struct FieldObject FieldObject; typedef struct EntryObject EntryObject; typedef struct HashTableObject HashTableObject; -typedef struct BisectTableObject BisectTableObject; +typedef struct EntryArrayObject EntryArrayObject; typedef struct EntryItem EntryItem; typedef struct HashItem HashItem; @@ -41,9 +42,12 @@ typedef struct HashItem HashItem; enum { OBJECT_UNUSED, OBJECT_DATA, + OBJECT_FIELD, OBJECT_ENTRY, - OBJECT_HASH_TABLE, - OBJECT_BISECT_TABLE + OBJECT_DATA_HASH_TABLE, + OBJECT_FIELD_HASH_TABLE, + OBJECT_ENTRY_ARRAY, + _OBJECT_TYPE_MAX }; _packed_ struct ObjectHeader { @@ -56,18 +60,26 @@ _packed_ struct ObjectHeader { _packed_ struct DataObject { ObjectHeader object; uint64_t hash; - uint64_t head_entry_offset; - uint64_t tail_entry_offset; - uint64_t prev_hash_offset; uint64_t next_hash_offset; + uint64_t next_field_offset; + uint64_t entry_offset; /* the first array entry we store inline */ + uint64_t entry_array_offset; + uint64_t n_entries; + uint8_t payload[]; +}; + +_packed_ struct FieldObject { + ObjectHeader object; + uint64_t hash; + uint64_t next_hash_offset; + uint64_t head_data_offset; + uint64_t tail_data_offset; uint8_t payload[]; }; _packed_ struct EntryItem { uint64_t object_offset; uint64_t hash; - uint64_t prev_entry_offset; - uint64_t next_entry_offset; }; _packed_ struct EntryObject { @@ -77,8 +89,6 @@ _packed_ struct EntryObject { uint64_t monotonic; sd_id128_t boot_id; uint64_t xor_hash; - uint64_t prev_entry_offset; - uint64_t next_entry_offset; EntryItem items[]; }; @@ -89,20 +99,22 @@ _packed_ struct HashItem { _packed_ struct HashTableObject { ObjectHeader object; - HashItem table[]; + HashItem items[]; }; -_packed_ struct BisectTableObject { +_packed_ struct EntryArrayObject { ObjectHeader object; - uint64_t table[]; + uint64_t next_entry_array_offset; + uint64_t items[]; }; union Object { 
ObjectHeader object; DataObject data; + FieldObject field; EntryObject entry; HashTableObject hash_table; - BisectTableObject bisect_table; + EntryArrayObject entry_array; }; enum { @@ -115,30 +127,30 @@ _packed_ struct Header { uint8_t signature[8]; /* "LPKSHHRH" */ uint32_t compatible_flags; uint32_t incompatible_flags; - uint32_t state; - uint8_t reserved[4]; + uint8_t state; + uint8_t reserved[7]; sd_id128_t file_id; sd_id128_t machine_id; sd_id128_t boot_id; sd_id128_t seqnum_id; uint64_t arena_offset; uint64_t arena_size; - uint64_t arena_max_size; - uint64_t arena_min_size; - uint64_t arena_keep_free; - uint64_t hash_table_offset; /* for looking up data objects */ - uint64_t hash_table_size; - uint64_t bisect_table_offset; /* for looking up entry objects */ - uint64_t bisect_table_size; - uint64_t head_object_offset; + uint64_t arena_max_size; /* obsolete */ + uint64_t arena_min_size; /* obsolete */ + uint64_t arena_keep_free; /* obsolete */ + uint64_t data_hash_table_offset; /* for looking up data objects */ + uint64_t data_hash_table_size; + uint64_t field_hash_table_offset; /* for looking up field objects */ + uint64_t field_hash_table_size; uint64_t tail_object_offset; - uint64_t head_entry_offset; - uint64_t tail_entry_offset; - uint64_t last_bisect_offset; uint64_t n_objects; + uint64_t n_entries; uint64_t seqnum; + uint64_t first_seqnum; + uint64_t entry_array_offset; uint64_t head_entry_realtime; uint64_t tail_entry_realtime; + uint64_t tail_entry_monotonic; }; #endif diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index ab45132f0f..427631d30a 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -37,8 +37,8 @@ #define DEFAULT_MAX_USE (16ULL*1024ULL*1024ULL*16ULL) -#define DEFAULT_HASH_TABLE_SIZE (2047ULL*16ULL) -#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL) +#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL) +#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL) #define 
DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) @@ -47,23 +47,17 @@ static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; #define ALIGN64(x) (((x) + 7ULL) & ~7ULL) void journal_file_close(JournalFile *f) { - assert(f); - - if (f->header) { - if (f->writable && f->header->state == htole32(STATE_ONLINE)) - f->header->state = htole32(STATE_OFFLINE); + int t; - munmap(f->header, PAGE_ALIGN(sizeof(Header))); - } + assert(f); - if (f->hash_table_window) - munmap(f->hash_table_window, f->hash_table_window_size); + if (f->header && f->writable) + f->header->state = STATE_OFFLINE; - if (f->bisect_table_window) - munmap(f->bisect_table_window, f->bisect_table_window_size); - if (f->window) - munmap(f->window, f->window_size); + for (t = 0; t < _WINDOW_MAX; t++) + if (f->windows[t].ptr) + munmap(f->windows[t].ptr, f->windows[t].size); if (f->fd >= 0) close_nointr_nofail(f->fd); @@ -108,6 +102,7 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) { static int journal_file_refresh_header(JournalFile *f) { int r; + sd_id128_t boot_id; assert(f); @@ -115,11 +110,16 @@ static int journal_file_refresh_header(JournalFile *f) { if (r < 0) return r; - r = sd_id128_get_boot(&f->header->boot_id); + r = sd_id128_get_boot(&boot_id); if (r < 0) return r; - f->header->state = htole32(STATE_ONLINE); + if (sd_id128_equal(boot_id, f->header->boot_id)) + f->tail_entry_monotonic_valid = true; + + f->header->boot_id = boot_id; + + f->header->state = STATE_ONLINE; return 0; } @@ -147,7 +147,7 @@ static int journal_file_verify_header(JournalFile *f) { if (!sd_id128_equal(machine_id, f->header->machine_id)) return -EHOSTDOWN; - state = le32toh(f->header->state); + state = f->header->state; if (state == STATE_ONLINE) log_debug("Journal file %s is already online. Assuming unclean closing. 
Ignoring.", f->path); @@ -254,28 +254,33 @@ static int journal_file_map( return 0; } -static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) { +static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) { void *p; uint64_t delta; int r; + Window *w; assert(f); assert(ret); + assert(wt >= 0); + assert(wt < _WINDOW_MAX); - if (_likely_(f->window && - f->window_offset <= offset && - f->window_offset+f->window_size >= offset + size)) { + w = f->windows + wt; - *ret = (uint8_t*) f->window + (offset - f->window_offset); + if (_likely_(w->ptr && + w->offset <= offset && + w->offset + w->size >= offset + size)) { + + *ret = (uint8_t*) w->ptr + (offset - w->offset); return 0; } - if (f->window) { - if (munmap(f->window, f->window_size) < 0) + if (w->ptr) { + if (munmap(w->ptr, w->size) < 0) return -errno; - f->window = NULL; - f->window_size = f->window_offset = 0; + w->ptr = NULL; + w->size = w->offset = 0; } if (size < DEFAULT_WINDOW_SIZE) { @@ -297,8 +302,8 @@ static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, r = journal_file_map(f, offset, size, - &f->window, &f->window_offset, &f->window_size, - & p); + &w->ptr, &w->offset, &w->size, + &p); if (r < 0) return r; @@ -308,26 +313,23 @@ static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, } static bool verify_hash(Object *o) { - uint64_t t; + uint64_t h1, h2; assert(o); - t = le64toh(o->object.type); - if (t == OBJECT_DATA) { - uint64_t s, h1, h2; - - s = le64toh(o->object.size); - + if (o->object.type == OBJECT_DATA) { h1 = le64toh(o->data.hash); - h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); - - return h1 == h2; - } + h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload)); + } else if (o->object.type == OBJECT_FIELD) { + h1 = le64toh(o->field.hash); + h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, 
field.payload)); + } else + return true; - return true; + return h1 == h2; } -int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Object **ret) { +int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) { int r; void *t; Object *o; @@ -335,8 +337,9 @@ int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Objec assert(f); assert(ret); + assert(type < _OBJECT_TYPE_MAX); - r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); + r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t); if (r < 0) return r; @@ -346,11 +349,11 @@ int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Objec if (s < sizeof(ObjectHeader)) return -EBADMSG; - if (type >= 0 && le64toh(o->object.type) != type) + if (type >= 0 && o->object.type != type) return -EBADMSG; if (s > sizeof(ObjectHeader)) { - r = journal_file_move_to(f, offset, s, &t); + r = journal_file_move_to(f, o->object.type, offset, s, &t); if (r < 0) return r; @@ -372,7 +375,7 @@ static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) { r = le64toh(f->header->seqnum) + 1; if (seqnum) { - /* If an external seqno counter was passed, we update + /* If an external seqnum counter was passed, we update * both the local and the external one, and set it to * the maximum of both */ @@ -384,10 +387,13 @@ static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) { f->header->seqnum = htole64(r); + if (f->header->first_seqnum == 0) + f->header->first_seqnum = htole64(r); + return r; } -static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { +static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) { int r; uint64_t p; Object *tail, *o; @@ -399,11 +405,10 @@ static int journal_file_append_object(JournalFile *f, uint64_t size, Object **re assert(ret); p = 
le64toh(f->header->tail_object_offset); - if (p == 0) p = le64toh(f->header->arena_offset); else { - r = journal_file_move_to_object(f, p, -1, &tail); + r = journal_file_move_to_object(f, -1, p, &tail); if (r < 0) return r; @@ -414,21 +419,17 @@ static int journal_file_append_object(JournalFile *f, uint64_t size, Object **re if (r < 0) return r; - r = journal_file_move_to(f, p, size, &t); + r = journal_file_move_to(f, type, p, size, &t); if (r < 0) return r; o = (Object*) t; zero(o->object); - o->object.type = htole64(OBJECT_UNUSED); - zero(o->object.reserved); + o->object.type = type; o->object.size = htole64(size); f->header->tail_object_offset = htole64(p); - if (f->header->head_object_offset == 0) - f->header->head_object_offset = htole64(p); - f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); *ret = o; @@ -437,135 +438,137 @@ static int journal_file_append_object(JournalFile *f, uint64_t size, Object **re return 0; } -static int journal_file_setup_hash_table(JournalFile *f) { +static int journal_file_setup_data_hash_table(JournalFile *f) { uint64_t s, p; Object *o; int r; assert(f); - s = DEFAULT_HASH_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); + s = DEFAULT_DATA_HASH_TABLE_SIZE; + r = journal_file_append_object(f, + OBJECT_DATA_HASH_TABLE, + offsetof(Object, hash_table.items) + s, + &o, &p); if (r < 0) return r; - o->object.type = htole64(OBJECT_HASH_TABLE); - memset(o->hash_table.table, 0, s); + memset(o->hash_table.items, 0, s); - f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); - f->header->hash_table_size = htole64(s); + f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items)); + f->header->data_hash_table_size = htole64(s); return 0; } -static int journal_file_setup_bisect_table(JournalFile *f) { +static int journal_file_setup_field_hash_table(JournalFile *f) { uint64_t s, p; Object *o; int r; assert(f); - s = 
DEFAULT_BISECT_TABLE_SIZE; - r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); + s = DEFAULT_FIELD_HASH_TABLE_SIZE; + r = journal_file_append_object(f, + OBJECT_FIELD_HASH_TABLE, + offsetof(Object, hash_table.items) + s, + &o, &p); if (r < 0) return r; - o->object.type = htole64(OBJECT_BISECT_TABLE); - memset(o->bisect_table.table, 0, s); + memset(o->hash_table.items, 0, s); - f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); - f->header->bisect_table_size = htole64(s); + f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items)); + f->header->field_hash_table_size = htole64(s); return 0; } -static int journal_file_map_hash_table(JournalFile *f) { +static int journal_file_map_data_hash_table(JournalFile *f) { uint64_t s, p; void *t; int r; assert(f); - p = le64toh(f->header->hash_table_offset); - s = le64toh(f->header->hash_table_size); + p = le64toh(f->header->data_hash_table_offset); + s = le64toh(f->header->data_hash_table_size); - r = journal_file_map(f, - p, s, - &f->hash_table_window, NULL, &f->hash_table_window_size, - &t); + r = journal_file_move_to(f, + WINDOW_DATA_HASH_TABLE, + p, s, + &t); if (r < 0) return r; - f->hash_table = t; + f->data_hash_table = t; return 0; } -static int journal_file_map_bisect_table(JournalFile *f) { +static int journal_file_map_field_hash_table(JournalFile *f) { uint64_t s, p; void *t; int r; assert(f); - p = le64toh(f->header->bisect_table_offset); - s = le64toh(f->header->bisect_table_size); - - r = journal_file_map(f, - p, s, - &f->bisect_table_window, NULL, &f->bisect_table_window_size, - &t); + p = le64toh(f->header->field_hash_table_offset); + s = le64toh(f->header->field_hash_table_size); + r = journal_file_move_to(f, + WINDOW_FIELD_HASH_TABLE, + p, s, + &t); if (r < 0) return r; - f->bisect_table = t; + f->field_hash_table = t; return 0; } -static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t 
hash_index) { - uint64_t p; +static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) { + uint64_t p, h; int r; assert(f); assert(o); assert(offset > 0); - assert(o->object.type == htole64(OBJECT_DATA)); + assert(o->object.type == OBJECT_DATA); - o->data.head_entry_offset = o->data.tail_entry_offset = 0; - o->data.next_hash_offset = 0; + o->data.next_hash_offset = o->data.next_field_offset = 0; + o->data.entry_offset = o->data.entry_array_offset = 0; + o->data.n_entries = 0; - p = le64toh(f->hash_table[hash_index].tail_hash_offset); + h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)); + p = le64toh(f->data_hash_table[h].tail_hash_offset); if (p == 0) { /* Only entry in the hash table is easy */ - - o->data.prev_hash_offset = 0; - f->hash_table[hash_index].head_hash_offset = htole64(offset); + f->data_hash_table[h].head_hash_offset = htole64(offset); } else { - o->data.prev_hash_offset = htole64(p); - /* Temporarily move back to the previous data object, * to patch in pointer */ - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - o->data.next_hash_offset = offset; + o->data.next_hash_offset = htole64(offset); - r = journal_file_move_to_object(f, offset, OBJECT_DATA, &o); + r = journal_file_move_to_object(f, OBJECT_DATA, offset, &o); if (r < 0) return r; } - f->hash_table[hash_index].tail_hash_offset = htole64(offset); + f->data_hash_table[h].tail_hash_offset = htole64(offset); return 0; } -static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { - uint64_t hash, h, p, np; - uint64_t osize; - Object *o; +int journal_file_find_data_object_with_hash( + JournalFile *f, + const void *data, uint64_t size, uint64_t hash, + Object **ret, uint64_t *offset) { + uint64_t p, osize, h; int r; assert(f); @@ -573,14 +576,13 @@ static int journal_file_append_data(JournalFile
*f, const void *data, uint64_t s osize = offsetof(Object, data.payload) + size; - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)); + p = le64toh(f->data_hash_table[h].head_hash_offset); - while (p != 0) { - /* Look for this data object in the hash table */ + while (p > 0) { + Object *o; - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; @@ -596,21 +598,66 @@ static int journal_file_append_data(JournalFile *f, const void *data, uint64_t s if (offset) *offset = p; - return 0; + return 1; } p = le64toh(o->data.next_hash_offset); } - r = journal_file_append_object(f, osize, &o, &np); + return 0; +} + +int journal_file_find_data_object( + JournalFile *f, + const void *data, uint64_t size, + Object **ret, uint64_t *offset) { + + uint64_t hash; + + assert(f); + assert(data || size == 0); + + hash = hash64(data, size); + + return journal_file_find_data_object_with_hash(f, + data, size, hash, + ret, offset); +} + +static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { + uint64_t hash, p; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(data || size == 0); + + hash = hash64(data, size); + + r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p); + if (r < 0) + return r; + else if (r > 0) { + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 0; + } + + osize = offsetof(Object, data.payload) + size; + r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p); if (r < 0) return r; - o->object.type = htole64(OBJECT_DATA); o->data.hash = htole64(hash); memcpy(o->data.payload, data, size); - r = journal_file_link_data(f, o, np, h); + r = journal_file_link_data(f, o, p, hash); if (r < 0) return r; @@ -618,7 
+665,7 @@ static int journal_file_append_data(JournalFile *f, const void *data, uint64_t s *ret = o; if (offset) - *offset = np; + *offset = p; return 0; } @@ -630,92 +677,154 @@ uint64_t journal_file_entry_n_items(Object *o) { return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); } -static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { - uint64_t p, q; +static uint64_t journal_file_entry_array_n_items(Object *o) { + assert(o); + assert(o->object.type == OBJECT_ENTRY_ARRAY); + + return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t); +} + +static int link_entry_into_array(JournalFile *f, + uint64_t *first, + uint64_t *idx, + uint64_t p) { int r; + uint64_t n = 0, ap = 0, q, i, a, hidx; + Object *o; + assert(f); - assert(o); - assert(offset > 0); + assert(first); + assert(idx); + assert(p > 0); - p = le64toh(o->entry.items[i].object_offset); - if (p == 0) - return -EINVAL; + a = le64toh(*first); + i = hidx = le64toh(*idx); + while (a > 0) { + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; - o->entry.items[i].next_entry_offset = 0; + n = journal_file_entry_array_n_items(o); + if (i < n) { + o->entry_array.items[i] = htole64(p); + *idx = htole64(hidx + 1); + return 0; + } - /* Move to the data object */ - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + i -= n; + ap = a; + a = le64toh(o->entry_array.next_entry_array_offset); + } + + if (hidx > n) + n = (hidx+1) * 2; + else + n = n * 2; + + if (n < 4) + n = 4; + + r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY, + offsetof(Object, entry_array.items) + n * sizeof(uint64_t), + &o, &q); if (r < 0) return r; - q = le64toh(o->data.tail_entry_offset); - o->data.tail_entry_offset = htole64(offset); + o->entry_array.items[i] = htole64(p); - if (q == 0) - o->data.head_entry_offset = htole64(offset); + if (ap == 0) + *first = htole64(q); else { - uint64_t n, j; -
- /* Move to previous entry */ - r = journal_file_move_to_object(f, q, OBJECT_ENTRY, &o); + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o); if (r < 0) return r; - n = journal_file_entry_n_items(o); - for (j = 0; j < n; j++) - if (le64toh(o->entry.items[j].object_offset) == p) - break; + o->entry_array.next_entry_array_offset = htole64(q); + } - if (j >= n) - return -EBADMSG; + *idx = htole64(hidx + 1); + + return 0; +} - o->entry.items[j].next_entry_offset = offset; +static int link_entry_into_array_plus_one(JournalFile *f, + uint64_t *extra, + uint64_t *first, + uint64_t *idx, + uint64_t p) { + + int r; + + assert(f); + assert(extra); + assert(first); + assert(idx); + assert(p > 0); + + if (*idx == 0) + *extra = htole64(p); + else { + uint64_t i; + + i = htole64(le64toh(*idx) - 1); + r = link_entry_into_array(f, first, &i, p); + if (r < 0) + return r; } - /* Move back to original entry */ - r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); + *idx = htole64(le64toh(*idx) + 1); + return 0; +} + +static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { + uint64_t p; + int r; + assert(f); + assert(o); + assert(offset > 0); + + p = le64toh(o->entry.items[i].object_offset); + if (p == 0) + return -EINVAL; + + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - o->entry.items[i].prev_entry_offset = q; - return 0; + return link_entry_into_array_plus_one(f, + &o->data.entry_offset, + &o->data.entry_array_offset, + &o->data.n_entries, + offset); } static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { - uint64_t p, i, n, k, a, b; + uint64_t n, i; int r; assert(f); assert(o); assert(offset > 0); - assert(o->object.type == htole64(OBJECT_ENTRY)); + assert(o->object.type == OBJECT_ENTRY); /* Link up the entry itself */ - p = le64toh(f->header->tail_entry_offset); + r = link_entry_into_array(f, + &f->header->entry_array_offset, + &f->header->n_entries, + offset); +
if (r < 0) + return r; - o->entry.prev_entry_offset = f->header->tail_entry_offset; - o->entry.next_entry_offset = 0; + log_error("%s %lu", f->path, (unsigned long) f->header->n_entries); - if (p == 0) { - f->header->head_entry_offset = htole64(offset); + if (f->header->head_entry_realtime == 0) f->header->head_entry_realtime = o->entry.realtime; - } else { - /* Temporarily move back to the previous entry, to - * patch in pointer */ - - r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); - if (r < 0) - return r; - - o->entry.next_entry_offset = htole64(offset); - r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); - if (r < 0) - return r; - } - - f->header->tail_entry_offset = htole64(offset); f->header->tail_entry_realtime = o->entry.realtime; + f->header->tail_entry_monotonic = o->entry.monotonic; + + f->tail_entry_monotonic_valid = true; /* Link up the items */ n = journal_file_entry_n_items(o); @@ -725,18 +834,6 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { return r; } - /* Link up the entry in the bisect table */ - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; - - a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; - b = offset / k; - - for (; a <= b; a++) - f->bisect_table[a] = htole64(offset); - - f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); - return 0; } @@ -745,7 +842,7 @@ static int journal_file_append_entry_internal( const dual_timestamp *ts, uint64_t xor_hash, const EntryItem items[], unsigned n_items, - uint64_t *seqno, + uint64_t *seqnum, Object **ret, uint64_t *offset) { uint64_t np; uint64_t osize; @@ -754,18 +851,18 @@ static int journal_file_append_entry_internal( assert(f); assert(items || n_items == 0); + assert(ts); osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); - r = journal_file_append_object(f, osize, &o, &np); + r = journal_file_append_object(f, OBJECT_ENTRY, osize, 
&o, &np); if (r < 0) return r; - o->object.type = htole64(OBJECT_ENTRY); - o->entry.seqnum = htole64(journal_file_seqnum(f, seqno)); + o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum)); memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); - o->entry.realtime = htole64(ts ? ts->realtime : now(CLOCK_REALTIME)); - o->entry.monotonic = htole64(ts ? ts->monotonic : now(CLOCK_MONOTONIC)); + o->entry.realtime = htole64(ts->realtime); + o->entry.monotonic = htole64(ts->monotonic); o->entry.xor_hash = htole64(xor_hash); o->entry.boot_id = f->header->boot_id; @@ -782,15 +879,31 @@ static int journal_file_append_entry_internal( return 0; } -int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset) { +int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) { unsigned i; EntryItem *items; int r; uint64_t xor_hash = 0; + struct dual_timestamp _ts; assert(f); assert(iovec || n_iovec == 0); + if (!f->writable) + return -EPERM; + + if (!ts) { + dual_timestamp_get(&_ts); + ts = &_ts; + } + + if (f->tail_entry_monotonic_valid && + ts->monotonic < le64toh(f->header->tail_entry_monotonic)) + return -EINVAL; + + if (ts->realtime < le64toh(f->header->tail_entry_realtime)) + return -EINVAL; + items = new(EntryItem, n_iovec); if (!items) return -ENOMEM; @@ -808,7 +921,7 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st items[i].hash = o->data.hash; } - r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqno, ret, offset); + r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset); finish: free(items); @@ -816,198 +929,590 @@ finish: return r; } -int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { +static int 
generic_array_get(JournalFile *f, + uint64_t first, + uint64_t i, + Object **ret, uint64_t *offset) { + Object *o; - uint64_t lower, upper, p, n, k; + uint64_t p, a; int r; assert(f); - n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); - k = le64toh(f->header->arena_max_size) / n; + a = first; + while (a > 0) { + uint64_t n; - lower = 0; - upper = le64toh(f->header->last_bisect_offset)/k+1; - - while (lower < upper) { - k = (upper + lower) / 2; - p = le64toh(f->bisect_table[k]); + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; - if (p == 0) { - upper = k; - continue; + n = journal_file_entry_array_n_items(o); + if (i < n) { + p = le64toh(o->entry_array.items[i]); + break; } - r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + i -= n; + a = le64toh(o->entry_array.next_entry_array_offset); + } + + if (a <= 0 || p <= 0) + return 0; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; +} + +static int generic_array_get_plus_one(JournalFile *f, + uint64_t extra, + uint64_t first, + uint64_t i, + Object **ret, uint64_t *offset) { + + Object *o; + + assert(f); + + if (i == 0) { + int r; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o); if (r < 0) return r; - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; + if (ret) + *ret = o; - if (offset) - *offset = p; + if (offset) + *offset = extra; - return 1; - } else if (seqnum < o->entry.seqnum) - upper = k; - else if (seqnum > o->entry.seqnum) - lower = k+1; + return 1; } - assert(lower == upper); + return generic_array_get(f, first, i-1, ret, offset); +} - if (lower <= 0) - return 0; +enum { + TEST_FOUND, + TEST_LEFT, + TEST_RIGHT +}; - /* The object we are looking for is between - * bisect_table[lower-1] and bisect_table[lower] */ +static int generic_array_bisect(JournalFile *f, + uint64_t first, + uint64_t n, + uint64_t needle, + int 
(*test_object)(JournalFile *f, uint64_t p, uint64_t needle), + direction_t direction, + Object **ret, + uint64_t *offset, + uint64_t *idx) { + + uint64_t a, p, t = 0, i = 0, last_p = 0; + bool subtract_one = false; + Object *o, *array = NULL; + int r; - p = le64toh(f->bisect_table[lower-1]); + assert(f); + assert(test_object); - for (;;) { - r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); + a = first; + while (a > 0) { + uint64_t left, right, k, lp; + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array); if (r < 0) return r; - if (o->entry.seqnum == seqnum) { - if (ret) - *ret = o; + k = journal_file_entry_array_n_items(array); + right = MIN(k, n); + if (right <= 0) + return 0; - if (offset) - *offset = p; + i = right - 1; + lp = p = le64toh(array->entry_array.items[i]); + if (p <= 0) + return -EBADMSG; - return 1; + r = test_object(f, p, needle); + if (r < 0) + return r; - } if (seqnum < o->entry.seqnum) - return 0; + if (r == TEST_FOUND) + r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT; + + if (r == TEST_RIGHT) { + left = 0; + right -= 1; + for (;;) { + if (left == right) { + if (direction == DIRECTION_UP) + subtract_one = true; + + i = left; + goto found; + } + + assert(left < right); + + i = (left + right) / 2; + p = le64toh(array->entry_array.items[i]); + if (p <= 0) + return -EBADMSG; + + r = test_object(f, p, needle); + if (r < 0) + return r; - if (o->entry.next_entry_offset == 0) + if (r == TEST_FOUND) + r = direction == DIRECTION_DOWN ? 
TEST_RIGHT : TEST_LEFT; + + if (r == TEST_RIGHT) + right = i; + else + left = i + 1; + } + } + + if (k > n) return 0; - p = le64toh(o->entry.next_entry_offset); + last_p = lp; + + n -= k; + t += k; + a = le64toh(array->entry_array.next_entry_array_offset); } return 0; + +found: + if (subtract_one && t == 0 && i == 0) + return 0; + + if (subtract_one && i == 0) + p = last_p; + else if (subtract_one) + p = le64toh(array->entry_array.items[i-1]); + else + p = le64toh(array->entry_array.items[i]); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + if (idx) + *idx = t + i - (subtract_one ? 1 : 0); + + return 1; } -int journal_file_next_entry(JournalFile *f, Object *o, direction_t direction, Object **ret, uint64_t *offset) { - uint64_t np; +static int generic_array_bisect_plus_one(JournalFile *f, + uint64_t extra, + uint64_t first, + uint64_t n, + uint64_t needle, + int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle), + direction_t direction, + Object **ret, + uint64_t *offset, + uint64_t *idx) { + int r; assert(f); + assert(test_object); - if (!o) - np = le64toh(direction == DIRECTION_DOWN ? - f->header->head_entry_offset : - f->header->tail_entry_offset); - else { - if (le64toh(o->object.type) != OBJECT_ENTRY) - return -EINVAL; + if (n <= 0) + return 0; - np = le64toh(direction == DIRECTION_DOWN ? 
- o->entry.next_entry_offset : - o->entry.prev_entry_offset); - } + /* This bisects the array in object 'first', but first checks + * an extra */ + + r = test_object(f, extra, needle); + if (r < 0) + return r; + else if (r == TEST_FOUND) { + Object *o; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o); + if (r < 0) + return r; + + if (ret) + *ret = o; - if (np == 0) + if (offset) + *offset = extra; + } else if (r == TEST_RIGHT) return 0; - r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o); + r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx); + + if (r > 0 && idx) + (*idx) ++; + + return r; +} + +static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) { + Object *o; + int r; + + assert(f); + assert(p > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); if (r < 0) return r; - if (ret) - *ret = o; + if (le64toh(o->entry.seqnum) == needle) + return TEST_FOUND; + else if (le64toh(o->entry.seqnum) < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} - if (offset) - *offset = np; +int journal_file_move_to_entry_by_seqnum( + JournalFile *f, + uint64_t seqnum, + direction_t direction, + Object **ret, + uint64_t *offset) { + + return generic_array_bisect(f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + seqnum, + test_object_seqnum, + direction, + ret, offset, NULL); +} - return 1; +static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) { + Object *o; + int r; + + assert(f); + assert(p > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + if (le64toh(o->entry.realtime) == needle) + return TEST_FOUND; + else if (le64toh(o->entry.realtime) < needle) + return TEST_LEFT; + else + return TEST_RIGHT; } -int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { - uint64_t np; +int journal_file_move_to_entry_by_realtime( + JournalFile *f, + uint64_t realtime, +
direction_t direction, + Object **ret, + uint64_t *offset) { + + return generic_array_bisect(f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + realtime, + test_object_realtime, + direction, + ret, offset, NULL); +} + +static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) { + Object *o; + int r; + + assert(f); + assert(p > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + if (le64toh(o->entry.monotonic) == needle) + return TEST_FOUND; + else if (le64toh(o->entry.monotonic) < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} + +int journal_file_move_to_entry_by_monotonic( + JournalFile *f, + sd_id128_t boot_id, + uint64_t monotonic, + direction_t direction, + Object **ret, + uint64_t *offset) { + + char t[9+32+1] = "_BOOT_ID="; + Object *o; + int r; + + sd_id128_to_string(boot_id, t + 9); + + r = journal_file_find_data_object(f, t, strlen(t), &o, NULL); + if (r < 0) + return r; + else if (r == 0) + return -ENOENT; + + return generic_array_bisect_plus_one(f, + le64toh(o->data.entry_offset), + le64toh(o->data.entry_array_offset), + le64toh(o->data.n_entries), + monotonic, + test_object_monotonic, + direction, + ret, offset, NULL); +} + +static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) { + assert(f); + assert(p > 0); + + if (p == needle) + return TEST_FOUND; + else if (p < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} + +int journal_file_next_entry( + JournalFile *f, + Object *o, uint64_t p, + direction_t direction, + Object **ret, uint64_t *offset) { + + uint64_t i, n; int r; assert(f); + assert(p > 0 || !o); + + n = le64toh(f->header->n_entries); + if (n <= 0) + return 0; if (!o) - np = le64toh(f->header->tail_entry_offset); + i = direction == DIRECTION_DOWN ?
0 : n - 1; else { - if (le64toh(o->object.type) != OBJECT_ENTRY) + if (o->object.type != OBJECT_ENTRY) return -EINVAL; - np = le64toh(o->entry.prev_entry_offset); + r = generic_array_bisect(f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + p, + test_object_offset, + DIRECTION_DOWN, + NULL, NULL, + &i); + if (r <= 0) + return r; + + if (direction == DIRECTION_DOWN) { + if (i >= n - 1) + return 0; + + i++; + } else { + if (i <= 0) + return 0; + + i--; + } } - if (np == 0) - return 0; + /* And jump to it */ + return generic_array_get(f, + le64toh(f->header->entry_array_offset), + i, + ret, offset); +} - r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o); - if (r < 0) +int journal_file_skip_entry( + JournalFile *f, + Object *o, uint64_t p, + int64_t skip, + Object **ret, uint64_t *offset) { + + uint64_t i, n; + int r; + + assert(f); + assert(o); + assert(p > 0); + + if (o->object.type != OBJECT_ENTRY) + return -EINVAL; + + r = generic_array_bisect(f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + p, + test_object_offset, + DIRECTION_DOWN, + NULL, NULL, + &i); + if (r <= 0) return r; - if (ret) - *ret = o; + /* Calculate new index */ + if (skip < 0) { + if ((uint64_t) -skip >= i) + i = 0; + else + i = i - (uint64_t) -skip; + } else + i += (uint64_t) skip; - if (offset) - *offset = np; + n = le64toh(f->header->n_entries); + if (n <= 0) + return -EBADMSG; - return 1; + if (i >= n) + i = n-1; + + return generic_array_get(f, + le64toh(f->header->entry_array_offset), + i, + ret, offset); } -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, direction_t direction, Object **ret, uint64_t *offset) { - uint64_t p, osize, hash, h; +int journal_file_next_entry_for_data( + JournalFile *f, + Object *o, uint64_t p, + uint64_t data_offset, + direction_t direction, + Object **ret, uint64_t *offset) { + + uint64_t n, i; int r; + Object *d; assert(f); - assert(data || size == 0); + assert(p 
> 0 || !o); - osize = offsetof(Object, data.payload) + size; + r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d); + if (r <= 0) + return r; - hash = hash64(data, size); - h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); - p = le64toh(f->hash_table[h].head_hash_offset); + n = le64toh(d->data.n_entries); + if (n <= 0) + return n; - while (p != 0) { - Object *o; + if (!o) + i = direction == DIRECTION_DOWN ? 0 : n - 1; + else { + if (o->object.type != OBJECT_ENTRY) + return -EINVAL; - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); - if (r < 0) + r = generic_array_bisect_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + le64toh(d->data.n_entries), + p, + test_object_offset, + DIRECTION_DOWN, + NULL, NULL, + &i); + + if (r <= 0) return r; - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + if (direction == DIRECTION_DOWN) { + if (i >= n - 1) + return 0; - if (le64toh(o->data.hash) != hash) - return -EBADMSG; + i++; + } else { + if (i <= 0) + return 0; - p = le64toh(direction == DIRECTION_DOWN ? 
- o->data.head_entry_offset : - o->data.tail_entry_offset); + i--; + } - if (p == 0) - return 0; + } - r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); - if (r < 0) - return r; + return generic_array_get_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + i, + ret, offset); +} - if (ret) - *ret = o; +int journal_file_move_to_entry_by_seqnum_for_data( + JournalFile *f, + uint64_t data_offset, + uint64_t seqnum, + direction_t direction, + Object **ret, uint64_t *offset) { - if (offset) - *offset = p; + Object *d; + int r; - return 1; - } + r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d); + if (r <= 0) + return r; - p = le64toh(o->data.next_hash_offset); - } + return generic_array_bisect_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + le64toh(d->data.n_entries), + seqnum, + test_object_seqnum, + direction, + ret, offset, NULL); +} - return 0; +int journal_file_move_to_entry_by_realtime_for_data( + JournalFile *f, + uint64_t data_offset, + uint64_t realtime, + direction_t direction, + Object **ret, uint64_t *offset) { + + Object *d; + int r; + + r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d); + if (r <= 0) + return r; + + return generic_array_bisect_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + le64toh(d->data.n_entries), + realtime, + test_object_realtime, + direction, + ret, offset, NULL); } void journal_file_dump(JournalFile *f) { @@ -1018,18 +1523,24 @@ void journal_file_dump(JournalFile *f) { assert(f); - printf("File ID: %s\n" + printf("File Path: %s\n" + "File ID: %s\n" "Machine ID: %s\n" "Boot ID: %s\n" - "Arena size: %llu\n", + "Arena size: %llu\n" + "Objects: %lu\n" + "Entries: %lu\n", + f->path, sd_id128_to_string(f->header->file_id, a), sd_id128_to_string(f->header->machine_id, b), sd_id128_to_string(f->header->boot_id, c), - (unsigned long long) le64toh(f->header->arena_size)); + (unsigned long long) 
le64toh(f->header->arena_size), + (unsigned long) le64toh(f->header->n_objects), + (unsigned long) le64toh(f->header->n_entries)); - p = le64toh(f->header->head_object_offset); + p = le64toh(f->header->arena_offset); while (p != 0) { - r = journal_file_move_to_object(f, p, -1, &o); + r = journal_file_move_to_object(f, -1, p, &o); if (r < 0) goto fail; @@ -1050,12 +1561,16 @@ void journal_file_dump(JournalFile *f) { (unsigned long long) le64toh(o->entry.realtime)); break; - case OBJECT_HASH_TABLE: - printf("Type: OBJECT_HASH_TABLE\n"); + case OBJECT_FIELD_HASH_TABLE: + printf("Type: OBJECT_FIELD_HASH_TABLE\n"); break; - case OBJECT_BISECT_TABLE: - printf("Type: OBJECT_BISECT_TABLE\n"); + case OBJECT_DATA_HASH_TABLE: + printf("Type: OBJECT_DATA_HASH_TABLE\n"); + break; + + case OBJECT_ENTRY_ARRAY: + printf("Type: OBJECT_ENTRY_ARRAY\n"); break; } @@ -1153,20 +1668,20 @@ int journal_file_open( if (newly_created) { - r = journal_file_setup_hash_table(f); + r = journal_file_setup_field_hash_table(f); if (r < 0) goto fail; - r = journal_file_setup_bisect_table(f); + r = journal_file_setup_data_hash_table(f); if (r < 0) goto fail; } - r = journal_file_map_hash_table(f); + r = journal_file_map_field_hash_table(f); if (r < 0) goto fail; - r = journal_file_map_bisect_table(f); + r = journal_file_map_data_hash_table(f); if (r < 0) goto fail; @@ -1381,5 +1896,8 @@ finish: free(list); + if (d) + closedir(d); + return r; } diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 795a446dc8..664f917bb0 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -28,6 +28,23 @@ #include "util.h" #include "sd-id128.h" +typedef struct Window { + void *ptr; + uint64_t offset; + uint64_t size; +} Window; + +enum { + WINDOW_UNKNOWN = OBJECT_UNUSED, + WINDOW_DATA = OBJECT_DATA, + WINDOW_ENTRY = OBJECT_ENTRY, + WINDOW_DATA_HASH_TABLE = OBJECT_DATA_HASH_TABLE, + WINDOW_FIELD_HASH_TABLE = OBJECT_FIELD_HASH_TABLE, + WINDOW_ENTRY_ARRAY = OBJECT_ENTRY_ARRAY, + 
WINDOW_HEADER, + _WINDOW_MAX +}; + typedef struct JournalFile { int fd; char *path; @@ -36,20 +53,13 @@ typedef struct JournalFile { int flags; int prot; bool writable; + bool tail_entry_monotonic_valid; Header *header; + HashItem *data_hash_table; + HashItem *field_hash_table; - HashItem *hash_table; - void *hash_table_window; - uint64_t hash_table_window_size; - - uint64_t *bisect_table; - void *bisect_table_window; - uint64_t bisect_table_window_size; - - void *window; - uint64_t window_offset; - uint64_t window_size; + Window windows[_WINDOW_MAX]; uint64_t current_offset; } JournalFile; @@ -60,20 +70,28 @@ typedef enum direction { } direction_t; int journal_file_open(const char *fname, int flags, mode_t mode, JournalFile *template, JournalFile **ret); - void journal_file_close(JournalFile *j); -int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Object **ret); +int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret); uint64_t journal_file_entry_n_items(Object *o); int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset); -int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset); +int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset); +int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret, uint64_t *offset); -int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, direction_t direction, Object **ret, uint64_t *offset); +int journal_file_next_entry(JournalFile *f, Object *o, uint64_t p, direction_t direction, Object **ret, uint64_t *offset); +int journal_file_skip_entry(JournalFile *f, Object *o, uint64_t p, int64_t skip, Object **ret, uint64_t *offset); -int journal_file_next_entry(JournalFile *f, Object *o, 
direction_t direction, Object **ret, uint64_t *offset); +int journal_file_next_entry_for_data(JournalFile *f, Object *o, uint64_t p, uint64_t data_offset, direction_t direction, Object **ret, uint64_t *offset); + +int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset); +int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset); +int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret, uint64_t *offset); + +int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, uint64_t data_offset, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset); +int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, uint64_t data_offset, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset); void journal_file_dump(JournalFile *f); @@ -81,5 +99,4 @@ int journal_file_rotate(JournalFile **f); int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free); - #endif diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 3e5ee1af96..9220efdfec 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { } } - SD_JOURNAL_FOREACH_BEGIN(j) { + SD_JOURNAL_FOREACH(j) { const void *data; size_t length; @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) { free(cursor); sd_journal_get_realtime_usec(j, &realtime); - sd_journal_get_monotonic_usec(j, &monotonic); + sd_journal_get_monotonic_usec(j, &monotonic, NULL); printf("realtime: %llu\n" "monotonic: %llu\n", (unsigned long long) realtime, @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) { SD_JOURNAL_FOREACH_DATA(j, data, length) printf("\t%.*s\n", (int) length, (const char*) data); - } SD_JOURNAL_FOREACH_END(j); + } finish: if (j) diff --git 
a/src/journal/journald.c b/src/journal/journald.c index ede314a55f..89d8bee2a2 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -144,14 +144,14 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str *audit_session = NULL, *audit_loginuid = NULL, *syslog_priority = NULL, *syslog_facility = NULL, *exe = NULL, *cgroup = NULL; - struct iovec iovec[16]; + struct iovec iovec[17]; unsigned n = 0; char idbuf[33]; sd_id128_t id; int r; char *t; int priority = LOG_USER | LOG_INFO; - uid_t loginuid = 0; + uid_t loginuid = 0, realuid = 0; JournalFile *f; parse_syslog_priority((char**) &buf, &priority); @@ -171,18 +171,20 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str uint32_t session; char *path; - if (asprintf(&pid, "PID=%lu", (unsigned long) ucred->pid) >= 0) + realuid = ucred->uid; + + if (asprintf(&pid, "_PID=%lu", (unsigned long) ucred->pid) >= 0) IOVEC_SET_STRING(iovec[n++], pid); - if (asprintf(&uid, "UID=%lu", (unsigned long) ucred->uid) >= 0) + if (asprintf(&uid, "_UID=%lu", (unsigned long) ucred->uid) >= 0) IOVEC_SET_STRING(iovec[n++], uid); - if (asprintf(&gid, "GID=%lu", (unsigned long) ucred->gid) >= 0) + if (asprintf(&gid, "_GID=%lu", (unsigned long) ucred->gid) >= 0) IOVEC_SET_STRING(iovec[n++], gid); r = get_process_comm(ucred->pid, &t); if (r >= 0) { - comm = strappend("COMM=", t); + comm = strappend("_COMM=", t); if (comm) IOVEC_SET_STRING(iovec[n++], comm); free(t); @@ -190,7 +192,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str r = get_process_exe(ucred->pid, &t); if (r >= 0) { - exe = strappend("EXE=", t); + exe = strappend("_EXE=", t); if (comm) IOVEC_SET_STRING(iovec[n++], exe); free(t); @@ -198,7 +200,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str r = get_process_cmdline(ucred->pid, LINE_MAX, false, &t); if (r >= 0) { - cmdline = strappend("CMDLINE=", t); + cmdline = strappend("_CMDLINE=", t); 
if (cmdline) IOVEC_SET_STRING(iovec[n++], cmdline); free(t); @@ -206,17 +208,17 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str r = audit_session_from_pid(ucred->pid, &session); if (r >= 0) - if (asprintf(&audit_session, "AUDIT_SESSION=%lu", (unsigned long) session) >= 0) + if (asprintf(&audit_session, "_AUDIT_SESSION=%lu", (unsigned long) session) >= 0) IOVEC_SET_STRING(iovec[n++], audit_session); r = audit_loginuid_from_pid(ucred->pid, &loginuid); if (r >= 0) - if (asprintf(&audit_loginuid, "AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0) + if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0) IOVEC_SET_STRING(iovec[n++], audit_loginuid); r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, ucred->pid, &path); if (r >= 0) { - cgroup = strappend("SYSTEMD_CGROUP=", path); + cgroup = strappend("_SYSTEMD_CGROUP=", path); if (cgroup) IOVEC_SET_STRING(iovec[n++], cgroup); free(path); @@ -224,7 +226,7 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str } if (tv) { - if (asprintf(&source_time, "SOURCE_REALTIME_TIMESTAMP=%llu", + if (asprintf(&source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv)) >= 0) IOVEC_SET_STRING(iovec[n++], source_time); } @@ -234,23 +236,23 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str * anyway. However, we need this indexed, too. 
*/ r = sd_id128_get_boot(&id); if (r >= 0) - if (asprintf(&boot_id, "BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) + if (asprintf(&boot_id, "_BOOT_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) IOVEC_SET_STRING(iovec[n++], boot_id); r = sd_id128_get_machine(&id); if (r >= 0) - if (asprintf(&machine_id, "MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) + if (asprintf(&machine_id, "_MACHINE_ID=%s", sd_id128_to_string(id, idbuf)) >= 0) IOVEC_SET_STRING(iovec[n++], machine_id); t = gethostname_malloc(); if (t) { - hostname = strappend("HOSTNAME=", t); + hostname = strappend("_HOSTNAME=", t); if (hostname) IOVEC_SET_STRING(iovec[n++], hostname); free(t); } - f = find_journal(s, loginuid); + f = find_journal(s, realuid == 0 ? 0 : loginuid); if (!f) log_warning("Dropping message, as we can't find a place to store the data."); else { diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 5d518a3870..85c57170d5 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -40,9 +40,34 @@ struct Match { LIST_FIELDS(Match, matches); }; +typedef enum location_type { + LOCATION_HEAD, + LOCATION_TAIL, + LOCATION_DISCRETE +} location_type_t; + +typedef struct Location { + location_type_t type; + + uint64_t seqnum; + sd_id128_t seqnum_id; + bool seqnum_set; + + uint64_t realtime; + bool realtime_set; + + uint64_t monotonic; + sd_id128_t boot_id; + bool monotonic_set; + + uint64_t xor_hash; + bool xor_hash_set; +} Location; + struct sd_journal { Hashmap *files; + Location current_location; JournalFile *current_file; uint64_t current_field; @@ -50,7 +75,7 @@ struct sd_journal { unsigned n_matches; }; -static void reset_location(sd_journal *j) { +static void detach_location(sd_journal *j) { Iterator i; JournalFile *f; @@ -63,8 +88,66 @@ static void reset_location(sd_journal *j) { f->current_offset = 0; } +static void reset_location(sd_journal *j) { + assert(j); + + detach_location(j); + zero(j->current_location); +} + +static void 
init_location(Location *l, JournalFile *f, Object *o) { + assert(l); + assert(f); + assert(o->object.type == OBJECT_ENTRY); + + l->type = LOCATION_DISCRETE; + l->seqnum = le64toh(o->entry.seqnum); + l->seqnum_id = f->header->seqnum_id; + l->realtime = le64toh(o->entry.realtime); + l->monotonic = le64toh(o->entry.monotonic); + l->boot_id = le64toh(o->entry.boot_id); + l->xor_hash = le64toh(o->entry.xor_hash); + + l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true; +} + +static void set_location(sd_journal *j, JournalFile *f, Object *o, uint64_t offset) { + assert(j); + assert(f); + assert(o); + + init_location(&j->current_location, f, o); + + j->current_file = f; + j->current_field = 0; + + f->current_offset = offset; +} + +static int same_field(const void *_a, size_t s, const void *_b, size_t t) { + const uint8_t *a = _a, *b = _b; + size_t j; + bool a_good = false, b_good = false, different = false; + + for (j = 0; j < s && j < t; j++) { + + if (a[j] == '=') + a_good = true; + if (b[j] == '=') + b_good = true; + if (a[j] != b[j]) + different = true; + + if (a_good && b_good) + return different ? 
0 : 1; + } + + return -EINVAL; +} + int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { - Match *m; + Match *m, *after = NULL; + uint64_t le_hash; assert(j); @@ -73,6 +156,23 @@ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { assert(data); + le_hash = htole64(hash64(data, size)); + + LIST_FOREACH(matches, m, j->matches) { + int r; + + if (m->le_hash == le_hash && + m->size == size && + memcmp(m->data, data, size) == 0) + return 0; + + r = same_field(data, size, m->data, m->size); + if (r < 0) + return r; + else if (r > 0) + after = m; + } + m = new0(Match, 1); if (!m) return -ENOMEM; @@ -86,12 +186,14 @@ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { } memcpy(m->data, data, size); - m->le_hash = hash64(m->data, size); + m->le_hash = le_hash; - LIST_PREPEND(Match, matches, j->matches, m); + /* Matches for the same fields we order adjacent to each + * other */ + LIST_INSERT_AFTER(Match, matches, j->matches, after, m); j->n_matches ++; - reset_location(j); + detach_location(j); return 0; } @@ -109,14 +211,19 @@ void sd_journal_flush_matches(sd_journal *j) { j->n_matches = 0; - reset_location(j); + detach_location(j); } -static int compare_order(JournalFile *af, Object *ao, uint64_t ap, - JournalFile *bf, Object *bo, uint64_t bp) { +static int compare_order(JournalFile *af, Object *ao, + JournalFile *bf, Object *bo) { uint64_t a, b; + assert(af); + assert(ao); + assert(bf); + assert(bo); + /* We operate on two different files here, hence we can access * two objects at the same time, which we normally can't. 
* @@ -179,99 +286,302 @@ static int compare_order(JournalFile *af, Object *ao, uint64_t ap, return 0; } -static int move_to_next_with_matches(sd_journal *j, JournalFile *f, direction_t direction, Object **o, uint64_t *p) { +static int compare_with_location(JournalFile *af, Object *ao, Location *l) { + uint64_t a; + + assert(af); + assert(ao); + assert(l); + assert(l->type == LOCATION_DISCRETE); + + if (l->monotonic_set && + sd_id128_equal(ao->entry.boot_id, l->boot_id) && + l->realtime_set && + le64toh(ao->entry.realtime) == l->realtime && + l->xor_hash_set && + le64toh(ao->entry.xor_hash) == l->xor_hash) + return 0; + + if (l->seqnum_set && + sd_id128_equal(af->header->seqnum_id, l->seqnum_id)) { + + a = le64toh(ao->entry.seqnum); + + if (a < l->seqnum) + return -1; + if (a > l->seqnum) + return 1; + } + + if (l->monotonic_set && + sd_id128_equal(ao->entry.boot_id, l->boot_id)) { + + a = le64toh(ao->entry.monotonic); + + if (a < l->monotonic) + return -1; + if (a > l->monotonic) + return 1; + } + + if (l->realtime_set) { + + a = le64toh(ao->entry.realtime); + + if (a < l->realtime) + return -1; + if (a > l->realtime) + return 1; + } + + if (l->xor_hash_set) { + a = le64toh(ao->entry.xor_hash); + + if (a < l->xor_hash) + return -1; + if (a > l->xor_hash) + return 1; + } + + return 0; +} + +static int find_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { + Object *o = NULL; + uint64_t p = 0; int r; - uint64_t cp; - Object *c; assert(j); - assert(f); - assert(o); - assert(p); if (!j->matches) { - /* No matches is easy, just go on to the next entry */ + /* No matches is simple */ + + if (j->current_location.type == LOCATION_HEAD) + r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, &p); + else if (j->current_location.type == LOCATION_TAIL) + r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, &p); + else if (j->current_location.seqnum_set && + sd_id128_equal(j->current_location.seqnum_id, 
f->header->seqnum_id)) + r = journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, &o, &p); + else if (j->current_location.monotonic_set) + r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p); + else if (j->current_location.realtime_set) + r = journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, &o, &p); + else + r = journal_file_next_entry(f, NULL, 0, direction, &o, &p); + + if (r <= 0) + return r; + + } else { + Match *m, *term_match = NULL; + Object *to = NULL; + uint64_t tp = 0; + + /* We have matches, first, let's jump to the monotonic + * position if we have any, since it implies a + * match. */ - if (f->current_offset > 0) { - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &c); - if (r < 0) + if (j->current_location.type == LOCATION_DISCRETE && + j->current_location.monotonic_set) { + + r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, &o, &p); + if (r <= 0) return r; - } else - c = NULL; + } + + LIST_FOREACH(matches, m, j->matches) { + Object *c, *d; + uint64_t cp, dp; + + r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->le_hash, &d, &dp); + if (r <= 0) + return r; + + if (j->current_location.type == LOCATION_HEAD) + r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, &c, &cp); + else if (j->current_location.type == LOCATION_TAIL) + r = journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, &c, &cp); + else if (j->current_location.seqnum_set && + sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + r = journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, &c, &cp); + else if (j->current_location.realtime_set) + r = journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, &c, &cp); + else + r = 
journal_file_next_entry_for_data(f, NULL, 0, dp, direction, &c, &cp); + + if (!term_match) { + term_match = m; + + if (r > 0) { + to = c; + tp = cp; + } + } else if (same_field(term_match->data, term_match->size, m->data, m->size)) { + + /* Same field as previous match... */ + if (r > 0) { + + /* Find the earliest of the OR matches */ + + if (!to || + (direction == DIRECTION_DOWN && cp < tp) || + (direction == DIRECTION_UP && cp > tp)) { + to = c; + tp = tp; + } + + } - return journal_file_next_entry(f, c, direction, o, p); + } else { + + /* Previous term is finished, did anything match? */ + if (!to) + return 0; + + /* Find the last of the AND matches */ + if (!o || + (direction == DIRECTION_DOWN && tp > p) || + (direction == DIRECTION_UP && tp < p)) { + o = to; + p = tp; + } + + term_match = m; + + if (r > 0) { + to = c; + tp = cp; + } else { + to = NULL; + tp = 0; + } + } + } + + /* Last term is finished, did anything match? */ + if (!to) + return 0; + + if (!o || + (direction == DIRECTION_DOWN && tp > p) || + (direction == DIRECTION_UP && tp < p)) { + o = to; + p = tp; + } + + if (!o) + return 0; } - /* So there are matches we have to adhere to, let's find the - * first entry that matches all of them */ + if (ret) + *ret = o; - if (f->current_offset > 0) - cp = f->current_offset; - else { - r = journal_file_find_first_entry(f, j->matches->data, j->matches->size, direction, &c, &cp); + if (offset) + *offset = p; + + return 1; +} + +static int next_with_matches(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { + int r; + uint64_t cp; + Object *c; + + assert(j); + assert(f); + assert(ret); + assert(offset); + + c = *ret; + cp = *offset; + + if (!j->matches) { + /* No matches is easy */ + + r = journal_file_next_entry(f, c, cp, direction, &c, &cp); if (r <= 0) return r; - /* We can shortcut this if there's only one match */ - if (j->n_matches == 1) { - *o = c; - *p = cp; - return r; - } + if (ret) + *ret = c; + if (offset) + 
*offset = cp; + return 1; } + /* So there are matches we have to adhere to, let's find the + * first entry that matches all of them */ + for (;;) { uint64_t np, n; - bool found; - Match *m; - - r = journal_file_move_to_object(f, cp, OBJECT_ENTRY, &c); - if (r < 0) - return r; + bool found, term_result = false; + Match *m, *term_match = NULL; n = journal_file_entry_n_items(c); /* Make sure we don't match the entry we are starting * from. */ - found = f->current_offset != cp; + found = cp > *offset; np = 0; LIST_FOREACH(matches, m, j->matches) { uint64_t q, k; + /* Let's check if this is the beginning of a + * new term, i.e. has a different field prefix + * than the preceding match. */ + if (!term_match) { + term_match = m; + term_result = false; + } else if (!same_field(term_match->data, term_match->size, m->data, m->size)) { + if (!term_result) + found = false; + + term_match = m; + term_result = false; + } + for (k = 0; k < n; k++) if (c->entry.items[k].hash == m->le_hash) break; if (k >= n) { - /* Hmm, didn't find any field that matched, so ignore - * this match. Go on with next match */ - - found = false; + /* Hmm, didn't find any field that + * matched this rule, so ignore this + * match. Go on with next match */ continue; } + term_result = true; + /* Hmm, so, this field matched, let's remember * where we'd have to try next, in case the other * matches are not OK */ - if (direction == DIRECTION_DOWN) { - q = le64toh(c->entry.items[k].next_entry_offset); - - if (q > np) - np = q; - } else { - q = le64toh(c->entry.items[k].prev_entry_offset); + r = journal_file_next_entry_for_data(f, c, cp, le64toh(c->entry.items[k].object_offset), direction, NULL, &q); + if (r > 0) { - if (q != 0 && (np == 0 || q < np)) - np = q; + if (direction == DIRECTION_DOWN) { + if (q > np) + np = q; + } else { + if (np == 0 || q < np) + np = q; + } } } + /* Check the last term */ + if (term_match && term_result) + found = true; + /* Did this entry match against all matches?
*/ if (found) { - *o = c; - *p = cp; + if (ret) + *ret = c; + if (offset) + *offset = cp; return 1; } @@ -285,6 +595,62 @@ static int move_to_next_with_matches(sd_journal *j, JournalFile *f, direction_t } } +static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction, Object **ret, uint64_t *offset) { + Object *c; + uint64_t cp; + int compare_value, r; + + assert(j); + assert(f); + + if (f->current_offset > 0) { + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &c); + if (r < 0) + return r; + + cp = f->current_offset; + + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + + compare_value = 1; + } else { + r = find_location(j, f, direction, &c, &cp); + if (r <= 0) + return r; + + compare_value = 0; + } + + for (;;) { + bool found; + + if (j->current_location.type == LOCATION_DISCRETE) { + int k; + + k = compare_with_location(f, c, &j->current_location); + if (direction == DIRECTION_DOWN) + found = k >= compare_value; + else + found = k <= -compare_value; + } else + found = true; + + if (found) { + if (ret) + *ret = c; + if (offset) + *offset = cp; + return 1; + } + + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + } +} + static int real_journal_next(sd_journal *j, direction_t direction) { JournalFile *f, *new_current = NULL; Iterator i; @@ -297,57 +663,88 @@ static int real_journal_next(sd_journal *j, direction_t direction) { HASHMAP_FOREACH(f, j->files, i) { Object *o; uint64_t p; + bool found; - r = move_to_next_with_matches(j, f, direction, &o, &p); + r = next_beyond_location(j, f, direction, &o, &p); if (r < 0) return r; else if (r == 0) continue; - if (!new_current || - compare_order(new_current, new_entry, new_offset, f, o, p) > 0) { + if (!new_current) + found = true; + else { + int k; + + k = compare_order(f, o, new_current, new_entry); + + if (direction == DIRECTION_DOWN) + found = k < 0; + else + found = k > 0; + } + + if (found) { new_current = f; new_entry 
= o; new_offset = p; } } - if (new_current) { - j->current_file = new_current; - j->current_file->current_offset = new_offset; - j->current_field = 0; + if (!new_current) + return 0; - /* Skip over any identical entries in the other files too */ + set_location(j, new_current, new_entry, new_offset); - HASHMAP_FOREACH(f, j->files, i) { - Object *o; - uint64_t p; + return 1; +} - if (j->current_file == f) - continue; +int sd_journal_next(sd_journal *j) { + return real_journal_next(j, DIRECTION_DOWN); +} - r = move_to_next_with_matches(j, f, direction, &o, &p); - if (r < 0) - return r; - else if (r == 0) - continue; +int sd_journal_previous(sd_journal *j) { + return real_journal_next(j, DIRECTION_UP); +} - if (compare_order(new_current, new_entry, new_offset, f, o, p) == 0) - f->current_offset = p; - } +int sd_journal_next_skip(sd_journal *j, uint64_t skip) { + int c = 0, r; - return 1; + assert(j); + + while (skip > 0) { + r = sd_journal_next(j); + if (r < 0) + return r; + + if (r == 0) + return c; + + skip--; + c++; } - return 0; + return c; } -int sd_journal_next(sd_journal *j) { - return real_journal_next(j, DIRECTION_DOWN); -} +int sd_journal_previous_skip(sd_journal *j, uint64_t skip) { + int c = 0, r; -int sd_journal_previous(sd_journal *j) { - return real_journal_next(j, DIRECTION_UP); + assert(j); + + while (skip > 0) { + r = sd_journal_previous(j); + if (r < 0) + return r; + + if (r == 0) + return c; + + skip--; + c++; + } + + return 1; } int sd_journal_get_cursor(sd_journal *j, char **cursor) { @@ -361,7 +758,7 @@ int sd_journal_get_cursor(sd_journal *j, char **cursor) { if (!j->current_file || j->current_file->current_offset <= 0) return -EADDRNOTAVAIL; - r = journal_file_move_to_object(j->current_file, j->current_file->current_offset, OBJECT_ENTRY, &o); + r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o); if (r < 0) return r; @@ -380,8 +777,150 @@ int sd_journal_get_cursor(sd_journal *j, char **cursor) { 
return 1; } -int sd_journal_set_cursor(sd_journal *j, const char *cursor) { - return -EINVAL; +int sd_journal_seek_cursor(sd_journal *j, const char *cursor) { + char *w; + size_t l; + char *state; + unsigned long long seqnum, monotonic, realtime, xor_hash; + bool + seqnum_id_set = false, + seqnum_set = false, + boot_id_set = false, + monotonic_set = false, + realtime_set = false, + xor_hash_set = false; + sd_id128_t seqnum_id, boot_id; + + assert(j); + assert(cursor); + + FOREACH_WORD_SEPARATOR(w, l, cursor, ";", state) { + char *item; + int k = 0; + + if (l < 2 || w[1] != '=') + return -EINVAL; + + item = strndup(w, l); + if (!item) + return -ENOMEM; + + switch (w[0]) { + + case 's': + seqnum_id_set = true; + k = sd_id128_from_string(w+2, &seqnum_id); + break; + + case 'i': + seqnum_set = true; + if (sscanf(w+2, "%llx", &seqnum) != 1) + k = -EINVAL; + break; + + case 'b': + boot_id_set = true; + k = sd_id128_from_string(w+2, &boot_id); + break; + + case 'm': + monotonic_set = true; + if (sscanf(w+2, "%llx", &monotonic) != 1) + k = -EINVAL; + break; + + case 't': + realtime_set = true; + if (sscanf(w+2, "%llx", &realtime) != 1) + k = -EINVAL; + break; + + case 'x': + xor_hash_set = true; + if (sscanf(w+2, "%llx", &xor_hash) != 1) + k = -EINVAL; + break; + } + + free(item); + + if (k < 0) + return k; + } + + if ((!seqnum_set || !seqnum_id_set) && + (!monotonic_set || !boot_id_set) && + !realtime_set) + return -EINVAL; + + reset_location(j); + + j->current_location.type = LOCATION_DISCRETE; + + if (realtime_set) { + j->current_location.realtime = (uint64_t) realtime; + j->current_location.realtime_set = true; + } + + if (seqnum_set && seqnum_id_set) { + j->current_location.seqnum = (uint64_t) seqnum; + j->current_location.seqnum_id = seqnum_id; + j->current_location.seqnum_set = true; + } + + if (monotonic_set && boot_id_set) { + j->current_location.monotonic = (uint64_t) monotonic; + j->current_location.boot_id = boot_id; + j->current_location.monotonic_set = true; 
+ } + + if (xor_hash_set) { + j->current_location.xor_hash = (uint64_t) xor_hash; + j->current_location.xor_hash_set = true; + } + + return 0; +} + +int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) { + assert(j); + + reset_location(j); + j->current_location.type = LOCATION_DISCRETE; + j->current_location.boot_id = boot_id; + j->current_location.monotonic = usec; + j->current_location.monotonic_set = true; + + return 0; +} + +int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) { + assert(j); + + reset_location(j); + j->current_location.type = LOCATION_DISCRETE; + j->current_location.realtime = usec; + j->current_location.realtime_set = true; + + return 0; +} + +int sd_journal_seek_head(sd_journal *j) { + assert(j); + + reset_location(j); + j->current_location.type = LOCATION_HEAD; + + return 0; +} + +int sd_journal_seek_tail(sd_journal *j) { + assert(j); + + reset_location(j); + j->current_location.type = LOCATION_TAIL; + + return 0; } static int add_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) { @@ -411,6 +950,9 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch return r; } + journal_file_dump(f); + + r = hashmap_put(j->files, f->path, f); if (r < 0) { journal_file_close(f); @@ -557,20 +1099,20 @@ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) { f = j->current_file; if (!f) - return 0; + return -EADDRNOTAVAIL; if (f->current_offset <= 0) - return 0; + return -EADDRNOTAVAIL; - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; *ret = le64toh(o->entry.realtime); - return 1; + return 0; } -int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret) { +int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) { Object *o; JournalFile *f; int r; @@ -581,25 +1123,28 @@ int 
sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret) { f = j->current_file; if (!f) - return 0; + return -EADDRNOTAVAIL; if (f->current_offset <= 0) - return 0; + return -EADDRNOTAVAIL; - r = sd_id128_get_boot(&id); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); - if (r < 0) - return r; + if (ret_boot_id) + *ret_boot_id = o->entry.boot_id; + else { + r = sd_id128_get_boot(&id); + if (r < 0) + return r; - if (!sd_id128_equal(id, o->entry.boot_id)) - return 0; + if (!sd_id128_equal(id, o->entry.boot_id)) + return -ENOENT; + } *ret = le64toh(o->entry.monotonic); - return 1; - + return 0; } int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) { @@ -619,12 +1164,12 @@ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, siz f = j->current_file; if (!f) - return 0; + return -EADDRNOTAVAIL; if (f->current_offset <= 0) - return 0; + return -EADDRNOTAVAIL; - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; @@ -632,16 +1177,16 @@ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, siz n = journal_file_entry_n_items(o); for (i = 0; i < n; i++) { - uint64_t p, l, h; + uint64_t p, l, le_hash; size_t t; p = le64toh(o->entry.items[i].object_offset); - h = o->entry.items[j->current_field].hash; - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + le_hash = o->entry.items[j->current_field].hash; + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - if (h != o->data.hash) + if (le_hash != o->data.hash) return -EBADMSG; l = le64toh(o->object.size) - offsetof(Object, data.payload); @@ -658,23 +1203,23 @@ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, siz *data = o->data.payload; *size = t; 
- return 1; + return 0; } - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; } - return 0; + return -ENOENT; } int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { JournalFile *f; - uint64_t p, l, n, h; - size_t t; + uint64_t p, l, n, le_hash; int r; Object *o; + size_t t; assert(j); assert(data); @@ -682,12 +1227,12 @@ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { f = j->current_file; if (!f) - return 0; + return -EADDRNOTAVAIL; if (f->current_offset <= 0) - return 0; + return -EADDRNOTAVAIL; - r = journal_file_move_to_object(f, f->current_offset, OBJECT_ENTRY, &o); + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); if (r < 0) return r; @@ -696,12 +1241,12 @@ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { return 0; p = le64toh(o->entry.items[j->current_field].object_offset); - h = o->entry.items[j->current_field].hash; - r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); + le_hash = o->entry.items[j->current_field].hash; + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) return r; - if (h != o->data.hash) + if (le_hash != o->data.hash) return -EBADMSG; l = le64toh(o->object.size) - offsetof(Object, data.payload); @@ -719,24 +1264,8 @@ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { return 1; } -void sd_journal_start_data(sd_journal *j) { +void sd_journal_restart_data(sd_journal *j) { assert(j); j->current_field = 0; } - -int sd_journal_seek_head(sd_journal *j) { - assert(j); - - reset_location(j); - - return real_journal_next(j, DIRECTION_DOWN); -} - -int sd_journal_seek_tail(sd_journal *j) { - assert(j); - - reset_location(j); - - return real_journal_next(j, DIRECTION_UP); -} diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 9978ca9ac0..8d7e314223 100644 
--- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -25,18 +25,22 @@ #include #include +#include "sd-id128.h" + /* TODO: * * - check LE/BE conversion for 8bit, 16bit, 32bit values - * - implement parallel traversal * - implement inotify usage on client * - implement audit gateway * - implement native gateway * - implement stdout gateway - * - extend hash table/bisect table as we go + * - extend hash tables table as we go * - accelerate looking for "all hostnames" and suchlike. * - throttling * - enforce limit on open journal files in journald and journalctl + * - cryptographic hash + * - fix space reservation logic + * - comm, argv can be manipulated, should it be _COMM=, _CMDLINE= or COMM=, CMDLINE=? */ typedef struct sd_journal sd_journal; @@ -47,59 +51,52 @@ void sd_journal_close(sd_journal *j); int sd_journal_previous(sd_journal *j); int sd_journal_next(sd_journal *j); +int sd_journal_previous_skip(sd_journal *j, uint64_t skip); +int sd_journal_next_skip(sd_journal *j, uint64_t skip); + int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret); -int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret); +int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id); int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *l); int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *l); -void sd_journal_start_data(sd_journal *j); +void sd_journal_restart_data(sd_journal *j); int sd_journal_add_match(sd_journal *j, const void *data, size_t size); void sd_journal_flush_matches(sd_journal *j); int sd_journal_seek_head(sd_journal *j); int sd_journal_seek_tail(sd_journal *j); - -int sd_journal_seek_monotonic_usec(sd_journal *j, uint64_t usec); /* missing */ -int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec); /* missing */ +int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec); +int sd_journal_seek_realtime_usec(sd_journal *j, 
uint64_t usec); +int sd_journal_seek_cursor(sd_journal *j, const char *cursor); int sd_journal_get_cursor(sd_journal *j, char **cursor); -int sd_journal_set_cursor(sd_journal *j, const char *cursor); /* missing */ -int sd_journal_unique_seek(sd_journal *j, const char *field); /* missing */ -int sd_journal_unique_enumerate(sd_journal *j, const void **data, size_t *l); /* missing */ - -int sd_journal_get_fd(sd_journal *j); /* missing */ +int sd_journal_query_unique(sd_journal *j, const char *field); /* missing */ +int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l); /* missing */ +void sd_journal_restart_unique(sd_journal *j); /* missing */ enum { SD_JOURNAL_NOP, SD_JOURNAL_APPEND, - SD_JOURNAL_DROP + SD_JOURNAL_INVALIDATE_ADD, + SD_JOURNAL_INVALIDATE_REMOVE }; +int sd_journal_get_fd(sd_journal *j); /* missing */ int sd_journal_process(sd_journal *j); /* missing */ -#define SD_JOURNAL_FOREACH_BEGIN(j) \ - if (sd_journal_seek_head(j) > 0) do { - -#define SD_JOURNAL_FOREACH_END(j) \ - } while (sd_journal_next(j) > 0) - -#define SD_JOURNAL_FOREACH_CONTINUE(j) \ - do { - -#define SD_JOURNAL_FOREACH_BACKWARDS_BEGIN(j) \ - if (sd_journal_seek_tail(j) > 0) do { - -#define SD_JOURNAL_FOREACH_BACKWARDS_END(j) \ - } while (sd_journal_previous(j) > 0) +#define SD_JOURNAL_FOREACH(j) \ + if (sd_journal_seek_head(j) >= 0) \ + while (sd_journal_next(j) > 0) \ -#define SD_JOURNAL_FOREACH_BACKWARDS_CONTINUE(j) \ - do { +#define SD_JOURNAL_FOREACH_BACKWARDS(j) \ + if (sd_journal_seek_tail(j) >= 0) \ + while (sd_journal_previous(j) > 0) \ #define SD_JOURNAL_FOREACH_DATA(j, data, l) \ - for (sd_journal_start_data(j); sd_journal_enumerate_data((j), &(data), &(l)) > 0; ) + for (sd_journal_restart_data(j); sd_journal_enumerate_data((j), &(data), &(l)) > 0; ) -#define SD_JOURNAL_FOREACH_UNIQUE(j, data, l) \ - while (sd_journal_enumerate_unique_data((j), &(data), &(l)) > 0) +#define SD_JOURNAL_FOREACH_UNIQUE(j, data, l) \ + for (sd_journal_restart_unique(j); 
sd_journal_enumerate_data((j), &(data), &(l)) > 0; ) #endif diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index 93e2b4dab4..45ced12b46 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -31,6 +31,7 @@ int main(int argc, char *argv[]) { struct iovec iovec; static const char test[] = "test", test2[] = "test2"; Object *o; + uint64_t p; log_set_max_level(LOG_DEBUG); @@ -54,41 +55,55 @@ int main(int argc, char *argv[]) { journal_file_dump(f); - assert(journal_file_next_entry(f, NULL, DIRECTION_DOWN, &o, NULL) == 1); + assert(journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, &p) == 1); assert(le64toh(o->entry.seqnum) == 1); - assert(journal_file_next_entry(f, o, DIRECTION_DOWN, &o, NULL) == 1); + assert(journal_file_next_entry(f, o, p, DIRECTION_DOWN, &o, &p) == 1); assert(le64toh(o->entry.seqnum) == 2); - assert(journal_file_next_entry(f, o, DIRECTION_DOWN, &o, NULL) == 1); + assert(journal_file_next_entry(f, o, p, DIRECTION_DOWN, &o, &p) == 1); assert(le64toh(o->entry.seqnum) == 3); - assert(journal_file_next_entry(f, o, DIRECTION_DOWN, &o, NULL) == 0); + assert(journal_file_next_entry(f, o, p, DIRECTION_DOWN, &o, &p) == 0); - assert(journal_file_find_first_entry(f, test, strlen(test), DIRECTION_DOWN, &o, NULL) == 1); + assert(journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, &p) == 1); assert(le64toh(o->entry.seqnum) == 1); - assert(journal_file_find_first_entry(f, test, strlen(test), DIRECTION_UP, &o, NULL) == 1); + assert(journal_file_skip_entry(f, o, p, 2, &o, &p) == 1); assert(le64toh(o->entry.seqnum) == 3); - assert(journal_file_find_first_entry(f, test2, strlen(test2), DIRECTION_UP, &o, NULL) == 1); + assert(journal_file_skip_entry(f, o, p, -2, &o, &p) == 1); + assert(le64toh(o->entry.seqnum) == 1); + + assert(journal_file_skip_entry(f, o, p, -2, &o, &p) == 1); + assert(le64toh(o->entry.seqnum) == 1); + + assert(journal_file_find_data_object(f, test, strlen(test), NULL, &p) == 1); + 
assert(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_DOWN, &o, NULL) == 1); + assert(le64toh(o->entry.seqnum) == 1); + + assert(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_UP, &o, NULL) == 1); + assert(le64toh(o->entry.seqnum) == 3); + + assert(journal_file_find_data_object(f, test2, strlen(test2), NULL, &p) == 1); + assert(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_UP, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 2); - assert(journal_file_find_first_entry(f, test2, strlen(test2), DIRECTION_DOWN, &o, NULL) == 1); + assert(journal_file_next_entry_for_data(f, NULL, 0, p, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 2); - assert(journal_file_find_first_entry(f, "quux", 4, DIRECTION_DOWN, &o, NULL) == 0); + assert(journal_file_find_data_object(f, "quux", 4, NULL, &p) == 0); - assert(journal_file_move_to_entry(f, 1, &o, NULL) == 1); + assert(journal_file_move_to_entry_by_seqnum(f, 1, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 1); - assert(journal_file_move_to_entry(f, 3, &o, NULL) == 1); + assert(journal_file_move_to_entry_by_seqnum(f, 3, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 3); - assert(journal_file_move_to_entry(f, 2, &o, NULL) == 1); + assert(journal_file_move_to_entry_by_seqnum(f, 2, DIRECTION_DOWN, &o, NULL) == 1); assert(le64toh(o->entry.seqnum) == 2); - assert(journal_file_move_to_entry(f, 10, &o, NULL) == 0); + assert(journal_file_move_to_entry_by_seqnum(f, 10, DIRECTION_DOWN, &o, NULL) == 0); journal_file_rotate(&f); journal_file_rotate(&f); diff --git a/src/journal/wjournal.c b/src/journal/wjournal.c deleted file mode 100644 index 3122aa054e..0000000000 --- a/src/journal/wjournal.c +++ /dev/null @@ -1,57 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -/*** - This file is part of systemd. 
- - Copyright 2011 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with systemd; If not, see . -***/ - -#include "wjournal.h" -#include "journal-def.h" - -struct WJournal { - int fd; - - Header *header; - HashItem *hash_table; - uint64_t *bisect_table; -}; - -int wjournal_open(const char *fn, WJournal **ret) { - assert(fn); - assert(ret); -} - -void wjournal_close(WJournal *j) { - assert(j); - - if (j->fd >= 0) - close_nointr_nofail(j->fd); - - if (j->header) { - munmap(j->header, PAGE_ALIGN(sizeof(Header))); - - } - - free(j); -} - -int wjournal_write_object_begin(WJournal *j, uint64_t type, uint64_t size, Object **ret); -int wjournal_write_object_finish(WJournal *j, Object *ret); - -int wjournal_write_field(WJournal *j, const char *buffer, uint64_t size, Object **ret); -int wjournal_write_entry(WJournal *j, const Field *fields, unsigned n_fields, Object **ret); -int wjournal_write_eof(WJournal *j); diff --git a/src/journal/wjournal.h b/src/journal/wjournal.h deleted file mode 100644 index b0250d0fe2..0000000000 --- a/src/journal/wjournal.h +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -#ifndef foojournalhfoo -#define foojournalhfoo - -/*** - This file is part of systemd. 
- - Copyright 2011 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with systemd; If not, see . -***/ - -#include - -typedef struct WJournal WJournal; - -int wjournal_open(const char *fn, WJournal **ret); -void wjournal_close(WJournal *j); - -int wjournal_write_object_begin(WJournal *j, uint64_t type, uint64_t size, Object **ret); -int wjournal_write_object_finish(WJournal *j, Object *ret); - -int wjournal_write_field(WJournal *j, const char *buffer, uint64_t size, Object **ret); -int wjournal_write_entry(WJournal *j, const Field *fields, unsigned n_fields, Object **ret); -int wjournal_write_eof(WJournal *j); - -#endif -- cgit v1.2.3-54-g00ecf From a40593a0d0d740efa387e35411e1e456a6c5aba7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 8 Nov 2011 19:12:29 +0100 Subject: TODO --- TODO | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/TODO b/TODO index 9149018134..b993a6b2eb 100644 --- a/TODO +++ b/TODO @@ -17,8 +17,12 @@ Bugfixes: * make polkit checks async +* properly handle .mount unit state tracking when two mount points are stacked one on top of another on the exact same mount point. + Features: +* check utf8 everywhere + * unset container= in PID1? 
* if we can not get user quota for tmpfs, mount a separate tmpfs instance -- cgit v1.2.3-54-g00ecf From 7f3e62571a63ac90de6ac5eefeeb8d3e9aa6f49e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 17 Dec 2011 00:56:34 +0100 Subject: journal: add native protocol to journald, and client side API to send journal messages --- Makefile.am | 1 + TODO | 4 + src/journal/journal-send.c | 196 +++++++++++++++++++++++ src/journal/journald.c | 375 ++++++++++++++++++++++++++++++++++++++------- src/journal/sd-journal.h | 12 ++ src/journal/test-journal.c | 1 + 6 files changed, 531 insertions(+), 58 deletions(-) create mode 100644 src/journal/journal-send.c diff --git a/Makefile.am b/Makefile.am index 58b3a63517..7dee4cf0a4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -983,6 +983,7 @@ test_journal_SOURCES = \ src/journal/sd-journal.c \ src/journal/journal-file.c \ src/journal/lookup3.c \ + src/journal/journal-send.c \ src/sd-id128.c test_journal_CFLAGS = \ diff --git a/TODO b/TODO index 8c3034e079..5b64f048e5 100644 --- a/TODO +++ b/TODO @@ -21,6 +21,10 @@ Bugfixes: Features: +* logind: allow showing logout dialog from system + +* document that %% can be used to write % in a string that is specifier extended + * check utf8 everywhere * when an instanced service exits, remove its parent cgroup too if possible. diff --git a/src/journal/journal-send.c b/src/journal/journal-send.c new file mode 100644 index 0000000000..e2575a9805 --- /dev/null +++ b/src/journal/journal-send.c @@ -0,0 +1,196 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include +#include + +#include "sd-journal.h" +#include "util.h" + +/* We open a single fd, and we'll share it with the current process, + * all its threads, and all its subprocesses. This means we need to + * initialize it atomically, and need to operate on it atomically + * never assuming we are the only user */ + +static int journal_fd(void) { + int fd; + static int fd_plus_one = 0; + +retry: + if (fd_plus_one > 0) + return fd_plus_one - 1; + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + if (!__sync_bool_compare_and_swap(&fd_plus_one, 0, fd+1)) { + close_nointr_nofail(fd); + goto retry; + } + + return fd; +} + +int sd_journal_print(const char *format, ...) { + int r; + va_list ap; + + va_start(ap, format); + r = sd_journal_printv(format, ap); + va_end(ap); + + return r; +} + +int sd_journal_printv(const char *format, va_list ap) { + char buffer[8 + LINE_MAX]; + struct iovec iov; + + memcpy(buffer, "MESSAGE=", 8); + vsnprintf(buffer+8, sizeof(buffer) - 8, format, ap); + + char_array_0(buffer); + + zero(iov); + IOVEC_SET_STRING(iov, buffer); + + return sd_journal_sendv(&iov, 1); +} + +int sd_journal_send(const char *format, ...) 
{ + int r, n = 0, i = 0, j; + va_list ap; + struct iovec *iov = NULL; + + va_start(ap, format); + while (format) { + struct iovec *c; + char *buffer; + + if (i >= n) { + n = MAX(i*2, 4); + c = realloc(iov, n * sizeof(struct iovec)); + if (!c) { + r = -ENOMEM; + goto fail; + } + + iov = c; + } + + if (vasprintf(&buffer, format, ap) < 0) { + r = -ENOMEM; + goto fail; + } + + IOVEC_SET_STRING(iov[i++], buffer); + + format = va_arg(ap, char *); + } + va_end(ap); + + r = sd_journal_sendv(iov, i); + +fail: + for (j = 0; j < i; j++) + free(iov[j].iov_base); + + free(iov); + + return r; +} + +int sd_journal_sendv(const struct iovec *iov, int n) { + int fd; + struct iovec *w; + uint64_t *l; + int i, j = 0; + struct msghdr mh; + struct sockaddr_un sa; + + if (!iov || n <= 0) + return -EINVAL; + + w = alloca(sizeof(struct iovec) * n * 5); + l = alloca(sizeof(uint64_t) * n); + + for (i = 0; i < n; i++) { + char *c, *nl; + + c = memchr(iov[i].iov_base, '=', iov[i].iov_len); + if (!c) + return -EINVAL; + + nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len); + if (nl) { + if (nl < c) + return -EINVAL; + + /* Already includes a newline? Bummer, then + * let's write the variable name, then a + * newline, then the size (64bit LE), followed + * by the data and a final newline */ + + w[j].iov_base = iov[i].iov_base; + w[j].iov_len = c - (char*) iov[i].iov_base; + j++; + + IOVEC_SET_STRING(w[j++], "\n"); + + l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1); + w[j].iov_base = &l[i]; + w[j].iov_len = sizeof(uint64_t); + j++; + + w[j].iov_base = c + 1; + w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1; + j++; + + } else + /* Nothing special? 
Then just add the line and + * append a newline */ + w[j++] = iov[i]; + + IOVEC_SET_STRING(w[j++], "\n"); + } + + fd = journal_fd(); + if (fd < 0) + return fd; + + zero(sa); + sa.sun_family = AF_UNIX; + strncpy(sa.sun_path,"/run/systemd/journal", sizeof(sa.sun_path)); + + zero(mh); + mh.msg_name = &sa; + mh.msg_namelen = offsetof(struct sockaddr_un, sun_path) + strlen(sa.sun_path); + mh.msg_iov = w; + mh.msg_iovlen = j; + + if (sendmsg(fd, &mh, MSG_NOSIGNAL) < 0) + return -errno; + + return 0; +} diff --git a/src/journal/journald.c b/src/journal/journald.c index 89d8bee2a2..453495a964 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -27,6 +27,9 @@ #include #include #include +#include +#include +#include #include "hashmap.h" #include "journal-file.h" @@ -36,15 +39,19 @@ #include "cgroup-util.h" typedef struct Server { - int syslog_fd; int epoll_fd; int signal_fd; + int syslog_fd; + int native_fd; JournalFile *runtime_journal; JournalFile *system_journal; Hashmap *user_journals; uint64_t seqnum; + + char *buffer; + size_t buffer_size; } Server; static void fix_perms(JournalFile *f, uid_t uid) { @@ -137,35 +144,27 @@ static JournalFile* find_journal(Server *s, uid_t uid) { return f; } -static void process_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) { - char *message = NULL, *pid = NULL, *uid = NULL, *gid = NULL, +static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigned m, struct ucred *ucred, struct timeval *tv) { + char *pid = NULL, *uid = NULL, *gid = NULL, *source_time = NULL, *boot_id = NULL, *machine_id = NULL, *comm = NULL, *cmdline = NULL, *hostname = NULL, *audit_session = NULL, *audit_loginuid = NULL, - *syslog_priority = NULL, *syslog_facility = NULL, *exe = NULL, *cgroup = NULL; - struct iovec iovec[17]; - unsigned n = 0; + char idbuf[33]; sd_id128_t id; int r; char *t; - int priority = LOG_USER | LOG_INFO; uid_t loginuid = 0, realuid = 0; JournalFile *f; - 
parse_syslog_priority((char**) &buf, &priority); - skip_syslog_date((char**) &buf); - - if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0) - IOVEC_SET_STRING(iovec[n++], syslog_priority); + assert(s); + assert(iovec || n == 0); - if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0) - IOVEC_SET_STRING(iovec[n++], syslog_facility); + if (n == 0) + return; - message = strappend("MESSAGE=", buf); - if (message) - IOVEC_SET_STRING(iovec[n++], message); + assert(n + 13 <= m); if (ucred) { uint32_t session; @@ -252,6 +251,8 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str free(t); } + assert(n <= m); + f = find_journal(s, realuid == 0 ? 0 : loginuid); if (!f) log_warning("Dropping message, as we can't find a place to store the data."); @@ -262,7 +263,6 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str log_error("Failed to write entry, ignoring: %s", strerror(-r)); } - free(message); free(pid); free(uid); free(gid); @@ -275,9 +275,148 @@ static void process_message(Server *s, const char *buf, struct ucred *ucred, str free(hostname); free(audit_session); free(audit_loginuid); + free(cgroup); + +} + +static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) { + char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL; + struct iovec iovec[16]; + unsigned n = 0; + int priority = LOG_USER | LOG_INFO; + + assert(s); + assert(buf); + + parse_syslog_priority((char**) &buf, &priority); + skip_syslog_date((char**) &buf); + + if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0) + IOVEC_SET_STRING(iovec[n++], syslog_priority); + + if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0) + IOVEC_SET_STRING(iovec[n++], syslog_facility); + + message = strappend("MESSAGE=", buf); + if (message) + IOVEC_SET_STRING(iovec[n++], message); + + dispatch_message(s, iovec, n, 
ELEMENTSOF(iovec), ucred, tv); + + free(message); free(syslog_facility); free(syslog_priority); - free(cgroup); +} + +static void process_native_message(Server *s, const void *buffer, size_t buffer_size, struct ucred *ucred, struct timeval *tv) { + struct iovec *iovec = NULL; + unsigned n = 0, m = 0, j; + const char *p; + size_t remaining; + + assert(s); + assert(buffer || n == 0); + + p = buffer; + remaining = buffer_size; + + while (remaining > 0) { + const char *e, *q; + + e = memchr(p, '\n', remaining); + + if (!e) { + /* Trailing noise, let's ignore it, and flush what we collected */ + log_debug("Received message with trailing noise, ignoring."); + break; + } + + if (e == p) { + /* Entry separator */ + dispatch_message(s, iovec, n, m, ucred, tv); + n = 0; + + p++; + remaining--; + continue; + } + + if (*p == '.') { + /* Control command, ignore for now */ + remaining -= (e - p) + 1; + p = e + 1; + continue; + } + + /* A property follows */ + + if (n+13 >= m) { + struct iovec *c; + unsigned u; + + u = MAX((n+13U) * 2U, 4U); + c = realloc(iovec, u * sizeof(struct iovec)); + if (!c) { + log_error("Out of memory"); + break; + } + + iovec = c; + m = u; + } + + q = memchr(p, '=', e - p); + if (q) { + iovec[n].iov_base = (char*) p; + iovec[n].iov_len = e - p; + n++; + + remaining -= (e - p) + 1; + p = e + 1; + continue; + } else { + uint64_t l; + char *k; + + if (remaining < e - p + 1 + sizeof(uint64_t) + 1) { + log_debug("Failed to parse message, ignoring."); + break; + } + + memcpy(&l, e + 1, sizeof(uint64_t)); + l = le64toh(l); + + if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 || + e[1+sizeof(uint64_t)+l] != '\n') { + log_debug("Failed to parse message, ignoring."); + break; + } + + k = malloc((e - p) + 1 + l); + if (!k) { + log_error("Out of memory"); + break; + } + + memcpy(k, p, e - p); + k[e - p] = '='; + memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l); + + iovec[n].iov_base = k; + iovec[n].iov_len = (e - p) + 1 + l; + n++; + + remaining -= (e - p) 
+ 1 + sizeof(uint64_t) + l + 1; + p = e + 1 + sizeof(uint64_t) + l + 1; + } + } + + dispatch_message(s, iovec, n, m, ucred, tv); + + for (j = 0; j < n; j++) + if (iovec[j].iov_base < buffer || + (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size) + free(iovec[j].iov_base); } static int process_event(Server *s, struct epoll_event *ev) { @@ -309,9 +448,9 @@ static int process_event(Server *s, struct epoll_event *ev) { } - if (ev->data.fd == s->syslog_fd) { + if (ev->data.fd == s->native_fd || + ev->data.fd == s->syslog_fd) { for (;;) { - char buf[LINE_MAX+1]; struct msghdr msghdr; struct iovec iovec; struct ucred *ucred = NULL; @@ -323,11 +462,35 @@ static int process_event(Server *s, struct epoll_event *ev) { CMSG_SPACE(sizeof(struct timeval))]; } control; ssize_t n; - char *e; + int v; + + if (ioctl(ev->data.fd, SIOCINQ, &v) < 0) { + log_error("SIOCINQ failed: %m"); + return -errno; + } + + if (v <= 0) + return 1; + + if (s->buffer_size < (size_t) v) { + void *b; + size_t l; + + l = MAX(LINE_MAX + (size_t) v, s->buffer_size * 2); + b = realloc(s->buffer, l+1); + + if (!b) { + log_error("Couldn't increase buffer."); + return -ENOMEM; + } + + s->buffer_size = l; + s->buffer = b; + } zero(iovec); - iovec.iov_base = buf; - iovec.iov_len = sizeof(buf)-1; + iovec.iov_base = s->buffer; + iovec.iov_len = s->buffer_size; zero(control); zero(msghdr); @@ -358,13 +521,18 @@ static int process_event(Server *s, struct epoll_event *ev) { tv = (struct timeval*) CMSG_DATA(cmsg); } - e = memchr(buf, '\n', n); - if (e) - *e = 0; - else - buf[n] = 0; + if (ev->data.fd == s->syslog_fd) { + char *e; - process_message(s, strstrip(buf), ucred, tv); + e = memchr(s->buffer, '\n', n); + if (e) + *e = 0; + else + s->buffer[n] = 0; + + process_syslog_message(s, strstrip(s->buffer), ucred, tv); + } else + process_native_message(s, s->buffer, n, ucred, tv); } return 1; @@ -428,51 +596,73 @@ static int system_journal_open(Server *s) { return r; } -static int 
server_init(Server *s) { - int n, one, r; - struct epoll_event ev; - sigset_t mask; +static int open_syslog_socket(Server *s) { + union sockaddr_union sa; + int one, r; assert(s); - zero(*s); - s->syslog_fd = s->signal_fd = -1; + if (s->syslog_fd < 0) { - s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); - if (s->epoll_fd < 0) { - log_error("Failed to create epoll object: %m"); - return -errno; + s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (s->syslog_fd < 0) { + log_error("socket() failed: %m"); + return -errno; + } + + zero(sa); + sa.un.sun_family = AF_UNIX; + strncpy(sa.un.sun_path, "/run/systemd/syslog", sizeof(sa.un.sun_path)); + + unlink(sa.un.sun_path); + + r = bind(s->syslog_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path)); + if (r < 0) { + log_error("bind() failed: %m"); + return -errno; + } + + chmod(sa.un.sun_path, 0666); } - n = sd_listen_fds(true); - if (n < 0) { - log_error("Failed to read listening file descriptors from environment: %s", strerror(-n)); - return n; + one = 1; + r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)); + if (r < 0) { + log_error("SO_PASSCRED failed: %m"); + return -errno; } - if (n > 1) { - log_error("Too many file descriptors passed."); - return -EINVAL; + one = 1; + r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one)); + if (r < 0) { + log_error("SO_TIMESTAMP failed: %m"); + return -errno; } - if (n == 1) - s->syslog_fd = SD_LISTEN_FDS_START; - else { - union sockaddr_union sa; + return 0; +} - s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); - if (s->syslog_fd < 0) { +static int open_native_socket(Server*s) { + union sockaddr_union sa; + int one, r; + + assert(s); + + if (s->native_fd < 0) { + + s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (s->native_fd < 0) { log_error("socket() failed: %m"); return -errno; } zero(sa); sa.un.sun_family = AF_UNIX; - strncpy(sa.un.sun_path, "/run/systemd/syslog", 
sizeof(sa.un.sun_path)); + strncpy(sa.un.sun_path, "/run/systemd/journal", sizeof(sa.un.sun_path)); unlink(sa.un.sun_path); - r = bind(s->syslog_fd, &sa.sa, sizeof(sa.un)); + r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path)); if (r < 0) { log_error("bind() failed: %m"); return -errno; @@ -482,24 +672,90 @@ static int server_init(Server *s) { } one = 1; - r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)); + r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)); if (r < 0) { log_error("SO_PASSCRED failed: %m"); return -errno; } one = 1; - r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one)); + r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one)); if (r < 0) { log_error("SO_TIMESTAMP failed: %m"); return -errno; } + return 0; +} + +static int server_init(Server *s) { + int n, r, fd; + struct epoll_event ev; + sigset_t mask; + + assert(s); + + zero(*s); + s->syslog_fd = s->native_fd = s->signal_fd = -1; + + s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (s->epoll_fd < 0) { + log_error("Failed to create epoll object: %m"); + return -errno; + } + + n = sd_listen_fds(true); + if (n < 0) { + log_error("Failed to read listening file descriptors from environment: %s", strerror(-n)); + return n; + } + + for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) { + + if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) { + + if (s->syslog_fd >= 0) { + log_error("Too many /dev/log sockets passed."); + return -EINVAL; + } + + s->syslog_fd = fd; + + } else if (sd_is_socket(fd, AF_UNIX, SOCK_DGRAM, -1) > 0) { + + if (s->native_fd >= 0) { + log_error("Too many native sockets passed."); + return -EINVAL; + } + + s->native_fd = fd; + } else { + log_error("Unknown socket passed."); + return -EINVAL; + } + } + + r = open_syslog_socket(s); + if (r < 0) + return r; + zero(ev); ev.events = EPOLLIN; ev.data.fd = s->syslog_fd; if 
(epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) { - log_error("Failed to add server fd to epoll object: %m"); + log_error("Failed to add syslog server fd to epoll object: %m"); + return -errno; + } + + r = open_native_socket(s); + if (r < 0) + return r; + + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->native_fd; + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) { + log_error("Failed to add native server fd to epoll object: %m"); return -errno; } @@ -558,6 +814,9 @@ static void server_done(Server *s) { if (s->syslog_fd >= 0) close_nointr_nofail(s->syslog_fd); + + if (s->native_fd >= 0) + close_nointr_nofail(s->native_fd); } int main(int argc, char *argv[]) { diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 8d7e314223..e42293ffe7 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -24,6 +24,8 @@ #include #include +#include +#include #include "sd-id128.h" @@ -43,6 +45,16 @@ * - comm, argv can be manipulated, should it be _COMM=, _CMDLINE= or COMM=, CMDLINE=? */ +/* Write to daemon */ + +int sd_journal_print(const char *format, ...) __attribute__ ((format (printf, 1, 2))); +int sd_journal_printv(const char *format, va_list ap); + +int sd_journal_send(const char *format, ...) 
__attribute__((sentinel)); +int sd_journal_sendv(const struct iovec *iov, int n); + +/* Browse journal stream */ + typedef struct sd_journal sd_journal; int sd_journal_open(sd_journal **ret); diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index 45ced12b46..a9bd6cb2cf 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -22,6 +22,7 @@ #include #include +#include "sd-journal.h" #include "journal-file.h" #include "log.h" -- cgit v1.2.3-54-g00ecf From cab8ac60837b489b27a247990f741315c71cb389 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 17 Dec 2011 01:13:55 +0100 Subject: journal: enforce limits on open journal files --- src/journal/journald.c | 9 +++++++++ src/journal/sd-journal.c | 8 +++++++- src/journal/sd-journal.h | 2 -- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index 453495a964..6b774f499d 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -38,6 +38,8 @@ #include "acl-util.h" #include "cgroup-util.h" +#define USER_JOURNALS_MAX 1024 + typedef struct Server { int epoll_fd; int signal_fd; @@ -127,6 +129,13 @@ static JournalFile* find_journal(Server *s, uid_t uid) { if (asprintf(&p, "/var/log/journal/%s/user-%lu.journal", sd_id128_to_string(machine, ids), (unsigned long) uid) < 0) return s->system_journal; + while (hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) { + /* Too many open? 
Then let's close one */ + f = hashmap_steal_first(s->user_journals); + assert(f); + journal_file_close(f); + } + r = journal_file_open(p, O_RDWR|O_CREAT, 0640, s->system_journal, &f); free(p); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 85c57170d5..bcfcbfb9e1 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -30,6 +30,8 @@ #include "list.h" #include "lookup3.h" +#define JOURNAL_FILES_MAX 1024 + typedef struct Match Match; struct Match { @@ -932,6 +934,11 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch assert(prefix); assert(filename); + if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) { + log_debug("Too many open journal files, ignoring."); + return 0; + } + if (dir) fn = join(prefix, "/", dir, "/", filename, NULL); else @@ -952,7 +959,6 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch journal_file_dump(f); - r = hashmap_put(j->files, f->path, f); if (r < 0) { journal_file_close(f); diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index e42293ffe7..b167dcf097 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -34,12 +34,10 @@ * - check LE/BE conversion for 8bit, 16bit, 32bit values * - implement inotify usage on client * - implement audit gateway - * - implement native gateway * - implement stdout gateway * - extend hash tables table as we go * - accelerate looking for "all hostnames" and suchlike. * - throttling - * - enforce limit on open journal files in journald and journalctl * - cryptographic hash * - fix space reservation logic * - comm, argv can be manipulated, should it be _COMM=, _CMDLINE= or COMM=, CMDLINE=? 
-- cgit v1.2.3-54-g00ecf From d0bbc21caa6e68693a47db60c93e99422bf2a858 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 17 Dec 2011 01:32:49 +0100 Subject: journal: introduce mandatory sd_journal_printf() priority parameter --- src/journal/journal-send.c | 19 +++++++++++-------- src/journal/sd-journal.h | 4 ++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/journal/journal-send.c b/src/journal/journal-send.c index e2575a9805..238d64c13e 100644 --- a/src/journal/journal-send.c +++ b/src/journal/journal-send.c @@ -52,30 +52,33 @@ retry: return fd; } -int sd_journal_print(const char *format, ...) { +int sd_journal_print(int priority, const char *format, ...) { int r; va_list ap; va_start(ap, format); - r = sd_journal_printv(format, ap); + r = sd_journal_printv(priority, format, ap); va_end(ap); return r; } -int sd_journal_printv(const char *format, va_list ap) { - char buffer[8 + LINE_MAX]; - struct iovec iov; +int sd_journal_printv(int priority, const char *format, va_list ap) { + char buffer[8 + LINE_MAX], p[11]; + struct iovec iov[2]; + + snprintf(p, sizeof(p), "PRIORITY=%i", priority & LOG_PRIMASK); + char_array_0(p); memcpy(buffer, "MESSAGE=", 8); vsnprintf(buffer+8, sizeof(buffer) - 8, format, ap); - char_array_0(buffer); zero(iov); - IOVEC_SET_STRING(iov, buffer); + IOVEC_SET_STRING(iov[0], buffer); + IOVEC_SET_STRING(iov[1], p); - return sd_journal_sendv(&iov, 1); + return sd_journal_sendv(iov, 2); } int sd_journal_send(const char *format, ...) { diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index b167dcf097..05a929d910 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -45,8 +45,8 @@ /* Write to daemon */ -int sd_journal_print(const char *format, ...) __attribute__ ((format (printf, 1, 2))); -int sd_journal_printv(const char *format, va_list ap); +int sd_journal_print(int piority, const char *format, ...) 
__attribute__ ((format (printf, 2, 3))); +int sd_journal_printv(int priority, const char *format, va_list ap); int sd_journal_send(const char *format, ...) __attribute__((sentinel)); int sd_journal_sendv(const struct iovec *iov, int n); -- cgit v1.2.3-54-g00ecf From 2b0ba69bb127b6b1d76512ce32fc9cfd89670f97 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 17 Dec 2011 01:36:47 +0100 Subject: journald: filter fields send from client starting with underscore --- src/journal/journald.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index 6b774f499d..630ead0053 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -376,9 +376,15 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ q = memchr(p, '=', e - p); if (q) { - iovec[n].iov_base = (char*) p; - iovec[n].iov_len = e - p; - n++; + if (p[0] != '_') { + /* If the field name starts with an + * underscore, skip the variable, + * since that indidates a trusted + * field */ + iovec[n].iov_base = (char*) p; + iovec[n].iov_len = e - p; + n++; + } remaining -= (e - p) + 1; p = e + 1; @@ -411,9 +417,12 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ k[e - p] = '='; memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l); - iovec[n].iov_base = k; - iovec[n].iov_len = (e - p) + 1 + l; - n++; + if (k[0] != '_') { + iovec[n].iov_base = k; + iovec[n].iov_len = (e - p) + 1 + l; + n++; + } else + free(k); remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1; p = e + 1 + sizeof(uint64_t) + l + 1; -- cgit v1.2.3-54-g00ecf From 0b3b020a178cf3b957fed627de13c895773995ec Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 03:02:17 +0100 Subject: man: document the sd-login interfaces --- man/sd_get_seats.xml | 125 +++++++++++++++++++++++++++++ man/sd_login_monitor_new.xml | 172 ++++++++++++++++++++++++++++++++++++++++ man/sd_pid_get_session.xml 
| 136 ++++++++++++++++++++++++++++++++ man/sd_seat_get_active.xml | 150 +++++++++++++++++++++++++++++++++++ man/sd_session_is_active.xml | 134 +++++++++++++++++++++++++++++++ man/sd_uid_get_state.xml | 182 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 899 insertions(+) create mode 100644 man/sd_get_seats.xml create mode 100644 man/sd_login_monitor_new.xml create mode 100644 man/sd_pid_get_session.xml create mode 100644 man/sd_seat_get_active.xml create mode 100644 man/sd_session_is_active.xml create mode 100644 man/sd_uid_get_state.xml diff --git a/man/sd_get_seats.xml b/man/sd_get_seats.xml new file mode 100644 index 0000000000..bbc396a09b --- /dev/null +++ b/man/sd_get_seats.xml @@ -0,0 +1,125 @@ + + + + + + + + + sd_get_seats + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_get_seats + 3 + + + + sd_get_seats + sd_get_sessions + sd_get_uids + Determine available seats, sessions and logged in users + + + + + #include <systemd/sd-login.h> + + + int sd_get_seats + char*** seats + + + + int sd_get_sessions + char*** sessions + + + + int sd_get_uids + char*** sessions + + + + + + + Description + + sd_get_seats() may be used + to determine all currently available local + seats. Returns an array of seat identifiers. The + returned array and all strings it references need to + be freed with the libc + free3 + call after use. + + Similar, sd_get_sessions() may + be used to determine all current login sessions. + + Similar, sd_get_uids() may + be used to determine all Unix users who currently have login sessions. + + + + Return Value + + On success sd_get_seats(), + sd_get_sessions() and + sd_get_uids() return the number + of entries in the arrays. On failure, these calls + return a negative errno-style error code. 
+ + + + Notes + + The sd_get_seats(), + sd_get_sessions() and + sd_get_uids() interfaces + are available as shared library, which can be compiled + and linked to with the + libsystemd-login + pkg-config1 + file. + + + + See Also + + + systemd1, + sd-login7, + sd_session_get_seat3, + + + + diff --git a/man/sd_login_monitor_new.xml b/man/sd_login_monitor_new.xml new file mode 100644 index 0000000000..2b37f00d30 --- /dev/null +++ b/man/sd_login_monitor_new.xml @@ -0,0 +1,172 @@ + + + + + + + + + sd_login_monitor_new + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_login_monitor_new + 3 + + + + sd_login_monitor_new + sd_login_monitor_unref + sd_login_monitor_flush + sd_login_monitor_get_fd + Monitor login sessions, seats and users + + + + + #include <systemd/sd-login.h> + + + int sd_login_monitor_new + const char* category + sd_login_monitor** ret + + + + sd_login_monitor* sd_login_monitor_unref + sd_login_monitor* m + + + + int sd_login_monitor_flush + sd_login_monitor* m + + + + int sd_login_monitor_get_fd + sd_login_monitor* m + + + + + + + Description + + sd_login_monitor_new() may + be used to monitor login session, users and seats. Via + a monitor object a file descriptor can be integrated + into an application defined event loop which is woken + up each time a user logs in, logs out or a seat is + added or removed, or a session, user, or seat changes + state otherwise. The first parameter takes a string + which can be either seat (to get + only notifications about seats being added, removed or + changed), session (to get only + notifications about sessions being created or removed + or changed) or uid (to get only + notifications when a user changes state in respect to + logins). If notifications shall be generated in all + these conditions, NULL may be passed. Note that in + future additional categories may be defined. 
The + second parameter returns a monitor object and needs to + be freed with the + sd_login_monitor_unref() call + after use. + + sd_login_monitor_unref() + may be used to destroy a monitor object. Note that + this will invalidate any file descriptor returned by + sd_login_monitor_get_fd(). + + sd_login_monitor_flush() + may be used to reset the wakeup state of the monitor + object. Whenever an event causes the monitor to wake + up the event loop via the file descriptor this + function needs to be called to reset the wake-up + state. If this call is not invoked the file descriptor + will immediately wake up the event loop again. + + sd_login_monitor_get_fd() + may be used to retrieve the file descriptor of the + monitor object that may be integrated in an + application defined event loop, based around + poll2 + or a similar interface. The application should include + the returned file descriptor as wake up source for + POLLIN events. Whenever a wake-up is triggered the + file descriptor needs to be reset via + sd_login_monitor_flush(). An + application needs to reread the login state with a + function like + sd_get_seats3 + or similar to determine what changed. + + + + Return Value + + On success + sd_login_monitor_new() and + sd_login_monitor_flush() return 0 + or a positive integer. On success + sd_login_monitor_get_fd() returns + a Unix file descriptor. On failure, these calls return + a negative errno-style error code. + + sd_login_monitor_unref() + always returns NULL. + + + + Notes + + The sd_login_monitor_new(), + sd_login_monitor_unref(), sd_login_monitor_flush() and + sd_login_monitor_get_fd() interfaces + are available as shared library, which can be compiled + and linked to with the + libsystemd-login + pkg-config1 + file. 
+ + + + See Also + + + systemd1, + sd-login7, + sd_get_seats3, + + + + diff --git a/man/sd_pid_get_session.xml b/man/sd_pid_get_session.xml new file mode 100644 index 0000000000..9176433c3d --- /dev/null +++ b/man/sd_pid_get_session.xml @@ -0,0 +1,136 @@ + + + + + + + + + sd_pid_get_session + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_pid_get_session + 3 + + + + sd_pid_get_session + sd_pid_get_owner_uid + Determine session or owner of a session of a specific PID + + + + + #include <systemd/sd-login.h> + + + int sd_pid_get_session + pid_t pid + char** session + + + + int sd_pid_get_owner_uid + pid_t pid + uid_t* uid + + + + + + Description + + sd_pid_get_session() may be + used to determine the login session identifier of a + process identified by the specified process identifier. The session + identifier is a short string (up to 64 characters), + consisting only of the characters a-zA-Z0-9 as well as + '-' and '_'. It is suitable for usage in file system + paths. Note that not all processes are part of a login + session (e.g. system service processes and user + processes that are shared between multiple sessions of + the same user). For processes not being part of a + login session this function will fail. The returned + string needs to be freed with the libc + free3 + call after use. + + sd_pid_get_owner_uid() may + be used to determine the Unix user identifier of the + owner of the session of a process identified by the + specified PID. Note that this function will succeed + for user processes which are shared between multiple + login sessions of the same user, where + sd_pid_get_session() will + fail. For processes not being part of a login session + and not being a shared process of a user this function + will fail. + + + + Return Value + + On success these calls return 0 or a positive + integer. On failure, these calls return a negative + errno-style error code. 
+ + + + Notes + + The sd_pid_get_session() + and sd_pid_get_owner_uid() + interfaces are available as shared library, which can + be compiled and linked to with the + libsystemd-login + pkg-config1 + file. + + Note that the login session identifier as + returned by sd_pid_get_session() + is completely unrelated to the process session + identifier as returned by + getsid2. + + + + See Also + + + systemd1, + sd-login7, + sd_session_is_active3, + getsid2 + + + + diff --git a/man/sd_seat_get_active.xml b/man/sd_seat_get_active.xml new file mode 100644 index 0000000000..e729a653b7 --- /dev/null +++ b/man/sd_seat_get_active.xml @@ -0,0 +1,150 @@ + + + + + + + + + sd_seat_get_active + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_seat_get_active + 3 + + + + sd_seat_get_active + sd_seat_get_sessions + sd_seat_can_multi_session + Determine state of a specific seat + + + + + #include <systemd/sd-login.h> + + + int sd_seat_get_active + const char* seat + char** session + uid_t* uid + + + + int sd_seat_get_sessions + const char* seat + char*** sessions + uid_t** uid + unsigned* n_uids + + + + int sd_seat_can_multi_session + const char* seat + + + + + + Description + + sd_seat_get_active() may be + used to determine which session is currently active on + a seat, if there is any. Returns the session + identifier and the user identifier of the Unix user + the session belongs to. Either the session or the + user identifier parameter can be passed NULL, in + case only one of the parameters shall be queried. The + returned string needs to be freed with the libc + free3 + call after use. + + sd_seat_get_sessions() may + be used to determine all sessions on the specified + seat. Returns two arrays, one (NULL terminated) with + the session identifiers of the sessions and one with + the user identifiers of the Unix users the sessions + belong to. 
An additional parameter may be used to + return the number of entries in the latter array. The + two arrays and the latter parameter may be passed as + NULL in case these values need not be + determined. The arrays and the strings referenced by + them need to be freed with the libc + free3 + call after use. + + sd_seat_can_multi_session() + may be used to determine whether a specific seat is + capable of multi-session, i.e. allows multiple login + sessions in parallel (with only one being active at a + time). + + + + Return Value + + On success + sd_seat_get_active() returns + 0 or a positive integer. On success + sd_seat_get_sessions() returns + the number of entries in the session identifier + array. If the test succeeds + sd_seat_can_multi_session returns + a positive integer, if it fails 0. On failure, these + calls return a negative errno-style error code. + + + + Notes + + The sd_seat_get_active(), + sd_seat_get_sessions(), and + sd_seat_can_multi_session() interfaces + are available as shared library, which can be compiled + and linked to with the + libsystemd-login + pkg-config1 + file. 
+ + + + See Also + + + systemd1, + sd-login7, + sd_session_get_seat3, + + + + diff --git a/man/sd_session_is_active.xml b/man/sd_session_is_active.xml new file mode 100644 index 0000000000..82919f84fb --- /dev/null +++ b/man/sd_session_is_active.xml @@ -0,0 +1,134 @@ + + + + + + + + + sd_session_is_active + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_session_is_active + 3 + + + + sd_session_is_active + sd_session_get_uid + sd_session_get_seat + Determine state of a specific session + + + + + #include <systemd/sd-login.h> + + + int sd_session_is_active + const char* session + + + + int sd_session_get_uid + const char* session + uid_t* uid + + + + int sd_session_get_seat + const char* session + char** seat + + + + + + Description + + sd_session_is_active() may + be used to determine whether the session identified by + the specified session identifier is currently active + (i.e. currently in the foreground and available for + user input) or not. + + sd_session_get_uid() may be + used to determine the user identifier of the Unix user the session + identified by the specified session identifier belongs + to. + + sd_session_get_seat() may + be used to determine the seat identifier of the seat + the session identified by the specified session + identifier belongs to. Note that not all sessions are + attached to a seat, this call will fail for them. The + returned string needs to be freed with the libc + free3 + call after use. + + + + Return Value + + If the test succeeds + sd_session_is_active() returns a + positive integer, if it fails 0. On success + sd_session_get_uid() and + sd_session_get_seat() return 0 or + a positive integer. On failure, these calls return a + negative errno-style error code. 
+ + + + Notes + + The sd_session_is_active(), + sd_session_get_uid(), and + sd_session_get_seat() interfaces + are available as shared library, which can be compiled + and linked to with the + libsystemd-login + pkg-config1 + file. + + + + See Also + + + systemd1, + sd-login7, + sd_pid_get_session3, + + + + diff --git a/man/sd_uid_get_state.xml b/man/sd_uid_get_state.xml new file mode 100644 index 0000000000..a4e9e73087 --- /dev/null +++ b/man/sd_uid_get_state.xml @@ -0,0 +1,182 @@ + + + + + + + + + sd_uid_get_state + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_uid_get_state + 3 + + + + sd_uid_get_state + sd_uid_is_on_seat + sd_uid_get_sessions + sd_uid_get_seats + Determine login state of a specific Unix user ID + + + + + #include <systemd/sd-login.h> + + + int sd_uid_get_state + uid_t pid + char** state + + + + int sd_uid_is_on_seat + uid_t pid + int require_active + const char* seat + + + + int sd_uid_get_sessions + uid_t pid + int require_active + char*** sessions + + + + int sd_uid_get_seats + uid_t pid + int require_active + char*** seats + + + + + + Description + + sd_uid_get_state() may be + used to determine the login state of a specific Unix + user identifier. The following states are currently + known: offline (user not logged in + at all), lingering (user not logged + in, but some user services running), + online (user logged in, but not + active), active (user logged in on + an active seat). In the future additional states might + be defined, client code should be written to be robust + in regards to additional state strings being + returned. The returned string needs to be freed with + the libc + free3 + call after use. + + sd_uid_is_on_seat() may be + used to determine whether a specific user is logged in + or active on a specific seat. Accepts a Unix user + identifier and a seat identifier string as + parameters. The require_active + parameter is a boolean. 
If non-zero (true) this + function will test if the user is active (i.e. has a + session that is in the foreground and accepting user + input) on the specified seat, otherwise (false) only + if the user is logged in (and possibly inactive) on + the specified seat. + + sd_uid_get_sessions() may + be used to determine the current sessions of the + specified user. Accepts a Unix user identifier as + parameter. The require_active + boolean parameter controls whether the returned list + shall consist of only those sessions where the user is + currently active (true) or where the user is currently + logged in at all, possibly inactive (false). The call + returns a NULL terminated string array of session + identifiers in sessions which + needs to be freed by the caller with the libc + free3 + call after use, including all the strings referenced. If + the string array parameter is passed as NULL the array + will not be filled in, but the return code still + indicates the number of current sessions. + + Similarly, sd_uid_get_seats() + may be used to determine the list of seats on which + the user currently has sessions. Similar semantics + apply, however note that the user may have + multiple sessions on the same seat as well as sessions + with no attached seat and hence the number of entries + in the returned array may differ from the one returned + by sd_uid_get_sessions(). + + + + Return Value + + On success + sd_uid_get_state() returns 0 or a + positive integer. If the test succeeds + sd_uid_is_on_seat() returns a + positive integer, if it fails + 0. sd_uid_get_sessions() and + sd_uid_get_seats() return the + number of entries in the returned arrays. On failure, + these calls return a negative errno-style error + code. + + + + Notes + + The sd_uid_get_state(), + sd_uid_is_on_seat(), + sd_uid_get_sessions(), and + sd_uid_get_seats() interfaces are + available as shared library, which can be compiled and + linked to with the libsystemd-login + pkg-config1 + file. 
+ + + + See Also + + + systemd1, + sd-login7, + sd_pid_get_owner_uid3, + + + + -- cgit v1.2.3-54-g00ecf From a822cbfa2e42d60c3cafe724a8571329ab6c632e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 13:11:42 +0100 Subject: sd-daemon: fix #include lines since we now ship a shared library --- man/sd-daemon.xml | 2 +- man/sd_booted.xml | 2 +- man/sd_is_fifo.xml | 2 +- man/sd_listen_fds.xml | 2 +- man/sd_notify.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/man/sd-daemon.xml b/man/sd-daemon.xml index cd67d9992a..383d77f595 100644 --- a/man/sd-daemon.xml +++ b/man/sd-daemon.xml @@ -50,7 +50,7 @@ - #include "sd-daemon.h" + #include <systemd/sd-daemon.h> diff --git a/man/sd_booted.xml b/man/sd_booted.xml index ebcde36b48..c9f538a3e6 100644 --- a/man/sd_booted.xml +++ b/man/sd_booted.xml @@ -49,7 +49,7 @@ - #include "sd-daemon.h" + #include <systemd/sd-daemon.h> int sd_booted diff --git a/man/sd_is_fifo.xml b/man/sd_is_fifo.xml index f6fafabc39..82b89bb290 100644 --- a/man/sd_is_fifo.xml +++ b/man/sd_is_fifo.xml @@ -53,7 +53,7 @@ - #include "sd-daemon.h" + #include <systemd/sd-daemon.h> int sd_is_fifo diff --git a/man/sd_listen_fds.xml b/man/sd_listen_fds.xml index 3276aff63d..68a45cd766 100644 --- a/man/sd_listen_fds.xml +++ b/man/sd_listen_fds.xml @@ -49,7 +49,7 @@ - #include "sd-daemon.h" + #include <systemd/sd-daemon.h> #define SD_LISTEN_FDS_START 3 diff --git a/man/sd_notify.xml b/man/sd_notify.xml index dd0ba935d6..1b55680748 100644 --- a/man/sd_notify.xml +++ b/man/sd_notify.xml @@ -50,7 +50,7 @@ - #include "sd-daemon.h" + #include <systemd/sd-daemon.h> int sd_notify -- cgit v1.2.3-54-g00ecf From f0d2e205a28e37528ef791cc2913e6664d0dde7f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 13:12:36 +0100 Subject: man: build new man pages --- Makefile.am | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile.am b/Makefile.am index 7dee4cf0a4..248204b45c 100644 --- a/Makefile.am +++ 
b/Makefile.am @@ -831,6 +831,12 @@ MANPAGES_ALIAS = \ man/sd_is_socket_unix.3 \ man/sd_is_socket_inet.3 \ man/sd_notifyf.3 \ + man/sd_pid_get_session.3 \ + man/sd_uid_get_state.3 \ + man/sd_session_is_active.3 \ + man/sd_seat_get_active.3 \ + man/sd_get_seats.3 \ + man/sd_login_monitor_new.3 \ man/init.1 man/reboot.8: man/halt.8 -- cgit v1.2.3-54-g00ecf From 559de1289000f874e23ad01edfa1b37c102a793a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 13:19:01 +0100 Subject: man: sd_readahead is not actually available in libsystemd-daemon --- man/sd_readahead.xml | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/man/sd_readahead.xml b/man/sd_readahead.xml index 88b135b7ec..25fe5b2fc6 100644 --- a/man/sd_readahead.xml +++ b/man/sd_readahead.xml @@ -49,7 +49,7 @@ - #include "sd-daemon.h" + #include "sd-readahead.h" int sd_readahead @@ -134,23 +134,17 @@ url="http://cgit.freedesktop.org/systemd/tree/src/sd-readahead.h"/> sd_readahead() is - implemented in the reference implementation's + implemented in the reference implementation's drop-in sd-readahead.c and - sd-readahead.h files. These - interfaces are available as shared library, which can - be compiled and linked to with the - libsystemd-daemon - pkg-config1 - file. Alternatively, applications consuming this API - may copy the implementation into their source - tree. For more details about the reference - implementation see - sd-readahead7. - - If the reference implementation is used as - drop-in files and -DDISABLE_SYSTEMD is set during - compilation this function will always return 0 and - otherwise become a NOP. + sd-readahead.h files. It is + recommended that applications consuming this API copy + the implementation into their source tree. For more + details about the reference implementation see + sd-readahead7 + + If -DDISABLE_SYSTEMD is set during compilation + this function will always return 0 and otherwise + become a NOP. 
-- cgit v1.2.3-54-g00ecf From c10eb7b02eb048eb23f0c9f239bfe1f9e7bc8e4a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 13:25:00 +0100 Subject: build-sys: add rules for man page aliases --- Makefile.am | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Makefile.am b/Makefile.am index 248204b45c..907ecb4f7c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -844,8 +844,22 @@ man/poweroff.8: man/halt.8 man/sd_is_socket.3: man/sd_is_fifo.3 man/sd_is_socket_unix.3: man/sd_is_fifo.3 man/sd_is_socket_inet.3: man/sd_is_fifo.3 +man/sd_is_mq.3: man/sd_is_fifo.3 man/sd_notifyf.3: man/sd_notify.3 man/init.1: man/systemd.1 +man/sd_session_get_uid.3: man/sd_session_is_active.3 +man/sd_session_get_seat.3: man/sd_session_is_active.3 +man/sd_pid_get_owner_uid.3: man/sd_pid_get_session.3 +man/sd_uid_is_on_seat.3: man/sd_uid_get_state.3 +man/sd_uid_get_sessions.3: man/sd_uid_get_state.3 +man/sd_uid_get_seats.3: man/sd_uid_get_state.3 +man/sd_seat_get_sessions.3: man/sd_seat_get_active.3 +man/sd_seat_can_multi_session.3: man/sd_seat_get_active.3 +man/sd_get_sessions.3: man/sd_get_seats.3 +man/sd_get_uids.3: man/sd_get_seats.3 +man/sd_login_monitor_unref.3: man/sd_login_monitor_new.3 +man/sd_login_monitor_flush.3: man/sd_login_monitor_new.3 +man/sd_login_monitor_get_fd.3: man/sd_login_monitor_new.3 dist_man_MANS = \ $(MANPAGES) \ -- cgit v1.2.3-54-g00ecf From 01448ff92d9549785242ffab453bf5bcde348c61 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 13:57:07 +0100 Subject: man: add sd-login(7) page --- Makefile.am | 31 ++++++++---- man/sd-login.xml | 117 +++++++++++++++++++++++++++++++++++++++++++++ man/sd_pid_get_session.xml | 19 ++++---- 3 files changed, 149 insertions(+), 18 deletions(-) create mode 100644 man/sd-login.xml diff --git a/Makefile.am b/Makefile.am index 907ecb4f7c..e689355cee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -799,6 +799,7 @@ MANPAGES = \ man/daemon.7 \ man/sd-daemon.7 \ man/sd-readahead.7 \ + 
man/sd-login.7 \ man/runlevel.8 \ man/telinit.8 \ man/halt.8 \ @@ -817,7 +818,13 @@ MANPAGES = \ man/modules-load.d.5 \ man/sysctl.d.5 \ man/systemd-ask-password.1 \ - man/systemd-loginctl.1 + man/systemd-loginctl.1 \ + man/sd_pid_get_session.3 \ + man/sd_uid_get_state.3 \ + man/sd_session_is_active.3 \ + man/sd_seat_get_active.3 \ + man/sd_get_seats.3 \ + man/sd_login_monitor_new.3 if ENABLE_BINFMT MANPAGES += \ @@ -830,14 +837,22 @@ MANPAGES_ALIAS = \ man/sd_is_socket.3 \ man/sd_is_socket_unix.3 \ man/sd_is_socket_inet.3 \ + man/sd_is_mq.3 \ man/sd_notifyf.3 \ - man/sd_pid_get_session.3 \ - man/sd_uid_get_state.3 \ - man/sd_session_is_active.3 \ - man/sd_seat_get_active.3 \ - man/sd_get_seats.3 \ - man/sd_login_monitor_new.3 \ - man/init.1 + man/init.1 \ + man/sd_session_get_uid.3 \ + man/sd_session_get_seat.3 \ + man/sd_pid_get_owner_uid.3 \ + man/sd_uid_is_on_seat.3 \ + man/sd_uid_get_sessions.3 \ + man/sd_uid_get_seats.3 \ + man/sd_seat_get_sessions.3 \ + man/sd_seat_can_multi_session.3 \ + man/sd_get_sessions.3 \ + man/sd_get_uids.3 \ + man/sd_login_monitor_unref.3 \ + man/sd_login_monitor_flush.3 \ + man/sd_login_monitor_get_fd.3 man/reboot.8: man/halt.8 man/poweroff.8: man/halt.8 diff --git a/man/sd-login.xml b/man/sd-login.xml new file mode 100644 index 0000000000..62ec6ffefd --- /dev/null +++ b/man/sd-login.xml @@ -0,0 +1,117 @@ + + + + + + + + + sd-login + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd-login + 7 + + + + sd-login + APIs for + tracking logins + + + + + #include <systemd/sd-login.h> + + + + pkg-config --cflags --libs libsystemd-login + + + + + Description + + sd-login.h provides APIs to + introspect and monitor seat, login session and user + status information on the local system. + + See Multi-Seat + on Linux for an introduction into multi-seat + support on Linux, the background for this set of APIs. 
+ + Note that these APIs only allow purely passive access + and monitoring of seats, sessions and users. To + actively make changes to the seat configuration, + terminate login sessions, or switch session on a seat + you need to utilize the D-Bus API of + systemd-logind. + + See + sd_pid_get_session3, + sd_uid_get_state3, + sd_session_is_active3, + sd_seat_get_active3, + sd_get_seats3, + sd_login_monitor_new3 + for more information about the functions + implemented. + + + + Notes + + These APIs are implemented as shared library, + which can be compiled and linked to with the + libsystemd-login + pkg-config1 + file. + + + + See Also + + systemd1, + sd_pid_get_session3, + sd_uid_get_state3, + sd_session_is_active3, + sd_seat_get_active3, + sd_get_seats3, + sd_login_monitor_new3, + sd-daemon7, + sd-readahead7, + pkg-config1 + + + + diff --git a/man/sd_pid_get_session.xml b/man/sd_pid_get_session.xml index 9176433c3d..24e468015f 100644 --- a/man/sd_pid_get_session.xml +++ b/man/sd_pid_get_session.xml @@ -71,16 +71,15 @@ sd_pid_get_session() may be used to determine the login session identifier of a - process identified by the specified process identifier. The session - identifier is a short string (up to 64 characters), - consisting only of the characters a-zA-Z0-9 as well as - '-' and '_'. It is suitable for usage in file system - paths. Note that not all processes are part of a login - session (e.g. system service processes and user - processes that are shared between multiple sessions of - the same user). For processes not being part of a - login session this function will fail. The returned - string needs to be freed with the libc + process identified by the specified process + identifier. The session identifier is a short string, + suitable for usage in file system paths. Note that not + all processes are part of a login session (e.g. system + service processes and user processes that are shared + between multiple sessions of the same user). 
For + processes not being part of a login session this + function will fail. The returned string needs to be + freed with the libc free3 call after use. -- cgit v1.2.3-54-g00ecf From 595aae376fae21f885ec9af2cac1aaf3ff3e9bee Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 14:42:59 +0100 Subject: man: various updates --- man/sd-login.xml | 21 ++++++++++++++++++++- man/sd_get_seats.xml | 12 +++++++----- man/sd_login_monitor_new.xml | 2 +- man/sd_seat_get_active.xml | 6 ++++-- man/sd_session_is_active.xml | 2 +- man/sd_uid_get_state.xml | 13 ++++++++----- 6 files changed, 41 insertions(+), 15 deletions(-) diff --git a/man/sd-login.xml b/man/sd-login.xml index 62ec6ffefd..9926d2b85c 100644 --- a/man/sd-login.xml +++ b/man/sd-login.xml @@ -75,7 +75,26 @@ actively make changes to the seat configuration, terminate login sessions, or switch session on a seat you need to utilize the D-Bus API of - systemd-logind. + systemd-logind, instead. + + These functions access data in + /proc, + /sys/fs/cgroup and + /run. All of these are virtual + file systems, hence the runtime cost of the accesses + is relatively cheap. + + If the functions return string arrays, these are + generally NULL terminated and need to be freed by the + caller with the libc + free3 + call after use, including the strings referenced + therein. Similar, individual strings returned need to + be freed, as well. + + As a special exception, instead of an empty + string array NULL may be returned, which should be + treated equivalent to an empty string array. See sd_pid_get_session3, diff --git a/man/sd_get_seats.xml b/man/sd_get_seats.xml index bbc396a09b..2ac76500ec 100644 --- a/man/sd_get_seats.xml +++ b/man/sd_get_seats.xml @@ -76,11 +76,13 @@ sd_get_seats() may be used to determine all currently available local - seats. Returns an array of seat identifiers. The - returned array and all strings it references need to - be freed with the libc + seats. 
Returns a NULL terminated array of seat + identifiers. The returned array and all strings it + references need to be freed with the libc free3 - call after use. + call after use. Note that instead of an empty array + NULL may be returned and should be considered + equivalent to an empty array. Similar, sd_get_sessions() may be used to determine all current login sessions. @@ -118,7 +120,7 @@ systemd1, sd-login7, - sd_session_get_seat3, + sd_session_get_seat3 diff --git a/man/sd_login_monitor_new.xml b/man/sd_login_monitor_new.xml index 2b37f00d30..de484329a9 100644 --- a/man/sd_login_monitor_new.xml +++ b/man/sd_login_monitor_new.xml @@ -165,7 +165,7 @@ systemd1, sd-login7, - sd_get_seats3, + sd_get_seats3 diff --git a/man/sd_seat_get_active.xml b/man/sd_seat_get_active.xml index e729a653b7..14cda60a56 100644 --- a/man/sd_seat_get_active.xml +++ b/man/sd_seat_get_active.xml @@ -101,7 +101,9 @@ determined. The arrays and the strings referenced by them need to be freed with the libc free3 - call after use. + call after use. Note that instead of an empty array + NULL may be returned and should be considered + equivalent to an empty array. sd_seat_can_multi_session() may be used to determine whether a specific seat is @@ -143,7 +145,7 @@ systemd1, sd-login7, - sd_session_get_seat3, + sd_session_get_seat3 diff --git a/man/sd_session_is_active.xml b/man/sd_session_is_active.xml index 82919f84fb..88b22fd9f8 100644 --- a/man/sd_session_is_active.xml +++ b/man/sd_session_is_active.xml @@ -127,7 +127,7 @@ systemd1, sd-login7, - sd_pid_get_session3, + sd_pid_get_session3 diff --git a/man/sd_uid_get_state.xml b/man/sd_uid_get_state.xml index a4e9e73087..67776257db 100644 --- a/man/sd_uid_get_state.xml +++ b/man/sd_uid_get_state.xml @@ -126,10 +126,13 @@ identifiers in sessions which needs to be freed by the caller with the libc free3 - call after use, including all the strings referenced. 
If - the string array parameter is passed as NULL the array - will not be filled in, but the return code still - indicates the number of current sessions. + call after use, including all the strings + referenced. If the string array parameter is passed as + NULL the array will not be filled in, but the return + code still indicates the number of current + sessions. Note that instead of an empty array NULL may + be returned and should be considered equivalent to an + empty array. Similar, sd_uid_get_seats() may be used to determine the list of seats on which @@ -175,7 +178,7 @@ systemd1, sd-login7, - sd_pid_get_owner_uid3, + sd_pid_get_owner_uid3 -- cgit v1.2.3-54-g00ecf From 5079a105e701f17439635e76d8cb3052badbb34c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 17:40:31 +0100 Subject: man: extend sd-login(7) in regards to mixing D-Bus and synchronous library calls a bit --- man/sd-login.xml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/man/sd-login.xml b/man/sd-login.xml index 9926d2b85c..3fc0e16f69 100644 --- a/man/sd-login.xml +++ b/man/sd-login.xml @@ -77,13 +77,23 @@ you need to utilize the D-Bus API of systemd-logind, instead. - These functions access data in + These functions synchronously access data in /proc, /sys/fs/cgroup and /run. All of these are virtual file systems, hence the runtime cost of the accesses is relatively cheap. + It is possible (and often a very good choice) to + mix calls to the synchronous interface of + sd-login.h with the asynchronous + D-Bus interface of systemd-logind. However, if this is + done you need to think a bit about possible races + since the stream of events from D-Bus and from + sd-login.h interfaces such as the + login monitor are asynchronous and not ordered against + each other. 
+ If the functions return string arrays, these are generally NULL terminated and need to be freed by the caller with the libc -- cgit v1.2.3-54-g00ecf From 2e4a6ff47b311216829ed0f48f17ee9bc35641dc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 19:54:51 +0100 Subject: hashmap: add hashmap_first_key() --- src/hashmap.c | 11 +++++++++++ src/hashmap.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/hashmap.c b/src/hashmap.c index 95ea45da48..7ef809746d 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -558,6 +558,17 @@ void* hashmap_first(Hashmap *h) { return h->iterate_list_head->value; } +void* hashmap_first_key(Hashmap *h) { + + if (!h) + return NULL; + + if (!h->iterate_list_head) + return NULL; + + return (void*) h->iterate_list_head->key; +} + void* hashmap_last(Hashmap *h) { if (!h) diff --git a/src/hashmap.h b/src/hashmap.h index 16ffbd3922..ab4363a7a3 100644 --- a/src/hashmap.h +++ b/src/hashmap.h @@ -74,6 +74,7 @@ void hashmap_clear(Hashmap *h); void *hashmap_steal_first(Hashmap *h); void *hashmap_steal_first_key(Hashmap *h); void* hashmap_first(Hashmap *h); +void* hashmap_first_key(Hashmap *h); void* hashmap_last(Hashmap *h); char **hashmap_get_strv(Hashmap *h); -- cgit v1.2.3-54-g00ecf From 38c67e2a442d875c1de6f5aae46647a195230b05 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 19:55:54 +0100 Subject: man: generate HTML instead of XHTML with XSL docbook to work around 'fsfunc' noise --- Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index e689355cee..656909c6c7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1771,11 +1771,11 @@ XSLTPROC_PROCESS_MAN_IN = \ XSLTPROC_PROCESS_HTML = \ $(AM_V_GEN)$(MKDIR_P) $(dir $@) && \ - $(XSLTPROC) -o $@ $(XSLTPROC_FLAGS) http://docbook.sourceforge.net/release/xsl/current/xhtml-1_1/docbook.xsl $< + $(XSLTPROC) -o $@ $(XSLTPROC_FLAGS) http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl $< 
XSLTPROC_PROCESS_HTML_IN = \ $(AM_V_GEN)$(MKDIR_P) $(dir $@) && \ - $(XSLTPROC) -o ${@:.in=} $(XSLTPROC_FLAGS) http://docbook.sourceforge.net/release/xsl/current/xhtml-1_1/docbook.xsl $< && \ + $(XSLTPROC) -o ${@:.in=} $(XSLTPROC_FLAGS) http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl $< && \ mv ${@:.in=} $@ man/%.1: man/%.xml -- cgit v1.2.3-54-g00ecf From 76318284fc970b30e9dc4c079960807345331dad Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 20:25:52 +0100 Subject: man: switch to UTF-8 output, to work around charset issues --- Makefile.am | 9 +++++---- man/custom-html.xsl | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 man/custom-html.xsl diff --git a/Makefile.am b/Makefile.am index 656909c6c7..f9093b6d8e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -506,7 +506,8 @@ EXTRA_DIST = \ src/libsystemd-login.sym \ introspect.awk \ src/73-seat-late.rules.in \ - src/99-systemd.rules.in + src/99-systemd.rules.in \ + man/custom-html.xsl if ENABLE_BINFMT EXTRA_DIST += \ @@ -1758,7 +1759,7 @@ endif if HAVE_XSLTPROC XSLTPROC_FLAGS = \ --nonet \ - --param funcsynopsis.style "'ansi'" + --stringparam funcsynopsis.style ansi XSLTPROC_PROCESS_MAN = \ $(AM_V_GEN)$(MKDIR_P) $(dir $@) && \ @@ -1771,11 +1772,11 @@ XSLTPROC_PROCESS_MAN_IN = \ XSLTPROC_PROCESS_HTML = \ $(AM_V_GEN)$(MKDIR_P) $(dir $@) && \ - $(XSLTPROC) -o $@ $(XSLTPROC_FLAGS) http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl $< + $(XSLTPROC) -o $@ $(XSLTPROC_FLAGS) man/custom-html.xsl $< XSLTPROC_PROCESS_HTML_IN = \ $(AM_V_GEN)$(MKDIR_P) $(dir $@) && \ - $(XSLTPROC) -o ${@:.in=} $(XSLTPROC_FLAGS) http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl $< && \ + $(XSLTPROC) -o ${@:.in=} $(XSLTPROC_FLAGS) man/custom-html.xsl $< && \ mv ${@:.in=} $@ man/%.1: man/%.xml diff --git a/man/custom-html.xsl b/man/custom-html.xsl new file mode 100644 index 0000000000..2d2f458793 --- /dev/null +++ 
b/man/custom-html.xsl @@ -0,0 +1,29 @@ + + + + + + + + + + + + -- cgit v1.2.3-54-g00ecf From 50f20cfdb0f127e415ab38c024d9ca7a3602f74b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Dec 2011 22:35:46 +0100 Subject: journal: implement inotify-based live logging logic --- src/journal/journal-file.c | 14 +++ src/journal/journalctl.c | 79 +++++++++---- src/journal/journald.c | 2 +- src/journal/sd-journal.c | 278 ++++++++++++++++++++++++++++++++++++++++++++- src/journal/sd-journal.h | 11 +- src/util.c | 2 +- 6 files changed, 352 insertions(+), 34 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 427631d30a..7626743248 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -879,6 +879,18 @@ static int journal_file_append_entry_internal( return 0; } +static void journal_file_post_change(JournalFile *f) { + assert(f); + + /* inotify() does not receive IN_MODIFY events from file + * accesses done via mmap(). After each access we hence + * trigger IN_MODIFY by truncating the journal file to its + * current size which triggers IN_MODIFY. 
*/ + + if (ftruncate(f->fd, f->last_stat.st_size) < 0) + log_error("Failed to to truncate file to its own size: %m"); +} + int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) { unsigned i; EntryItem *items; @@ -923,6 +935,8 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset); + journal_file_post_change(f); + finish: free(items); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 9220efdfec..c947730441 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -26,12 +26,15 @@ #include #include #include +#include #include "sd-journal.h" #include "log.h" +static bool arg_follow = true; + int main(int argc, char *argv[]) { - int r, i; + int r, i, fd; sd_journal *j = NULL; log_set_max_level(LOG_DEBUG); @@ -54,32 +57,68 @@ int main(int argc, char *argv[]) { } } - SD_JOURNAL_FOREACH(j) { + fd = sd_journal_get_fd(j); + if (fd < 0) { + log_error("Failed to get wakeup fd: %s", strerror(-fd)); + goto finish; + } - const void *data; - size_t length; - char *cursor; - uint64_t realtime = 0, monotonic = 0; + r = sd_journal_seek_head(j); + if (r < 0) { + log_error("Failed to seek to head: %s", strerror(-r)); + goto finish; + } - r = sd_journal_get_cursor(j, &cursor); - if (r < 0) { - log_error("Failed to get cursor: %s", strerror(-r)); - goto finish; + for (;;) { + struct pollfd pollfd; + + while (sd_journal_next(j) > 0) { + const void *data; + size_t length; + char *cursor; + uint64_t realtime = 0, monotonic = 0; + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) { + log_error("Failed to get cursor: %s", strerror(-r)); + goto finish; + } + + printf("entry: %s\n", cursor); + free(cursor); + + sd_journal_get_realtime_usec(j, &realtime); + sd_journal_get_monotonic_usec(j, &monotonic, NULL); + 
printf("realtime: %llu\n" + "monotonic: %llu\n", + (unsigned long long) realtime, + (unsigned long long) monotonic); + + SD_JOURNAL_FOREACH_DATA(j, data, length) + printf("\t%.*s\n", (int) length, (const char*) data); } - printf("entry: %s\n", cursor); - free(cursor); + if (!arg_follow) + break; + + zero(pollfd); + pollfd.fd = fd; + pollfd.events = POLLIN; - sd_journal_get_realtime_usec(j, &realtime); - sd_journal_get_monotonic_usec(j, &monotonic, NULL); - printf("realtime: %llu\n" - "monotonic: %llu\n", - (unsigned long long) realtime, - (unsigned long long) monotonic); + if (poll(&pollfd, 1, -1) < 0) { + if (errno == EINTR) + break; - SD_JOURNAL_FOREACH_DATA(j, data, length) - printf("\t%.*s\n", (int) length, (const char*) data); + log_error("poll(): %m"); + r = -errno; + goto finish; + } + r = sd_journal_process(j); + if (r < 0) { + log_error("Failed to process: %s", strerror(-r)); + goto finish; + } } finish: diff --git a/src/journal/journald.c b/src/journal/journald.c index 630ead0053..c457d2786b 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -866,7 +866,7 @@ int main(int argc, char *argv[]) { sd_notify(false, "READY=1\n" "STATUS=Processing messages..."); -# + for (;;) { struct epoll_event event; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index bcfcbfb9e1..bd510be51c 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include "sd-journal.h" #include "journal-def.h" @@ -73,6 +75,10 @@ struct sd_journal { JournalFile *current_file; uint64_t current_field; + int inotify_fd; + Hashmap *inotify_wd_dirs; + Hashmap *inotify_wd_roots; + LIST_HEAD(Match, matches); unsigned n_matches; }; @@ -934,11 +940,6 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch assert(prefix); assert(filename); - if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) { - log_debug("Too many open journal files, ignoring."); - return 0; - } - 
if (dir) fn = join(prefix, "/", dir, "/", filename, NULL); else @@ -947,6 +948,17 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch if (!fn) return -ENOMEM; + if (hashmap_get(j->files, fn)) { + free(fn); + return 0; + } + + if (hashmap_size(j->files) >= JOURNAL_FILES_MAX) { + log_debug("Too many open journal files, not adding %s, ignoring.", fn); + free(fn); + return 0; + } + r = journal_file_open(fn, O_RDONLY, 0, NULL, &f); free(fn); @@ -965,6 +977,37 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch return r; } + log_debug("File %s got added.", f->path); + + return 0; +} + +static int remove_file(sd_journal *j, const char *prefix, const char *dir, const char *filename) { + char *fn; + JournalFile *f; + + assert(j); + assert(prefix); + assert(filename); + + if (dir) + fn = join(prefix, "/", dir, "/", filename, NULL); + else + fn = join(prefix, "/", filename, NULL); + + if (!fn) + return -ENOMEM; + + f = hashmap_get(j->files, fn); + free(fn); + + if (!f) + return 0; + + hashmap_remove(j->files, f->path); + journal_file_close(f); + + log_debug("File %s got removed.", f->path); return 0; } @@ -972,6 +1015,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dir) { char *fn; int r; DIR *d; + int wd; assert(j); assert(prefix); @@ -982,15 +1026,28 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dir) { return -ENOMEM; d = opendir(fn); - free(fn); if (!d) { + free(fn); if (errno == ENOENT) return 0; return -errno; } + wd = inotify_add_watch(j->inotify_fd, fn, + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT| + IN_DONT_FOLLOW|IN_ONLYDIR); + if (wd > 0) { + if (hashmap_put(j->inotify_wd_dirs, INT_TO_PTR(wd), fn) < 0) + inotify_rm_watch(j->inotify_fd, wd); + else + fn = NULL; + } + + free(fn); + for (;;) { struct dirent buf, *de; @@ -1008,9 +1065,65 @@ static int add_directory(sd_journal *j, const char *prefix, 
const char *dir) { closedir(d); + log_debug("Directory %s/%s got added.", prefix, dir); + return 0; } +static void remove_directory_wd(sd_journal *j, int wd) { + char *p; + + assert(j); + assert(wd > 0); + + if (j->inotify_fd >= 0) + inotify_rm_watch(j->inotify_fd, wd); + + p = hashmap_remove(j->inotify_wd_dirs, INT_TO_PTR(wd)); + + if (p) { + log_debug("Directory %s got removed.", p); + free(p); + } +} + +static void add_root_wd(sd_journal *j, const char *p) { + int wd; + char *k; + + assert(j); + assert(p); + + wd = inotify_add_watch(j->inotify_fd, p, + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DONT_FOLLOW|IN_ONLYDIR); + if (wd <= 0) + return; + + k = strdup(p); + if (!k || hashmap_put(j->inotify_wd_roots, INT_TO_PTR(wd), k) < 0) { + inotify_rm_watch(j->inotify_fd, wd); + free(k); + } +} + +static void remove_root_wd(sd_journal *j, int wd) { + char *p; + + assert(j); + assert(wd > 0); + + if (j->inotify_fd >= 0) + inotify_rm_watch(j->inotify_fd, wd); + + p = hashmap_remove(j->inotify_wd_roots, INT_TO_PTR(wd)); + + if (p) { + log_debug("Root %s got removed.", p); + free(p); + } +} + int sd_journal_open(sd_journal **ret) { sd_journal *j; const char *p; @@ -1025,12 +1138,26 @@ int sd_journal_open(sd_journal **ret) { if (!j) return -ENOMEM; + j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (j->inotify_fd < 0) { + r = -errno; + goto fail; + } + j->files = hashmap_new(string_hash_func, string_compare_func); if (!j->files) { r = -ENOMEM; goto fail; } + j->inotify_wd_dirs = hashmap_new(trivial_hash_func, trivial_compare_func); + j->inotify_wd_roots = hashmap_new(trivial_hash_func, trivial_compare_func); + + if (!j->inotify_wd_dirs || !j->inotify_wd_roots) { + r = -ENOMEM; + goto fail; + } + /* We ignore most errors here, since the idea is to only open * what's actually accessible, and ignore the rest. 
*/ @@ -1044,6 +1171,8 @@ int sd_journal_open(sd_journal **ret) { continue; } + add_root_wd(j, p); + for (;;) { struct dirent buf, *de; sd_id128_t id; @@ -1081,6 +1210,24 @@ fail: void sd_journal_close(sd_journal *j) { assert(j); + if (j->inotify_wd_dirs) { + void *k; + + while ((k = hashmap_first_key(j->inotify_wd_dirs))) + remove_directory_wd(j, PTR_TO_INT(k)); + + hashmap_free(j->inotify_wd_dirs); + } + + if (j->inotify_wd_roots) { + void *k; + + while ((k = hashmap_first_key(j->inotify_wd_roots))) + remove_root_wd(j, PTR_TO_INT(k)); + + hashmap_free(j->inotify_wd_roots); + } + if (j->files) { JournalFile *f; @@ -1092,6 +1239,9 @@ void sd_journal_close(sd_journal *j) { sd_journal_flush_matches(j); + if (j->inotify_fd >= 0) + close_nointr_nofail(j->inotify_fd); + free(j); } @@ -1275,3 +1425,119 @@ void sd_journal_restart_data(sd_journal *j) { j->current_field = 0; } + +int sd_journal_get_fd(sd_journal *j) { + assert(j); + + return j->inotify_fd; +} + +static void process_inotify_event(sd_journal *j, struct inotify_event *e) { + char *p; + int r; + + assert(j); + assert(e); + + /* Is this a subdirectory we watch? */ + p = hashmap_get(j->inotify_wd_dirs, INT_TO_PTR(e->wd)); + if (p) { + + if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) { + + /* Event for a journal file */ + + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + r = add_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r)); + } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) { + + r = remove_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r)); + } + + } else if (e->len == 0) { + + /* Event for the directory itself */ + + if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) + remove_directory_wd(j, e->wd); + } + + return; + } + + /* Must be the root directory then? 
*/ + p = hashmap_get(j->inotify_wd_roots, INT_TO_PTR(e->wd)); + if (p) { + sd_id128_t id; + + if (!(e->mask & IN_ISDIR) && e->len > 0 && endswith(e->name, ".journal")) { + + /* Event for a journal file */ + + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + r = add_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to add file %s/%s: %s", p, e->name, strerror(-r)); + } else if (e->mask & (IN_DELETE|IN_UNMOUNT)) { + + r = remove_file(j, p, NULL, e->name); + if (r < 0) + log_debug("Failed to remove file %s/%s: %s", p, e->name, strerror(-r)); + } + + } else if ((e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) { + + /* Event for subdirectory */ + + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) { + + r = add_directory(j, p, e->name); + if (r < 0) + log_debug("Failed to add directory %s/%s: %s", p, e->name, strerror(-r)); + } + } + + return; + } + + if (e->mask & IN_IGNORED) + return; + + log_warning("Unknown inotify event."); +} + +int sd_journal_process(sd_journal *j) { + uint8_t buffer[sizeof(struct inotify_event) + FILENAME_MAX]; + + assert(j); + + for (;;) { + struct inotify_event *e; + ssize_t l; + + l = read(j->inotify_fd, buffer, sizeof(buffer)); + if (l < 0) { + if (errno == EINTR || errno == EAGAIN) + return 0; + + return -errno; + } + + e = (struct inotify_event*) buffer; + while (l > 0) { + size_t step; + + process_inotify_event(j, e); + + step = sizeof(struct inotify_event) + e->len; + assert(step <= (size_t) l); + + e = (struct inotify_event*) ((uint8_t*) e + step); + l -= step; + } + } +} diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 05a929d910..33e4b78855 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -32,7 +32,6 @@ /* TODO: * * - check LE/BE conversion for 8bit, 16bit, 32bit values - * - implement inotify usage on client * - implement audit gateway * - implement stdout gateway * - extend hash tables table as we go @@ -40,7 +39,7 @@ * - 
throttling * - cryptographic hash * - fix space reservation logic - * - comm, argv can be manipulated, should it be _COMM=, _CMDLINE= or COMM=, CMDLINE=? + * - compression */ /* Write to daemon */ @@ -92,16 +91,16 @@ enum { SD_JOURNAL_INVALIDATE_REMOVE }; -int sd_journal_get_fd(sd_journal *j); /* missing */ -int sd_journal_process(sd_journal *j); /* missing */ +int sd_journal_get_fd(sd_journal *j); +int sd_journal_process(sd_journal *j); #define SD_JOURNAL_FOREACH(j) \ if (sd_journal_seek_head(j) >= 0) \ - while (sd_journal_next(j) > 0) \ + while (sd_journal_next(j) > 0) #define SD_JOURNAL_FOREACH_BACKWARDS(j) \ if (sd_journal_seek_tail(j) >= 0) \ - while (sd_journal_previous(j) > 0) \ + while (sd_journal_previous(j) > 0) #define SD_JOURNAL_FOREACH_DATA(j, data, l) \ for (sd_journal_restart_data(j); sd_journal_enumerate_data((j), &(data), &(l)) > 0; ) diff --git a/src/util.c b/src/util.c index e5b5e53f7e..37942de534 100644 --- a/src/util.c +++ b/src/util.c @@ -2674,7 +2674,7 @@ int acquire_terminal(const char *name, bool fail, bool force, bool ignore_tiocst ssize_t l; struct inotify_event *e; - if ((l = read(notify, &inotify_buffer, sizeof(inotify_buffer))) < 0) { + if ((l = read(notify, inotify_buffer, sizeof(inotify_buffer))) < 0) { if (errno == EINTR) continue; -- cgit v1.2.3-54-g00ecf From 466ccd92e2f9ad712332012e1b3643a34b006a45 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 20 Dec 2011 00:38:14 +0100 Subject: journal: fix matches --- src/journal/journal-file.c | 2 +- src/journal/sd-journal.c | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 7626743248..8a864cb913 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -1434,7 +1434,7 @@ int journal_file_next_entry_for_data( assert(p > 0 || !o); r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d); - if (r <= 0) + if (r < 0) return r; n = 
le64toh(d->data.n_entries); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index bd510be51c..9dff72429b 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -525,6 +525,7 @@ static int next_with_matches(sd_journal *j, JournalFile *f, direction_t directio uint64_t np, n; bool found, term_result = false; Match *m, *term_match = NULL; + Object *npo = NULL; n = journal_file_entry_n_items(c); @@ -535,6 +536,7 @@ static int next_with_matches(sd_journal *j, JournalFile *f, direction_t directio np = 0; LIST_FOREACH(matches, m, j->matches) { uint64_t q, k; + Object *qo = NULL; /* Let's check if this is the beginning of a * new term, i.e. has a different field prefix @@ -567,22 +569,26 @@ static int next_with_matches(sd_journal *j, JournalFile *f, direction_t directio * where we'd have to try next, in case the other * matches are not OK */ - r = journal_file_next_entry_for_data(f, c, cp, le64toh(c->entry.items[k].object_offset), direction, NULL, &q); + r = journal_file_next_entry_for_data(f, c, cp, le64toh(c->entry.items[k].object_offset), direction, &qo, &q); if (r > 0) { if (direction == DIRECTION_DOWN) { - if (q > np) + if (q > np) { np = q; + npo = qo; + } } else { - if (np == 0 || q < np) + if (np == 0 || q < np) { np = q; + npo = qo; + } } } } /* Check the last term */ - if (term_match && term_result) - found = true; + if (term_match && !term_result) + found = false; /* Did this entry match against all matches? 
*/ if (found) { @@ -600,6 +606,7 @@ static int next_with_matches(sd_journal *j, JournalFile *f, direction_t directio /* Hmm, ok, this entry only matched partially, so * let's try another one */ cp = np; + c = npo; } } @@ -612,12 +619,12 @@ static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direc assert(f); if (f->current_offset > 0) { - r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &c); + cp = f->current_offset; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, cp, &c); if (r < 0) return r; - cp = f->current_offset; - r = next_with_matches(j, f, direction, &c, &cp); if (r <= 0) return r; -- cgit v1.2.3-54-g00ecf From bc85bfee87e11317fbcd1160c9003860dc6edde9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 20 Dec 2011 02:38:36 +0100 Subject: journal: fix space reservation limit enforcement --- src/journal/journal-def.h | 3 -- src/journal/journal-file.c | 51 +++++++++++++++--------------- src/journal/journal-file.h | 13 ++++++++ src/journal/journald.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ src/journal/sd-journal.h | 1 - 5 files changed, 118 insertions(+), 29 deletions(-) diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 1a63ca1f8d..5f026ee0f6 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -135,9 +135,6 @@ _packed_ struct Header { sd_id128_t seqnum_id; uint64_t arena_offset; uint64_t arena_size; - uint64_t arena_max_size; /* obsolete */ - uint64_t arena_min_size; /* obsolete */ - uint64_t arena_keep_free; /* obsolete */ uint64_t data_hash_table_offset; /* for looking up data objects */ uint64_t data_hash_table_size; uint64_t field_hash_table_offset; /* for looking up field objects */ diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 8a864cb913..8f9b61bc2f 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -31,12 +31,6 @@ #include "journal-file.h" #include "lookup3.h" -#define 
DEFAULT_ARENA_MAX_SIZE (16ULL*1024ULL*1024ULL*1024ULL) -#define DEFAULT_ARENA_MIN_SIZE (256ULL*1024ULL) -#define DEFAULT_ARENA_KEEP_FREE (1ULL*1024ULL*1024ULL) - -#define DEFAULT_MAX_USE (16ULL*1024ULL*1024ULL*16ULL) - #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL) #define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL) @@ -76,9 +70,6 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) { zero(h); memcpy(h.signature, signature, 8); h.arena_offset = htole64(ALIGN64(sizeof(h))); - h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); - h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); - h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); r = sd_id128_randomize(&h.file_id); if (r < 0) @@ -161,16 +152,10 @@ static int journal_file_verify_header(JournalFile *f) { } static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { - uint64_t asize; uint64_t old_size, new_size; assert(f); - if (offset < le64toh(f->header->arena_offset)) - return -EINVAL; - - new_size = PAGE_ALIGN(offset + size); - /* We assume that this file is not sparse, and we know that * for sure, since we always call posix_fallocate() * ourselves */ @@ -179,12 +164,19 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) le64toh(f->header->arena_offset) + le64toh(f->header->arena_size); - if (old_size >= new_size) + new_size = PAGE_ALIGN(offset + size); + if (new_size < le64toh(f->header->arena_offset)) + new_size = le64toh(f->header->arena_offset); + + if (new_size <= old_size) return 0; - asize = new_size - le64toh(f->header->arena_offset); + if (f->metrics.max_size > 0 && + new_size > f->metrics.max_size) + return -E2BIG; - if (asize > le64toh(f->header->arena_min_size)) { + if (new_size > f->metrics.min_size && + f->metrics.keep_free > 0) { struct statvfs svfs; if (fstatvfs(f->fd, &svfs) >= 0) { @@ -192,8 +184,8 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) available = svfs.f_bfree 
* svfs.f_bsize; - if (available >= f->header->arena_keep_free) - available -= f->header->arena_keep_free; + if (available >= f->metrics.keep_free) + available -= f->metrics.keep_free; else available = 0; @@ -202,16 +194,16 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) } } - if (asize > le64toh(f->header->arena_max_size)) - return -E2BIG; - + /* Note that the glibc fallocate() fallback is very + inefficient, hence we try to minimize the allocation area + as we can. */ if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0) return -errno; if (fstat(f->fd, &f->last_stat) < 0) return -errno; - f->header->arena_size = htole64(asize); + f->header->arena_size = new_size - htole64(f->header->arena_offset); return 0; } @@ -576,6 +568,9 @@ int journal_file_find_data_object_with_hash( osize = offsetof(Object, data.payload) + size; + if (f->header->data_hash_table_size == 0) + return -EBADMSG; + h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)); p = le64toh(f->data_hash_table[h].head_hash_offset); @@ -816,7 +811,7 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { if (r < 0) return r; - log_error("%s %lu", f->path, (unsigned long) f->header->n_entries); + log_error("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); if (f->header->head_entry_realtime == 0) f->header->head_entry_realtime = o->entry.realtime; @@ -887,6 +882,8 @@ static void journal_file_post_change(JournalFile *f) { * trigger IN_MODIFY by truncating the journal file to its * current size which triggers IN_MODIFY. 
*/ + __sync_synchronize(); + if (ftruncate(f->fd, f->last_stat.st_size) < 0) log_error("Failed to to truncate file to its own size: %m"); } @@ -1626,6 +1623,10 @@ int journal_file_open( f->writable = (flags & O_ACCMODE) != O_RDONLY; f->prot = prot_from_flags(flags); + f->metrics.max_size = DEFAULT_MAX_SIZE; + f->metrics.min_size = DEFAULT_MIN_SIZE; + f->metrics.keep_free = DEFAULT_KEEP_FREE; + f->path = strdup(fname); if (!f->path) { r = -ENOMEM; diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 664f917bb0..20712b5dcc 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -28,6 +28,11 @@ #include "util.h" #include "sd-id128.h" +#define DEFAULT_MAX_SIZE (1024ULL*128ULL) +#define DEFAULT_MIN_SIZE (256ULL*1024ULL) +#define DEFAULT_KEEP_FREE (1ULL*1024ULL*1024ULL) +#define DEFAULT_MAX_USE (16ULL*1024ULL*1024ULL*16ULL) + typedef struct Window { void *ptr; uint64_t offset; @@ -45,6 +50,12 @@ enum { _WINDOW_MAX }; +typedef struct JournalMetrics { + uint64_t max_size; + uint64_t min_size; + uint64_t keep_free; +} JournalMetrics; + typedef struct JournalFile { int fd; char *path; @@ -62,6 +73,8 @@ typedef struct JournalFile { Window windows[_WINDOW_MAX]; uint64_t current_offset; + + JournalMetrics metrics; } JournalFile; typedef enum direction { diff --git a/src/journal/journald.c b/src/journal/journald.c index c457d2786b..37f8f16754 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -54,6 +54,9 @@ typedef struct Server { char *buffer; size_t buffer_size; + + JournalMetrics metrics; + uint64_t max_use; } Server; static void fix_perms(JournalFile *f, uid_t uid) { @@ -153,6 +156,66 @@ static JournalFile* find_journal(Server *s, uid_t uid) { return f; } +static void server_vacuum(Server *s) { + Iterator i; + void *k; + char *p; + char ids[33]; + sd_id128_t machine; + int r; + JournalFile *f; + + log_info("Rotating..."); + + if (s->runtime_journal) { + r = journal_file_rotate(&s->runtime_journal); + if (r < 0) + 
log_error("Failed to rotate %s: %s", s->runtime_journal->path, strerror(-r)); + } + + if (s->system_journal) { + r = journal_file_rotate(&s->system_journal); + if (r < 0) + log_error("Failed to rotate %s: %s", s->system_journal->path, strerror(-r)); + } + + HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) { + r = journal_file_rotate(&f); + if (r < 0) + log_error("Failed to rotate %s: %s", f->path, strerror(-r)); + else + hashmap_replace(s->user_journals, k, f); + } + + log_info("Vacuuming..."); + + r = sd_id128_get_machine(&machine); + if (r < 0) { + log_error("Failed to get machine ID: %s", strerror(-r)); + return; + } + + if (asprintf(&p, "/var/log/journal/%s", sd_id128_to_string(machine, ids)) < 0) { + log_error("Out of memory."); + return; + } + + r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free); + if (r < 0 && r != -ENOENT) + log_error("Failed to vacuum %s: %s", p, strerror(-r)); + free(p); + + if (asprintf(&p, "/run/log/journal/%s", ids) < 0) { + log_error("Out of memory."); + return; + } + + r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free); + if (r < 0 && r != -ENOENT) + log_error("Failed to vacuum %s: %s", p, strerror(-r)); + free(p); +} + static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigned m, struct ucred *ucred, struct timeval *tv) { char *pid = NULL, *uid = NULL, *gid = NULL, *source_time = NULL, *boot_id = NULL, *machine_id = NULL, @@ -166,6 +229,7 @@ static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigne char *t; uid_t loginuid = 0, realuid = 0; JournalFile *f; + bool vacuumed = false; assert(s); assert(iovec || n == 0); @@ -262,12 +326,23 @@ static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigne assert(n <= m); +retry: f = find_journal(s, realuid == 0 ? 
0 : loginuid); if (!f) log_warning("Dropping message, as we can't find a place to store the data."); else { r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + if (r == -E2BIG && !vacuumed) { + log_info("Allocation limit reached."); + + server_vacuum(s); + vacuumed = true; + + log_info("Retrying write."); + goto retry; + } + if (r < 0) log_error("Failed to write entry, ignoring: %s", strerror(-r)); } @@ -715,6 +790,10 @@ static int server_init(Server *s) { zero(*s); s->syslog_fd = s->native_fd = s->signal_fd = -1; + s->metrics.max_size = DEFAULT_MAX_SIZE; + s->metrics.min_size = DEFAULT_MIN_SIZE; + s->metrics.keep_free = DEFAULT_KEEP_FREE; + s->max_use = DEFAULT_MAX_USE; s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (s->epoll_fd < 0) { diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 33e4b78855..ee9813f28c 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -38,7 +38,6 @@ * - accelerate looking for "all hostnames" and suchlike. 
* - throttling * - cryptographic hash - * - fix space reservation logic * - compression */ -- cgit v1.2.3-54-g00ecf From 807e17f05e217b474af39503efb9503d81b12596 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Dec 2011 02:40:59 +0100 Subject: journal: add inline compression support with XZ --- Makefile.am | 27 +++++++++++++++++ TODO | 2 ++ configure.ac | 14 +++++++++ src/journal/journal-def.h | 13 +++++++- src/journal/journal-file.c | 75 ++++++++++++++++++++++++++++++++++++++++++---- src/journal/journal-file.h | 7 +++++ src/journal/journald.c | 11 +++++++ src/journal/sd-journal.c | 49 ++++++++++++++++++++++++++---- src/journal/sd-journal.h | 2 +- 9 files changed, 187 insertions(+), 13 deletions(-) diff --git a/Makefile.am b/Makefile.am index f9093b6d8e..7d551a9dcf 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1028,6 +1028,15 @@ test_journal_CFLAGS = \ test_journal_LDADD = \ libsystemd-basic.la +if HAVE_XZ +test_journal_SOURCES += \ + src/journal/compress.c +test_journal_CFLAGS += \ + $(XZ_CFLAGS) +test_journal_LDADD += \ + $(XZ_LIBS) +endif + systemd_journald_SOURCES = \ src/journal/journald.c \ src/journal/sd-journal.c \ @@ -1046,6 +1055,15 @@ systemd_journald_LDADD = \ libsystemd-daemon.la \ $(ACL_LIBS) +if HAVE_XZ +systemd_journald_SOURCES += \ + src/journal/compress.c +systemd_journald_CFLAGS += \ + $(XZ_CFLAGS) +systemd_journald_LDADD += \ + $(XZ_LIBS) +endif + systemd_journalctl_SOURCES = \ src/journal/journalctl.c \ src/journal/sd-journal.c \ @@ -1059,6 +1077,15 @@ systemd_journalctl_CFLAGS = \ systemd_journalctl_LDADD = \ libsystemd-basic.la +if HAVE_XZ +systemd_journalctl_SOURCES += \ + src/journal/compress.c +systemd_journalctl_CFLAGS += \ + $(XZ_CFLAGS) +systemd_journalctl_LDADD += \ + $(XZ_LIBS) +endif + systemd_stdout_syslog_bridge_SOURCES = \ src/stdout-syslog-bridge.c \ src/tcpwrap.c diff --git a/TODO b/TODO index 5b64f048e5..8daf79a7f0 100644 --- a/TODO +++ b/TODO @@ -21,6 +21,8 @@ Bugfixes: Features: +* logind: sends SessionNew on 
Lock()? + * logind: allow showing logout dialog from system * document that %% can be used to write % in a string that is specifier extended diff --git a/configure.ac b/configure.ac index 596a32ac23..a2e9c7d65c 100644 --- a/configure.ac +++ b/configure.ac @@ -149,6 +149,19 @@ if test "x$enable_selinux" != "xno"; then fi AM_CONDITIONAL(HAVE_SELINUX, [test "$have_selinux" = "yes"]) +have_xz=no +AC_ARG_ENABLE(xz, AS_HELP_STRING([--disable-xz], [Disable optional XZ support])) +if test "x$enable_xz" != "xno"; then + PKG_CHECK_MODULES(XZ, [ liblzma ], + [AC_DEFINE(HAVE_XZ, 1, [Define if XZ is available]) have_xz=yes], have_xz=no) + AC_SUBST(XZ_CFLAGS) + AC_SUBST(XZ_LIBS) + if test "x$have_xz" = xno -a "x$enable_xz" = xyes; then + AC_MSG_ERROR([*** Xz support requested but libraries not found]) + fi +fi +AM_CONDITIONAL(HAVE_XZ, [test "$have_xz" = "yes"]) + AC_ARG_ENABLE([tcpwrap], AS_HELP_STRING([--disable-tcpwrap],[Disable optional TCP wrappers support]), [case "${enableval}" in @@ -591,6 +604,7 @@ AC_MSG_RESULT([ PAM: ${have_pam} AUDIT: ${have_audit} SELinux: ${have_selinux} + XZ: ${have_xz} ACL: ${have_acl} binfmt: ${have_binfmt} hostnamed: ${have_hostnamed} diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 5f026ee0f6..ef0cb6dae6 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -50,9 +50,15 @@ enum { _OBJECT_TYPE_MAX }; +/* Object flags */ +enum { + OBJECT_COMPRESSED = 1 +}; + _packed_ struct ObjectHeader { uint8_t type; - uint8_t reserved[7]; + uint8_t flags; + uint8_t reserved[6]; uint64_t size; uint8_t payload[]; }; @@ -123,6 +129,11 @@ enum { STATE_ARCHIVED }; +/* Header flags */ +enum { + HEADER_INCOMPATIBLE_COMPRESSED = 1 +}; + _packed_ struct Header { uint8_t signature[8]; /* "LPKSHHRH" */ uint32_t compatible_flags; diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 8f9b61bc2f..a0c479fc67 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -30,12 +30,15 @@ #include 
"journal-def.h" #include "journal-file.h" #include "lookup3.h" +#include "compress.h" #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL) #define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL) #define DEFAULT_WINDOW_SIZE (128ULL*1024ULL*1024ULL) +#define COMPRESSION_SIZE_THRESHOLD (64ULL) + static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }; #define ALIGN64(x) (((x) + 7ULL) & ~7ULL) @@ -57,6 +60,11 @@ void journal_file_close(JournalFile *f) { close_nointr_nofail(f->fd); free(f->path); + +#ifdef HAVE_XZ + free(f->compress_buffer); +#endif + free(f); } @@ -120,8 +128,13 @@ static int journal_file_verify_header(JournalFile *f) { if (memcmp(f->header, signature, 8)) return -EBADMSG; +#ifdef HAVE_XZ + if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0) + return -EPROTONOSUPPORT; +#else if (f->header->incompatible_flags != 0) return -EPROTONOSUPPORT; +#endif if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) return -ENODATA; @@ -309,7 +322,7 @@ static bool verify_hash(Object *o) { assert(o); - if (o->object.type == OBJECT_DATA) { + if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) { h1 = le64toh(o->data.hash); h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload)); } else if (o->object.type == OBJECT_FIELD) { @@ -581,12 +594,40 @@ int journal_file_find_data_object_with_hash( if (r < 0) return r; - if (le64toh(o->object.size) == osize && - memcmp(o->data.payload, data, size) == 0) { + if (le64toh(o->data.hash) != hash) + return -EBADMSG; + + if (o->object.flags & OBJECT_COMPRESSED) { +#ifdef HAVE_XZ + uint64_t l, rsize; - if (le64toh(o->data.hash) != hash) + l = le64toh(o->object.size); + if (l <= offsetof(Object, data.payload)) return -EBADMSG; + l -= offsetof(Object, data.payload); + + if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; + 
+ if (rsize == size && + memcmp(f->compress_buffer, data, size) == 0) { + + if (ret) + *ret = o; + + if (offset) + *offset = p; + + return 1; + } +#else + return -EPROTONOSUPPORT; +#endif + + } else if (le64toh(o->object.size) == osize && + memcmp(o->data.payload, data, size) == 0) { + if (ret) *ret = o; @@ -624,6 +665,7 @@ static int journal_file_append_data(JournalFile *f, const void *data, uint64_t s uint64_t osize; Object *o; int r; + bool compressed = false; assert(f); assert(data || size == 0); @@ -650,7 +692,27 @@ static int journal_file_append_data(JournalFile *f, const void *data, uint64_t s return r; o->data.hash = htole64(hash); - memcpy(o->data.payload, data, size); + +#ifdef HAVE_XZ + if (f->compress && + size >= COMPRESSION_SIZE_THRESHOLD) { + uint64_t rsize; + + compressed = compress_blob(data, size, o->data.payload, &rsize); + + if (compressed) { + o->object.size = htole64(offsetof(Object, data.payload) + rsize); + o->object.flags |= OBJECT_COMPRESSED; + + f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED); + + log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize); + } + } +#endif + + if (!compressed) + memcpy(o->data.payload, data, size); r = journal_file_link_data(f, o, p, hash); if (r < 0) @@ -1585,6 +1647,9 @@ void journal_file_dump(JournalFile *f) { break; } + if (o->object.flags & OBJECT_COMPRESSED) + printf("Flags: COMPRESSED\n"); + if (p == le64toh(f->header->tail_object_offset)) p = 0; else diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 20712b5dcc..421dfa6766 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -75,6 +75,13 @@ typedef struct JournalFile { uint64_t current_offset; JournalMetrics metrics; + + bool compress; + +#ifdef HAVE_XZ + void *compress_buffer; + size_t compress_buffer_size; +#endif } JournalFile; typedef enum direction { diff --git a/src/journal/journald.c 
b/src/journal/journald.c index 37f8f16754..ca274ee44a 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -57,6 +57,7 @@ typedef struct Server { JournalMetrics metrics; uint64_t max_use; + bool compress; } Server; static void fix_perms(JournalFile *f, uid_t uid) { @@ -146,6 +147,8 @@ static JournalFile* find_journal(Server *s, uid_t uid) { return s->system_journal; fix_perms(f, uid); + f->metrics = s->metrics; + f->compress = s->compress; r = hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f); if (r < 0) { @@ -661,6 +664,9 @@ static int system_journal_open(Server *s) { free(fn); if (r >= 0) { + s->system_journal->metrics = s->metrics; + s->system_journal->compress = s->compress; + fix_perms(s->system_journal, 0); return r; } @@ -685,6 +691,9 @@ static int system_journal_open(Server *s) { return r; } + s->runtime_journal->metrics = s->metrics; + s->runtime_journal->compress = s->compress; + fix_perms(s->runtime_journal, 0); return r; } @@ -794,6 +803,7 @@ static int server_init(Server *s) { s->metrics.min_size = DEFAULT_MIN_SIZE; s->metrics.keep_free = DEFAULT_KEEP_FREE; s->max_use = DEFAULT_MAX_USE; + s->compress = true; s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (s->epoll_fd < 0) { @@ -931,6 +941,7 @@ int main(int argc, char *argv[]) { } log_set_target(LOG_TARGET_CONSOLE); + log_set_max_level(LOG_DEBUG); log_parse_environment(); log_open(); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 9dff72429b..bc575b43ef 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -31,6 +31,7 @@ #include "hashmap.h" #include "list.h" #include "lookup3.h" +#include "compress.h" #define JOURNAL_FILES_MAX 1024 @@ -1344,7 +1345,7 @@ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, siz size_t t; p = le64toh(o->entry.items[i].object_offset); - le_hash = o->entry.items[j->current_field].hash; + le_hash = o->entry.items[i].hash; r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); if (r < 0) 
return r; @@ -1354,9 +1355,31 @@ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, siz l = le64toh(o->object.size) - offsetof(Object, data.payload); - if (l >= field_length+1 && - memcmp(o->data.payload, field, field_length) == 0 && - o->data.payload[field_length] == '=') { + if (o->object.flags & OBJECT_COMPRESSED) { + +#ifdef HAVE_XZ + if (uncompress_startswith(o->data.payload, l, + &f->compress_buffer, &f->compress_buffer_size, + field, field_length, '=')) { + + uint64_t rsize; + + if (!uncompress_blob(o->data.payload, l, + &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; + + *data = f->compress_buffer; + *size = (size_t) rsize; + + return 0; + } +#else + return -EPROTONOSUPPORT; +#endif + + } else if (l >= field_length+1 && + memcmp(o->data.payload, field, field_length) == 0 && + o->data.payload[field_length] == '=') { t = (size_t) l; @@ -1419,8 +1442,22 @@ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { if ((uint64_t) t != l) return -E2BIG; - *data = o->data.payload; - *size = t; + if (o->object.flags & OBJECT_COMPRESSED) { +#ifdef HAVE_XZ + uint64_t rsize; + + if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize)) + return -EBADMSG; + + *data = f->compress_buffer; + *size = (size_t) rsize; +#else + return -EPROTONOSUPPORT; +#endif + } else { + *data = o->data.payload; + *size = t; + } j->current_field ++; diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index ee9813f28c..b29680b3a2 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -38,7 +38,7 @@ * - accelerate looking for "all hostnames" and suchlike. 
* - throttling * - cryptographic hash - * - compression + * - never access beyond fle size check */ /* Write to daemon */ -- cgit v1.2.3-54-g00ecf From 72f597065c60fbfca501a8d8c29e9a11cb740946 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Dec 2011 18:17:22 +0100 Subject: journalctl: add json, export, short and verbose output modes --- src/journal/journalctl.c | 347 +++++++++++++++++++++++++++++++++++++++++++---- src/journal/sd-journal.c | 2 +- src/util.c | 31 +++-- src/util.h | 5 +- 4 files changed, 345 insertions(+), 40 deletions(-) diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index c947730441..6f4342e597 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -27,15 +27,328 @@ #include #include #include +#include #include "sd-journal.h" #include "log.h" +#include "util.h" -static bool arg_follow = true; +#define PRINT_THRESHOLD 128 + +static enum { + OUTPUT_SHORT, + OUTPUT_VERBOSE, + OUTPUT_EXPORT, + OUTPUT_JSON, + _OUTPUT_MAX +} arg_output = OUTPUT_JSON; + +static bool arg_follow = false; +static bool arg_show_all = false; + +static bool contains_unprintable(const void *p, size_t l) { + const char *j; + + for (j = p; j < (const char *) p + l; j++) + if (*j < ' ' || *j >= 127) + return true; + + return false; +} + +static int output_short(sd_journal *j, unsigned line) { + int r; + uint64_t realtime; + time_t t; + struct tm tm; + char buf[64]; + const void *data; + size_t length; + size_t n = 0; + + assert(j); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) { + log_error("Failed to get realtime: %s", strerror(-r)); + return r; + } + + t = (time_t) (realtime / USEC_PER_SEC); + if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S", localtime_r(&t, &tm)) <= 0) { + log_error("Failed to format time."); + return -EINVAL; + } + + fputs(buf, stdout); + n += strlen(buf); + + if (sd_journal_get_data(j, "_HOSTNAME", &data, &length) >= 0 && + (arg_show_all || (!contains_unprintable(data, length) && + length < 
PRINT_THRESHOLD))) { + printf(" %.*s", (int) length - 10, ((const char*) data) + 10); + n += length - 10 + 1; + } + + if (sd_journal_get_data(j, "MESSAGE", &data, &length) >= 0) { + if (arg_show_all) + printf(" %.*s", (int) length - 8, ((const char*) data) + 8); + else if (contains_unprintable(data, length)) + fputs(" [blob data]", stdout); + else if (length - 8 + n < columns()) + printf(" %.*s", (int) length - 8, ((const char*) data) + 8); + else if (n < columns()) { + char *e; + + e = ellipsize_mem((const char *) data + 8, length - 8, columns() - n - 2, 90); + + if (!e) + printf(" %.*s", (int) length - 8, ((const char*) data) + 8); + else + printf(" %s", e); + + free(e); + } + } + + fputc('\n', stdout); + + return 0; +} + +static int output_verbose(sd_journal *j, unsigned line) { + const void *data; + size_t length; + char *cursor; + uint64_t realtime; + char ts[FORMAT_TIMESTAMP_MAX]; + int r; + + assert(j); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) { + log_error("Failed to get realtime timestamp: %s", strerror(-r)); + return r; + } + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) { + log_error("Failed to get cursor: %s", strerror(-r)); + return r; + } + + printf("%s [%s]\n", + format_timestamp(ts, sizeof(ts), realtime), + cursor); + + free(cursor); + + SD_JOURNAL_FOREACH_DATA(j, data, length) { + if (!arg_show_all && (length > PRINT_THRESHOLD || + contains_unprintable(data, length))) { + const char *c; + + c = memchr(data, '=', length); + if (!c) { + log_error("Invalid field."); + return -EINVAL; + } + + printf("\t%.*s=[blob data]\n", + (int) (c - (const char*) data), + (const char*) data); + } else + printf("\t%.*s\n", (int) length, (const char*) data); + } + + return 0; +} + +static int output_export(sd_journal *j, unsigned line) { + sd_id128_t boot_id; + char sid[33]; + int r; + usec_t realtime, monotonic; + char *cursor; + const void *data; + size_t length; + + assert(j); + + r = sd_journal_get_realtime_usec(j, &realtime); + if 
(r < 0) { + log_error("Failed to get realtime timestamp: %s", strerror(-r)); + return r; + } + + r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id); + if (r < 0) { + log_error("Failed to get monotonic timestamp: %s", strerror(-r)); + return r; + } + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) { + log_error("Failed to get cursor: %s", strerror(-r)); + return r; + } + + printf(".cursor=%s\n" + ".realtime=%llu\n" + ".monotonic=%llu\n" + ".boot_id=%s\n", + cursor, + (unsigned long long) realtime, + (unsigned long long) monotonic, + sd_id128_to_string(boot_id, sid)); + + free(cursor); + + SD_JOURNAL_FOREACH_DATA(j, data, length) { + + if (contains_unprintable(data, length)) { + const char *c; + uint64_t le64; + + c = memchr(data, '=', length); + if (!c) { + log_error("Invalid field."); + return -EINVAL; + } + + fwrite(data, c - (const char*) data, 1, stdout); + fputc('\n', stdout); + le64 = htole64(length - (c - (const char*) data) - 1); + fwrite(&le64, sizeof(le64), 1, stdout); + fwrite(c + 1, length - (c - (const char*) data) - 1, 1, stdout); + } else + fwrite(data, length, 1, stdout); + + fputc('\n', stdout); + } + + fputc('\n', stdout); + + return 0; +} + +static void json_escape(const char* p, size_t l) { + + if (contains_unprintable(p, l)) { + bool not_first = false; + + fputs("[ ", stdout); + + while (l > 0) { + if (not_first) + printf(", %u", (uint8_t) *p); + else { + not_first = true; + printf("%u", (uint8_t) *p); + } + + p++; + l--; + } + + fputs(" ]", stdout); + } else { + fputc('\"', stdout); + + while (l > 0) { + if (*p == '"' || *p == '\\') { + fputc('\\', stdout); + fputc(*p, stdout); + } else + fputc(*p, stdout); + + p++; + l--; + } + + fputc('\"', stdout); + } +} + +static int output_json(sd_journal *j, unsigned line) { + uint64_t realtime, monotonic; + char *cursor; + const void *data; + size_t length; + sd_id128_t boot_id; + char sid[33]; + int r; + + assert(j); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) { + 
log_error("Failed to get realtime timestamp: %s", strerror(-r)); + return r; + } + + r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id); + if (r < 0) { + log_error("Failed to get monotonic timestamp: %s", strerror(-r)); + return r; + } + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) { + log_error("Failed to get cursor: %s", strerror(-r)); + return r; + } + + if (line == 1) + fputc('\n', stdout); + else + fputs(",\n", stdout); + + printf("{\n" + "\t\".cursor\" : \"%s\",\n" + "\t\".realtime\" : %llu,\n" + "\t\".monotonic\" : %llu,\n" + "\t\".boot_id\" : \"%s\"", + cursor, + (unsigned long long) realtime, + (unsigned long long) monotonic, + sd_id128_to_string(boot_id, sid)); + + free(cursor); + + SD_JOURNAL_FOREACH_DATA(j, data, length) { + const char *c; + + c = memchr(data, '=', length); + if (!c) { + log_error("Invalid field."); + return -EINVAL; + } + + fputs(",\n\t", stdout); + json_escape(data, c - (const char*) data); + fputs(" : ", stdout); + json_escape(c + 1, length - (c - (const char*) data) - 1); + } + + fputs("\n}", stdout); + fflush(stdout); + + return 0; +} + +static int (*output_funcs[_OUTPUT_MAX])(sd_journal*j, unsigned line) = { + [OUTPUT_SHORT] = output_short, + [OUTPUT_VERBOSE] = output_verbose, + [OUTPUT_EXPORT] = output_export, + [OUTPUT_JSON] = output_json +}; int main(int argc, char *argv[]) { int r, i, fd; sd_journal *j = NULL; + unsigned line = 0; log_set_max_level(LOG_DEBUG); log_set_target(LOG_TARGET_CONSOLE); @@ -69,33 +382,18 @@ int main(int argc, char *argv[]) { goto finish; } + if (arg_output == OUTPUT_JSON) + fputc('[', stdout); + for (;;) { struct pollfd pollfd; while (sd_journal_next(j) > 0) { - const void *data; - size_t length; - char *cursor; - uint64_t realtime = 0, monotonic = 0; - - r = sd_journal_get_cursor(j, &cursor); - if (r < 0) { - log_error("Failed to get cursor: %s", strerror(-r)); - goto finish; - } - - printf("entry: %s\n", cursor); - free(cursor); + line ++; - sd_journal_get_realtime_usec(j, 
&realtime); - sd_journal_get_monotonic_usec(j, &monotonic, NULL); - printf("realtime: %llu\n" - "monotonic: %llu\n", - (unsigned long long) realtime, - (unsigned long long) monotonic); - - SD_JOURNAL_FOREACH_DATA(j, data, length) - printf("\t%.*s\n", (int) length, (const char*) data); + r = output_funcs[arg_output](j, line); + if (r < 0) + goto finish; } if (!arg_follow) @@ -121,6 +419,9 @@ int main(int argc, char *argv[]) { } } + if (arg_output == OUTPUT_JSON) + fputs("\n]\n", stdout); + finish: if (j) sd_journal_close(j); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index bc575b43ef..b9abbdff92 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -977,7 +977,7 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch return r; } - journal_file_dump(f); + /* journal_file_dump(f); */ r = hashmap_put(j->files, f->path, f); if (r < 0) { diff --git a/src/util.c b/src/util.c index 37942de534..195835425d 100644 --- a/src/util.c +++ b/src/util.c @@ -3905,7 +3905,7 @@ char **replace_env_argv(char **argv, char **env) { return r; } -int columns(void) { +unsigned columns(void) { static __thread int parsed_columns = 0; const char *e; @@ -3948,38 +3948,41 @@ int running_in_chroot(void) { a.st_ino != b.st_ino; } -char *ellipsize(const char *s, unsigned length, unsigned percent) { - size_t l, x; +char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { + size_t x; char *r; assert(s); assert(percent <= 100); - assert(length >= 3); + assert(new_length >= 3); - l = strlen(s); + if (old_length <= 3 || old_length <= new_length) + return strndup(s, old_length); - if (l <= 3 || l <= length) - return strdup(s); - - if (!(r = new0(char, length+1))) + r = new0(char, new_length+1); + if (!r) return r; - x = (length * percent) / 100; + x = (new_length * percent) / 100; - if (x > length - 3) - x = length - 3; + if (x > new_length - 3) + x = new_length - 3; memcpy(r, s, x); r[x] = '.'; r[x+1] 
= '.'; r[x+2] = '.'; memcpy(r + x + 3, - s + l - (length - x - 3), - length - x - 3); + s + old_length - (new_length - x - 3), + new_length - x - 3); return r; } +char *ellipsize(const char *s, size_t length, unsigned percent) { + return ellipsize_mem(s, strlen(s), length, percent); +} + int touch(const char *path) { int fd; diff --git a/src/util.h b/src/util.h index 1db82f83e0..ac2ec8c351 100644 --- a/src/util.h +++ b/src/util.h @@ -378,11 +378,12 @@ void status_vprintf(const char *format, va_list ap); void status_printf(const char *format, ...); void status_welcome(void); -int columns(void); +unsigned columns(void); int running_in_chroot(void); -char *ellipsize(const char *s, unsigned length, unsigned percent); +char *ellipsize(const char *s, size_t length, unsigned percent); +char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent); int touch(const char *path); -- cgit v1.2.3-54-g00ecf From 0d43c6944bbca30d5692d4b02885f007a0c630c8 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Dec 2011 18:59:56 +0100 Subject: journalctl: add command line parsing --- Makefile.am | 3 +- src/journal/journalctl.c | 125 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 5 deletions(-) diff --git a/Makefile.am b/Makefile.am index 7d551a9dcf..81b5c50199 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1069,7 +1069,8 @@ systemd_journalctl_SOURCES = \ src/journal/sd-journal.c \ src/journal/journal-file.c \ src/journal/lookup3.c \ - src/sd-id128.c + src/sd-id128.c \ + src/pager.c systemd_journalctl_CFLAGS = \ $(AM_CFLAGS) diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 6f4342e597..5a1cb6e88a 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -28,10 +28,13 @@ #include #include #include +#include #include "sd-journal.h" #include "log.h" #include "util.h" +#include "build.h" +#include "pager.h" #define PRINT_THRESHOLD 128 @@ -41,10 +44,11 @@ static enum { 
OUTPUT_EXPORT, OUTPUT_JSON, _OUTPUT_MAX -} arg_output = OUTPUT_JSON; +} arg_output = OUTPUT_SHORT; static bool arg_follow = false; static bool arg_show_all = false; +static bool arg_no_pager = false; static bool contains_unprintable(const void *p, size_t l) { const char *j; @@ -345,6 +349,96 @@ static int (*output_funcs[_OUTPUT_MAX])(sd_journal*j, unsigned line) = { [OUTPUT_JSON] = output_json }; +static int help(void) { + + printf("%s [OPTIONS...] {COMMAND} ...\n\n" + "Send control commands to or query the login manager.\n\n" + " -h --help Show this help\n" + " --version Show package version\n" + " --no-pager Do not pipe output into a pager\n" + " -a --all Show all properties, including long and unprintable\n" + " -f --follow Follow journal\n" + " -o --output=STRING Change output mode (short, verbose, export, json)\n", + program_invocation_short_name); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_NO_PAGER + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version" , no_argument, NULL, ARG_VERSION }, + { "no-pager", no_argument, NULL, ARG_NO_PAGER }, + { "follow", no_argument, NULL, 'f' }, + { "output", required_argument, NULL, 'o' }, + { "all", no_argument, NULL, 'a' }, + { NULL, 0, NULL, 0 } + }; + + int c; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hfo:a", options, NULL)) >= 0) { + + switch (c) { + + case 'h': + help(); + return 0; + + case ARG_VERSION: + puts(PACKAGE_STRING); + puts(DISTRIBUTION); + puts(SYSTEMD_FEATURES); + return 0; + + case ARG_NO_PAGER: + arg_no_pager = true; + break; + + case 'f': + arg_follow = true; + break; + + case 'o': + if (streq(optarg, "short")) + arg_output = OUTPUT_SHORT; + else if (streq(optarg, "verbose")) + arg_output = OUTPUT_VERBOSE; + else if (streq(optarg, "export")) + arg_output = OUTPUT_EXPORT; + else if (streq(optarg, "json")) + arg_output = OUTPUT_JSON; + else { + 
log_error("Unknown output '%s'.", optarg); + return -EINVAL; + } + break; + + case 'a': + arg_show_all = true; + break; + + case '?': + return -EINVAL; + + default: + log_error("Unknown option code %c", c); + return -EINVAL; + } + } + + return 1; +} + int main(int argc, char *argv[]) { int r, i, fd; sd_journal *j = NULL; @@ -356,13 +450,17 @@ int main(int argc, char *argv[]) { log_parse_environment(); log_open(); + r = parse_argv(argc, argv); + if (r <= 0) + goto finish; + r = sd_journal_open(&j); if (r < 0) { log_error("Failed to open journal: %s", strerror(-r)); goto finish; } - for (i = 1; i < argc; i++) { + for (i = optind; i < argc; i++) { r = sd_journal_add_match(j, argv[i], strlen(argv[i])); if (r < 0) { log_error("Failed to add match: %s", strerror(-r)); @@ -382,13 +480,30 @@ int main(int argc, char *argv[]) { goto finish; } - if (arg_output == OUTPUT_JSON) + if (!arg_no_pager && !arg_follow) { + columns(); + pager_open(); + } + + if (arg_output == OUTPUT_JSON) { fputc('[', stdout); + fflush(stdout); + } for (;;) { struct pollfd pollfd; - while (sd_journal_next(j) > 0) { + for (;;) { + r = sd_journal_next(j); + + if (r < 0) { + log_error("Failed to iterate through journal: %s", strerror(-r)); + goto finish; + } + + if (r == 0) + break; + line ++; r = output_funcs[arg_output](j, line); @@ -426,5 +541,7 @@ finish: if (j) sd_journal_close(j); + pager_close(); + return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; } -- cgit v1.2.3-54-g00ecf From e4e61fdbed832a2bd3f5dcd47623872d9081599c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Dec 2011 19:00:10 +0100 Subject: journal: add missing compress.[ch] --- src/journal/compress.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++++ src/journal/compress.h | 38 +++++++++ 2 files changed, 246 insertions(+) create mode 100644 src/journal/compress.c create mode 100644 src/journal/compress.h diff --git a/src/journal/compress.c b/src/journal/compress.c new file mode 100644 index 0000000000..ff906581f0 --- /dev/null +++ b/src/journal/compress.c @@ -0,0 +1,208 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include + +#include "compress.h" + +bool compress_blob(const void *src, uint64_t src_size, void *dst, uint64_t *dst_size) { + lzma_stream s = LZMA_STREAM_INIT; + lzma_ret ret; + bool b = false; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_size); + + /* Returns false if we couldn't compress the data or the + * compressed result is longer than the original */ + + ret = lzma_easy_encoder(&s, LZMA_PRESET_DEFAULT, LZMA_CHECK_NONE); + if (ret != LZMA_OK) + return false; + + s.next_in = src; + s.avail_in = src_size; + s.next_out = dst; + s.avail_out = src_size; + + /* Does it fit? */ + if (lzma_code(&s, LZMA_FINISH) != LZMA_STREAM_END) + goto fail; + + /* Is it actually shorter? */ + if (s.avail_out == 0) + goto fail; + + *dst_size = src_size - s.avail_out; + b = true; + +fail: + lzma_end(&s); + + return b; +} + +bool uncompress_blob(const void *src, uint64_t src_size, + void **dst, uint64_t *dst_alloc_size, uint64_t* dst_size) { + + lzma_stream s = LZMA_STREAM_INIT; + lzma_ret ret; + bool b = false; + + assert(src); + assert(src_size > 0); + assert(dst); + assert(dst_alloc_size); + assert(dst_size); + assert(*dst_alloc_size == 0 || *dst); + + ret = lzma_stream_decoder(&s, UINT64_MAX, 0); + if (ret != LZMA_OK) + return false; + + if (*dst_alloc_size <= src_size) { + void *p; + + p = realloc(*dst, src_size*2); + if (!p) + return false; + + *dst = p; + *dst_alloc_size = src_size*2; + } + + s.next_in = src; + s.avail_in = src_size; + + s.next_out = *dst; + s.avail_out = *dst_alloc_size; + + for (;;) { + void *p; + + ret = lzma_code(&s, LZMA_FINISH); + + if (ret == LZMA_STREAM_END) + break; + + if (ret != LZMA_OK) + goto fail; + + p = realloc(*dst, *dst_alloc_size*2); + if (!p) + goto fail; + + s.next_out = (uint8_t*) p + ((uint8_t*) s.next_out - (uint8_t*) *dst); + s.avail_out += *dst_alloc_size; + + *dst = p; + *dst_alloc_size *= 2; + } + + *dst_size = *dst_alloc_size - s.avail_out; + b = true; + +fail: + 
lzma_end(&s); + + return b; +} + +bool uncompress_startswith(const void *src, uint64_t src_size, + void **buffer, uint64_t *buffer_size, + const void *prefix, uint64_t prefix_len, + uint8_t extra) { + + lzma_stream s = LZMA_STREAM_INIT; + lzma_ret ret; + bool b = false; + + /* Checks whether the uncompressed blob starts with the + * mentioned prefix. The byte extra needs to follow the + * prefix */ + + assert(src); + assert(src_size > 0); + assert(buffer); + assert(buffer_size); + assert(prefix); + assert(*buffer_size == 0 || *buffer); + + ret = lzma_stream_decoder(&s, UINT64_MAX, 0); + if (ret != LZMA_OK) + return false; + + if (*buffer_size <= prefix_len) { + void *p; + + p = realloc(*buffer, prefix_len*2); + if (!p) + return false; + + *buffer = p; + *buffer_size = prefix_len*2; + } + + s.next_in = src; + s.avail_in = src_size; + + s.next_out = *buffer; + s.avail_out = *buffer_size; + + for (;;) { + void *p; + + ret = lzma_code(&s, LZMA_FINISH); + + if (ret != LZMA_STREAM_END && ret != LZMA_OK) + goto fail; + + if ((*buffer_size - s.avail_out > prefix_len) && + memcmp(*buffer, prefix, prefix_len) == 0 && + ((const uint8_t*) *buffer)[prefix_len] == extra) + break; + + if (ret == LZMA_STREAM_END) + goto fail; + + p = realloc(*buffer, *buffer_size*2); + if (!p) + goto fail; + + s.next_out = (uint8_t*) p + ((uint8_t*) s.next_out - (uint8_t*) *buffer); + s.avail_out += *buffer_size; + + *buffer = p; + *buffer_size *= 2; + } + + b = true; + +fail: + lzma_end(&s); + + return b; +} diff --git a/src/journal/compress.h b/src/journal/compress.h new file mode 100644 index 0000000000..f187a6e00c --- /dev/null +++ b/src/journal/compress.h @@ -0,0 +1,38 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foocompresshfoo +#define foocompresshfoo + +/*** + This file is part of systemd. 
+ + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include +#include + +bool compress_blob(const void *src, uint64_t src_size, void *dst, uint64_t *dst_size); + +bool uncompress_blob(const void *src, uint64_t src_size, + void **dst, uint64_t *dst_alloc_size, uint64_t* dst_size); + +bool uncompress_startswith(const void *src, uint64_t src_size, + void **buffer, uint64_t *buffer_size, + const void *prefix, uint64_t prefix_len, + uint8_t extra); + +#endif -- cgit v1.2.3-54-g00ecf From 440ee3665e252dc004e356da0f5b51ad26ea2cbe Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Dec 2011 22:32:52 +0100 Subject: journal: properly handle first inline bisect array entry --- src/journal/journal-file.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index a0c479fc67..4a006d3bf3 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -1218,7 +1218,6 @@ static int generic_array_bisect_plus_one(JournalFile *f, /* This bisects the array in object 'first', but first checks * an extra */ - r = test_object(f, extra, needle); if (r < 0) return r; @@ -1234,6 +1233,11 @@ static int generic_array_bisect_plus_one(JournalFile *f, if (offset) *offset = extra; + + if (idx) + *idx = 0; + + return 1; } else if (r == TEST_RIGHT) return 0; -- cgit v1.2.3-54-g00ecf From 6ad1d1c30621280bfad3e63fcc1c7ceb7d8ffa98 
Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 22 Dec 2011 00:35:04 +0100 Subject: journald: enforce some syntax restrictions on field names sent from the client side --- src/journal/journald.c | 44 ++++++++++++++++++++++++++++++++++++++++---- src/journal/sd-journal.c | 17 +++++++++++++++++ src/journal/test-journal.c | 2 ++ 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index ca274ee44a..d35e1c119a 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -395,6 +395,41 @@ static void process_syslog_message(Server *s, const char *buf, struct ucred *ucr free(syslog_priority); } +static bool valid_user_field(const char *p, size_t l) { + const char *a; + + /* We kinda enforce POSIX syntax recommendations for + environment variables here, but make a couple of additional + requirements. + + http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */ + + /* No empty field names */ + if (l <= 0) + return false; + + /* Don't allow names longer than 64 chars */ + if (l > 64) + return false; + + /* Variables starting with an underscore are protected */ + if (p[0] == '_') + return false; + + /* Don't allow digits as first character */ + if (p[0] >= '0' && p[0] <= '9') + return false; + + /* Only allow A-Z0-9 and '_' */ + for (a = p; a < p + l; a++) + if (!((*a >= 'A' && *a <= 'Z') || + (*a >= '0' && *a <= '9') || + *a == '_')) + return false; + + return true; +} + static void process_native_message(Server *s, const void *buffer, size_t buffer_size, struct ucred *ucred, struct timeval *tv) { struct iovec *iovec = NULL; unsigned n = 0, m = 0, j; @@ -428,8 +463,9 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ continue; } - if (*p == '.') { - /* Control command, ignore for now */ + if (*p == '.' || *p == '#') { + /* Ignore control commands for now, and + * comments too. 
*/ remaining -= (e - p) + 1; p = e + 1; continue; @@ -454,7 +490,7 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ q = memchr(p, '=', e - p); if (q) { - if (p[0] != '_') { + if (valid_user_field(p, q - p)) { /* If the field name starts with an * underscore, skip the variable, * since that indidates a trusted @@ -495,7 +531,7 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ k[e - p] = '='; memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l); - if (k[0] != '_') { + if (valid_user_field(p, e - p)) { iovec[n].iov_base = k; iovec[n].iov_len = (e - p) + 1 + l; n++; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index b9abbdff92..4095830901 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -1585,3 +1585,20 @@ int sd_journal_process(sd_journal *j) { } } } + +int sd_journal_query_unique(sd_journal *j, const char *field) { + assert(j); + assert(field); + + return -ENOTSUP; +} + +int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) { + assert(j); + + return -ENOTSUP; +} + +void sd_journal_restart_unique(sd_journal *j) { + assert(j); +} diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index a9bd6cb2cf..3d429bea90 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -113,5 +113,7 @@ int main(int argc, char *argv[]) { journal_directory_vacuum(".", 3000000, 0); + log_error("Exiting..."); + return 0; } -- cgit v1.2.3-54-g00ecf From fe6521272ba203ec8f0d5a94f0729960b3f90525 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 23 Dec 2011 20:50:48 +0100 Subject: journal: implement stdout transport --- src/journal/journal-send.c | 60 +++++ src/journal/journald.c | 573 ++++++++++++++++++++++++++++++++++++++++----- src/journal/sd-journal.h | 3 +- src/stdout-syslog-bridge.c | 6 +- src/util.c | 3 +- 5 files changed, 581 insertions(+), 64 deletions(-) diff --git a/src/journal/journal-send.c 
b/src/journal/journal-send.c index 238d64c13e..cc3cd8c303 100644 --- a/src/journal/journal-send.c +++ b/src/journal/journal-send.c @@ -26,6 +26,7 @@ #include "sd-journal.h" #include "util.h" +#include "socket-util.h" /* We open a single fd, and we'll share it with the current process, * all its threads, and all its subprocesses. This means we need to @@ -67,6 +68,12 @@ int sd_journal_printv(int priority, const char *format, va_list ap) { char buffer[8 + LINE_MAX], p[11]; struct iovec iov[2]; + if (priority < 0 || priority > 7) + return -EINVAL; + + if (!format) + return -EINVAL; + snprintf(p, sizeof(p), "PRIORITY=%i", priority & LOG_PRIMASK); char_array_0(p); @@ -197,3 +204,56 @@ int sd_journal_sendv(const struct iovec *iov, int n) { return 0; } + +int sd_journal_stream_fd(const char *tag, int priority, int priority_prefix) { + union sockaddr_union sa; + int fd; + char *header; + size_t l; + ssize_t r; + + if (priority < 0 || priority > 7) + return -EINVAL; + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + zero(sa); + sa.un.sun_family = AF_UNIX; + strncpy(sa.un.sun_path, "/run/systemd/stdout", sizeof(sa.un.sun_path)); + + r = connect(fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path)); + if (r < 0) { + close_nointr_nofail(fd); + return -errno; + } + + if (!tag) + tag = ""; + + l = strlen(tag); + header = alloca(l + 1 + 2 + 2 + 2); + + memcpy(header, tag, l); + header[l++] = '\n'; + header[l++] = '0' + priority; + header[l++] = '\n'; + header[l++] = '0' + !!priority_prefix; + header[l++] = '\n'; + header[l++] = '0'; + header[l++] = '\n'; + + r = loop_write(fd, header, l, false); + if (r < 0) { + close_nointr_nofail(fd); + return (int) r; + } + + if ((size_t) r != l) { + close_nointr_nofail(fd); + return -errno; + } + + return fd; +} diff --git a/src/journal/journald.c b/src/journal/journald.c index d35e1c119a..5d3956ea46 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -37,14 
+37,19 @@ #include "socket-util.h" #include "acl-util.h" #include "cgroup-util.h" +#include "list.h" #define USER_JOURNALS_MAX 1024 +#define STDOUT_STREAMS_MAX 4096 + +typedef struct StdoutStream StdoutStream; typedef struct Server { int epoll_fd; int signal_fd; int syslog_fd; int native_fd; + int stdout_fd; JournalFile *runtime_journal; JournalFile *system_journal; @@ -58,8 +63,38 @@ typedef struct Server { JournalMetrics metrics; uint64_t max_use; bool compress; + + LIST_HEAD(StdoutStream, stdout_streams); + unsigned n_stdout_streams; } Server; +typedef enum StdoutStreamState { + STDOUT_STREAM_TAG, + STDOUT_STREAM_PRIORITY, + STDOUT_STREAM_PRIORITY_PREFIX, + STDOUT_STREAM_TEE_CONSOLE, + STDOUT_STREAM_RUNNING +} StdoutStreamState; + +struct StdoutStream { + Server *server; + StdoutStreamState state; + + int fd; + + struct ucred ucred; + + char *tag; + int priority; + bool priority_prefix:1; + bool tee_console:1; + + char buffer[LINE_MAX+1]; + size_t length; + + LIST_FIELDS(StdoutStream, stdout_stream); +}; + static void fix_perms(JournalFile *f, uid_t uid) { acl_t acl; acl_entry_t entry; @@ -363,7 +398,6 @@ retry: free(audit_session); free(audit_loginuid); free(cgroup); - } static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) { @@ -551,18 +585,322 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ free(iovec[j].iov_base); } -static int process_event(Server *s, struct epoll_event *ev) { +static int stdout_stream_log(StdoutStream *s, const char *p, size_t l) { + struct iovec iovec[15]; + char *message = NULL, *syslog_priority = NULL; + unsigned n = 0; + size_t tag_len; + int priority; + assert(s); + assert(p); + + priority = s->priority; + + if (s->priority_prefix && + l > 3 && + p[0] == '<' && + p[1] >= '0' && p[1] <= '7' && + p[2] == '>') { + + priority = p[1] - '0'; + p += 3; + l -= 3; + } + + if (l <= 0) + return 0; + + if (asprintf(&syslog_priority, "PRIORITY=%i", priority) >= 
0) + IOVEC_SET_STRING(iovec[n++], syslog_priority); + + tag_len = s->tag ? strlen(s->tag) + 2: 0; + message = malloc(8 + tag_len + l); + if (message) { + memcpy(message, "MESSAGE=", 8); + + if (s->tag) { + memcpy(message+8, s->tag, tag_len-2); + memcpy(message+8+tag_len-2, ": ", 2); + } - if (ev->events != EPOLLIN) { - log_info("Got invalid event from epoll."); - return -EIO; + memcpy(message+8+tag_len, p, l); + iovec[n].iov_base = message; + iovec[n].iov_len = 8+tag_len+l; + n++; } + dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL); + + if (s->tee_console) { + int console; + + console = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (console >= 0) { + n = 0; + if (s->tag) { + IOVEC_SET_STRING(iovec[n++], s->tag); + IOVEC_SET_STRING(iovec[n++], ": "); + } + + iovec[n].iov_base = (void*) p; + iovec[n].iov_len = l; + n++; + + IOVEC_SET_STRING(iovec[n++], (char*) "\n"); + + writev(console, iovec, n); + } + } + + free(message); + free(syslog_priority); + + return 0; +} + +static int stdout_stream_line(StdoutStream *s, const char *p, size_t l) { + assert(s); + assert(p); + + while (l > 0 && strchr(WHITESPACE, *p)) { + l--; + p++; + } + + while (l > 0 && strchr(WHITESPACE, *(p+l-1))) + l--; + + switch (s->state) { + + case STDOUT_STREAM_TAG: + + if (l > 0) { + s->tag = strndup(p, l); + if (!s->tag) { + log_error("Out of memory"); + return -EINVAL; + } + } + + s->state = STDOUT_STREAM_PRIORITY; + return 0; + + case STDOUT_STREAM_PRIORITY: + if (l != 1 || *p < '0' || *p > '7') { + log_warning("Failed to parse log priority line."); + return -EINVAL; + } + + s->priority = *p - '0'; + s->state = STDOUT_STREAM_PRIORITY_PREFIX; + return 0; + + case STDOUT_STREAM_PRIORITY_PREFIX: + if (l != 1 || *p < '0' || *p > '1') { + log_warning("Failed to parse priority prefix line."); + return -EINVAL; + } + + s->priority_prefix = *p - '0'; + s->state = STDOUT_STREAM_TEE_CONSOLE; + return 0; + + case STDOUT_STREAM_TEE_CONSOLE: + if (l != 1 || *p < 
'0' || *p > '1') { + log_warning("Failed to parse tee to console line."); + return -EINVAL; + } + + s->tee_console = *p - '0'; + s->state = STDOUT_STREAM_RUNNING; + return 0; + + case STDOUT_STREAM_RUNNING: + return stdout_stream_log(s, p, l); + } + + assert_not_reached("Unknown stream state"); +} + +static int stdout_stream_scan(StdoutStream *s, bool force_flush) { + char *p; + size_t remaining; + int r; + + assert(s); + + p = s->buffer; + remaining = s->length; + for (;;) { + char *end; + size_t skip; + + end = memchr(p, '\n', remaining); + if (!end) { + if (remaining >= LINE_MAX) { + end = p + LINE_MAX; + skip = LINE_MAX; + } else + break; + } else + skip = end - p + 1; + + r = stdout_stream_line(s, p, end - p); + if (r < 0) + return r; + + remaining -= skip; + p += skip; + } + + if (force_flush && remaining > 0) { + r = stdout_stream_line(s, p, remaining); + if (r < 0) + return r; + + p += remaining; + remaining = 0; + } + + if (p > s->buffer) { + memmove(s->buffer, p, remaining); + s->length = remaining; + } + + return 0; +} + +static int stdout_stream_process(StdoutStream *s) { + ssize_t l; + int r; + + assert(s); + + l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length); + if (l < 0) { + + if (errno == EAGAIN) + return 0; + + log_warning("Failed to read from stream: %m"); + return -errno; + } + + if (l == 0) { + r = stdout_stream_scan(s, true); + if (r < 0) + return r; + + return 0; + } + + s->length += l; + r = stdout_stream_scan(s, false); + if (r < 0) + return r; + + return 1; + +} + +static void stdout_stream_free(StdoutStream *s) { + assert(s); + + if (s->server) { + assert(s->server->n_stdout_streams > 0); + s->server->n_stdout_streams --; + LIST_REMOVE(StdoutStream, stdout_stream, s->server->stdout_streams, s); + } + + if (s->fd >= 0) { + if (s->server) + epoll_ctl(s->server->epoll_fd, EPOLL_CTL_DEL, s->fd, NULL); + + close_nointr_nofail(s->fd); + } + + free(s->tag); + free(s); +} + +static int stdout_stream_new(Server *s) { + 
StdoutStream *stream; + int fd, r; + socklen_t len; + struct epoll_event ev; + + assert(s); + + fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + if (fd < 0) { + if (errno == EAGAIN) + return 0; + + log_error("Failed to accept stdout connection: %m"); + return -errno; + } + + if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) { + log_warning("Too many stdout streams, refusing connection."); + close_nointr_nofail(fd); + return 0; + } + + stream = new0(StdoutStream, 1); + if (!stream) { + log_error("Out of memory."); + close_nointr_nofail(fd); + return -ENOMEM; + } + + stream->fd = fd; + + len = sizeof(stream->ucred); + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &stream->ucred, &len) < 0) { + log_error("Failed to determine peer credentials: %m"); + r = -errno; + goto fail; + } + + if (shutdown(fd, SHUT_WR) < 0) { + log_error("Failed to shutdown writing side of socket: %m"); + r = -errno; + goto fail; + } + + zero(ev); + ev.data.ptr = stream; + ev.events = EPOLLIN; + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) { + log_error("Failed to add stream to event loop: %m"); + r = -errno; + goto fail; + } + + stream->server = s; + LIST_PREPEND(StdoutStream, stdout_stream, s->stdout_streams, stream); + s->n_stdout_streams ++; + + return 0; + +fail: + stdout_stream_free(stream); + return r; +} + +static int process_event(Server *s, struct epoll_event *ev) { + assert(s); + if (ev->data.fd == s->signal_fd) { struct signalfd_siginfo sfsi; ssize_t n; + if (ev->events != EPOLLIN) { + log_info("Got invalid event from epoll."); + return -EIO; + } + n = read(s->signal_fd, &sfsi, sizeof(sfsi)); if (n != sizeof(sfsi)) { @@ -578,10 +916,14 @@ static int process_event(Server *s, struct epoll_event *ev) { log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo)); return 0; - } + } else if (ev->data.fd == s->native_fd || + ev->data.fd == s->syslog_fd) { + + if (ev->events != EPOLLIN) { + log_info("Got invalid event from epoll."); + return -EIO; + } - if 
(ev->data.fd == s->native_fd || - ev->data.fd == s->syslog_fd) { for (;;) { struct msghdr msghdr; struct iovec iovec; @@ -668,6 +1010,38 @@ static int process_event(Server *s, struct epoll_event *ev) { } return 1; + + } else if (ev->data.fd == s->stdout_fd) { + + if (ev->events != EPOLLIN) { + log_info("Got invalid event from epoll."); + return -EIO; + } + + stdout_stream_new(s); + return 1; + + } else { + StdoutStream *stream; + + if ((ev->events|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) { + log_info("Got invalid event from epoll."); + return -EIO; + } + + /* If it is none of the well-known fds, it must be an + * stdout stream fd. Note that this is a bit ugly here + * (since we rely that none of the well-known fds + * could be interpreted as pointer), but nonetheless + * safe, since the well-known fds would never get an + * fd > 4096, i.e. beyond the first memory page */ + + stream = ev->data.ptr; + + if (stdout_stream_process(stream) <= 0) + stdout_stream_free(stream); + + return 1; } log_error("Unknown event."); @@ -737,6 +1111,7 @@ static int system_journal_open(Server *s) { static int open_syslog_socket(Server *s) { union sockaddr_union sa; int one, r; + struct epoll_event ev; assert(s); @@ -777,12 +1152,21 @@ static int open_syslog_socket(Server *s) { return -errno; } + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->syslog_fd; + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) { + log_error("Failed to add syslog server fd to epoll object: %m"); + return -errno; + } + return 0; } static int open_native_socket(Server*s) { union sockaddr_union sa; int one, r; + struct epoll_event ev; assert(s); @@ -823,24 +1207,110 @@ static int open_native_socket(Server*s) { return -errno; } + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->native_fd; + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) { + log_error("Failed to add native server fd to epoll object: %m"); + return -errno; + } + return 0; } -static int server_init(Server *s) 
{ - int n, r, fd; +static int open_stdout_socket(Server *s) { + union sockaddr_union sa; + int r; struct epoll_event ev; + + assert(s); + + if (s->stdout_fd < 0) { + + s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (s->stdout_fd < 0) { + log_error("socket() failed: %m"); + return -errno; + } + + zero(sa); + sa.un.sun_family = AF_UNIX; + strncpy(sa.un.sun_path, "/run/systemd/stdout", sizeof(sa.un.sun_path)); + + unlink(sa.un.sun_path); + + r = bind(s->stdout_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path)); + if (r < 0) { + log_error("bind() failed: %m"); + return -errno; + } + + chmod(sa.un.sun_path, 0666); + + if (listen(s->stdout_fd, SOMAXCONN) < 0) { + log_error("liste() failed: %m"); + return -errno; + } + } + + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->stdout_fd; + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->stdout_fd, &ev) < 0) { + log_error("Failed to add stdout server fd to epoll object: %m"); + return -errno; + } + + return 0; +} + +static int open_signalfd(Server *s) { sigset_t mask; + struct epoll_event ev; + + assert(s); + + assert_se(sigemptyset(&mask) == 0); + sigset_add_many(&mask, SIGINT, SIGTERM, -1); + assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); + + s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); + if (s->signal_fd < 0) { + log_error("signalfd(): %m"); + return -errno; + } + + zero(ev); + ev.events = EPOLLIN; + ev.data.fd = s->signal_fd; + + if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) { + log_error("epoll_ctl(): %m"); + return -errno; + } + + return 0; +} + +static int server_init(Server *s) { + int n, r, fd; assert(s); zero(*s); - s->syslog_fd = s->native_fd = s->signal_fd = -1; + s->syslog_fd = s->native_fd = s->stdout_fd = s->signal_fd = s->epoll_fd = -1; s->metrics.max_size = DEFAULT_MAX_SIZE; s->metrics.min_size = DEFAULT_MIN_SIZE; s->metrics.keep_free = DEFAULT_KEEP_FREE; s->max_use = DEFAULT_MAX_USE; s->compress = true; + 
s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func); + if (!s->user_journals) { + log_error("Out of memory."); + return -ENOMEM; + } + s->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (s->epoll_fd < 0) { log_error("Failed to create epoll object: %m"); @@ -855,23 +1325,33 @@ static int server_init(Server *s) { for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) { - if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) { + if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/native", 0) > 0) { - if (s->syslog_fd >= 0) { - log_error("Too many /dev/log sockets passed."); + if (s->native_fd >= 0) { + log_error("Too many native sockets passed."); return -EINVAL; } - s->syslog_fd = fd; + s->native_fd = fd; - } else if (sd_is_socket(fd, AF_UNIX, SOCK_DGRAM, -1) > 0) { + } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/stdout", 0) > 0) { - if (s->native_fd >= 0) { - log_error("Too many native sockets passed."); + if (s->stdout_fd >= 0) { + log_error("Too many stdout sockets passed."); return -EINVAL; } - s->native_fd = fd; + s->stdout_fd = fd; + + } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0) { + + if (s->syslog_fd >= 0) { + log_error("Too many /dev/log sockets passed."); + return -EINVAL; + } + + s->syslog_fd = fd; + } else { log_error("Unknown socket passed."); return -EINVAL; @@ -882,54 +1362,21 @@ static int server_init(Server *s) { if (r < 0) return r; - zero(ev); - ev.events = EPOLLIN; - ev.data.fd = s->syslog_fd; - if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->syslog_fd, &ev) < 0) { - log_error("Failed to add syslog server fd to epoll object: %m"); - return -errno; - } - r = open_native_socket(s); if (r < 0) return r; - zero(ev); - ev.events = EPOLLIN; - ev.data.fd = s->native_fd; - if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) { - log_error("Failed to add native server fd to epoll object: %m"); - return -errno; - } - - s->user_journals = hashmap_new(trivial_hash_func, 
trivial_compare_func); - if (!s->user_journals) { - log_error("Out of memory."); - return -ENOMEM; - } + r = open_stdout_socket(s); + if (r < 0) + return r; r = system_journal_open(s); if (r < 0) return r; - assert_se(sigemptyset(&mask) == 0); - sigset_add_many(&mask, SIGINT, SIGTERM, -1); - assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); - - s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); - if (s->signal_fd < 0) { - log_error("signalfd(): %m"); - return -errno; - } - - zero(ev); - ev.events = EPOLLIN; - ev.data.fd = s->signal_fd; - - if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->signal_fd, &ev) < 0) { - log_error("epoll_ctl(): %m"); - return -errno; - } + r = open_signalfd(s); + if (r < 0) + return r; return 0; } @@ -938,6 +1385,9 @@ static void server_done(Server *s) { JournalFile *f; assert(s); + while (s->stdout_streams) + stdout_stream_free(s->stdout_streams); + if (s->system_journal) journal_file_close(s->system_journal); @@ -960,6 +1410,9 @@ static void server_done(Server *s) { if (s->native_fd >= 0) close_nointr_nofail(s->native_fd); + + if (s->stdout_fd >= 0) + close_nointr_nofail(s->stdout_fd); } int main(int argc, char *argv[]) { @@ -991,7 +1444,7 @@ int main(int argc, char *argv[]) { sd_notify(false, "READY=1\n" - "STATUS=Processing messages..."); + "STATUS=Processing requests..."); for (;;) { struct epoll_event event; @@ -1015,6 +1468,8 @@ int main(int argc, char *argv[]) { break; } + log_debug("systemd-journald stopped as pid %lu", (unsigned long) getpid()); + finish: sd_notify(false, "STATUS=Shutting down..."); diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index b29680b3a2..0333db4a4d 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -33,7 +33,6 @@ * * - check LE/BE conversion for 8bit, 16bit, 32bit values * - implement audit gateway - * - implement stdout gateway * - extend hash tables table as we go * - accelerate looking for "all hostnames" and suchlike. 
* - throttling @@ -49,6 +48,8 @@ int sd_journal_printv(int priority, const char *format, va_list ap); int sd_journal_send(const char *format, ...) __attribute__((sentinel)); int sd_journal_sendv(const struct iovec *iov, int n); +int sd_journal_stream_fd(const char *tag, int priority, int priority_prefix); + /* Browse journal stream */ typedef struct sd_journal sd_journal; diff --git a/src/stdout-syslog-bridge.c b/src/stdout-syslog-bridge.c index d50df22c88..6ec23ec612 100644 --- a/src/stdout-syslog-bridge.c +++ b/src/stdout-syslog-bridge.c @@ -236,7 +236,6 @@ static int stream_log(Stream *s, char *p, usec_t ts) { writev(console, iovec, 4); } - } return 0; @@ -366,7 +365,6 @@ static int stream_process(Stream *s, usec_t ts) { return -errno; } - if (l == 0) return 0; @@ -409,8 +407,10 @@ static int stream_new(Server *s, int server_fd) { int r; assert(s); + assert(server_fd >= 0); - if ((fd = accept4(server_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC)) < 0) + fd = accept4(server_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + if (fd < 0) return -errno; if (s->n_streams >= STREAMS_MAX) { diff --git a/src/util.c b/src/util.c index 195835425d..c07c569c26 100644 --- a/src/util.c +++ b/src/util.c @@ -2885,7 +2885,8 @@ ssize_t loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) { while (nbytes > 0) { ssize_t k; - if ((k = write(fd, p, nbytes)) <= 0) { + k = write(fd, p, nbytes); + if (k <= 0) { if (k < 0 && errno == EINTR) continue; -- cgit v1.2.3-54-g00ecf From 6e409ce10d134625626d1eddfd6152755ef1908d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 27 Dec 2011 22:51:46 +0100 Subject: journald: implement sophisticated rate limiting --- Makefile.am | 1 + TODO | 2 + src/journal/journal-rate-limit.c | 273 +++++++++++++++++++++++++++++++++++++++ src/journal/journal-rate-limit.h | 34 +++++ src/journal/journald.c | 213 ++++++++++++++++++++++++++++-- src/journal/sd-journal.h | 1 + 6 files changed, 512 insertions(+), 12 deletions(-) create mode 100644 
src/journal/journal-rate-limit.c create mode 100644 src/journal/journal-rate-limit.h diff --git a/Makefile.am b/Makefile.am index 81b5c50199..5fe67fd1c7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1042,6 +1042,7 @@ systemd_journald_SOURCES = \ src/journal/sd-journal.c \ src/journal/journal-file.c \ src/journal/lookup3.c \ + src/journal/journal-rate-limit.c \ src/sd-id128.c \ src/acl-util.c \ src/cgroup-util.c diff --git a/TODO b/TODO index 8daf79a7f0..02688ad9a8 100644 --- a/TODO +++ b/TODO @@ -21,6 +21,8 @@ Bugfixes: Features: +* logind: selinux is borked... + * logind: sends SessionNew on Lock()? * logind: allow showing logout dialog from system diff --git a/src/journal/journal-rate-limit.c b/src/journal/journal-rate-limit.c new file mode 100644 index 0000000000..f69ab2770f --- /dev/null +++ b/src/journal/journal-rate-limit.c @@ -0,0 +1,273 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include + +#include "journal-rate-limit.h" +#include "list.h" +#include "util.h" +#include "hashmap.h" + +#define POOLS_MAX 5 +#define BUCKETS_MAX 127 +#define GROUPS_MAX 2047 + +static const int priority_map[] = { + [LOG_EMERG] = 0, + [LOG_ALERT] = 0, + [LOG_CRIT] = 0, + [LOG_ERR] = 1, + [LOG_WARNING] = 2, + [LOG_NOTICE] = 3, + [LOG_INFO] = 3, + [LOG_DEBUG] = 4 +}; + +typedef struct JournalRateLimitPool JournalRateLimitPool; +typedef struct JournalRateLimitGroup JournalRateLimitGroup; + +struct JournalRateLimitPool { + usec_t begin; + unsigned num; + unsigned suppressed; +}; + +struct JournalRateLimitGroup { + JournalRateLimit *parent; + + char *id; + JournalRateLimitPool pools[POOLS_MAX]; + unsigned hash; + + LIST_FIELDS(JournalRateLimitGroup, bucket); + LIST_FIELDS(JournalRateLimitGroup, lru); +}; + +struct JournalRateLimit { + usec_t interval; + unsigned burst; + + JournalRateLimitGroup* buckets[BUCKETS_MAX]; + JournalRateLimitGroup *lru, *lru_tail; + + unsigned n_groups; +}; + +JournalRateLimit *journal_rate_limit_new(usec_t interval, unsigned burst) { + JournalRateLimit *r; + + assert(interval > 0 || burst == 0); + + r = new0(JournalRateLimit, 1); + if (!r) + return NULL; + + r->interval = interval; + r->burst = burst; + + return r; +} + +static void journal_rate_limit_group_free(JournalRateLimitGroup *g) { + assert(g); + + if (g->parent) { + assert(g->parent->n_groups > 0); + + if (g->parent->lru_tail == g) + g->parent->lru_tail = g->lru_prev; + + LIST_REMOVE(JournalRateLimitGroup, lru, g->parent->lru, g); + LIST_REMOVE(JournalRateLimitGroup, bucket, g->parent->buckets[g->hash % BUCKETS_MAX], g); + + g->parent->n_groups --; + } + + free(g->id); + free(g); +} + +void journal_rate_limit_free(JournalRateLimit *r) { + assert(r); + + while (r->lru) + journal_rate_limit_group_free(r->lru); +} + +static bool journal_rate_limit_group_expired(JournalRateLimitGroup *g, usec_t ts) { + unsigned i; + + assert(g); + + for (i = 0; i < POOLS_MAX; i++) + 
if (g->pools[i].begin + g->parent->interval >= ts) + return false; + + return true; +} + +static void journal_rate_limit_vacuum(JournalRateLimit *r, usec_t ts) { + assert(r); + + /* Makes room for at least one new item, but drop all + * expored items too. */ + + while (r->n_groups >= GROUPS_MAX || + (r->lru_tail && journal_rate_limit_group_expired(r->lru_tail, ts))) + journal_rate_limit_group_free(r->lru_tail); +} + +static JournalRateLimitGroup* journal_rate_limit_group_new(JournalRateLimit *r, const char *id, usec_t ts) { + JournalRateLimitGroup *g; + + assert(r); + assert(id); + + g = new0(JournalRateLimitGroup, 1); + if (!g) + return NULL; + + g->id = strdup(id); + if (!g->id) + goto fail; + + g->hash = string_hash_func(g->id); + + journal_rate_limit_vacuum(r, ts); + + LIST_PREPEND(JournalRateLimitGroup, bucket, r->buckets[g->hash % BUCKETS_MAX], g); + LIST_PREPEND(JournalRateLimitGroup, lru, r->lru, g); + if (!g->lru_next) + r->lru_tail = g; + r->n_groups ++; + + g->parent = r; + return g; + +fail: + journal_rate_limit_group_free(g); + return NULL; +} + +static uint64_t u64log2(uint64_t n) { + unsigned r; + + if (n <= 1) + return 0; + + r = 0; + for (;;) { + n = n >> 1; + if (!n) + return r; + r++; + } +} + +static unsigned burst_modulate(unsigned burst, uint64_t available) { + unsigned k; + + /* Modulates the burst rate a bit with the amount of available + * disk space */ + + k = u64log2(available); + + /* 1MB */ + if (k <= 20) + return burst; + + burst = (burst * (k-20)) / 4; + + /* + * Example: + * + * <= 1MB = rate * 1 + * 16MB = rate * 2 + * 256MB = rate * 3 + * 4GB = rate * 4 + * 64GB = rate * 5 + * 1TB = rate * 6 + */ + + return burst; +} + +int journal_rate_limit_test(JournalRateLimit *r, const char *id, int priority, uint64_t available) { + unsigned h; + JournalRateLimitGroup *g; + JournalRateLimitPool *p; + unsigned burst; + usec_t ts; + + assert(id); + + if (!r) + return 1; + + if (r->interval == 0 || r->burst == 0) + return 1; + + burst = 
burst_modulate(r->burst, available); + + ts = now(CLOCK_MONOTONIC); + + h = string_hash_func(id); + g = r->buckets[h % BUCKETS_MAX]; + + LIST_FOREACH(bucket, g, g) + if (streq(g->id, id)) + break; + + if (!g) { + g = journal_rate_limit_group_new(r, id, ts); + if (!g) + return -ENOMEM; + } + + p = &g->pools[priority_map[priority]]; + + if (p->begin <= 0) { + p->suppressed = 0; + p->num = 1; + p->begin = ts; + return 1; + } + + if (p->begin + r->interval < ts) { + unsigned s; + + s = p->suppressed; + p->suppressed = 0; + p->num = 1; + p->begin = ts; + + return 1 + s; + } + + if (p->num <= burst) { + p->num++; + return 1; + } + + p->suppressed++; + return 0; +} diff --git a/src/journal/journal-rate-limit.h b/src/journal/journal-rate-limit.h new file mode 100644 index 0000000000..2bbdd5f9fe --- /dev/null +++ b/src/journal/journal-rate-limit.h @@ -0,0 +1,34 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foojournalratelimithfoo +#define foojournalratelimithfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include "macro.h" +#include "util.h" + +typedef struct JournalRateLimit JournalRateLimit; + +JournalRateLimit *journal_rate_limit_new(usec_t interval, unsigned burst); +void journal_rate_limit_free(JournalRateLimit *r); +int journal_rate_limit_test(JournalRateLimit *r, const char *id, int priority, uint64_t available); + +#endif diff --git a/src/journal/journald.c b/src/journal/journald.c index 5d3956ea46..9f753013a0 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "hashmap.h" #include "journal-file.h" @@ -38,6 +39,7 @@ #include "acl-util.h" #include "cgroup-util.h" #include "list.h" +#include "journal-rate-limit.h" #define USER_JOURNALS_MAX 1024 #define STDOUT_STREAMS_MAX 4096 @@ -60,6 +62,8 @@ typedef struct Server { char *buffer; size_t buffer_size; + JournalRateLimit *rate_limit; + JournalMetrics metrics; uint64_t max_use; bool compress; @@ -95,6 +99,76 @@ struct StdoutStream { LIST_FIELDS(StdoutStream, stdout_stream); }; +static uint64_t available_space(Server *s) { + char ids[33]; + sd_id128_t machine; + char *p; + const char *f; + struct statvfs ss; + uint64_t sum = 0, avail = 0, ss_avail = 0; + int r; + DIR *d; + + r = sd_id128_get_machine(&machine); + if (r < 0) + return 0; + + if (s->system_journal) + f = "/var/log/journal/"; + else + f = "/run/log/journal/"; + + p = strappend(f, sd_id128_to_string(machine, ids)); + if (!p) + return 0; + + d = opendir(p); + free(p); + + if (!d) + return 0; + + if (fstatvfs(dirfd(d), &ss) < 0) + goto finish; + + for (;;) { + struct stat st; + struct dirent buf, *de; + int k; + + k = readdir_r(d, &buf, &de); + if (k != 0) { + r = -k; + goto finish; + } + + if (!de) + break; + + if (!dirent_is_file_with_suffix(de, ".journal")) + continue; + + if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) + continue; + + sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize; + } + + avail = sum >= s->max_use ? 
0 : s->max_use - sum; + + ss_avail = ss.f_bsize * ss.f_bavail; + + ss_avail = ss_avail < s->metrics.keep_free ? 0 : ss_avail - s->metrics.keep_free; + + if (ss_avail < avail) + avail = ss_avail; + +finish: + closedir(d); + + return avail; +} + static void fix_perms(JournalFile *f, uid_t uid) { acl_t acl; acl_entry_t entry; @@ -254,7 +328,40 @@ static void server_vacuum(Server *s) { free(p); } -static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigned m, struct ucred *ucred, struct timeval *tv) { +static char *shortened_cgroup_path(pid_t pid) { + int r; + char *process_path, *init_path, *path; + + assert(pid > 0); + + r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path); + if (r < 0) + return NULL; + + r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path); + if (r < 0) { + free(process_path); + return NULL; + } + + if (streq(init_path, "/")) + init_path[0] = 0; + + if (startswith(process_path, init_path)) + path = process_path + strlen(init_path); + else + path = process_path; + + free(init_path); + + return path; +} + +static void dispatch_message_real(Server *s, + struct iovec *iovec, unsigned n, unsigned m, + struct ucred *ucred, + struct timeval *tv) { + char *pid = NULL, *uid = NULL, *gid = NULL, *source_time = NULL, *boot_id = NULL, *machine_id = NULL, *comm = NULL, *cmdline = NULL, *hostname = NULL, @@ -270,11 +377,8 @@ static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigne bool vacuumed = false; assert(s); - assert(iovec || n == 0); - - if (n == 0) - return; - + assert(iovec); + assert(n > 0); assert(n + 13 <= m); if (ucred) { @@ -326,11 +430,12 @@ static void dispatch_message(Server *s, struct iovec *iovec, unsigned n, unsigne if (asprintf(&audit_loginuid, "_AUDIT_LOGINUID=%lu", (unsigned long) loginuid) >= 0) IOVEC_SET_STRING(iovec[n++], audit_loginuid); - r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, ucred->pid, &path); - if (r >= 0) { + path = shortened_cgroup_path(ucred->pid); + if 
(path) { cgroup = strappend("_SYSTEMD_CGROUP=", path); if (cgroup) IOVEC_SET_STRING(iovec[n++], cgroup); + free(path); } } @@ -400,6 +505,72 @@ retry: free(cgroup); } +static void dispatch_message(Server *s, + struct iovec *iovec, unsigned n, unsigned m, + struct ucred *ucred, + struct timeval *tv, + int priority) { + int rl; + char *path, *c; + + assert(s); + assert(iovec || n == 0); + + if (n == 0) + return; + + if (!ucred) + goto finish; + + path = shortened_cgroup_path(ucred->pid); + if (!path) + goto finish; + + /* example: /user/lennart/3/foobar + * /system/dbus.service/foobar + * + * So let's cut of everything past the third /, since that is + * wher user directories start */ + + c = strchr(path, '/'); + if (c) { + c = strchr(c+1, '/'); + if (c) { + c = strchr(c+1, '/'); + if (c) + *c = 0; + } + } + + rl = journal_rate_limit_test(s->rate_limit, path, priority, available_space(s)); + + if (rl == 0) { + free(path); + return; + } + + if (rl > 1) { + int j = 0; + char suppress_message[LINE_MAX]; + struct iovec suppress_iovec[15]; + + /* Write a suppression message if we suppressed something */ + + snprintf(suppress_message, sizeof(suppress_message), "MESSAGE=Suppressed %u messages from %s", rl - 1, path); + char_array_0(suppress_message); + + IOVEC_SET_STRING(suppress_iovec[j++], "PRIORITY=5"); + IOVEC_SET_STRING(suppress_iovec[j++], suppress_message); + + dispatch_message_real(s, suppress_iovec, j, ELEMENTSOF(suppress_iovec), NULL, NULL); + } + + free(path); + +finish: + dispatch_message_real(s, iovec, n, m, ucred, tv); +} + static void process_syslog_message(Server *s, const char *buf, struct ucred *ucred, struct timeval *tv) { char *message = NULL, *syslog_priority = NULL, *syslog_facility = NULL; struct iovec iovec[16]; @@ -422,7 +593,7 @@ static void process_syslog_message(Server *s, const char *buf, struct ucred *ucr if (message) IOVEC_SET_STRING(iovec[n++], message); - dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv); + dispatch_message(s, 
iovec, n, ELEMENTSOF(iovec), ucred, tv, priority & LOG_PRIMASK); free(message); free(syslog_facility); @@ -469,6 +640,7 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ unsigned n = 0, m = 0, j; const char *p; size_t remaining; + int priority = LOG_INFO; assert(s); assert(buffer || n == 0); @@ -489,8 +661,9 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ if (e == p) { /* Entry separator */ - dispatch_message(s, iovec, n, m, ucred, tv); + dispatch_message(s, iovec, n, m, ucred, tv, priority); n = 0; + priority = LOG_INFO; p++; remaining--; @@ -532,6 +705,15 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ iovec[n].iov_base = (char*) p; iovec[n].iov_len = e - p; n++; + + /* We need to determine the priority + * of this entry for the rate limiting + * logic */ + if (e - p == 10 && + memcmp(p, "PRIORITY=", 10) == 0 && + p[10] >= '0' && + p[10] <= '9') + priority = p[10] - '0'; } remaining -= (e - p) + 1; @@ -577,7 +759,7 @@ static void process_native_message(Server *s, const void *buffer, size_t buffer_ } } - dispatch_message(s, iovec, n, m, ucred, tv); + dispatch_message(s, iovec, n, m, ucred, tv, priority); for (j = 0; j < n; j++) if (iovec[j].iov_base < buffer || @@ -630,7 +812,7 @@ static int stdout_stream_log(StdoutStream *s, const char *p, size_t l) { n++; } - dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL); + dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, priority); if (s->tee_console) { int console; @@ -1378,6 +1560,10 @@ static int server_init(Server *s) { if (r < 0) return r; + s->rate_limit = journal_rate_limit_new(10*USEC_PER_SEC, 2); + if (!s->rate_limit) + return -ENOMEM; + return 0; } @@ -1413,6 +1599,9 @@ static void server_done(Server *s) { if (s->stdout_fd >= 0) close_nointr_nofail(s->stdout_fd); + + if (s->rate_limit) + journal_rate_limit_free(s->rate_limit); } int main(int argc, char *argv[]) 
{ diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 0333db4a4d..7e2ef15327 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -38,6 +38,7 @@ * - throttling * - cryptographic hash * - never access beyond fle size check + * - OR of matches is borked... */ /* Write to daemon */ -- cgit v1.2.3-54-g00ecf From 330672957438243c8003d3a90f0e59dedbd845e9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 27 Dec 2011 22:52:15 +0100 Subject: udev: exclude loopback device from udev rule based sysctl application, since we can just apply that directly at boot --- src/99-systemd.rules.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/99-systemd.rules.in b/src/99-systemd.rules.in index b2481aea9a..d306f71b63 100644 --- a/src/99-systemd.rules.in +++ b/src/99-systemd.rules.in @@ -44,7 +44,7 @@ SUBSYSTEM=="usb", ENV{DEVTYPE}=="usb_device", ENV{ID_USB_INTERFACES}=="*:0701??: # Apply sysctl variables to network devices (and only to those) as they appear. -SUBSYSTEM=="net", RUN+="@rootlibexecdir@/systemd-sysctl --prefix=/proc/sys/net/ipv4/conf/$name --prefix=/proc/sys/net/ipv4/neigh/$name --prefix=/proc/sys/net/ipv6/conf/$name --prefix=/proc/sys/net/ipv6/neigh/$name" +SUBSYSTEM=="net", KERNEL!="lo", RUN+="@rootlibexecdir@/systemd-sysctl --prefix=/proc/sys/net/ipv4/conf/$name --prefix=/proc/sys/net/ipv4/neigh/$name --prefix=/proc/sys/net/ipv6/conf/$name --prefix=/proc/sys/net/ipv6/neigh/$name" # Asynchronously mount file systems implemented by these modules as # soon as they are loaded. 
-- cgit v1.2.3-54-g00ecf From 24b51289e3f61c2483bb61231f8c16b65cf101be Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 27 Dec 2011 22:52:22 +0100 Subject: journal: fix typo --- src/journal/sd-journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 4095830901..8fc8ec5885 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -437,7 +437,7 @@ static int find_location(sd_journal *j, JournalFile *f, direction_t direction, O (direction == DIRECTION_DOWN && cp < tp) || (direction == DIRECTION_UP && cp > tp)) { to = c; - tp = tp; + tp = cp; } } -- cgit v1.2.3-54-g00ecf From 85a131e8d8aa9fe3c2115e281569bed64a4200f1 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 27 Dec 2011 22:58:20 +0100 Subject: journal: fix hash table lookup logic --- src/journal/journal-file.c | 3 ++- src/journal/sd-journal.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 4a006d3bf3..80775e1acf 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -595,7 +595,7 @@ int journal_file_find_data_object_with_hash( return r; if (le64toh(o->data.hash) != hash) - return -EBADMSG; + goto next; if (o->object.flags & OBJECT_COMPRESSED) { #ifdef HAVE_XZ @@ -637,6 +637,7 @@ int journal_file_find_data_object_with_hash( return 1; } + next: p = le64toh(o->data.next_hash_offset); } diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 7e2ef15327..9872e9c29c 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -35,7 +35,6 @@ * - implement audit gateway * - extend hash tables table as we go * - accelerate looking for "all hostnames" and suchlike. - * - throttling * - cryptographic hash * - never access beyond fle size check * - OR of matches is borked... 
-- cgit v1.2.3-54-g00ecf From 9cfb57c989b62d11c073c77179df4bb7fa19f35d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 27 Dec 2011 23:18:09 +0100 Subject: journald: when checking available disk space for rate limiting, cache the results temporarily --- src/journal/journald.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/journal/journald.c b/src/journal/journald.c index 9f753013a0..c216b78790 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -44,6 +44,8 @@ #define USER_JOURNALS_MAX 1024 #define STDOUT_STREAMS_MAX 4096 +#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC) + typedef struct StdoutStream StdoutStream; typedef struct Server { @@ -68,6 +70,9 @@ typedef struct Server { uint64_t max_use; bool compress; + uint64_t cached_available_space; + usec_t cached_available_space_timestamp; + LIST_HEAD(StdoutStream, stdout_streams); unsigned n_stdout_streams; } Server; @@ -108,6 +113,10 @@ static uint64_t available_space(Server *s) { uint64_t sum = 0, avail = 0, ss_avail = 0; int r; DIR *d; + usec_t ts = now(CLOCK_MONOTONIC); + + if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts) + return s->cached_available_space; r = sd_id128_get_machine(&machine); if (r < 0) @@ -163,6 +172,9 @@ static uint64_t available_space(Server *s) { if (ss_avail < avail) avail = ss_avail; + s->cached_available_space = avail; + s->cached_available_space_timestamp = ts; + finish: closedir(d); @@ -326,6 +338,8 @@ static void server_vacuum(Server *s) { if (r < 0 && r != -ENOENT) log_error("Failed to vacuum %s: %s", p, strerror(-r)); free(p); + + s->cached_available_space_timestamp = 0; } static char *shortened_cgroup_path(pid_t pid) { -- cgit v1.2.3-54-g00ecf From 2a59ea54f136f8fcf6a4e1bdfc51448c81281a3e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 28 Dec 2011 01:53:06 +0100 Subject: journal: never mmap beyond file size --- src/journal/journal-file.c | 13 +++++++++++++ src/journal/sd-journal.h | 8 
+++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 80775e1acf..6c7718de31 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -241,6 +241,10 @@ static int journal_file_map( wsize = size + (offset - woffset); wsize = PAGE_ALIGN(wsize); + /* Avoid SIGBUS on invalid accesses */ + if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size)) + return -EADDRNOTAVAIL; + window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); if (window == MAP_FAILED) return -errno; @@ -305,6 +309,15 @@ static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_ } else delta = 0; + if (offset > (uint64_t) f->last_stat.st_size) + return -EADDRNOTAVAIL; + + if (offset + size > (uint64_t) f->last_stat.st_size) + size = PAGE_ALIGN((uint64_t) f->last_stat.st_size - offset); + + if (size <= 0) + return -EADDRNOTAVAIL; + r = journal_file_map(f, offset, size, &w->ptr, &w->offset, &w->size, diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 9872e9c29c..7f9f78598b 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -36,8 +36,14 @@ * - extend hash tables table as we go * - accelerate looking for "all hostnames" and suchlike. * - cryptographic hash - * - never access beyond fle size check * - OR of matches is borked... 
+ * - flush /run to /var + * - hookup with systemctl + * - local deserializer + * - think about manipulations of header + * - http server + * - handle incomplete header + * - message catalog */ /* Write to daemon */ -- cgit v1.2.3-54-g00ecf From de97b26ac5e29063632312ec1a20eb6318ca924c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 29 Dec 2011 15:00:05 +0100 Subject: journald: increase rate limit burst rate --- src/journal/journald.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index c216b78790..e7231d96f7 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -44,6 +44,9 @@ #define USER_JOURNALS_MAX 1024 #define STDOUT_STREAMS_MAX 4096 +#define DEFAULT_RATE_LIMIT_INTERVAL (10*USEC_PER_SEC) +#define DEFAULT_RATE_LIMIT_BURST 200 + #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC) typedef struct StdoutStream StdoutStream; @@ -1574,7 +1577,7 @@ static int server_init(Server *s) { if (r < 0) return r; - s->rate_limit = journal_rate_limit_new(10*USEC_PER_SEC, 2); + s->rate_limit = journal_rate_limit_new(DEFAULT_RATE_LIMIT_INTERVAL, DEFAULT_RATE_LIMIT_BURST); if (!s->rate_limit) return -ENOMEM; -- cgit v1.2.3-54-g00ecf From cf244689e9d1ab50082c9ddd0f3c4d1eb982badc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 29 Dec 2011 15:00:57 +0100 Subject: journald: flush /run to /var as soon as it becomes available --- src/journal/journal-file.c | 93 +++++++++++++++-- src/journal/journal-file.h | 4 + src/journal/journalctl.c | 2 +- src/journal/journald.c | 248 +++++++++++++++++++++++++++++++++------------ src/journal/sd-journal.c | 69 ++++--------- src/journal/sd-journal.h | 23 +++-- 6 files changed, 309 insertions(+), 130 deletions(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 6c7718de31..190bfb996b 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -950,7 +950,7 @@ static int 
journal_file_append_entry_internal( return 0; } -static void journal_file_post_change(JournalFile *f) { +void journal_file_post_change(JournalFile *f) { assert(f); /* inotify() does not receive IN_MODIFY events from file @@ -989,9 +989,7 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st if (ts->realtime < le64toh(f->header->tail_entry_realtime)) return -EINVAL; - items = new(EntryItem, n_iovec); - if (!items) - return -ENOMEM; + items = alloca(sizeof(EntryItem) * n_iovec); for (i = 0; i < n_iovec; i++) { uint64_t p; @@ -999,7 +997,7 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); if (r < 0) - goto finish; + return r; xor_hash ^= le64toh(o->data.hash); items[i].object_offset = htole64(p); @@ -1010,9 +1008,6 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st journal_file_post_change(f); -finish: - free(items); - return r; } @@ -1999,3 +1994,85 @@ finish: return r; } + +int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) { + uint64_t i, n; + uint64_t q, xor_hash = 0; + int r; + EntryItem *items; + dual_timestamp ts; + + assert(from); + assert(to); + assert(o); + assert(p); + + if (!to->writable) + return -EPERM; + + ts.monotonic = le64toh(o->entry.monotonic); + ts.realtime = le64toh(o->entry.realtime); + + if (to->tail_entry_monotonic_valid && + ts.monotonic < le64toh(to->header->tail_entry_monotonic)) + return -EINVAL; + + if (ts.realtime < le64toh(to->header->tail_entry_realtime)) + return -EINVAL; + + n = journal_file_entry_n_items(o); + items = alloca(sizeof(EntryItem) * n); + + for (i = 0; i < n; i++) { + uint64_t le_hash, l, h; + size_t t; + void *data; + Object *u; + + q = le64toh(o->entry.items[i].object_offset); + le_hash = o->entry.items[i].hash; + + r = journal_file_move_to_object(from, 
OBJECT_DATA, q, &o); + if (r < 0) + return r; + + if (le_hash != o->data.hash) + return -EBADMSG; + + l = le64toh(o->object.size) - offsetof(Object, data.payload); + t = (size_t) l; + + /* We hit the limit on 32bit machines */ + if ((uint64_t) t != l) + return -E2BIG; + + if (o->object.flags & OBJECT_COMPRESSED) { +#ifdef HAVE_XZ + uint64_t rsize; + + if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize)) + return -EBADMSG; + + data = from->compress_buffer; + l = rsize; +#else + return -EPROTONOSUPPORT; +#endif + } else + data = o->data.payload; + + r = journal_file_append_data(to, data, l, &u, &h); + if (r < 0) + return r; + + xor_hash ^= le64toh(u->data.hash); + items[i].object_offset = htole64(h); + items[i].hash = u->data.hash; + + r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + } + + return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset); +} diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 421dfa6766..ab2970ca00 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -113,10 +113,14 @@ int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, uint64_t data_offset, uint64_t seqnum, direction_t direction, Object **ret, uint64_t *offset); int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, uint64_t data_offset, uint64_t realtime, direction_t direction, Object **ret, uint64_t *offset); +int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset); + void journal_file_dump(JournalFile *f); int journal_file_rotate(JournalFile **f); int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free); +void journal_file_post_change(JournalFile *f); + #endif diff --git a/src/journal/journalctl.c 
b/src/journal/journalctl.c index 5a1cb6e88a..da4f51021d 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -454,7 +454,7 @@ int main(int argc, char *argv[]) { if (r <= 0) goto finish; - r = sd_journal_open(&j); + r = sd_journal_open(&j, 0); if (r < 0) { log_error("Failed to open journal: %s", strerror(-r)); goto finish; diff --git a/src/journal/journald.c b/src/journal/journald.c index e7231d96f7..1efe0420db 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -40,6 +40,8 @@ #include "cgroup-util.h" #include "list.h" #include "journal-rate-limit.h" +#include "sd-journal.h" +#include "journal-internal.h" #define USER_JOURNALS_MAX 1024 #define STDOUT_STREAMS_MAX 4096 @@ -107,6 +109,8 @@ struct StdoutStream { LIST_FIELDS(StdoutStream, stdout_stream); }; +static int server_flush_to_var(Server *s); + static uint64_t available_space(Server *s) { char ids[33]; sd_id128_t machine; @@ -239,8 +243,12 @@ static JournalFile* find_journal(Server *s, uid_t uid) { assert(s); - /* We split up user logs only on /var, not on /run */ - if (!s->system_journal) + /* We split up user logs only on /var, not on /run. If the + * runtime file is open, we write to it exclusively, in order + * to guarantee proper order as soon as we flush /run to + * /var and close the runtime file. */ + + if (s->runtime_journal) return s->runtime_journal; if (uid <= 0) @@ -486,6 +494,8 @@ static void dispatch_message_real(Server *s, assert(n <= m); + server_flush_to_var(s); + retry: f = find_journal(s, realuid == 0 ? 
0 : loginuid); if (!f) @@ -1088,6 +1098,170 @@ fail: return r; } +static int system_journal_open(Server *s) { + int r; + char *fn; + sd_id128_t machine; + char ids[33]; + + r = sd_id128_get_machine(&machine); + if (r < 0) + return r; + + sd_id128_to_string(machine, ids); + + if (!s->system_journal) { + + /* First try to create the machine path, but not the prefix */ + fn = strappend("/var/log/journal/", ids); + if (!fn) + return -ENOMEM; + (void) mkdir(fn, 0755); + free(fn); + + /* The create the system journal file */ + fn = join("/var/log/journal/", ids, "/system.journal", NULL); + if (!fn) + return -ENOMEM; + + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal); + free(fn); + + if (r >= 0) { + s->system_journal->metrics = s->metrics; + s->system_journal->compress = s->compress; + + fix_perms(s->system_journal, 0); + } else if (r < 0) { + + if (r == -ENOENT) + r = 0; + else { + log_error("Failed to open system journal: %s", strerror(-r)); + return r; + } + } + } + + if (!s->runtime_journal) { + + fn = join("/run/log/journal/", ids, "/system.journal", NULL); + if (!fn) + return -ENOMEM; + + if (s->system_journal) { + + /* Try to open the runtime journal, but only + * if it already exists, so that we can flush + * it into the system journal */ + + r = journal_file_open(fn, O_RDWR, 0640, NULL, &s->runtime_journal); + free(fn); + + if (r < 0) { + + if (r == -ENOENT) + r = 0; + else { + log_error("Failed to open runtime journal: %s", strerror(-r)); + return r; + } + } + + } else { + + /* OK, we really need the runtime journal, so create + * it if necessary. 
*/ + + (void) mkdir_parents(fn, 0755); + r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal); + free(fn); + + if (r < 0) { + log_error("Failed to open runtime journal: %s", strerror(-r)); + return r; + } + } + + if (s->runtime_journal) { + s->runtime_journal->metrics = s->metrics; + s->runtime_journal->compress = s->compress; + + fix_perms(s->runtime_journal, 0); + } + } + + return r; +} + +static int server_flush_to_var(Server *s) { + char path[] = "/run/log/journal/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + Object *o = NULL; + int r; + sd_id128_t machine; + sd_journal *j; + + assert(s); + + system_journal_open(s); + + if (!s->system_journal || !s->runtime_journal) + return 0; + + r = sd_id128_get_machine(&machine); + if (r < 0) { + log_error("Failed to get machine id: %s", strerror(-r)); + return r; + } + + r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY); + if (r < 0) { + log_error("Failed to read runtime journal: %s", strerror(-r)); + return r; + } + + SD_JOURNAL_FOREACH(j) { + JournalFile *f; + + f = j->current_file; + assert(f && f->current_offset > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) { + log_error("Can't read entry: %s", strerror(-r)); + goto finish; + } + + r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL); + if (r == -E2BIG) { + log_info("Allocation limit reached."); + + journal_file_post_change(s->system_journal); + server_vacuum(s); + + r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL); + } + + if (r < 0) { + log_error("Can't write entry: %s", strerror(-r)); + goto finish; + } + } + +finish: + journal_file_post_change(s->system_journal); + + journal_file_close(s->runtime_journal); + s->runtime_journal = NULL; + + if (r >= 0) { + sd_id128_to_string(machine, path + 17); + rm_rf(path, false, true, false); + } + + return r; +} + static int process_event(Server *s, struct epoll_event *ev) { assert(s); @@ 
-1112,6 +1286,11 @@ static int process_event(Server *s, struct epoll_event *ev) { return -errno; } + if (sfsi.ssi_signo == SIGUSR1) { + server_flush_to_var(s); + return 0; + } + log_debug("Received SIG%s", signal_to_string(sfsi.ssi_signo)); return 0; @@ -1247,66 +1426,6 @@ static int process_event(Server *s, struct epoll_event *ev) { return 0; } -static int system_journal_open(Server *s) { - int r; - char *fn; - sd_id128_t machine; - char ids[33]; - - r = sd_id128_get_machine(&machine); - if (r < 0) - return r; - - /* First try to create the machine path, but not the prefix */ - fn = strappend("/var/log/journal/", sd_id128_to_string(machine, ids)); - if (!fn) - return -ENOMEM; - (void) mkdir(fn, 0755); - free(fn); - - /* The create the system journal file */ - fn = join("/var/log/journal/", ids, "/system.journal", NULL); - if (!fn) - return -ENOMEM; - - r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->system_journal); - free(fn); - - if (r >= 0) { - s->system_journal->metrics = s->metrics; - s->system_journal->compress = s->compress; - - fix_perms(s->system_journal, 0); - return r; - } - - if (r < 0 && r != -ENOENT) { - log_error("Failed to open system journal: %s", strerror(-r)); - return r; - } - - /* /var didn't work, so try /run, but this time we - * create the prefix too */ - fn = join("/run/log/journal/", ids, "/system.journal", NULL); - if (!fn) - return -ENOMEM; - - (void) mkdir_parents(fn, 0755); - r = journal_file_open(fn, O_RDWR|O_CREAT, 0640, NULL, &s->runtime_journal); - free(fn); - - if (r < 0) { - log_error("Failed to open runtime journal: %s", strerror(-r)); - return r; - } - - s->runtime_journal->metrics = s->metrics; - s->runtime_journal->compress = s->compress; - - fix_perms(s->runtime_journal, 0); - return r; -} - static int open_syslog_socket(Server *s) { union sockaddr_union sa; int one, r; @@ -1470,7 +1589,7 @@ static int open_signalfd(Server *s) { assert(s); assert_se(sigemptyset(&mask) == 0); - sigset_add_many(&mask, SIGINT, 
SIGTERM, -1); + sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, -1); assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); s->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); @@ -1652,6 +1771,9 @@ int main(int argc, char *argv[]) { "READY=1\n" "STATUS=Processing requests..."); + server_vacuum(&server); + server_flush_to_var(&server); + for (;;) { struct epoll_event event; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 8fc8ec5885..38e58f5732 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -32,58 +32,10 @@ #include "list.h" #include "lookup3.h" #include "compress.h" +#include "journal-internal.h" #define JOURNAL_FILES_MAX 1024 -typedef struct Match Match; - -struct Match { - char *data; - size_t size; - uint64_t le_hash; - - LIST_FIELDS(Match, matches); -}; - -typedef enum location_type { - LOCATION_HEAD, - LOCATION_TAIL, - LOCATION_DISCRETE -} location_type_t; - -typedef struct Location { - location_type_t type; - - uint64_t seqnum; - sd_id128_t seqnum_id; - bool seqnum_set; - - uint64_t realtime; - bool realtime_set; - - uint64_t monotonic; - sd_id128_t boot_id; - bool monotonic_set; - - uint64_t xor_hash; - bool xor_hash_set; -} Location; - -struct sd_journal { - Hashmap *files; - - Location current_location; - JournalFile *current_file; - uint64_t current_field; - - int inotify_fd; - Hashmap *inotify_wd_dirs; - Hashmap *inotify_wd_roots; - - LIST_HEAD(Match, matches); - unsigned n_matches; -}; - static void detach_location(sd_journal *j) { Iterator i; JournalFile *f; @@ -948,6 +900,10 @@ static int add_file(sd_journal *j, const char *prefix, const char *dir, const ch assert(prefix); assert(filename); + if ((j->flags & SD_JOURNAL_SYSTEM_ONLY) && + !startswith(filename, "system.journal")) + return 0; + if (dir) fn = join(prefix, "/", dir, "/", filename, NULL); else @@ -1024,11 +980,18 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dir) { int r; DIR *d; int wd; + sd_id128_t id, 
mid; assert(j); assert(prefix); assert(dir); + if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && + (sd_id128_from_string(dir, &id) < 0 || + sd_id128_get_machine(&mid) < 0 || + !sd_id128_equal(id, mid))) + return 0; + fn = join(prefix, "/", dir, NULL); if (!fn) return -ENOMEM; @@ -1132,7 +1095,7 @@ static void remove_root_wd(sd_journal *j, int wd) { } } -int sd_journal_open(sd_journal **ret) { +int sd_journal_open(sd_journal **ret, int flags) { sd_journal *j; const char *p; const char search_paths[] = @@ -1146,6 +1109,8 @@ int sd_journal_open(sd_journal **ret) { if (!j) return -ENOMEM; + j->flags = flags; + j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); if (j->inotify_fd < 0) { r = -errno; @@ -1172,6 +1137,10 @@ int sd_journal_open(sd_journal **ret) { NULSTR_FOREACH(p, search_paths) { DIR *d; + if ((flags & SD_JOURNAL_RUNTIME_ONLY) && + !path_startswith(p, "/run")) + continue; + d = opendir(p); if (!d) { if (errno != ENOENT) diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index 7f9f78598b..f6b1c955fb 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -31,19 +31,20 @@ /* TODO: * - * - check LE/BE conversion for 8bit, 16bit, 32bit values - * - implement audit gateway + * - OR of matches is borked... * - extend hash tables table as we go * - accelerate looking for "all hostnames" and suchlike. - * - cryptographic hash - * - OR of matches is borked... 
- * - flush /run to /var * - hookup with systemctl + * - handle incomplete header + * * - local deserializer - * - think about manipulations of header * - http server - * - handle incomplete header * - message catalog + * + * - check LE/BE conversion for 8bit, 16bit, 32bit values + * - cryptographic hash + * - think about manipulations of header + * - implement audit gateway */ /* Write to daemon */ @@ -60,7 +61,13 @@ int sd_journal_stream_fd(const char *tag, int priority, int priority_prefix); typedef struct sd_journal sd_journal; -int sd_journal_open(sd_journal **ret); +enum { + SD_JOURNAL_LOCAL_ONLY = 1, + SD_JOURNAL_RUNTIME_ONLY = 2, + SD_JOURNAL_SYSTEM_ONLY = 4 +}; + +int sd_journal_open(sd_journal **ret, int flags); void sd_journal_close(sd_journal *j); int sd_journal_previous(sd_journal *j); -- cgit v1.2.3-54-g00ecf From 54a7b863dd3937893abae47b20b6f655b8e9252a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 29 Dec 2011 15:25:42 +0100 Subject: journald: don't recheck /var availability more often than 30s --- src/journal/journald.c | 16 +++++++++++++++- src/journal/sd-journal.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index 1efe0420db..52cdc7058c 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -51,6 +51,8 @@ #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC) +#define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC) + typedef struct StdoutStream StdoutStream; typedef struct Server { @@ -78,6 +80,8 @@ typedef struct Server { uint64_t cached_available_space; usec_t cached_available_space_timestamp; + uint64_t var_available_timestamp; + LIST_HEAD(StdoutStream, stdout_streams); unsigned n_stdout_streams; } Server; @@ -1200,12 +1204,22 @@ static int server_flush_to_var(Server *s) { int r; sd_id128_t machine; sd_journal *j; + usec_t ts; assert(s); + if (!s->runtime_journal) + return 0; + + ts = now(CLOCK_MONOTONIC); + if (s->var_available_timestamp + 
RECHECK_VAR_AVAILABLE_USEC > ts) + return 0; + + s->var_available_timestamp = ts; + system_journal_open(s); - if (!s->system_journal || !s->runtime_journal) + if (!s->system_journal) return 0; r = sd_id128_get_machine(&machine); diff --git a/src/journal/sd-journal.h b/src/journal/sd-journal.h index f6b1c955fb..97f9f0fa13 100644 --- a/src/journal/sd-journal.h +++ b/src/journal/sd-journal.h @@ -36,6 +36,7 @@ * - accelerate looking for "all hostnames" and suchlike. * - hookup with systemctl * - handle incomplete header + * - write unit files * * - local deserializer * - http server -- cgit v1.2.3-54-g00ecf From adb2ce5f694cb528f9294219941b1e37dc6a9530 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 30 Dec 2011 15:34:21 +0100 Subject: remount-api-vfs: handle another OOM condition --- src/remount-api-vfs.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/remount-api-vfs.c b/src/remount-api-vfs.c index 8bbc021dc4..7b146551a8 100644 --- a/src/remount-api-vfs.c +++ b/src/remount-api-vfs.c @@ -54,12 +54,14 @@ int main(int argc, char *argv[]) { umask(0022); - if (!(f = setmntent("/etc/fstab", "r"))) { + f = setmntent("/etc/fstab", "r"); + if (!f) { log_error("Failed to open /etc/fstab: %m"); goto finish; } - if (!(pids = hashmap_new(trivial_hash_func, trivial_compare_func))) { + pids = hashmap_new(trivial_hash_func, trivial_compare_func); + if (!pids) { log_error("Failed to allocate set"); goto finish; } @@ -76,9 +78,10 @@ int main(int argc, char *argv[]) { log_debug("Remounting %s", me->mnt_dir); - if ((pid = fork()) < 0) { + pid = fork(); + if (pid < 0) { log_error("Failed to fork: %m"); - ret = 1; + ret = EXIT_FAILURE; continue; } @@ -101,8 +104,15 @@ int main(int argc, char *argv[]) { /* Parent */ s = strdup(me->mnt_dir); + if (!s) { + log_error("Out of memory."); + ret = EXIT_FAILURE; + continue; + } + - if ((k = hashmap_put(pids, UINT_TO_PTR(pid), s)) < 0) { + k = hashmap_put(pids, UINT_TO_PTR(pid), s); + if (k 
< 0) { log_error("Failed to add PID to set: %s", strerror(-k)); ret = EXIT_FAILURE; continue; @@ -124,7 +134,8 @@ int main(int argc, char *argv[]) { break; } - if ((s = hashmap_remove(pids, UINT_TO_PTR(si.si_pid)))) { + s = hashmap_remove(pids, UINT_TO_PTR(si.si_pid)); + if (s) { if (!is_clean_exit(si.si_code, si.si_status)) { if (si.si_code == CLD_EXITED) log_error("/bin/mount for %s exited with exit status %i.", s, si.si_status); -- cgit v1.2.3-54-g00ecf From f39e126e990869e33a002763ec02aa0aeb06214a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 30 Dec 2011 16:01:33 +0100 Subject: journald: add missing header --- src/journal/journal-internal.h | 83 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/journal/journal-internal.h diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h new file mode 100644 index 0000000000..1b64666da3 --- /dev/null +++ b/src/journal/journal-internal.h @@ -0,0 +1,83 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef foojournalinternalhfoo +#define foojournalinternalhfoo + +/*** + This file is part of systemd. + + Copyright 2011 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/ + +#include <sys/types.h> +#include <inttypes.h> +#include <stdbool.h> + +#include "list.h" +#include "sd-id128.h" + +typedef struct Match Match; + +struct Match { + char *data; + size_t size; + uint64_t le_hash; + + LIST_FIELDS(Match, matches); +}; + +typedef enum location_type { + LOCATION_HEAD, + LOCATION_TAIL, + LOCATION_DISCRETE +} location_type_t; + +typedef struct Location { + location_type_t type; + + uint64_t seqnum; + sd_id128_t seqnum_id; + bool seqnum_set; + + uint64_t realtime; + bool realtime_set; + + uint64_t monotonic; + sd_id128_t boot_id; + bool monotonic_set; + + uint64_t xor_hash; + bool xor_hash_set; +} Location; + +struct sd_journal { + int flags; + + Hashmap *files; + + Location current_location; + JournalFile *current_file; + uint64_t current_field; + + int inotify_fd; + Hashmap *inotify_wd_dirs; + Hashmap *inotify_wd_roots; + + LIST_HEAD(Match, matches); + unsigned n_matches; +}; + +#endif -- cgit v1.2.3-54-g00ecf From 4b2d99d9f4258a29f0bf8b1a78d17836e75bc378 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 30 Dec 2011 17:50:37 +0100 Subject: journal: add unit files and shared library glue --- .gitignore | 2 + Makefile.am | 108 +++++++++++++++++++++++++++++++++++--- libsystemd-id128.pc.in | 18 +++++++ libsystemd-journal.pc.in | 19 +++++++ src/libsystemd-id128.sym | 22 ++++++++ src/libsystemd-journal.sym | 45 ++++++++++++++++ units/.gitignore | 1 + units/systemd-journald.service.in | 24 +++++++++ units/systemd-journald.socket | 24 +++++++++ 9 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 libsystemd-id128.pc.in create mode 100644 libsystemd-journal.pc.in create mode 100644 src/libsystemd-id128.sym create mode 100644 src/libsystemd-journal.sym create mode 100644 units/systemd-journald.service.in create mode 100644 units/systemd-journald.socket diff --git a/.gitignore b/.gitignore index 265801ff5f..28b40de7db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +/libsystemd-journal.pc +/libsystemd-id128.pc systemd-journalctl
systemd-journald test-id128 diff --git a/Makefile.am b/Makefile.am index 5fe67fd1c7..fa0a217813 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,6 +27,14 @@ LIBSYSTEMD_DAEMON_CURRENT=0 LIBSYSTEMD_DAEMON_REVISION=0 LIBSYSTEMD_DAEMON_AGE=0 +LIBSYSTEMD_ID128_CURRENT=0 +LIBSYSTEMD_ID128_REVISION=0 +LIBSYSTEMD_ID128_AGE=0 + +LIBSYSTEMD_JOURNAL_CURRENT=0 +LIBSYSTEMD_JOURNAL_REVISION=0 +LIBSYSTEMD_JOURNAL_AGE=0 + # Dirs of external packages dbuspolicydir=@dbuspolicydir@ dbussessionservicedir=@dbussessionservicedir@ @@ -217,11 +225,15 @@ endif lib_LTLIBRARIES = \ libsystemd-daemon.la \ - libsystemd-login.la + libsystemd-login.la \ + libsystemd-id128.la \ + libsystemd-journal.la pkginclude_HEADERS = \ src/sd-daemon.h \ - src/sd-login.h + src/sd-login.h \ + src/sd-id128.h \ + src/journal/sd-journal.h noinst_PROGRAMS = \ test-engine \ @@ -389,7 +401,8 @@ dist_systemunit_DATA = \ units/quotaon.service \ units/systemd-ask-password-wall.path \ units/systemd-ask-password-console.path \ - units/syslog.target + units/syslog.target \ + units/systemd-journald.socket if HAVE_SYSV_COMPAT dist_systemunit_DATA += \ @@ -410,6 +423,7 @@ nodist_systemunit_DATA = \ units/systemd-stdout-syslog-bridge.service \ units/systemd-shutdownd.service \ units/systemd-logind.service \ + units/systemd-journald.service \ units/systemd-kmsg-syslogd.service \ units/systemd-modules-load.service \ units/systemd-vconsole-setup.service \ @@ -473,7 +487,8 @@ EXTRA_DIST = \ units/systemd-stdout-syslog-bridge.service.in \ units/systemd-shutdownd.service.in \ units/systemd-logind.service.in \ - units/systemd-kmsg-syslogd.service.in \ + units/systemd-journald.service.in \ + units/systemd-kmsg-syslogd.service.in \ units/systemd-modules-load.service.in \ units/systemd-vconsole-setup.service.in \ units/systemd-remount-api-vfs.service.in \ @@ -502,8 +517,12 @@ EXTRA_DIST = \ systemd.pc.in \ libsystemd-daemon.pc.in \ libsystemd-login.pc.in \ + libsystemd-id128.pc.in \ + libsystemd-journal.pc.in \ 
src/libsystemd-daemon.sym \ src/libsystemd-login.sym \ + src/libsystemd-id128.sym \ + src/libsystemd-journal.sym \ introspect.awk \ src/73-seat-late.rules.in \ src/99-systemd.rules.in \ @@ -594,7 +613,9 @@ pkgconfigdata_DATA = \ pkgconfiglib_DATA = \ libsystemd-daemon.pc \ - libsystemd-login.pc + libsystemd-login.pc \ + libsystemd-id128.pc \ + libsystemd-journal.pc # Passed through intltool only polkitpolicy_in_files = \ @@ -770,7 +791,14 @@ EXTRA_DIST += \ src/logind-user.h \ src/logind-acl.h \ src/dbus-loop.h \ - src/spawn-agent.h + src/spawn-agent.h \ + src/journal/journal-def.h \ + src/journal/journal-internal.h \ + src/journal/journal-file.h \ + src/journal/lookup3.h \ + src/journal/compress.h \ + src/journal/journal-rate-limit.h \ + src/acl-util.h MANPAGES = \ man/systemd.1 \ @@ -1703,6 +1731,69 @@ libsystemd-login-install-hook: libsystemd-login-uninstall-hook: rm -f $(DESTDIR)$(rootlibdir)/libsystemd-login.so* +libsystemd_id128_la_SOURCES = \ + src/sd-id128.c + +libsystemd_id128_la_CFLAGS = \ + $(AM_CFLAGS) \ + -fvisibility=hidden + +libsystemd_id128_la_LDFLAGS = \ + -shared \ + -version-info $(LIBSYSTEMD_ID128_CURRENT):$(LIBSYSTEMD_ID128_REVISION):$(LIBSYSTEMD_ID128_AGE) \ + -Wl,--version-script=$(top_srcdir)/src/libsystemd-id128.sym + +libsystemd_id128_la_LIBADD = \ + libsystemd-basic.la + +# move lib from $(libdir) to $(rootlibdir) and update devel link, if needed +libsystemd-id128-install-hook: + if test "$(libdir)" != "$(rootlibdir)"; then \ + mkdir -p $(DESTDIR)$(rootlibdir) && \ + so_img_name=$$(readlink $(DESTDIR)$(libdir)/libsystemd-id128.so) && \ + so_img_rel_target_prefix=$$(echo $(libdir) | sed 's,\(^/\|\)[^/][^/]*,..,g') && \ + ln -sf $$so_img_rel_target_prefix$(rootlibdir)/$$so_img_name $(DESTDIR)$(libdir)/libsystemd-id128.so && \ + mv $(DESTDIR)$(libdir)/libsystemd-id128.so.* $(DESTDIR)$(rootlibdir); \ + fi + +libsystemd-id128-uninstall-hook: + rm -f $(DESTDIR)$(rootlibdir)/libsystemd-id128.so* + +libsystemd_journal_la_SOURCES = \ + 
src/journal/sd-journal.c \ + src/journal/journal-file.c \ + src/journal/compress.c \ + src/journal/lookup3.c \ + src/journal/journal-send.c + +libsystemd_journal_la_CFLAGS = \ + $(AM_CFLAGS) \ + $(XZ_CFLAGS) \ + -fvisibility=hidden + +libsystemd_journal_la_LDFLAGS = \ + -shared \ + -version-info $(LIBSYSTEMD_JOURNAL_CURRENT):$(LIBSYSTEMD_JOURNAL_REVISION):$(LIBSYSTEMD_JOURNAL_AGE) \ + -Wl,--version-script=$(top_srcdir)/src/libsystemd-journal.sym + +libsystemd_journal_la_LIBADD = \ + libsystemd-basic.la \ + libsystemd-id128.la \ + $(XZ_LIBS) + +# move lib from $(libdir) to $(rootlibdir) and update devel link, if needed +libsystemd-journal-install-hook: + if test "$(libdir)" != "$(rootlibdir)"; then \ + mkdir -p $(DESTDIR)$(rootlibdir) && \ + so_img_name=$$(readlink $(DESTDIR)$(libdir)/libsystemd-journal.so) && \ + so_img_rel_target_prefix=$$(echo $(libdir) | sed 's,\(^/\|\)[^/][^/]*,..,g') && \ + ln -sf $$so_img_rel_target_prefix$(rootlibdir)/$$so_img_name $(DESTDIR)$(libdir)/libsystemd-journal.so && \ + mv $(DESTDIR)$(libdir)/libsystemd-journal.so.* $(DESTDIR)$(rootlibdir); \ + fi + +libsystemd-journal-uninstall-hook: + rm -f $(DESTDIR)$(rootlibdir)/libsystemd-journal.so* + SED_PROCESS = \ $(AM_V_GEN)$(MKDIR_P) $(dir $@) && \ $(SED) -e 's,@rootlibexecdir\@,$(rootlibexecdir),g' \ @@ -1919,11 +2010,12 @@ endif rm -f user && \ $(LN_S) $(pkgsysconfdir)/user user ) ( cd $(DESTDIR)$(systemunitdir)/sockets.target.wants && \ - rm -f systemd-initctl.socket systemd-stdout-syslog-bridge.socket systemd-shutdownd.socket syslog.socket && \ + rm -f systemd-initctl.socket systemd-stdout-syslog-bridge.socket systemd-shutdownd.socket syslog.socket systemd-journald.socket && \ $(LN_S) ../systemd-stdout-syslog-bridge.socket systemd-stdout-syslog-bridge.socket && \ $(LN_S) ../systemd-initctl.socket systemd-initctl.socket && \ $(LN_S) ../systemd-shutdownd.socket systemd-shutdownd.socket && \ - $(LN_S) ../syslog.socket syslog.socket ) + $(LN_S) ../syslog.socket syslog.socket && \ + 
$(LN_S) ../systemd-journald.socket ) ( cd $(DESTDIR)$(systemunitdir)/runlevel1.target.wants && \ rm -f systemd-update-utmp-runlevel.service && \ $(LN_S) ../systemd-update-utmp-runlevel.service systemd-update-utmp-runlevel.service ) diff --git a/libsystemd-id128.pc.in b/libsystemd-id128.pc.in new file mode 100644 index 0000000000..4d984fdff5 --- /dev/null +++ b/libsystemd-id128.pc.in @@ -0,0 +1,18 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: systemd +Description: systemd 128 Bit ID Utility Library +URL: @PACKAGE_URL@ +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lsystemd-id128 +Cflags: -I${includedir} diff --git a/libsystemd-journal.pc.in b/libsystemd-journal.pc.in new file mode 100644 index 0000000000..13cc8208df --- /dev/null +++ b/libsystemd-journal.pc.in @@ -0,0 +1,19 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: systemd +Description: systemd Journal Utility Library +URL: @PACKAGE_URL@ +Version: @PACKAGE_VERSION@ +Requires: libsystemd-id128 = @PACKAGE_VERSION@ +Libs: -L${libdir} -lsystemd-journal +Cflags: -I${includedir} diff --git a/src/libsystemd-id128.sym b/src/libsystemd-id128.sym new file mode 100644 index 0000000000..c4d1cf5d48 --- /dev/null +++ b/src/libsystemd-id128.sym @@ -0,0 +1,22 @@ +/*** + This file is part of systemd. 
+ + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. +***/ + +/* Original symbols from systemd v38 */ + +LIBSYSTEMD_ID128_38 { +global: + sd_id128_to_string; + sd_id128_from_string; + sd_id128_randomize; + sd_id128_make_v4_uuid; + sd_id128_get_machine; + sd_id128_get_boot; +local: + *; +}; diff --git a/src/libsystemd-journal.sym b/src/libsystemd-journal.sym new file mode 100644 index 0000000000..7653880e8f --- /dev/null +++ b/src/libsystemd-journal.sym @@ -0,0 +1,45 @@ +/*** + This file is part of systemd. + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. +***/ + +/* Original symbols from systemd v38 */ + +LIBSYSTEMD_JOURNAL_38 { +global: + sd_journal_print; + sd_journal_printv; + sd_journal_send; + sd_journal_sendv; + sd_journal_stream_fd; + sd_journal_open; + sd_journal_close; + sd_journal_previous; + sd_journal_next; + sd_journal_previous_skip; + sd_journal_next_skip; + sd_journal_get_realtime_usec; + sd_journal_get_monotonic_usec; + sd_journal_get_data; + sd_journal_enumerate_data; + sd_journal_restart_data; + sd_journal_add_match; + sd_journal_flush_matches; + sd_journal_seek_head; + sd_journal_seek_tail; + sd_journal_seek_monotonic_usec; + sd_journal_seek_realtime_usec; + sd_journal_seek_cursor; + sd_journal_get_cursor; + sd_journal_query_unique; + sd_journal_enumerate_unique; + sd_journal_restart_unique; + sd_journal_get_fd; + sd_journal_process; +local: + *; +}; diff --git a/units/.gitignore b/units/.gitignore index cc92c73022..94412d52e7 100644 --- a/units/.gitignore +++ b/units/.gitignore @@ -1,3 +1,4 @@ +/systemd-journald.service user@.service systemd-logind.service 
systemd-localed.service diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in new file mode 100644 index 0000000000..2cfc68482e --- /dev/null +++ b/units/systemd-journald.service.in @@ -0,0 +1,24 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# See systemd.special(7) for details + +[Unit] +Description=Journal Service +DefaultDependencies=no +Requires=systemd-journald.socket +After=systemd-journald.socket + +[Service] +ExecStart=@rootlibexecdir@/systemd-journald +NotifyAccess=all +StandardOutput=null +#CapabilityBoundingSet=CAP_SYS_ADMIN CAP_SETUID CAP_SETGID + +# Increase the default a bit in order to allow many simultaneous +# services being run since we keep one fd open per service. +LimitNOFILE=16384 diff --git a/units/systemd-journald.socket b/units/systemd-journald.socket new file mode 100644 index 0000000000..b439bc1fdc --- /dev/null +++ b/units/systemd-journald.socket @@ -0,0 +1,24 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# See systemd.special(7) for details + +[Unit] +Description=Journal Socket +DefaultDependencies=no +Before=sockets.target syslog.target + +# Mount and swap units need this. If this socket unit is removed by an +# isolate request the mount and and swap units would be removed too, +# hence let's exclude this from isolate requests. 
+IgnoreOnIsolate=yes + +[Socket] +ListenStream=/run/systemd/stdout +ListenDatagram=/run/systemd/native +ListenDatagram=/dev/log +SocketMode=0666 -- cgit v1.2.3-54-g00ecf From 8b18eb674ce4d14e4819e102a0d6679a0fd2e6ce Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 30 Dec 2011 19:05:43 +0100 Subject: journald: forward all syslog messages to syslogd --- src/journal/journald.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++- units/syslog.socket | 2 +- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/src/journal/journald.c b/src/journal/journald.c index 52cdc7058c..b290b5d2c0 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -53,6 +53,8 @@ #define RECHECK_VAR_AVAILABLE_USEC (30*USEC_PER_SEC) +#define SYSLOG_TIMEOUT_USEC (5*USEC_PER_SEC) + typedef struct StdoutStream StdoutStream; typedef struct Server { @@ -1276,6 +1278,69 @@ finish: return r; } +static void forward_syslog(Server *s, const void *buffer, size_t length, struct ucred *ucred, struct timeval *tv) { + struct msghdr msghdr; + struct iovec iovec; + struct cmsghdr *cmsg; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(struct timeval))]; + } control; + union sockaddr_union sa; + + assert(s); + + zero(msghdr); + + zero(iovec); + iovec.iov_base = (void*) buffer; + iovec.iov_len = length; + msghdr.msg_iov = &iovec; + msghdr.msg_iovlen = 1; + + zero(sa); + sa.un.sun_family = AF_UNIX; + strncpy(sa.un.sun_path, "/run/systemd/syslog", sizeof(sa.un.sun_path)); + msghdr.msg_name = &sa; + msghdr.msg_namelen = offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path); + + zero(control); + msghdr.msg_control = &control; + msghdr.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred)); + msghdr.msg_controllen = 
cmsg->cmsg_len; + + /* Forward the syslog message we received via /dev/log to + * /run/systemd/syslog. Unfortunately we currently can't set + * the SO_TIMESTAMP auxiliary data, and hence we don't. */ + + if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0) + return; + + if (errno == ESRCH) { + struct ucred u; + + /* Hmm, presumably the sender process vanished + * by now, so let's fix it as good as we + * can, and retry */ + + u = *ucred; + u.pid = getpid(); + memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred)); + + if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0) + return; + } + + log_debug("Failed to forward syslog message: %m"); +} + static int process_event(Server *s, struct epoll_event *ev) { assert(s); @@ -1396,6 +1461,7 @@ static int process_event(Server *s, struct epoll_event *ev) { else s->buffer[n] = 0; + forward_syslog(s, s->buffer, n, ucred, tv); process_syslog_message(s, strstrip(s->buffer), ucred, tv); } else process_native_message(s, s->buffer, n, ucred, tv); @@ -1444,6 +1510,7 @@ static int open_syslog_socket(Server *s) { union sockaddr_union sa; int one, r; struct epoll_event ev; + struct timeval tv; assert(s); @@ -1457,7 +1524,7 @@ static int open_syslog_socket(Server *s) { zero(sa); sa.un.sun_family = AF_UNIX; - strncpy(sa.un.sun_path, "/run/systemd/syslog", sizeof(sa.un.sun_path)); + strncpy(sa.un.sun_path, "/dev/log", sizeof(sa.un.sun_path)); unlink(sa.un.sun_path); @@ -1484,6 +1551,15 @@ static int open_syslog_socket(Server *s) { return -errno; } + /* Since we use the same socket for forwarding this to some + * other syslog implementation, make sure we don't hang + * forever */ + timeval_store(&tv, SYSLOG_TIMEOUT_USEC); + if (setsockopt(s->syslog_fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) < 0) { + log_error("SO_SNDTIMEO failed: %m"); + return -errno; + } + zero(ev); ev.events = EPOLLIN; ev.data.fd = s->syslog_fd; diff --git a/units/syslog.socket b/units/syslog.socket index 500bb7c314..ca3d95ea35 100644 --- a/units/syslog.socket +++ 
b/units/syslog.socket @@ -16,7 +16,7 @@ Before=sockets.target syslog.target Wants=syslog.target [Socket] -ListenDatagram=/dev/log +ListenDatagram=/run/systemd/syslog SocketMode=0666 # The service we activate on incoming traffic is -- cgit v1.2.3-54-g00ecf From 783d2675eff73d1937bf8f78b368b1004c2d28c5 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 30 Dec 2011 22:15:58 +0100 Subject: journal: fix a few bad memory accesses and leaks --- src/journal/journal-rate-limit.c | 2 ++ src/journal/journald.c | 21 +++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/journal/journal-rate-limit.c b/src/journal/journal-rate-limit.c index f69ab2770f..243ff2a378 100644 --- a/src/journal/journal-rate-limit.c +++ b/src/journal/journal-rate-limit.c @@ -111,6 +111,8 @@ void journal_rate_limit_free(JournalRateLimit *r) { while (r->lru) journal_rate_limit_group_free(r->lru); + + free(r); } static bool journal_rate_limit_group_expired(JournalRateLimitGroup *g, usec_t ts) { diff --git a/src/journal/journald.c b/src/journal/journald.c index b290b5d2c0..8d6b3ab438 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -378,11 +378,22 @@ static char *shortened_cgroup_path(pid_t pid) { if (streq(init_path, "/")) init_path[0] = 0; - if (startswith(process_path, init_path)) - path = process_path + strlen(init_path); - else + if (startswith(process_path, init_path)) { + char *p; + + p = strdup(process_path + strlen(init_path)); + if (!p) { + free(process_path); + free(init_path); + return NULL; + } + path = p; + } else { path = process_path; + process_path = NULL; + } + free(process_path); free(init_path); return path; @@ -544,7 +555,7 @@ static void dispatch_message(Server *s, struct timeval *tv, int priority) { int rl; - char *path, *c; + char *path = NULL, *c; assert(s); assert(iovec || n == 0); @@ -1828,6 +1839,8 @@ static void server_done(Server *s) { if (s->rate_limit) journal_rate_limit_free(s->rate_limit); + + free(s->buffer); } 
int main(int argc, char *argv[]) { -- cgit v1.2.3-54-g00ecf From b3a0ad5ab142a142ab526aeb3d0b69e98e4e523c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 30 Dec 2011 22:29:23 +0100 Subject: journal: disable default debug logging --- src/journal/journalctl.c | 3 --- src/journal/journald.c | 1 - 2 files changed, 4 deletions(-) diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index da4f51021d..701518244c 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -444,9 +444,6 @@ int main(int argc, char *argv[]) { sd_journal *j = NULL; unsigned line = 0; - log_set_max_level(LOG_DEBUG); - log_set_target(LOG_TARGET_CONSOLE); - log_parse_environment(); log_open(); diff --git a/src/journal/journald.c b/src/journal/journald.c index 8d6b3ab438..a25f81ef66 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -1858,7 +1858,6 @@ int main(int argc, char *argv[]) { } log_set_target(LOG_TARGET_CONSOLE); - log_set_max_level(LOG_DEBUG); log_parse_environment(); log_open(); -- cgit v1.2.3-54-g00ecf From 74ef2d16ada74db3059d825ce8d24ea74946bf8f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 31 Dec 2011 00:57:14 +0100 Subject: journal: move max_use into metrics structure --- src/journal/journal-file.h | 3 ++- src/journal/journald.c | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index ab2970ca00..acc55272a0 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -28,7 +28,7 @@ #include "util.h" #include "sd-id128.h" -#define DEFAULT_MAX_SIZE (1024ULL*128ULL) +#define DEFAULT_MAX_SIZE (128ULL*1024ULL*1024ULL) #define DEFAULT_MIN_SIZE (256ULL*1024ULL) #define DEFAULT_KEEP_FREE (1ULL*1024ULL*1024ULL) #define DEFAULT_MAX_USE (16ULL*1024ULL*1024ULL*16ULL) @@ -54,6 +54,7 @@ typedef struct JournalMetrics { uint64_t max_size; uint64_t min_size; uint64_t keep_free; + uint64_t max_use; } JournalMetrics; typedef struct 
JournalFile { diff --git a/src/journal/journald.c b/src/journal/journald.c index a25f81ef66..78ccb4e05a 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -76,7 +76,6 @@ typedef struct Server { JournalRateLimit *rate_limit; JournalMetrics metrics; - uint64_t max_use; bool compress; uint64_t cached_available_space; @@ -176,7 +175,7 @@ static uint64_t available_space(Server *s) { sum += (uint64_t) st.st_blocks * (uint64_t) st.st_blksize; } - avail = sum >= s->max_use ? 0 : s->max_use - sum; + avail = sum >= s->metrics.max_use ? 0 : s->metrics.max_use - sum; ss_avail = ss.f_bsize * ss.f_bavail; @@ -341,7 +340,7 @@ static void server_vacuum(Server *s) { return; } - r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free); + r = journal_directory_vacuum(p, s->metrics.max_use, s->metrics.keep_free); if (r < 0 && r != -ENOENT) log_error("Failed to vacuum %s: %s", p, strerror(-r)); free(p); @@ -351,7 +350,7 @@ static void server_vacuum(Server *s) { return; } - r = journal_directory_vacuum(p, s->max_use, s->metrics.keep_free); + r = journal_directory_vacuum(p, s->metrics.max_use, s->metrics.keep_free); if (r < 0 && r != -ENOENT) log_error("Failed to vacuum %s: %s", p, strerror(-r)); free(p); @@ -1721,7 +1720,7 @@ static int server_init(Server *s) { s->metrics.max_size = DEFAULT_MAX_SIZE; s->metrics.min_size = DEFAULT_MIN_SIZE; s->metrics.keep_free = DEFAULT_KEEP_FREE; - s->max_use = DEFAULT_MAX_USE; + s->metrics.max_use = DEFAULT_MAX_USE; s->compress = true; s->user_journals = hashmap_new(trivial_hash_func, trivial_compare_func); -- cgit v1.2.3-54-g00ecf