summaryrefslogtreecommitdiff
path: root/src/grp-journal/libjournal-core/src
diff options
context:
space:
mode:
Diffstat (limited to 'src/grp-journal/libjournal-core/src')
l---------src/grp-journal/libjournal-core/src/GNUmakefile1
-rw-r--r--src/grp-journal/libjournal-core/src/Makefile56
-rw-r--r--src/grp-journal/libjournal-core/src/journald-audit.c564
-rw-r--r--src/grp-journal/libjournal-core/src/journald-console.c120
-rw-r--r--src/grp-journal/libjournal-core/src/journald-gperf.gperf47
-rw-r--r--src/grp-journal/libjournal-core/src/journald-kmsg.c473
-rw-r--r--src/grp-journal/libjournal-core/src/journald-native.c501
-rw-r--r--src/grp-journal/libjournal-core/src/journald-rate-limit.c271
-rw-r--r--src/grp-journal/libjournal-core/src/journald-server.c2162
-rw-r--r--src/grp-journal/libjournal-core/src/journald-stream.c788
-rw-r--r--src/grp-journal/libjournal-core/src/journald-syslog.c454
-rw-r--r--src/grp-journal/libjournal-core/src/journald-wall.c71
12 files changed, 5508 insertions, 0 deletions
diff --git a/src/grp-journal/libjournal-core/src/GNUmakefile b/src/grp-journal/libjournal-core/src/GNUmakefile
new file mode 120000
index 0000000000..13308a50cd
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/GNUmakefile
@@ -0,0 +1 @@
+../../../../GNUmakefile \ No newline at end of file
diff --git a/src/grp-journal/libjournal-core/src/Makefile b/src/grp-journal/libjournal-core/src/Makefile
new file mode 100644
index 0000000000..6ea0446e27
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/Makefile
@@ -0,0 +1,56 @@
+# -*- Mode: makefile; indent-tabs-mode: t -*-
+#
+# This file is part of systemd.
+#
+# Copyright 2010-2012 Lennart Poettering
+# Copyright 2010-2012 Kay Sievers
+# Copyright 2013 Zbigniew Jędrzejewski-Szmek
+# Copyright 2013 David Strauss
+# Copyright 2016 Luke Shumaker
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# systemd is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with systemd; If not, see <http://www.gnu.org/licenses/>.
+include $(dir $(lastword $(MAKEFILE_LIST)))/../../../../config.mk
+include $(topsrcdir)/build-aux/Makefile.head.mk
+
+libjournal_core_la_SOURCES = \
+ src/journal/journald-kmsg.c \
+ src/journal/journald-kmsg.h \
+ src/journal/journald-syslog.c \
+ src/journal/journald-syslog.h \
+ src/journal/journald-stream.c \
+ src/journal/journald-stream.h \
+ src/journal/journald-server.c \
+ src/journal/journald-server.h \
+ src/journal/journald-console.c \
+ src/journal/journald-console.h \
+ src/journal/journald-wall.c \
+ src/journal/journald-wall.h \
+ src/journal/journald-native.c \
+ src/journal/journald-native.h \
+ src/journal/journald-audit.c \
+ src/journal/journald-audit.h \
+ src/journal/journald-rate-limit.c \
+ src/journal/journald-rate-limit.h \
+ src/journal/journal-internal.h
+
+nodist_libjournal_core_la_SOURCES = \
+ src/journal/journald-gperf.c
+
+libjournal_core_la_LIBADD = \
+ libsystemd-shared.la
+
+noinst_LTLIBRARIES += \
+ libjournal-core.la
+
+include $(topsrcdir)/build-aux/Makefile.tail.mk
diff --git a/src/grp-journal/libjournal-core/src/journald-audit.c b/src/grp-journal/libjournal-core/src/journald-audit.c
new file mode 100644
index 0000000000..65f925fdc4
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-audit.c
@@ -0,0 +1,564 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "journal-core/journald-audit.h"
+#include "sd-journal/audit-type.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/hexdecoct.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/missing.h"
+#include "systemd-basic/string-util.h"
+
+typedef struct MapField {
+ const char *audit_field;
+ const char *journal_field;
+ int (*map)(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, unsigned *n_iov);
+} MapField;
+
+static int map_simple_field(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, unsigned *n_iov) {
+ _cleanup_free_ char *c = NULL;
+ size_t l = 0, allocated = 0;
+ const char *e;
+
+ assert(field);
+ assert(p);
+ assert(iov);
+ assert(n_iov);
+
+ l = strlen(field);
+ allocated = l + 1;
+ c = malloc(allocated);
+ if (!c)
+ return -ENOMEM;
+
+ memcpy(c, field, l);
+ for (e = *p; *e != ' ' && *e != 0; e++) {
+ if (!GREEDY_REALLOC(c, allocated, l+2))
+ return -ENOMEM;
+
+ c[l++] = *e;
+ }
+
+ c[l] = 0;
+
+ if (!GREEDY_REALLOC(*iov, *n_iov_allocated, *n_iov + 1))
+ return -ENOMEM;
+
+ (*iov)[*n_iov].iov_base = c;
+ (*iov)[*n_iov].iov_len = l;
+ (*n_iov)++;
+
+ *p = e;
+ c = NULL;
+
+ return 1;
+}
+
+static int map_string_field_internal(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, unsigned *n_iov, bool filter_printable) {
+ _cleanup_free_ char *c = NULL;
+ const char *s, *e;
+ size_t l;
+
+ assert(field);
+ assert(p);
+ assert(iov);
+ assert(n_iov);
+
+ /* The kernel formats string fields in one of two formats. */
+
+ if (**p == '"') {
+ /* Normal quoted syntax */
+ s = *p + 1;
+ e = strchr(s, '"');
+ if (!e)
+ return 0;
+
+ l = strlen(field) + (e - s);
+ c = malloc(l+1);
+ if (!c)
+ return -ENOMEM;
+
+ *((char*) mempcpy(stpcpy(c, field), s, e - s)) = 0;
+
+ e += 1;
+
+ } else if (unhexchar(**p) >= 0) {
+ /* Hexadecimal escaping */
+ size_t allocated = 0;
+
+ l = strlen(field);
+ allocated = l + 2;
+ c = malloc(allocated);
+ if (!c)
+ return -ENOMEM;
+
+ memcpy(c, field, l);
+ for (e = *p; *e != ' ' && *e != 0; e += 2) {
+ int a, b;
+ uint8_t x;
+
+ a = unhexchar(e[0]);
+ if (a < 0)
+ return 0;
+
+ b = unhexchar(e[1]);
+ if (b < 0)
+ return 0;
+
+ x = ((uint8_t) a << 4 | (uint8_t) b);
+
+ if (filter_printable && x < (uint8_t) ' ')
+ x = (uint8_t) ' ';
+
+ if (!GREEDY_REALLOC(c, allocated, l+2))
+ return -ENOMEM;
+
+ c[l++] = (char) x;
+ }
+
+ c[l] = 0;
+ } else
+ return 0;
+
+ if (!GREEDY_REALLOC(*iov, *n_iov_allocated, *n_iov + 1))
+ return -ENOMEM;
+
+ (*iov)[*n_iov].iov_base = c;
+ (*iov)[*n_iov].iov_len = l;
+ (*n_iov)++;
+
+ *p = e;
+ c = NULL;
+
+ return 1;
+}
+
+static int map_string_field(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, unsigned *n_iov) {
+ return map_string_field_internal(field, p, iov, n_iov_allocated, n_iov, false);
+}
+
+static int map_string_field_printable(const char *field, const char **p, struct iovec **iov, size_t *n_iov_allocated, unsigned *n_iov) {
+ return map_string_field_internal(field, p, iov, n_iov_allocated, n_iov, true);
+}
+
+static int map_generic_field(const char *prefix, const char **p, struct iovec **iov, size_t *n_iov_allocated, unsigned *n_iov) {
+ const char *e, *f;
+ char *c, *t;
+ int r;
+
+ /* Implements fallback mappings for all fields we don't know */
+
+ for (e = *p; e < *p + 16; e++) {
+
+ if (*e == 0 || *e == ' ')
+ return 0;
+
+ if (*e == '=')
+ break;
+
+ if (!((*e >= 'a' && *e <= 'z') ||
+ (*e >= 'A' && *e <= 'Z') ||
+ (*e >= '0' && *e <= '9') ||
+ *e == '_' || *e == '-'))
+ return 0;
+ }
+
+ if (e <= *p || e >= *p + 16)
+ return 0;
+
+ c = alloca(strlen(prefix) + (e - *p) + 2);
+
+ t = stpcpy(c, prefix);
+ for (f = *p; f < e; f++) {
+ char x;
+
+ if (*f >= 'a' && *f <= 'z')
+ x = (*f - 'a') + 'A'; /* uppercase */
+ else if (*f == '-')
+ x = '_'; /* dashes → underscores */
+ else
+ x = *f;
+
+ *(t++) = x;
+ }
+ strcpy(t, "=");
+
+ e++;
+
+ r = map_simple_field(c, &e, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return r;
+
+ *p = e;
+ return r;
+}
+
+/* Kernel fields are those occurring in the audit string before
+ * msg='. All of these fields are trusted, hence carry the "_" prefix.
+ * We try to translate the fields we know into our native names. The
+ * other's are generically mapped to _AUDIT_FIELD_XYZ= */
+static const MapField map_fields_kernel[] = {
+
+ /* First, we map certain well-known audit fields into native
+ * well-known fields */
+ { "pid=", "_PID=", map_simple_field },
+ { "ppid=", "_PPID=", map_simple_field },
+ { "uid=", "_UID=", map_simple_field },
+ { "euid=", "_EUID=", map_simple_field },
+ { "fsuid=", "_FSUID=", map_simple_field },
+ { "gid=", "_GID=", map_simple_field },
+ { "egid=", "_EGID=", map_simple_field },
+ { "fsgid=", "_FSGID=", map_simple_field },
+ { "tty=", "_TTY=", map_simple_field },
+ { "ses=", "_AUDIT_SESSION=", map_simple_field },
+ { "auid=", "_AUDIT_LOGINUID=", map_simple_field },
+ { "subj=", "_SELINUX_CONTEXT=", map_simple_field },
+ { "comm=", "_COMM=", map_string_field },
+ { "exe=", "_EXE=", map_string_field },
+ { "proctitle=", "_CMDLINE=", map_string_field_printable },
+
+ /* Some fields don't map to native well-known fields. However,
+ * we know that they are string fields, hence let's undo
+ * string field escaping for them, though we stick to the
+ * generic field names. */
+ { "path=", "_AUDIT_FIELD_PATH=", map_string_field },
+ { "dev=", "_AUDIT_FIELD_DEV=", map_string_field },
+ { "name=", "_AUDIT_FIELD_NAME=", map_string_field },
+ {}
+};
+
+/* Userspace fields are those occurring in the audit string after
+ * msg='. All of these fields are untrusted, hence carry no "_"
+ * prefix. We map the fields we don't know to AUDIT_FIELD_XYZ= */
+static const MapField map_fields_userspace[] = {
+ { "cwd=", "AUDIT_FIELD_CWD=", map_string_field },
+ { "cmd=", "AUDIT_FIELD_CMD=", map_string_field },
+ { "acct=", "AUDIT_FIELD_ACCT=", map_string_field },
+ { "exe=", "AUDIT_FIELD_EXE=", map_string_field },
+ { "comm=", "AUDIT_FIELD_COMM=", map_string_field },
+ {}
+};
+
+static int map_all_fields(
+ const char *p,
+ const MapField map_fields[],
+ const char *prefix,
+ bool handle_msg,
+ struct iovec **iov,
+ size_t *n_iov_allocated,
+ unsigned *n_iov) {
+
+ int r;
+
+ assert(p);
+ assert(iov);
+ assert(n_iov_allocated);
+ assert(n_iov);
+
+ for (;;) {
+ bool mapped = false;
+ const MapField *m;
+ const char *v;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p == 0)
+ return 0;
+
+ if (handle_msg) {
+ v = startswith(p, "msg='");
+ if (v) {
+ const char *e;
+ char *c;
+
+ /* Userspace message. It's enclosed in
+ simple quotation marks, is not
+ escaped, but the last field in the
+ line, hence let's remove the
+ quotation mark, and apply the
+ userspace mapping instead of the
+ kernel mapping. */
+
+ e = endswith(v, "'");
+ if (!e)
+ return 0; /* don't continue splitting up if the final quotation mark is missing */
+
+ c = strndupa(v, e - v);
+ return map_all_fields(c, map_fields_userspace, "AUDIT_FIELD_", false, iov, n_iov_allocated, n_iov);
+ }
+ }
+
+ /* Try to map the kernel fields to our own names */
+ for (m = map_fields; m->audit_field; m++) {
+ v = startswith(p, m->audit_field);
+ if (!v)
+ continue;
+
+ r = m->map(m->journal_field, &v, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse audit array: %m");
+
+ if (r > 0) {
+ mapped = true;
+ p = v;
+ break;
+ }
+ }
+
+ if (!mapped) {
+ r = map_generic_field(prefix, &p, iov, n_iov_allocated, n_iov);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse audit array: %m");
+
+ if (r == 0)
+ /* Couldn't process as generic field, let's just skip over it */
+ p += strcspn(p, WHITESPACE);
+ }
+ }
+}
+
+static void process_audit_string(Server *s, int type, const char *data, size_t size) {
+ _cleanup_free_ struct iovec *iov = NULL;
+ size_t n_iov_allocated = 0;
+ unsigned n_iov = 0, k;
+ uint64_t seconds, msec, id;
+ const char *p, *type_name;
+ unsigned z;
+ char id_field[sizeof("_AUDIT_ID=") + DECIMAL_STR_MAX(uint64_t)],
+ type_field[sizeof("_AUDIT_TYPE=") + DECIMAL_STR_MAX(int)],
+ source_time_field[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
+ char *m;
+
+ assert(s);
+
+ if (size <= 0)
+ return;
+
+ if (!data)
+ return;
+
+ /* Note that the input buffer is NUL terminated, but let's
+ * check whether there is a spurious NUL byte */
+ if (memchr(data, 0, size))
+ return;
+
+ p = startswith(data, "audit");
+ if (!p)
+ return;
+
+ if (sscanf(p, "(%" PRIu64 ".%" PRIu64 ":%" PRIu64 "):%n",
+ &seconds,
+ &msec,
+ &id,
+ &k) != 3)
+ return;
+
+ p += k;
+ p += strspn(p, WHITESPACE);
+
+ if (isempty(p))
+ return;
+
+ n_iov_allocated = N_IOVEC_META_FIELDS + 7;
+ iov = new(struct iovec, n_iov_allocated);
+ if (!iov) {
+ log_oom();
+ return;
+ }
+
+ IOVEC_SET_STRING(iov[n_iov++], "_TRANSPORT=audit");
+
+ sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
+ (usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
+ IOVEC_SET_STRING(iov[n_iov++], source_time_field);
+
+ sprintf(type_field, "_AUDIT_TYPE=%i", type);
+ IOVEC_SET_STRING(iov[n_iov++], type_field);
+
+ sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
+ IOVEC_SET_STRING(iov[n_iov++], id_field);
+
+ assert_cc(4 == LOG_FAC(LOG_AUTH));
+ IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_FACILITY=4");
+ IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_IDENTIFIER=audit");
+
+ type_name = audit_type_name_alloca(type);
+
+ m = strjoina("MESSAGE=", type_name, " ", p);
+ IOVEC_SET_STRING(iov[n_iov++], m);
+
+ z = n_iov;
+
+ map_all_fields(p, map_fields_kernel, "_AUDIT_FIELD_", true, &iov, &n_iov_allocated, &n_iov);
+
+ if (!GREEDY_REALLOC(iov, n_iov_allocated, n_iov + N_IOVEC_META_FIELDS)) {
+ log_oom();
+ goto finish;
+ }
+
+ server_dispatch_message(s, iov, n_iov, n_iov_allocated, NULL, NULL, NULL, 0, NULL, LOG_NOTICE, 0);
+
+finish:
+ /* free() all entries that map_all_fields() added. All others
+ * are allocated on the stack or are constant. */
+
+ for (; z < n_iov; z++)
+ free(iov[z].iov_base);
+}
+
+void server_process_audit_message(
+ Server *s,
+ const void *buffer,
+ size_t buffer_size,
+ const struct ucred *ucred,
+ const union sockaddr_union *sa,
+ socklen_t salen) {
+
+ const struct nlmsghdr *nl = buffer;
+
+ assert(s);
+
+ if (buffer_size < ALIGN(sizeof(struct nlmsghdr)))
+ return;
+
+ assert(buffer);
+
+ /* Filter out fake data */
+ if (!sa ||
+ salen != sizeof(struct sockaddr_nl) ||
+ sa->nl.nl_family != AF_NETLINK ||
+ sa->nl.nl_pid != 0) {
+ log_debug("Audit netlink message from invalid sender.");
+ return;
+ }
+
+ if (!ucred || ucred->pid != 0) {
+ log_debug("Audit netlink message with invalid credentials.");
+ return;
+ }
+
+ if (!NLMSG_OK(nl, buffer_size)) {
+ log_error("Audit netlink message truncated.");
+ return;
+ }
+
+ /* Ignore special Netlink messages */
+ if (IN_SET(nl->nlmsg_type, NLMSG_NOOP, NLMSG_ERROR))
+ return;
+
+ /* Below AUDIT_FIRST_USER_MSG theer are only control messages, let's ignore those */
+ if (nl->nlmsg_type < AUDIT_FIRST_USER_MSG)
+ return;
+
+ process_audit_string(s, nl->nlmsg_type, NLMSG_DATA(nl), nl->nlmsg_len - ALIGN(sizeof(struct nlmsghdr)));
+}
+
+static int enable_audit(int fd, bool b) {
+ struct {
+ union {
+ struct nlmsghdr header;
+ uint8_t header_space[NLMSG_HDRLEN];
+ };
+ struct audit_status body;
+ } _packed_ request = {
+ .header.nlmsg_len = NLMSG_LENGTH(sizeof(struct audit_status)),
+ .header.nlmsg_type = AUDIT_SET,
+ .header.nlmsg_flags = NLM_F_REQUEST,
+ .header.nlmsg_seq = 1,
+ .header.nlmsg_pid = 0,
+ .body.mask = AUDIT_STATUS_ENABLED,
+ .body.enabled = b,
+ };
+ union sockaddr_union sa = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_pid = 0,
+ };
+ struct iovec iovec = {
+ .iov_base = &request,
+ .iov_len = NLMSG_LENGTH(sizeof(struct audit_status)),
+ };
+ struct msghdr mh = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &sa.sa,
+ .msg_namelen = sizeof(sa.nl),
+ };
+
+ ssize_t n;
+
+ n = sendmsg(fd, &mh, MSG_NOSIGNAL);
+ if (n < 0)
+ return -errno;
+ if (n != NLMSG_LENGTH(sizeof(struct audit_status)))
+ return -EIO;
+
+ /* We don't wait for the result here, we can't do anything
+ * about it anyway */
+
+ return 0;
+}
+
+int server_open_audit(Server *s) {
+ static const int one = 1;
+ int r;
+
+ if (s->audit_fd < 0) {
+ static const union sockaddr_union sa = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_pid = 0,
+ .nl.nl_groups = AUDIT_NLGRP_READLOG,
+ };
+
+ s->audit_fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT);
+ if (s->audit_fd < 0) {
+ if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT)
+ log_debug("Audit not supported in the kernel.");
+ else
+ log_warning_errno(errno, "Failed to create audit socket, ignoring: %m");
+
+ return 0;
+ }
+
+ if (bind(s->audit_fd, &sa.sa, sizeof(sa.nl)) < 0) {
+ log_warning_errno(errno,
+ "Failed to join audit multicast group. "
+ "The kernel is probably too old or multicast reading is not supported. "
+ "Ignoring: %m");
+ s->audit_fd = safe_close(s->audit_fd);
+ return 0;
+ }
+ } else
+ fd_nonblock(s->audit_fd, 1);
+
+ r = setsockopt(s->audit_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
+ if (r < 0)
+ return log_error_errno(errno, "Failed to set SO_PASSCRED on audit socket: %m");
+
+ r = sd_event_add_io(s->event, &s->audit_event_source, s->audit_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add audit fd to event loop: %m");
+
+ /* We are listening now, try to enable audit */
+ r = enable_audit(s->audit_fd, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to issue audit enable call: %m");
+
+ return 0;
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-console.c b/src/grp-journal/libjournal-core/src/journald-console.c
new file mode 100644
index 0000000000..2dad27973e
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-console.c
@@ -0,0 +1,120 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <time.h>
+
+#include "journal-core/journald-console.h"
+#include "journal-core/journald-server.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fileio.h"
+#include "systemd-basic/formats-util.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/parse-util.h"
+#include "systemd-basic/process-util.h"
+#include "systemd-basic/stdio-util.h"
+#include "systemd-basic/terminal-util.h"
+
+static bool prefix_timestamp(void) {
+
+ static int cached_printk_time = -1;
+
+ if (_unlikely_(cached_printk_time < 0)) {
+ _cleanup_free_ char *p = NULL;
+
+ cached_printk_time =
+ read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0
+ && parse_boolean(p) > 0;
+ }
+
+ return cached_printk_time;
+}
+
+void server_forward_console(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ struct iovec iovec[5];
+ struct timespec ts;
+ char tbuf[sizeof("[] ")-1 + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
+ char header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t)];
+ int n = 0, fd;
+ _cleanup_free_ char *ident_buf = NULL;
+ const char *tty;
+
+ assert(s);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_console)
+ return;
+
+ /* First: timestamp */
+ if (prefix_timestamp()) {
+ assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
+ xsprintf(tbuf, "[%5"PRI_TIME".%06ld] ",
+ ts.tv_sec,
+ ts.tv_nsec / 1000);
+ IOVEC_SET_STRING(iovec[n++], tbuf);
+ }
+
+ /* Second: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ IOVEC_SET_STRING(iovec[n++], identifier);
+
+ IOVEC_SET_STRING(iovec[n++], header_pid);
+ } else if (identifier) {
+ IOVEC_SET_STRING(iovec[n++], identifier);
+ IOVEC_SET_STRING(iovec[n++], ": ");
+ }
+
+ /* Fourth: message */
+ IOVEC_SET_STRING(iovec[n++], message);
+ IOVEC_SET_STRING(iovec[n++], "\n");
+
+ tty = s->tty_path ? s->tty_path : "/dev/console";
+
+ /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
+ * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
+ * but minimizes the time window the kernel might end up killing journald due to SAK). It also makes things
+ * easier for us so that we don't have to recover from hangups and suchlike triggered on the console. */
+
+ fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to open %s for logging: %m", tty);
+ return;
+ }
+
+ if (writev(fd, iovec, n) < 0)
+ log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
+
+ safe_close(fd);
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-gperf.gperf b/src/grp-journal/libjournal-core/src/journald-gperf.gperf
new file mode 100644
index 0000000000..b898668ad1
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-gperf.gperf
@@ -0,0 +1,47 @@
+%{
+#include <stddef.h>
+#include <sys/socket.h>
+
+#include "journal-core/journald-server.h"
+#include "systemd-shared/conf-parser.h"
+%}
+struct ConfigPerfItem;
+%null_strings
+%language=ANSI-C
+%define slot-name section_and_lvalue
+%define hash-function-name journald_gperf_hash
+%define lookup-function-name journald_gperf_lookup
+%readonly-tables
+%omit-struct-type
+%struct-type
+%includes
+%%
+Journal.Storage, config_parse_storage, 0, offsetof(Server, storage)
+Journal.Compress, config_parse_bool, 0, offsetof(Server, compress)
+Journal.Seal, config_parse_bool, 0, offsetof(Server, seal)
+Journal.SyncIntervalSec, config_parse_sec, 0, offsetof(Server, sync_interval_usec)
+# The following is a legacy name for compatibility
+Journal.RateLimitInterval, config_parse_sec, 0, offsetof(Server, rate_limit_interval)
+Journal.RateLimitIntervalSec,config_parse_sec, 0, offsetof(Server, rate_limit_interval)
+Journal.RateLimitBurst, config_parse_unsigned, 0, offsetof(Server, rate_limit_burst)
+Journal.SystemMaxUse, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_use)
+Journal.SystemMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_size)
+Journal.SystemKeepFree, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.keep_free)
+Journal.SystemMaxFiles, config_parse_uint64, 0, offsetof(Server, system_storage.metrics.n_max_files)
+Journal.RuntimeMaxUse, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_use)
+Journal.RuntimeMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_size)
+Journal.RuntimeKeepFree, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.keep_free)
+Journal.RuntimeMaxFiles, config_parse_uint64, 0, offsetof(Server, runtime_storage.metrics.n_max_files)
+Journal.MaxRetentionSec, config_parse_sec, 0, offsetof(Server, max_retention_usec)
+Journal.MaxFileSec, config_parse_sec, 0, offsetof(Server, max_file_usec)
+Journal.ForwardToSyslog, config_parse_bool, 0, offsetof(Server, forward_to_syslog)
+Journal.ForwardToKMsg, config_parse_bool, 0, offsetof(Server, forward_to_kmsg)
+Journal.ForwardToConsole, config_parse_bool, 0, offsetof(Server, forward_to_console)
+Journal.ForwardToWall, config_parse_bool, 0, offsetof(Server, forward_to_wall)
+Journal.TTYPath, config_parse_path, 0, offsetof(Server, tty_path)
+Journal.MaxLevelStore, config_parse_log_level, 0, offsetof(Server, max_level_store)
+Journal.MaxLevelSyslog, config_parse_log_level, 0, offsetof(Server, max_level_syslog)
+Journal.MaxLevelKMsg, config_parse_log_level, 0, offsetof(Server, max_level_kmsg)
+Journal.MaxLevelConsole, config_parse_log_level, 0, offsetof(Server, max_level_console)
+Journal.MaxLevelWall, config_parse_log_level, 0, offsetof(Server, max_level_wall)
+Journal.SplitMode, config_parse_split_mode, 0, offsetof(Server, split_mode)
diff --git a/src/grp-journal/libjournal-core/src/journald-kmsg.c b/src/grp-journal/libjournal-core/src/journald-kmsg.c
new file mode 100644
index 0000000000..598c2d6c80
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-kmsg.c
@@ -0,0 +1,473 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <libudev.h>
+#include <systemd/sd-messages.h>
+
+#include "journal-core/journald-kmsg.h"
+#include "journal-core/journald-server.h"
+#include "journal-core/journald-syslog.h"
+#include "systemd-basic/escape.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/formats-util.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/parse-util.h"
+#include "systemd-basic/process-util.h"
+#include "systemd-basic/stdio-util.h"
+#include "systemd-basic/string-util.h"
+
+void server_forward_kmsg(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ struct iovec iovec[5];
+ char header_priority[DECIMAL_STR_MAX(priority) + 3],
+ header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
+ int n = 0;
+ char *ident_buf = NULL;
+
+ assert(s);
+ assert(priority >= 0);
+ assert(priority <= 999);
+ assert(message);
+
+ if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
+ return;
+
+ if (_unlikely_(s->dev_kmsg_fd < 0))
+ return;
+
+ /* Never allow messages with kernel facility to be written to
+ * kmsg, regardless where the data comes from. */
+ priority = syslog_fixup_facility(priority);
+
+ /* First: priority field */
+ xsprintf(header_priority, "<%i>", priority);
+ IOVEC_SET_STRING(iovec[n++], header_priority);
+
+ /* Second: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ IOVEC_SET_STRING(iovec[n++], identifier);
+
+ IOVEC_SET_STRING(iovec[n++], header_pid);
+ } else if (identifier) {
+ IOVEC_SET_STRING(iovec[n++], identifier);
+ IOVEC_SET_STRING(iovec[n++], ": ");
+ }
+
+ /* Fourth: message */
+ IOVEC_SET_STRING(iovec[n++], message);
+ IOVEC_SET_STRING(iovec[n++], "\n");
+
+ if (writev(s->dev_kmsg_fd, iovec, n) < 0)
+ log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
+
+ free(ident_buf);
+}
+
+static bool is_us(const char *pid) {
+ pid_t t;
+
+ assert(pid);
+
+ if (parse_pid(pid, &t) < 0)
+ return false;
+
+ return t == getpid();
+}
+
+static void dev_kmsg_record(Server *s, const char *p, size_t l) {
+ struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
+ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
+ int priority, r;
+ unsigned n = 0, z = 0, j;
+ unsigned long long usec;
+ char *identifier = NULL, *pid = NULL, *e, *f, *k;
+ uint64_t serial;
+ size_t pl;
+ char *kernel_device = NULL;
+
+ assert(s);
+ assert(p);
+
+ if (l <= 0)
+ return;
+
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atoi(p, &priority);
+ if (r < 0 || priority < 0 || priority > 999)
+ return;
+
+ if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
+ return;
+
+ l -= (e - p) + 1;
+ p = e + 1;
+ e = memchr(p, ',', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ r = safe_atou64(p, &serial);
+ if (r < 0)
+ return;
+
+ if (s->kernel_seqnum) {
+ /* We already read this one? */
+ if (serial < *s->kernel_seqnum)
+ return;
+
+ /* Did we lose any? */
+ if (serial > *s->kernel_seqnum)
+ server_driver_message(s, SD_MESSAGE_JOURNAL_MISSED,
+ LOG_MESSAGE("Missed %"PRIu64" kernel messages",
+ serial - *s->kernel_seqnum),
+ NULL);
+
+ /* Make sure we never read this one again. Note that
+ * we always store the next message serial we expect
+ * here, simply because this makes handling the first
+ * message with serial 0 easy. */
+ *s->kernel_seqnum = serial + 1;
+ }
+
+ l -= (e - p) + 1;
+ p = e + 1;
+ f = memchr(p, ';', l);
+ if (!f)
+ return;
+ /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
+ e = memchr(p, ',', l);
+ if (!e || f < e)
+ e = f;
+ *e = 0;
+
+ r = safe_atollu(p, &usec);
+ if (r < 0)
+ return;
+
+ l -= (f - p) + 1;
+ p = f + 1;
+ e = memchr(p, '\n', l);
+ if (!e)
+ return;
+ *e = 0;
+
+ pl = e - p;
+ l -= (e - p) + 1;
+ k = e + 1;
+
+ for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
+ char *m;
+ /* Metadata fields attached */
+
+ if (*k != ' ')
+ break;
+
+ k++, l--;
+
+ e = memchr(k, '\n', l);
+ if (!e)
+ return;
+
+ *e = 0;
+
+ if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &m) < 0)
+ break;
+
+ if (startswith(m, "_KERNEL_DEVICE="))
+ kernel_device = m + 15;
+
+ IOVEC_SET_STRING(iovec[n++], m);
+ z++;
+
+ l -= (e - k) + 1;
+ k = e + 1;
+ }
+
+ if (kernel_device) {
+ struct udev_device *ud;
+
+ ud = udev_device_new_from_device_id(s->udev, kernel_device);
+ if (ud) {
+ const char *g;
+ struct udev_list_entry *ll;
+ char *b;
+
+ g = udev_device_get_devnode(ud);
+ if (g) {
+ b = strappend("_UDEV_DEVNODE=", g);
+ if (b) {
+ IOVEC_SET_STRING(iovec[n++], b);
+ z++;
+ }
+ }
+
+ g = udev_device_get_sysname(ud);
+ if (g) {
+ b = strappend("_UDEV_SYSNAME=", g);
+ if (b) {
+ IOVEC_SET_STRING(iovec[n++], b);
+ z++;
+ }
+ }
+
+ j = 0;
+ ll = udev_device_get_devlinks_list_entry(ud);
+ udev_list_entry_foreach(ll, ll) {
+
+ if (j > N_IOVEC_UDEV_FIELDS)
+ break;
+
+ g = udev_list_entry_get_name(ll);
+ if (g) {
+ b = strappend("_UDEV_DEVLINK=", g);
+ if (b) {
+ IOVEC_SET_STRING(iovec[n++], b);
+ z++;
+ }
+ }
+
+ j++;
+ }
+
+ udev_device_unref(ud);
+ }
+ }
+
+ if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
+ IOVEC_SET_STRING(iovec[n++], source_time);
+
+ IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
+
+ if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
+ IOVEC_SET_STRING(iovec[n++], syslog_priority);
+
+ if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
+ IOVEC_SET_STRING(iovec[n++], syslog_facility);
+
+ if ((priority & LOG_FACMASK) == LOG_KERN)
+ IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
+ else {
+ pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
+
+ /* Avoid any messages we generated ourselves via
+ * log_info() and friends. */
+ if (pid && is_us(pid))
+ goto finish;
+
+ if (identifier) {
+ syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
+ if (syslog_identifier)
+ IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+ }
+
+ if (pid) {
+ syslog_pid = strappend("SYSLOG_PID=", pid);
+ if (syslog_pid)
+ IOVEC_SET_STRING(iovec[n++], syslog_pid);
+ }
+ }
+
+ if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
+ IOVEC_SET_STRING(iovec[n++], message);
+
+ server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority, 0);
+
+finish:
+ for (j = 0; j < z; j++)
+ free(iovec[j].iov_base);
+
+ free(message);
+ free(syslog_priority);
+ free(syslog_identifier);
+ free(syslog_pid);
+ free(syslog_facility);
+ free(source_time);
+ free(identifier);
+ free(pid);
+}
+
+static int server_read_dev_kmsg(Server *s) {
+ char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
+ ssize_t l;
+
+ assert(s);
+ assert(s->dev_kmsg_fd >= 0);
+
+ l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
+ if (l == 0)
+ return 0;
+ if (l < 0) {
+ /* Old kernels who don't allow reading from /dev/kmsg
+ * return EINVAL when we try. So handle this cleanly,
+ * but don' try to ever read from it again. */
+ if (errno == EINVAL) {
+ s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
+ return 0;
+ }
+
+ if (errno == EAGAIN || errno == EINTR || errno == EPIPE)
+ return 0;
+
+ return log_error_errno(errno, "Failed to read from kernel: %m");
+ }
+
+ dev_kmsg_record(s, buffer, l);
+ return 1;
+}
+
+int server_flush_dev_kmsg(Server *s) {
+ int r;
+
+ assert(s);
+
+ if (s->dev_kmsg_fd < 0)
+ return 0;
+
+ if (!s->dev_kmsg_readable)
+ return 0;
+
+ log_debug("Flushing /dev/kmsg...");
+
+ for (;;) {
+ r = server_read_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+
+ assert(es);
+ assert(fd == s->dev_kmsg_fd);
+ assert(s);
+
+ if (revents & EPOLLERR)
+ log_warning("/dev/kmsg buffer overrun, some messages lost.");
+
+ if (!(revents & EPOLLIN))
+ log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents);
+
+ return server_read_dev_kmsg(s);
+}
+
+int server_open_dev_kmsg(Server *s) {
+ int r;
+
+ assert(s);
+
+ s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (s->dev_kmsg_fd < 0) {
+ log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+ "Failed to open /dev/kmsg, ignoring: %m");
+ return 0;
+ }
+
+ r = sd_event_add_io(s->event, &s->dev_kmsg_event_source, s->dev_kmsg_fd, EPOLLIN, dispatch_dev_kmsg, s);
+ if (r < 0) {
+
+ /* This will fail with EPERM on older kernels where
+ * /dev/kmsg is not readable. */
+ if (r == -EPERM) {
+ r = 0;
+ goto fail;
+ }
+
+ log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_priority(s->dev_kmsg_event_source, SD_EVENT_PRIORITY_IMPORTANT+10);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust priority of kmsg event source: %m");
+ goto fail;
+ }
+
+ s->dev_kmsg_readable = true;
+
+ return 0;
+
+fail:
+ s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
+ s->dev_kmsg_fd = safe_close(s->dev_kmsg_fd);
+
+ return r;
+}
+
+int server_open_kernel_seqnum(Server *s) {
+ _cleanup_close_ int fd;
+ uint64_t *p;
+ int r;
+
+ assert(s);
+
+ /* We store the seqnum we last read in an mmaped file. That
+ * way we can just use it like a variable, but it is
+ * persistent and automatically flushed at reboot. */
+
+ fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
+ if (fd < 0) {
+ log_error_errno(errno, "Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
+ return 0;
+ }
+
+ r = posix_fallocate(fd, 0, sizeof(uint64_t));
+ if (r != 0) {
+ log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m");
+ return 0;
+ }
+
+ p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
+ return 0;
+ }
+
+ s->kernel_seqnum = p;
+
+ return 0;
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-native.c b/src/grp-journal/libjournal-core/src/journald-native.c
new file mode 100644
index 0000000000..536765f414
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-native.c
@@ -0,0 +1,501 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stddef.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "journal-core/journald-console.h"
+#include "journal-core/journald-kmsg.h"
+#include "journal-core/journald-native.h"
+#include "journal-core/journald-server.h"
+#include "journal-core/journald-syslog.h"
+#include "journal-core/journald-wall.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fs-util.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/memfd-util.h"
+#include "systemd-basic/parse-util.h"
+#include "systemd-basic/path-util.h"
+#include "systemd-basic/selinux-util.h"
+#include "systemd-basic/socket-util.h"
+#include "systemd-basic/string-util.h"
+
+bool valid_user_field(const char *p, size_t l, bool allow_protected) {
+ const char *a;
+
+ /* We kinda enforce POSIX syntax recommendations for
+ environment variables here, but make a couple of additional
+ requirements.
+
+ http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
+
+ /* No empty field names */
+ if (l <= 0)
+ return false;
+
+ /* Don't allow names longer than 64 chars */
+ if (l > 64)
+ return false;
+
+ /* Variables starting with an underscore are protected */
+ if (!allow_protected && p[0] == '_')
+ return false;
+
+ /* Don't allow digits as first character */
+ if (p[0] >= '0' && p[0] <= '9')
+ return false;
+
+ /* Only allow A-Z0-9 and '_' */
+ for (a = p; a < p + l; a++)
+ if ((*a < 'A' || *a > 'Z') &&
+ (*a < '0' || *a > '9') &&
+ *a != '_')
+ return false;
+
+ return true;
+}
+
+static bool allow_object_pid(const struct ucred *ucred) {
+ return ucred && ucred->uid == 0;
+}
+
+void server_process_native_message(
+ Server *s,
+ const void *buffer, size_t buffer_size,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ struct iovec *iovec = NULL;
+ unsigned n = 0, j, tn = (unsigned) -1;
+ const char *p;
+ size_t remaining, m = 0, entry_size = 0;
+ int priority = LOG_INFO;
+ char *identifier = NULL, *message = NULL;
+ pid_t object_pid = 0;
+
+ assert(s);
+ assert(buffer || buffer_size == 0);
+
+ p = buffer;
+ remaining = buffer_size;
+
+ while (remaining > 0) {
+ const char *e, *q;
+
+ e = memchr(p, '\n', remaining);
+
+ if (!e) {
+ /* Trailing noise, let's ignore it, and flush what we collected */
+ log_debug("Received message with trailing noise, ignoring.");
+ break;
+ }
+
+ if (e == p) {
+ /* Entry separator */
+
+ if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big with %u properties and %zu bytes, ignoring.", n, entry_size);
+ continue;
+ }
+
+ server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority, object_pid);
+ n = 0;
+ priority = LOG_INFO;
+ entry_size = 0;
+
+ p++;
+ remaining--;
+ continue;
+ }
+
+ if (*p == '.' || *p == '#') {
+ /* Ignore control commands for now, and
+ * comments too. */
+ remaining -= (e - p) + 1;
+ p = e + 1;
+ continue;
+ }
+
+ /* A property follows */
+
+ /* n existing properties, 1 new, +1 for _TRANSPORT */
+ if (!GREEDY_REALLOC(iovec, m, n + 2 + N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS)) {
+ log_oom();
+ break;
+ }
+
+ q = memchr(p, '=', e - p);
+ if (q) {
+ if (valid_user_field(p, q - p, false)) {
+ size_t l;
+
+ l = e - p;
+
+ /* If the field name starts with an
+ * underscore, skip the variable,
+ * since that indidates a trusted
+ * field */
+ iovec[n].iov_base = (char*) p;
+ iovec[n].iov_len = l;
+ entry_size += iovec[n].iov_len;
+ n++;
+
+ /* We need to determine the priority
+ * of this entry for the rate limiting
+ * logic */
+ if (l == 10 &&
+ startswith(p, "PRIORITY=") &&
+ p[9] >= '0' && p[9] <= '9')
+ priority = (priority & LOG_FACMASK) | (p[9] - '0');
+
+ else if (l == 17 &&
+ startswith(p, "SYSLOG_FACILITY=") &&
+ p[16] >= '0' && p[16] <= '9')
+ priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
+
+ else if (l == 18 &&
+ startswith(p, "SYSLOG_FACILITY=") &&
+ p[16] >= '0' && p[16] <= '9' &&
+ p[17] >= '0' && p[17] <= '9')
+ priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
+
+ else if (l >= 19 &&
+ startswith(p, "SYSLOG_IDENTIFIER=")) {
+ char *t;
+
+ t = strndup(p + 18, l - 18);
+ if (t) {
+ free(identifier);
+ identifier = t;
+ }
+
+ } else if (l >= 8 &&
+ startswith(p, "MESSAGE=")) {
+ char *t;
+
+ t = strndup(p + 8, l - 8);
+ if (t) {
+ free(message);
+ message = t;
+ }
+
+ } else if (l > strlen("OBJECT_PID=") &&
+ l < strlen("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) &&
+ startswith(p, "OBJECT_PID=") &&
+ allow_object_pid(ucred)) {
+ char buf[DECIMAL_STR_MAX(pid_t)];
+ memcpy(buf, p + strlen("OBJECT_PID="), l - strlen("OBJECT_PID="));
+ buf[l-strlen("OBJECT_PID=")] = '\0';
+
+ /* ignore error */
+ parse_pid(buf, &object_pid);
+ }
+ }
+
+ remaining -= (e - p) + 1;
+ p = e + 1;
+ continue;
+ } else {
+ le64_t l_le;
+ uint64_t l;
+ char *k;
+
+ if (remaining < e - p + 1 + sizeof(uint64_t) + 1) {
+ log_debug("Failed to parse message, ignoring.");
+ break;
+ }
+
+ memcpy(&l_le, e + 1, sizeof(uint64_t));
+ l = le64toh(l_le);
+
+ if (l > DATA_SIZE_MAX) {
+ log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring.", l);
+ break;
+ }
+
+ if ((uint64_t) remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
+ e[1+sizeof(uint64_t)+l] != '\n') {
+ log_debug("Failed to parse message, ignoring.");
+ break;
+ }
+
+ k = malloc((e - p) + 1 + l);
+ if (!k) {
+ log_oom();
+ break;
+ }
+
+ memcpy(k, p, e - p);
+ k[e - p] = '=';
+ memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
+
+ if (valid_user_field(p, e - p, false)) {
+ iovec[n].iov_base = k;
+ iovec[n].iov_len = (e - p) + 1 + l;
+ entry_size += iovec[n].iov_len;
+ n++;
+ } else
+ free(k);
+
+ remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
+ p = e + 1 + sizeof(uint64_t) + l + 1;
+ }
+ }
+
+ if (n <= 0)
+ goto finish;
+
+ tn = n++;
+ IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
+ entry_size += strlen("_TRANSPORT=journal");
+
+ if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+ log_debug("Entry is too big with %u properties and %zu bytes, ignoring.",
+ n, entry_size);
+ goto finish;
+ }
+
+ if (message) {
+ if (s->forward_to_syslog)
+ server_forward_syslog(s, priority, identifier, message, ucred, tv);
+
+ if (s->forward_to_kmsg)
+ server_forward_kmsg(s, priority, identifier, message, ucred);
+
+ if (s->forward_to_console)
+ server_forward_console(s, priority, identifier, message, ucred);
+
+ if (s->forward_to_wall)
+ server_forward_wall(s, priority, identifier, message, ucred);
+ }
+
+ server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority, object_pid);
+
+finish:
+ for (j = 0; j < n; j++) {
+ if (j == tn)
+ continue;
+
+ if (iovec[j].iov_base < buffer ||
+ (const uint8_t*) iovec[j].iov_base >= (const uint8_t*) buffer + buffer_size)
+ free(iovec[j].iov_base);
+ }
+
+ free(iovec);
+ free(identifier);
+ free(message);
+}
+
+void server_process_native_file(
+ Server *s,
+ int fd,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len) {
+
+ struct stat st;
+ bool sealed;
+ int r;
+
+ /* Data is in the passed fd, since it didn't fit in a
+ * datagram. */
+
+ assert(s);
+ assert(fd >= 0);
+
+ /* If it's a memfd, check if it is sealed. If so, we can just
+ * use map it and use it, and do not need to copy the data
+ * out. */
+ sealed = memfd_get_sealed(fd) > 0;
+
+ if (!sealed && (!ucred || ucred->uid != 0)) {
+ _cleanup_free_ char *sl = NULL, *k = NULL;
+ const char *e;
+
+ /* If this is not a sealed memfd, and the peer is unknown or
+ * unprivileged, then verify the path. */
+
+ if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = readlink_malloc(sl, &k);
+ if (r < 0) {
+ log_error_errno(r, "readlink(%s) failed: %m", sl);
+ return;
+ }
+
+ e = path_startswith(k, "/dev/shm/");
+ if (!e)
+ e = path_startswith(k, "/tmp/");
+ if (!e)
+ e = path_startswith(k, "/var/tmp/");
+ if (!e) {
+ log_error("Received file outside of allowed directories. Refusing.");
+ return;
+ }
+
+ if (!filename_is_valid(e)) {
+ log_error("Received file in subdirectory of allowed directories. Refusing.");
+ return;
+ }
+ }
+
+ if (fstat(fd, &st) < 0) {
+ log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
+ return;
+ }
+
+ if (!S_ISREG(st.st_mode)) {
+ log_error("File passed is not regular. Ignoring.");
+ return;
+ }
+
+ if (st.st_size <= 0)
+ return;
+
+ if (st.st_size > ENTRY_SIZE_MAX) {
+ log_error("File passed too large. Ignoring.");
+ return;
+ }
+
+ if (sealed) {
+ void *p;
+ size_t ps;
+
+ /* The file is sealed, we can just map it and use it. */
+
+ ps = PAGE_ALIGN(st.st_size);
+ p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (p == MAP_FAILED) {
+ log_error_errno(errno, "Failed to map memfd, ignoring: %m");
+ return;
+ }
+
+ server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
+ assert_se(munmap(p, ps) >= 0);
+ } else {
+ _cleanup_free_ void *p = NULL;
+ struct statvfs vfs;
+ ssize_t n;
+
+ if (fstatvfs(fd, &vfs) < 0) {
+ log_error_errno(errno, "Failed to stat file system of passed file, ignoring: %m");
+ return;
+ }
+
+ /* Refuse operating on file systems that have
+ * mandatory locking enabled, see:
+ *
+ * https://github.com/systemd/systemd/issues/1822
+ */
+ if (vfs.f_flag & ST_MANDLOCK) {
+ log_error("Received file descriptor from file system with mandatory locking enable, refusing.");
+ return;
+ }
+
+ /* Make the fd non-blocking. On regular files this has
+ * the effect of bypassing mandatory locking. Of
+ * course, this should normally not be necessary given
+ * the check above, but let's better be safe than
+ * sorry, after all NFS is pretty confusing regarding
+ * file system flags, and we better don't trust it,
+ * and so is SMB. */
+ r = fd_nonblock(fd, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to make fd non-blocking, ignoring: %m");
+ return;
+ }
+
+ /* The file is not sealed, we can't map the file here, since
+ * clients might then truncate it and trigger a SIGBUS for
+ * us. So let's stupidly read it */
+
+ p = malloc(st.st_size);
+ if (!p) {
+ log_oom();
+ return;
+ }
+
+ n = pread(fd, p, st.st_size, 0);
+ if (n < 0)
+ log_error_errno(errno, "Failed to read file, ignoring: %m");
+ else if (n > 0)
+ server_process_native_message(s, p, n, ucred, tv, label, label_len);
+ }
+}
+
+int server_open_native_socket(Server*s) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+ static const int one = 1;
+ int r;
+
+ assert(s);
+
+ if (s->native_fd < 0) {
+ s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->native_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) unlink(sa.un.sun_path);
+
+ r = bind(s->native_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+ } else
+ fd_nonblock(s->native_fd, 1);
+
+ r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
+ if (r < 0)
+ return log_error_errno(errno, "SO_PASSCRED failed: %m");
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_have()) {
+ r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
+ if (r < 0)
+ log_warning_errno(errno, "SO_PASSSEC failed: %m");
+ }
+#endif
+
+ r = setsockopt(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
+ if (r < 0)
+ return log_error_errno(errno, "SO_TIMESTAMP failed: %m");
+
+ r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add native server fd to event loop: %m");
+
+ r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust native event source priority: %m");
+
+ return 0;
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-rate-limit.c b/src/grp-journal/libjournal-core/src/journald-rate-limit.c
new file mode 100644
index 0000000000..10bff9df83
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-rate-limit.c
@@ -0,0 +1,271 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <errno.h>
+#include <string.h>
+
+#include "journal-core/journald-rate-limit.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/hashmap.h"
+#include "systemd-basic/list.h"
+#include "systemd-basic/random-util.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-basic/util.h"
+
+#define POOLS_MAX 5
+#define BUCKETS_MAX 127
+#define GROUPS_MAX 2047
+
+static const int priority_map[] = {
+ [LOG_EMERG] = 0,
+ [LOG_ALERT] = 0,
+ [LOG_CRIT] = 0,
+ [LOG_ERR] = 1,
+ [LOG_WARNING] = 2,
+ [LOG_NOTICE] = 3,
+ [LOG_INFO] = 3,
+ [LOG_DEBUG] = 4
+};
+
+typedef struct JournalRateLimitPool JournalRateLimitPool;
+typedef struct JournalRateLimitGroup JournalRateLimitGroup;
+
+struct JournalRateLimitPool {
+ usec_t begin;
+ unsigned num;
+ unsigned suppressed;
+};
+
+struct JournalRateLimitGroup {
+ JournalRateLimit *parent;
+
+ char *id;
+ JournalRateLimitPool pools[POOLS_MAX];
+ uint64_t hash;
+
+ LIST_FIELDS(JournalRateLimitGroup, bucket);
+ LIST_FIELDS(JournalRateLimitGroup, lru);
+};
+
+struct JournalRateLimit {
+ usec_t interval;
+ unsigned burst;
+
+ JournalRateLimitGroup* buckets[BUCKETS_MAX];
+ JournalRateLimitGroup *lru, *lru_tail;
+
+ unsigned n_groups;
+
+ uint8_t hash_key[16];
+};
+
+JournalRateLimit *journal_rate_limit_new(usec_t interval, unsigned burst) {
+ JournalRateLimit *r;
+
+ assert(interval > 0 || burst == 0);
+
+ r = new0(JournalRateLimit, 1);
+ if (!r)
+ return NULL;
+
+ r->interval = interval;
+ r->burst = burst;
+
+ random_bytes(r->hash_key, sizeof(r->hash_key));
+
+ return r;
+}
+
+static void journal_rate_limit_group_free(JournalRateLimitGroup *g) {
+ assert(g);
+
+ if (g->parent) {
+ assert(g->parent->n_groups > 0);
+
+ if (g->parent->lru_tail == g)
+ g->parent->lru_tail = g->lru_prev;
+
+ LIST_REMOVE(lru, g->parent->lru, g);
+ LIST_REMOVE(bucket, g->parent->buckets[g->hash % BUCKETS_MAX], g);
+
+ g->parent->n_groups--;
+ }
+
+ free(g->id);
+ free(g);
+}
+
+void journal_rate_limit_free(JournalRateLimit *r) {
+ assert(r);
+
+ while (r->lru)
+ journal_rate_limit_group_free(r->lru);
+
+ free(r);
+}
+
+_pure_ static bool journal_rate_limit_group_expired(JournalRateLimitGroup *g, usec_t ts) {
+ unsigned i;
+
+ assert(g);
+
+ for (i = 0; i < POOLS_MAX; i++)
+ if (g->pools[i].begin + g->parent->interval >= ts)
+ return false;
+
+ return true;
+}
+
+static void journal_rate_limit_vacuum(JournalRateLimit *r, usec_t ts) {
+ assert(r);
+
+ /* Makes room for at least one new item, but drop all
+ * expored items too. */
+
+ while (r->n_groups >= GROUPS_MAX ||
+ (r->lru_tail && journal_rate_limit_group_expired(r->lru_tail, ts)))
+ journal_rate_limit_group_free(r->lru_tail);
+}
+
+static JournalRateLimitGroup* journal_rate_limit_group_new(JournalRateLimit *r, const char *id, usec_t ts) {
+ JournalRateLimitGroup *g;
+ struct siphash state;
+
+ assert(r);
+ assert(id);
+
+ g = new0(JournalRateLimitGroup, 1);
+ if (!g)
+ return NULL;
+
+ g->id = strdup(id);
+ if (!g->id)
+ goto fail;
+
+ siphash24_init(&state, r->hash_key);
+ string_hash_func(g->id, &state);
+ g->hash = siphash24_finalize(&state);
+
+ journal_rate_limit_vacuum(r, ts);
+
+ LIST_PREPEND(bucket, r->buckets[g->hash % BUCKETS_MAX], g);
+ LIST_PREPEND(lru, r->lru, g);
+ if (!g->lru_next)
+ r->lru_tail = g;
+ r->n_groups++;
+
+ g->parent = r;
+ return g;
+
+fail:
+ journal_rate_limit_group_free(g);
+ return NULL;
+}
+
+static unsigned burst_modulate(unsigned burst, uint64_t available) {
+ unsigned k;
+
+ /* Modulates the burst rate a bit with the amount of available
+ * disk space */
+
+ k = u64log2(available);
+
+ /* 1MB */
+ if (k <= 20)
+ return burst;
+
+ burst = (burst * (k-16)) / 4;
+
+ /*
+ * Example:
+ *
+ * <= 1MB = rate * 1
+ * 16MB = rate * 2
+ * 256MB = rate * 3
+ * 4GB = rate * 4
+ * 64GB = rate * 5
+ * 1TB = rate * 6
+ */
+
+ return burst;
+}
+
+int journal_rate_limit_test(JournalRateLimit *r, const char *id, int priority, uint64_t available) {
+ uint64_t h;
+ JournalRateLimitGroup *g;
+ JournalRateLimitPool *p;
+ struct siphash state;
+ unsigned burst;
+ usec_t ts;
+
+ assert(id);
+
+ if (!r)
+ return 1;
+
+ if (r->interval == 0 || r->burst == 0)
+ return 1;
+
+ burst = burst_modulate(r->burst, available);
+
+ ts = now(CLOCK_MONOTONIC);
+
+ siphash24_init(&state, r->hash_key);
+ string_hash_func(id, &state);
+ h = siphash24_finalize(&state);
+ g = r->buckets[h % BUCKETS_MAX];
+
+ LIST_FOREACH(bucket, g, g)
+ if (streq(g->id, id))
+ break;
+
+ if (!g) {
+ g = journal_rate_limit_group_new(r, id, ts);
+ if (!g)
+ return -ENOMEM;
+ }
+
+ p = &g->pools[priority_map[priority]];
+
+ if (p->begin <= 0) {
+ p->suppressed = 0;
+ p->num = 1;
+ p->begin = ts;
+ return 1;
+ }
+
+ if (p->begin + r->interval < ts) {
+ unsigned s;
+
+ s = p->suppressed;
+ p->suppressed = 0;
+ p->num = 1;
+ p->begin = ts;
+
+ return 1 + s;
+ }
+
+ if (p->num < burst) {
+ p->num++;
+ return 1;
+ }
+
+ p->suppressed++;
+ return 0;
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-server.c b/src/grp-journal/libjournal-core/src/journald-server.c
new file mode 100644
index 0000000000..158e0c197c
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-server.c
@@ -0,0 +1,2162 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#ifdef HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/signalfd.h>
+#include <sys/statvfs.h>
+
+#include <linux/sockios.h>
+
+#include <libudev.h>
+#include <systemd/sd-daemon.h>
+#include <systemd/sd-journal.h>
+#include <systemd/sd-messages.h>
+
+#include "journal-core/journald-audit.h"
+#include "journal-core/journald-kmsg.h"
+#include "journal-core/journald-native.h"
+#include "journal-core/journald-rate-limit.h"
+#include "journal-core/journald-server.h"
+#include "journal-core/journald-stream.h"
+#include "journal-core/journald-syslog.h"
+#include "sd-id128/id128-util.h"
+#include "sd-journal/journal-authenticate.h"
+#include "sd-journal/journal-file.h"
+#include "sd-journal/journal-internal.h"
+#include "sd-journal/journal-vacuum.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/audit-util.h"
+#include "systemd-basic/cgroup-util.h"
+#include "systemd-basic/dirent-util.h"
+#include "systemd-basic/extract-word.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fileio.h"
+#include "systemd-basic/formats-util.h"
+#include "systemd-basic/fs-util.h"
+#include "systemd-basic/hashmap.h"
+#include "systemd-basic/hostname-util.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/log.h"
+#include "systemd-basic/missing.h"
+#include "systemd-basic/mkdir.h"
+#include "systemd-basic/parse-util.h"
+#include "systemd-basic/proc-cmdline.h"
+#include "systemd-basic/process-util.h"
+#include "systemd-basic/rm-rf.h"
+#include "systemd-basic/selinux-util.h"
+#include "systemd-basic/signal-util.h"
+#include "systemd-basic/socket-util.h"
+#include "systemd-basic/stdio-util.h"
+#include "systemd-basic/string-table.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-basic/syslog-util.h"
+#include "systemd-basic/user-util.h"
+#include "systemd-shared/acl-util.h"
+#include "systemd-shared/conf-parser.h"
+
+#define USER_JOURNALS_MAX 1024
+
+#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
+#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
+#define DEFAULT_RATE_LIMIT_BURST 1000
+#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
+
+#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
+
+#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
+
+/* The period to insert between posting changes for coalescing */
+#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
+
+static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ struct statvfs ss;
+
+ assert(ret_used);
+ assert(ret_free);
+
+ d = opendir(path);
+ if (!d)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
+ errno, "Failed to open %s: %m", path);
+
+ if (fstatvfs(dirfd(d), &ss) < 0)
+ return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
+
+ *ret_free = ss.f_bsize * ss.f_bavail;
+ *ret_used = 0;
+ FOREACH_DIRENT_ALL(de, d, break) {
+ struct stat st;
+
+ if (!endswith(de->d_name, ".journal") &&
+ !endswith(de->d_name, ".journal~"))
+ continue;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ *ret_used += (uint64_t) st.st_blocks * 512UL;
+ }
+
+ return 0;
+}
+
+static void cache_space_invalidate(JournalStorageSpace *space) {
+ memset(space, 0, sizeof(*space));
+}
+
+static int cache_space_refresh(Server *s, JournalStorage *storage) {
+ JournalStorageSpace *space;
+ JournalMetrics *metrics;
+ uint64_t vfs_used, vfs_avail, avail;
+ usec_t ts;
+ int r;
+
+ assert(s);
+
+ metrics = &storage->metrics;
+ space = &storage->space;
+
+ ts = now(CLOCK_MONOTONIC);
+
+ if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
+ return 0;
+
+ r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
+ if (r < 0)
+ return r;
+
+ space->vfs_used = vfs_used;
+ space->vfs_available = vfs_avail;
+
+ avail = LESS_BY(vfs_avail, metrics->keep_free);
+
+ space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
+ space->available = LESS_BY(space->limit, vfs_used);
+ space->timestamp = ts;
+ return 1;
+}
+
+static void patch_min_use(JournalStorage *storage) {
+ assert(storage);
+
+ /* Let's bump the min_use limit to the current usage on disk. We do
+ * this when starting up and first opening the journal files. This way
+ * sudden spikes in disk usage will not cause journald to vacuum files
+ * without bounds. Note that this means that only a restart of journald
+ * will make it reset this value. */
+
+ storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
+}
+
+
+static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
+ JournalStorage *js;
+ int r;
+
+ assert(s);
+
+ js = s->system_journal ? &s->system_storage : &s->runtime_storage;
+
+ r = cache_space_refresh(s, js);
+ if (r >= 0) {
+ if (available)
+ *available = js->space.available;
+ if (limit)
+ *limit = js->space.limit;
+ }
+ return r;
+}
+
+void server_space_usage_message(Server *s, JournalStorage *storage) {
+ char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
+ fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
+ JournalMetrics *metrics;
+
+ assert(s);
+
+ if (!storage)
+ storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
+
+ if (cache_space_refresh(s, storage) < 0)
+ return;
+
+ metrics = &storage->metrics;
+ format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
+ format_bytes(fb2, sizeof(fb2), metrics->max_use);
+ format_bytes(fb3, sizeof(fb3), metrics->keep_free);
+ format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
+ format_bytes(fb5, sizeof(fb5), storage->space.limit);
+ format_bytes(fb6, sizeof(fb6), storage->space.available);
+
+ server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
+ LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
+ storage->name, storage->path, fb1, fb5, fb6),
+ "JOURNAL_NAME=%s", storage->name,
+ "JOURNAL_PATH=%s", storage->path,
+ "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
+ "CURRENT_USE_PRETTY=%s", fb1,
+ "MAX_USE=%"PRIu64, metrics->max_use,
+ "MAX_USE_PRETTY=%s", fb2,
+ "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
+ "DISK_KEEP_FREE_PRETTY=%s", fb3,
+ "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
+ "DISK_AVAILABLE_PRETTY=%s", fb4,
+ "LIMIT=%"PRIu64, storage->space.limit,
+ "LIMIT_PRETTY=%s", fb5,
+ "AVAILABLE=%"PRIu64, storage->space.available,
+ "AVAILABLE_PRETTY=%s", fb6,
+ NULL);
+}
+
+static void server_add_acls(JournalFile *f, uid_t uid) {
+#ifdef HAVE_ACL
+ int r;
+#endif
+ assert(f);
+
+#ifdef HAVE_ACL
+ if (uid <= SYSTEM_UID_MAX)
+ return;
+
+ r = add_acls_for_user(f->fd, uid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
+#endif
+}
+
+static int open_journal(
+ Server *s,
+ bool reliably,
+ const char *fname,
+ int flags,
+ bool seal,
+ JournalMetrics *metrics,
+ JournalFile **ret) {
+ int r;
+ JournalFile *f;
+
+ assert(s);
+ assert(fname);
+ assert(ret);
+
+ if (reliably)
+ r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
+ else
+ r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
+ if (r < 0) {
+ (void) journal_file_close(f);
+ return r;
+ }
+
+ *ret = f;
+ return r;
+}
+
+static bool flushed_flag_is_set(void) {
+ return (access("/run/systemd/journal/flushed", F_OK) >= 0);
+}
+
+static int system_journal_open(Server *s, bool flush_requested) {
+ bool flushed = false;
+ const char *fn;
+ int r = 0;
+
+ if (!s->system_journal &&
+ (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
+ (flush_requested || (flushed = flushed_flag_is_set()))) {
+
+ /* If in auto mode: first try to create the machine
+ * path, but not the prefix.
+ *
+ * If in persistent mode: create /var/log/journal and
+ * the machine path */
+
+ if (s->storage == STORAGE_PERSISTENT)
+ (void) mkdir_p("/var/log/journal/", 0755);
+
+ (void) mkdir(s->system_storage.path, 0755);
+
+ fn = strjoina(s->system_storage.path, "/system.journal");
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
+ if (r >= 0) {
+ server_add_acls(s->system_journal, 0);
+ (void) cache_space_refresh(s, &s->system_storage);
+ patch_min_use(&s->system_storage);
+ } else if (r < 0) {
+ if (r != -ENOENT && r != -EROFS)
+ log_warning_errno(r, "Failed to open system journal: %m");
+
+ r = 0;
+ }
+
+ /* If the runtime journal is open, and we're post-flush, we're
+ * recovering from a failed system journal rotate (ENOSPC)
+ * for which the runtime journal was reopened.
+ *
+ * Perform an implicit flush to var, leaving the runtime
+ * journal closed, now that the system journal is back.
+ */
+ if (s->runtime_journal && flushed)
+ (void) server_flush_to_var(s);
+ }
+
+ if (!s->runtime_journal &&
+ (s->storage != STORAGE_NONE)) {
+
+ fn = strjoina(s->runtime_storage.path, "/system.journal");
+
+ if (s->system_journal) {
+
+ /* Try to open the runtime journal, but only
+ * if it already exists, so that we can flush
+ * it into the system journal */
+
+ r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_warning_errno(r, "Failed to open runtime journal: %m");
+
+ r = 0;
+ }
+
+ } else {
+
+ /* OK, we really need the runtime journal, so create
+ * it if necessary. */
+
+ (void) mkdir("/run/log", 0755);
+ (void) mkdir("/run/log/journal", 0755);
+ (void) mkdir_parents(fn, 0750);
+
+ r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open runtime journal: %m");
+ }
+
+ if (s->runtime_journal) {
+ server_add_acls(s->runtime_journal, 0);
+ (void) cache_space_refresh(s, &s->runtime_storage);
+ patch_min_use(&s->runtime_storage);
+ }
+ }
+
+ return r;
+}
+
+static JournalFile* find_journal(Server *s, uid_t uid) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+ JournalFile *f;
+ sd_id128_t machine;
+
+ assert(s);
+
+ /* A rotate that fails to create the new journal (ENOSPC) leaves the
+ * rotated journal as NULL. Unless we revisit opening, even after
+ * space is made available we'll continue to return NULL indefinitely.
+ *
+ * system_journal_open() is a noop if the journals are already open, so
+ * we can just call it here to recover from failed rotates (or anything
+ * else that's left the journals as NULL).
+ *
+ * Fixes https://github.com/systemd/systemd/issues/3968 */
+ (void) system_journal_open(s, false);
+
+ /* We split up user logs only on /var, not on /run. If the
+ * runtime file is open, we write to it exclusively, in order
+ * to guarantee proper order as soon as we flush /run to
+ * /var and close the runtime file. */
+
+ if (s->runtime_journal)
+ return s->runtime_journal;
+
+ if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
+ return s->system_journal;
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return s->system_journal;
+
+ f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
+ if (f)
+ return f;
+
+ if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
+ SD_ID128_FORMAT_VAL(machine), uid) < 0)
+ return s->system_journal;
+
+ while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
+ /* Too many open? Then let's close one */
+ f = ordered_hashmap_steal_first(s->user_journals);
+ assert(f);
+ (void) journal_file_close(f);
+ }
+
+ r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
+ if (r < 0)
+ return s->system_journal;
+
+ server_add_acls(f, uid);
+
+ r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
+ if (r < 0) {
+ (void) journal_file_close(f);
+ return s->system_journal;
+ }
+
+ return f;
+}
+
+static int do_rotate(
+ Server *s,
+ JournalFile **f,
+ const char* name,
+ bool seal,
+ uint32_t uid) {
+
+ int r;
+ assert(s);
+
+ if (!*f)
+ return -EINVAL;
+
+ r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
+ if (r < 0)
+ if (*f)
+ log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
+ else
+ log_error_errno(r, "Failed to create new %s journal: %m", name);
+ else
+ server_add_acls(*f, uid);
+
+ return r;
+}
+
+void server_rotate(Server *s) {
+ JournalFile *f;
+ void *k;
+ Iterator i;
+ int r;
+
+ log_debug("Rotating...");
+
+ (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
+ (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
+
+ ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
+ r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
+ if (r >= 0)
+ ordered_hashmap_replace(s->user_journals, k, f);
+ else if (!f)
+ /* Old file has been closed and deallocated */
+ ordered_hashmap_remove(s->user_journals, k);
+ }
+
+ /* Perform any deferred closes which aren't still offlining. */
+ SET_FOREACH(f, s->deferred_closes, i)
+ if (!journal_file_is_offlining(f)) {
+ (void) set_remove(s->deferred_closes, f);
+ (void) journal_file_close(f);
+ }
+}
+
+void server_sync(Server *s) {
+ JournalFile *f;
+ Iterator i;
+ int r;
+
+ if (s->system_journal) {
+ r = journal_file_set_offline(s->system_journal, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
+ }
+
+ ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
+ r = journal_file_set_offline(f, false);
+ if (r < 0)
+ log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
+ }
+
+ if (s->sync_event_source) {
+ r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
+ if (r < 0)
+ log_error_errno(r, "Failed to disable sync timer source: %m");
+ }
+
+ s->sync_scheduled = false;
+}
+
+static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
+
+ int r;
+
+ assert(s);
+ assert(storage);
+
+ (void) cache_space_refresh(s, storage);
+
+ if (verbose)
+ server_space_usage_message(s, storage);
+
+ r = journal_directory_vacuum(storage->path, storage->space.limit,
+ storage->metrics.n_max_files, s->max_retention_usec,
+ &s->oldest_file_usec, verbose);
+ if (r < 0 && r != -ENOENT)
+ log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
+
+ cache_space_invalidate(&storage->space);
+}
+
+int server_vacuum(Server *s, bool verbose) {
+ assert(s);
+
+ log_debug("Vacuuming...");
+
+ s->oldest_file_usec = 0;
+
+ if (s->system_journal)
+ do_vacuum(s, &s->system_storage, verbose);
+ if (s->runtime_journal)
+ do_vacuum(s, &s->runtime_storage, verbose);
+
+ return 0;
+}
+
+static void server_cache_machine_id(Server *s) {
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return;
+
+ sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
+}
+
+static void server_cache_boot_id(Server *s) {
+ sd_id128_t id;
+ int r;
+
+ assert(s);
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return;
+
+ sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
+}
+
+static void server_cache_hostname(Server *s) {
+ _cleanup_free_ char *t = NULL;
+ char *x;
+
+ assert(s);
+
+ t = gethostname_malloc();
+ if (!t)
+ return;
+
+ x = strappend("_HOSTNAME=", t);
+ if (!x)
+ return;
+
+ free(s->hostname_field);
+ s->hostname_field = x;
+}
+
+static bool shall_try_append_again(JournalFile *f, int r) {
+ switch(r) {
+
+ case -E2BIG: /* Hit configured limit */
+ case -EFBIG: /* Hit fs limit */
+ case -EDQUOT: /* Quota limit hit */
+ case -ENOSPC: /* Disk full */
+ log_debug("%s: Allocation limit reached, rotating.", f->path);
+ return true;
+
+ case -EIO: /* I/O error of some kind (mmap) */
+ log_warning("%s: IO error, rotating.", f->path);
+ return true;
+
+ case -EHOSTDOWN: /* Other machine */
+ log_info("%s: Journal file from other machine, rotating.", f->path);
+ return true;
+
+ case -EBUSY: /* Unclean shutdown */
+ log_info("%s: Unclean shutdown, rotating.", f->path);
+ return true;
+
+ case -EPROTONOSUPPORT: /* Unsupported feature */
+ log_info("%s: Unsupported feature, rotating.", f->path);
+ return true;
+
+ case -EBADMSG: /* Corrupted */
+ case -ENODATA: /* Truncated */
+ case -ESHUTDOWN: /* Already archived */
+ log_warning("%s: Journal file corrupted, rotating.", f->path);
+ return true;
+
+ case -EIDRM: /* Journal file has been deleted */
+ log_warning("%s: Journal file has been deleted, rotating.", f->path);
+ return true;
+
+ case -ETXTBSY: /* Journal file is from the future */
+ log_warning("%s: Journal file is from the future, rotating.", f->path);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
+ bool vacuumed = false, rotate = false;
+ struct dual_timestamp ts;
+ JournalFile *f;
+ int r;
+
+ assert(s);
+ assert(iovec);
+ assert(n > 0);
+
+ /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
+ * the source time, and not even the time the event was originally seen, but instead simply the time we started
+ * processing it, as we want strictly linear ordering in what we write out.) */
+ assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
+ assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
+
+ if (ts.realtime < s->last_realtime_clock) {
+ /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
+ * regular operation. However, when it does happen, then we should make sure that we start fresh files
+ * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
+ * bisection works correctly. */
+
+ log_debug("Time jumped backwards, rotating.");
+ rotate = true;
+ } else {
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ if (journal_file_rotate_suggested(f, s->max_file_usec)) {
+ log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
+ rotate = true;
+ }
+ }
+
+ if (rotate) {
+ server_rotate(s);
+ server_vacuum(s, false);
+ vacuumed = true;
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+ }
+
+ s->last_realtime_clock = ts.realtime;
+
+ r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
+ if (r >= 0) {
+ server_schedule_sync(s, priority);
+ return;
+ }
+
+ if (vacuumed || !shall_try_append_again(f, r)) {
+ log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
+ return;
+ }
+
+ server_rotate(s);
+ server_vacuum(s, false);
+
+ f = find_journal(s, uid);
+ if (!f)
+ return;
+
+ log_debug("Retrying write.");
+ r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
+ if (r < 0)
+ log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
+ else
+ server_schedule_sync(s, priority);
+}
+
+static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
+ _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
+ char *copy, ids[SD_ID128_STRING_MAX];
+ int r;
+
+ /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
+ * on the cgroup path. */
+
+ r = cg_slice_to_path(slice, &slice_path);
+ if (r < 0)
+ return r;
+
+ escaped = cg_escape(unit);
+ if (!escaped)
+ return -ENOMEM;
+
+ p = strjoin(cgroup_root, "/", slice_path, "/", escaped, NULL);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
+ if (r < 0)
+ return r;
+ if (r != 32)
+ return -EINVAL;
+ ids[32] = 0;
+
+ if (!id128_is_valid(ids))
+ return -EINVAL;
+
+ copy = strdup(ids);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+static void dispatch_message_real(
+ Server *s,
+ struct iovec *iovec, unsigned n, unsigned m,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ int priority,
+ pid_t object_pid) {
+
+ char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
+ uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
+ gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
+ owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
+ source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
+ o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
+ o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
+ o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
+ uid_t object_uid;
+ gid_t object_gid;
+ char *x;
+ int r;
+ char *t, *c;
+ uid_t realuid = 0, owner = 0, journal_uid;
+ bool owner_valid = false;
+#ifdef HAVE_AUDIT
+ char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
+ audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
+ o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
+ o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
+
+ uint32_t audit;
+ uid_t loginuid;
+#endif
+
+ assert(s);
+ assert(iovec);
+ assert(n > 0);
+ assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
+
+ if (ucred) {
+ realuid = ucred->uid;
+
+ sprintf(pid, "_PID="PID_FMT, ucred->pid);
+ IOVEC_SET_STRING(iovec[n++], pid);
+
+ sprintf(uid, "_UID="UID_FMT, ucred->uid);
+ IOVEC_SET_STRING(iovec[n++], uid);
+
+ sprintf(gid, "_GID="GID_FMT, ucred->gid);
+ IOVEC_SET_STRING(iovec[n++], gid);
+
+ r = get_process_comm(ucred->pid, &t);
+ if (r >= 0) {
+ x = strjoina("_COMM=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ r = get_process_exe(ucred->pid, &t);
+ if (r >= 0) {
+ x = strjoina("_EXE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ r = get_process_cmdline(ucred->pid, 0, false, &t);
+ if (r >= 0) {
+ x = strjoina("_CMDLINE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ r = get_process_capeff(ucred->pid, &t);
+ if (r >= 0) {
+ x = strjoina("_CAP_EFFECTIVE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+#ifdef HAVE_AUDIT
+ r = audit_session_from_pid(ucred->pid, &audit);
+ if (r >= 0) {
+ sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
+ IOVEC_SET_STRING(iovec[n++], audit_session);
+ }
+
+ r = audit_loginuid_from_pid(ucred->pid, &loginuid);
+ if (r >= 0) {
+ sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
+ IOVEC_SET_STRING(iovec[n++], audit_loginuid);
+ }
+#endif
+
+ r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
+ if (r >= 0) {
+ _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
+ char *session = NULL;
+
+ x = strjoina("_SYSTEMD_CGROUP=", c);
+ IOVEC_SET_STRING(iovec[n++], x);
+
+ r = cg_path_get_session(c, &t);
+ if (r >= 0) {
+ session = strjoina("_SYSTEMD_SESSION=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], session);
+ }
+
+ if (cg_path_get_owner_uid(c, &owner) >= 0) {
+ owner_valid = true;
+
+ sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
+ IOVEC_SET_STRING(iovec[n++], owner_uid);
+ }
+
+ if (cg_path_get_unit(c, &raw_unit) >= 0) {
+ x = strjoina("_SYSTEMD_UNIT=", raw_unit);
+ IOVEC_SET_STRING(iovec[n++], x);
+ } else if (unit_id && !session) {
+ x = strjoina("_SYSTEMD_UNIT=", unit_id);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_user_unit(c, &t) >= 0) {
+ x = strjoina("_SYSTEMD_USER_UNIT=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ } else if (unit_id && session) {
+ x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_slice(c, &raw_slice) >= 0) {
+ x = strjoina("_SYSTEMD_SLICE=", raw_slice);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_user_slice(c, &t) >= 0) {
+ x = strjoina("_SYSTEMD_USER_SLICE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (raw_slice && raw_unit) {
+ if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
+ x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+ }
+
+ free(c);
+ } else if (unit_id) {
+ x = strjoina("_SYSTEMD_UNIT=", unit_id);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_have()) {
+ if (label) {
+ x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
+
+ *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
+ IOVEC_SET_STRING(iovec[n++], x);
+ } else {
+ char *con;
+
+ if (getpidcon(ucred->pid, &con) >= 0) {
+ x = strjoina("_SELINUX_CONTEXT=", con);
+
+ freecon(con);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+ }
+ }
+#endif
+ }
+ assert(n <= m);
+
+ if (object_pid) {
+ r = get_process_uid(object_pid, &object_uid);
+ if (r >= 0) {
+ sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
+ IOVEC_SET_STRING(iovec[n++], o_uid);
+ }
+
+ r = get_process_gid(object_pid, &object_gid);
+ if (r >= 0) {
+ sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
+ IOVEC_SET_STRING(iovec[n++], o_gid);
+ }
+
+ r = get_process_comm(object_pid, &t);
+ if (r >= 0) {
+ x = strjoina("OBJECT_COMM=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ r = get_process_exe(object_pid, &t);
+ if (r >= 0) {
+ x = strjoina("OBJECT_EXE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ r = get_process_cmdline(object_pid, 0, false, &t);
+ if (r >= 0) {
+ x = strjoina("OBJECT_CMDLINE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+#ifdef HAVE_AUDIT
+ r = audit_session_from_pid(object_pid, &audit);
+ if (r >= 0) {
+ sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
+ IOVEC_SET_STRING(iovec[n++], o_audit_session);
+ }
+
+ r = audit_loginuid_from_pid(object_pid, &loginuid);
+ if (r >= 0) {
+ sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
+ IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
+ }
+#endif
+
+ r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
+ if (r >= 0) {
+ x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
+ IOVEC_SET_STRING(iovec[n++], x);
+
+ r = cg_path_get_session(c, &t);
+ if (r >= 0) {
+ x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_owner_uid(c, &owner) >= 0) {
+ sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
+ IOVEC_SET_STRING(iovec[n++], o_owner_uid);
+ }
+
+ if (cg_path_get_unit(c, &t) >= 0) {
+ x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_user_unit(c, &t) >= 0) {
+ x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_slice(c, &t) >= 0) {
+ x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ if (cg_path_get_user_slice(c, &t) >= 0) {
+ x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
+ free(t);
+ IOVEC_SET_STRING(iovec[n++], x);
+ }
+
+ free(c);
+ }
+ }
+ assert(n <= m);
+
+ if (tv) {
+ sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
+ IOVEC_SET_STRING(iovec[n++], source_time);
+ }
+
+ /* Note that strictly speaking storing the boot id here is
+ * redundant since the entry includes this in-line
+ * anyway. However, we need this indexed, too. */
+ if (!isempty(s->boot_id_field))
+ IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
+
+ if (!isempty(s->machine_id_field))
+ IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
+
+ if (!isempty(s->hostname_field))
+ IOVEC_SET_STRING(iovec[n++], s->hostname_field);
+
+ assert(n <= m);
+
+ if (s->split_mode == SPLIT_UID && realuid > 0)
+ /* Split up strictly by any UID */
+ journal_uid = realuid;
+ else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
+ /* Split up by login UIDs. We do this only if the
+ * realuid is not root, in order not to accidentally
+ * leak privileged information to the user that is
+ * logged by a privileged process that is part of an
+ * unprivileged session. */
+ journal_uid = owner;
+ else
+ journal_uid = 0;
+
+ write_to_journal(s, journal_uid, iovec, n, priority);
+}
+
+void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
+ char mid[11 + 32 + 1];
+ struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
+ unsigned n = 0, m;
+ int r;
+ va_list ap;
+ struct ucred ucred = {};
+
+ assert(s);
+ assert(format);
+
+ assert_cc(3 == LOG_FAC(LOG_DAEMON));
+ IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
+ IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
+
+ IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
+ assert_cc(6 == LOG_INFO);
+ IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
+
+ if (!sd_id128_is_null(message_id)) {
+ snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
+ IOVEC_SET_STRING(iovec[n++], mid);
+ }
+
+ m = n;
+
+ va_start(ap, format);
+ r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
+ /* Error handling below */
+ va_end(ap);
+
+ ucred.pid = getpid();
+ ucred.uid = getuid();
+ ucred.gid = getgid();
+
+ if (r >= 0)
+ dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
+
+ while (m < n)
+ free(iovec[m++].iov_base);
+
+ if (r < 0) {
+ /* We failed to format the message. Emit a warning instead. */
+ char buf[LINE_MAX];
+
+ xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
+
+ n = 3;
+ IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
+ IOVEC_SET_STRING(iovec[n++], buf);
+ dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
+ }
+}
+
+void server_dispatch_message(
+ Server *s,
+ struct iovec *iovec, unsigned n, unsigned m,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label, size_t label_len,
+ const char *unit_id,
+ int priority,
+ pid_t object_pid) {
+
+ int rl, r;
+ _cleanup_free_ char *path = NULL;
+ uint64_t available = 0;
+ char *c;
+
+ assert(s);
+ assert(iovec || n == 0);
+
+ if (n == 0)
+ return;
+
+ if (LOG_PRI(priority) > s->max_level_store)
+ return;
+
+ /* Stop early in case the information will not be stored
+ * in a journal. */
+ if (s->storage == STORAGE_NONE)
+ return;
+
+ if (!ucred)
+ goto finish;
+
+ r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
+ if (r < 0)
+ goto finish;
+
+ /* example: /user/lennart/3/foobar
+ * /system/dbus.service/foobar
+ *
+ * So let's cut of everything past the third /, since that is
+ * where user directories start */
+
+ c = strchr(path, '/');
+ if (c) {
+ c = strchr(c+1, '/');
+ if (c) {
+ c = strchr(c+1, '/');
+ if (c)
+ *c = 0;
+ }
+ }
+
+ (void) determine_space(s, &available, NULL);
+ rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
+ if (rl == 0)
+ return;
+
+ /* Write a suppression message if we suppressed something */
+ if (rl > 1)
+ server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
+ LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
+ NULL);
+
+finish:
+ dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
+}
+
+int server_flush_to_var(Server *s) {
+ sd_id128_t machine;
+ sd_journal *j = NULL;
+ char ts[FORMAT_TIMESPAN_MAX];
+ usec_t start;
+ unsigned n = 0;
+ int r;
+
+ assert(s);
+
+ if (s->storage != STORAGE_AUTO &&
+ s->storage != STORAGE_PERSISTENT)
+ return 0;
+
+ if (!s->runtime_journal)
+ return 0;
+
+ (void) system_journal_open(s, true);
+
+ if (!s->system_journal)
+ return 0;
+
+ log_debug("Flushing to /var...");
+
+ start = now(CLOCK_MONOTONIC);
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read runtime journal: %m");
+
+ sd_journal_set_data_threshold(j, 0);
+
+ SD_JOURNAL_FOREACH(j) {
+ Object *o = NULL;
+ JournalFile *f;
+
+ f = j->current_file;
+ assert(f && f->current_offset > 0);
+
+ n++;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0) {
+ log_error_errno(r, "Can't read entry: %m");
+ goto finish;
+ }
+
+ r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
+ if (r >= 0)
+ continue;
+
+ if (!shall_try_append_again(s->system_journal, r)) {
+ log_error_errno(r, "Can't write entry: %m");
+ goto finish;
+ }
+
+ server_rotate(s);
+ server_vacuum(s, false);
+
+ if (!s->system_journal) {
+ log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
+ r = -EIO;
+ goto finish;
+ }
+
+ log_debug("Retrying write.");
+ r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Can't write entry: %m");
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ journal_file_post_change(s->system_journal);
+
+ s->runtime_journal = journal_file_close(s->runtime_journal);
+
+ if (r >= 0)
+ (void) rm_rf("/run/log/journal", REMOVE_ROOT);
+
+ sd_journal_close(j);
+
+ server_driver_message(s, SD_ID128_NULL,
+ LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
+ format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
+ n),
+ NULL);
+
+ return r;
+}
+
+int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+ struct ucred *ucred = NULL;
+ struct timeval *tv = NULL;
+ struct cmsghdr *cmsg;
+ char *label = NULL;
+ size_t label_len = 0, m;
+ struct iovec iovec;
+ ssize_t n;
+ int *fds = NULL, v = 0;
+ unsigned n_fds = 0;
+
+ union {
+ struct cmsghdr cmsghdr;
+
+ /* We use NAME_MAX space for the SELinux label
+ * here. The kernel currently enforces no
+ * limit, but according to suggestions from
+ * the SELinux people this will change and it
+ * will probably be identical to NAME_MAX. For
+ * now we use that, but this should be updated
+ * one day when the final limit is known. */
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(struct timeval)) +
+ CMSG_SPACE(sizeof(int)) + /* fd */
+ CMSG_SPACE(NAME_MAX)]; /* selinux label */
+ } control = {};
+
+ union sockaddr_union sa = {};
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &sa,
+ .msg_namelen = sizeof(sa),
+ };
+
+ assert(s);
+ assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
+
+ if (revents != EPOLLIN) {
+ log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
+ return -EIO;
+ }
+
+ /* Try to get the right size, if we can. (Not all
+ * sockets support SIOCINQ, hence we just try, but
+ * don't rely on it. */
+ (void) ioctl(fd, SIOCINQ, &v);
+
+ /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
+ m = PAGE_ALIGN(MAX3((size_t) v + 1,
+ (size_t) LINE_MAX,
+ ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
+
+ if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
+ return log_oom();
+
+ iovec.iov_base = s->buffer;
+ iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
+
+ n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (n < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "recvmsg() failed: %m");
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
+ ucred = (struct ucred*) CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_SECURITY) {
+ label = (char*) CMSG_DATA(cmsg);
+ label_len = cmsg->cmsg_len - CMSG_LEN(0);
+ } else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMP &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
+ tv = (struct timeval*) CMSG_DATA(cmsg);
+ else if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ fds = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ }
+ }
+
+ /* And a trailing NUL, just in case */
+ s->buffer[n] = 0;
+
+ if (fd == s->syslog_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via syslog socket. Ignoring.");
+
+ } else if (fd == s->native_fd) {
+ if (n > 0 && n_fds == 0)
+ server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
+ else if (n == 0 && n_fds == 1)
+ server_process_native_file(s, fds[0], ucred, tv, label, label_len);
+ else if (n_fds > 0)
+ log_warning("Got too many file descriptors via native socket. Ignoring.");
+
+ } else {
+ assert(fd == s->audit_fd);
+
+ if (n > 0 && n_fds == 0)
+ server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
+ else if (n_fds > 0)
+ log_warning("Got file descriptors via audit socket. Ignoring.");
+ }
+
+ close_many(fds, n_fds);
+ return 0;
+}
+
+static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
+
+ (void) server_flush_to_var(s);
+ server_sync(s);
+ server_vacuum(s, false);
+
+ r = touch("/run/systemd/journal/flushed");
+ if (r < 0)
+ log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
+
+ server_space_usage_message(s, NULL);
+ return 0;
+}
+
+static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
+ server_rotate(s);
+ server_vacuum(s, true);
+
+ if (s->system_journal)
+ patch_min_use(&s->system_storage);
+ if (s->runtime_journal)
+ patch_min_use(&s->runtime_storage);
+
+ /* Let clients know when the most recent rotation happened. */
+ r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
+ if (r < 0)
+ log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
+
+ return 0;
+}
+
+static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ log_received_signal(LOG_INFO, si);
+
+ sd_event_exit(s->event, 0);
+ return 0;
+}
+
+static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
+
+ server_sync(s);
+
+ /* Let clients know when the most recent sync happened. */
+ r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
+ if (r < 0)
+ log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
+
+ return 0;
+}
+
+static int setup_signals(Server *s) {
+ int r;
+
+ assert(s);
+
+ assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
+
+ r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
+ if (r < 0)
+ return r;
+
+ /* Let's process SIGTERM late, so that we flush all queued
+ * messages to disk before we exit */
+ r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
+ if (r < 0)
+ return r;
+
+ /* When journald is invoked on the terminal (when debugging),
+ * it's useful if C-c is handled equivalent to SIGTERM. */
+ r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
+ if (r < 0)
+ return r;
+
+ /* SIGRTMIN+1 causes an immediate sync. We process this very
+ * late, so that everything else queued at this point is
+ * really written to disk. Clients can watch
+ * /run/systemd/journal/synced with inotify until its mtime
+ * changes to see when a sync happened. */
+ r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
+ Server *s = data;
+ int r;
+
+ assert(s);
+
+ if (streq(key, "systemd.journald.forward_to_syslog")) {
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_syslog = r;
+ } else if (streq(key, "systemd.journald.forward_to_kmsg")) {
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_kmsg = r;
+ } else if (streq(key, "systemd.journald.forward_to_console")) {
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_console = r;
+ } else if (streq(key, "systemd.journald.forward_to_wall")) {
+ r = value ? parse_boolean(value) : true;
+ if (r < 0)
+ log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
+ else
+ s->forward_to_wall = r;
+ } else if (streq(key, "systemd.journald.max_level_console") && value) {
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
+ else
+ s->max_level_console = r;
+ } else if (streq(key, "systemd.journald.max_level_store") && value) {
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
+ else
+ s->max_level_store = r;
+ } else if (streq(key, "systemd.journald.max_level_syslog") && value) {
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
+ else
+ s->max_level_syslog = r;
+ } else if (streq(key, "systemd.journald.max_level_kmsg") && value) {
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
+ else
+ s->max_level_kmsg = r;
+ } else if (streq(key, "systemd.journald.max_level_wall") && value) {
+ r = log_level_from_string(value);
+ if (r < 0)
+ log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
+ else
+ s->max_level_wall = r;
+ } else if (startswith(key, "systemd.journald"))
+ log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
+
+ /* do not warn about state here, since probably systemd already did */
+ return 0;
+}
+
+static int server_parse_config_file(Server *s) {
+ assert(s);
+
+ return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
+ CONF_PATHS_NULSTR("systemd/journald.conf.d"),
+ "Journal\0",
+ config_item_perf_lookup, journald_gperf_lookup,
+ false, s);
+}
+
+static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ server_sync(s);
+ return 0;
+}
+
+int server_schedule_sync(Server *s, int priority) {
+ int r;
+
+ assert(s);
+
+ if (priority <= LOG_CRIT) {
+ /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
+ server_sync(s);
+ return 0;
+ }
+
+ if (s->sync_scheduled)
+ return 0;
+
+ if (s->sync_interval_usec > 0) {
+ usec_t when;
+
+ r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
+ if (r < 0)
+ return r;
+
+ when += s->sync_interval_usec;
+
+ if (!s->sync_event_source) {
+ r = sd_event_add_time(
+ s->event,
+ &s->sync_event_source,
+ CLOCK_MONOTONIC,
+ when, 0,
+ server_dispatch_sync, s);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
+ } else {
+ r = sd_event_source_set_time(s->sync_event_source, when);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
+ }
+ if (r < 0)
+ return r;
+
+ s->sync_scheduled = true;
+ }
+
+ return 0;
+}
+
+static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+
+ assert(s);
+
+ server_cache_hostname(s);
+ return 0;
+}
+
+static int server_open_hostname(Server *s) {
+ int r;
+
+ assert(s);
+
+ s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
+ if (s->hostname_fd < 0)
+ return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
+
+ r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
+ if (r < 0) {
+ /* kernels prior to 3.2 don't support polling this file. Ignore
+ * the failure. */
+ if (r == -EPERM) {
+ log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
+ s->hostname_fd = safe_close(s->hostname_fd);
+ return 0;
+ }
+
+ return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
+ }
+
+ r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
+
+ return 0;
+}
+
+static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+ assert(s->notify_event_source == es);
+ assert(s->notify_fd == fd);
+
+ /* The $NOTIFY_SOCKET is writable again, now send exactly one
+ * message on it. Either it's the watchdog event, the initial
+ * READY=1 event or an stdout stream event. If there's nothing
+ * to write anymore, turn our event source off. The next time
+ * there's something to send it will be turned on again. */
+
+ if (!s->sent_notify_ready) {
+ static const char p[] =
+ "READY=1\n"
+ "STATUS=Processing requests...";
+ ssize_t l;
+
+ l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
+ }
+
+ s->sent_notify_ready = true;
+ log_debug("Sent READY=1 notification.");
+
+ } else if (s->send_watchdog) {
+
+ static const char p[] =
+ "WATCHDOG=1";
+
+ ssize_t l;
+
+ l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
+ }
+
+ s->send_watchdog = false;
+ log_debug("Sent WATCHDOG=1 notification.");
+
+ } else if (s->stdout_streams_notify_queue)
+ /* Dispatch one stream notification event */
+ stdout_stream_send_notify(s->stdout_streams_notify_queue);
+
+ /* Leave us enabled if there's still more to do. */
+ if (s->send_watchdog || s->stdout_streams_notify_queue)
+ return 0;
+
+ /* There was nothing to do anymore, let's turn ourselves off. */
+ r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to turn off notify event source: %m");
+
+ return 0;
+}
+
+static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ s->send_watchdog = true;
+
+ r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to turn on notify event source: %m");
+
+ r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to restart watchdog event source: %m");
+
+ r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable watchdog event source: %m");
+
+ return 0;
+}
+
+static int server_connect_notify(Server *s) {
+ union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ };
+ const char *e;
+ int r;
+
+ assert(s);
+ assert(s->notify_fd < 0);
+ assert(!s->notify_event_source);
+
+ /*
+ So here's the problem: we'd like to send notification
+ messages to PID 1, but we cannot do that via sd_notify(),
+ since that's synchronous, and we might end up blocking on
+ it. Specifically: given that PID 1 might block on
+ dbus-daemon during IPC, and dbus-daemon is logging to us,
+ and might hence block on us, we might end up in a deadlock
+ if we block on sending PID 1 notification messages — by
+ generating a full blocking circle. To avoid this, let's
+ create a non-blocking socket, and connect it to the
+ notification socket, and then wait for POLLOUT before we
+ send anything. This should efficiently avoid any deadlocks,
+ as we'll never block on PID 1, hence PID 1 can safely block
+ on dbus-daemon which can safely block on us again.
+
+ Don't think that this issue is real? It is, see:
+ https://github.com/systemd/systemd/issues/1505
+ */
+
+ e = getenv("NOTIFY_SOCKET");
+ if (!e)
+ return 0;
+
+ if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
+ log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
+ return -EINVAL;
+ }
+
+ if (strlen(e) > sizeof(sa.un.sun_path)) {
+ log_error("NOTIFY_SOCKET path too long: %s", e);
+ return -EINVAL;
+ }
+
+ s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->notify_fd < 0)
+ return log_error_errno(errno, "Failed to create notify socket: %m");
+
+ (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
+
+ strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
+ if (sa.un.sun_path[0] == '@')
+ sa.un.sun_path[0] = 0;
+
+ r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "Failed to connect to notify socket: %m");
+
+ r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to watch notification socket: %m");
+
+ if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
+ s->send_watchdog = true;
+
+ r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add watchdog time event: %m");
+ }
+
+ /* This should fire pretty soon, which we'll use to send the
+ * READY=1 event. */
+
+ return 0;
+}
+
+int server_init(Server *s) {
+ _cleanup_fdset_free_ FDSet *fds = NULL;
+ int n, r, fd;
+ bool no_sockets;
+
+ assert(s);
+
+ zero(*s);
+ s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
+ s->compress = true;
+ s->seal = true;
+
+ s->watchdog_usec = USEC_INFINITY;
+
+ s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
+ s->sync_scheduled = false;
+
+ s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
+ s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
+
+ s->forward_to_wall = true;
+
+ s->max_file_usec = DEFAULT_MAX_FILE_USEC;
+
+ s->max_level_store = LOG_DEBUG;
+ s->max_level_syslog = LOG_DEBUG;
+ s->max_level_kmsg = LOG_NOTICE;
+ s->max_level_console = LOG_INFO;
+ s->max_level_wall = LOG_EMERG;
+
+ journal_reset_metrics(&s->system_storage.metrics);
+ journal_reset_metrics(&s->runtime_storage.metrics);
+
+ server_parse_config_file(s);
+ parse_proc_cmdline(parse_proc_cmdline_item, s, true);
+
+ if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
+ log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
+ s->rate_limit_interval, s->rate_limit_burst);
+ s->rate_limit_interval = s->rate_limit_burst = 0;
+ }
+
+ (void) mkdir_p("/run/systemd/journal", 0755);
+
+ s->user_journals = ordered_hashmap_new(NULL);
+ if (!s->user_journals)
+ return log_oom();
+
+ s->mmap = mmap_cache_new();
+ if (!s->mmap)
+ return log_oom();
+
+ s->deferred_closes = set_new(NULL);
+ if (!s->deferred_closes)
+ return log_oom();
+
+ r = sd_event_default(&s->event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create event loop: %m");
+
+ n = sd_listen_fds(true);
+ if (n < 0)
+ return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
+
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
+
+ if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
+
+ if (s->native_fd >= 0) {
+ log_error("Too many native sockets passed.");
+ return -EINVAL;
+ }
+
+ s->native_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
+
+ if (s->stdout_fd >= 0) {
+ log_error("Too many stdout sockets passed.");
+ return -EINVAL;
+ }
+
+ s->stdout_fd = fd;
+
+ } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
+ sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
+
+ if (s->syslog_fd >= 0) {
+ log_error("Too many /dev/log sockets passed.");
+ return -EINVAL;
+ }
+
+ s->syslog_fd = fd;
+
+ } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
+
+ if (s->audit_fd >= 0) {
+ log_error("Too many audit sockets passed.");
+ return -EINVAL;
+ }
+
+ s->audit_fd = fd;
+
+ } else {
+
+ if (!fds) {
+ fds = fdset_new();
+ if (!fds)
+ return log_oom();
+ }
+
+ r = fdset_put(fds, fd);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ /* Try to restore streams, but don't bother if this fails */
+ (void) server_restore_streams(s, fds);
+
+ if (fdset_size(fds) > 0) {
+ log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
+ fds = fdset_free(fds);
+ }
+
+ no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
+
+ /* always open stdout, syslog, native, and kmsg sockets */
+
+ /* systemd-journald.socket: /run/systemd/journal/stdout */
+ r = server_open_stdout_socket(s);
+ if (r < 0)
+ return r;
+
+ /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
+ r = server_open_syslog_socket(s);
+ if (r < 0)
+ return r;
+
+ /* systemd-journald.socket: /run/systemd/journal/socket */
+ r = server_open_native_socket(s);
+ if (r < 0)
+ return r;
+
+ /* /dev/ksmg */
+ r = server_open_dev_kmsg(s);
+ if (r < 0)
+ return r;
+
+ /* Unless we got *some* sockets and not audit, open audit socket */
+ if (s->audit_fd >= 0 || no_sockets) {
+ r = server_open_audit(s);
+ if (r < 0)
+ return r;
+ }
+
+ r = server_open_kernel_seqnum(s);
+ if (r < 0)
+ return r;
+
+ r = server_open_hostname(s);
+ if (r < 0)
+ return r;
+
+ r = setup_signals(s);
+ if (r < 0)
+ return r;
+
+ s->udev = udev_new();
+ if (!s->udev)
+ return -ENOMEM;
+
+ s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
+ if (!s->rate_limit)
+ return -ENOMEM;
+
+ r = cg_get_root_path(&s->cgroup_root);
+ if (r < 0)
+ return r;
+
+ server_cache_hostname(s);
+ server_cache_boot_id(s);
+ server_cache_machine_id(s);
+
+ s->runtime_storage.name = "Runtime journal";
+ s->system_storage.name = "System journal";
+
+ s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), NULL);
+ s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), NULL);
+ if (!s->runtime_storage.path || !s->system_storage.path)
+ return -ENOMEM;
+
+ (void) server_connect_notify(s);
+
+ return system_journal_open(s, false);
+}
+
+void server_maybe_append_tags(Server *s) {
+#ifdef HAVE_GCRYPT
+ JournalFile *f;
+ Iterator i;
+ usec_t n;
+
+ n = now(CLOCK_REALTIME);
+
+ if (s->system_journal)
+ journal_file_maybe_append_tag(s->system_journal, n);
+
+ ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
+ journal_file_maybe_append_tag(f, n);
+#endif
+}
+
+void server_done(Server *s) {
+ JournalFile *f;
+ assert(s);
+
+ if (s->deferred_closes) {
+ journal_file_close_set(s->deferred_closes);
+ set_free(s->deferred_closes);
+ }
+
+ while (s->stdout_streams)
+ stdout_stream_free(s->stdout_streams);
+
+ if (s->system_journal)
+ (void) journal_file_close(s->system_journal);
+
+ if (s->runtime_journal)
+ (void) journal_file_close(s->runtime_journal);
+
+ while ((f = ordered_hashmap_steal_first(s->user_journals)))
+ (void) journal_file_close(f);
+
+ ordered_hashmap_free(s->user_journals);
+
+ sd_event_source_unref(s->syslog_event_source);
+ sd_event_source_unref(s->native_event_source);
+ sd_event_source_unref(s->stdout_event_source);
+ sd_event_source_unref(s->dev_kmsg_event_source);
+ sd_event_source_unref(s->audit_event_source);
+ sd_event_source_unref(s->sync_event_source);
+ sd_event_source_unref(s->sigusr1_event_source);
+ sd_event_source_unref(s->sigusr2_event_source);
+ sd_event_source_unref(s->sigterm_event_source);
+ sd_event_source_unref(s->sigint_event_source);
+ sd_event_source_unref(s->sigrtmin1_event_source);
+ sd_event_source_unref(s->hostname_event_source);
+ sd_event_source_unref(s->notify_event_source);
+ sd_event_source_unref(s->watchdog_event_source);
+ sd_event_unref(s->event);
+
+ safe_close(s->syslog_fd);
+ safe_close(s->native_fd);
+ safe_close(s->stdout_fd);
+ safe_close(s->dev_kmsg_fd);
+ safe_close(s->audit_fd);
+ safe_close(s->hostname_fd);
+ safe_close(s->notify_fd);
+
+ if (s->rate_limit)
+ journal_rate_limit_free(s->rate_limit);
+
+ if (s->kernel_seqnum)
+ munmap(s->kernel_seqnum, sizeof(uint64_t));
+
+ free(s->buffer);
+ free(s->tty_path);
+ free(s->cgroup_root);
+ free(s->hostname_field);
+
+ if (s->mmap)
+ mmap_cache_unref(s->mmap);
+
+ udev_unref(s->udev);
+}
+
+static const char* const storage_table[_STORAGE_MAX] = {
+ [STORAGE_AUTO] = "auto",
+ [STORAGE_VOLATILE] = "volatile",
+ [STORAGE_PERSISTENT] = "persistent",
+ [STORAGE_NONE] = "none"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
+
+static const char* const split_mode_table[_SPLIT_MAX] = {
+ [SPLIT_LOGIN] = "login",
+ [SPLIT_UID] = "uid",
+ [SPLIT_NONE] = "none",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
diff --git a/src/grp-journal/libjournal-core/src/journald-stream.c b/src/grp-journal/libjournal-core/src/journald-stream.c
new file mode 100644
index 0000000000..64b24e157b
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-stream.c
@@ -0,0 +1,788 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stddef.h>
+#include <unistd.h>
+
+#ifdef HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
+
+#include <systemd/sd-daemon.h>
+#include <systemd/sd-event.h>
+
+#include "journal-core/journald-console.h"
+#include "journal-core/journald-kmsg.h"
+#include "journal-core/journald-server.h"
+#include "journal-core/journald-stream.h"
+#include "journal-core/journald-syslog.h"
+#include "journal-core/journald-wall.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/dirent-util.h"
+#include "systemd-basic/escape.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/fileio.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/mkdir.h"
+#include "systemd-basic/parse-util.h"
+#include "systemd-basic/selinux-util.h"
+#include "systemd-basic/socket-util.h"
+#include "systemd-basic/stdio-util.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-basic/syslog-util.h"
+
+#define STDOUT_STREAMS_MAX 4096
+
+typedef enum StdoutStreamState {
+ STDOUT_STREAM_IDENTIFIER,
+ STDOUT_STREAM_UNIT_ID,
+ STDOUT_STREAM_PRIORITY,
+ STDOUT_STREAM_LEVEL_PREFIX,
+ STDOUT_STREAM_FORWARD_TO_SYSLOG,
+ STDOUT_STREAM_FORWARD_TO_KMSG,
+ STDOUT_STREAM_FORWARD_TO_CONSOLE,
+ STDOUT_STREAM_RUNNING
+} StdoutStreamState;
+
+struct StdoutStream {
+ Server *server;
+ StdoutStreamState state;
+
+ int fd;
+
+ struct ucred ucred;
+ char *label;
+ char *identifier;
+ char *unit_id;
+ int priority;
+ bool level_prefix:1;
+ bool forward_to_syslog:1;
+ bool forward_to_kmsg:1;
+ bool forward_to_console:1;
+
+ bool fdstore:1;
+ bool in_notify_queue:1;
+
+ char buffer[LINE_MAX+1];
+ size_t length;
+
+ sd_event_source *event_source;
+
+ char *state_file;
+
+ LIST_FIELDS(StdoutStream, stdout_stream);
+ LIST_FIELDS(StdoutStream, stdout_stream_notify_queue);
+};
+
+void stdout_stream_free(StdoutStream *s) {
+ if (!s)
+ return;
+
+ if (s->server) {
+ assert(s->server->n_stdout_streams > 0);
+ s->server->n_stdout_streams--;
+ LIST_REMOVE(stdout_stream, s->server->stdout_streams, s);
+
+ if (s->in_notify_queue)
+ LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ }
+
+ if (s->event_source) {
+ sd_event_source_set_enabled(s->event_source, SD_EVENT_OFF);
+ s->event_source = sd_event_source_unref(s->event_source);
+ }
+
+ safe_close(s->fd);
+ free(s->label);
+ free(s->identifier);
+ free(s->unit_id);
+ free(s->state_file);
+
+ free(s);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(StdoutStream*, stdout_stream_free);
+
+static void stdout_stream_destroy(StdoutStream *s) {
+ if (!s)
+ return;
+
+ if (s->state_file)
+ (void) unlink(s->state_file);
+
+ stdout_stream_free(s);
+}
+
+static int stdout_stream_save(StdoutStream *s) {
+ _cleanup_free_ char *temp_path = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ assert(s);
+
+ if (s->state != STDOUT_STREAM_RUNNING)
+ return 0;
+
+ if (!s->state_file) {
+ struct stat st;
+
+ r = fstat(s->fd, &st);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to stat connected stream: %m");
+
+ /* We use device and inode numbers as identifier for the stream */
+ if (asprintf(&s->state_file, "/run/systemd/journal/streams/%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
+ return log_oom();
+ }
+
+ mkdir_p("/run/systemd/journal/streams", 0755);
+
+ r = fopen_temporary(s->state_file, &f, &temp_path);
+ if (r < 0)
+ goto fail;
+
+ fprintf(f,
+ "# This is private data. Do not parse\n"
+ "PRIORITY=%i\n"
+ "LEVEL_PREFIX=%i\n"
+ "FORWARD_TO_SYSLOG=%i\n"
+ "FORWARD_TO_KMSG=%i\n"
+ "FORWARD_TO_CONSOLE=%i\n",
+ s->priority,
+ s->level_prefix,
+ s->forward_to_syslog,
+ s->forward_to_kmsg,
+ s->forward_to_console);
+
+ if (!isempty(s->identifier)) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->identifier);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "IDENTIFIER=%s\n", escaped);
+ }
+
+ if (!isempty(s->unit_id)) {
+ _cleanup_free_ char *escaped;
+
+ escaped = cescape(s->unit_id);
+ if (!escaped) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ fprintf(f, "UNIT=%s\n", escaped);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(temp_path, s->state_file) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (!s->fdstore && !s->in_notify_queue) {
+ LIST_PREPEND(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ s->in_notify_queue = true;
+
+ if (s->server->notify_event_source) {
+ r = sd_event_source_set_enabled(s->server->notify_event_source, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable notify event source: %m");
+ }
+ }
+
+ return 0;
+
+fail:
+ (void) unlink(s->state_file);
+
+ if (temp_path)
+ (void) unlink(temp_path);
+
+ return log_error_errno(r, "Failed to save stream data %s: %m", s->state_file);
+}
+
+static int stdout_stream_log(StdoutStream *s, const char *p) {
+ struct iovec iovec[N_IOVEC_META_FIELDS + 5];
+ int priority;
+ char syslog_priority[] = "PRIORITY=\0";
+ char syslog_facility[sizeof("SYSLOG_FACILITY=")-1 + DECIMAL_STR_MAX(int) + 1];
+ _cleanup_free_ char *message = NULL, *syslog_identifier = NULL;
+ unsigned n = 0;
+ size_t label_len;
+
+ assert(s);
+ assert(p);
+
+ priority = s->priority;
+
+ if (s->level_prefix)
+ syslog_parse_priority(&p, &priority, false);
+
+ if (isempty(p))
+ return 0;
+
+ if (s->forward_to_syslog || s->server->forward_to_syslog)
+ server_forward_syslog(s->server, syslog_fixup_facility(priority), s->identifier, p, &s->ucred, NULL);
+
+ if (s->forward_to_kmsg || s->server->forward_to_kmsg)
+ server_forward_kmsg(s->server, priority, s->identifier, p, &s->ucred);
+
+ if (s->forward_to_console || s->server->forward_to_console)
+ server_forward_console(s->server, priority, s->identifier, p, &s->ucred);
+
+ if (s->server->forward_to_wall)
+ server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
+
+ IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
+
+ syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
+ IOVEC_SET_STRING(iovec[n++], syslog_priority);
+
+ if (priority & LOG_FACMASK) {
+ xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
+ IOVEC_SET_STRING(iovec[n++], syslog_facility);
+ }
+
+ if (s->identifier) {
+ syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
+ if (syslog_identifier)
+ IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+ }
+
+ message = strappend("MESSAGE=", p);
+ if (message)
+ IOVEC_SET_STRING(iovec[n++], message);
+
+ label_len = s->label ? strlen(s->label) : 0;
+ server_dispatch_message(s->server, iovec, n, ELEMENTSOF(iovec), &s->ucred, NULL, s->label, label_len, s->unit_id, priority, 0);
+ return 0;
+}
+
+static int stdout_stream_line(StdoutStream *s, char *p) {
+ int r;
+ char *orig;
+
+ assert(s);
+ assert(p);
+
+ orig = p;
+ p = strstrip(p);
+
+ switch (s->state) {
+
+ case STDOUT_STREAM_IDENTIFIER:
+ if (isempty(p))
+ s->identifier = NULL;
+ else {
+ s->identifier = strdup(p);
+ if (!s->identifier)
+ return log_oom();
+ }
+
+ s->state = STDOUT_STREAM_UNIT_ID;
+ return 0;
+
+ case STDOUT_STREAM_UNIT_ID:
+ if (s->ucred.uid == 0) {
+ if (isempty(p))
+ s->unit_id = NULL;
+ else {
+ s->unit_id = strdup(p);
+ if (!s->unit_id)
+ return log_oom();
+ }
+ }
+
+ s->state = STDOUT_STREAM_PRIORITY;
+ return 0;
+
+ case STDOUT_STREAM_PRIORITY:
+ r = safe_atoi(p, &s->priority);
+ if (r < 0 || s->priority < 0 || s->priority > 999) {
+ log_warning("Failed to parse log priority line.");
+ return -EINVAL;
+ }
+
+ s->state = STDOUT_STREAM_LEVEL_PREFIX;
+ return 0;
+
+ case STDOUT_STREAM_LEVEL_PREFIX:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse level prefix line.");
+ return -EINVAL;
+ }
+
+ s->level_prefix = !!r;
+ s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_SYSLOG:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse forward to syslog line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_syslog = !!r;
+ s->state = STDOUT_STREAM_FORWARD_TO_KMSG;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_KMSG:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse copy to kmsg line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_kmsg = !!r;
+ s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE;
+ return 0;
+
+ case STDOUT_STREAM_FORWARD_TO_CONSOLE:
+ r = parse_boolean(p);
+ if (r < 0) {
+ log_warning("Failed to parse copy to console line.");
+ return -EINVAL;
+ }
+
+ s->forward_to_console = !!r;
+ s->state = STDOUT_STREAM_RUNNING;
+
+ /* Try to save the stream, so that journald can be restarted and we can recover */
+ (void) stdout_stream_save(s);
+ return 0;
+
+ case STDOUT_STREAM_RUNNING:
+ return stdout_stream_log(s, orig);
+ }
+
+ assert_not_reached("Unknown stream state");
+}
+
+static int stdout_stream_scan(StdoutStream *s, bool force_flush) {
+ char *p;
+ size_t remaining;
+ int r;
+
+ assert(s);
+
+ p = s->buffer;
+ remaining = s->length;
+
+ /* XXX: This function does nothing if (s->length == 0) */
+
+ for (;;) {
+ char *end;
+ size_t skip;
+
+ end = memchr(p, '\n', remaining);
+ if (end)
+ skip = end - p + 1;
+ else if (remaining >= sizeof(s->buffer) - 1) {
+ end = p + sizeof(s->buffer) - 1;
+ skip = remaining;
+ } else
+ break;
+
+ *end = 0;
+
+ r = stdout_stream_line(s, p);
+ if (r < 0)
+ return r;
+
+ remaining -= skip;
+ p += skip;
+ }
+
+ if (force_flush && remaining > 0) {
+ p[remaining] = 0;
+ r = stdout_stream_line(s, p);
+ if (r < 0)
+ return r;
+
+ p += remaining;
+ remaining = 0;
+ }
+
+ if (p > s->buffer) {
+ memmove(s->buffer, p, remaining);
+ s->length = remaining;
+ }
+
+ return 0;
+}
+
+static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
+ StdoutStream *s = userdata;
+ ssize_t l;
+ int r;
+
+ assert(s);
+
+ if ((revents|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) {
+ log_error("Got invalid event from epoll for stdout stream: %"PRIx32, revents);
+ goto terminate;
+ }
+
+ l = read(s->fd, s->buffer+s->length, sizeof(s->buffer)-1-s->length);
+ if (l < 0) {
+
+ if (errno == EAGAIN)
+ return 0;
+
+ log_warning_errno(errno, "Failed to read from stream: %m");
+ goto terminate;
+ }
+
+ if (l == 0) {
+ stdout_stream_scan(s, true);
+ goto terminate;
+ }
+
+ s->length += l;
+ r = stdout_stream_scan(s, false);
+ if (r < 0)
+ goto terminate;
+
+ return 1;
+
+terminate:
+ stdout_stream_destroy(s);
+ return 0;
+}
+
+static int stdout_stream_install(Server *s, int fd, StdoutStream **ret) {
+ _cleanup_(stdout_stream_freep) StdoutStream *stream = NULL;
+ int r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ stream = new0(StdoutStream, 1);
+ if (!stream)
+ return log_oom();
+
+ stream->fd = -1;
+ stream->priority = LOG_INFO;
+
+ r = getpeercred(fd, &stream->ucred);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine peer credentials: %m");
+
+ if (mac_selinux_have()) {
+ r = getpeersec(fd, &stream->label);
+ if (r < 0 && r != -EOPNOTSUPP)
+ (void) log_warning_errno(r, "Failed to determine peer security context: %m");
+ }
+
+ (void) shutdown(fd, SHUT_WR);
+
+ r = sd_event_add_io(s->event, &stream->event_source, fd, EPOLLIN, stdout_stream_process, stream);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add stream to event loop: %m");
+
+ r = sd_event_source_set_priority(stream->event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust stdout event source priority: %m");
+
+ stream->fd = fd;
+
+ stream->server = s;
+ LIST_PREPEND(stdout_stream, s->stdout_streams, stream);
+ s->n_stdout_streams++;
+
+ if (ret)
+ *ret = stream;
+
+ stream = NULL;
+
+ return 0;
+}
+
+static int stdout_stream_new(sd_event_source *es, int listen_fd, uint32_t revents, void *userdata) {
+ _cleanup_close_ int fd = -1;
+ Server *s = userdata;
+ int r;
+
+ assert(s);
+
+ if (revents != EPOLLIN) {
+ log_error("Got invalid event from epoll for stdout server fd: %"PRIx32, revents);
+ return -EIO;
+ }
+
+ fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (fd < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ return log_error_errno(errno, "Failed to accept stdout connection: %m");
+ }
+
+ if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
+ log_warning("Too many stdout streams, refusing connection.");
+ return 0;
+ }
+
+ r = stdout_stream_install(s, fd, NULL);
+ if (r < 0)
+ return r;
+
+ fd = -1;
+ return 0;
+}
+
+static int stdout_stream_load(StdoutStream *stream, const char *fname) {
+ _cleanup_free_ char
+ *priority = NULL,
+ *level_prefix = NULL,
+ *forward_to_syslog = NULL,
+ *forward_to_kmsg = NULL,
+ *forward_to_console = NULL;
+ int r;
+
+ assert(stream);
+ assert(fname);
+
+ if (!stream->state_file) {
+ stream->state_file = strappend("/run/systemd/journal/streams/", fname);
+ if (!stream->state_file)
+ return log_oom();
+ }
+
+ r = parse_env_file(stream->state_file, NEWLINE,
+ "PRIORITY", &priority,
+ "LEVEL_PREFIX", &level_prefix,
+ "FORWARD_TO_SYSLOG", &forward_to_syslog,
+ "FORWARD_TO_KMSG", &forward_to_kmsg,
+ "FORWARD_TO_CONSOLE", &forward_to_console,
+ "IDENTIFIER", &stream->identifier,
+ "UNIT", &stream->unit_id,
+ NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read: %s", stream->state_file);
+
+ if (priority) {
+ int p;
+
+ p = log_level_from_string(priority);
+ if (p >= 0)
+ stream->priority = p;
+ }
+
+ if (level_prefix) {
+ r = parse_boolean(level_prefix);
+ if (r >= 0)
+ stream->level_prefix = r;
+ }
+
+ if (forward_to_syslog) {
+ r = parse_boolean(forward_to_syslog);
+ if (r >= 0)
+ stream->forward_to_syslog = r;
+ }
+
+ if (forward_to_kmsg) {
+ r = parse_boolean(forward_to_kmsg);
+ if (r >= 0)
+ stream->forward_to_kmsg = r;
+ }
+
+ if (forward_to_console) {
+ r = parse_boolean(forward_to_console);
+ if (r >= 0)
+ stream->forward_to_console = r;
+ }
+
+ return 0;
+}
+
+static int stdout_stream_restore(Server *s, const char *fname, int fd) {
+ StdoutStream *stream;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(fd >= 0);
+
+ if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) {
+ log_warning("Too many stdout streams, refusing restoring of stream.");
+ return -ENOBUFS;
+ }
+
+ r = stdout_stream_install(s, fd, &stream);
+ if (r < 0)
+ return r;
+
+ stream->state = STDOUT_STREAM_RUNNING;
+ stream->fdstore = true;
+
+ /* Ignore all parsing errors */
+ (void) stdout_stream_load(stream, fname);
+
+ return 0;
+}
+
+int server_restore_streams(Server *s, FDSet *fds) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r;
+
+ d = opendir("/run/systemd/journal/streams");
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to enumerate /run/systemd/journal/streams: %m");
+ }
+
+ FOREACH_DIRENT(de, d, goto fail) {
+ unsigned long st_dev, st_ino;
+ bool found = false;
+ Iterator i;
+ int fd;
+
+ if (sscanf(de->d_name, "%lu:%lu", &st_dev, &st_ino) != 2)
+ continue;
+
+ FDSET_FOREACH(fd, fds, i) {
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat %s: %m", de->d_name);
+
+ if (S_ISSOCK(st.st_mode) && st.st_dev == st_dev && st.st_ino == st_ino) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* No file descriptor? Then let's delete the state file */
+ log_debug("Cannot restore stream file %s", de->d_name);
+ unlinkat(dirfd(d), de->d_name, 0);
+ continue;
+ }
+
+ fdset_remove(fds, fd);
+
+ r = stdout_stream_restore(s, de->d_name, fd);
+ if (r < 0)
+ safe_close(fd);
+ }
+
+ return 0;
+
+fail:
+ return log_error_errno(errno, "Failed to read streams directory: %m");
+}
+
+int server_open_stdout_socket(Server *s) {
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/stdout",
+ };
+ int r;
+
+ assert(s);
+
+ if (s->stdout_fd < 0) {
+ s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->stdout_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) unlink(sa.un.sun_path);
+
+ r = bind(s->stdout_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+
+ if (listen(s->stdout_fd, SOMAXCONN) < 0)
+ return log_error_errno(errno, "listen(%s) failed: %m", sa.un.sun_path);
+ } else
+ fd_nonblock(s->stdout_fd, 1);
+
+ r = sd_event_add_io(s->event, &s->stdout_event_source, s->stdout_fd, EPOLLIN, stdout_stream_new, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add stdout server fd to event source: %m");
+
+ r = sd_event_source_set_priority(s->stdout_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust priority of stdout server event source: %m");
+
+ return 0;
+}
+
+void stdout_stream_send_notify(StdoutStream *s) {
+ struct iovec iovec = {
+ .iov_base = (char*) "FDSTORE=1",
+ .iov_len = strlen("FDSTORE=1"),
+ };
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+ ssize_t l;
+
+ assert(s);
+ assert(!s->fdstore);
+ assert(s->in_notify_queue);
+ assert(s->server);
+ assert(s->server->notify_fd >= 0);
+
+ /* Store the connection fd in PID 1, so that we get it passed
+ * in again on next start */
+
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+ msghdr.msg_control = alloca0(msghdr.msg_controllen);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+
+ memcpy(CMSG_DATA(cmsg), &s->fd, sizeof(int));
+
+ l = sendmsg(s->server->notify_fd, &msghdr, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (l < 0) {
+ if (errno == EAGAIN)
+ return;
+
+ log_error_errno(errno, "Failed to send stream file descriptor to service manager: %m");
+ } else {
+ log_debug("Successfully sent stream file descriptor to service manager.");
+ s->fdstore = 1;
+ }
+
+ LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s);
+ s->in_notify_queue = false;
+
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-syslog.c b/src/grp-journal/libjournal-core/src/journald-syslog.c
new file mode 100644
index 0000000000..054a44b39f
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-syslog.c
@@ -0,0 +1,454 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2011 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stddef.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+#include <systemd/sd-messages.h>
+
+#include "journal-core/journald-console.h"
+#include "journal-core/journald-kmsg.h"
+#include "journal-core/journald-server.h"
+#include "journal-core/journald-syslog.h"
+#include "journal-core/journald-wall.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/fd-util.h"
+#include "systemd-basic/formats-util.h"
+#include "systemd-basic/io-util.h"
+#include "systemd-basic/process-util.h"
+#include "systemd-basic/selinux-util.h"
+#include "systemd-basic/socket-util.h"
+#include "systemd-basic/stdio-util.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-basic/syslog-util.h"
+
+/* Warn once every 30s if we missed syslog message */
+#define WARN_FORWARD_SYSLOG_MISSED_USEC (30 * USEC_PER_SEC)
+
+static void forward_syslog_iovec(Server *s, const struct iovec *iovec, unsigned n_iovec, const struct ucred *ucred, const struct timeval *tv) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/syslog",
+ };
+ struct msghdr msghdr = {
+ .msg_iov = (struct iovec *) iovec,
+ .msg_iovlen = n_iovec,
+ .msg_name = (struct sockaddr*) &sa.sa,
+ .msg_namelen = SOCKADDR_UN_LEN(sa.un),
+ };
+ struct cmsghdr *cmsg;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ } control;
+
+ assert(s);
+ assert(iovec);
+ assert(n_iovec > 0);
+
+ if (ucred) {
+ zero(control);
+ msghdr.msg_control = &control;
+ msghdr.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+ memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
+ msghdr.msg_controllen = cmsg->cmsg_len;
+ }
+
+ /* Forward the syslog message we received via /dev/log to
+ * /run/systemd/syslog. Unfortunately we currently can't set
+ * the SO_TIMESTAMP auxiliary data, and hence we don't. */
+
+ if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
+ return;
+
+ /* The socket is full? I guess the syslog implementation is
+ * too slow, and we shouldn't wait for that... */
+ if (errno == EAGAIN) {
+ s->n_forward_syslog_missed++;
+ return;
+ }
+
+ if (ucred && (errno == ESRCH || errno == EPERM)) {
+ struct ucred u;
+
+ /* Hmm, presumably the sender process vanished
+ * by now, or we don't have CAP_SYS_AMDIN, so
+ * let's fix it as good as we can, and retry */
+
+ u = *ucred;
+ u.pid = getpid();
+ memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred));
+
+ if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0)
+ return;
+
+ if (errno == EAGAIN) {
+ s->n_forward_syslog_missed++;
+ return;
+ }
+ }
+
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to forward syslog message: %m");
+}
+
+static void forward_syslog_raw(Server *s, int priority, const char *buffer, const struct ucred *ucred, const struct timeval *tv) {
+ struct iovec iovec;
+
+ assert(s);
+ assert(buffer);
+
+ if (LOG_PRI(priority) > s->max_level_syslog)
+ return;
+
+ IOVEC_SET_STRING(iovec, buffer);
+ forward_syslog_iovec(s, &iovec, 1, ucred, tv);
+}
+
+void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv) {
+ struct iovec iovec[5];
+ char header_priority[DECIMAL_STR_MAX(priority) + 3], header_time[64],
+ header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
+ int n = 0;
+ time_t t;
+ struct tm *tm;
+ char *ident_buf = NULL;
+
+ assert(s);
+ assert(priority >= 0);
+ assert(priority <= 999);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_syslog)
+ return;
+
+ /* First: priority field */
+ xsprintf(header_priority, "<%i>", priority);
+ IOVEC_SET_STRING(iovec[n++], header_priority);
+
+ /* Second: timestamp */
+ t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
+ tm = localtime(&t);
+ if (!tm)
+ return;
+ if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
+ return;
+ IOVEC_SET_STRING(iovec[n++], header_time);
+
+ /* Third: identifier and PID */
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
+
+ if (identifier)
+ IOVEC_SET_STRING(iovec[n++], identifier);
+
+ IOVEC_SET_STRING(iovec[n++], header_pid);
+ } else if (identifier) {
+ IOVEC_SET_STRING(iovec[n++], identifier);
+ IOVEC_SET_STRING(iovec[n++], ": ");
+ }
+
+ /* Fourth: message */
+ IOVEC_SET_STRING(iovec[n++], message);
+
+ forward_syslog_iovec(s, iovec, n, ucred, tv);
+
+ free(ident_buf);
+}
+
+int syslog_fixup_facility(int priority) {
+
+ if ((priority & LOG_FACMASK) == 0)
+ return (priority & LOG_PRIMASK) | LOG_USER;
+
+ return priority;
+}
+
+size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid) {
+ const char *p;
+ char *t;
+ size_t l, e;
+
+ assert(buf);
+ assert(identifier);
+ assert(pid);
+
+ p = *buf;
+
+ p += strspn(p, WHITESPACE);
+ l = strcspn(p, WHITESPACE);
+
+ if (l <= 0 ||
+ p[l-1] != ':')
+ return 0;
+
+ e = l;
+ l--;
+
+ if (p[l-1] == ']') {
+ size_t k = l-1;
+
+ for (;;) {
+
+ if (p[k] == '[') {
+ t = strndup(p+k+1, l-k-2);
+ if (t)
+ *pid = t;
+
+ l = k;
+ break;
+ }
+
+ if (k == 0)
+ break;
+
+ k--;
+ }
+ }
+
+ t = strndup(p, l);
+ if (t)
+ *identifier = t;
+
+ if (strchr(WHITESPACE, p[e]))
+ e++;
+ *buf = p + e;
+ return e;
+}
+
+static void syslog_skip_date(char **buf) {
+ enum {
+ LETTER,
+ SPACE,
+ NUMBER,
+ SPACE_OR_NUMBER,
+ COLON
+ } sequence[] = {
+ LETTER, LETTER, LETTER,
+ SPACE,
+ SPACE_OR_NUMBER, NUMBER,
+ SPACE,
+ SPACE_OR_NUMBER, NUMBER,
+ COLON,
+ SPACE_OR_NUMBER, NUMBER,
+ COLON,
+ SPACE_OR_NUMBER, NUMBER,
+ SPACE
+ };
+
+ char *p;
+ unsigned i;
+
+ assert(buf);
+ assert(*buf);
+
+ p = *buf;
+
+ for (i = 0; i < ELEMENTSOF(sequence); i++, p++) {
+
+ if (!*p)
+ return;
+
+ switch (sequence[i]) {
+
+ case SPACE:
+ if (*p != ' ')
+ return;
+ break;
+
+ case SPACE_OR_NUMBER:
+ if (*p == ' ')
+ break;
+
+ /* fall through */
+
+ case NUMBER:
+ if (*p < '0' || *p > '9')
+ return;
+
+ break;
+
+ case LETTER:
+ if (!(*p >= 'A' && *p <= 'Z') &&
+ !(*p >= 'a' && *p <= 'z'))
+ return;
+
+ break;
+
+ case COLON:
+ if (*p != ':')
+ return;
+ break;
+
+ }
+ }
+
+ *buf = p;
+}
+
+void server_process_syslog_message(
+ Server *s,
+ const char *buf,
+ const struct ucred *ucred,
+ const struct timeval *tv,
+ const char *label,
+ size_t label_len) {
+
+ char syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)],
+ syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)];
+ const char *message = NULL, *syslog_identifier = NULL, *syslog_pid = NULL;
+ struct iovec iovec[N_IOVEC_META_FIELDS + 6];
+ unsigned n = 0;
+ int priority = LOG_USER | LOG_INFO;
+ _cleanup_free_ char *identifier = NULL, *pid = NULL;
+ const char *orig;
+
+ assert(s);
+ assert(buf);
+
+ orig = buf;
+ syslog_parse_priority(&buf, &priority, true);
+
+ if (s->forward_to_syslog)
+ forward_syslog_raw(s, priority, orig, ucred, tv);
+
+ syslog_skip_date((char**) &buf);
+ syslog_parse_identifier(&buf, &identifier, &pid);
+
+ if (s->forward_to_kmsg)
+ server_forward_kmsg(s, priority, identifier, buf, ucred);
+
+ if (s->forward_to_console)
+ server_forward_console(s, priority, identifier, buf, ucred);
+
+ if (s->forward_to_wall)
+ server_forward_wall(s, priority, identifier, buf, ucred);
+
+ IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
+
+ xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
+ IOVEC_SET_STRING(iovec[n++], syslog_priority);
+
+ if (priority & LOG_FACMASK) {
+ xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
+ IOVEC_SET_STRING(iovec[n++], syslog_facility);
+ }
+
+ if (identifier) {
+ syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier);
+ IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+ }
+
+ if (pid) {
+ syslog_pid = strjoina("SYSLOG_PID=", pid);
+ IOVEC_SET_STRING(iovec[n++], syslog_pid);
+ }
+
+ message = strjoina("MESSAGE=", buf);
+ if (message)
+ IOVEC_SET_STRING(iovec[n++], message);
+
+ server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), ucred, tv, label, label_len, NULL, priority, 0);
+}
+
+int server_open_syslog_socket(Server *s) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/dev-log",
+ };
+ static const int one = 1;
+ int r;
+
+ assert(s);
+
+ if (s->syslog_fd < 0) {
+ s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (s->syslog_fd < 0)
+ return log_error_errno(errno, "socket() failed: %m");
+
+ (void) unlink(sa.un.sun_path);
+
+ r = bind(s->syslog_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ (void) chmod(sa.un.sun_path, 0666);
+ } else
+ fd_nonblock(s->syslog_fd, 1);
+
+ r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
+ if (r < 0)
+ return log_error_errno(errno, "SO_PASSCRED failed: %m");
+
+#ifdef HAVE_SELINUX
+ if (mac_selinux_have()) {
+ r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
+ if (r < 0)
+ log_warning_errno(errno, "SO_PASSSEC failed: %m");
+ }
+#endif
+
+ r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, &one, sizeof(one));
+ if (r < 0)
+ return log_error_errno(errno, "SO_TIMESTAMP failed: %m");
+
+ r = sd_event_add_io(s->event, &s->syslog_event_source, s->syslog_fd, EPOLLIN, server_process_datagram, s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add syslog server fd to event loop: %m");
+
+ r = sd_event_source_set_priority(s->syslog_event_source, SD_EVENT_PRIORITY_NORMAL+5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to adjust syslog event source priority: %m");
+
+ return 0;
+}
+
+void server_maybe_warn_forward_syslog_missed(Server *s) {
+ usec_t n;
+
+ assert(s);
+
+ if (s->n_forward_syslog_missed <= 0)
+ return;
+
+ n = now(CLOCK_MONOTONIC);
+ if (s->last_warn_forward_syslog_missed + WARN_FORWARD_SYSLOG_MISSED_USEC > n)
+ return;
+
+ server_driver_message(s, SD_MESSAGE_FORWARD_SYSLOG_MISSED,
+ LOG_MESSAGE("Forwarding to syslog missed %u messages.",
+ s->n_forward_syslog_missed),
+ NULL);
+
+ s->n_forward_syslog_missed = 0;
+ s->last_warn_forward_syslog_missed = n;
+}
diff --git a/src/grp-journal/libjournal-core/src/journald-wall.c b/src/grp-journal/libjournal-core/src/journald-wall.c
new file mode 100644
index 0000000000..242e69f6c3
--- /dev/null
+++ b/src/grp-journal/libjournal-core/src/journald-wall.c
@@ -0,0 +1,71 @@
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Sebastian Thorarensen
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "journal-core/journald-server.h"
+#include "journal-core/journald-wall.h"
+#include "systemd-basic/alloc-util.h"
+#include "systemd-basic/formats-util.h"
+#include "systemd-basic/process-util.h"
+#include "systemd-basic/string-util.h"
+#include "systemd-shared/utmp-wtmp.h"
+
+void server_forward_wall(
+ Server *s,
+ int priority,
+ const char *identifier,
+ const char *message,
+ const struct ucred *ucred) {
+
+ _cleanup_free_ char *ident_buf = NULL, *l_buf = NULL;
+ const char *l;
+ int r;
+
+ assert(s);
+ assert(message);
+
+ if (LOG_PRI(priority) > s->max_level_wall)
+ return;
+
+ if (ucred) {
+ if (!identifier) {
+ get_process_comm(ucred->pid, &ident_buf);
+ identifier = ident_buf;
+ }
+
+ if (asprintf(&l_buf, "%s["PID_FMT"]: %s", strempty(identifier), ucred->pid, message) < 0) {
+ log_oom();
+ return;
+ }
+
+ l = l_buf;
+
+ } else if (identifier) {
+
+ l = l_buf = strjoin(identifier, ": ", message, NULL);
+ if (!l_buf) {
+ log_oom();
+ return;
+ }
+ } else
+ l = message;
+
+ r = utmp_wall(l, "systemd-journald", NULL, NULL, NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to send wall message: %m");
+}