From 92e92d71faea0f107312f296b7756cc04281ba99 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 26 Feb 2017 16:46:23 -0500 Subject: coredump: process special crashes in an (almost) normal way We would only log a terse message when pid1 or systemd-journald crashed. It seems better to reuse the normal code paths as much as possible, with the following differences: - if pid1 crashes, we cannot launch the helper, so we don't analyze the coredump, just write it to file directly from the helper invoked by the kernel; - if journald crashes, we can produce the backtrace, but we don't log full structured messages. With comparison to previous code, advantages are: - we go through most of the steps, so for example vacuuming is performed, - we gather and log more data. In particular for journald and pid1 crashes we generate a backtrace, and for pid1 crashes we record the metadata (fdinfo, maps, etc.), - coredumpctl shows pid1 crashes. A disavantage (inefficiency) is that we gather metadata for journald crashes which is then ignored because _TRANSPORT=kernel does not support structued messages. Messages for the systemd-journald "crash" have _TRANSPORT=kernel, and _TRANSPORT=journal for the pid1 "crash". Feb 26 16:27:55 systemd[1]: systemd-journald.service: Main process exited, code=dumped, status=11/SEGV Feb 26 16:27:55 systemd[1]: systemd-journald.service: Unit entered failed state. Feb 26 16:37:54 systemd-coredump[18801]: Process 18729 (systemd-journal) of user 0 dumped core. Feb 26 16:37:54 systemd-coredump[18801]: Coredump diverted to /var/lib/systemd/coredump/core.systemd-journal.0.36c14bf3c6ce4c38914f441038990979.18729.1488145074000000.lz4 Feb 26 16:37:54 systemd-coredump[18801]: Stack trace of thread 18729: Feb 26 16:37:54 systemd-coredump[18801]: #0 0x00007f46d6a06b8d fsync (libpthread.so.0) Feb 26 16:37:54 systemd-coredump[18801]: #1 0x00007f46d71bfc47 journal_file_set_online (libsystemd-shared-233.so) Feb 26 16:37:54 systemd-coredump[18801]: #2 0x00007f46d71c1c31 journal_file_append_object (libsystemd-shared-233.so) Feb 26 16:37:54 systemd-coredump[18801]: #3 0x00007f46d71c3405 journal_file_append_data (libsystemd-shared-233.so) Feb 26 16:37:54 systemd-coredump[18801]: #4 0x00007f46d71c4b7c journal_file_append_entry (libsystemd-shared-233.so) Feb 26 16:37:54 systemd-coredump[18801]: #5 0x00005577688cf056 write_to_journal (systemd-journald) Feb 26 16:37:54 systemd-coredump[18801]: #6 0x00005577688d2e98 dispatch_message_real (systemd-journald) Feb 26 16:37:54 kernel: systemd-coredum: 9 output lines suppressed due to ratelimiting Feb 26 16:37:54 systemd-journald[18810]: Journal started Feb 26 16:50:59 systemd-coredump[19229]: Due to PID 1 having crashed coredump collection will now be turned off. Feb 26 16:51:00 systemd[1]: Caught , dumped core as pid 19228. Feb 26 16:51:00 systemd[1]: Freezing execution. Feb 26 16:51:00 systemd-coredump[19229]: Process 19228 (systemd) of user 0 dumped core. Stack trace of thread 19228: #0 0x00007fab82075c47 kill (libc.so.6) #1 0x000055fdf7c38b6b crash (systemd) #2 0x00007fab824175c0 __restore_rt (libpthread.so.0) #3 0x00007fab82148573 epoll_wait (libc.so.6) #4 0x00007fab8366f84a sd_event_wait (libsystemd-shared-233.so) #5 0x00007fab836701de sd_event_run (libsystemd-shared-233.so) #6 0x000055fdf7c4a380 manager_loop (systemd) #7 0x000055fdf7c402c2 main (systemd) #8 0x00007fab82060401 __libc_start_main (libc.so.6) #9 0x000055fdf7c3818a _start (systemd) Poor machine ;) --- src/coredump/coredump.c | 124 ++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 9e7206cc6a..270af630cf 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -102,6 +102,7 @@ enum { CONTEXT_RLIMIT, CONTEXT_COMM, CONTEXT_EXE, + CONTEXT_UNIT, _CONTEXT_MAX }; @@ -706,7 +707,7 @@ static int submit_coredump( _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; _cleanup_free_ char *core_message = NULL, *filename = NULL, *coredump_data = NULL; uint64_t coredump_size = UINT64_MAX; - bool truncated = false; + bool truncated = false, journald_crash; int r; assert(context); @@ -714,6 +715,8 @@ static int submit_coredump( assert(n_iovec_allocated >= n_iovec + SUBMIT_COREDUMP_FIELDS); assert(input_fd >= 0); + journald_crash = streq_ptr(context[CONTEXT_UNIT], SPECIAL_JOURNALD_SERVICE); + /* Vacuum before we write anything again */ (void) coredump_vacuum(-1, arg_keep_free, arg_max_use); @@ -758,8 +761,10 @@ static int submit_coredump( if (r >= 0) core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], " (", context[CONTEXT_COMM], ") of user ", - context[CONTEXT_UID], " dumped core.\n\n", - stacktrace); + context[CONTEXT_UID], " dumped core.", + journald_crash ? "\nCoredump diverted to " : "", + journald_crash ? filename : "", + "\n\n", stacktrace); else if (r == -EINVAL) log_warning("Failed to generate stack trace: %s", dwfl_errmsg(dwfl_errno())); else @@ -771,9 +776,21 @@ static int submit_coredump( if (!core_message) #endif log: - core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], " (", - context[CONTEXT_COMM], ") of user ", - context[CONTEXT_UID], " dumped core."); + core_message = strjoin("MESSAGE=Process ", context[CONTEXT_PID], + " (", context[CONTEXT_COMM], ") of user ", + context[CONTEXT_UID], " dumped core.", + journald_crash ? "\nCoredump diverted to " : NULL, + journald_crash ? filename : NULL); + if (!core_message) + return log_oom(); + + if (journald_crash) { + /* We cannot log to the journal, so just print the MESSAGE. + * The target was set previously to something safe. */ + log_struct(LOG_ERR, core_message, NULL); + return 0; + } + if (core_message) IOVEC_SET_STRING(iovec[n_iovec++], core_message); @@ -810,15 +827,15 @@ log: static void map_context_fields(const struct iovec *iovec, const char* context[]) { - static const char * const context_field_names[_CONTEXT_MAX] = { + static const char * const context_field_names[] = { [CONTEXT_PID] = "COREDUMP_PID=", [CONTEXT_UID] = "COREDUMP_UID=", [CONTEXT_GID] = "COREDUMP_GID=", [CONTEXT_SIGNAL] = "COREDUMP_SIGNAL=", [CONTEXT_TIMESTAMP] = "COREDUMP_TIMESTAMP=", + [CONTEXT_RLIMIT] = "COREDUMP_RLIMIT=", [CONTEXT_COMM] = "COREDUMP_COMM=", [CONTEXT_EXE] = "COREDUMP_EXE=", - [CONTEXT_RLIMIT] = "COREDUMP_RLIMIT=", }; unsigned i; @@ -826,9 +843,12 @@ static void map_context_fields(const struct iovec *iovec, const char* context[]) assert(iovec); assert(context); - for (i = 0; i < _CONTEXT_MAX; i++) { + for (i = 0; i < ELEMENTSOF(context_field_names); i++) { size_t l; + if (!context_field_names[i]) + continue; + l = strlen(context_field_names[i]); if (iovec->iov_len < l) continue; @@ -1033,35 +1053,6 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) return 0; } -static int process_special_crash(const char *context[], int input_fd) { - _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; - _cleanup_free_ char *filename = NULL; - uint64_t coredump_size; - bool truncated; - int r; - - assert(context); - assert(input_fd >= 0); - - /* If we are pid1 or journald, we cut things short, don't write to the journal, but still create a coredump. */ - - if (arg_storage != COREDUMP_STORAGE_NONE) - arg_storage = COREDUMP_STORAGE_EXTERNAL; - - r = save_external_coredump(context, input_fd, - &filename, &coredump_node_fd, &coredump_fd, &coredump_size, &truncated); - if (r < 0) - return r; - - r = maybe_remove_external_coredump(filename, coredump_size); - if (r < 0) - return r; - - log_notice("Detected coredump of the journal daemon or PID 1, diverted to %s.", filename); - - return 0; -} - static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char *field, const char *value) { char *x; @@ -1079,7 +1070,7 @@ static char* set_iovec_field_free(struct iovec iovec[27], size_t *n_iovec, const return x; } -static int gather_pid_metadata_and_process_special_crash( +static int gather_pid_metadata( char* context[_CONTEXT_MAX], char **comm_fallback, struct iovec *iovec, size_t *n_iovec) { @@ -1111,32 +1102,22 @@ static int gather_pid_metadata_and_process_special_crash( if (r < 0) log_warning_errno(r, "Failed to get EXE, ignoring: %m"); - if (cg_pid_get_unit(pid, &t) >= 0) { + if (cg_pid_get_unit(pid, &context[CONTEXT_UNIT]) >= 0) { + if (!streq(context[CONTEXT_UNIT], SPECIAL_JOURNALD_SERVICE)) { + /* OK, now we know it's not the journal, hence we can make use of it now. */ + log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); + log_open(); + } /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */ - if (streq(t, SPECIAL_INIT_SCOPE)) { + if (streq(context[CONTEXT_UNIT], SPECIAL_INIT_SCOPE)) { log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0); } - /* Let's avoid dead-locks when processing journald and init crashes, as socket activation and logging - * are unlikely to work then. */ - if (STR_IN_SET(t, SPECIAL_JOURNALD_SERVICE, SPECIAL_INIT_SCOPE)) { - free(t); - r = process_special_crash((const char**) context, STDIN_FILENO); - if (r < 0) - return r; - - return 1; /* > 0 means: we have already processed it, because it's a special crash */ - } - - set_iovec_field_free(iovec, n_iovec, "COREDUMP_UNIT=", t); + set_iovec_field(iovec, n_iovec, "COREDUMP_UNIT=", context[CONTEXT_UNIT]); } - /* OK, now we know it's not the journal, hence we can make use of it now. */ - log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); - log_open(); - if (cg_pid_get_user_unit(pid, &t) >= 0) set_iovec_field_free(iovec, n_iovec, "COREDUMP_USER_UNIT=", t); @@ -1230,13 +1211,13 @@ static int gather_pid_metadata_and_process_special_crash( if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo)) set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo)); - return 0; /* == 0 means: we successfully acquired all metadata */ + return 0; /* we successfully acquired all metadata */ } static int process_kernel(int argc, char* argv[]) { char* context[_CONTEXT_MAX] = {}; - struct iovec iovec[28]; + struct iovec iovec[28 + SUBMIT_COREDUMP_FIELDS]; size_t i, n_iovec, n_to_free = 0; int r; @@ -1254,9 +1235,8 @@ static int process_kernel(int argc, char* argv[]) { context[CONTEXT_TIMESTAMP] = argv[1 + CONTEXT_TIMESTAMP]; context[CONTEXT_RLIMIT] = argv[1 + CONTEXT_RLIMIT]; - r = gather_pid_metadata_and_process_special_crash(context, argv + 1 + CONTEXT_COMM, iovec, &n_to_free); - if (r != 0) - /* Error, or a a special crash, which has already been processed. */ + r = gather_pid_metadata(context, argv + 1 + CONTEXT_COMM, iovec, &n_to_free); + if (r < 0) goto finish; n_iovec = n_to_free; @@ -1268,15 +1248,23 @@ static int process_kernel(int argc, char* argv[]) { assert(n_iovec <= ELEMENTSOF(iovec)); - r = send_iovec(iovec, n_iovec, STDIN_FILENO); + if (STRPTR_IN_SET(context[CONTEXT_UNIT], + SPECIAL_JOURNALD_SERVICE, + SPECIAL_INIT_SCOPE)) + r = submit_coredump((const char**) context, + iovec, ELEMENTSOF(iovec), n_iovec, + STDIN_FILENO); + else + r = send_iovec(iovec, n_iovec, STDIN_FILENO); finish: for (i = 0; i < n_to_free; i++) free(iovec[i].iov_base); - /* Those fields are allocated by gather_pid_metadata_and_process_special_crash */ + /* Those fields are allocated by gather_pid_metadata */ free(context[CONTEXT_COMM]); free(context[CONTEXT_EXE]); + free(context[CONTEXT_UNIT]); return r; } @@ -1305,12 +1293,13 @@ static int process_backtrace(int argc, char *argv[]) { context[CONTEXT_TIMESTAMP] = argv[2 + CONTEXT_TIMESTAMP]; context[CONTEXT_RLIMIT] = argv[2 + CONTEXT_RLIMIT]; - n_allocated = 33; /* 25 metadata, 2 static, +unknown input, rounded up */ + n_allocated = 33 + COREDUMP_STORAGE_EXTERNAL; + /* 25 metadata, 2 static, +unknown input, 4 storage, rounded up */ iovec = new(struct iovec, n_allocated); if (!iovec) return log_oom(); - r = gather_pid_metadata_and_process_special_crash(context, argv + 2 + CONTEXT_COMM, iovec, &n_to_free); + r = gather_pid_metadata(context, argv + 2 + CONTEXT_COMM, iovec, &n_to_free); if (r < 0) goto finish; if (r > 0) { @@ -1364,9 +1353,10 @@ static int process_backtrace(int argc, char *argv[]) { for (i = 0; i < n_to_free; i++) free(iovec[i].iov_base); - /* Those fields are allocated by gather_pid_metadata_and_process_special_crash */ + /* Those fields are allocated by gather_pid_metadata */ free(context[CONTEXT_COMM]); free(context[CONTEXT_EXE]); + free(context[CONTEXT_UNIT]); return r; } -- cgit v1.2.3-54-g00ecf