summaryrefslogtreecommitdiff
path: root/src/journal-remote
diff options
context:
space:
mode:
authorKlearchos Chaloulos <klearchos.chaloulos@nokia.com>2016-04-05 13:47:04 +0300
committerKlearchos Chaloulos <klearchos.chaloulos@nokia.com>2016-04-05 17:37:00 +0300
commitd79ca7a622abbb0df6f5166cc0e4669373d9a614 (patch)
treef49167fcfbbdb02c267b0026e1c1022fa4fab7ae /src/journal-remote
parent050d7e19983e6123cba650907d7d33acf2640956 (diff)
journal-upload: Update watchdog while in curl_easy_perform
A combination of high log throughput, slow I/O on the journal-remote side, and many nodes uploading simultaneously was observed to make the journal-upload process dump core because of watchdog starvation. The cause is that journal-upload stays inside curl_easy_perform() when it cannot upload fast enough to reach the end of the journal: currently it returns from curl_easy_perform() only once the end of the journal has been reached. Therefore a check is added to journal_input_callback() that updates the watchdog whenever the time elapsed since the start of the upload (or since the previous watchdog update) exceeds WATCHDOG_USEC/2.
Diffstat (limited to 'src/journal-remote')
-rw-r--r--src/journal-remote/journal-upload-journal.c25
-rw-r--r--src/journal-remote/journal-upload.c1
-rw-r--r--src/journal-remote/journal-upload.h1
3 files changed, 27 insertions, 0 deletions
diff --git a/src/journal-remote/journal-upload-journal.c b/src/journal-remote/journal-upload-journal.c
index e61b6bc68f..ac6eb58a9f 100644
--- a/src/journal-remote/journal-upload-journal.c
+++ b/src/journal-remote/journal-upload-journal.c
@@ -25,6 +25,7 @@
#include "log.h"
#include "utf8.h"
#include "util.h"
+#include "sd-daemon.h"
/**
* Write up to size bytes to buf. Return negative on error, and number of
@@ -242,6 +243,28 @@ static ssize_t write_entry(char *buf, size_t size, Uploader *u) {
assert_not_reached("WTF?");
}
+/* Send WATCHDOG=1 once more than half of the watchdog timeout has passed since the
+ * reference timestamp; the first call after a reset only records that timestamp. */
+static inline void check_update_watchdog(Uploader *u) {
+        static usec_t before; /* reference timestamp, kept across calls */
+        usec_t watchdog_usec, after, elapsed_time;
+        /* 0 means no watchdog is expected and watchdog_usec is not filled in, so bail out too. */
+        if (sd_watchdog_enabled(false, &watchdog_usec) <= 0)
+                return;
+        if (u->reset_reference_timestamp) {
+                before = now(CLOCK_MONOTONIC);
+                u->reset_reference_timestamp = false;
+        } else {
+                after = now(CLOCK_MONOTONIC);
+                elapsed_time = usec_sub(after, before);
+                if (elapsed_time > watchdog_usec / 2) {
+                        log_debug("Update watchdog timer");
+                        sd_notify(false, "WATCHDOG=1");
+                        /* The next call starts a fresh measurement interval. */
+                        u->reset_reference_timestamp = true;
+                }
+        }
+}
+
static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void *userp) {
Uploader *u = userp;
int r;
@@ -252,6 +275,8 @@ static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void
assert(u);
assert(nmemb <= SSIZE_MAX / size);
+ check_update_watchdog(u);
+
j = u->journal;
while (j && filled < size * nmemb) {
diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c
index 6e1c3bb9ef..f2e9117f9f 100644
--- a/src/journal-remote/journal-upload.c
+++ b/src/journal-remote/journal-upload.c
@@ -494,6 +494,7 @@ static int perform_upload(Uploader *u) {
assert(u);
+ u->reset_reference_timestamp = true;
code = curl_easy_perform(u->easy);
if (code) {
if (u->error[0])
diff --git a/src/journal-remote/journal-upload.h b/src/journal-remote/journal-upload.h
index b8cd04d527..a31735bd08 100644
--- a/src/journal-remote/journal-upload.h
+++ b/src/journal-remote/journal-upload.h
@@ -48,6 +48,7 @@ typedef struct Uploader {
size_t entries_sent;
char *last_cursor, *current_cursor;
+ bool reset_reference_timestamp;
} Uploader;
#define JOURNAL_UPLOAD_POLL_TIMEOUT (10 * USEC_PER_SEC)