From 2aaec9b4f61c1c1fa3374f40f0f1e828b6c53e1e Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Thu, 10 Dec 2015 09:38:50 -0500 Subject: journal: decompress_startswith can return an error The return value was used directly in an if, so an error was treated as success; we need to bail out instead. An error should not happen, unless we have a compression/decompression mismatch, so output a debug line. --- src/journal/sd-journal.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/journal') diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 5cde7f17f7..cd5160154a 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -1940,10 +1940,14 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void ** compression = o->object.flags & OBJECT_COMPRESSION_MASK; if (compression) { #if defined(HAVE_XZ) || defined(HAVE_LZ4) - if (decompress_startswith(compression, + r = decompress_startswith(compression, o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, - field, field_length, '=')) { + field, field_length, '='); + if (r < 0) + log_debug_errno(r, "Cannot decompress %s object of length %zu at offset "OFSfmt": %m", + object_compressed_to_string(compression), l, p); + else if (r > 0) { size_t rsize; -- cgit v1.2.3-54-g00ecf From 1f4b467daa2999ab0e4345c1f1069567852f567f Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Fri, 11 Dec 2015 09:10:33 -0500 Subject: journal: in some cases we have to decompress the full lz4 field lz4 has to decompress a whole "sequence" at a time. When the compressed data is composed of a repeating pattern, the whole set of repeats has do be docompressed, and the output buffer has to be big enough. This is unfortunate, because potentially the slowdown is very big. We are only interested in the field name, but we might have to decompress the whole thing. But the full cost will be borne out only when the full entry is a repeating pattern. In practice this shouldn't happen (apart from tests and the like). Hopefully lz4 will be fixed to avoid this problem, or it will grow a new function which we can use [1], so this fix should be remporary. [1] https://groups.google.com/d/msg/lz4c/_3kkz5N6n00/oTahzqErCgAJ --- src/journal/compress.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'src/journal') diff --git a/src/journal/compress.c b/src/journal/compress.c index 1a3d2cdd80..a3de0882a5 100644 --- a/src/journal/compress.c +++ b/src/journal/compress.c @@ -306,6 +306,7 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size, * prefix */ int r; + size_t size; assert(src); assert(src_size > 0); @@ -322,10 +323,18 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size, r = LZ4_decompress_safe_partial(src + 8, *buffer, src_size - 8, prefix_len + 1, *buffer_size); + if (r >= 0) + size = (unsigned) r; + else { + /* lz4 always tries to decode full "sequence", so in + * pathological cases might need to decompress the + * full field. */ + r = decompress_blob_lz4(src, src_size, buffer, buffer_size, &size, 0); + if (r < 0) + return r; + } - if (r < 0) - return -EBADMSG; - if ((unsigned) r >= prefix_len + 1) + if (size >= prefix_len + 1) return memcmp(*buffer, prefix, prefix_len) == 0 && ((const uint8_t*) *buffer)[prefix_len] == extra; else -- cgit v1.2.3-54-g00ecf From 5d6f46b6bf7804af8c1ba780f72a37dc37193428 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 13 Dec 2015 13:39:12 -0500 Subject: journal: add dst_allocated_size parameter for compress_blob compress_blob took src, src_size, dst and *dst_size, but dst_size wasn't used as an input parameter with the size of dst, but only as an output parameter. dst was implicitly assumed to be at least src_size-1. This code wasn't *wrong*, because the only real caller in journal-file.c got it right. But it was misleading, and the tests in test-compress.c got it wrong, and worked only because the output buffer happened to be the same size as input buffer. So add a seperate dst_allocated_size parameter to make it explicit what the size of the buffer is, and to allow test to proceed with different output buffer sizes. --- src/journal/compress.c | 12 ++++++++---- src/journal/compress.h | 13 ++++++++----- src/journal/journal-file.c | 2 +- src/journal/test-compress-benchmark.c | 7 ++++--- src/journal/test-compress.c | 12 +++++------- 5 files changed, 26 insertions(+), 20 deletions(-) (limited to 'src/journal') diff --git a/src/journal/compress.c b/src/journal/compress.c index a3de0882a5..e1ab4a46f5 100644 --- a/src/journal/compress.c +++ b/src/journal/compress.c @@ -58,7 +58,8 @@ static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = { DEFINE_STRING_TABLE_LOOKUP(object_compressed, int); -int compress_blob_xz(const void *src, uint64_t src_size, void *dst, size_t *dst_size) { +int compress_blob_xz(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { #ifdef HAVE_XZ static const lzma_options_lzma opt = { 1u << 20u, NULL, 0, LZMA_LC_DEFAULT, LZMA_LP_DEFAULT, @@ -74,6 +75,7 @@ int compress_blob_xz(const void *src, uint64_t src_size, void *dst, size_t *dst_ assert(src); assert(src_size > 0); assert(dst); + assert(dst_alloc_size > 0); assert(dst_size); /* Returns < 0 if we couldn't compress the data or the @@ -83,7 +85,7 @@ int compress_blob_xz(const void *src, uint64_t src_size, void *dst, size_t *dst_ return -ENOBUFS; ret = lzma_stream_buffer_encode((lzma_filter*) filters, LZMA_CHECK_NONE, NULL, - src, src_size, dst, &out_pos, src_size - 1); + src, src_size, dst, &out_pos, dst_alloc_size); if (ret != LZMA_OK) return -ENOBUFS; @@ -94,13 +96,15 @@ int compress_blob_xz(const void *src, uint64_t src_size, void *dst, size_t *dst_ #endif } -int compress_blob_lz4(const void *src, uint64_t src_size, void *dst, size_t *dst_size) { +int compress_blob_lz4(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { #ifdef HAVE_LZ4 int r; assert(src); assert(src_size > 0); assert(dst); + assert(dst_alloc_size > 0); assert(dst_size); /* Returns < 0 if we couldn't compress the data or the @@ -109,7 +113,7 @@ int compress_blob_lz4(const void *src, uint64_t src_size, void *dst, size_t *dst if (src_size < 9) return -ENOBUFS; - r = LZ4_compress_limitedOutput(src, dst + 8, src_size, src_size - 8 - 1); + r = LZ4_compress_limitedOutput(src, dst + 8, src_size, (int) dst_alloc_size - 8); if (r <= 0) return -ENOBUFS; diff --git a/src/journal/compress.h b/src/journal/compress.h index 9a065eb763..758598730a 100644 --- a/src/journal/compress.h +++ b/src/journal/compress.h @@ -28,17 +28,20 @@ const char* object_compressed_to_string(int compression); int object_compressed_from_string(const char *compression); -int compress_blob_xz(const void *src, uint64_t src_size, void *dst, size_t *dst_size); -int compress_blob_lz4(const void *src, uint64_t src_size, void *dst, size_t *dst_size); +int compress_blob_xz(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size); +int compress_blob_lz4(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size); -static inline int compress_blob(const void *src, uint64_t src_size, void *dst, size_t *dst_size) { +static inline int compress_blob(const void *src, uint64_t src_size, + void *dst, size_t dst_alloc_size, size_t *dst_size) { int r; #ifdef HAVE_LZ4 - r = compress_blob_lz4(src, src_size, dst, dst_size); + r = compress_blob_lz4(src, src_size, dst, dst_alloc_size, dst_size); if (r == 0) return OBJECT_COMPRESSED_LZ4; #else - r = compress_blob_xz(src, src_size, dst, dst_size); + r = compress_blob_xz(src, src_size, dst, dst_alloc_size, dst_size); if (r == 0) return OBJECT_COMPRESSED_XZ; #endif diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index f9ff9545dd..751e1423fc 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -1085,7 +1085,7 @@ static int journal_file_append_data( if (JOURNAL_FILE_COMPRESS(f) && size >= COMPRESSION_SIZE_THRESHOLD) { size_t rsize = 0; - compression = compress_blob(data, size, o->data.payload, &rsize); + compression = compress_blob(data, size, o->data.payload, size - 1, &rsize); if (compression >= 0) { o->object.size = htole64(offsetof(Object, data.payload) + rsize); diff --git a/src/journal/test-compress-benchmark.c b/src/journal/test-compress-benchmark.c index 93ea9c6318..baed0d82a4 100644 --- a/src/journal/test-compress-benchmark.c +++ b/src/journal/test-compress-benchmark.c @@ -27,7 +27,8 @@ #include "string-util.h" #include "util.h" -typedef int (compress_t)(const void *src, uint64_t src_size, void *dst, size_t *dst_size); +typedef int (compress_t)(const void *src, uint64_t src_size, void *dst, + size_t dst_alloc_size, size_t *dst_size); typedef int (decompress_t)(const void *src, uint64_t src_size, void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max); @@ -111,8 +112,8 @@ static void test_compress_decompress(const char* label, const char* type, memzero(buf, MIN(size + 1000, MAX_SIZE)); - r = compress(text, size, buf, &j); - /* assume compression must be successful except for small inputs */ + r = compress(text, size, buf, size, &j); + /* assume compression must be successful except for small or random inputs */ assert_se(r == 0 || (size < 2048 && r == -ENOBUFS) || streq(type, "random")); /* check for overwrites */ diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c index b9d90a8988..62b5a508be 100644 --- a/src/journal/test-compress.c +++ b/src/journal/test-compress.c @@ -38,7 +38,7 @@ #endif typedef int (compress_blob_t)(const void *src, uint64_t src_size, - void *dst, size_t *dst_size); + void *dst, size_t dst_alloc_size, size_t *dst_size); typedef int (decompress_blob_t)(const void *src, uint64_t src_size, void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max); @@ -57,15 +57,14 @@ static void test_compress_decompress(int compression, size_t data_len, bool may_fail) { char compressed[512]; - size_t csize = 512; - size_t usize = 0; + size_t csize, usize = 0; _cleanup_free_ char *decompressed = NULL; int r; log_info("/* testing %s %s blob compression/decompression */", object_compressed_to_string(compression), data); - r = compress(data, data_len, compressed, &csize); + r = compress(data, data_len, compressed, sizeof(compressed), &csize); if (r == -ENOBUFS) { log_info_errno(r, "compression failed: %m"); assert_se(may_fail); @@ -102,15 +101,14 @@ static void test_decompress_startswith(int compression, bool may_fail) { char compressed[512]; - size_t csize = 512; - size_t usize = 0; + size_t csize, usize = 0; _cleanup_free_ char *decompressed = NULL; int r; log_info("/* testing decompress_startswith with %s on %s text*/", object_compressed_to_string(compression), data); - r = compress(data, data_len, compressed, &csize); + r = compress(data, data_len, compressed, sizeof(compressed), &csize); if (r == -ENOBUFS) { log_info_errno(r, "compression failed: %m"); assert_se(may_fail); -- cgit v1.2.3-54-g00ecf From d487b8151319a19313786feeba3af610bfb1a45f Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 13 Dec 2015 14:20:21 -0500 Subject: journal: fix reporting of output size in compres_stream_lz4 The header is 7 bytes, and this size was not accounted for in total_out. This means that we could create a file that was 7 bytes longer than requested, and the debug output was also inconsistent. --- src/journal/compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/journal') diff --git a/src/journal/compress.c b/src/journal/compress.c index e1ab4a46f5..1828165894 100644 --- a/src/journal/compress.c +++ b/src/journal/compress.c @@ -451,7 +451,7 @@ int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL; _cleanup_free_ char *buf = NULL; char *src = NULL; - size_t size, n, total_in = 0, total_out = 0, offset = 0, frame_size; + size_t size, n, total_in = 0, total_out, offset = 0, frame_size; struct stat st; int r; static const LZ4F_compressOptions_t options = { @@ -474,7 +474,7 @@ int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { if (!buf) return -ENOMEM; - n = offset = LZ4F_compressBegin(ctx, buf, size, &preferences); + n = offset = total_out = LZ4F_compressBegin(ctx, buf, size, &preferences); if (LZ4F_isError(n)) return -EINVAL; -- cgit v1.2.3-54-g00ecf From e3cc7fc45b395416462200e61af6877966488ea0 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 13 Dec 2015 14:24:55 -0500 Subject: journal: add "xfail" test for partial lz4 decompression Add a test that LZ4_decompress_safe_partial does (not) work as expected, so that if it starts to work at some point, we'll catch this and adjust our code. --- src/journal/test-compress.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'src/journal') diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c index 62b5a508be..026fb173bc 100644 --- a/src/journal/test-compress.c +++ b/src/journal/test-compress.c @@ -17,6 +17,10 @@ along with systemd; If not, see . ***/ +#ifdef HAVE_LZ4 +#include +#endif + #include "alloc-util.h" #include "compress.h" #include "fd-util.h" @@ -197,6 +201,42 @@ static void test_compress_stream(int compression, assert_se(unlink(pattern2) == 0); } +#ifdef HAVE_LZ4 +static void test_lz4_decompress_partial(void) { + char buf[20000]; + size_t buf_size = sizeof(buf), compressed; + int r; + + char huge[4096*1024]; + memset(huge, 'x', sizeof(huge)); + memcpy(huge, "HUGE=", 5); + + r = LZ4_compress_limitedOutput(huge, buf, sizeof(huge), buf_size); + assert_se(r >= 0); + compressed = r; + log_info("Compressed %zu → %zu", sizeof(huge), compressed); + + r = LZ4_decompress_safe(buf, huge, r, sizeof(huge)); + assert_se(r >= 0); + log_info("Decompressed → %i", r); + + r = LZ4_decompress_safe_partial(buf, huge, + compressed, + 12, (int) sizeof(huge)); + assert_se(r >= 0); + log_info("Decompressed partial %i/%zu → %i", 12, sizeof(huge), r); + + /* We expect this to fail, because that's how current lz4 works. If this + * call succeeds, then lz4 has been fixed, and we need to change our code. + */ + r = LZ4_decompress_safe_partial(buf, huge, + compressed, + 12, (int) sizeof(huge) - 1); + assert_se(r < 0); + log_info("Decompressed partial %i/%zu → %i", 12, sizeof(huge), r); +} +#endif + int main(int argc, char *argv[]) { const char text[] = "text\0foofoofoofoo AAAA aaaaaaaaa ghost busters barbarbar FFF" @@ -239,6 +279,8 @@ int main(int argc, char *argv[]) { test_compress_stream(OBJECT_COMPRESSED_LZ4, "lz4cat", compress_stream_lz4, decompress_stream_lz4, argv[0]); + + test_lz4_decompress_partial(); #else log_info("/* LZ4 test skipped */"); #endif -- cgit v1.2.3-54-g00ecf From ae5e1b19e78ed85259868d12ca8042fd2e87d423 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sun, 13 Dec 2015 14:33:17 -0500 Subject: journal: add the "repeating sequence" test case This was the case that caused various problems that were fixed in preceding patches, so it is good to add a test that uses it directly. In "may_fail" test cases try again with a bigger buffer. Instead of allocating various buffers on the stack, malloc them. This is more reliable in case of big buffers, and allows tools like valgrind and address sanitizer to find overflows more easily. --- src/journal/test-compress.c | 89 +++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 35 deletions(-) (limited to 'src/journal') diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c index 026fb173bc..68c9a4d76c 100644 --- a/src/journal/test-compress.c +++ b/src/journal/test-compress.c @@ -104,42 +104,45 @@ static void test_decompress_startswith(int compression, size_t data_len, bool may_fail) { - char compressed[512]; - size_t csize, usize = 0; - _cleanup_free_ char *decompressed = NULL; + char *compressed; + _cleanup_free_ char *compressed1 = NULL, *compressed2 = NULL, *decompressed = NULL; + size_t csize, usize = 0, len; int r; - log_info("/* testing decompress_startswith with %s on %s text*/", + log_info("/* testing decompress_startswith with %s on %.20s text*/", object_compressed_to_string(compression), data); - r = compress(data, data_len, compressed, sizeof(compressed), &csize); +#define BUFSIZE_1 512 +#define BUFSIZE_2 20000 + + compressed = compressed1 = malloc(BUFSIZE_1); + assert_se(compressed1); + r = compress(data, data_len, compressed, BUFSIZE_1, &csize); if (r == -ENOBUFS) { log_info_errno(r, "compression failed: %m"); assert_se(may_fail); - return; + + compressed = compressed2 = malloc(BUFSIZE_2); + assert_se(compressed2); + r = compress(data, data_len, compressed, BUFSIZE_2, &csize); + assert(r == 0); } assert_se(r == 0); - assert_se(decompress_sw(compressed, - csize, - (void **) &decompressed, - &usize, - data, strlen(data), '\0') > 0); - assert_se(decompress_sw(compressed, - csize, - (void **) &decompressed, - &usize, - data, strlen(data), 'w') == 0); - assert_se(decompress_sw(compressed, - csize, - (void **) &decompressed, - &usize, - "barbarbar", 9, ' ') == 0); - assert_se(decompress_sw(compressed, - csize, - (void **) &decompressed, - &usize, - data, strlen(data), '\0') > 0); + len = strlen(data); + + r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, '\0'); + assert_se(r > 0); + r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, 'w'); + assert_se(r == 0); + r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, "barbarbar", 9, ' '); + assert_se(r == 0); + r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len - 1, data[len-1]); + assert_se(r > 0); + r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len - 1, 'w'); + assert_se(r == 0); + r = decompress_sw(compressed, csize, (void **) &decompressed, &usize, data, len, '\0'); + assert_se(r > 0); } static void test_compress_stream(int compression, @@ -206,34 +209,36 @@ static void test_lz4_decompress_partial(void) { char buf[20000]; size_t buf_size = sizeof(buf), compressed; int r; + _cleanup_free_ char *huge = NULL; - char huge[4096*1024]; - memset(huge, 'x', sizeof(huge)); +#define HUGE_SIZE (4096*1024) + huge = malloc(HUGE_SIZE); + memset(huge, 'x', HUGE_SIZE); memcpy(huge, "HUGE=", 5); - r = LZ4_compress_limitedOutput(huge, buf, sizeof(huge), buf_size); + r = LZ4_compress_limitedOutput(huge, buf, HUGE_SIZE, buf_size); assert_se(r >= 0); compressed = r; - log_info("Compressed %zu → %zu", sizeof(huge), compressed); + log_info("Compressed %i → %zu", HUGE_SIZE, compressed); - r = LZ4_decompress_safe(buf, huge, r, sizeof(huge)); + r = LZ4_decompress_safe(buf, huge, r, HUGE_SIZE); assert_se(r >= 0); log_info("Decompressed → %i", r); r = LZ4_decompress_safe_partial(buf, huge, compressed, - 12, (int) sizeof(huge)); + 12, HUGE_SIZE); assert_se(r >= 0); - log_info("Decompressed partial %i/%zu → %i", 12, sizeof(huge), r); + log_info("Decompressed partial %i/%i → %i", 12, HUGE_SIZE, r); /* We expect this to fail, because that's how current lz4 works. If this * call succeeds, then lz4 has been fixed, and we need to change our code. */ r = LZ4_decompress_safe_partial(buf, huge, compressed, - 12, (int) sizeof(huge) - 1); + 12, HUGE_SIZE-1); assert_se(r < 0); - log_info("Decompressed partial %i/%zu → %i", 12, sizeof(huge), r); + log_info("Decompressed partial %i/%i → %i", 12, HUGE_SIZE-1, r); } #endif @@ -244,6 +249,11 @@ int main(int argc, char *argv[]) { char data[512] = "random\0"; + char huge[4096*1024]; + memset(huge, 'x', sizeof(huge)); + memcpy(huge, "HUGE=", 5); + char_array_0(huge); + log_set_max_level(LOG_DEBUG); random_bytes(data + 7, sizeof(data) - 7); @@ -253,12 +263,17 @@ int main(int argc, char *argv[]) { text, sizeof(text), false); test_compress_decompress(OBJECT_COMPRESSED_XZ, compress_blob_xz, decompress_blob_xz, data, sizeof(data), true); + test_decompress_startswith(OBJECT_COMPRESSED_XZ, compress_blob_xz, decompress_startswith_xz, text, sizeof(text), false); test_decompress_startswith(OBJECT_COMPRESSED_XZ, compress_blob_xz, decompress_startswith_xz, data, sizeof(data), true); + test_decompress_startswith(OBJECT_COMPRESSED_XZ, + compress_blob_xz, decompress_startswith_xz, + huge, sizeof(huge), true); + test_compress_stream(OBJECT_COMPRESSED_XZ, "xzcat", compress_stream_xz, decompress_stream_xz, argv[0]); #else @@ -270,12 +285,16 @@ int main(int argc, char *argv[]) { text, sizeof(text), false); test_compress_decompress(OBJECT_COMPRESSED_LZ4, compress_blob_lz4, decompress_blob_lz4, data, sizeof(data), true); + test_decompress_startswith(OBJECT_COMPRESSED_LZ4, compress_blob_lz4, decompress_startswith_lz4, text, sizeof(text), false); test_decompress_startswith(OBJECT_COMPRESSED_LZ4, compress_blob_lz4, decompress_startswith_lz4, data, sizeof(data), true); + test_decompress_startswith(OBJECT_COMPRESSED_LZ4, + compress_blob_lz4, decompress_startswith_lz4, + huge, sizeof(huge), true); test_compress_stream(OBJECT_COMPRESSED_LZ4, "lz4cat", compress_stream_lz4, decompress_stream_lz4, argv[0]); -- cgit v1.2.3-54-g00ecf