From 4b5bc5396c090ee41c45cab9052372d296c4a2f4 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Sat, 6 Dec 2014 21:33:27 -0500 Subject: coredump: use lz4frame api to compress coredumps This converts the stream compression to use the new lz4frame api, compatible with lz4cat. Previous code used custom headers, so the compressed file was not compatible with lz4 command line tools. I considered this the last blocker to using lz4 by default. Speed seems to be reasonable, although a bit (a few percent) slower than the lz4 binary, even though compression is the same. I don't consider this important. It could be caused by the overhead of library calls, but is probably caused by slightly different buffer sizes or such. The code in this patch uses mmap, since since this allows the buffer to be reused while not making the code more complicated at all. In my testing, this version is noticably faster (~20%) than a naive single-buffered version. mmap can cause the program to be killed with SIGBUS, if the underlying file is truncated or a disk error occurs. We only use this from within coredump and coredumpctl, so I don't consider this an issue. Old decompression code is retained and is used if the new code fails indicating a format error. There have been reports of various smaller distributions using previous lz4 code, i.e. the old format, and it is nice to provide backwards compatibility. We can remove the legacy code in a few versions. The way that blobs are compressed in the journal is not affected. --- src/journal/compress.c | 201 ++++++++++++++++++++++++++++++++------------ src/journal/test-compress.c | 9 +- 2 files changed, 150 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/journal/compress.c b/src/journal/compress.c index c66043e503..8e8403f6f8 100644 --- a/src/journal/compress.c +++ b/src/journal/compress.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef HAVE_XZ # include @@ -29,6 +30,7 @@ #ifdef HAVE_LZ4 # include +# include #endif #include "compress.h" @@ -37,6 +39,11 @@ #include "sparse-endian.h" #include "journal-def.h" +#ifdef HAVE_LZ4 +DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_compressionContext_t, LZ4F_freeCompressionContext); +DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_decompressionContext_t, LZ4F_freeDecompressionContext); +#endif + #define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t)) static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = { @@ -416,81 +423,99 @@ int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes) { #endif } -#define LZ4_BUFSIZE (512*1024) +#define LZ4_BUFSIZE (512*1024u) int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { #ifdef HAVE_LZ4 + LZ4F_errorCode_t c; + _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL; + _cleanup_free_ char *buf = NULL; + char *src = NULL; + size_t size, n, total_in = 0, total_out = 0, offset = 0, frame_size; + struct stat st; + int r; + static const LZ4F_compressOptions_t options = { + .stableSrc = 1, + }; + static const LZ4F_preferences_t preferences = { + .frameInfo.blockSizeID = 5, + }; - _cleanup_free_ char *buf1 = NULL, *buf2 = NULL, *out = NULL; - char *buf; - LZ4_stream_t lz4_data = {}; - le32_t header; - size_t total_in = 0, total_out = sizeof(header); - ssize_t n; + c = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(c)) + return -ENOMEM; - assert(fdf >= 0); - assert(fdt >= 0); + if (fstat(fdf, &st) < 0) + return log_error_errno(errno, "fstat() failed: %m"); - buf1 = malloc(LZ4_BUFSIZE); - buf2 = malloc(LZ4_BUFSIZE); - out = malloc(LZ4_COMPRESSBOUND(LZ4_BUFSIZE)); - if (!buf1 || !buf2 || !out) - return log_oom(); + frame_size = LZ4F_compressBound(LZ4_BUFSIZE, &preferences); + size = frame_size + 64*1024; /* add some space for header and trailer */ + buf = malloc(size); + if (!buf) + return -ENOMEM; - buf = buf1; - for (;;) { - size_t m; - int r; + n = offset = LZ4F_compressBegin(ctx, buf, size, &preferences); + if (LZ4F_isError(n)) + return -EINVAL; - m = LZ4_BUFSIZE; - if (max_bytes != (uint64_t) -1 && (uint64_t) m > (max_bytes - total_in)) - m = (size_t) (max_bytes - total_in); + src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fdf, 0); + if (!src) + return -errno; - n = read(fdf, buf, m); - if (n < 0) - return -errno; - if (n == 0) - break; + log_debug("Buffer size is %zu bytes, header size %zu bytes.", size, n); - total_in += n; + while (total_in < (size_t) st.st_size) { + ssize_t k; - r = LZ4_compress_continue(&lz4_data, buf, out, n); - if (r == 0) { - log_error("LZ4 compression failed."); - return -EBADMSG; + k = MIN(LZ4_BUFSIZE, st.st_size - total_in); + n = LZ4F_compressUpdate(ctx, buf + offset, size - offset, + src + total_in, k, &options); + if (LZ4F_isError(n)) { + r = -ENOTRECOVERABLE; + goto cleanup; } - header = htole32(r); - errno = 0; + total_in += k; + offset += n; + total_out += n; - n = write(fdt, &header, sizeof(header)); - if (n < 0) - return -errno; - if (n != sizeof(header)) - return errno ? -errno : -EIO; + if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) { + log_debug("Compressed stream longer than %zd bytes", max_bytes); + return -EFBIG; + } - n = loop_write(fdt, out, r, false); - if (n < 0) - return n; + if (size - offset < frame_size + 4) { + log_debug("Writing %zu bytes", offset); - total_out += sizeof(header) + r; + k = loop_write(fdt, buf, offset, false); + if (k < 0) { + r = k; + goto cleanup; + } + offset = 0; + } + } - buf = buf == buf1 ? buf2 : buf1; + n = LZ4F_compressEnd(ctx, buf + offset, size - offset, &options); + if (LZ4F_isError(n)) { + r = -ENOTRECOVERABLE; + goto cleanup; } - header = htole32(0); - n = write(fdt, &header, sizeof(header)); - if (n < 0) - return -errno; - if (n != sizeof(header)) - return errno ? -errno : -EIO; + offset += n; + total_out += n; + log_debug("Writing %zu bytes", offset); + r = loop_write(fdt, buf, offset, false); + if (r < 0) + goto cleanup; log_debug("LZ4 compression finished (%zu -> %zu bytes, %.1f%%)", total_in, total_out, (double) total_out / total_in * 100); - - return 0; + cleanup: + munmap(src, st.st_size); + return r; #else return -EPROTONOSUPPORT; #endif @@ -571,9 +596,9 @@ int decompress_stream_xz(int fdf, int fdt, uint64_t max_bytes) { #endif } -int decompress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { - #ifdef HAVE_LZ4 +static int decompress_stream_lz4_v1(int fdf, int fdt, uint64_t max_bytes) { + _cleanup_free_ char *buf = NULL, *out = NULL; size_t buf_size = 0; LZ4_streamDecode_t lz4_data = {}; @@ -621,7 +646,7 @@ int decompress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { r = LZ4_decompress_safe_continue(&lz4_data, buf, out, m, 4*LZ4_BUFSIZE); if (r <= 0) - log_error("LZ4 decompression failed."); + log_error("LZ4 decompression failed (legacy format)."); total_out += r; @@ -635,11 +660,77 @@ int decompress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { return r; } - log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)", + log_debug("LZ4 decompression finished (legacy format, %zu -> %zu bytes, %.1f%%)", total_in, total_out, (double) total_out / total_in * 100); return 0; +} + +static int decompress_stream_lz4_v2(int in, int out, uint64_t max_bytes) { + size_t c; + _cleanup_(LZ4F_freeDecompressionContextp) LZ4F_decompressionContext_t ctx = NULL; + _cleanup_free_ char *buf = NULL; + char *src; + struct stat st; + int r = 0; + size_t total_in = 0, total_out = 0; + + c = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(c)) + return -ENOMEM; + + if (fstat(in, &st) < 0) + return log_error_errno(errno, "fstat() failed: %m"); + + buf = malloc(LZ4_BUFSIZE); + if (!buf) + return -ENOMEM; + + src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in, 0); + if (!src) + return -errno; + + while (total_in < (size_t) st.st_size) { + size_t produced = LZ4_BUFSIZE; + size_t used = st.st_size - total_in; + + c = LZ4F_decompress(ctx, buf, &produced, src + total_in, &used, NULL); + if (LZ4F_isError(c)) { + r = -EBADMSG; + goto cleanup; + } + + total_in += used; + total_out += produced; + + if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) { + r = log_debug_errno(EFBIG, "Decompressed stream longer than %zd bytes", max_bytes); + goto cleanup; + } + + r = loop_write(out, buf, produced, false); + if (r < 0) + goto cleanup; + } + + log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)", + total_in, total_out, + (double) total_out / total_in * 100); + cleanup: + munmap(src, st.st_size); + return r; +} +#endif + +int decompress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) { +#ifdef HAVE_LZ4 + int r; + + r = decompress_stream_lz4_v2(fdf, fdt, max_bytes); + if (r == -EBADMSG) + r = decompress_stream_lz4_v1(fdf, fdt, max_bytes); + return r; #else log_error("Cannot decompress file. Compiled without LZ4 support."); return -EPROTONOSUPPORT; diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c index f17c00e60d..e51b12407f 100644 --- a/src/journal/test-compress.c +++ b/src/journal/test-compress.c @@ -144,8 +144,8 @@ static void test_compress_stream(int compression, const char *srcfile) { _cleanup_close_ int src = -1, dst = -1, dst2 = -1; - char pattern[] = "/tmp/systemd-test.xz.XXXXXX", - pattern2[] = "/tmp/systemd-test.xz.XXXXXX"; + char pattern[] = "/tmp/systemd-test.compressed.XXXXXX", + pattern2[] = "/tmp/systemd-test.compressed.XXXXXX"; int r; _cleanup_free_ char *cmd = NULL, *cmd2; struct stat st = {}; @@ -185,7 +185,7 @@ static void test_compress_stream(int compression, assert_se(lseek(dst, 1, SEEK_SET) == 1); r = decompress(dst, dst2, st.st_size); - assert_se(r == -EBADMSG); + assert_se(r == -EBADMSG || r == 0); assert_se(lseek(dst, 0, SEEK_SET) == 0); assert_se(lseek(dst2, 0, SEEK_SET) == 0); @@ -236,8 +236,7 @@ int main(int argc, char *argv[]) { compress_blob_lz4, decompress_startswith_lz4, data, sizeof(data), true); - /* Produced stream is not compatible with lz4 binary, skip lz4cat check. */ - test_compress_stream(OBJECT_COMPRESSED_LZ4, NULL, + test_compress_stream(OBJECT_COMPRESSED_LZ4, "lz4cat", compress_stream_lz4, decompress_stream_lz4, argv[0]); #else log_info("/* LZ4 test skipped */"); -- cgit v1.2.3-54-g00ecf