diff options
| author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2012-07-13 01:07:41 +0200 | 
|---|---|---|
| committer | Lennart Poettering <lennart@poettering.net> | 2012-07-13 01:07:41 +0200 | 
| commit | ba961854ddec8a8efcffab44540c33cc7dffebfa (patch) | |
| tree | 4438f3218e86cf5620affd2b4850e90890f0e3da | |
| parent | 3a419b98485e347413f723f46ceb38dcf2c94688 (diff) | |
journalctl: show any printable Unicode character
This makes sure we are OK in outputting all valid, non-control UTF-8
characters, instead of just printable 7-bit ASCII.
| -rw-r--r-- | Makefile.am | 3 | ||||
| -rw-r--r-- | TODO | 2 | ||||
| -rw-r--r-- | src/shared/logs-show.c | 22 | ||||
| -rw-r--r-- | src/shared/utf8.c | 71 | ||||
| -rw-r--r-- | src/shared/utf8.h | 2 | 
5 files changed, 81 insertions, 19 deletions
| diff --git a/Makefile.am b/Makefile.am index 14f9455b51..507ea3acd4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -726,7 +726,8 @@ libsystemd_logs_la_CFLAGS = \  libsystemd_logs_la_LIBADD = \  	libsystemd-journal-internal.la \ -	libsystemd-id128-internal.la +	libsystemd-id128-internal.la \ +	libsystemd-shared.la  # ------------------------------------------------------------------------------  noinst_LTLIBRARIES += \ @@ -121,8 +121,6 @@ Features:  * drop accountsservice's StandardOutput=syslog and Type=dbus fields -* make sure show-logs checks for utf8 validity, not ascii validity -  * when breaking cycles drop sysv services first, then services from /run, then from /etc, then from /usr  * readahead: when bumping /sys readahead variable save mtime and compare later to detect changes diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c index 540b5a2a2c..e1119224e2 100644 --- a/src/shared/logs-show.c +++ b/src/shared/logs-show.c @@ -28,19 +28,10 @@  #include "logs-show.h"  #include "log.h"  #include "util.h" +#include "utf8.h"  #define PRINT_THRESHOLD 128 -static bool contains_unprintable(const void *p, size_t l) { -        const char *j; - -        for (j = p; j < (const char *) p + l; j++) -                if (*j < ' ' || *j >= 127) -                        return true; - -        return false; -} -  static int parse_field(const void *data, size_t length, const char *field, char **target, size_t *target_size) {          size_t fl, nl;          void *buf; @@ -80,7 +71,7 @@ static bool shall_print(bool show_all, char *p, size_t l) {          if (l > PRINT_THRESHOLD)                  return false; -        if (contains_unprintable(p, l)) +        if (!utf8_is_printable_n(p, l))                  return false;          return true; @@ -226,7 +217,7 @@ static int output_short(sd_journal *j, unsigned line, unsigned n_columns, bool s          if (show_all)                  printf(": %.*s\n", (int) message_len, message); -        else if 
(contains_unprintable(message, message_len)) { +        else if (!utf8_is_printable_n(message, message_len)) {                  char bytes[FORMAT_BYTES_MAX];                  printf(": [%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len));          } else if (message_len + n < n_columns) @@ -298,7 +289,7 @@ static int output_verbose(sd_journal *j, unsigned line, unsigned n_columns, bool          SD_JOURNAL_FOREACH_DATA(j, data, length) {                  if (!show_all && (length > PRINT_THRESHOLD || -                                  contains_unprintable(data, length))) { +                                  !utf8_is_printable_n(data, length))) {                          const char *c;                          char bytes[FORMAT_BYTES_MAX]; @@ -367,7 +358,7 @@ static int output_export(sd_journal *j, unsigned line, unsigned n_columns, bool                      memcmp(data, "_BOOT_ID=", 9) == 0)                          continue; -                if (contains_unprintable(data, length)) { +                if (!utf8_is_printable_n(data, length)) {                          const char *c;                          uint64_t le64; @@ -394,8 +385,7 @@ static int output_export(sd_journal *j, unsigned line, unsigned n_columns, bool  }  static void json_escape(const char* p, size_t l) { - -        if (contains_unprintable(p, l)) { +        if (!utf8_is_printable_n(p, l)) {                  bool not_first = false;                  fputs("[ ", stdout); diff --git a/src/shared/utf8.c b/src/shared/utf8.c index 13f0521e8c..a6f5b3f9e5 100644 --- a/src/shared/utf8.c +++ b/src/shared/utf8.c @@ -78,6 +78,77 @@ static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {          *u_ch |= ch & 0x3f;  } +static bool is_unicode_control(uint32_t ch) { + +        /* +          0 to ' '-1 is the C0 range. +          DEL=0x7F, and DEL+1 to 0x9F is C1 range. +          '\t' is in C0 range, but more or less harmless and commonly used. 
+        */ + +        return (ch < ' ' && ch != '\t') || +                (0x7F <= ch && ch <= 0x9F); +} + +char* utf8_is_printable_n(const char* str, size_t length) { +        uint32_t val = 0; +        uint32_t min = 0; +        const uint8_t *p; + +        assert(str); + +        for (p = (const uint8_t*) str; length; p++, length--) { +                if (*p < 128) { +                        val = *p; +                } else { +                        if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */ +                                min = 128; +                                val = (uint32_t) (*p & 0x1e); +                                goto ONE_REMAINING; +                        } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/ +                                min = (1 << 11); +                                val = (uint32_t) (*p & 0x0f); +                                goto TWO_REMAINING; +                        } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */ +                                min = (1 << 16); +                                val = (uint32_t) (*p & 0x07); +                        } else +                                goto error; + +                        p++; +                        length--; +                        if (!length || !is_continuation_char(*p)) +                                goto error; +                        merge_continuation_char(&val, *p); + +                TWO_REMAINING: +                        p++; +                        length--; +                        if (!is_continuation_char(*p)) +                                goto error; +                        merge_continuation_char(&val, *p); + +                ONE_REMAINING: +                        p++; +                        length--; +                        if (!is_continuation_char(*p)) +                                goto error; +                        merge_continuation_char(&val, *p); + +                        if 
(val < min) +                                goto error; +                } + +                if (is_unicode_control(val)) +                        goto error; +        } + +        return (char*) str; + +error: +        return NULL; +} +  static char* utf8_validate(const char *str, char *output) {          uint32_t val = 0;          uint32_t min = 0; diff --git a/src/shared/utf8.h b/src/shared/utf8.h index af2420fda4..fec76b487a 100644 --- a/src/shared/utf8.h +++ b/src/shared/utf8.h @@ -27,6 +27,8 @@  char *utf8_is_valid(const char *s) _pure_;  char *ascii_is_valid(const char *s) _pure_; +char *utf8_is_printable_n(const char* str, size_t length) _pure_; +  char *utf8_filter(const char *s);  char *ascii_filter(const char *s); | 
