diff options
author | Shawn Landden <shawn@churchofgit.com> | 2013-09-20 18:37:33 -0700 |
---|---|---|
committer | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2013-10-13 17:56:54 -0400 |
commit | f405e86de361ec305dc2b8634efeaa23dc144053 (patch) | |
tree | b3fe37e4a143a5d2dee14eb34d96f96cb03b84fc /src/shared/util.c | |
parent | 14a9283eb38a93ec384c322ccbe06352c86a25f8 (diff) |
util, utf8: make ellipsize take multi-byte characters into account
rename old versions to ascii_*
Do not take into account zerowidth characters, but do consider double-wide characters.
Import needed utf8 helper code from glib.
v3: rebase ontop of utf8 restructuring work
[zj: tweak the algorithm a bit, move new code to separate file]
Diffstat (limited to 'src/shared/util.c')
-rw-r--r-- | src/shared/util.c | 78 |
1 files changed, 77 insertions, 1 deletions
diff --git a/src/shared/util.c b/src/shared/util.c index 31cea79f8b..e18421e520 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -74,6 +74,8 @@ #include "env-util.h" #include "fileio.h" #include "device-nodes.h" +#include "utf8.h" +#include "gunicode.h" int saved_argc = 0; char **saved_argv = NULL; @@ -3288,7 +3290,7 @@ int running_in_chroot(void) { a.st_ino != b.st_ino; } -char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { +static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { size_t x; char *r; @@ -3319,6 +3321,80 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne return r; } +char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { + size_t x; + char *e; + const char *i, *j; + unsigned k, len, len2; + + assert(s); + assert(percent <= 100); + assert(new_length >= 3); + + /* if no multibyte characters use ascii_ellipsize_mem for speed */ + if (ascii_is_valid(s)) + return ascii_ellipsize_mem(s, old_length, new_length, percent); + + if (old_length <= 3 || old_length <= new_length) + return strndup(s, old_length); + + x = (new_length * percent) / 100; + + if (x > new_length - 3) + x = new_length - 3; + + k = 0; + for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) { + int c; + + c = utf8_encoded_to_unichar(i); + if (c < 0) + return NULL; + k += unichar_iswide(c) ? 2 : 1; + } + + if (k > x) /* last character was wide and went over quota */ + x ++; + + for (j = s + old_length; k < new_length && j > i; ) { + int c; + + j = utf8_prev_char(j); + c = utf8_encoded_to_unichar(j); + if (c < 0) + return NULL; + k += unichar_iswide(c) ? 2 : 1; + } + assert(i <= j); + + /* we don't actually need to ellipsize */ + if (i == j) + return memdup(s, old_length + 1); + + /* make space for ellipsis */ + j = utf8_next_char(j); + + len = i - s; + len2 = s + old_length - j; + e = new(char, len + 3 + len2 + 1); + if (!e) + return NULL; + + /* + printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n", + old_length, new_length, x, len, len2, k); + */ + + memcpy(e, s, len); + e[len] = 0xe2; /* tri-dot ellipsis: … */ + e[len + 1] = 0x80; + e[len + 2] = 0xa6; + + memcpy(e + len + 3, j, len2 + 1); + + return e; +} + char *ellipsize(const char *s, size_t length, unsigned percent) { return ellipsize_mem(s, strlen(s), length, percent); } |