summaryrefslogtreecommitdiff
path: root/src/shared/util.c
diff options
context:
space:
mode:
authorShawn Landden <shawn@churchofgit.com>2013-09-20 18:37:33 -0700
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>2013-10-13 17:56:54 -0400
commitf405e86de361ec305dc2b8634efeaa23dc144053 (patch)
treeb3fe37e4a143a5d2dee14eb34d96f96cb03b84fc /src/shared/util.c
parent14a9283eb38a93ec384c322ccbe06352c86a25f8 (diff)
util, utf8: make ellipsize take multi-byte characters into account
rename old versions to ascii_* Do not take into account zerowidth characters, but do consider double-wide characters. Import needed utf8 helper code from glib. v3: rebase ontop of utf8 restructuring work [zj: tweak the algorithm a bit, move new code to separate file]
Diffstat (limited to 'src/shared/util.c')
-rw-r--r--src/shared/util.c78
1 files changed, 77 insertions, 1 deletions
diff --git a/src/shared/util.c b/src/shared/util.c
index 31cea79f8b..e18421e520 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -74,6 +74,8 @@
#include "env-util.h"
#include "fileio.h"
#include "device-nodes.h"
+#include "utf8.h"
+#include "gunicode.h"
int saved_argc = 0;
char **saved_argv = NULL;
@@ -3288,7 +3290,7 @@ int running_in_chroot(void) {
a.st_ino != b.st_ino;
}
-char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
size_t x;
char *r;
@@ -3319,6 +3321,80 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
return r;
}
+char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
+ size_t x;
+ char *e;
+ const char *i, *j;
+ unsigned k, len, len2;
+
+ assert(s);
+ assert(percent <= 100);
+ assert(new_length >= 3);
+
+ /* if no multibyte characters use ascii_ellipsize_mem for speed */
+ if (ascii_is_valid(s))
+ return ascii_ellipsize_mem(s, old_length, new_length, percent);
+
+ if (old_length <= 3 || old_length <= new_length)
+ return strndup(s, old_length);
+
+ x = (new_length * percent) / 100;
+
+ if (x > new_length - 3)
+ x = new_length - 3;
+
+ k = 0;
+ for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
+ int c;
+
+ c = utf8_encoded_to_unichar(i);
+ if (c < 0)
+ return NULL;
+ k += unichar_iswide(c) ? 2 : 1;
+ }
+
+ if (k > x) /* last character was wide and went over quota */
+ x ++;
+
+ for (j = s + old_length; k < new_length && j > i; ) {
+ int c;
+
+ j = utf8_prev_char(j);
+ c = utf8_encoded_to_unichar(j);
+ if (c < 0)
+ return NULL;
+ k += unichar_iswide(c) ? 2 : 1;
+ }
+ assert(i <= j);
+
+ /* we don't actually need to ellipsize */
+ if (i == j)
+ return memdup(s, old_length + 1);
+
+ /* make space for ellipsis */
+ j = utf8_next_char(j);
+
+ len = i - s;
+ len2 = s + old_length - j;
+ e = new(char, len + 3 + len2 + 1);
+ if (!e)
+ return NULL;
+
+ /*
+ printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
+ old_length, new_length, x, len, len2, k);
+ */
+
+ memcpy(e, s, len);
+ e[len] = 0xe2; /* tri-dot ellipsis: … */
+ e[len + 1] = 0x80;
+ e[len + 2] = 0xa6;
+
+ memcpy(e + len + 3, j, len2 + 1);
+
+ return e;
+}
+
char *ellipsize(const char *s, size_t length, unsigned percent) {
return ellipsize_mem(s, strlen(s), length, percent);
}