summary | refs | log | tree | commit | diff
path: root/src/libsystemd-terminal/test-term-parser.c
diff options
context:
space:
mode:
author: David Herrmann <dh.herrmann@gmail.com> 2014-06-15 14:50:00 +0200
committer: David Herrmann <dh.herrmann@gmail.com> 2014-07-18 12:53:41 +0200
commit: 1c9633d669948155455e29b0c6e770995a8b1ca3 (patch)
tree: 9fac4ba6ccdd564b36caa2363999667aa601f182 /src/libsystemd-terminal/test-term-parser.c
parent: 28622e8f5b28412d97bf2f3a5df49c419be1e2c5 (diff)
terminal: add parser state-machine
The term-parser is used to parse any input from TTY-clients. It reads CSI, DCS, OSC and ST control sequences and normal escape sequences. It doesn't do anything with the parsed data besides detecting the sequence and returning it. The caller has to react to it. The parser also comes with its own UTF-8 helpers. The reason for that is that we don't want to assert() or hard-fail on parsing errors. Instead, we treat any invalid UTF-8 sequences as ISO-8859-1. This allows pasting invalid data into a terminal (which cannot be controlled through the TTY, anyway) and we still deal with it in a proper manner. This is _required_ for 8-bit and 7-bit DEC modes (including the g0-g3 mappings), so it's not just an ugly fallback because we can (it's still horribly ugly but at least we have an excuse).
Diffstat (limited to 'src/libsystemd-terminal/test-term-parser.c')
-rw-r--r-- src/libsystemd-terminal/test-term-parser.c 143
1 files changed, 143 insertions, 0 deletions
diff --git a/src/libsystemd-terminal/test-term-parser.c b/src/libsystemd-terminal/test-term-parser.c
new file mode 100644
index 0000000000..ed16f5f276
--- /dev/null
+++ b/src/libsystemd-terminal/test-term-parser.c
@@ -0,0 +1,143 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+/***
+ This file is part of systemd.
+
+ Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/*
+ * Terminal Parser Tests
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "macro.h"
+#include "term-internal.h"
+#include "util.h"
+
+/* Probes term_utf8_decode() with missing parser/length arguments and a broken-off sequence. */
+static void test_term_utf8_invalid(void) {
+        term_utf8 parser = { };
+        const uint32_t *out;
+        size_t n;
+        /* no length pointer: NULL without a parser, a result with one */
+        out = term_utf8_decode(NULL, NULL, 0);
+        assert_se(!out);
+        out = term_utf8_decode(&parser, NULL, 0);
+        assert_se(out != NULL);
+        /* without a parser the caller's length is reset to 0 */
+        n = 5;
+        out = term_utf8_decode(NULL, &n, 0);
+        assert_se(!out);
+        assert_se(n == 0);
+        /* a plain NUL byte decodes to exactly one character */
+        n = 5;
+        out = term_utf8_decode(&parser, &n, 0);
+        assert_se(out != NULL);
+        assert_se(n == 1);
+        /* 0xCF is a multi-byte lead byte, so nothing is emitted yet */
+        n = 5;
+        out = term_utf8_decode(&parser, &n, 0xCf);
+        assert_se(!out);
+        assert_se(n == 0);
+        /* NUL cannot continue that sequence: two characters come back */
+        n = 5;
+        out = term_utf8_decode(&parser, &n, 0x0);
+        assert_se(out != NULL);
+        assert_se(n == 2);
+}
+
+/* Round-trips every code point up to and including U+10FFFF through encode and decode. */
+static void test_term_utf8_range(void) {
+        term_utf8 p = { };
+        const uint32_t *res;
+        char u8[4];
+        uint32_t i, j;
+        size_t ulen, len;
+
+        /* Convert all ucs-4 chars to utf-8 and back; the bound is inclusive so U+10FFFF is covered */
+        for (i = 0; i <= 0x10FFFF; ++i) {
+                ulen = term_utf8_encode(u8, i);
+                if (!ulen)
+                        continue;
+                for (j = 0; j < ulen; ++j) {
+                        res = term_utf8_decode(&p, &len, u8[j]);
+                        if (!res) {
+                                /* only a non-final byte may produce no output */
+                                assert_se(j + 1 != ulen);
+                                continue;
+                        }
+                        /* output must appear on the final byte and match the input */
+                        assert_se(j + 1 == ulen);
+                        assert_se(len == 1 && *res == i);
+                        assert_se(i <= 127 || ulen >= 2);
+                }
+        }
+}
+
+/* Feeds a mix of plain, overlong and broken sequences and compares the decoder output to a table. */
+static void test_term_utf8_mix(void) {
+        static const char input[] = {
+                0x00,                           /* plain NUL */
+                0xC0, 0x80,                     /* 2-byte overlong 0 */
+                0xC0, 0x81,                     /* 2-byte overlong 1 */
+                0xE0, 0x80, 0x81,               /* 3-byte overlong 1 */
+                0xF0, 0x80, 0x80, 0x81,         /* 4-byte overlong 1 */
+                0xC0, 0x00,                     /* lead byte followed by a non-continuation byte */
+                0xC0, 0xC0, 0x81,               /* lead byte interrupted by a 2-byte overlong 1 */
+                0xF8, 0x80, 0x80, 0x80, 0x81,   /* 5-byte form of overlong 1 */
+                0xE0, 0x80, 0xC0, 0x81,         /* 3-byte sequence broken off by a 2-byte overlong 1 */
+                0xF0, 0x80, 0x80, 0xC0, 0x81,   /* 4-byte sequence broken off by a 2-byte overlong 1 */
+        };
+        static const uint32_t expected[] = {
+                0x0000,
+                0x0000,                         /* overlong forms decode to their value */
+                0x0001,
+                0x0001,
+                0x0001,
+                0x00C0, 0x0000,                 /* rejected bytes are passed through one by one */
+                0x00C0, 0x0001,
+                0x00F8, 0x0080, 0x0080, 0x0080, 0x0081,
+                0x00E0, 0x0080, 0x0001,
+                0x00F0, 0x0080, 0x0080, 0x0001,
+        };
+        term_utf8 parser = { };
+        const uint32_t *out;
+        unsigned int in, pos = 0;
+        size_t n;
+        /* feed one byte at a time; compare each emitted run against the table */
+        for (in = 0; in < sizeof(input); ++in) {
+                out = term_utf8_decode(&parser, &n, input[in]);
+                if (out) {
+                        assert_se(pos + n <= ELEMENTSOF(expected));
+                        assert_se(memcmp(out, expected + pos, n * sizeof(uint32_t)) == 0);
+                        pos += n;
+                }
+        }
+        /* every expected character must have been produced */
+        assert_se(pos == ELEMENTSOF(expected));
+}
+
+/* Runs each UTF-8 helper test in sequence and reports success once all of them have returned. */
+int main(int argc, char *argv[]) {
+        test_term_utf8_invalid();
+        test_term_utf8_range();
+        test_term_utf8_mix();
+        return EXIT_SUCCESS;
+}