summaryrefslogtreecommitdiff
path: root/src/libsystemd-terminal/term-parser.c
diff options
context:
space:
mode:
authorDavid Herrmann <dh.herrmann@gmail.com>2014-06-15 14:50:00 +0200
committerDavid Herrmann <dh.herrmann@gmail.com>2014-07-18 12:53:41 +0200
commit1c9633d669948155455e29b0c6e770995a8b1ca3 (patch)
tree9fac4ba6ccdd564b36caa2363999667aa601f182 /src/libsystemd-terminal/term-parser.c
parent28622e8f5b28412d97bf2f3a5df49c419be1e2c5 (diff)
terminal: add parser state-machine
The term-parser is used to parse any input from TTY-clients. It reads CSI, DCS, OSC and ST control sequences and normal escape sequences. It doesn't do anything with the parsed data besides detecting the sequence and returning it. The caller has to react to them. The parser also comes with its own UTF-8 helpers. The reason for that is that we don't want to assert() or hard-fail on parsing errors. Instead, we treat any invalid UTF-8 sequences as ISO-8859-1. This allows pasting invalid data into a terminal (which cannot be controlled through the TTY, anyway) and we still deal with it in a proper manner. This is _required_ for 8-bit and 7-bit DEC modes (including the g0-g3 mappings), so it's not just an ugly fallback because we can (it's still horribly ugly but at least we have an excuse).
Diffstat (limited to 'src/libsystemd-terminal/term-parser.c')
-rw-r--r--src/libsystemd-terminal/term-parser.c1626
1 files changed, 1626 insertions, 0 deletions
diff --git a/src/libsystemd-terminal/term-parser.c b/src/libsystemd-terminal/term-parser.c
new file mode 100644
index 0000000000..1c968520bd
--- /dev/null
+++ b/src/libsystemd-terminal/term-parser.c
@@ -0,0 +1,1626 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/*
+ * Terminal Parser
+ * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
+ * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
+ * control sequences and generic escape sequences.
+ * The parser itself does not perform any actions but lets the caller react to
+ * detected sequences.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "macro.h"
+#include "term-internal.h"
+#include "util.h"
+
+/**
+ * term_utf8_encode() - Encode single UCS-4 character as UTF-8
+ * @out_utf8: output buffer of at least 4 bytes or NULL
+ * @g: UCS-4 character to encode
+ *
+ * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
+ * The length of the character is returned. It is not zero-terminated! If the
+ * output buffer is NULL, only the length is returned.
+ *
+ * Returns: The length in bytes that the UTF-8 representation does or would
+ * occupy.
+ */
+size_t term_utf8_encode(char *out_utf8, uint32_t g) {
+ if (g < (1 << 7)) {
+ if (out_utf8)
+ out_utf8[0] = g & 0x7f;
+ return 1;
+ } else if (g < (1 << 11)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
+ out_utf8[1] = 0x80 | (g & 0x3f);
+ }
+ return 2;
+ } else if (g < (1 << 16)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
+ out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
+ out_utf8[2] = 0x80 | (g & 0x3f);
+ }
+ return 3;
+ } else if (g < (1 << 21)) {
+ if (out_utf8) {
+ out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
+ out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
+ out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
+ out_utf8[3] = 0x80 | (g & 0x3f);
+ }
+ return 4;
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * term_utf8_decode() - Try decoding the next UCS-4 character
+ * @p: decoder object to operate on or NULL
+ * @out_len: output buffer for length of decoded UCS-4 string or NULL
+ * @c: next char to push into decoder
+ *
+ * This decodes a UTF-8 stream. It must be called for each input-byte of the
+ * UTF-8 stream and returns a UCS-4 stream. The length of the returned UCS-4
+ * string (number of parsed characters) is stored in @out_len if non-NULL. A
+ * pointer to the string is returned (or NULL if none was parsed). The string
+ * is not zero-terminated! Furthermore, the string is only valid until the next
+ * invokation of this function. It is also bound to the parser-state @p.
+ *
+ * This function is highly optimized to work with terminal-emulators. Instead
+ * of being strict about UTF-8 validity, this tries to perform a fallback to
+ * ISO-8859-1 in case a wrong series was detected. Therefore, this function
+ * might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
+ *
+ * The parser state @p should be allocated and managed by the caller. There're
+ * no helpers to do that for you. To initialize it, simply reset it to all
+ * zero. You can reset or free the object at any point in time.
+ *
+ * Returns: Pointer to the UCS-4 string or NULL.
+ */
+const uint32_t *term_utf8_decode(term_utf8 *p, size_t *out_len, char c) {
+ uint32_t t, *res = NULL;
+ uint8_t byte;
+ size_t len = 0;
+
+ if (!p)
+ goto out;
+
+ byte = c;
+
+ if (!p->valid || p->i_bytes >= p->n_bytes) {
+ /*
+ * If the previous sequence was invalid or fully parsed, start
+ * parsing a fresh new sequence.
+ */
+
+ if ((byte & 0xE0) == 0xC0) {
+ /* start of two byte sequence */
+ t = byte & 0x1F;
+ p->n_bytes = 2;
+ p->i_bytes = 1;
+ p->valid = 1;
+ } else if ((byte & 0xF0) == 0xE0) {
+ /* start of three byte sequence */
+ t = byte & 0x0F;
+ p->n_bytes = 3;
+ p->i_bytes = 1;
+ p->valid = 1;
+ } else if ((byte & 0xF8) == 0xF0) {
+ /* start of four byte sequence */
+ t = byte & 0x07;
+ p->n_bytes = 4;
+ p->i_bytes = 1;
+ p->valid = 1;
+ } else {
+ /* Either of:
+ * - single ASCII 7-bit char
+ * - out-of-sync continuation byte
+ * - overlong encoding
+ * All of them are treated as single byte ISO-8859-1 */
+ t = byte;
+ p->n_bytes = 1;
+ p->i_bytes = 1;
+ p->valid = 0;
+ }
+
+ p->chars[0] = byte;
+ p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
+ } else {
+ /*
+ * ..otherwise, try to continue the previous sequence..
+ */
+
+ if ((byte & 0xC0) == 0x80) {
+ /*
+ * Valid continuation byte. Append to sequence and
+ * update the ucs4 cache accordingly.
+ */
+
+ t = byte & 0x3F;
+ p->chars[p->i_bytes++] = byte;
+ p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes));
+ } else {
+ /*
+ * Invalid continuation? Treat cached sequence as
+ * ISO-8859-1, but parse the new char as valid new
+ * starting character. If it's a new single-byte UTF-8
+ * sequence, we immediately return it in the same run,
+ * otherwise, we might suffer from starvation.
+ */
+
+ if ((byte & 0xE0) == 0xC0 ||
+ (byte & 0xF0) == 0xE0 ||
+ (byte & 0xF8) == 0xF0) {
+ /*
+ * New multi-byte sequence. Move to-be-returned
+ * data at the end and start new sequence. Only
+ * return the old sequence.
+ */
+
+ memmove(p->chars + 1,
+ p->chars,
+ sizeof(*p->chars) * p->i_bytes);
+ res = p->chars + 1;
+ len = p->i_bytes;
+
+ if ((byte & 0xE0) == 0xC0) {
+ /* start of two byte sequence */
+ t = byte & 0x1F;
+ p->n_bytes = 2;
+ p->i_bytes = 1;
+ p->valid = 1;
+ } else if ((byte & 0xF0) == 0xE0) {
+ /* start of three byte sequence */
+ t = byte & 0x0F;
+ p->n_bytes = 3;
+ p->i_bytes = 1;
+ p->valid = 1;
+ } else if ((byte & 0xF8) == 0xF0) {
+ /* start of four byte sequence */
+ t = byte & 0x07;
+ p->n_bytes = 4;
+ p->i_bytes = 1;
+ p->valid = 1;
+ }
+
+ p->chars[0] = byte;
+ p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
+
+ goto out;
+ } else {
+ /*
+ * New single byte sequence, append to output
+ * and return combined sequence.
+ */
+
+ p->chars[p->i_bytes++] = byte;
+ p->valid = 0;
+ }
+ }
+ }
+
+ /*
+ * Check whether a full sequence (valid or invalid) has been parsed and
+ * then return it. Otherwise, return nothing.
+ */
+ if (p->valid) {
+ /* still parsing? then bail out */
+ if (p->i_bytes < p->n_bytes)
+ goto out;
+
+ res = &p->ucs4;
+ len = 1;
+ } else {
+ res = p->chars;
+ len = p->i_bytes;
+ }
+
+ p->valid = 0;
+ p->i_bytes = 0;
+ p->n_bytes = 0;
+
+out:
+ if (out_len)
+ *out_len = len;
+ return len > 0 ? res : NULL;
+}
+
+/*
+ * Command Parser
+ * The ctl-seq parser "term_parser" only detects whole sequences, it does not
+ * detect the specific command. Once a sequence is parsed, the command-parsers
+ * are used to figure out their meaning. Note that this depends on whether we
+ * run on the host or terminal side.
+ */
+
+static unsigned int term_parse_host_control(const term_seq *seq) {
+ assert_return(seq, TERM_CMD_NONE);
+
+ switch (seq->terminator) {
+ case 0x00: /* NUL */
+ return TERM_CMD_NULL;
+ case 0x05: /* ENQ */
+ return TERM_CMD_ENQ;
+ case 0x07: /* BEL */
+ return TERM_CMD_BEL;
+ case 0x08: /* BS */
+ return TERM_CMD_BS;
+ case 0x09: /* HT */
+ return TERM_CMD_HT;
+ case 0x0a: /* LF */
+ return TERM_CMD_LF;
+ case 0x0b: /* VT */
+ return TERM_CMD_VT;
+ case 0x0c: /* FF */
+ return TERM_CMD_FF;
+ case 0x0d: /* CR */
+ return TERM_CMD_CR;
+ case 0x0e: /* SO */
+ return TERM_CMD_SO;
+ case 0x0f: /* SI */
+ return TERM_CMD_SI;
+ case 0x11: /* DC1 */
+ return TERM_CMD_DC1;
+ case 0x13: /* DC3 */
+ return TERM_CMD_DC3;
+ case 0x18: /* CAN */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x1a: /* SUB */
+ return TERM_CMD_SUB;
+ case 0x1b: /* ESC */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x1f: /* DEL */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x84: /* IND */
+ return TERM_CMD_IND;
+ case 0x85: /* NEL */
+ return TERM_CMD_NEL;
+ case 0x88: /* HTS */
+ return TERM_CMD_HTS;
+ case 0x8d: /* RI */
+ return TERM_CMD_RI;
+ case 0x8e: /* SS2 */
+ return TERM_CMD_SS2;
+ case 0x8f: /* SS3 */
+ return TERM_CMD_SS3;
+ case 0x90: /* DCS */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x96: /* SPA */
+ return TERM_CMD_SPA;
+ case 0x97: /* EPA */
+ return TERM_CMD_EPA;
+ case 0x98: /* SOS */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x9a: /* DECID */
+ return TERM_CMD_DECID;
+ case 0x9b: /* CSI */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x9c: /* ST */
+ return TERM_CMD_ST;
+ case 0x9d: /* OSC */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x9e: /* PM */
+ /* this is already handled by the state-machine */
+ break;
+ case 0x9f: /* APC */
+ /* this is already handled by the state-machine */
+ break;
+ }
+
+ return TERM_CMD_NONE;
+}
+
+static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) {
+ static const struct {
+ uint32_t raw;
+ unsigned int flags;
+ } charset_cmds[] = {
+ /* 96-compat charsets */
+ [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 },
+ [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 },
+ [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 },
+ [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 },
+ [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 },
+ [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 },
+
+ /* 94-compat charsets */
+ [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 },
+ [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT },
+ [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 },
+ [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND },
+ [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 },
+ [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 },
+ [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 },
+ [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 },
+ [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 },
+ [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE },
+ [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE },
+ [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE },
+ [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT },
+ [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 },
+ [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 },
+ [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT },
+ [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND },
+ [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT },
+ [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 },
+ [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 },
+ [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 },
+ [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT },
+ [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT },
+
+ /* special charsets */
+ [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 },
+
+ /* secondary choices */
+ [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 },
+ [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 },
+ [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 },
+ [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 },
+ [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
+
+ /* tertiary choices */
+ [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
+ };
+ size_t i, cs;
+
+ /*
+ * Secondary choice on SWEDISH_NRCS and primary choice on
+ * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
+ * We always choose the ISO 96-compat set, which is what VT510 does.
+ */
+
+ for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
+ if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) {
+ cs = i;
+ while (cs >= TERM_CHARSET_CNT)
+ cs -= TERM_CHARSET_CNT;
+
+ if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT)
+ return cs;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/* true if exactly one bit in @value is set */
+static inline bool exactly_one_bit_set(unsigned int value) {
+ return __builtin_popcount(value) == 1;
+}
+
+static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) {
+ unsigned int t, flags;
+ int cs;
+
+ assert_return(seq, TERM_CMD_NONE);
+
+ flags = seq->intermediates;
+ t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT |
+ TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT |
+ TERM_SEQ_FLAG_SLASH;
+
+ if (exactly_one_bit_set(flags & t)) {
+ switch (flags & t) {
+ case TERM_SEQ_FLAG_POPEN:
+ case TERM_SEQ_FLAG_PCLOSE:
+ case TERM_SEQ_FLAG_MULT:
+ case TERM_SEQ_FLAG_PLUS:
+ cs = charset_from_cmd(seq->terminator, flags & ~t, false);
+ break;
+ case TERM_SEQ_FLAG_MINUS:
+ case TERM_SEQ_FLAG_DOT:
+ case TERM_SEQ_FLAG_SLASH:
+ cs = charset_from_cmd(seq->terminator, flags & ~t, true);
+ break;
+ default:
+ cs = -ENOENT;
+ break;
+ }
+
+ if (cs >= 0) {
+ if (cs_out)
+ *cs_out = cs;
+ return TERM_CMD_SCS;
+ }
+
+ /* looked like a charset-cmd but wasn't; continue */
+ }
+
+ switch (seq->terminator) {
+ case '3':
+ if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */
+ return TERM_CMD_DECDHL_TH;
+ break;
+ case '4':
+ if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */
+ return TERM_CMD_DECDHL_BH;
+ break;
+ case '5':
+ if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */
+ return TERM_CMD_DECSWL;
+ break;
+ case '6':
+ if (flags == 0) /* DECBI */
+ return TERM_CMD_DECBI;
+ else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */
+ return TERM_CMD_DECDWL;
+ break;
+ case '7':
+ if (flags == 0) /* DECSC */
+ return TERM_CMD_DECSC;
+ break;
+ case '8':
+ if (flags == 0) /* DECRC */
+ return TERM_CMD_DECRC;
+ else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */
+ return TERM_CMD_DECALN;
+ break;
+ case '9':
+ if (flags == 0) /* DECFI */
+ return TERM_CMD_DECFI;
+ break;
+ case '<':
+ if (flags == 0) /* DECANM */
+ return TERM_CMD_DECANM;
+ break;
+ case '=':
+ if (flags == 0) /* DECKPAM */
+ return TERM_CMD_DECKPAM;
+ break;
+ case '>':
+ if (flags == 0) /* DECKPNM */
+ return TERM_CMD_DECKPNM;
+ break;
+ case '@':
+ if (flags == TERM_SEQ_FLAG_PERCENT) {
+ /* Select default character set */
+ return TERM_CMD_XTERM_SDCS;
+ }
+ break;
+ case 'D':
+ if (flags == 0) /* IND */
+ return TERM_CMD_IND;
+ break;
+ case 'E':
+ if (flags == 0) /* NEL */
+ return TERM_CMD_NEL;
+ break;
+ case 'F':
+ if (flags == 0) /* Cursor to lower-left corner of screen */
+ return TERM_CMD_XTERM_CLLHP;
+ else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */
+ return TERM_CMD_S7C1T;
+ break;
+ case 'G':
+ if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */
+ return TERM_CMD_S8C1T;
+ } else if (flags == TERM_SEQ_FLAG_PERCENT) {
+ /* Select UTF-8 character set */
+ return TERM_CMD_XTERM_SUCS;
+ }
+ break;
+ case 'H':
+ if (flags == 0) /* HTS */
+ return TERM_CMD_HTS;
+ break;
+ case 'L':
+ if (flags == TERM_SEQ_FLAG_SPACE) {
+ /* Set ANSI conformance level 1 */
+ return TERM_CMD_XTERM_SACL1;
+ }
+ break;
+ case 'M':
+ if (flags == 0) { /* RI */
+ return TERM_CMD_RI;
+ } else if (flags == TERM_SEQ_FLAG_SPACE) {
+ /* Set ANSI conformance level 2 */
+ return TERM_CMD_XTERM_SACL2;
+ }
+ break;
+ case 'N':
+ if (flags == 0) { /* SS2 */
+ return TERM_CMD_SS2;
+ } else if (flags == TERM_SEQ_FLAG_SPACE) {
+ /* Set ANSI conformance level 3 */
+ return TERM_CMD_XTERM_SACL3;
+ }
+ break;
+ case 'O':
+ if (flags == 0) /* SS3 */
+ return TERM_CMD_SS3;
+ break;
+ case 'P':
+ if (flags == 0) /* DCS: this is already handled by the state-machine */
+ return 0;
+ break;
+ case 'V':
+ if (flags == 0) /* SPA */
+ return TERM_CMD_SPA;
+ break;
+ case 'W':
+ if (flags == 0) /* EPA */
+ return TERM_CMD_EPA;
+ break;
+ case 'X':
+ if (flags == 0) { /* SOS */
+ /* this is already handled by the state-machine */
+ break;
+ }
+ break;
+ case 'Z':
+ if (flags == 0) /* DECID */
+ return TERM_CMD_DECID;
+ break;
+ case '[':
+ if (flags == 0) { /* CSI */
+ /* this is already handled by the state-machine */
+ break;
+ }
+ break;
+ case '\\':
+ if (flags == 0) /* ST */
+ return TERM_CMD_ST;
+ break;
+ case ']':
+ if (flags == 0) { /* OSC */
+ /* this is already handled by the state-machine */
+ break;
+ }
+ break;
+ case '^':
+ if (flags == 0) { /* PM */
+ /* this is already handled by the state-machine */
+ break;
+ }
+ break;
+ case '_':
+ if (flags == 0) { /* APC */
+ /* this is already handled by the state-machine */
+ break;
+ }
+ break;
+ case 'c':
+ if (flags == 0) /* RIS */
+ return TERM_CMD_RIS;
+ break;
+ case 'l':
+ if (flags == 0) /* Memory lock */
+ return TERM_CMD_XTERM_MLHP;
+ break;
+ case 'm':
+ if (flags == 0) /* Memory unlock */
+ return TERM_CMD_XTERM_MUHP;
+ break;
+ case 'n':
+ if (flags == 0) /* LS2 */
+ return TERM_CMD_LS2;
+ break;
+ case 'o':
+ if (flags == 0) /* LS3 */
+ return TERM_CMD_LS3;
+ break;
+ case '|':
+ if (flags == 0) /* LS3R */
+ return TERM_CMD_LS3R;
+ break;
+ case '}':
+ if (flags == 0) /* LS2R */
+ return TERM_CMD_LS2R;
+ break;
+ case '~':
+ if (flags == 0) /* LS1R */
+ return TERM_CMD_LS1R;
+ break;
+ }
+
+ return TERM_CMD_NONE;
+}
+
+static unsigned int term_parse_host_csi(const term_seq *seq) {
+ unsigned int flags;
+
+ assert_return(seq, TERM_CMD_NONE);
+
+ flags = seq->intermediates;
+
+ switch (seq->terminator) {
+ case 'A':
+ if (flags == 0) /* CUU */
+ return TERM_CMD_CUU;
+ break;
+ case 'a':
+ if (flags == 0) /* HPR */
+ return TERM_CMD_HPR;
+ break;
+ case 'B':
+ if (flags == 0) /* CUD */
+ return TERM_CMD_CUD;
+ break;
+ case 'b':
+ if (flags == 0) /* REP */
+ return TERM_CMD_REP;
+ break;
+ case 'C':
+ if (flags == 0) /* CUF */
+ return TERM_CMD_CUF;
+ break;
+ case 'c':
+ if (flags == 0) /* DA1 */
+ return TERM_CMD_DA1;
+ else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */
+ return TERM_CMD_DA2;
+ else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */
+ return TERM_CMD_DA3;
+ break;
+ case 'D':
+ if (flags == 0) /* CUB */
+ return TERM_CMD_CUB;
+ break;
+ case 'd':
+ if (flags == 0) /* VPA */
+ return TERM_CMD_VPA;
+ break;
+ case 'E':
+ if (flags == 0) /* CNL */
+ return TERM_CMD_CNL;
+ break;
+ case 'e':
+ if (flags == 0) /* VPR */
+ return TERM_CMD_VPR;
+ break;
+ case 'F':
+ if (flags == 0) /* CPL */
+ return TERM_CMD_CPL;
+ break;
+ case 'f':
+ if (flags == 0) /* HVP */
+ return TERM_CMD_HVP;
+ break;
+ case 'G':
+ if (flags == 0) /* CHA */
+ return TERM_CMD_CHA;
+ break;
+ case 'g':
+ if (flags == 0) /* TBC */
+ return TERM_CMD_TBC;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */
+ return TERM_CMD_DECLFKC;
+ break;
+ case 'H':
+ if (flags == 0) /* CUP */
+ return TERM_CMD_CUP;
+ break;
+ case 'h':
+ if (flags == 0) /* SM ANSI */
+ return TERM_CMD_SM_ANSI;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */
+ return TERM_CMD_SM_DEC;
+ break;
+ case 'I':
+ if (flags == 0) /* CHT */
+ return TERM_CMD_CHT;
+ break;
+ case 'i':
+ if (flags == 0) /* MC ANSI */
+ return TERM_CMD_MC_ANSI;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */
+ return TERM_CMD_MC_DEC;
+ break;
+ case 'J':
+ if (flags == 0) /* ED */
+ return TERM_CMD_ED;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */
+ return TERM_CMD_DECSED;
+ break;
+ case 'K':
+ if (flags == 0) /* EL */
+ return TERM_CMD_EL;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */
+ return TERM_CMD_DECSEL;
+ break;
+ case 'L':
+ if (flags == 0) /* IL */
+ return TERM_CMD_IL;
+ break;
+ case 'l':
+ if (flags == 0) /* RM ANSI */
+ return TERM_CMD_RM_ANSI;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */
+ return TERM_CMD_RM_DEC;
+ break;
+ case 'M':
+ if (flags == 0) /* DL */
+ return TERM_CMD_DL;
+ break;
+ case 'm':
+ if (flags == 0) /* SGR */
+ return TERM_CMD_SGR;
+ else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */
+ return TERM_CMD_XTERM_SRV;
+ break;
+ case 'n':
+ if (flags == 0) /* DSR ANSI */
+ return TERM_CMD_DSR_ANSI;
+ else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */
+ return TERM_CMD_XTERM_RRV;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */
+ return TERM_CMD_DSR_DEC;
+ break;
+ case 'P':
+ if (flags == 0) /* DCH */
+ return TERM_CMD_DCH;
+ else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */
+ return TERM_CMD_PPA;
+ break;
+ case 'p':
+ if (flags == 0) /* DECSSL */
+ return TERM_CMD_DECSSL;
+ else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */
+ return TERM_CMD_DECSSCLS;
+ else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */
+ return TERM_CMD_DECSTR;
+ else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */
+ return TERM_CMD_DECSCL;
+ else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */
+ return TERM_CMD_DECRQM_ANSI;
+ else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */
+ return TERM_CMD_DECRQM_DEC;
+ else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */
+ return TERM_CMD_DECSDPT;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */
+ return TERM_CMD_DECSPPCS;
+ else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */
+ return TERM_CMD_DECSR;
+ else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */
+ return TERM_CMD_DECLTOD;
+ else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */
+ return TERM_CMD_XTERM_SPM;
+ break;
+ case 'Q':
+ if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */
+ return TERM_CMD_PPR;
+ break;
+ case 'q':
+ if (flags == 0) /* DECLL */
+ return TERM_CMD_DECLL;
+ else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */
+ return TERM_CMD_DECSCUSR;
+ else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */
+ return TERM_CMD_DECSCA;
+ else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */
+ return TERM_CMD_DECSDDT;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */
+ return TERM_CMD_DECSR;
+ else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */
+ return TERM_CMD_DECELF;
+ else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */
+ return TERM_CMD_DECTID;
+ break;
+ case 'R':
+ if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */
+ return TERM_CMD_PPB;
+ break;
+ case 'r':
+ if (flags == 0) {
+ /* DECSTBM */
+ return TERM_CMD_DECSTBM;
+ } else if (flags == TERM_SEQ_FLAG_SPACE) {
+ /* DECSKCV */
+ return TERM_CMD_DECSKCV;
+ } else if (flags == TERM_SEQ_FLAG_CASH) {
+ /* DECCARA */
+ return TERM_CMD_DECCARA;
+ } else if (flags == TERM_SEQ_FLAG_MULT) {
+ /* DECSCS */
+ return TERM_CMD_DECSCS;
+ } else if (flags == TERM_SEQ_FLAG_PLUS) {
+ /* DECSMKR */
+ return TERM_CMD_DECSMKR;
+ } else if (flags == TERM_SEQ_FLAG_WHAT) {
+ /*
+ * There's a conflict between DECPCTERM and XTERM-RPM.
+ * XTERM-RPM takes a single argument, DECPCTERM takes 2.
+ * Split both up and forward the call to the closer
+ * match.
+ */
+ if (seq->n_args <= 1) /* XTERM RPM */
+ return TERM_CMD_XTERM_RPM;
+ else if (seq->n_args >= 2) /* DECPCTERM */
+ return TERM_CMD_DECPCTERM;
+ }
+ break;
+ case 'S':
+ if (flags == 0) /* SU */
+ return TERM_CMD_SU;
+ else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */
+ return TERM_CMD_XTERM_SGFX;
+ break;
+ case 's':
+ if (flags == 0) {
+ /*
+ * There's a conflict between DECSLRM and SC-ANSI which
+ * cannot be resolved without knowing the state of
+ * DECLRMM. We leave that decision up to the caller.
+ */
+ return TERM_CMD_DECSLRM_OR_SC;
+ } else if (flags == TERM_SEQ_FLAG_CASH) {
+ /* DECSPRTT */
+ return TERM_CMD_DECSPRTT;
+ } else if (flags == TERM_SEQ_FLAG_MULT) {
+ /* DECSFC */
+ return TERM_CMD_DECSFC;
+ } else if (flags == TERM_SEQ_FLAG_WHAT) {
+ /* XTERM SPM */
+ return TERM_CMD_XTERM_SPM;
+ }
+ break;
+ case 'T':
+ if (flags == 0) {
+ /*
+ * Awesome: There's a conflict between SD and XTERM IHMT
+ * that we have to resolve by checking the parameter
+ * count.. XTERM_IHMT needs exactly 5 arguments, SD
+ * takes 0 or 1. We're conservative here and give both
+ * a wider range to allow unused arguments (compat...).
+ */
+ if (seq->n_args >= 5) {
+ /* XTERM IHMT */
+ return TERM_CMD_XTERM_IHMT;
+ } else if (seq->n_args < 5) {
+ /* SD */
+ return TERM_CMD_SD;
+ }
+ } else if (flags == TERM_SEQ_FLAG_GT) {
+ /* XTERM RTM */
+ return TERM_CMD_XTERM_RTM;
+ }
+ break;
+ case 't':
+ if (flags == 0) {
+ if (seq->n_args > 0 && seq->args[0] < 24) {
+ /* XTERM WM */
+ return TERM_CMD_XTERM_WM;
+ } else {
+ /* DECSLPP */
+ return TERM_CMD_DECSLPP;
+ }
+ } else if (flags == TERM_SEQ_FLAG_SPACE) {
+ /* DECSWBV */
+ return TERM_CMD_DECSWBV;
+ } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
+ /* DECSRFR */
+ return TERM_CMD_DECSRFR;
+ } else if (flags == TERM_SEQ_FLAG_CASH) {
+ /* DECRARA */
+ return TERM_CMD_DECRARA;
+ } else if (flags == TERM_SEQ_FLAG_GT) {
+ /* XTERM STM */
+ return TERM_CMD_XTERM_STM;
+ }
+ break;
+ case 'U':
+ if (flags == 0) /* NP */
+ return TERM_CMD_NP;
+ break;
+ case 'u':
+ if (flags == 0) {
+ /* RC */
+ return TERM_CMD_RC;
+ } else if (flags == TERM_SEQ_FLAG_SPACE) {
+ /* DECSMBV */
+ return TERM_CMD_DECSMBV;
+ } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
+ /* DECSTRL */
+ return TERM_CMD_DECSTRL;
+ } else if (flags == TERM_SEQ_FLAG_WHAT) {
+ /* DECRQUPSS */
+ return TERM_CMD_DECRQUPSS;
+ } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) {
+ /* DECRQTSR */
+ return TERM_CMD_DECRQTSR;
+ } else if (flags == TERM_SEQ_FLAG_MULT) {
+ /* DECSCP */
+ return TERM_CMD_DECSCP;
+ } else if (flags == TERM_SEQ_FLAG_COMMA) {
+ /* DECRQKT */
+ return TERM_CMD_DECRQKT;
+ }
+ break;
+ case 'V':
+ if (flags == 0) /* PP */
+ return TERM_CMD_PP;
+ break;
+ case 'v':
+ if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */
+ return TERM_CMD_DECSLCK;
+ else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */
+ return TERM_CMD_DECRQDE;
+ else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */
+ return TERM_CMD_DECCRA;
+ else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */
+ return TERM_CMD_DECRPKT;
+ break;
+ case 'W':
+ if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) {
+ /* DECST8C */
+ return TERM_CMD_DECST8C;
+ }
+ break;
+ case 'w':
+ if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */
+ return TERM_CMD_DECRQPSR;
+ else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */
+ return TERM_CMD_DECEFR;
+ else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */
+ return TERM_CMD_DECSPP;
+ break;
+ case 'X':
+ if (flags == 0) /* ECH */
+ return TERM_CMD_ECH;
+ break;
+ case 'x':
+ if (flags == 0) /* DECREQTPARM */
+ return TERM_CMD_DECREQTPARM;
+ else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */
+ return TERM_CMD_DECFRA;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */
+ return TERM_CMD_DECSACE;
+ else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */
+ return TERM_CMD_DECRQPKFM;
+ break;
+ case 'y':
+ if (flags == 0) /* DECTST */
+ return TERM_CMD_DECTST;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */
+ return TERM_CMD_DECRQCRA;
+ else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */
+ return TERM_CMD_DECPKFMR;
+ break;
+ case 'Z':
+ if (flags == 0) /* CBT */
+ return TERM_CMD_CBT;
+ break;
+ case 'z':
+ if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */
+ return TERM_CMD_DECERA;
+ else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */
+ return TERM_CMD_DECELR;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */
+ return TERM_CMD_DECINVM;
+ else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */
+ return TERM_CMD_DECPKA;
+ break;
+ case '@':
+ if (flags == 0) /* ICH */
+ return TERM_CMD_ICH;
+ break;
+ case '`':
+ if (flags == 0) /* HPA */
+ return TERM_CMD_HPA;
+ break;
+ case '{':
+ if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */
+ return TERM_CMD_DECSERA;
+ else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */
+ return TERM_CMD_DECSLE;
+ break;
+ case '|':
+ if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */
+ return TERM_CMD_DECSCPP;
+ else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */
+ return TERM_CMD_DECRQLP;
+ else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */
+ return TERM_CMD_DECSNLS;
+ break;
+ case '}':
+ if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */
+ return TERM_CMD_DECKBD;
+ else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */
+ return TERM_CMD_DECSASD;
+ else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */
+ return TERM_CMD_DECIC;
+ break;
+ case '~':
+ if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */
+ return TERM_CMD_DECTME;
+ else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */
+ return TERM_CMD_DECSSDT;
+ else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */
+ return TERM_CMD_DECDC;
+ break;
+ }
+
+ return TERM_CMD_NONE;
+}
+
+/*
+ * State Machine
+ * This parser controls the parser-state and returns any detected sequence to
+ * the caller. The parser is based on this state-diagram from Paul Williams:
+ * http://vt100.net/emu/
+ * It was written from scratch and extended where needed.
+ * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
+ * input. It's the callers responsibility to do any UTF-8 parsing.
+ */
+
+enum parser_state {
+ STATE_NONE, /* placeholder */
+ STATE_GROUND, /* initial state and ground */
+ STATE_ESC, /* ESC sequence was started */
+ STATE_ESC_INT, /* intermediate escape characters */
+ STATE_CSI_ENTRY, /* starting CSI sequence */
+ STATE_CSI_PARAM, /* CSI parameters */
+ STATE_CSI_INT, /* intermediate CSI characters */
+ STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */
+ STATE_DCS_ENTRY, /* starting DCS sequence */
+ STATE_DCS_PARAM, /* DCS parameters */
+ STATE_DCS_INT, /* intermediate DCS characters */
+ STATE_DCS_PASS, /* DCS data passthrough */
+ STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */
+ STATE_OSC_STRING, /* parsing OSC sequence */
+ STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */
+ STATE_NUM
+};
+
+enum parser_action {
+ ACTION_NONE, /* placeholder */
+ ACTION_CLEAR, /* clear parameters */
+ ACTION_IGNORE, /* ignore the character entirely */
+ ACTION_PRINT, /* print the character on the console */
+ ACTION_EXECUTE, /* execute single control character (C0/C1) */
+ ACTION_COLLECT, /* collect intermediate character */
+ ACTION_PARAM, /* collect parameter character */
+ ACTION_ESC_DISPATCH, /* dispatch escape sequence */
+ ACTION_CSI_DISPATCH, /* dispatch csi sequence */
+ ACTION_DCS_START, /* start of DCS data */
+ ACTION_DCS_COLLECT, /* collect DCS data */
+ ACTION_DCS_CONSUME, /* consume DCS terminator */
+ ACTION_DCS_DISPATCH, /* dispatch dcs sequence */
+ ACTION_OSC_START, /* start of OSC data */
+ ACTION_OSC_COLLECT, /* collect OSC data */
+ ACTION_OSC_CONSUME, /* consume OSC terminator */
+ ACTION_OSC_DISPATCH, /* dispatch osc sequence */
+ ACTION_NUM
+};
+
+int term_parser_new(term_parser **out, bool host) {
+ _term_parser_free_ term_parser *parser = NULL;
+
+ assert_return(out, -EINVAL);
+
+ parser = new0(term_parser, 1);
+ if (!parser)
+ return -ENOMEM;
+
+ parser->is_host = host;
+ parser->st_alloc = 64;
+ parser->seq.st = new0(char, parser->st_alloc + 1);
+ if (!parser->seq.st)
+ return -ENOMEM;
+
+ *out = parser;
+ parser = NULL;
+ return 0;
+}
+
+term_parser *term_parser_free(term_parser *parser) {
+ if (!parser)
+ return NULL;
+
+ free(parser->seq.st);
+ free(parser);
+ return NULL;
+}
+
+static inline void parser_clear(term_parser *parser) {
+ unsigned int i;
+
+ parser->seq.command = TERM_CMD_NONE;
+ parser->seq.terminator = 0;
+ parser->seq.intermediates = 0;
+ parser->seq.charset = TERM_CHARSET_NONE;
+ parser->seq.n_args = 0;
+ for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
+ parser->seq.args[i] = -1;
+
+ parser->seq.n_st = 0;
+ parser->seq.st[0] = 0;
+}
+
+static int parser_ignore(term_parser *parser, uint32_t raw) {
+ parser_clear(parser);
+ parser->seq.type = TERM_SEQ_IGNORE;
+ parser->seq.command = TERM_CMD_NONE;
+ parser->seq.terminator = raw;
+ parser->seq.charset = TERM_CHARSET_NONE;
+
+ return parser->seq.type;
+}
+
+static int parser_print(term_parser *parser, uint32_t raw) {
+ parser_clear(parser);
+ parser->seq.type = TERM_SEQ_GRAPHIC;
+ parser->seq.command = TERM_CMD_GRAPHIC;
+ parser->seq.terminator = raw;
+ parser->seq.charset = TERM_CHARSET_NONE;
+
+ return parser->seq.type;
+}
+
+static int parser_execute(term_parser *parser, uint32_t raw) {
+ parser_clear(parser);
+ parser->seq.type = TERM_SEQ_CONTROL;
+ parser->seq.command = TERM_CMD_GRAPHIC;
+ parser->seq.terminator = raw;
+ parser->seq.charset = TERM_CHARSET_NONE;
+ if (!parser->is_host)
+ parser->seq.command = term_parse_host_control(&parser->seq);
+
+ return parser->seq.type;
+}
+
+static void parser_collect(term_parser *parser, uint32_t raw) {
+ /*
+ * Usually, characters from 0x30 to 0x3f are only allowed as leading
+ * markers (or as part of the parameters), characters from 0x20 to 0x2f
+ * are only allowed as trailing markers. However, our state-machine
+ * already verifies those restrictions so we can handle them the same
+ * way here. Note that we safely allow markers to be specified multiple
+ * times.
+ */
+
+ if (raw >= 0x20 && raw <= 0x3f)
+ parser->seq.intermediates |= 1 << (raw - 0x20);
+}
+
+static void parser_param(term_parser *parser, uint32_t raw) {
+ int new;
+
+ if (raw == ';') {
+ if (parser->seq.n_args < TERM_PARSER_ARG_MAX)
+ ++parser->seq.n_args;
+
+ return;
+ }
+
+ if (parser->seq.n_args >= TERM_PARSER_ARG_MAX)
+ return;
+
+ if (raw >= '0' && raw <= '9') {
+ new = parser->seq.args[parser->seq.n_args];
+ if (new < 0)
+ new = 0;
+ new = new * 10 + raw - '0';
+
+ /* VT510 tells us to clamp all values to [0, 9999], however, it
+ * also allows commands with values up to 2^15-1. We simply use
+ * 2^16 as maximum here to be compatible to all commands, but
+ * avoid overflows in any calculations. */
+ if (new > 0xffff)
+ new = 0xffff;
+
+ parser->seq.args[parser->seq.n_args] = new;
+ }
+}
+
+static int parser_esc(term_parser *parser, uint32_t raw) {
+ parser->seq.type = TERM_SEQ_ESCAPE;
+ parser->seq.command = TERM_CMD_NONE;
+ parser->seq.terminator = raw;
+ parser->seq.charset = TERM_CHARSET_NONE;
+ if (!parser->is_host)
+ parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset);
+
+ return parser->seq.type;
+}
+
+static int parser_csi(term_parser *parser, uint32_t raw) {
+ /* parser->seq is cleared during CSI-ENTER state, thus there's no need
+ * to clear invalid fields here. */
+
+ if (parser->seq.n_args < TERM_PARSER_ARG_MAX) {
+ if (parser->seq.n_args > 0 ||
+ parser->seq.args[parser->seq.n_args] >= 0)
+ ++parser->seq.n_args;
+ }
+
+ parser->seq.type = TERM_SEQ_CSI;
+ parser->seq.command = TERM_CMD_NONE;
+ parser->seq.terminator = raw;
+ parser->seq.charset = TERM_CHARSET_NONE;
+ if (!parser->is_host)
+ parser->seq.command = term_parse_host_csi(&parser->seq);
+
+ return parser->seq.type;
+}
+
+/* perform state transition and dispatch related actions */
+static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
+ if (state != STATE_NONE)
+ parser->state = state;
+
+ switch (action) {
+ case ACTION_NONE:
+ return TERM_SEQ_NONE;
+ case ACTION_CLEAR:
+ parser_clear(parser);
+ return TERM_SEQ_NONE;
+ case ACTION_IGNORE:
+ return parser_ignore(parser, raw);
+ case ACTION_PRINT:
+ return parser_print(parser, raw);
+ case ACTION_EXECUTE:
+ return parser_execute(parser, raw);
+ case ACTION_COLLECT:
+ parser_collect(parser, raw);
+ return TERM_SEQ_NONE;
+ case ACTION_PARAM:
+ parser_param(parser, raw);
+ return TERM_SEQ_NONE;
+ case ACTION_ESC_DISPATCH:
+ return parser_esc(parser, raw);
+ case ACTION_CSI_DISPATCH:
+ return parser_csi(parser, raw);
+ case ACTION_DCS_START:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_DCS_COLLECT:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_DCS_CONSUME:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_DCS_DISPATCH:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_OSC_START:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_OSC_COLLECT:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_OSC_CONSUME:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ case ACTION_OSC_DISPATCH:
+ /* not implemented */
+ return TERM_SEQ_NONE;
+ default:
+ assert_not_reached("invalid vte-parser action");
+ return TERM_SEQ_NONE;
+ }
+}
+
+static int parser_feed_to_state(term_parser *parser, uint32_t raw) {
+ switch (parser->state) {
+ case STATE_NONE:
+ /*
+ * During initialization, parser->state is cleared. Treat this
+ * as STATE_GROUND. We will then never get to STATE_NONE again.
+ */
+ case STATE_GROUND:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ case 0x80 ... 0x9b: /* C1 \ { ST } */
+ case 0x9d ... 0x9f:
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT);
+ case STATE_ESC:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
+ case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
+ case 0x51 ... 0x57:
+ case 0x59 ... 0x5a:
+ case 0x5c:
+ case 0x60 ... 0x7e:
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
+ case 0x50: /* 'P' */
+ return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
+ case 0x5b: /* '[' */
+ return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
+ case 0x5d: /* ']' */
+ return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
+ case 0x58: /* 'X' */
+ case 0x5e: /* '^' */
+ case 0x5f: /* '_' */
+ return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
+ case STATE_ESC_INT:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
+ case 0x30 ... 0x7e: /* ['0' - '~'] */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
+ case STATE_CSI_ENTRY:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
+ case 0x3a: /* ':' */
+ return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
+ case 0x30 ... 0x39: /* ['0' - '9'] */
+ case 0x3b: /* ';' */
+ return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM);
+ case 0x3c ... 0x3f: /* ['<' - '?'] */
+ return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
+ case STATE_CSI_PARAM:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
+ case 0x30 ... 0x39: /* ['0' - '9'] */
+ case 0x3b: /* ';' */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
+ case 0x3a: /* ':' */
+ case 0x3c ... 0x3f: /* ['<' - '?'] */
+ return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
+ case STATE_CSI_INT:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
+ case 0x30 ... 0x3f: /* ['0' - '?'] */
+ return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
+ case STATE_CSI_IGNORE:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
+ case 0x20 ... 0x3f: /* [' ' - '?'] */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
+ case STATE_DCS_ENTRY:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
+ case 0x3a: /* ':' */
+ return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
+ case 0x30 ... 0x39: /* ['0' - '9'] */
+ case 0x3b: /* ';' */
+ return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM);
+ case 0x3c ... 0x3f: /* ['<' - '?'] */
+ return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
+ case STATE_DCS_PARAM:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
+ case 0x30 ... 0x39: /* ['0' - '9'] */
+ case 0x3b: /* ';' */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
+ case 0x3a: /* ':' */
+ case 0x3c ... 0x3f: /* ['<' - '?'] */
+ return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
+ case STATE_DCS_INT:
+ switch (raw) {
+ case 0x00 ... 0x1f: /* C0 */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x20 ... 0x2f: /* [' ' - '\'] */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
+ case 0x30 ... 0x3f: /* ['0' - '?'] */
+ return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
+ case 0x40 ... 0x7e: /* ['@' - '~'] */
+ return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
+ case STATE_DCS_PASS:
+ switch (raw) {
+ case 0x00 ... 0x7e: /* ASCII \ { DEL } */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
+ case 0x7f: /* DEL */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
+ case STATE_DCS_IGNORE:
+ switch (raw) {
+ case 0x00 ... 0x7f: /* ASCII */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
+ case STATE_OSC_STRING:
+ switch (raw) {
+ case 0x00 ... 0x06: /* C0 \ { BEL } */
+ case 0x08 ... 0x1f:
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x20 ... 0x7f: /* [' ' - DEL] */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
+ case 0x07: /* BEL */
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
+ case STATE_ST_IGNORE:
+ switch (raw) {
+ case 0x00 ... 0x7f: /* ASCII */
+ return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
+ case 0x9c: /* ST */
+ return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ }
+
+ return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
+ }
+
+ assert_not_reached("bad vte-parser state");
+ return -EINVAL;
+}
+
+int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) {
+ int r;
+
+ assert_return(parser, -EINVAL);
+ assert_return(seq_out, -EINVAL);
+
+ /*
+ * Notes:
+ * * DEC treats GR codes as GL. We don't do that as we require UTF-8
+ * as charset and, thus, it doesn't make sense to treat GR special.
+ * * During control sequences, unexpected C1 codes cancel the sequence
+ * and immediately start a new one. C0 codes, however, may or may not
+ * be ignored/executed depending on the sequence.
+ */
+
+ switch (raw) {
+ case 0x18: /* CAN */
+ r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
+ break;
+ case 0x1a: /* SUB */
+ r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
+ break;
+ case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
+ case 0x91 ... 0x97:
+ case 0x99 ... 0x9a:
+ r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
+ break;
+ case 0x1b: /* ESC */
+ r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR);
+ break;
+ case 0x98: /* SOS */
+ case 0x9e: /* PM */
+ case 0x9f: /* APC */
+ r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
+ break;
+ case 0x90: /* DCS */
+ r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
+ break;
+ case 0x9d: /* OSC */
+ r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
+ break;
+ case 0x9b: /* CSI */
+ r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
+ break;
+ default:
+ r = parser_feed_to_state(parser, raw);
+ break;
+ }
+
+ if (r <= 0)
+ *seq_out = NULL;
+ else
+ *seq_out = &parser->seq;
+
+ return r;
+}