diff options
Diffstat (limited to 'src/libsystemd-terminal/term-parser.c')
-rw-r--r-- | src/libsystemd-terminal/term-parser.c | 1702 |
1 files changed, 0 insertions, 1702 deletions
diff --git a/src/libsystemd-terminal/term-parser.c b/src/libsystemd-terminal/term-parser.c deleted file mode 100644 index 8dc1da2f9c..0000000000 --- a/src/libsystemd-terminal/term-parser.c +++ /dev/null @@ -1,1702 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -/*** - This file is part of systemd. - - Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com> - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. -***/ - -/* - * Terminal Parser - * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The - * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST - * control sequences and generic escape sequences. - * The parser itself does not perform any actions but lets the caller react to - * detected sequences. - */ - -#include <stdbool.h> -#include <stdint.h> -#include <stdlib.h> -#include "macro.h" -#include "term-internal.h" -#include "util.h" - -static const uint8_t default_palette[18][3] = { - { 0, 0, 0 }, /* black */ - { 205, 0, 0 }, /* red */ - { 0, 205, 0 }, /* green */ - { 205, 205, 0 }, /* yellow */ - { 0, 0, 238 }, /* blue */ - { 205, 0, 205 }, /* magenta */ - { 0, 205, 205 }, /* cyan */ - { 229, 229, 229 }, /* light grey */ - { 127, 127, 127 }, /* dark grey */ - { 255, 0, 0 }, /* light red */ - { 0, 255, 0 }, /* light green */ - { 255, 255, 0 }, /* light yellow */ - { 92, 92, 255 }, /* light blue */ - { 255, 0, 255 }, /* light magenta */ - { 0, 255, 255 }, /* light cyan */ - { 255, 255, 255 }, /* white */ - - { 229, 229, 229 }, /* light grey */ - { 0, 0, 0 }, /* black */ -}; - -static uint32_t term_color_to_argb32(const term_color *color, const term_attr *attr, const uint8_t *palette) { - static const uint8_t bval[] = { - 0x00, 0x5f, 0x87, - 0xaf, 0xd7, 0xff, - }; - uint8_t r, g, b, t; - - assert(color); - - if (!palette) - palette = (void*)default_palette; - - switch (color->ccode) { - case TERM_CCODE_RGB: - r = color->red; - g = color->green; - b = color->blue; - - break; - case TERM_CCODE_256: - t = color->c256; - if (t < 16) { - r = palette[t * 3 + 0]; - g = palette[t * 3 + 1]; - b = palette[t * 3 + 2]; - } else if (t < 232) { - t -= 16; - b = bval[t % 6]; - t /= 6; - g = bval[t % 6]; - t /= 6; - r = bval[t % 6]; - } else { - t = (t - 232) * 10 + 8; - r = t; - g = t; - b = t; - } - - break; - case TERM_CCODE_BLACK ... TERM_CCODE_LIGHT_WHITE: - t = color->ccode - TERM_CCODE_BLACK; - - /* bold causes light colors (only for foreground colors) */ - if (t < 8 && attr->bold && color == &attr->fg) - t += 8; - - r = palette[t * 3 + 0]; - g = palette[t * 3 + 1]; - b = palette[t * 3 + 2]; - break; - case TERM_CCODE_DEFAULT: - /* fallthrough */ - default: - t = 16 + !(color == &attr->fg); - r = palette[t * 3 + 0]; - g = palette[t * 3 + 1]; - b = palette[t * 3 + 2]; - break; - } - - return (0xff << 24) | (r << 16) | (g << 8) | b; -} - -/** - * term_attr_to_argb32() - Encode terminal colors as native ARGB32 value - * @color: Terminal attributes to work on - * @fg: Storage for foreground color (or NULL) - * @bg: Storage for background color (or NULL) - * @palette: The color palette to use (or NULL for default) - * - * This encodes the colors attr->fg and attr->bg as native-endian ARGB32 values - * and returns them. Any color conversions are automatically applied. - */ -void term_attr_to_argb32(const term_attr *attr, uint32_t *fg, uint32_t *bg, const uint8_t *palette) { - uint32_t f, b, t; - - assert(attr); - - f = term_color_to_argb32(&attr->fg, attr, palette); - b = term_color_to_argb32(&attr->bg, attr, palette); - - if (attr->inverse) { - t = f; - f = b; - b = t; - } - - if (fg) - *fg = f; - if (bg) - *bg = b; -} - -/** - * term_utf8_decode() - Try decoding the next UCS-4 character - * @p: decoder object to operate on or NULL - * @out_len: output storage for pointer to decoded UCS-4 string or NULL - * @c: next char to push into decoder - * - * This decodes a UTF-8 stream. It must be called for each input-byte of the - * UTF-8 stream and returns a UCS-4 stream. A pointer to the parsed UCS-4 - * string is stored in @out_buf if non-NULL. The length of this string (number - * of parsed UCS4 characters) is returned as result. The string is not - * zero-terminated! Furthermore, the string is only valid until the next - * invocation of this function. It is also bound to the parser state @p and - * must not be freed nor written to by the caller. - * - * This function is highly optimized to work with terminal-emulators. Instead - * of being strict about UTF-8 validity, this tries to perform a fallback to - * ISO-8859-1 in case a wrong series was detected. Therefore, this function - * might return multiple UCS-4 characters by parsing just a single UTF-8 byte. - * - * The parser state @p should be allocated and managed by the caller. There're - * no helpers to do that for you. To initialize it, simply reset it to all - * zero. You can reset or free the object at any point in time. - * - * Returns: Number of parsed UCS4 characters - */ -size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c) { - static uint32_t ucs4_null = 0; - uint32_t t, *res = NULL; - uint8_t byte; - size_t len = 0; - - if (!p) - goto out; - - byte = c; - - if (!p->valid || p->i_bytes >= p->n_bytes) { - /* - * If the previous sequence was invalid or fully parsed, start - * parsing a fresh new sequence. - */ - - if ((byte & 0xE0) == 0xC0) { - /* start of two byte sequence */ - t = byte & 0x1F; - p->n_bytes = 2; - p->i_bytes = 1; - p->valid = 1; - } else if ((byte & 0xF0) == 0xE0) { - /* start of three byte sequence */ - t = byte & 0x0F; - p->n_bytes = 3; - p->i_bytes = 1; - p->valid = 1; - } else if ((byte & 0xF8) == 0xF0) { - /* start of four byte sequence */ - t = byte & 0x07; - p->n_bytes = 4; - p->i_bytes = 1; - p->valid = 1; - } else { - /* Either of: - * - single ASCII 7-bit char - * - out-of-sync continuation byte - * - overlong encoding - * All of them are treated as single byte ISO-8859-1 */ - t = byte; - p->n_bytes = 1; - p->i_bytes = 1; - p->valid = 0; - } - - p->chars[0] = byte; - p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes)); - } else { - /* - * ..otherwise, try to continue the previous sequence.. - */ - - if ((byte & 0xC0) == 0x80) { - /* - * Valid continuation byte. Append to sequence and - * update the ucs4 cache accordingly. - */ - - t = byte & 0x3F; - p->chars[p->i_bytes++] = byte; - p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes)); - } else { - /* - * Invalid continuation? Treat cached sequence as - * ISO-8859-1, but parse the new char as valid new - * starting character. If it's a new single-byte UTF-8 - * sequence, we immediately return it in the same run, - * otherwise, we might suffer from starvation. - */ - - if ((byte & 0xE0) == 0xC0 || - (byte & 0xF0) == 0xE0 || - (byte & 0xF8) == 0xF0) { - /* - * New multi-byte sequence. Move to-be-returned - * data at the end and start new sequence. Only - * return the old sequence. - */ - - memmove(p->chars + 1, - p->chars, - sizeof(*p->chars) * p->i_bytes); - res = p->chars + 1; - len = p->i_bytes; - - if ((byte & 0xE0) == 0xC0) { - /* start of two byte sequence */ - t = byte & 0x1F; - p->n_bytes = 2; - p->i_bytes = 1; - p->valid = 1; - } else if ((byte & 0xF0) == 0xE0) { - /* start of three byte sequence */ - t = byte & 0x0F; - p->n_bytes = 3; - p->i_bytes = 1; - p->valid = 1; - } else if ((byte & 0xF8) == 0xF0) { - /* start of four byte sequence */ - t = byte & 0x07; - p->n_bytes = 4; - p->i_bytes = 1; - p->valid = 1; - } else - assert_not_reached("Should not happen"); - - p->chars[0] = byte; - p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes)); - - goto out; - } else { - /* - * New single byte sequence, append to output - * and return combined sequence. - */ - - p->chars[p->i_bytes++] = byte; - p->valid = 0; - } - } - } - - /* - * Check whether a full sequence (valid or invalid) has been parsed and - * then return it. Otherwise, return nothing. - */ - if (p->valid) { - /* still parsing? then bail out */ - if (p->i_bytes < p->n_bytes) - goto out; - - res = &p->ucs4; - len = 1; - } else { - res = p->chars; - len = p->i_bytes; - } - - p->valid = 0; - p->i_bytes = 0; - p->n_bytes = 0; - -out: - if (out_buf) - *out_buf = res ? : &ucs4_null; - return len; -} - -/* - * Command Parser - * The ctl-seq parser "term_parser" only detects whole sequences, it does not - * detect the specific command. Once a sequence is parsed, the command-parsers - * are used to figure out their meaning. Note that this depends on whether we - * run on the host or terminal side. - */ - -static unsigned int term_parse_host_control(const term_seq *seq) { - assert_return(seq, TERM_CMD_NONE); - - switch (seq->terminator) { - case 0x00: /* NUL */ - return TERM_CMD_NULL; - case 0x05: /* ENQ */ - return TERM_CMD_ENQ; - case 0x07: /* BEL */ - return TERM_CMD_BEL; - case 0x08: /* BS */ - return TERM_CMD_BS; - case 0x09: /* HT */ - return TERM_CMD_HT; - case 0x0a: /* LF */ - return TERM_CMD_LF; - case 0x0b: /* VT */ - return TERM_CMD_VT; - case 0x0c: /* FF */ - return TERM_CMD_FF; - case 0x0d: /* CR */ - return TERM_CMD_CR; - case 0x0e: /* SO */ - return TERM_CMD_SO; - case 0x0f: /* SI */ - return TERM_CMD_SI; - case 0x11: /* DC1 */ - return TERM_CMD_DC1; - case 0x13: /* DC3 */ - return TERM_CMD_DC3; - case 0x18: /* CAN */ - /* this is already handled by the state-machine */ - break; - case 0x1a: /* SUB */ - return TERM_CMD_SUB; - case 0x1b: /* ESC */ - /* this is already handled by the state-machine */ - break; - case 0x1f: /* DEL */ - /* this is already handled by the state-machine */ - break; - case 0x84: /* IND */ - return TERM_CMD_IND; - case 0x85: /* NEL */ - return TERM_CMD_NEL; - case 0x88: /* HTS */ - return TERM_CMD_HTS; - case 0x8d: /* RI */ - return TERM_CMD_RI; - case 0x8e: /* SS2 */ - return TERM_CMD_SS2; - case 0x8f: /* SS3 */ - return TERM_CMD_SS3; - case 0x90: /* DCS */ - /* this is already handled by the state-machine */ - break; - case 0x96: /* SPA */ - return TERM_CMD_SPA; - case 0x97: /* EPA */ - return TERM_CMD_EPA; - case 0x98: /* SOS */ - /* this is already handled by the state-machine */ - break; - case 0x9a: /* DECID */ - return TERM_CMD_DECID; - case 0x9b: /* CSI */ - /* this is already handled by the state-machine */ - break; - case 0x9c: /* ST */ - return TERM_CMD_ST; - case 0x9d: /* OSC */ - /* this is already handled by the state-machine */ - break; - case 0x9e: /* PM */ - /* this is already handled by the state-machine */ - break; - case 0x9f: /* APC */ - /* this is already handled by the state-machine */ - break; - } - - return TERM_CMD_NONE; -} - -static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) { - static const struct { - uint32_t raw; - unsigned int flags; - } charset_cmds[] = { - /* 96-compat charsets */ - [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 }, - [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 }, - [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 }, - [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 }, - [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 }, - [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 }, - - /* 94-compat charsets */ - [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 }, - [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT }, - [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 }, - [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND }, - [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 }, - [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 }, - [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 }, - [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 }, - [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 }, - [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE }, - [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE }, - [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE }, - [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT }, - [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 }, - [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 }, - [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT }, - [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND }, - [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT }, - [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 }, - [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 }, - [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 }, - [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT }, - [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT }, - - /* special charsets */ - [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 }, - - /* secondary choices */ - [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 }, - [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 }, - [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 }, - [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 }, - [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */ - - /* tertiary choices */ - [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 }, - }; - size_t i, cs; - - /* - * Secondary choice on SWEDISH_NRCS and primary choice on - * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0. - * We always choose the ISO 96-compat set, which is what VT510 does. - */ - - for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) { - if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) { - cs = i; - while (cs >= TERM_CHARSET_CNT) - cs -= TERM_CHARSET_CNT; - - if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT) - return cs; - } - } - - return -ENOENT; -} - -/* true if exactly one bit in @value is set */ -static inline bool exactly_one_bit_set(unsigned int value) { - return __builtin_popcount(value) == 1; -} - -static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) { - unsigned int t, flags; - int cs; - - assert_return(seq, TERM_CMD_NONE); - - flags = seq->intermediates; - t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT | - TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT | - TERM_SEQ_FLAG_SLASH; - - if (exactly_one_bit_set(flags & t)) { - switch (flags & t) { - case TERM_SEQ_FLAG_POPEN: - case TERM_SEQ_FLAG_PCLOSE: - case TERM_SEQ_FLAG_MULT: - case TERM_SEQ_FLAG_PLUS: - cs = charset_from_cmd(seq->terminator, flags & ~t, false); - break; - case TERM_SEQ_FLAG_MINUS: - case TERM_SEQ_FLAG_DOT: - case TERM_SEQ_FLAG_SLASH: - cs = charset_from_cmd(seq->terminator, flags & ~t, true); - break; - default: - cs = -ENOENT; - break; - } - - if (cs >= 0) { - if (cs_out) - *cs_out = cs; - return TERM_CMD_SCS; - } - - /* looked like a charset-cmd but wasn't; continue */ - } - - switch (seq->terminator) { - case '3': - if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */ - return TERM_CMD_DECDHL_TH; - break; - case '4': - if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */ - return TERM_CMD_DECDHL_BH; - break; - case '5': - if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */ - return TERM_CMD_DECSWL; - break; - case '6': - if (flags == 0) /* DECBI */ - return TERM_CMD_DECBI; - else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */ - return TERM_CMD_DECDWL; - break; - case '7': - if (flags == 0) /* DECSC */ - return TERM_CMD_DECSC; - break; - case '8': - if (flags == 0) /* DECRC */ - return TERM_CMD_DECRC; - else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */ - return TERM_CMD_DECALN; - break; - case '9': - if (flags == 0) /* DECFI */ - return TERM_CMD_DECFI; - break; - case '<': - if (flags == 0) /* DECANM */ - return TERM_CMD_DECANM; - break; - case '=': - if (flags == 0) /* DECKPAM */ - return TERM_CMD_DECKPAM; - break; - case '>': - if (flags == 0) /* DECKPNM */ - return TERM_CMD_DECKPNM; - break; - case '@': - if (flags == TERM_SEQ_FLAG_PERCENT) { - /* Select default character set */ - return TERM_CMD_XTERM_SDCS; - } - break; - case 'D': - if (flags == 0) /* IND */ - return TERM_CMD_IND; - break; - case 'E': - if (flags == 0) /* NEL */ - return TERM_CMD_NEL; - break; - case 'F': - if (flags == 0) /* Cursor to lower-left corner of screen */ - return TERM_CMD_XTERM_CLLHP; - else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */ - return TERM_CMD_S7C1T; - break; - case 'G': - if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */ - return TERM_CMD_S8C1T; - } else if (flags == TERM_SEQ_FLAG_PERCENT) { - /* Select UTF-8 character set */ - return TERM_CMD_XTERM_SUCS; - } - break; - case 'H': - if (flags == 0) /* HTS */ - return TERM_CMD_HTS; - break; - case 'L': - if (flags == TERM_SEQ_FLAG_SPACE) { - /* Set ANSI conformance level 1 */ - return TERM_CMD_XTERM_SACL1; - } - break; - case 'M': - if (flags == 0) { /* RI */ - return TERM_CMD_RI; - } else if (flags == TERM_SEQ_FLAG_SPACE) { - /* Set ANSI conformance level 2 */ - return TERM_CMD_XTERM_SACL2; - } - break; - case 'N': - if (flags == 0) { /* SS2 */ - return TERM_CMD_SS2; - } else if (flags == TERM_SEQ_FLAG_SPACE) { - /* Set ANSI conformance level 3 */ - return TERM_CMD_XTERM_SACL3; - } - break; - case 'O': - if (flags == 0) /* SS3 */ - return TERM_CMD_SS3; - break; - case 'P': - if (flags == 0) /* DCS: this is already handled by the state-machine */ - return 0; - break; - case 'V': - if (flags == 0) /* SPA */ - return TERM_CMD_SPA; - break; - case 'W': - if (flags == 0) /* EPA */ - return TERM_CMD_EPA; - break; - case 'X': - if (flags == 0) { /* SOS */ - /* this is already handled by the state-machine */ - break; - } - break; - case 'Z': - if (flags == 0) /* DECID */ - return TERM_CMD_DECID; - break; - case '[': - if (flags == 0) { /* CSI */ - /* this is already handled by the state-machine */ - break; - } - break; - case '\\': - if (flags == 0) /* ST */ - return TERM_CMD_ST; - break; - case ']': - if (flags == 0) { /* OSC */ - /* this is already handled by the state-machine */ - break; - } - break; - case '^': - if (flags == 0) { /* PM */ - /* this is already handled by the state-machine */ - break; - } - break; - case '_': - if (flags == 0) { /* APC */ - /* this is already handled by the state-machine */ - break; - } - break; - case 'c': - if (flags == 0) /* RIS */ - return TERM_CMD_RIS; - break; - case 'l': - if (flags == 0) /* Memory lock */ - return TERM_CMD_XTERM_MLHP; - break; - case 'm': - if (flags == 0) /* Memory unlock */ - return TERM_CMD_XTERM_MUHP; - break; - case 'n': - if (flags == 0) /* LS2 */ - return TERM_CMD_LS2; - break; - case 'o': - if (flags == 0) /* LS3 */ - return TERM_CMD_LS3; - break; - case '|': - if (flags == 0) /* LS3R */ - return TERM_CMD_LS3R; - break; - case '}': - if (flags == 0) /* LS2R */ - return TERM_CMD_LS2R; - break; - case '~': - if (flags == 0) /* LS1R */ - return TERM_CMD_LS1R; - break; - } - - return TERM_CMD_NONE; -} - -static unsigned int term_parse_host_csi(const term_seq *seq) { - unsigned int flags; - - assert_return(seq, TERM_CMD_NONE); - - flags = seq->intermediates; - - switch (seq->terminator) { - case 'A': - if (flags == 0) /* CUU */ - return TERM_CMD_CUU; - break; - case 'a': - if (flags == 0) /* HPR */ - return TERM_CMD_HPR; - break; - case 'B': - if (flags == 0) /* CUD */ - return TERM_CMD_CUD; - break; - case 'b': - if (flags == 0) /* REP */ - return TERM_CMD_REP; - break; - case 'C': - if (flags == 0) /* CUF */ - return TERM_CMD_CUF; - break; - case 'c': - if (flags == 0) /* DA1 */ - return TERM_CMD_DA1; - else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */ - return TERM_CMD_DA2; - else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */ - return TERM_CMD_DA3; - break; - case 'D': - if (flags == 0) /* CUB */ - return TERM_CMD_CUB; - break; - case 'd': - if (flags == 0) /* VPA */ - return TERM_CMD_VPA; - break; - case 'E': - if (flags == 0) /* CNL */ - return TERM_CMD_CNL; - break; - case 'e': - if (flags == 0) /* VPR */ - return TERM_CMD_VPR; - break; - case 'F': - if (flags == 0) /* CPL */ - return TERM_CMD_CPL; - break; - case 'f': - if (flags == 0) /* HVP */ - return TERM_CMD_HVP; - break; - case 'G': - if (flags == 0) /* CHA */ - return TERM_CMD_CHA; - break; - case 'g': - if (flags == 0) /* TBC */ - return TERM_CMD_TBC; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */ - return TERM_CMD_DECLFKC; - break; - case 'H': - if (flags == 0) /* CUP */ - return TERM_CMD_CUP; - break; - case 'h': - if (flags == 0) /* SM ANSI */ - return TERM_CMD_SM_ANSI; - else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */ - return TERM_CMD_SM_DEC; - break; - case 'I': - if (flags == 0) /* CHT */ - return TERM_CMD_CHT; - break; - case 'i': - if (flags == 0) /* MC ANSI */ - return TERM_CMD_MC_ANSI; - else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */ - return TERM_CMD_MC_DEC; - break; - case 'J': - if (flags == 0) /* ED */ - return TERM_CMD_ED; - else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */ - return TERM_CMD_DECSED; - break; - case 'K': - if (flags == 0) /* EL */ - return TERM_CMD_EL; - else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */ - return TERM_CMD_DECSEL; - break; - case 'L': - if (flags == 0) /* IL */ - return TERM_CMD_IL; - break; - case 'l': - if (flags == 0) /* RM ANSI */ - return TERM_CMD_RM_ANSI; - else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */ - return TERM_CMD_RM_DEC; - break; - case 'M': - if (flags == 0) /* DL */ - return TERM_CMD_DL; - break; - case 'm': - if (flags == 0) /* SGR */ - return TERM_CMD_SGR; - else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */ - return TERM_CMD_XTERM_SRV; - break; - case 'n': - if (flags == 0) /* DSR ANSI */ - return TERM_CMD_DSR_ANSI; - else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */ - return TERM_CMD_XTERM_RRV; - else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */ - return TERM_CMD_DSR_DEC; - break; - case 'P': - if (flags == 0) /* DCH */ - return TERM_CMD_DCH; - else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */ - return TERM_CMD_PPA; - break; - case 'p': - if (flags == 0) /* DECSSL */ - return TERM_CMD_DECSSL; - else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */ - return TERM_CMD_DECSSCLS; - else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */ - return TERM_CMD_DECSTR; - else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */ - return TERM_CMD_DECSCL; - else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */ - return TERM_CMD_DECRQM_ANSI; - else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */ - return TERM_CMD_DECRQM_DEC; - else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */ - return TERM_CMD_DECSDPT; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */ - return TERM_CMD_DECSPPCS; - else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */ - return TERM_CMD_DECSR; - else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */ - return TERM_CMD_DECLTOD; - else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */ - return TERM_CMD_XTERM_SPM; - break; - case 'Q': - if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */ - return TERM_CMD_PPR; - break; - case 'q': - if (flags == 0) /* DECLL */ - return TERM_CMD_DECLL; - else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */ - return TERM_CMD_DECSCUSR; - else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */ - return TERM_CMD_DECSCA; - else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */ - return TERM_CMD_DECSDDT; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */ - return TERM_CMD_DECSR; - else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */ - return TERM_CMD_DECELF; - else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */ - return TERM_CMD_DECTID; - break; - case 'R': - if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */ - return TERM_CMD_PPB; - break; - case 'r': - if (flags == 0) { - /* DECSTBM */ - return TERM_CMD_DECSTBM; - } else if (flags == TERM_SEQ_FLAG_SPACE) { - /* DECSKCV */ - return TERM_CMD_DECSKCV; - } else if (flags == TERM_SEQ_FLAG_CASH) { - /* DECCARA */ - return TERM_CMD_DECCARA; - } else if (flags == TERM_SEQ_FLAG_MULT) { - /* DECSCS */ - return TERM_CMD_DECSCS; - } else if (flags == TERM_SEQ_FLAG_PLUS) { - /* DECSMKR */ - return TERM_CMD_DECSMKR; - } else if (flags == TERM_SEQ_FLAG_WHAT) { - /* - * There's a conflict between DECPCTERM and XTERM-RPM. - * XTERM-RPM takes a single argument, DECPCTERM takes 2. - * Split both up and forward the call to the closer - * match. - */ - if (seq->n_args <= 1) /* XTERM RPM */ - return TERM_CMD_XTERM_RPM; - else if (seq->n_args >= 2) /* DECPCTERM */ - return TERM_CMD_DECPCTERM; - } - break; - case 'S': - if (flags == 0) /* SU */ - return TERM_CMD_SU; - else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */ - return TERM_CMD_XTERM_SGFX; - break; - case 's': - if (flags == 0) { - /* - * There's a conflict between DECSLRM and SC-ANSI which - * cannot be resolved without knowing the state of - * DECLRMM. We leave that decision up to the caller. - */ - return TERM_CMD_DECSLRM_OR_SC; - } else if (flags == TERM_SEQ_FLAG_CASH) { - /* DECSPRTT */ - return TERM_CMD_DECSPRTT; - } else if (flags == TERM_SEQ_FLAG_MULT) { - /* DECSFC */ - return TERM_CMD_DECSFC; - } else if (flags == TERM_SEQ_FLAG_WHAT) { - /* XTERM SPM */ - return TERM_CMD_XTERM_SPM; - } - break; - case 'T': - if (flags == 0) { - /* - * Awesome: There's a conflict between SD and XTERM IHMT - * that we have to resolve by checking the parameter - * count.. XTERM_IHMT needs exactly 5 arguments, SD - * takes 0 or 1. We're conservative here and give both - * a wider range to allow unused arguments (compat...). - */ - if (seq->n_args >= 5) { - /* XTERM IHMT */ - return TERM_CMD_XTERM_IHMT; - } else if (seq->n_args < 5) { - /* SD */ - return TERM_CMD_SD; - } - } else if (flags == TERM_SEQ_FLAG_GT) { - /* XTERM RTM */ - return TERM_CMD_XTERM_RTM; - } - break; - case 't': - if (flags == 0) { - if (seq->n_args > 0 && seq->args[0] < 24) { - /* XTERM WM */ - return TERM_CMD_XTERM_WM; - } else { - /* DECSLPP */ - return TERM_CMD_DECSLPP; - } - } else if (flags == TERM_SEQ_FLAG_SPACE) { - /* DECSWBV */ - return TERM_CMD_DECSWBV; - } else if (flags == TERM_SEQ_FLAG_DQUOTE) { - /* DECSRFR */ - return TERM_CMD_DECSRFR; - } else if (flags == TERM_SEQ_FLAG_CASH) { - /* DECRARA */ - return TERM_CMD_DECRARA; - } else if (flags == TERM_SEQ_FLAG_GT) { - /* XTERM STM */ - return TERM_CMD_XTERM_STM; - } - break; - case 'U': - if (flags == 0) /* NP */ - return TERM_CMD_NP; - break; - case 'u': - if (flags == 0) { - /* RC */ - return TERM_CMD_RC; - } else if (flags == TERM_SEQ_FLAG_SPACE) { - /* DECSMBV */ - return TERM_CMD_DECSMBV; - } else if (flags == TERM_SEQ_FLAG_DQUOTE) { - /* DECSTRL */ - return TERM_CMD_DECSTRL; - } else if (flags == TERM_SEQ_FLAG_WHAT) { - /* DECRQUPSS */ - return TERM_CMD_DECRQUPSS; - } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) { - /* DECRQTSR */ - return TERM_CMD_DECRQTSR; - } else if (flags == TERM_SEQ_FLAG_MULT) { - /* DECSCP */ - return TERM_CMD_DECSCP; - } else if (flags == TERM_SEQ_FLAG_COMMA) { - /* DECRQKT */ - return TERM_CMD_DECRQKT; - } - break; - case 'V': - if (flags == 0) /* PP */ - return TERM_CMD_PP; - break; - case 'v': - if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */ - return TERM_CMD_DECSLCK; - else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */ - return TERM_CMD_DECRQDE; - else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */ - return TERM_CMD_DECCRA; - else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */ - return TERM_CMD_DECRPKT; - break; - case 'W': - if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) { - /* DECST8C */ - return TERM_CMD_DECST8C; - } - break; - case 'w': - if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */ - return TERM_CMD_DECRQPSR; - else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */ - return TERM_CMD_DECEFR; - else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */ - return TERM_CMD_DECSPP; - break; - case 'X': - if (flags == 0) /* ECH */ - return TERM_CMD_ECH; - break; - case 'x': - if (flags == 0) /* DECREQTPARM */ - return TERM_CMD_DECREQTPARM; - else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */ - return TERM_CMD_DECFRA; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */ - return TERM_CMD_DECSACE; - else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */ - return TERM_CMD_DECRQPKFM; - break; - case 'y': - if (flags == 0) /* DECTST */ - return TERM_CMD_DECTST; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */ - return TERM_CMD_DECRQCRA; - else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */ - return TERM_CMD_DECPKFMR; - break; - case 'Z': - if (flags == 0) /* CBT */ - return TERM_CMD_CBT; - break; - case 'z': - if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */ - return TERM_CMD_DECERA; - else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */ - return TERM_CMD_DECELR; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */ - return TERM_CMD_DECINVM; - else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */ - return TERM_CMD_DECPKA; - break; - case '@': - if (flags == 0) /* ICH */ - return TERM_CMD_ICH; - break; - case '`': - if (flags == 0) /* HPA */ - return TERM_CMD_HPA; - break; - case '{': - if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */ - return TERM_CMD_DECSERA; - else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */ - return TERM_CMD_DECSLE; - break; - case '|': - if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */ - return TERM_CMD_DECSCPP; - else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */ - return TERM_CMD_DECRQLP; - else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */ - return TERM_CMD_DECSNLS; - break; - case '}': - if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */ - return TERM_CMD_DECKBD; - else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */ - return TERM_CMD_DECSASD; - else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */ - return TERM_CMD_DECIC; - break; - case '~': - if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */ - return TERM_CMD_DECTME; - else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */ - return TERM_CMD_DECSSDT; - else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */ - return TERM_CMD_DECDC; - break; - } - - return TERM_CMD_NONE; -} - -/* - * State Machine - * This parser controls the parser-state and returns any detected sequence to - * the caller. The parser is based on this state-diagram from Paul Williams: - * http://vt100.net/emu/ - * It was written from scratch and extended where needed. - * This parser is fully compatible up to the vt500 series. We expect UCS-4 as - * input. It's the callers responsibility to do any UTF-8 parsing. - */ - -enum parser_state { - STATE_NONE, /* placeholder */ - STATE_GROUND, /* initial state and ground */ - STATE_ESC, /* ESC sequence was started */ - STATE_ESC_INT, /* intermediate escape characters */ - STATE_CSI_ENTRY, /* starting CSI sequence */ - STATE_CSI_PARAM, /* CSI parameters */ - STATE_CSI_INT, /* intermediate CSI characters */ - STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */ - STATE_DCS_ENTRY, /* starting DCS sequence */ - STATE_DCS_PARAM, /* DCS parameters */ - STATE_DCS_INT, /* intermediate DCS characters */ - STATE_DCS_PASS, /* DCS data passthrough */ - STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */ - STATE_OSC_STRING, /* parsing OSC sequence */ - STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */ - STATE_NUM -}; - -enum parser_action { - ACTION_NONE, /* placeholder */ - ACTION_CLEAR, /* clear parameters */ - ACTION_IGNORE, /* ignore the character entirely */ - ACTION_PRINT, /* print the character on the console */ - ACTION_EXECUTE, /* execute single control character (C0/C1) */ - ACTION_COLLECT, /* collect intermediate character */ - ACTION_PARAM, /* collect parameter character */ - ACTION_ESC_DISPATCH, /* dispatch escape sequence */ - ACTION_CSI_DISPATCH, /* dispatch csi sequence */ - ACTION_DCS_START, /* start of DCS data */ - ACTION_DCS_COLLECT, /* collect DCS data */ - ACTION_DCS_CONSUME, /* consume DCS terminator */ - ACTION_DCS_DISPATCH, /* dispatch dcs sequence */ - ACTION_OSC_START, /* start of OSC data */ - ACTION_OSC_COLLECT, /* collect OSC data */ - ACTION_OSC_CONSUME, /* consume OSC terminator */ - ACTION_OSC_DISPATCH, /* dispatch osc sequence */ - ACTION_NUM -}; - -int term_parser_new(term_parser **out, bool host) { - _term_parser_free_ term_parser *parser = NULL; - - assert_return(out, -EINVAL); - - parser = new0(term_parser, 1); - if (!parser) - return -ENOMEM; - - parser->is_host = host; - parser->st_alloc = 64; - parser->seq.st = new0(char, parser->st_alloc + 1); - if (!parser->seq.st) - return -ENOMEM; - - *out = parser; - parser = NULL; - return 0; -} - -term_parser *term_parser_free(term_parser *parser) { - if (!parser) - return NULL; - - free(parser->seq.st); - free(parser); - return NULL; -} - -static inline void parser_clear(term_parser *parser) { - unsigned int i; - - parser->seq.command = TERM_CMD_NONE; - parser->seq.terminator = 0; - parser->seq.intermediates = 0; - parser->seq.charset = TERM_CHARSET_NONE; - parser->seq.n_args = 0; - for (i = 0; i < TERM_PARSER_ARG_MAX; ++i) - parser->seq.args[i] = -1; - - parser->seq.n_st = 0; - parser->seq.st[0] = 0; -} - -static int parser_ignore(term_parser *parser, uint32_t raw) { - parser_clear(parser); - parser->seq.type = TERM_SEQ_IGNORE; - parser->seq.command = TERM_CMD_NONE; - parser->seq.terminator = raw; - parser->seq.charset = TERM_CHARSET_NONE; - - return parser->seq.type; -} - -static int parser_print(term_parser *parser, uint32_t raw) { - parser_clear(parser); - parser->seq.type = TERM_SEQ_GRAPHIC; - parser->seq.command = TERM_CMD_GRAPHIC; - parser->seq.terminator = raw; - parser->seq.charset = TERM_CHARSET_NONE; - - return parser->seq.type; -} - -static int parser_execute(term_parser *parser, uint32_t raw) { - parser_clear(parser); - parser->seq.type = TERM_SEQ_CONTROL; - parser->seq.command = TERM_CMD_GRAPHIC; - parser->seq.terminator = raw; - parser->seq.charset = TERM_CHARSET_NONE; - if (!parser->is_host) - parser->seq.command = term_parse_host_control(&parser->seq); - - return parser->seq.type; -} - -static void parser_collect(term_parser *parser, uint32_t raw) { - /* - * Usually, characters from 0x30 to 0x3f are only allowed as leading - * markers (or as part of the parameters), characters from 0x20 to 0x2f - * are only allowed as trailing markers. However, our state-machine - * already verifies those restrictions so we can handle them the same - * way here. Note that we safely allow markers to be specified multiple - * times. - */ - - if (raw >= 0x20 && raw <= 0x3f) - parser->seq.intermediates |= 1 << (raw - 0x20); -} - -static void parser_param(term_parser *parser, uint32_t raw) { - int new; - - if (raw == ';') { - if (parser->seq.n_args < TERM_PARSER_ARG_MAX) - ++parser->seq.n_args; - - return; - } - - if (parser->seq.n_args >= TERM_PARSER_ARG_MAX) - return; - - if (raw >= '0' && raw <= '9') { - new = parser->seq.args[parser->seq.n_args]; - if (new < 0) - new = 0; - new = new * 10 + raw - '0'; - - /* VT510 tells us to clamp all values to [0, 9999], however, it - * also allows commands with values up to 2^15-1. We simply use - * 2^16 as maximum here to be compatible to all commands, but - * avoid overflows in any calculations. */ - if (new > 0xffff) - new = 0xffff; - - parser->seq.args[parser->seq.n_args] = new; - } -} - -static int parser_esc(term_parser *parser, uint32_t raw) { - parser->seq.type = TERM_SEQ_ESCAPE; - parser->seq.command = TERM_CMD_NONE; - parser->seq.terminator = raw; - parser->seq.charset = TERM_CHARSET_NONE; - if (!parser->is_host) - parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset); - - return parser->seq.type; -} - -static int parser_csi(term_parser *parser, uint32_t raw) { - /* parser->seq is cleared during CSI-ENTER state, thus there's no need - * to clear invalid fields here. */ - - if (parser->seq.n_args < TERM_PARSER_ARG_MAX) { - if (parser->seq.n_args > 0 || - parser->seq.args[parser->seq.n_args] >= 0) - ++parser->seq.n_args; - } - - parser->seq.type = TERM_SEQ_CSI; - parser->seq.command = TERM_CMD_NONE; - parser->seq.terminator = raw; - parser->seq.charset = TERM_CHARSET_NONE; - if (!parser->is_host) - parser->seq.command = term_parse_host_csi(&parser->seq); - - return parser->seq.type; -} - -/* perform state transition and dispatch related actions */ -static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) { - if (state != STATE_NONE) - parser->state = state; - - switch (action) { - case ACTION_NONE: - return TERM_SEQ_NONE; - case ACTION_CLEAR: - parser_clear(parser); - return TERM_SEQ_NONE; - case ACTION_IGNORE: - return parser_ignore(parser, raw); - case ACTION_PRINT: - return parser_print(parser, raw); - case ACTION_EXECUTE: - return parser_execute(parser, raw); - case ACTION_COLLECT: - parser_collect(parser, raw); - return TERM_SEQ_NONE; - case ACTION_PARAM: - parser_param(parser, raw); - return TERM_SEQ_NONE; - case ACTION_ESC_DISPATCH: - return parser_esc(parser, raw); - case ACTION_CSI_DISPATCH: - return parser_csi(parser, raw); - case ACTION_DCS_START: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_DCS_COLLECT: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_DCS_CONSUME: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_DCS_DISPATCH: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_OSC_START: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_OSC_COLLECT: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_OSC_CONSUME: - /* not implemented */ - return TERM_SEQ_NONE; - case ACTION_OSC_DISPATCH: - /* not implemented */ - return TERM_SEQ_NONE; - default: - assert_not_reached("invalid vte-parser action"); - return TERM_SEQ_NONE; - } -} - -static int parser_feed_to_state(term_parser *parser, uint32_t raw) { - switch (parser->state) { - case STATE_NONE: - /* - * During initialization, parser->state is cleared. Treat this - * as STATE_GROUND. We will then never get to STATE_NONE again. - */ - case STATE_GROUND: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - case 0x80 ... 0x9b: /* C1 \ { ST } */ - case 0x9d ... 0x9f: - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT); - case STATE_ESC: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT); - case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */ - case 0x51 ... 0x57: - case 0x59 ... 0x5a: - case 0x5c: - case 0x60 ... 0x7e: - return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH); - case 0x50: /* 'P' */ - return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR); - case 0x5b: /* '[' */ - return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR); - case 0x5d: /* ']' */ - return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR); - case 0x58: /* 'X' */ - case 0x5e: /* '^' */ - case 0x5f: /* '_' */ - return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT); - case STATE_ESC_INT: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); - case 0x30 ... 0x7e: /* ['0' - '~'] */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); - case STATE_CSI_ENTRY: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT); - case 0x3a: /* ':' */ - return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); - case 0x30 ... 0x39: /* ['0' - '9'] */ - case 0x3b: /* ';' */ - return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM); - case 0x3c ... 0x3f: /* ['<' - '?'] */ - return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); - case STATE_CSI_PARAM: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT); - case 0x30 ... 0x39: /* ['0' - '9'] */ - case 0x3b: /* ';' */ - return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM); - case 0x3a: /* ':' */ - case 0x3c ... 0x3f: /* ['<' - '?'] */ - return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); - case STATE_CSI_INT: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); - case 0x30 ... 0x3f: /* ['0' - '?'] */ - return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); - case STATE_CSI_IGNORE: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); - case 0x20 ... 0x3f: /* [' ' - '?'] */ - return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); - case STATE_DCS_ENTRY: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT); - case 0x3a: /* ':' */ - return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE); - case 0x30 ... 0x39: /* ['0' - '9'] */ - case 0x3b: /* ';' */ - return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM); - case 0x3c ... 0x3f: /* ['<' - '?'] */ - return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); - case STATE_DCS_PARAM: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT); - case 0x30 ... 0x39: /* ['0' - '9'] */ - case 0x3b: /* ';' */ - return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM); - case 0x3a: /* ':' */ - case 0x3c ... 0x3f: /* ['<' - '?'] */ - return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); - case STATE_DCS_INT: - switch (raw) { - case 0x00 ... 0x1f: /* C0 */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x20 ... 0x2f: /* [' ' - '\'] */ - return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); - case 0x30 ... 0x3f: /* ['0' - '?'] */ - return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE); - case 0x40 ... 0x7e: /* ['@' - '~'] */ - return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); - case STATE_DCS_PASS: - switch (raw) { - case 0x00 ... 0x7e: /* ASCII \ { DEL } */ - return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT); - case 0x7f: /* DEL */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT); - case STATE_DCS_IGNORE: - switch (raw) { - case 0x00 ... 0x7f: /* ASCII */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); - case STATE_OSC_STRING: - switch (raw) { - case 0x00 ... 0x06: /* C0 \ { BEL } */ - case 0x08 ... 0x1f: - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x20 ... 0x7f: /* [' ' - DEL] */ - return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT); - case 0x07: /* BEL */ - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT); - case STATE_ST_IGNORE: - switch (raw) { - case 0x00 ... 0x7f: /* ASCII */ - return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); - case 0x9c: /* ST */ - return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - } - - return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); - } - - assert_not_reached("bad vte-parser state"); - return -EINVAL; -} - -int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) { - int r; - - assert_return(parser, -EINVAL); - assert_return(seq_out, -EINVAL); - - /* - * Notes: - * * DEC treats GR codes as GL. We don't do that as we require UTF-8 - * as charset and, thus, it doesn't make sense to treat GR special. - * * During control sequences, unexpected C1 codes cancel the sequence - * and immediately start a new one. C0 codes, however, may or may not - * be ignored/executed depending on the sequence. - */ - - switch (raw) { - case 0x18: /* CAN */ - r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); - break; - case 0x1a: /* SUB */ - r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE); - break; - case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */ - case 0x91 ... 0x97: - case 0x99 ... 0x9a: - r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE); - break; - case 0x1b: /* ESC */ - r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR); - break; - case 0x98: /* SOS */ - case 0x9e: /* PM */ - case 0x9f: /* APC */ - r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE); - break; - case 0x90: /* DCS */ - r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR); - break; - case 0x9d: /* OSC */ - r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR); - break; - case 0x9b: /* CSI */ - r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR); - break; - default: - r = parser_feed_to_state(parser, raw); - break; - } - - if (r <= 0) - *seq_out = NULL; - else - *seq_out = &parser->seq; - - return r; -} |