diff options
Diffstat (limited to 'src/libsystemd-terminal/term-charset.c')
-rw-r--r-- | src/libsystemd-terminal/term-charset.c | 491 |
1 files changed, 491 insertions, 0 deletions
diff --git a/src/libsystemd-terminal/term-charset.c b/src/libsystemd-terminal/term-charset.c new file mode 100644 index 0000000000..a00a1912da --- /dev/null +++ b/src/libsystemd-terminal/term-charset.c @@ -0,0 +1,491 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com> + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +/* + * VTE Character Sets + * These are predefined charactersets that can be loaded into GL and GR. By + * default we use unicode_lower and unicode_upper, that is, both sets have the + * exact unicode mapping. unicode_lower is effectively ASCII and unicode_upper + * as defined by the unicode standard (I guess, ISO 8859-1). + * Several other character sets are defined here. However, all of them are + * limited to the 96 character space of GL or GR. Everything beyond GR (which + * was not supported by the classic VTs by DEC but is available in VT emulators + * that support unicode/UTF8) is always mapped to unicode and cannot be changed + * by these character sets. Even mapping GL and GR is only available for + * backwards compatibility as new applications can use the Unicode functionality + * of the VTE. + * + * Moreover, mapping GR is almost unnecessary to support. 
In fact, Unicode UTF-8 + * support in VTE works by reading all incoming data as a UTF-8 stream. This + * maps GL/ASCII to ASCII, as UTF-8 is backwards compatible to ASCII, however, + * everything that has the 8th bit set is a >=2-byte character in UTF-8. That is, + * this is in no way backwards compatible to >=VT220 8bit support. Therefore, if + * someone maps a character set into GR and wants to use them with this VTE, + * then they must already send UTF-8 characters to use GR (all GR characters are + * 8-bits). Hence, they can easily also send the correct UTF-8 character for the + * unicode mapping. + * The only advantage is that most characters in many sets are 3-byte UTF-8 + * characters and by mapping the set into GR/GL you can use 2 or 1 byte UTF-8 + * characters which saves bandwidth. + * Another reason is, if you have older applications that use the VT220 8-bit + * support and you put an ASCII/8bit-extension to UTF-8 converter in between, you + * need these mappings to have the application behave correctly if it uses GL/GR + * mappings extensively. + * + * Anyway, we support GL/GR mappings so here are the most commonly used maps as + * defined by Unicode-standard, DEC-private maps and other famous charmaps. + * + * Characters 1-32 are always the control characters (part of CL) and cannot be + * mapped. Characters 34-127 (94 characters) are part of GL and can be mapped. + * Characters 33 and 128 are not part of GL and always mapped by the VTE. + * However, for GR they can be mapped differently (96 chars) so we have to + * include them. The mapper has to take care not to use them in GL. + */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include "term-internal.h" + +/* + * Lower Unicode character set. This maps the characters to the basic ASCII + * characters 33-126. These are all graphics characters defined in ASCII. 
+ */ +term_charset term_unicode_lower = { + [0] = 32, + [1] = 33, + [2] = 34, + [3] = 35, + [4] = 36, + [5] = 37, + [6] = 38, + [7] = 39, + [8] = 40, + [9] = 41, + [10] = 42, + [11] = 43, + [12] = 44, + [13] = 45, + [14] = 46, + [15] = 47, + [16] = 48, + [17] = 49, + [18] = 50, + [19] = 51, + [20] = 52, + [21] = 53, + [22] = 54, + [23] = 55, + [24] = 56, + [25] = 57, + [26] = 58, + [27] = 59, + [28] = 60, + [29] = 61, + [30] = 62, + [31] = 63, + [32] = 64, + [33] = 65, + [34] = 66, + [35] = 67, + [36] = 68, + [37] = 69, + [38] = 70, + [39] = 71, + [40] = 72, + [41] = 73, + [42] = 74, + [43] = 75, + [44] = 76, + [45] = 77, + [46] = 78, + [47] = 79, + [48] = 80, + [49] = 81, + [50] = 82, + [51] = 83, + [52] = 84, + [53] = 85, + [54] = 86, + [55] = 87, + [56] = 88, + [57] = 89, + [58] = 90, + [59] = 91, + [60] = 92, + [61] = 93, + [62] = 94, + [63] = 95, + [64] = 96, + [65] = 97, + [66] = 98, + [67] = 99, + [68] = 100, + [69] = 101, + [70] = 102, + [71] = 103, + [72] = 104, + [73] = 105, + [74] = 106, + [75] = 107, + [76] = 108, + [77] = 109, + [78] = 110, + [79] = 111, + [80] = 112, + [81] = 113, + [82] = 114, + [83] = 115, + [84] = 116, + [85] = 117, + [86] = 118, + [87] = 119, + [88] = 120, + [89] = 121, + [90] = 122, + [91] = 123, + [92] = 124, + [93] = 125, + [94] = 126, + [95] = 127, +}; + +/* + * Upper Unicode Table + * This maps all characters to the upper unicode characters 161-254. These are + * not compatible to any older 8 bit character sets. See the Unicode standard + * for the definitions of each symbol. 
+ */ +term_charset term_unicode_upper = { + [0] = 160, + [1] = 161, + [2] = 162, + [3] = 163, + [4] = 164, + [5] = 165, + [6] = 166, + [7] = 167, + [8] = 168, + [9] = 169, + [10] = 170, + [11] = 171, + [12] = 172, + [13] = 173, + [14] = 174, + [15] = 175, + [16] = 176, + [17] = 177, + [18] = 178, + [19] = 179, + [20] = 180, + [21] = 181, + [22] = 182, + [23] = 183, + [24] = 184, + [25] = 185, + [26] = 186, + [27] = 187, + [28] = 188, + [29] = 189, + [30] = 190, + [31] = 191, + [32] = 192, + [33] = 193, + [34] = 194, + [35] = 195, + [36] = 196, + [37] = 197, + [38] = 198, + [39] = 199, + [40] = 200, + [41] = 201, + [42] = 202, + [43] = 203, + [44] = 204, + [45] = 205, + [46] = 206, + [47] = 207, + [48] = 208, + [49] = 209, + [50] = 210, + [51] = 211, + [52] = 212, + [53] = 213, + [54] = 214, + [55] = 215, + [56] = 216, + [57] = 217, + [58] = 218, + [59] = 219, + [60] = 220, + [61] = 221, + [62] = 222, + [63] = 223, + [64] = 224, + [65] = 225, + [66] = 226, + [67] = 227, + [68] = 228, + [69] = 229, + [70] = 230, + [71] = 231, + [72] = 232, + [73] = 233, + [74] = 234, + [75] = 235, + [76] = 236, + [77] = 237, + [78] = 238, + [79] = 239, + [80] = 240, + [81] = 241, + [82] = 242, + [83] = 243, + [84] = 244, + [85] = 245, + [86] = 246, + [87] = 247, + [88] = 248, + [89] = 249, + [90] = 250, + [91] = 251, + [92] = 252, + [93] = 253, + [94] = 254, + [95] = 255, +}; + +/* + * The DEC supplemental graphics set. For its definition see here: + * http://vt100.net/docs/vt220-rm/table2-3b.html + * Its basically a mixture of common European symbols that are not part of + * ASCII. Most often, this is mapped into GR to extend the basci ASCII part. + * + * This is very similar to unicode_upper, however, few symbols differ so do not + * mix them up! 
+ */ +term_charset term_dec_supplemental_graphics = { + [0] = -1, /* undefined */ + [1] = 161, + [2] = 162, + [3] = 163, + [4] = 0, + [5] = 165, + [6] = 0, + [7] = 167, + [8] = 164, + [9] = 169, + [10] = 170, + [11] = 171, + [12] = 0, + [13] = 0, + [14] = 0, + [15] = 0, + [16] = 176, + [17] = 177, + [18] = 178, + [19] = 179, + [20] = 0, + [21] = 181, + [22] = 182, + [23] = 183, + [24] = 0, + [25] = 185, + [26] = 186, + [27] = 187, + [28] = 188, + [29] = 189, + [30] = 0, + [31] = 191, + [32] = 192, + [33] = 193, + [34] = 194, + [35] = 195, + [36] = 196, + [37] = 197, + [38] = 198, + [39] = 199, + [40] = 200, + [41] = 201, + [42] = 202, + [43] = 203, + [44] = 204, + [45] = 205, + [46] = 206, + [47] = 207, + [48] = 0, + [49] = 209, + [50] = 210, + [51] = 211, + [52] = 212, + [53] = 213, + [54] = 214, + [55] = 338, + [56] = 216, + [57] = 217, + [58] = 218, + [59] = 219, + [60] = 220, + [61] = 376, + [62] = 0, + [63] = 223, + [64] = 224, + [65] = 225, + [66] = 226, + [67] = 227, + [68] = 228, + [69] = 229, + [70] = 230, + [71] = 231, + [72] = 232, + [73] = 233, + [74] = 234, + [75] = 235, + [76] = 236, + [77] = 237, + [78] = 238, + [79] = 239, + [80] = 0, + [81] = 241, + [82] = 242, + [83] = 243, + [84] = 244, + [85] = 245, + [86] = 246, + [87] = 339, + [88] = 248, + [89] = 249, + [90] = 250, + [91] = 251, + [92] = 252, + [93] = 255, + [94] = 0, + [95] = -1, /* undefined */ +}; + +/* + * DEC special graphics character set. See here for its definition: + * http://vt100.net/docs/vt220-rm/table2-4.html + * This contains several characters to create ASCII drawings and similar. Its + * commonly mapped into GR to extend the basic ASCII characters. + * + * Lower 62 characters map to ASCII 33-64, everything beyond is special and + * commonly used for ASCII drawings. It depends on the Unicode Standard 3.2 for + * the extended horizontal scan-line characters 3, 5, 7, and 9. 
+ */ +term_charset term_dec_special_graphics = { + [0] = -1, /* undefined */ + [1] = 33, + [2] = 34, + [3] = 35, + [4] = 36, + [5] = 37, + [6] = 38, + [7] = 39, + [8] = 40, + [9] = 41, + [10] = 42, + [11] = 43, + [12] = 44, + [13] = 45, + [14] = 46, + [15] = 47, + [16] = 48, + [17] = 49, + [18] = 50, + [19] = 51, + [20] = 52, + [21] = 53, + [22] = 54, + [23] = 55, + [24] = 56, + [25] = 57, + [26] = 58, + [27] = 59, + [28] = 60, + [29] = 61, + [30] = 62, + [31] = 63, + [32] = 64, + [33] = 65, + [34] = 66, + [35] = 67, + [36] = 68, + [37] = 69, + [38] = 70, + [39] = 71, + [40] = 72, + [41] = 73, + [42] = 74, + [43] = 75, + [44] = 76, + [45] = 77, + [46] = 78, + [47] = 79, + [48] = 80, + [49] = 81, + [50] = 82, + [51] = 83, + [52] = 84, + [53] = 85, + [54] = 86, + [55] = 87, + [56] = 88, + [57] = 89, + [58] = 90, + [59] = 91, + [60] = 92, + [61] = 93, + [62] = 94, + [63] = 0, + [64] = 9830, + [65] = 9618, + [66] = 9225, + [67] = 9228, + [68] = 9229, + [69] = 9226, + [70] = 176, + [71] = 177, + [72] = 9252, + [73] = 9227, + [74] = 9496, + [75] = 9488, + [76] = 9484, + [77] = 9492, + [78] = 9532, + [79] = 9146, + [80] = 9147, + [81] = 9472, + [82] = 9148, + [83] = 9149, + [84] = 9500, + [85] = 9508, + [86] = 9524, + [87] = 9516, + [88] = 9474, + [89] = 8804, + [90] = 8805, + [91] = 960, + [92] = 8800, + [93] = 163, + [94] = 8901, + [95] = -1, /* undefined */ +}; |