diff options
author | Lennart Poettering <lennart@poettering.net> | 2015-10-23 18:52:53 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2015-10-24 23:04:42 +0200 |
commit | 4f5dd3943bef8a04be7e3b838b822bb9a7ad6cb3 (patch) | |
tree | deda3509752ff079dded29c145e4a8fae41812f1 /src/basic/util.c | |
parent | dea7b6b043f0cd9e34ee719b9b612c3a4776387e (diff) |
util: split out escaping code into escape.[ch]
This really deserves its own file, given how much code this is now.
Diffstat (limited to 'src/basic/util.c')
-rw-r--r-- | src/basic/util.c | 458 |
1 files changed, 2 insertions, 456 deletions
diff --git a/src/basic/util.c b/src/basic/util.c index 641e0b4d89..d6488f8fd6 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -77,6 +77,7 @@ #include "def.h" #include "device-nodes.h" #include "env-util.h" +#include "escape.h" #include "exit-status.h" #include "fileio.h" #include "formats-util.h" @@ -96,8 +97,8 @@ #include "strv.h" #include "terminal-util.h" #include "utf8.h" -#include "util.h" #include "virt.h" +#include "util.h" /* Put this test here for a lack of better place */ assert_cc(EAGAIN == EWOULDBLOCK); @@ -214,69 +215,6 @@ char* first_word(const char *s, const char *word) { return (char*) p; } -size_t cescape_char(char c, char *buf) { - char * buf_old = buf; - - switch (c) { - - case '\a': - *(buf++) = '\\'; - *(buf++) = 'a'; - break; - case '\b': - *(buf++) = '\\'; - *(buf++) = 'b'; - break; - case '\f': - *(buf++) = '\\'; - *(buf++) = 'f'; - break; - case '\n': - *(buf++) = '\\'; - *(buf++) = 'n'; - break; - case '\r': - *(buf++) = '\\'; - *(buf++) = 'r'; - break; - case '\t': - *(buf++) = '\\'; - *(buf++) = 't'; - break; - case '\v': - *(buf++) = '\\'; - *(buf++) = 'v'; - break; - case '\\': - *(buf++) = '\\'; - *(buf++) = '\\'; - break; - case '"': - *(buf++) = '\\'; - *(buf++) = '"'; - break; - case '\'': - *(buf++) = '\\'; - *(buf++) = '\''; - break; - - default: - /* For special chars we prefer octal over - * hexadecimal encoding, simply because glib's - * g_strescape() does the same */ - if ((c < ' ') || (c >= 127)) { - *(buf++) = '\\'; - *(buf++) = octchar((unsigned char) c >> 6); - *(buf++) = octchar((unsigned char) c >> 3); - *(buf++) = octchar((unsigned char) c); - } else - *(buf++) = c; - break; - } - - return buf - buf_old; -} - int close_nointr(int fd) { assert(fd >= 0); @@ -1566,338 +1504,6 @@ int undecchar(char c) { return -EINVAL; } -char *cescape(const char *s) { - char *r, *t; - const char *f; - - assert(s); - - /* Does C style string escaping. May be reversed with - * cunescape(). */ - - r = new(char, strlen(s)*4 + 1); - if (!r) - return NULL; - - for (f = s, t = r; *f; f++) - t += cescape_char(*f, t); - - *t = 0; - - return r; -} - -int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) { - int r = 1; - - assert(p); - assert(*p); - assert(ret); - - /* Unescapes C style. Returns the unescaped character in ret, - * unless we encountered a \u sequence in which case the full - * unicode character is returned in ret_unicode, instead. */ - - if (length != (size_t) -1 && length < 1) - return -EINVAL; - - switch (p[0]) { - - case 'a': - *ret = '\a'; - break; - case 'b': - *ret = '\b'; - break; - case 'f': - *ret = '\f'; - break; - case 'n': - *ret = '\n'; - break; - case 'r': - *ret = '\r'; - break; - case 't': - *ret = '\t'; - break; - case 'v': - *ret = '\v'; - break; - case '\\': - *ret = '\\'; - break; - case '"': - *ret = '"'; - break; - case '\'': - *ret = '\''; - break; - - case 's': - /* This is an extension of the XDG syntax files */ - *ret = ' '; - break; - - case 'x': { - /* hexadecimal encoding */ - int a, b; - - if (length != (size_t) -1 && length < 3) - return -EINVAL; - - a = unhexchar(p[1]); - if (a < 0) - return -EINVAL; - - b = unhexchar(p[2]); - if (b < 0) - return -EINVAL; - - /* Don't allow NUL bytes */ - if (a == 0 && b == 0) - return -EINVAL; - - *ret = (char) ((a << 4U) | b); - r = 3; - break; - } - - case 'u': { - /* C++11 style 16bit unicode */ - - int a[4]; - unsigned i; - uint32_t c; - - if (length != (size_t) -1 && length < 5) - return -EINVAL; - - for (i = 0; i < 4; i++) { - a[i] = unhexchar(p[1 + i]); - if (a[i] < 0) - return a[i]; - } - - c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3]; - - /* Don't allow 0 chars */ - if (c == 0) - return -EINVAL; - - if (c < 128) - *ret = c; - else { - if (!ret_unicode) - return -EINVAL; - - *ret = 0; - *ret_unicode = c; - } - - r = 5; - break; - } - - case 'U': { - /* C++11 style 32bit unicode */ - - int a[8]; - unsigned i; - uint32_t c; - - if (length != (size_t) -1 && length < 9) - return -EINVAL; - - for (i = 0; i < 8; i++) { - a[i] = unhexchar(p[1 + i]); - if (a[i] < 0) - return a[i]; - } - - c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) | - ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] << 8U) | ((uint32_t) a[6] << 4U) | (uint32_t) a[7]; - - /* Don't allow 0 chars */ - if (c == 0) - return -EINVAL; - - /* Don't allow invalid code points */ - if (!unichar_is_valid(c)) - return -EINVAL; - - if (c < 128) - *ret = c; - else { - if (!ret_unicode) - return -EINVAL; - - *ret = 0; - *ret_unicode = c; - } - - r = 9; - break; - } - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': { - /* octal encoding */ - int a, b, c; - uint32_t m; - - if (length != (size_t) -1 && length < 3) - return -EINVAL; - - a = unoctchar(p[0]); - if (a < 0) - return -EINVAL; - - b = unoctchar(p[1]); - if (b < 0) - return -EINVAL; - - c = unoctchar(p[2]); - if (c < 0) - return -EINVAL; - - /* don't allow NUL bytes */ - if (a == 0 && b == 0 && c == 0) - return -EINVAL; - - /* Don't allow bytes above 255 */ - m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c; - if (m > 255) - return -EINVAL; - - *ret = m; - r = 3; - break; - } - - default: - return -EINVAL; - } - - return r; -} - -int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) { - char *r, *t; - const char *f; - size_t pl; - - assert(s); - assert(ret); - - /* Undoes C style string escaping, and optionally prefixes it. */ - - pl = prefix ? strlen(prefix) : 0; - - r = new(char, pl+length+1); - if (!r) - return -ENOMEM; - - if (prefix) - memcpy(r, prefix, pl); - - for (f = s, t = r + pl; f < s + length; f++) { - size_t remaining; - uint32_t u; - char c; - int k; - - remaining = s + length - f; - assert(remaining > 0); - - if (*f != '\\') { - /* A literal literal, copy verbatim */ - *(t++) = *f; - continue; - } - - if (remaining == 1) { - if (flags & UNESCAPE_RELAX) { - /* A trailing backslash, copy verbatim */ - *(t++) = *f; - continue; - } - - free(r); - return -EINVAL; - } - - k = cunescape_one(f + 1, remaining - 1, &c, &u); - if (k < 0) { - if (flags & UNESCAPE_RELAX) { - /* Invalid escape code, let's take it literal then */ - *(t++) = '\\'; - continue; - } - - free(r); - return k; - } - - if (c != 0) - /* Non-Unicode? Let's encode this directly */ - *(t++) = c; - else - /* Unicode? Then let's encode this in UTF-8 */ - t += utf8_encode_unichar(t, u); - - f += k; - } - - *t = 0; - - *ret = r; - return t - r; -} - -int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) { - return cunescape_length_with_prefix(s, length, NULL, flags, ret); -} - -int cunescape(const char *s, UnescapeFlags flags, char **ret) { - return cunescape_length(s, strlen(s), flags, ret); -} - -char *xescape(const char *s, const char *bad) { - char *r, *t; - const char *f; - - /* Escapes all chars in bad, in addition to \ and all special - * chars, in \xFF style escaping. May be reversed with - * cunescape(). */ - - r = new(char, strlen(s) * 4 + 1); - if (!r) - return NULL; - - for (f = s, t = r; *f; f++) { - - if ((*f < ' ') || (*f >= 127) || - (*f == '\\') || strchr(bad, *f)) { - *(t++) = '\\'; - *(t++) = 'x'; - *(t++) = hexchar(*f >> 4); - *(t++) = hexchar(*f); - } else - *(t++) = *f; - } - - *t = 0; - - return r; -} - char *ascii_strlower(char *t) { char *p; @@ -6242,66 +5848,6 @@ int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char return 0; } -static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) { - assert(bad); - - for (; *s; s++) { - if (*s == '\\' || strchr(bad, *s)) - *(t++) = '\\'; - - *(t++) = *s; - } - - return t; -} - -char *shell_escape(const char *s, const char *bad) { - char *r, *t; - - r = new(char, strlen(s)*2+1); - if (!r) - return NULL; - - t = strcpy_backslash_escaped(r, s, bad); - *t = 0; - - return r; -} - -char *shell_maybe_quote(const char *s) { - const char *p; - char *r, *t; - - assert(s); - - /* Encloses a string in double quotes if necessary to make it - * OK as shell string. */ - - for (p = s; *p; p++) - if (*p <= ' ' || - *p >= 127 || - strchr(SHELL_NEED_QUOTES, *p)) - break; - - if (!*p) - return strdup(s); - - r = new(char, 1+strlen(s)*2+1+1); - if (!r) - return NULL; - - t = r; - *(t++) = '"'; - t = mempcpy(t, s, p - s); - - t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE); - - *(t++)= '"'; - *t = 0; - - return r; -} - int parse_mode(const char *s, mode_t *ret) { char *x; long l; |