From 4034a06ddb82ec9868cd52496fef2f5faa25575f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 23 Mar 2015 18:55:36 +0700 Subject: util: rework word parsing and c unescaping code When parsing words from input files, optionally automatically unescape the passed strings, controllable via a new flags parameter. Make use of this in tmpfiles, and port everything else over, too. This improves parsing quite a bit, since we no longer have to process the same string multiple times with different calls, where an earlier call might corrupt the input for a later call. --- src/shared/condition.c | 2 +- src/shared/strv.c | 4 +- src/shared/strv.h | 2 +- src/shared/util.c | 280 +++++++++++++++++++++++++++++-------------------- src/shared/util.h | 9 +- 5 files changed, 179 insertions(+), 118 deletions(-) (limited to 'src/shared') diff --git a/src/shared/condition.c b/src/shared/condition.c index 0a77607eea..db12df952b 100644 --- a/src/shared/condition.c +++ b/src/shared/condition.c @@ -100,7 +100,7 @@ static int condition_test_kernel_command_line(Condition *c) { _cleanup_free_ char *word = NULL; bool found; - r = unquote_first_word(&p, &word, true); + r = unquote_first_word(&p, &word, UNQUOTE_RELAX); if (r < 0) return r; if (r == 0) diff --git a/src/shared/strv.c b/src/shared/strv.c index 8c6ba6a633..d44a72fc48 100644 --- a/src/shared/strv.c +++ b/src/shared/strv.c @@ -278,7 +278,7 @@ char **strv_split_newlines(const char *s) { return l; } -int strv_split_quoted(char ***t, const char *s, bool relax) { +int strv_split_quoted(char ***t, const char *s, UnquoteFlags flags) { size_t n = 0, allocated = 0; _cleanup_strv_free_ char **l = NULL; int r; @@ -289,7 +289,7 @@ int strv_split_quoted(char ***t, const char *s, bool relax) { for (;;) { _cleanup_free_ char *word = NULL; - r = unquote_first_word(&s, &word, relax); + r = unquote_first_word(&s, &word, flags); if (r < 0) return r; if (r == 0) diff --git a/src/shared/strv.h b/src/shared/strv.h index a80ccd6427..22f8f98fda 100644 --- a/src/shared/strv.h +++ b/src/shared/strv.h @@ -73,7 +73,7 @@ static inline bool strv_isempty(char * const *l) { char **strv_split(const char *s, const char *separator); char **strv_split_newlines(const char *s); -int strv_split_quoted(char ***t, const char *s, bool relax); +int strv_split_quoted(char ***t, const char *s, UnquoteFlags flags); char *strv_join(char **l, const char *separator); char *strv_join_quoted(char **l); diff --git a/src/shared/util.c b/src/shared/util.c index ad548da82a..8b76531d4f 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -1347,6 +1347,125 @@ char *cescape(const char *s) { return r; } +static int cunescape_one(const char *p, size_t length, char *ret) { + int r = 1; + + assert(p); + assert(*p); + assert(ret); + + if (length != (size_t) -1 && length < 1) + return -EINVAL; + + switch (p[0]) { + + case 'a': + *ret = '\a'; + break; + case 'b': + *ret = '\b'; + break; + case 'f': + *ret = '\f'; + break; + case 'n': + *ret = '\n'; + break; + case 'r': + *ret = '\r'; + break; + case 't': + *ret = '\t'; + break; + case 'v': + *ret = '\v'; + break; + case '\\': + *ret = '\\'; + break; + case '"': + *ret = '"'; + break; + case '\'': + *ret = '\''; + break; + + case 's': + /* This is an extension of the XDG syntax files */ + *ret = ' '; + break; + + case 'x': { + /* hexadecimal encoding */ + int a, b; + + if (length != (size_t) -1 && length < 3) + return -EINVAL; + + a = unhexchar(p[1]); + if (a < 0) + return -EINVAL; + + b = unhexchar(p[2]); + if (b < 0) + return -EINVAL; + + /* don't allow NUL bytes */ + if (a == 0 && b == 0) + return -EINVAL; + + *ret = (char) ((a << 4) | b); + r = 3; + break; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + /* octal encoding */ + int a, b, c, m; + + if (length != (size_t) -1 && length < 4) + return -EINVAL; + + a = unoctchar(p[0]); + if (a < 0) + return -EINVAL; + + b = unoctchar(p[1]); + if (b < 0) + return -EINVAL; + + c = unoctchar(p[2]); + if (c < 0) + return -EINVAL; + + /* don't allow NUL bytes */ + if (a == 0 && b == 0 && c == 0) + return -EINVAL; + + /* Don't allow bytes above 255 */ + m = (a << 6) | (b << 3) | c; + if (m > 255) + return -EINVAL; + + *ret = (char) m; + r = 3; + break; + } + + default: + return -EINVAL; + } + + return r; +} + char *cunescape_length_with_prefix(const char *s, size_t length, const char *prefix) { char *r, *t; const char *f; @@ -1366,115 +1485,27 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre memcpy(r, prefix, pl); for (f = s, t = r + pl; f < s + length; f++) { - size_t remaining = s + length - f; + size_t remaining; + int k; + + remaining = s + length - f; assert(remaining > 0); - if (*f != '\\') { /* a literal literal */ + if (*f != '\\' || remaining == 1) { + /* a literal literal, or a trailing backslash, copy verbatim */ *(t++) = *f; continue; } - if (--remaining == 0) { /* copy trailing backslash verbatim */ - *(t++) = *f; - break; - } - - f++; - - switch (*f) { - - case 'a': - *(t++) = '\a'; - break; - case 'b': - *(t++) = '\b'; - break; - case 'f': - *(t++) = '\f'; - break; - case 'n': - *(t++) = '\n'; - break; - case 'r': - *(t++) = '\r'; - break; - case 't': - *(t++) = '\t'; - break; - case 'v': - *(t++) = '\v'; - break; - case '\\': - *(t++) = '\\'; - break; - case '"': - *(t++) = '"'; - break; - case '\'': - *(t++) = '\''; - break; - - case 's': - /* This is an extension of the XDG syntax files */ - *(t++) = ' '; - break; - - case 'x': { - /* hexadecimal encoding */ - int a = -1, b = -1; - - if (remaining >= 2) { - a = unhexchar(f[1]); - b = unhexchar(f[2]); - } - - if (a < 0 || b < 0 || (a == 0 && b == 0)) { - /* Invalid escape code, let's take it literal then */ - *(t++) = '\\'; - *(t++) = 'x'; - } else { - *(t++) = (char) ((a << 4) | b); - f += 2; - } - - break; - } - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': { - /* octal encoding */ - int a = -1, b = -1, c = -1; - - if (remaining >= 3) { - a = unoctchar(f[0]); - b = unoctchar(f[1]); - c = unoctchar(f[2]); - } - - if (a < 0 || b < 0 || c < 0 || (a == 0 && b == 0 && c == 0)) { - /* Invalid escape code, let's take it literal then */ - *(t++) = '\\'; - *(t++) = f[0]; - } else { - *(t++) = (char) ((a << 6) | (b << 3) | c); - f += 2; - } - - break; - } - - default: + k = cunescape_one(f + 1, remaining - 1, t); + if (k < 0) { /* Invalid escape code, let's take it literal then */ *(t++) = '\\'; - *(t++) = *f; - break; + continue; } + + f += k; + t++; } *t = 0; @@ -3411,7 +3442,7 @@ char **replace_env_argv(char **argv, char **env) { if (e) { int r; - r = strv_split_quoted(&m, e, true); + r = strv_split_quoted(&m, e, UNQUOTE_RELAX); if (r < 0) { ret[k] = NULL; strv_free(ret); @@ -6382,7 +6413,7 @@ int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value)) { _cleanup_free_ char *word = NULL; char *value = NULL; - r = unquote_first_word(&p, &word, true); + r = unquote_first_word(&p, &word, UNQUOTE_RELAX); if (r < 0) return r; if (r == 0) @@ -6422,7 +6453,7 @@ int get_proc_cmdline_key(const char *key, char **value) { _cleanup_free_ char *word = NULL; const char *e; - r = unquote_first_word(&p, &word, true); + r = unquote_first_word(&p, &word, UNQUOTE_RELAX); if (r < 0) return r; if (r == 0) @@ -7275,9 +7306,10 @@ int is_dir(const char* path, bool follow) { return !!S_ISDIR(st.st_mode); } -int unquote_first_word(const char **p, char **ret, bool relax) { +int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { _cleanup_free_ char *s = NULL; size_t allocated = 0, sz = 0; + int r; enum { START, @@ -7335,7 +7367,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) { case VALUE_ESCAPE: if (c == 0) { - if (relax) + if (flags & UNQUOTE_RELAX) goto finish; return -EINVAL; } @@ -7343,6 +7375,14 @@ int unquote_first_word(const char **p, char **ret, bool relax) { if (!GREEDY_REALLOC(s, allocated, sz+2)) return -ENOMEM; + if (flags & UNQUOTE_CUNESCAPE) { + r = cunescape_one(*p, (size_t) -1, &c); + if (r < 0) + return -EINVAL; + + (*p) += r - 1; + } + s[sz++] = c; state = VALUE; @@ -7350,7 +7390,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) { case SINGLE_QUOTE: if (c == 0) { - if (relax) + if (flags & UNQUOTE_RELAX) goto finish; return -EINVAL; } else if (c == '\'') @@ -7368,7 +7408,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) { case SINGLE_QUOTE_ESCAPE: if (c == 0) { - if (relax) + if (flags & UNQUOTE_RELAX) goto finish; return -EINVAL; } @@ -7376,6 +7416,14 @@ int unquote_first_word(const char **p, char **ret, bool relax) { if (!GREEDY_REALLOC(s, allocated, sz+2)) return -ENOMEM; + if (flags & UNQUOTE_CUNESCAPE) { + r = cunescape_one(*p, (size_t) -1, &c); + if (r < 0) + return -EINVAL; + + (*p) += r - 1; + } + s[sz++] = c; state = SINGLE_QUOTE; break; @@ -7398,7 +7446,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) { case DOUBLE_QUOTE_ESCAPE: if (c == 0) { - if (relax) + if (flags & UNQUOTE_RELAX) goto finish; return -EINVAL; } @@ -7406,6 +7454,14 @@ int unquote_first_word(const char **p, char **ret, bool relax) { if (!GREEDY_REALLOC(s, allocated, sz+2)) return -ENOMEM; + if (flags & UNQUOTE_CUNESCAPE) { + r = cunescape_one(*p, (size_t) -1, &c); + if (r < 0) + return -EINVAL; + + (*p) += r - 1; + } + s[sz++] = c; state = DOUBLE_QUOTE; break; @@ -7435,7 +7491,7 @@ finish: return 1; } -int unquote_many_words(const char **p, ...) { +int unquote_many_words(const char **p, UnquoteFlags flags, ...) { va_list ap; char **l; int n = 0, i, c, r; @@ -7446,7 +7502,7 @@ int unquote_many_words(const char **p, ...) { assert(p); /* Count how many words are expected */ - va_start(ap, p); + va_start(ap, flags); for (;;) { if (!va_arg(ap, char **)) break; @@ -7461,7 +7517,7 @@ int unquote_many_words(const char **p, ...) { l = newa0(char*, n); for (c = 0; c < n; c++) { - r = unquote_first_word(p, &l[c], false); + r = unquote_first_word(p, &l[c], flags); if (r < 0) { int j; @@ -7477,7 +7533,7 @@ int unquote_many_words(const char **p, ...) { /* If we managed to parse all words, return them in the passed * in parameters */ - va_start(ap, p); + va_start(ap, flags); for (i = 0; i < n; i++) { char **v; diff --git a/src/shared/util.h b/src/shared/util.h index 29e85bb7e1..124c7c06d4 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -1017,8 +1017,13 @@ int take_password_lock(const char *root); int is_symlink(const char *path); int is_dir(const char *path, bool follow); -int unquote_first_word(const char **p, char **ret, bool relax); -int unquote_many_words(const char **p, ...) _sentinel_; +typedef enum UnquoteFlags{ + UNQUOTE_RELAX = 1, + UNQUOTE_CUNESCAPE = 2, +} UnquoteFlags; + +int unquote_first_word(const char **p, char **ret, UnquoteFlags flags); +int unquote_many_words(const char **p, UnquoteFlags flags, ...) _sentinel_; int free_and_strdup(char **p, const char *s); -- cgit v1.2.3-54-g00ecf