From 05654e712f0ccfea7bbeb3e4de89861a670f974e Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Fri, 29 May 2015 23:42:47 -0700 Subject: util: Refactor common cunescape block in unquote_first_word --- src/basic/util.c | 68 ++++++-------------------------------------------------- 1 file changed, 7 insertions(+), 61 deletions(-) (limited to 'src/basic') diff --git a/src/basic/util.c b/src/basic/util.c index e0c5069ff8..e0d1220153 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -5209,35 +5209,6 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { break; - case VALUE_ESCAPE: - if (c == 0) { - if (flags & UNQUOTE_RELAX) - goto finish; - return -EINVAL; - } - - if (!GREEDY_REALLOC(s, allocated, sz+7)) - return -ENOMEM; - - if (flags & UNQUOTE_CUNESCAPE) { - uint32_t u; - - r = cunescape_one(*p, (size_t) -1, &c, &u); - if (r < 0) - return -EINVAL; - - (*p) += r - 1; - - if (c != 0) - s[sz++] = c; /* normal explicit char */ - else - sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */ - } else - s[sz++] = c; - - state = VALUE; - break; - case SINGLE_QUOTE: if (c == 0) { if (flags & UNQUOTE_RELAX) @@ -5256,35 +5227,6 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { break; - case SINGLE_QUOTE_ESCAPE: - if (c == 0) { - if (flags & UNQUOTE_RELAX) - goto finish; - return -EINVAL; - } - - if (!GREEDY_REALLOC(s, allocated, sz+7)) - return -ENOMEM; - - if (flags & UNQUOTE_CUNESCAPE) { - uint32_t u; - - r = cunescape_one(*p, (size_t) -1, &c, &u); - if (r < 0) - return -EINVAL; - - (*p) += r - 1; - - if (c != 0) - s[sz++] = c; - else - sz += utf8_encode_unichar(s + sz, u); - } else - s[sz++] = c; - - state = SINGLE_QUOTE; - break; - case DOUBLE_QUOTE: if (c == 0) return -EINVAL; @@ -5301,7 +5243,9 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { break; + case SINGLE_QUOTE_ESCAPE: case DOUBLE_QUOTE_ESCAPE: + case VALUE_ESCAPE: if (c == 0) { if (flags & UNQUOTE_RELAX) goto finish; @@ -5321,13 +5265,15 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { (*p) += r - 1; if (c != 0) - s[sz++] = c; + s[sz++] = c; /* normal explicit char */ else - sz += utf8_encode_unichar(s + sz, u); + sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */ } else s[sz++] = c; - state = DOUBLE_QUOTE; + state = (state == SINGLE_QUOTE_ESCAPE) ? SINGLE_QUOTE : + (state == DOUBLE_QUOTE_ESCAPE) ? DOUBLE_QUOTE : + VALUE; break; case SPACE: -- cgit v1.2.3-54-g00ecf From d6293c070e6e4b83d8e7ec56e465b0b215d55d98 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 2 Jun 2015 21:08:24 -0700 Subject: util: New flag UNQUOTE_UNESCAPE_RELAX for unquote_first_word The new flag UNQUOTE_UNESCAPE_RELAX preserves unrecognized escape sequences verbatim in unquote_first_word, either when it's a trailing backslash (similar to UNQUOTE_RELAX, but in this case keep the extra backslash in the output) or in the middle of a sequence string. Add unit test cases to ensure the new flag works as expected and to prevent regressions from being introduced. Tested with a follow up commit converting config_parse_exec() to start using unquote_first_word, in which case this flags makes it possible to preserve unrecognized escape sequences. Relevant bug: https://bugs.freedesktop.org/show_bug.cgi?id=90794 --- src/basic/util.c | 27 ++++++++++++--- src/basic/util.h | 5 +-- src/test/test-util.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 6 deletions(-) (limited to 'src/basic') diff --git a/src/basic/util.c b/src/basic/util.c index e0d1220153..46a48c4d60 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -5246,21 +5246,39 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { case SINGLE_QUOTE_ESCAPE: case DOUBLE_QUOTE_ESCAPE: case VALUE_ESCAPE: + if (!GREEDY_REALLOC(s, allocated, sz+7)) + return -ENOMEM; + if (c == 0) { + if ((flags & UNQUOTE_CUNESCAPE_RELAX) && + (state == VALUE_ESCAPE || flags & UNQUOTE_RELAX)) { + /* If we find an unquoted trailing backslash and we're in + * UNQUOTE_CUNESCAPE_RELAX mode, keep it verbatim in the + * output. + * + * Unbalanced quotes will only be allowed in UNQUOTE_RELAX + * mode, UNQUOTE_CUNESCAP_RELAX mode does not allow them. + */ + s[sz++] = '\\'; + goto finish; + } if (flags & UNQUOTE_RELAX) goto finish; return -EINVAL; } - if (!GREEDY_REALLOC(s, allocated, sz+7)) - return -ENOMEM; - if (flags & UNQUOTE_CUNESCAPE) { uint32_t u; r = cunescape_one(*p, (size_t) -1, &c, &u); - if (r < 0) + if (r < 0) { + if (flags & UNQUOTE_CUNESCAPE_RELAX) { + s[sz++] = '\\'; + s[sz++] = c; + goto end_escape; + } return -EINVAL; + } (*p) += r - 1; @@ -5271,6 +5289,7 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) { } else s[sz++] = c; +end_escape: state = (state == SINGLE_QUOTE_ESCAPE) ? SINGLE_QUOTE : (state == DOUBLE_QUOTE_ESCAPE) ? DOUBLE_QUOTE : VALUE; diff --git a/src/basic/util.h b/src/basic/util.h index 7aca46d777..748f22f1a2 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -839,8 +839,9 @@ int is_dir(const char *path, bool follow); int is_device_node(const char *path); typedef enum UnquoteFlags { - UNQUOTE_RELAX = 1, - UNQUOTE_CUNESCAPE = 2, + UNQUOTE_RELAX = 1, + UNQUOTE_CUNESCAPE = 2, + UNQUOTE_CUNESCAPE_RELAX = 4, } UnquoteFlags; int unquote_first_word(const char **p, char **ret, UnquoteFlags flags); diff --git a/src/test/test-util.c b/src/test/test-util.c index ed8db45115..b3e79cd6f5 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -1304,6 +1304,100 @@ static void test_unquote_first_word(void) { assert_se(streq(t, "pi\360\237\222\251le")); free(t); assert_se(p == original + 32); + + p = original = "fooo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_RELAX) > 0); + assert_se(streq(t, "fooo")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE_RELAX) > 0); + assert_se(streq(t, "fooo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE_RELAX|UNQUOTE_RELAX) > 0); + assert_se(streq(t, "fooo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE|UNQUOTE_CUNESCAPE_RELAX) > 0); + assert_se(streq(t, "fooo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word(&p, &t, 0) == -EINVAL); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_RELAX) > 0); + assert_se(streq(t, "foo")); + free(t); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE_RELAX) == -EINVAL); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE_RELAX|UNQUOTE_RELAX) > 0); + assert_se(streq(t, "foo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE|UNQUOTE_CUNESCAPE_RELAX|UNQUOTE_RELAX) > 0); + assert_se(streq(t, "foo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_RELAX) > 0); + assert_se(streq(t, "fooo bar")); + free(t); + assert_se(p == original + 10); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE_RELAX) > 0); + assert_se(streq(t, "fooo bar")); + free(t); + assert_se(p == original + 10); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE_RELAX|UNQUOTE_RELAX) > 0); + assert_se(streq(t, "fooo bar")); + free(t); + assert_se(p == original + 10); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE) == -EINVAL); + assert_se(p == original + 5); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE|UNQUOTE_CUNESCAPE_RELAX) > 0); + assert_se(streq(t, "fooo\\ bar")); + free(t); + assert_se(p == original + 10); + + p = original = "\\w+@\\K[\\d.]+"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE) == -EINVAL); + assert_se(p == original + 1); + + p = original = "\\w+@\\K[\\d.]+"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE|UNQUOTE_CUNESCAPE_RELAX) > 0); + assert_se(streq(t, "\\w+@\\K[\\d.]+")); + free(t); + assert_se(p == original + 12); + + p = original = "\\w+\\b"; + assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE|UNQUOTE_CUNESCAPE_RELAX) > 0); + assert_se(streq(t, "\\w+\b")); + free(t); + assert_se(p == original + 5); } static void test_unquote_many_words(void) { -- cgit v1.2.3-54-g00ecf From b59292b296ad71a20a40d7c347b6ca71df48892d Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Mon, 8 Jun 2015 21:31:43 -0700 Subject: util: Introduce unquote_first_word_and_warn It will try to unquot_first_word, but if it runs into escaping problems it will retry it adding UNQUOTE_CUNESCAPE_RELAX to the flags. If it succeeds on the second try, it will log a warning about it. If it fails both times, it will log an error. Add test cases to confirm it behaves as expected. --- src/basic/util.c | 30 +++++++++++ src/basic/util.h | 1 + src/test/test-util.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+) (limited to 'src/basic') diff --git a/src/basic/util.c b/src/basic/util.c index 46a48c4d60..727be56f58 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -5320,6 +5320,36 @@ finish: return 1; } +int unquote_first_word_and_warn( + const char **p, + char **ret, + UnquoteFlags flags, + const char *unit, + const char *filename, + unsigned line, + const char *rvalue) { + /* Try to unquote it, if it fails, warn about it and try again but this + * time using UNQUOTE_CUNESCAPE_RELAX to keep the backslashes verbatim + * in invalid escape sequences. */ + const char *save; + int r; + + save = *p; + r = unquote_first_word(p, ret, flags); + if (r < 0 && !(flags&UNQUOTE_CUNESCAPE_RELAX)) { + /* Retry it with UNQUOTE_CUNESCAPE_RELAX. */ + *p = save; + r = unquote_first_word(p, ret, flags|UNQUOTE_CUNESCAPE_RELAX); + if (r < 0) + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Unbalanced quoting in command line, ignoring: \"%s\"", rvalue); + else + log_syntax(unit, LOG_WARNING, filename, line, EINVAL, + "Invalid escape sequences in command line: \"%s\"", rvalue); + } + return r; +} + int unquote_many_words(const char **p, UnquoteFlags flags, ...) { va_list ap; char **l; diff --git a/src/basic/util.h b/src/basic/util.h index 748f22f1a2..a1d1dd15c3 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -845,6 +845,7 @@ typedef enum UnquoteFlags { } UnquoteFlags; int unquote_first_word(const char **p, char **ret, UnquoteFlags flags); +int unquote_first_word_and_warn(const char **p, char **ret, UnquoteFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue); int unquote_many_words(const char **p, UnquoteFlags flags, ...) _sentinel_; int free_and_strdup(char **p, const char *s); diff --git a/src/test/test-util.c b/src/test/test-util.c index b3e79cd6f5..ad9ea3bcce 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -1400,6 +1400,150 @@ static void test_unquote_first_word(void) { assert_se(p == original + 5); } +static void test_unquote_first_word_and_warn(void) { + const char *p, *original; + char *t; + + p = original = "foobar waldo"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "foobar")); + free(t); + assert_se(p == original + 7); + + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "waldo")); + free(t); + assert_se(p == original + 12); + + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) == 0); + assert_se(!t); + assert_se(p == original + 12); + + p = original = "\"foobar\" \'waldo\'"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "foobar")); + free(t); + assert_se(p == original + 9); + + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "waldo")); + free(t); + assert_se(p == original + 16); + + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) == 0); + assert_se(!t); + assert_se(p == original + 16); + + p = original = "\""; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) == -EINVAL); + assert_se(p == original + 1); + + p = original = "\'"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) == -EINVAL); + assert_se(p == original + 1); + + p = original = "\'fooo"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) == -EINVAL); + assert_se(p == original + 5); + + p = original = "\'fooo"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_RELAX, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo")); + free(t); + assert_se(p == original + 5); + + p = original = " foo\\ba\\x6ar "; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "foo\ba\x6ar")); + free(t); + assert_se(p == original + 13); + + p = original = " foo\\ba\\x6ar "; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "foobax6ar")); + free(t); + assert_se(p == original + 13); + + p = original = " f\\u00f6o \"pi\\U0001F4A9le\" "; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "föo")); + free(t); + assert_se(p == original + 13); + + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "pi\360\237\222\251le")); + free(t); + assert_se(p == original + 32); + + p = original = "fooo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_RELAX, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo\\")); + free(t); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) == -EINVAL); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_RELAX, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "foo")); + free(t); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) == -EINVAL); + assert_se(p == original + 5); + + p = original = "\"foo\\"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE|UNQUOTE_RELAX, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "foo")); + free(t); + assert_se(p == original + 5); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_RELAX, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo bar")); + free(t); + assert_se(p == original + 10); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word_and_warn(&p, &t, 0, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo bar")); + free(t); + assert_se(p == original + 10); + + p = original = "fooo\\ bar quux"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "fooo\\ bar")); + free(t); + assert_se(p == original + 10); + + p = original = "\\w+@\\K[\\d.]+"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "\\w+@\\K[\\d.]+")); + free(t); + assert_se(p == original + 12); + + p = original = "\\w+\\b"; + assert_se(unquote_first_word_and_warn(&p, &t, UNQUOTE_CUNESCAPE, NULL, "fake", 1, original) > 0); + assert_se(streq(t, "\\w+\b")); + free(t); + assert_se(p == original + 5); +} + static void test_unquote_many_words(void) { const char *p, *original; char *a, *b, *c; @@ -1704,6 +1848,7 @@ int main(int argc, char *argv[]) { test_glob_exists(); test_execute_directory(); test_unquote_first_word(); + test_unquote_first_word_and_warn(); test_unquote_many_words(); test_parse_proc_cmdline(); test_raw_clone(); -- cgit v1.2.3-54-g00ecf