From b85e1c2534ca3b396c2aaa7de384995b42d12e1b Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 3 Nov 2015 20:13:11 -0800 Subject: extract-word: move start block outside the for loop This block runs once before all the other handling, so move it outside the main loop and put it in its own loop until it's finished doing its job. Tested by confirming `make check` (and particularly test-extract-word) still passes and by booting a system with binaries including this commit. --- src/basic/extract-word.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'src/basic') diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index 6721b85c0a..23e3d557c0 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -29,12 +29,12 @@ int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) { _cleanup_free_ char *s = NULL; size_t allocated = 0, sz = 0; + char c; int r; char quote = 0; /* 0 or ' or " */ bool backslash = false; /* whether we've just seen a backslash */ bool separator = false; /* whether we've just seen a separator */ - bool start = true; /* false means we're looking at a value */ assert(p); assert(ret); @@ -51,31 +51,30 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra * (because of an uneven number of quotes or similar), leaves * the pointer *p at the first invalid character. 
*/ - for (;;) { - char c = **p; + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) + if (!GREEDY_REALLOC(s, allocated, sz+1)) + return -ENOMEM; - if (start) { + for (;;) { + c = **p; + if (c == 0) + goto finish_force_terminate; + else if (strchr(separators, c)) { + (*p) ++; if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) - if (!GREEDY_REALLOC(s, allocated, sz+1)) - return -ENOMEM; - - if (c == 0) - goto finish_force_terminate; - else if (strchr(separators, c)) { - (*p) ++; - if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) - goto finish_force_next; - continue; - } - + goto finish_force_next; + } else { /* We found a non-blank character, so we will always * want to return a string (even if it is empty), * allocate it here. */ if (!GREEDY_REALLOC(s, allocated, sz+1)) return -ENOMEM; - - start = false; + break; } + } + + for (;;) { + c = **p; if (backslash) { if (!GREEDY_REALLOC(s, allocated, sz+7)) -- cgit v1.2.3-54-g00ecf From 3ff13c298d6d53293680c383768c3054fb9fcc30 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Thu, 5 Nov 2015 21:17:11 -0800 Subject: extract-word: replace a use of `goto` with structured code Using `goto` might be appropriate for the "finish" cases but it was really not necessary at this point of the code... Just use if/else blocks to accomplish the same. Confirmed that the test cases in test-extract-word keep working as expected. 
--- src/basic/extract-word.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'src/basic') diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index 23e3d557c0..1b3123bb7d 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -106,21 +106,19 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra if (flags & EXTRACT_CUNESCAPE_RELAX) { s[sz++] = '\\'; s[sz++] = c; - goto end_escape; - } - return -EINVAL; + } else + return -EINVAL; + } else { + (*p) += r - 1; + + if (c != 0) + s[sz++] = c; /* normal explicit char */ + else + sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */ } - - (*p) += r - 1; - - if (c != 0) - s[sz++] = c; /* normal explicit char */ - else - sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */ } else s[sz++] = c; -end_escape: backslash = false; } else if (quote) { /* inside either single or double quotes */ -- cgit v1.2.3-54-g00ecf From 8372da448f3c738e0154d988538d497f7e2e1f83 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Thu, 5 Nov 2015 21:31:29 -0800 Subject: extract-word: Check for early bail out before inspecting separators It's a pretty small optimization but doesn't hurt... Tested with test-extract-word. --- src/basic/extract-word.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/basic') diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index 1b3123bb7d..b08851b89b 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -39,13 +39,13 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra assert(p); assert(ret); - if (!separators) - separators = WHITESPACE; - /* Bail early if called after last value or with no input */ if (!*p) goto finish_force_terminate; + if (!separators) + separators = WHITESPACE; + /* Parses the first word of a string, and returns it in * *ret. 
Removes all quotes in the process. When parsing fails * (because of an uneven number of quotes or similar), leaves -- cgit v1.2.3-54-g00ecf From 93de9eb76d628cf731120d97332e03600c167271 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Thu, 5 Nov 2015 21:41:04 -0800 Subject: extract-word: increment pointer p and keep c in sync in for loop This will make it easier to use inner loops to keep looping in the same state, by just updating p and c in the same way in the inner loops. Tested that no regressions were created in test-extract-word. --- src/basic/extract-word.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'src/basic') diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index b08851b89b..b0056a8485 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -42,6 +42,7 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra /* Bail early if called after last value or with no input */ if (!*p) goto finish_force_terminate; + c = **p; if (!separators) separators = WHITESPACE; @@ -55,14 +56,14 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra if (!GREEDY_REALLOC(s, allocated, sz+1)) return -ENOMEM; - for (;;) { - c = **p; + for (;; (*p) ++, c = **p) { if (c == 0) goto finish_force_terminate; else if (strchr(separators, c)) { - (*p) ++; - if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { + (*p) ++; goto finish_force_next; + } } else { /* We found a non-blank character, so we will always * want to return a string (even if it is empty), @@ -73,9 +74,7 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra } } - for (;;) { - c = **p; - + for (;; (*p) ++, c = **p) { if (backslash) { if (!GREEDY_REALLOC(s, allocated, sz+7)) return -ENOMEM; @@ -163,8 +162,6 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra s[sz++] = c; } } - - (*p) ++; } 
finish_force_terminate: -- cgit v1.2.3-54-g00ecf From 27fc921b658adc5baa988c4c213888b016a60b18 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Thu, 5 Nov 2015 21:51:24 -0800 Subject: extract-word: Do not re-evaluate the state on each parsed character Use inner loops to keep processing the same state, except when there is a state change, then break back to the outer loop so that the correct branch can be selected again. Tested that no regressions were introduced in test-extract-word. --- src/basic/extract-word.c | 75 +++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 32 deletions(-) (limited to 'src/basic') diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index b0056a8485..67511a32e1 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -121,45 +121,56 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra backslash = false; } else if (quote) { /* inside either single or double quotes */ - if (c == 0) { - if (flags & EXTRACT_RELAX) - goto finish_force_terminate; - return -EINVAL; - } else if (c == quote) /* found the end quote */ - quote = 0; - else if (c == '\\') - backslash = true; - else { - if (!GREEDY_REALLOC(s, allocated, sz+2)) - return -ENOMEM; + for (;; (*p) ++, c = **p) { + if (c == 0) { + if (flags & EXTRACT_RELAX) + goto finish_force_terminate; + return -EINVAL; + } else if (c == quote) { /* found the end quote */ + quote = 0; + break; + } else if (c == '\\') { + backslash = true; + break; + } else { + if (!GREEDY_REALLOC(s, allocated, sz+2)) + return -ENOMEM; - s[sz++] = c; + s[sz++] = c; + } } } else if (separator) { - if (c == 0) - goto finish_force_terminate; - if (!strchr(separators, c)) - goto finish; + for (;; (*p) ++, c = **p) { + if (c == 0) + goto finish_force_terminate; + if (!strchr(separators, c)) + goto finish; + } } else { - if (c == 0) - goto finish_force_terminate; - else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) - 
quote = c; - else if (c == '\\') - backslash = true; - else if (strchr(separators, c)) { - if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { - (*p) ++; - goto finish_force_next; - } - separator = true; - } else { - if (!GREEDY_REALLOC(s, allocated, sz+2)) - return -ENOMEM; + for (;; (*p) ++, c = **p) { + if (c == 0) + goto finish_force_terminate; + else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) { + quote = c; + break; + } else if (c == '\\') { + backslash = true; + break; + } else if (strchr(separators, c)) { + if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { + (*p) ++; + goto finish_force_next; + } + separator = true; + break; + } else { + if (!GREEDY_REALLOC(s, allocated, sz+2)) + return -ENOMEM; - s[sz++] = c; + s[sz++] = c; + } } } } -- cgit v1.2.3-54-g00ecf From 0247447e96f1385cf0c48e3e6b696214fbe36802 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Thu, 5 Nov 2015 21:57:26 -0800 Subject: extract-word: Skip coalesced separators in place Just skip them in place, instead of setting separator=true. We only do that in a single place (while finding a separator outside of quote or backslash states) so we don't really need a separate state for it. Tested that no regressions were introduced in test-extract-word. Ran a full `make check` and also installed the binaries on a test system and did not see any issues related to parsing unit files or starting units after a reboot. 
--- src/basic/extract-word.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'src/basic') diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index 67511a32e1..ff6d211ef4 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -34,7 +34,6 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra char quote = 0; /* 0 or ' or " */ bool backslash = false; /* whether we've just seen a backslash */ - bool separator = false; /* whether we've just seen a separator */ assert(p); assert(ret); @@ -140,14 +139,6 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra } } - } else if (separator) { - for (;; (*p) ++, c = **p) { - if (c == 0) - goto finish_force_terminate; - if (!strchr(separators, c)) - goto finish; - } - } else { for (;; (*p) ++, c = **p) { if (c == 0) @@ -163,8 +154,15 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra (*p) ++; goto finish_force_next; } - separator = true; - break; + /* Skip additional coalesced separators. */ + for (;; (*p) ++, c = **p) { + if (c == 0) + goto finish_force_terminate; + if (!strchr(separators, c)) + break; + } + goto finish; + } else { if (!GREEDY_REALLOC(s, allocated, sz+2)) return -ENOMEM; -- cgit v1.2.3-54-g00ecf