/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include "escape.h"
#include "utf8.h"
#include "util.h"
#include "extract-word.h"

int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
        _cleanup_free_ char *s = NULL;
        size_t allocated = 0, sz = 0;
        int r;

        char quote = 0;                 /* 0 or ' or " */
        bool backslash = false;         /* whether we've just seen a backslash */
        bool separator = false;         /* whether we've just seen a separator */
        bool start = true;              /* false means we're looking at a value */

        assert(p);
        assert(ret);

        if (!separators)
                separators = WHITESPACE;

        /* Bail early if called after last value or with no input */
        if (!*p)
                goto finish_force_terminate;

        /* Parses the first word of a string, and returns it in
         * *ret. Removes all quotes in the process. When parsing fails
         * (because of an uneven number of quotes or similar), leaves
         * the pointer *p at the first invalid character. */

        for (;;) {
                char c = **p;

                if (start) {
                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
                                if (!GREEDY_REALLOC(s, allocated, sz+1))
                                        return -ENOMEM;

                        if (c == 0)
                                goto finish_force_terminate;
                        else if (strchr(separators, c)) {
                                (*p) ++;
                                if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
                                        goto finish_force_next;
                                continue;
                        }

                        /* We found a non-blank character, so we will always
                         * want to return a string (even if it is empty),
                         * allocate it here. */
                        if (!GREEDY_REALLOC(s, allocated, sz+1))
                                return -ENOMEM;

                        start = false;
                }

                if (backslash) {
                        if (!GREEDY_REALLOC(s, allocated, sz+7))
                                return -ENOMEM;

                        if (c == 0) {
                                if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
                                    (!quote || flags & EXTRACT_RELAX)) {
                                        /* If we find an unquoted trailing backslash and we're in
                                         * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
                                         * output.
                                         *
                                         * Unbalanced quotes will only be allowed in EXTRACT_RELAX
                                         * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
                                         */
                                        s[sz++] = '\\';
                                        goto finish_force_terminate;
                                }
                                if (flags & EXTRACT_RELAX)
                                        goto finish_force_terminate;
                                return -EINVAL;
                        }

                        if (flags & EXTRACT_CUNESCAPE) {
                                uint32_t u;

                                r = cunescape_one(*p, (size_t) -1, &c, &u);
                                if (r < 0) {
                                        if (flags & EXTRACT_CUNESCAPE_RELAX) {
                                                s[sz++] = '\\';
                                                s[sz++] = c;
                                                goto end_escape;
                                        }
                                        return -EINVAL;
                                }

                                (*p) += r - 1;

                                if (c != 0)
                                        s[sz++] = c; /* normal explicit char */
                                else
                                        sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
                        } else
                                s[sz++] = c;

end_escape:
                        backslash = false;

                } else if (quote) {     /* inside either single or double quotes */
                        if (c == 0) {
                                if (flags & EXTRACT_RELAX)
                                        goto finish_force_terminate;
                                return -EINVAL;
                        } else if (c == quote)          /* found the end quote */
                                quote = 0;
                        else if (c == '\\')
                                backslash = true;
                        else {
                                if (!GREEDY_REALLOC(s, allocated, sz+2))
                                        return -ENOMEM;

                                s[sz++] = c;
                        }

                } else if (separator) {
                        if (c == 0)
                                goto finish_force_terminate;
                        if (!strchr(separators, c))
                                goto finish;

                } else {
                        if (c == 0)
                                goto finish_force_terminate;
                        else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
                                quote = c;
                        else if (c == '\\')
                                backslash = true;
                        else if (strchr(separators, c)) {
                                if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
                                        (*p) ++;
                                        goto finish_force_next;
                                }
                                separator = true;
                        } else {
                                if (!GREEDY_REALLOC(s, allocated, sz+2))
                                        return -ENOMEM;

                                s[sz++] = c;
                        }
                }

                (*p) ++;
        }

finish_force_terminate:
        *p = NULL;
finish:
        if (!s) {
                *p = NULL;
                *ret = NULL;
                return 0;
        }

finish_force_next:
        s[sz] = 0;
        *ret = s;
        s = NULL;

        return 1;
}

int extract_first_word_and_warn(
                const char **p,
                char **ret,
                const char *separators,
                ExtractFlags flags,
                const char *unit,
                const char *filename,
                unsigned line,
                const char *rvalue) {

        /* Try to unquote it, if it fails, warn about it and try again
         * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
         * backslashes verbatim in invalid escape sequences. */

        const char *save;
        int r;

        save = *p;
        r = extract_first_word(p, ret, separators, flags);
        if (r >= 0)
                return r;

        if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {

                /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
                *p = save;
                r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
                if (r >= 0) {
                        /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
                        log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
                        return r;
                }

                /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
                if (r == -EINVAL)
                        return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
        }

        /* Can be any error, report it */
        return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
}

int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
        va_list ap;
        char **l;
        int n = 0, i, c, r;

        /* Parses a number of words from a string, stripping any
         * quotes if necessary. */

        assert(p);

        /* Count how many words are expected */
        va_start(ap, flags);
        for (;;) {
                if (!va_arg(ap, char **))
                        break;
                n++;
        }
        va_end(ap);

        if (n <= 0)
                return 0;

        /* Read all words into a temporary array */
        l = newa0(char*, n);
        for (c = 0; c < n; c++) {

                r = extract_first_word(p, &l[c], separators, flags);
                if (r < 0) {
                        int j;

                        for (j = 0; j < c; j++)
                                free(l[j]);

                        return r;
                }

                if (r == 0)
                        break;
        }

        /* If we managed to parse all words, return them in the passed
         * in parameters */
        va_start(ap, flags);
        for (i = 0; i < n; i++) {
                char **v;

                v = va_arg(ap, char **);
                assert(v);

                *v = l[i];
        }
        va_end(ap);

        return c;
}