/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
  This file is part of systemd.
  Copyright 2013 Lennart Poettering
  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.
  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <string.h>

#include "util.h"
#include "xml.h"
/* Tokenizer states. xml_tokenize() converts the current one to and
 * from the caller-held opaque state pointer between calls. */
enum {
        /* Initial state: xml_tokenize() resets the line counter to 1
         * and then proceeds as STATE_TEXT */
        STATE_NULL = 0,

        /* Outside of any tag: character data or the start of a tag */
        STATE_TEXT = 1,

        /* Inside a tag, after its name or after an attribute value */
        STATE_TAG = 2,

        /* Right after an attribute name, where "=value" may follow */
        STATE_ATTRIBUTE = 3,
};
/* Adds the number of '\n' bytes found within the first n bytes of s
 * to *line. Does nothing when line is NULL, so that callers that do
 * not track line numbers can pass their pointer through as-is. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor = s, *newline;
        size_t remaining = n;

        if (!line)
                return;

        while ((newline = memchr(cursor, '\n', remaining)) != NULL) {
                /* Skip everything up to and including the newline just found */
                remaining -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;

                ++*line;
        }
}
/* We don't actually do real XML here. We only read a simplistic
 * subset, that is a bit less strict than XML and lacks all the more
 * complex features, like entities, or namespaces. However, we do
 * support some HTML5-like simplifications */
/* Returns the next token of the NUL-terminated input at *p.
 *
 *   p      In/out cursor. When a token other than XML_END is
 *          returned it is moved right behind that token.
 *   name   Output. Receives a strndup()-allocated copy of the text,
 *          tag name, attribute name or attribute value. This function
 *          never frees it, so the caller owns the string. It is set
 *          to NULL for XML_TAG_CLOSE_EMPTY and is left untouched for
 *          XML_END and on errors.
 *   state  In/out opaque tokenizer state, carried over from call to
 *          call. It has to start out as the value PTR_TO_INT() maps
 *          to STATE_NULL (presumably a NULL pointer).
 *   line   Optional, may be NULL. Set to 1 when the state is
 *          STATE_NULL and advanced for each newline consumed.
 *
 * Returns XML_TEXT, XML_TAG_OPEN, XML_TAG_CLOSE, XML_TAG_CLOSE_EMPTY,
 * XML_ATTRIBUTE_NAME or XML_ATTRIBUTE_VALUE for a token, XML_END when
 * the terminating NUL byte is reached, -ENOMEM if copying the token
 * failed and -EINVAL on malformed input. On XML_END and on errors *p
 * and *state are not updated, though *line may already have been
 * advanced for comments and the like that were skipped on the way. */
int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
        const char *c, *e, *b;
        char *ret;
        int t;
        assert(p);
        assert(*p);
        assert(name);
        assert(state);
        t = PTR_TO_INT(*state);
        c = *p;
        if (t == STATE_NULL) {
                /* First call for this input: start counting lines at 1 */
                if (line)
                        *line = 1;
                t = STATE_TEXT;
        }
        for (;;) {
                /* End of input, in whatever state we happen to be in */
                if (*c == 0)
                        return XML_END;
                switch (t) {
                case STATE_TEXT: {
                        int x;
                        e = strchrnul(c, '<');
                        if (e > c) {
                                /* More text... */
                                ret = strndup(c, e - c);
                                if (!ret)
                                        return -ENOMEM;
                                inc_lines(line, c, e - c);
                                *name = ret;
                                *p = e;
                                *state = INT_TO_PTR(STATE_TEXT);
                                return XML_TEXT;
                        }
                        /* e == c and *c != 0, hence we are looking at a '<' */
                        assert(*e == '<');
                        b = c + 1;
                        if (startswith(b, "!--")) {
                                /* A comment, skipped without generating a token */
                                e = strstr(b + 3, "-->");
                                if (!e)
                                        return -EINVAL;
                                inc_lines(line, b, e + 3 - b);
                                c = e + 3;
                                continue;
                        }
                        if (*b == '?') {
                                /* Processing instruction, skipped as well */
                                e = strstr(b + 1, "?>");
                                if (!e)
                                        return -EINVAL;
                                inc_lines(line, b, e + 2 - b);
                                c = e + 2;
                                continue;
                        }
                        if (*b == '!') {
                                /* DTD, skipped up to the next '>' */
                                e = strchr(b + 1, '>');
                                if (!e)
                                        return -EINVAL;
                                inc_lines(line, b, e + 1 - b);
                                c = e + 1;
                                continue;
                        }
                        if (*b == '/') {
                                /* A closing tag */
                                x = XML_TAG_CLOSE;
                                b++;
                        } else
                                x = XML_TAG_OPEN;
                        /* The tag name extends up to the first whitespace, '/' or '>' */
                        e = strpbrk(b, WHITESPACE "/>");
                        if (!e)
                                return -EINVAL;
                        ret = strndup(b, e - b);
                        if (!ret)
                                return -ENOMEM;
                        *name = ret;
                        *p = e;
                        *state = INT_TO_PTR(STATE_TAG);
                        return x;
                }
                case STATE_TAG:
                        /* Skip whitespace in front of the next attribute or the end of the tag */
                        b = c + strspn(c, WHITESPACE);
                        if (*b == 0)
                                return -EINVAL;
                        inc_lines(line, c, b - c);
                        e = b + strcspn(b, WHITESPACE "=/>");
                        if (e > b) {
                                /* An attribute */
                                ret = strndup(b, e - b);
                                if (!ret)
                                        return -ENOMEM;
                                *name = ret;
                                *p = e;
                                *state = INT_TO_PTR(STATE_ATTRIBUTE);
                                return XML_ATTRIBUTE_NAME;
                        }
                        if (startswith(b, "/>")) {
                                /* An empty tag */
                                *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
                                *p = b + 2;
                                *state = INT_TO_PTR(STATE_TEXT);
                                return XML_TAG_CLOSE_EMPTY;
                        }
                        if (*b != '>')
                                return -EINVAL;
                        /* A plain '>' ends the tag without a token of its own, go on with the text behind it */
                        c = b + 1;
                        t = STATE_TEXT;
                        continue;
                case STATE_ATTRIBUTE:
                        if (*c == '=') {
                                c++;
                                if (*c == '\'' || *c == '\"') {
                                        /* Tag with a quoted value, ends at the next quote of the same kind */
                                        e = strchr(c+1, *c);
                                        if (!e)
                                                return -EINVAL;
                                        inc_lines(line, c, e - c);
                                        ret = strndup(c+1, e - c - 1);
                                        if (!ret)
                                                return -ENOMEM;
                                        *name = ret;
                                        *p = e + 1;
                                        *state = INT_TO_PTR(STATE_TAG);
                                        return XML_ATTRIBUTE_VALUE;
                                }
                                /* Tag with a value without quotes. If neither
                                 * whitespace nor '>' follows anymore, an empty
                                 * value is returned and the rest of the input is
                                 * left for the tag state to deal with. */
                                b = strpbrk(c, WHITESPACE ">");
                                if (!b)
                                        b = c;
                                ret = strndup(c, b - c);
                                if (!ret)
                                        return -ENOMEM;
                                *name = ret;
                                *p = b;
                                *state = INT_TO_PTR(STATE_TAG);
                                return XML_ATTRIBUTE_VALUE;
                        }
                        /* An attribute without a value, parse on as part of the tag */
                        t = STATE_TAG;
                        continue;
                default:
                        /* Without this label an unknown state matches
                         * no case at all and the loop spins forever,
                         * as nothing advances c anymore. */
                        assert_not_reached("Bad state");
                }
        }
}