diff options
author | Lennart Poettering <lennart@poettering.net> | 2013-12-28 03:03:50 +0100 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2013-12-28 03:04:29 +0100 |
commit | 08bcebf36eb85f5e75b968de8c648e6614cc534b (patch) | |
tree | f81cd12d77c240ea3ea873d59890da755aae0c12 /src | |
parent | 79ccff06c19c0ee74b9744928bc40f2ce658fde2 (diff) |
shared: add simplistic XML parser for usage in the D-Bus policy language compat parser
Diffstat (limited to 'src')
-rw-r--r-- | src/shared/xml.c | 216 | ||||
-rw-r--r-- | src/shared/xml.h | 34 | ||||
-rw-r--r-- | src/test/test-xml.c | 83 |
3 files changed, 333 insertions, 0 deletions
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644
index 0000000000..be56b08ce9
--- /dev/null
+++ b/src/shared/xml.c
@@ -0,0 +1,216 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <string.h>
+
+#include "util.h"
+#include "xml.h"
+
+enum {
+        STATE_TEXT,      /* Outside of a tag. First enumerator, hence 0, which is
+                          * presumably what a NULL *state decodes to through
+                          * PTR_TO_INT() (the test program starts from NULL) */
+        STATE_TAG,       /* Inside a tag, after the tag name or after an
+                          * attribute value */
+        STATE_ATTRIBUTE, /* Directly after an attribute name */
+};
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset, that is a bit less strict than XML and lacks all the more
+ * complex features, like entities, or namespaces. However, we do
+ * support some HTML5-like simplifications.
+ *
+ * xml_tokenize() returns the next token of the NUL-terminated input
+ * at *p:
+ *
+ *   p      in/out read position. It is advanced past the token
+ *          whenever a token other than XML_END is returned, and is
+ *          not written on XML_END or on errors.
+ *   name   receives a newly allocated copy (strndup()) of the text,
+ *          tag name, attribute name or attribute value. It is set to
+ *          NULL for XML_TAG_CLOSE_EMPTY and is not written on XML_END
+ *          or on errors. Releasing it is left to the caller (the
+ *          test program declares its variable _cleanup_free_).
+ *   state  in/out tokenizer state: one of the STATE_* values above,
+ *          converted with INT_TO_PTR()/PTR_TO_INT(). The same
+ *          variable has to be passed to every call; the test program
+ *          initializes it to NULL.
+ *
+ * Returns one of the XML_* token types from xml.h, -ENOMEM if
+ * strndup() fails, or -EINVAL on malformed input (for example an
+ * unterminated comment, tag name or quoted attribute value).
+ *
+ * Comments, processing instructions and DTDs are skipped and
+ * produce no token, and neither does the '>' that ends a tag. The
+ * end of the input is checked before the state is looked at, so
+ * XML_END is also returned when the input stops right after an
+ * attribute name or after a complete token inside a tag. */
+
+int xml_tokenize(const char **p, char **name, void **state) {
+        const char *c, *e, *b;
+        char *ret;
+        int t;
+
+        assert(p);
+        assert(*p);
+        assert(name);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        c = *p;
+
+        for (;;) {
+                if (*c == 0) /* end of input, regardless of the current state */
+                        return XML_END;
+
+                switch (t) {
+
+                case STATE_TEXT: {
+                        int x;
+
+                        e = strchrnul(c, '<');
+                        if (e > c) {
+                                /* More text... everything up to the next '<'
+                                 * (or up to the end of the input) is handed
+                                 * out as a single XML_TEXT token */
+                                ret = strndup(c, e - c);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = e;
+                                *state = INT_TO_PTR(STATE_TEXT);
+
+                                return XML_TEXT;
+                        }
+
+                        assert(*e == '<');
+                        b = c + 1; /* first character after the '<' */
+
+                        if (startswith(b, "!--")) {
+                                /* A comment: skipped up to and including the
+                                 * closing "-->", no token is generated */
+                                e = strstr(b + 3, "-->");
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 3;
+                                continue;
+                        }
+
+                        if (*b == '?') {
+                                /* Processing instruction: skipped up to and
+                                 * including the closing "?>" */
+
+                                e = strstr(b + 1, "?>");
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 2;
+                                continue;
+                        }
+
+                        if (*b == '!') {
+                                /* DTD (anything else that starts with '<!'):
+                                 * skipped up to and including the first '>' */
+
+                                e = strchr(b + 1, '>');
+                                if (!e)
+                                        return -EINVAL;
+
+                                c = e + 1;
+                                continue;
+                        }
+
+                        if (*b == '/') {
+                                /* A closing tag: the name starts after the '/' */
+                                x = XML_TAG_CLOSE;
+                                b++;
+                        } else
+                                x = XML_TAG_OPEN;
+
+                        e = strpbrk(b, WHITESPACE "/>"); /* end of the tag name */
+                        if (!e)
+                                return -EINVAL;
+
+                        ret = strndup(b, e - b);
+                        if (!ret)
+                                return -ENOMEM;
+
+                        *name = ret;
+                        *p = e;
+                        *state = INT_TO_PTR(STATE_TAG);
+
+                        return x;
+                }
+
+                case STATE_TAG:
+
+                        b = c + strspn(c, WHITESPACE); /* skip leading whitespace */
+                        if (*b == 0)
+                                return -EINVAL; /* only whitespace left inside the tag */
+
+                        e = b + strcspn(b, WHITESPACE "=/>");
+                        if (e > b) {
+                                /* An attribute: its name runs up to the next
+                                 * WHITESPACE character, '=', '/' or '>' */
+
+                                ret = strndup(b, e - b);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = e;
+                                *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+                                return XML_ATTRIBUTE_NAME;
+                        }
+
+                        if (startswith(b, "/>")) {
+                                /* An empty tag */
+
+                                *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+                                *p = b + 2;
+                                *state = INT_TO_PTR(STATE_TEXT);
+
+                                return XML_TAG_CLOSE_EMPTY;
+                        }
+
+                        if (*b != '>')
+                                return -EINVAL;
+
+                        c = b + 1; /* the '>' itself yields no token */
+                        t = STATE_TEXT;
+                        continue;
+
+                case STATE_ATTRIBUTE:
+
+                        if (*c == '=') { /* no whitespace is skipped before the '=' */
+                                c++;
+
+                                if (*c == '\'' || *c == '\"') {
+                                        /* Tag with a quoted value: it runs up to
+                                         * the next occurrence of the same quote
+                                         * character; the quotes are not part of
+                                         * the returned value */
+
+                                        e = strchr(c+1, *c);
+                                        if (!e)
+                                                return -EINVAL;
+
+                                        ret = strndup(c+1, e - c - 1);
+                                        if (!ret)
+                                                return -ENOMEM;
+
+                                        *name = ret;
+                                        *p = e + 1;
+                                        *state = INT_TO_PTR(STATE_TAG);
+
+                                        return XML_ATTRIBUTE_VALUE;
+
+                                }
+
+                                /* Tag with a value without quotes: it runs up
+                                 * to the next WHITESPACE character or '>'. If
+                                 * neither is found the returned value is empty
+                                 * and nothing is consumed.
+                                 * NOTE(review): '/' does not end the value, so
+                                 * for a=b followed by "/>" the value is "b/"
+                                 * and no XML_TAG_CLOSE_EMPTY follows - confirm
+                                 * this is intended. */
+
+                                b = strpbrk(c, WHITESPACE ">");
+                                if (!b)
+                                        b = c;
+
+                                ret = strndup(c, b - c);
+                                if (!ret)
+                                        return -ENOMEM;
+
+                                *name = ret;
+                                *p = b;
+                                *state = INT_TO_PTR(STATE_TAG);
+                                return XML_ATTRIBUTE_VALUE;
+                        }
+
+                        t = STATE_TAG; /* attribute without a value */
+                        continue;
+                }
+
+        }
+        /* NOTE(review): the loop above is only ever left through return
+         * statements, so the line below is never executed. The switch has
+         * no default label; an unknown state value would keep looping. */
+        assert_not_reached("Bad state");
+}
diff --git a/src/shared/xml.h b/src/shared/xml.h
new file mode 100644
index 0000000000..18ebbd9e44
--- /dev/null
+++ b/src/shared/xml.h
@@ -0,0 +1,34 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+enum {
+        XML_END,             /* end of the input; no name is returned */
+        XML_TEXT,            /* text outside of tags; name is the text */
+        XML_TAG_OPEN,        /* start of an opening tag; name is the tag name */
+        XML_TAG_CLOSE,       /* start of a closing tag; name is the tag name */
+        XML_TAG_CLOSE_EMPTY, /* "/>" at the end of a tag; name is NULL */
+        XML_ATTRIBUTE_NAME,  /* name is the attribute name */
+        XML_ATTRIBUTE_VALUE  /* name is the value, without surrounding quotes */
+};
+/* Returns the next token of *p as one of the XML_* values above, or a
+ * negative error (-EINVAL, -ENOMEM). Implemented in xml.c. */
+int xml_tokenize(const char **p, char **name, void **state);
diff --git a/src/test/test-xml.c b/src/test/test-xml.c
new file mode 100644
index 0000000000..7a34f143ee
--- /dev/null
+++ b/src/test/test-xml.c
@@ -0,0 +1,83 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdarg.h>
+
+#include "xml.h"
+#include "util.h"
+/* Runs xml_tokenize() over data until it returns XML_END and checks
+ * every token against the variadic arguments. The arguments are a
+ * sequence of (int token type, const char *expected name) pairs and
+ * must end with a lone XML_END. The expected name may be NULL; it is
+ * compared with streq_ptr() (presumably a NULL-tolerant string
+ * comparison). A negative return value or any mismatch trips
+ * assert_se(). */
+static void test_one(const char *data, ...) {
+        void *state = NULL;
+        va_list ap;
+
+        va_start(ap, data);
+
+        for (;;) {
+                _cleanup_free_ char *name = NULL;
+                int t, tt;
+                const char *nn;
+
+                t = xml_tokenize(&data, &name, &state);
+                assert_se(t >= 0);
+
+                tt = va_arg(ap, int); /* expected token type */
+                assert_se(tt >= 0);
+
+                assert_se(t == tt);
+                if (t == XML_END) /* XML_END carries no expected name */
+                        break;
+
+                nn = va_arg(ap, const char *); /* expected name, may be NULL */
+                assert_se(streq_ptr(nn, name));
+        }
+
+        va_end(ap);
+}
+
+int main(int argc, char *argv[]) {
+        /* Empty input */
+        test_one("", XML_END);
+        /* Opening and closing tag without content */
+        test_one("<foo></foo>",
+                 XML_TAG_OPEN, "foo",
+                 XML_TAG_CLOSE, "foo",
+                 XML_END);
+        /* Unquoted and quoted attribute values, empty tag */
+        test_one("<foo waldo=piep meh=\"huhu\"/>",
+                 XML_TAG_OPEN, "foo",
+                 XML_ATTRIBUTE_NAME, "waldo",
+                 XML_ATTRIBUTE_VALUE, "piep",
+                 XML_ATTRIBUTE_NAME, "meh",
+                 XML_ATTRIBUTE_VALUE, "huhu",
+                 XML_TAG_CLOSE_EMPTY, NULL,
+                 XML_END);
+        /* Leading text; processing instruction and comment are skipped */
+        test_one("xxxx\n"
+                 "<foo><?xml foo?> <!-- zzzz --> </foo>",
+                 XML_TEXT, "xxxx\n",
+                 XML_TAG_OPEN, "foo",
+                 XML_TEXT, " ",
+                 XML_TEXT, " ",
+                 XML_TAG_CLOSE, "foo",
+                 XML_END);
+
+        return 0;
+}
|