summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKay Sievers <kay@vrfy.org>2012-10-22 16:27:00 +0200
committerKay Sievers <kay@vrfy.org>2012-10-22 16:27:00 +0200
commit955bd501c20ad0bc64104ed706225a824dafc375 (patch)
tree942856e2fd2cd20bf6604056e8cf91056d2d3bbc /src
parentb87377fca3c938303a2fb25229abf8c14814841b (diff)
shared: strbuf - add string de-duplication facility
Diffstat (limited to 'src')
-rw-r--r--src/shared/strbuf.c155
-rw-r--r--src/shared/strbuf.h56
2 files changed, 211 insertions, 0 deletions
diff --git a/src/shared/strbuf.c b/src/shared/strbuf.c
new file mode 100644
index 0000000000..5c858cb68e
--- /dev/null
+++ b/src/shared/strbuf.c
@@ -0,0 +1,155 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Kay Sievers <kay.sievers@vrfy.org>
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+#include "strbuf.h"
+
+struct strbuf *strbuf_new(void) {
+ struct strbuf *str;
+
+ str = new0(struct strbuf, 1);
+ if (!str)
+ return NULL;
+
+ str->buf = new0(char, 1);
+ if (!str->buf)
+ goto err;
+ str->len = 1;
+
+ str->root = new0(struct strbuf_node, 1);
+ if (!str->root)
+ goto err;
+ str->nodes_count = 1;
+ return str;
+err:
+ free(str->buf);
+ free(str->root);
+ free(str);
+ return NULL;
+}
+
+static void strbuf_node_cleanup(struct strbuf_node *node) {
+ size_t i;
+
+ for (i = 0; i < node->children_count; i++)
+ strbuf_node_cleanup(node->children[i].child);
+ free(node->children);
+ free(node);
+}
+
+void strbuf_complete(struct strbuf *str) {
+ if (!str)
+ return;
+ if (str->root)
+ strbuf_node_cleanup(str->root);
+ str->root = NULL;
+}
+
+void strbuf_cleanup(struct strbuf *str) {
+ if (!str)
+ return;
+ if (str->root)
+ strbuf_node_cleanup(str->root);
+ free(str->buf);
+ free(str);
+}
+
+static int strbuf_children_cmp(const void *v1, const void *v2) {
+ const struct strbuf_child_entry *n1 = v1;
+ const struct strbuf_child_entry *n2 = v2;
+
+ return n1->c - n2->c;
+}
+
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
+ uint8_t c;
+ struct strbuf_node *node;
+ size_t depth;
+ char *buf_new;
+ struct strbuf_child_entry *child;
+ struct strbuf_node *node_child;
+ ssize_t off;
+
+ if (!str->root)
+ return -EINVAL;
+
+ /* search string; start from last character to find possibly matching tails */
+ if (len == 0)
+ return 0;
+ str->in_count++;
+ str->in_len += len;
+
+ node = str->root;
+ c = s[len-1];
+ for (depth = 0; depth <= len; depth++) {
+ struct strbuf_child_entry search;
+
+ /* match against current node */
+ off = node->value_off + node->value_len - len;
+ if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
+ str->dedup_len += len;
+ str->dedup_count++;
+ return off;
+ }
+
+ /* lookup child node */
+ c = s[len - 1 - depth];
+ search.c = c;
+ child = bsearch(&search, node->children, node->children_count, sizeof(struct strbuf_child_entry),
+ strbuf_children_cmp);
+ if (!child)
+ break;
+ node = child->child;
+ }
+
+ /* add new string */
+ buf_new = realloc(str->buf, str->len + len+1);
+ if (!buf_new)
+ return -ENOMEM;
+ str->buf = buf_new;
+ off = str->len;
+ memcpy(str->buf + off, s, len);
+ str->len += len;
+ str->buf[str->len++] = '\0';
+
+ /* new node */
+ node_child = new0(struct strbuf_node, 1);
+ if (!node_child)
+ return -ENOMEM;
+ str->nodes_count++;
+ node_child->value_off = off;
+ node_child->value_len = len;
+
+ /* extend array, add new entry, sort for bisection */
+ child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
+ if (!child)
+ return -ENOMEM;
+ node->children = child;
+ node->children[node->children_count].c = c;
+ node->children[node->children_count].child = node_child;
+ node->children_count++;
+ qsort(node->children, node->children_count, sizeof(struct strbuf_child_entry), strbuf_children_cmp);
+
+ return off;
+}
diff --git a/src/shared/strbuf.h b/src/shared/strbuf.h
new file mode 100644
index 0000000000..35f232ddb0
--- /dev/null
+++ b/src/shared/strbuf.h
@@ -0,0 +1,56 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2012 Kay Sievers <kay.sievers@vrfy.org>
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+struct strbuf {
+ char *buf;
+ size_t len;
+ struct strbuf_node *root;
+
+ size_t nodes_count;
+ size_t in_count;
+ size_t in_len;
+ size_t dedup_len;
+ size_t dedup_count;
+};
+
+struct strbuf_node {
+ size_t value_off;
+ size_t value_len;
+
+ struct strbuf_child_entry *children;
+ uint8_t children_count;
+};
+
+struct strbuf_child_entry {
+ uint8_t c;
+ struct strbuf_node *child;
+};
+
+struct strbuf *strbuf_new(void);
+ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len);
+void strbuf_complete(struct strbuf *str);
+void strbuf_cleanup(struct strbuf *str);