summaryrefslogtreecommitdiff
path: root/src/shared/hashmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/shared/hashmap.c')
-rw-r--r--src/shared/hashmap.c1795
1 files changed, 1565 insertions, 230 deletions
diff --git a/src/shared/hashmap.c b/src/shared/hashmap.c
index fa4edf11c6..2bc3b38739 100644
--- a/src/shared/hashmap.c
+++ b/src/shared/hashmap.c
@@ -1,7 +1,10 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
/***
- This file is part of eudev, forked from systemd.
+ This file is part of systemd.
Copyright 2010 Lennart Poettering
+ Copyright 2014 Michal Schmidt
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -24,38 +27,250 @@
#include "util.h"
#include "hashmap.h"
+#include "set.h"
#include "macro.h"
#include "siphash24.h"
+#include "strv.h"
+#include "list.h"
#include "mempool.h"
-#define INITIAL_N_BUCKETS 31
-
-struct hashmap_entry {
+/*
+ * Implementation of hashmaps.
+ * Addressing: open
+ * - uses less RAM compared to closed addressing (chaining), because
+ * our entries are small (especially in Sets, which tend to contain
+ * the majority of entries in systemd).
+ * Collision resolution: Robin Hood
+ * - tends to equalize displacement of entries from their optimal buckets.
+ * Probe sequence: linear
+ * - though theoretically worse than random probing/uniform hashing/double
+ * hashing, it is good for cache locality.
+ *
+ * References:
+ * Celis, P. 1986. Robin Hood Hashing.
+ * Ph.D. Dissertation. University of Waterloo, Waterloo, Ont., Canada, Canada.
+ * https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
+ * - The results are derived for random probing. Suggests deletion with
+ * tombstones and two mean-centered search methods. None of that works
+ * well for linear probing.
+ *
+ * Janson, S. 2005. Individual displacements for linear probing hashing with different insertion policies.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 177-213.
+ * DOI=10.1145/1103963.1103964 http://doi.acm.org/10.1145/1103963.1103964
+ * http://www.math.uu.se/~svante/papers/sj157.pdf
+ * - Applies to Robin Hood with linear probing. Contains remarks on
+ * the unsuitability of mean-centered search with linear probing.
+ *
+ * Viola, A. 2005. Exact distribution of individual displacements in linear probing hashing.
+ * ACM Trans. Algorithms 1, 2 (October 2005), 214-242.
+ * DOI=10.1145/1103963.1103965 http://doi.acm.org/10.1145/1103963.1103965
+ * - Similar to Janson. Note that Viola writes about C_{m,n} (number of probes
+ * in a successful search), and Janson writes about displacement. C = d + 1.
+ *
+ * Goossaert, E. 2013. Robin Hood hashing: backward shift deletion.
+ * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
+ * - Explanation of backward shift deletion with pictures.
+ *
+ * Khuong, P. 2013. The Other Robin Hood Hashing.
+ * http://www.pvk.ca/Blog/2013/11/26/the-other-robin-hood-hashing/
+ * - Short summary of random vs. linear probing, and tombstones vs. backward shift.
+ */
+
+/*
+ * XXX Ideas for improvement:
+ * For unordered hashmaps, randomize iteration order, similarly to Perl:
+ * http://blog.booking.com/hardening-perls-hash-function.html
+ */
+
+/* INV_KEEP_FREE = 1 / (1 - max_load_factor)
+ * e.g. 1 / (1 - 0.8) = 5 ... keep one fifth of the buckets free. */
+#define INV_KEEP_FREE 5U
+
+/* Fields common to entries of all hashmap/set types */
+struct hashmap_base_entry {
const void *key;
+};
+
+/* Entry types for specific hashmap/set types
+ * hashmap_base_entry must be at the beginning of each entry struct. */
+
+struct plain_hashmap_entry {
+ struct hashmap_base_entry b;
void *value;
- struct hashmap_entry *bucket_next, *bucket_previous;
- struct hashmap_entry *iterate_next, *iterate_previous;
};
-struct Hashmap {
- const struct hash_ops *hash_ops;
+struct ordered_hashmap_entry {
+ struct plain_hashmap_entry p;
+ unsigned iterate_next, iterate_previous;
+};
+
+struct set_entry {
+ struct hashmap_base_entry b;
+};
+
+/* In several functions it is advantageous to have the hash table extended
+ * virtually by a couple of additional buckets. We reserve special index values
+ * for these "swap" buckets. */
+#define _IDX_SWAP_BEGIN (UINT_MAX - 3)
+#define IDX_PUT (_IDX_SWAP_BEGIN + 0)
+#define IDX_TMP (_IDX_SWAP_BEGIN + 1)
+#define _IDX_SWAP_END (_IDX_SWAP_BEGIN + 2)
+
+#define IDX_FIRST (UINT_MAX - 1) /* special index for freshly initialized iterators */
+#define IDX_NIL UINT_MAX /* special index value meaning "none" or "end" */
+
+assert_cc(IDX_FIRST == _IDX_SWAP_END);
+assert_cc(IDX_FIRST == _IDX_ITERATOR_FIRST);
+
+/* Storage space for the "swap" buckets.
+ * All entry types can fit into a ordered_hashmap_entry. */
+struct swap_entries {
+ struct ordered_hashmap_entry e[_IDX_SWAP_END - _IDX_SWAP_BEGIN];
+};
+
+/* Distance from Initial Bucket */
+typedef uint8_t dib_raw_t;
+#define DIB_RAW_OVERFLOW ((dib_raw_t)0xfdU) /* indicates DIB value is greater than representable */
+#define DIB_RAW_REHASH ((dib_raw_t)0xfeU) /* entry yet to be rehashed during in-place resize */
+#define DIB_RAW_FREE ((dib_raw_t)0xffU) /* a free bucket */
+#define DIB_RAW_INIT ((char)DIB_RAW_FREE) /* a byte to memset a DIB store with when initializing */
+
+#define DIB_FREE UINT_MAX
+
+#ifdef ENABLE_HASHMAP_DEBUG
+struct hashmap_debug_info {
+ LIST_FIELDS(struct hashmap_debug_info, debug_list);
+ unsigned max_entries; /* high watermark of n_entries */
+
+ /* who allocated this hashmap */
+ int line;
+ const char *file;
+ const char *func;
+
+ /* fields to detect modification while iterating */
+ unsigned put_count; /* counts puts into the hashmap */
+ unsigned rem_count; /* counts removals from hashmap */
+ unsigned last_rem_idx; /* remembers last removal index */
+};
- struct hashmap_entry *iterate_list_head, *iterate_list_tail;
+/* Tracks all existing hashmaps. Get at it from gdb. See sd_dump_hashmaps.py */
+static LIST_HEAD(struct hashmap_debug_info, hashmap_debug_list);
+
+#define HASHMAP_DEBUG_FIELDS struct hashmap_debug_info debug;
+
+#else /* !ENABLE_HASHMAP_DEBUG */
+#define HASHMAP_DEBUG_FIELDS
+#endif /* ENABLE_HASHMAP_DEBUG */
+
+enum HashmapType {
+ HASHMAP_TYPE_PLAIN,
+ HASHMAP_TYPE_ORDERED,
+ HASHMAP_TYPE_SET,
+ _HASHMAP_TYPE_MAX
+};
+
+struct _packed_ indirect_storage {
+ char *storage; /* where buckets and DIBs are stored */
+ uint8_t hash_key[HASH_KEY_SIZE]; /* hash key; changes during resize */
+
+ unsigned n_entries; /* number of stored entries */
+ unsigned n_buckets; /* number of buckets */
+
+ unsigned idx_lowest_entry; /* Index below which all buckets are free.
+ Makes "while(hashmap_steal_first())" loops
+ O(n) instead of O(n^2) for unordered hashmaps. */
+ uint8_t _pad[3]; /* padding for the whole HashmapBase */
+ /* The bitfields in HashmapBase complete the alignment of the whole thing. */
+};
+
+struct direct_storage {
+ /* This gives us 39 bytes on 64bit, or 35 bytes on 32bit.
+ * That's room for 4 set_entries + 4 DIB bytes + 3 unused bytes on 64bit,
+ * or 7 set_entries + 7 DIB bytes + 0 unused bytes on 32bit. */
+ char storage[sizeof(struct indirect_storage)];
+};
+
+#define DIRECT_BUCKETS(entry_t) \
+ (sizeof(struct direct_storage) / (sizeof(entry_t) + sizeof(dib_raw_t)))
+
+/* We should be able to store at least one entry directly. */
+assert_cc(DIRECT_BUCKETS(struct ordered_hashmap_entry) >= 1);
+
+/* We have 3 bits for n_direct_entries. */
+assert_cc(DIRECT_BUCKETS(struct set_entry) < (1 << 3));
+
+/* Hashmaps with directly stored entries all use this shared hash key.
+ * It's no big deal if the key is guessed, because there can be only
+ * a handful of directly stored entries in a hashmap. When a hashmap
+ * outgrows direct storage, it gets its own key for indirect storage. */
+static uint8_t shared_hash_key[HASH_KEY_SIZE];
+static bool shared_hash_key_initialized;
+
+/* Fields that all hashmap/set types must have */
+struct HashmapBase {
+ const struct hash_ops *hash_ops; /* hash and compare ops to use */
+
+ union _packed_ {
+ struct indirect_storage indirect; /* if has_indirect */
+ struct direct_storage direct; /* if !has_indirect */
+ };
+
+ enum HashmapType type:2; /* HASHMAP_TYPE_* */
+ bool has_indirect:1; /* whether indirect storage is used */
+ unsigned n_direct_entries:3; /* Number of entries in direct storage.
+ * Only valid if !has_indirect. */
+ bool from_pool:1; /* whether was allocated from mempool */
+ HASHMAP_DEBUG_FIELDS /* optional hashmap_debug_info */
+};
+
+/* Specific hash types
+ * HashmapBase must be at the beginning of each hashmap struct. */
+
+struct Hashmap {
+ struct HashmapBase b;
+};
- struct hashmap_entry ** buckets;
- unsigned n_buckets, n_entries;
+struct OrderedHashmap {
+ struct HashmapBase b;
+ unsigned iterate_list_head, iterate_list_tail;
+};
- uint8_t hash_key[HASH_KEY_SIZE];
- bool from_pool:1;
+struct Set {
+ struct HashmapBase b;
};
-struct hashmap_tile {
- Hashmap h;
- struct hashmap_entry *initial_buckets[INITIAL_N_BUCKETS];
+DEFINE_MEMPOOL(hashmap_pool, Hashmap, 8);
+DEFINE_MEMPOOL(ordered_hashmap_pool, OrderedHashmap, 8);
+/* No need for a separate Set pool */
+assert_cc(sizeof(Hashmap) == sizeof(Set));
+
+struct hashmap_type_info {
+ size_t head_size;
+ size_t entry_size;
+ struct mempool *mempool;
+ unsigned n_direct_buckets;
};
-static DEFINE_MEMPOOL(hashmap_pool, struct hashmap_tile, 8);
-static DEFINE_MEMPOOL(hashmap_entry_pool, struct hashmap_entry, 64);
+static const struct hashmap_type_info hashmap_type_info[_HASHMAP_TYPE_MAX] = {
+ [HASHMAP_TYPE_PLAIN] = {
+ .head_size = sizeof(Hashmap),
+ .entry_size = sizeof(struct plain_hashmap_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct plain_hashmap_entry),
+ },
+ [HASHMAP_TYPE_ORDERED] = {
+ .head_size = sizeof(OrderedHashmap),
+ .entry_size = sizeof(struct ordered_hashmap_entry),
+ .mempool = &ordered_hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct ordered_hashmap_entry),
+ },
+ [HASHMAP_TYPE_SET] = {
+ .head_size = sizeof(Set),
+ .entry_size = sizeof(struct set_entry),
+ .mempool = &hashmap_pool,
+ .n_direct_buckets = DIRECT_BUCKETS(struct set_entry),
+ },
+};
unsigned long string_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
uint64_t u;
@@ -87,9 +302,82 @@ const struct hash_ops trivial_hash_ops = {
.compare = trivial_compare_func
};
-static unsigned bucket_hash(Hashmap *h, const void *p) {
- return (unsigned) (h->hash_ops->hash(p, h->hash_key) % h->n_buckets);
+unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
+ uint64_t u;
+ siphash24((uint8_t*) &u, p, sizeof(uint64_t), hash_key);
+ return (unsigned long) u;
+}
+
+int uint64_compare_func(const void *_a, const void *_b) {
+ uint64_t a, b;
+ a = *(const uint64_t*) _a;
+ b = *(const uint64_t*) _b;
+ return a < b ? -1 : (a > b ? 1 : 0);
+}
+
+const struct hash_ops uint64_hash_ops = {
+ .hash = uint64_hash_func,
+ .compare = uint64_compare_func
+};
+
+#if SIZEOF_DEV_T != 8
+unsigned long devt_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
+ uint64_t u;
+ siphash24((uint8_t*) &u, p, sizeof(dev_t), hash_key);
+ return (unsigned long) u;
+}
+
+int devt_compare_func(const void *_a, const void *_b) {
+ dev_t a, b;
+ a = *(const dev_t*) _a;
+ b = *(const dev_t*) _b;
+ return a < b ? -1 : (a > b ? 1 : 0);
+}
+
+const struct hash_ops devt_hash_ops = {
+ .hash = devt_hash_func,
+ .compare = devt_compare_func
+};
+#endif
+
+static unsigned n_buckets(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.n_buckets
+ : hashmap_type_info[h->type].n_direct_buckets;
+}
+
+static unsigned n_entries(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.n_entries
+ : h->n_direct_entries;
+}
+
+static void n_entries_inc(HashmapBase *h) {
+ if (h->has_indirect)
+ h->indirect.n_entries++;
+ else
+ h->n_direct_entries++;
+}
+
+static void n_entries_dec(HashmapBase *h) {
+ if (h->has_indirect)
+ h->indirect.n_entries--;
+ else
+ h->n_direct_entries--;
+}
+
+static char *storage_ptr(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.storage
+ : h->direct.storage;
+}
+
+static uint8_t *hash_key(HashmapBase *h) {
+ return h->has_indirect ? h->indirect.hash_key
+ : shared_hash_key;
+}
+
+static unsigned base_bucket_hash(HashmapBase *h, const void *p) {
+ return (unsigned) (h->hash_ops->hash(p, hash_key(h)) % n_buckets(h));
}
+#define bucket_hash(h, p) base_bucket_hash(HASHMAP_BASE(h), p)
static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
static uint8_t current[HASH_KEY_SIZE];
@@ -110,131 +398,484 @@ static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
memcpy(hash_key, current, sizeof(current));
}
-Hashmap *hashmap_new(const struct hash_ops *hash_ops) {
- bool b;
- struct hashmap_tile *ht;
- Hashmap *h;
+static struct hashmap_base_entry *bucket_at(HashmapBase *h, unsigned idx) {
+ return (struct hashmap_base_entry*)
+ (storage_ptr(h) + idx * hashmap_type_info[h->type].entry_size);
+}
+
+static struct plain_hashmap_entry *plain_bucket_at(Hashmap *h, unsigned idx) {
+ return (struct plain_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry *ordered_bucket_at(OrderedHashmap *h, unsigned idx) {
+ return (struct ordered_hashmap_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct set_entry *set_bucket_at(Set *h, unsigned idx) {
+ return (struct set_entry*) bucket_at(HASHMAP_BASE(h), idx);
+}
+
+static struct ordered_hashmap_entry *bucket_at_swap(struct swap_entries *swap, unsigned idx) {
+ return &swap->e[idx - _IDX_SWAP_BEGIN];
+}
- b = is_main_thread();
+/* Returns a pointer to the bucket at index idx.
+ * Understands real indexes and swap indexes, hence "_virtual". */
+static struct hashmap_base_entry *bucket_at_virtual(HashmapBase *h, struct swap_entries *swap,
+ unsigned idx) {
+ if (idx < _IDX_SWAP_BEGIN)
+ return bucket_at(h, idx);
- if (b) {
- ht = mempool_alloc_tile(&hashmap_pool);
- if (!ht)
- return NULL;
+ if (idx < _IDX_SWAP_END)
+ return &bucket_at_swap(swap, idx)->p.b;
- memzero(ht, sizeof(struct hashmap_tile));
- } else {
- ht = malloc0(sizeof(struct hashmap_tile));
+ assert_not_reached("Invalid index");
+}
+
+static dib_raw_t *dib_raw_ptr(HashmapBase *h) {
+ return (dib_raw_t*)
+ (storage_ptr(h) + hashmap_type_info[h->type].entry_size * n_buckets(h));
+}
+
+static unsigned bucket_distance(HashmapBase *h, unsigned idx, unsigned from) {
+ return idx >= from ? idx - from
+ : n_buckets(h) + idx - from;
+}
+
+static unsigned bucket_calculate_dib(HashmapBase *h, unsigned idx, dib_raw_t raw_dib) {
+ unsigned initial_bucket;
+
+ if (raw_dib == DIB_RAW_FREE)
+ return DIB_FREE;
+
+ if (_likely_(raw_dib < DIB_RAW_OVERFLOW))
+ return raw_dib;
+
+ /*
+ * Having an overflow DIB value is very unlikely. The hash function
+ * would have to be bad. For example, in a table of size 2^24 filled
+ * to load factor 0.9 the maximum observed DIB is only about 60.
+ * In theory (assuming I used Maxima correctly), for an infinite size
+ * hash table with load factor 0.8 the probability of a given entry
+ * having DIB > 40 is 1.9e-8.
+ * This returns the correct DIB value by recomputing the hash value in
+ * the unlikely case. XXX Hitting this case could be a hint to rehash.
+ */
+ initial_bucket = bucket_hash(h, bucket_at(h, idx)->key);
+ return bucket_distance(h, idx, initial_bucket);
+}
+
+static void bucket_set_dib(HashmapBase *h, unsigned idx, unsigned dib) {
+ dib_raw_ptr(h)[idx] = dib != DIB_FREE ? MIN(dib, DIB_RAW_OVERFLOW) : DIB_RAW_FREE;
+}
+
+static unsigned skip_free_buckets(HashmapBase *h, unsigned idx) {
+ dib_raw_t *dibs;
+
+ dibs = dib_raw_ptr(h);
+
+ for ( ; idx < n_buckets(h); idx++)
+ if (dibs[idx] != DIB_RAW_FREE)
+ return idx;
+
+ return IDX_NIL;
+}
+
+static void bucket_mark_free(HashmapBase *h, unsigned idx) {
+ memset(bucket_at(h, idx), 0, hashmap_type_info[h->type].entry_size);
+ bucket_set_dib(h, idx, DIB_FREE);
+}
- if (!ht)
- return NULL;
+static void bucket_move_entry(HashmapBase *h, struct swap_entries *swap,
+ unsigned from, unsigned to) {
+ struct hashmap_base_entry *e_from, *e_to;
+
+ assert(from != to);
+
+ e_from = bucket_at_virtual(h, swap, from);
+ e_to = bucket_at_virtual(h, swap, to);
+
+ memcpy(e_to, e_from, hashmap_type_info[h->type].entry_size);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le, *le_to;
+
+ le_to = (struct ordered_hashmap_entry*) e_to;
+
+ if (le_to->iterate_next != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_next);
+ le->iterate_previous = to;
+ }
+
+ if (le_to->iterate_previous != IDX_NIL) {
+ le = (struct ordered_hashmap_entry*)
+ bucket_at_virtual(h, swap, le_to->iterate_previous);
+ le->iterate_next = to;
+ }
+
+ if (lh->iterate_list_head == from)
+ lh->iterate_list_head = to;
+ if (lh->iterate_list_tail == from)
+ lh->iterate_list_tail = to;
}
+}
- h = &ht->h;
- h->hash_ops = hash_ops ? hash_ops : &trivial_hash_ops;
+static unsigned next_idx(HashmapBase *h, unsigned idx) {
+ return (idx + 1U) % n_buckets(h);
+}
- h->n_buckets = INITIAL_N_BUCKETS;
- h->n_entries = 0;
- h->iterate_list_head = h->iterate_list_tail = NULL;
+static unsigned prev_idx(HashmapBase *h, unsigned idx) {
+ return (n_buckets(h) + idx - 1U) % n_buckets(h);
+}
- h->buckets = ht->initial_buckets;
+static void *entry_value(HashmapBase *h, struct hashmap_base_entry *e) {
+ switch (h->type) {
- h->from_pool = b;
+ case HASHMAP_TYPE_PLAIN:
+ case HASHMAP_TYPE_ORDERED:
+ return ((struct plain_hashmap_entry*)e)->value;
- get_hash_key(h->hash_key, true);
+ case HASHMAP_TYPE_SET:
+ return (void*) e->key;
- return h;
+ default:
+ assert_not_reached("Unknown hashmap type");
+ }
}
-static void link_entry(Hashmap *h, struct hashmap_entry *e, unsigned hash) {
+static void base_remove_entry(HashmapBase *h, unsigned idx) {
+ unsigned left, right, prev, dib;
+ dib_raw_t raw_dib, *dibs;
+
+ dibs = dib_raw_ptr(h);
+ assert(dibs[idx] != DIB_RAW_FREE);
+
+#ifdef ENABLE_HASHMAP_DEBUG
+ h->debug.rem_count++;
+ h->debug.last_rem_idx = idx;
+#endif
+
+ left = idx;
+ /* Find the stop bucket ("right"). It is either free or has DIB == 0. */
+ for (right = next_idx(h, left); ; right = next_idx(h, right)) {
+ raw_dib = dibs[right];
+ if (raw_dib == 0 || raw_dib == DIB_RAW_FREE)
+ break;
+
+ /* The buckets are not supposed to be all occupied and with DIB > 0.
+ * That would mean we could make everyone better off by shifting them
+ * backward. This scenario is impossible. */
+ assert(left != right);
+ }
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ struct ordered_hashmap_entry *le = ordered_bucket_at(lh, idx);
+
+ if (le->iterate_next != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_next)->iterate_previous = le->iterate_previous;
+ else
+ lh->iterate_list_tail = le->iterate_previous;
+
+ if (le->iterate_previous != IDX_NIL)
+ ordered_bucket_at(lh, le->iterate_previous)->iterate_next = le->iterate_next;
+ else
+ lh->iterate_list_head = le->iterate_next;
+ }
+
+ /* Now shift all buckets in the interval (left, right) one step backwards */
+ for (prev = left, left = next_idx(h, left); left != right;
+ prev = left, left = next_idx(h, left)) {
+ dib = bucket_calculate_dib(h, left, dibs[left]);
+ assert(dib != 0);
+ bucket_move_entry(h, NULL, left, prev);
+ bucket_set_dib(h, prev, dib - 1);
+ }
+
+ bucket_mark_free(h, prev);
+ n_entries_dec(h);
+}
+#define remove_entry(h, idx) base_remove_entry(HASHMAP_BASE(h), idx)
+
+static unsigned hashmap_iterate_in_insertion_order(OrderedHashmap *h, Iterator *i) {
+ struct ordered_hashmap_entry *e;
+ unsigned idx;
+
assert(h);
- assert(e);
-
- /* Insert into hash table */
- e->bucket_next = h->buckets[hash];
- e->bucket_previous = NULL;
- if (h->buckets[hash])
- h->buckets[hash]->bucket_previous = e;
- h->buckets[hash] = e;
-
- /* Insert into iteration list */
- e->iterate_previous = h->iterate_list_tail;
- e->iterate_next = NULL;
- if (h->iterate_list_tail) {
- assert(h->iterate_list_head);
- h->iterate_list_tail->iterate_next = e;
+ assert(i);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST && h->iterate_list_head == IDX_NIL)
+ goto at_end;
+
+ if (i->idx == IDX_FIRST) {
+ idx = h->iterate_list_head;
+ e = ordered_bucket_at(h, idx);
} else {
- assert(!h->iterate_list_head);
- h->iterate_list_head = e;
+ idx = i->idx;
+ e = ordered_bucket_at(h, idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->p.b.key != i->next_key) {
+ idx = prev_idx(HASHMAP_BASE(h), idx);
+ e = ordered_bucket_at(h, idx);
+ }
+ assert(e->p.b.key == i->next_key);
}
- h->iterate_list_tail = e;
- h->n_entries++;
- assert(h->n_entries >= 1);
+#ifdef ENABLE_HASHMAP_DEBUG
+ i->prev_idx = idx;
+#endif
+
+ if (e->iterate_next != IDX_NIL) {
+ struct ordered_hashmap_entry *n;
+ i->idx = e->iterate_next;
+ n = ordered_bucket_at(h, i->idx);
+ i->next_key = n->p.b.key;
+ } else
+ i->idx = IDX_NIL;
+
+ return idx;
+
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
}
-static void unlink_entry(Hashmap *h, struct hashmap_entry *e, unsigned hash) {
+static unsigned hashmap_iterate_in_internal_order(HashmapBase *h, Iterator *i) {
+ unsigned idx;
+
assert(h);
- assert(e);
+ assert(i);
- /* Remove from iteration list */
- if (e->iterate_next)
- e->iterate_next->iterate_previous = e->iterate_previous;
- else
- h->iterate_list_tail = e->iterate_previous;
+ if (i->idx == IDX_NIL)
+ goto at_end;
- if (e->iterate_previous)
- e->iterate_previous->iterate_next = e->iterate_next;
- else
- h->iterate_list_head = e->iterate_next;
+ if (i->idx == IDX_FIRST) {
+ /* fast forward to the first occupied bucket */
+ if (h->has_indirect) {
+ i->idx = skip_free_buckets(h, h->indirect.idx_lowest_entry);
+ h->indirect.idx_lowest_entry = i->idx;
+ } else
+ i->idx = skip_free_buckets(h, 0);
+
+ if (i->idx == IDX_NIL)
+ goto at_end;
+ } else {
+ struct hashmap_base_entry *e;
+
+ assert(i->idx > 0);
- /* Remove from hash table bucket list */
- if (e->bucket_next)
- e->bucket_next->bucket_previous = e->bucket_previous;
+ e = bucket_at(h, i->idx);
+ /*
+ * We allow removing the current entry while iterating, but removal may cause
+ * a backward shift. The next entry may thus move one bucket to the left.
+ * To detect when it happens, we remember the key pointer of the entry we were
+ * going to iterate next. If it does not match, there was a backward shift.
+ */
+ if (e->key != i->next_key)
+ e = bucket_at(h, --i->idx);
+
+ assert(e->key == i->next_key);
+ }
- if (e->bucket_previous)
- e->bucket_previous->bucket_next = e->bucket_next;
+ idx = i->idx;
+#ifdef ENABLE_HASHMAP_DEBUG
+ i->prev_idx = idx;
+#endif
+
+ i->idx = skip_free_buckets(h, i->idx + 1);
+ if (i->idx != IDX_NIL)
+ i->next_key = bucket_at(h, i->idx)->key;
else
- h->buckets[hash] = e->bucket_next;
+ i->idx = IDX_NIL;
+
+ return idx;
- assert(h->n_entries >= 1);
- h->n_entries--;
+at_end:
+ i->idx = IDX_NIL;
+ return IDX_NIL;
}
-static void remove_entry(Hashmap *h, struct hashmap_entry *e) {
- unsigned hash;
+static unsigned hashmap_iterate_entry(HashmapBase *h, Iterator *i) {
+ if (!h) {
+ i->idx = IDX_NIL;
+ return IDX_NIL;
+ }
- assert(h);
- assert(e);
+#ifdef ENABLE_HASHMAP_DEBUG
+ if (i->idx == IDX_FIRST) {
+ i->put_count = h->debug.put_count;
+ i->rem_count = h->debug.rem_count;
+ } else {
+ /* While iterating, must not add any new entries */
+ assert(i->put_count == h->debug.put_count);
+ /* ... or remove entries other than the current one */
+ assert(i->rem_count == h->debug.rem_count ||
+ (i->rem_count == h->debug.rem_count - 1 &&
+ i->prev_idx == h->debug.last_rem_idx));
+ /* Reset our removals counter */
+ i->rem_count = h->debug.rem_count;
+ }
+#endif
- hash = bucket_hash(h, e->key);
- unlink_entry(h, e, hash);
+ return h->type == HASHMAP_TYPE_ORDERED ? hashmap_iterate_in_insertion_order((OrderedHashmap*) h, i)
+ : hashmap_iterate_in_internal_order(h, i);
+}
- if (h->from_pool)
- mempool_free_tile(&hashmap_entry_pool, e);
- else
- free(e);
+void *internal_hashmap_iterate(HashmapBase *h, Iterator *i, const void **key) {
+ struct hashmap_base_entry *e;
+ void *data;
+ unsigned idx;
+
+ idx = hashmap_iterate_entry(h, i);
+ if (idx == IDX_NIL) {
+ if (key)
+ *key = NULL;
+
+ return NULL;
+ }
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ if (key)
+ *key = e->key;
+
+ return data;
}
-void hashmap_free(Hashmap*h) {
+void *set_iterate(Set *s, Iterator *i) {
+ return internal_hashmap_iterate(HASHMAP_BASE(s), i, NULL);
+}
- /* Free the hashmap, but nothing in it */
+#define HASHMAP_FOREACH_IDX(idx, h, i) \
+ for ((i) = ITERATOR_FIRST, (idx) = hashmap_iterate_entry((h), &(i)); \
+ (idx != IDX_NIL); \
+ (idx) = hashmap_iterate_entry((h), &(i)))
+
+static void reset_direct_storage(HashmapBase *h) {
+ const struct hashmap_type_info *hi = &hashmap_type_info[h->type];
+ void *p;
+
+ assert(!h->has_indirect);
+
+ p = mempset(h->direct.storage, 0, hi->entry_size * hi->n_direct_buckets);
+ memset(p, DIB_RAW_INIT, sizeof(dib_raw_t) * hi->n_direct_buckets);
+}
+
+static struct HashmapBase *hashmap_base_new(const struct hash_ops *hash_ops, enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *h;
+ const struct hashmap_type_info *hi = &hashmap_type_info[type];
+ bool use_pool;
+
+ use_pool = is_main_thread();
+
+ h = use_pool ? mempool_alloc0_tile(hi->mempool) : malloc0(hi->head_size);
if (!h)
- return;
+ return NULL;
+
+ h->type = type;
+ h->from_pool = use_pool;
+ h->hash_ops = hash_ops ? hash_ops : &trivial_hash_ops;
- hashmap_clear(h);
+ if (type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*)h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+
+ reset_direct_storage(h);
+
+ if (!shared_hash_key_initialized) {
+ random_bytes(shared_hash_key, sizeof(shared_hash_key));
+ shared_hash_key_initialized= true;
+ }
- if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
- free(h->buckets);
+#ifdef ENABLE_HASHMAP_DEBUG
+ LIST_PREPEND(debug_list, hashmap_debug_list, &h->debug);
+ h->debug.func = func;
+ h->debug.file = file;
+ h->debug.line = line;
+#endif
+
+ return h;
+}
+
+Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Hashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+OrderedHashmap *internal_ordered_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (OrderedHashmap*) hashmap_base_new(hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+Set *internal_set_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return (Set*) hashmap_base_new(hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+static int hashmap_base_ensure_allocated(HashmapBase **h, const struct hash_ops *hash_ops,
+ enum HashmapType type HASHMAP_DEBUG_PARAMS) {
+ HashmapBase *q;
+
+ assert(h);
+
+ if (*h)
+ return 0;
+
+ q = hashmap_base_new(hash_ops, type HASHMAP_DEBUG_PASS_ARGS);
+ if (!q)
+ return -ENOMEM;
+
+ *h = q;
+ return 0;
+}
+
+int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_PLAIN HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)h, hash_ops, HASHMAP_TYPE_ORDERED HASHMAP_DEBUG_PASS_ARGS);
+}
+
+int internal_set_ensure_allocated(Set **s, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS) {
+ return hashmap_base_ensure_allocated((HashmapBase**)s, hash_ops, HASHMAP_TYPE_SET HASHMAP_DEBUG_PASS_ARGS);
+}
+
+static void hashmap_free_no_clear(HashmapBase *h) {
+ assert(!h->has_indirect);
+ assert(!h->n_direct_entries);
+
+#ifdef ENABLE_HASHMAP_DEBUG
+ LIST_REMOVE(debug_list, hashmap_debug_list, &h->debug);
+#endif
if (h->from_pool)
- mempool_free_tile(&hashmap_pool, container_of(h, struct hashmap_tile, h));
+ mempool_free_tile(hashmap_type_info[h->type].mempool, h);
else
free(h);
}
-void hashmap_free_free(Hashmap *h) {
+void internal_hashmap_free(HashmapBase *h) {
+
+ /* Free the hashmap, but nothing in it */
+
+ if (!h)
+ return;
+
+ internal_hashmap_clear(h);
+ hashmap_free_no_clear(h);
+}
+
+void internal_hashmap_free_free(HashmapBase *h) {
/* Free the hashmap and all data objects in it, but not the
* keys */
@@ -242,249 +883,763 @@ void hashmap_free_free(Hashmap *h) {
if (!h)
return;
- hashmap_clear_free(h);
- hashmap_free(h);
+ internal_hashmap_clear_free(h);
+ hashmap_free_no_clear(h);
}
-void hashmap_clear(Hashmap *h) {
+void hashmap_free_free_free(Hashmap *h) {
+
+ /* Free the hashmap and all data and key objects in it */
+
if (!h)
return;
- while (h->iterate_list_head)
- remove_entry(h, h->iterate_list_head);
+ hashmap_clear_free_free(h);
+ hashmap_free_no_clear(HASHMAP_BASE(h));
}
-void hashmap_clear_free(Hashmap *h) {
- void *p;
+void internal_hashmap_clear(HashmapBase *h) {
+ if (!h)
+ return;
+
+ if (h->has_indirect) {
+ free(h->indirect.storage);
+ h->has_indirect = false;
+ }
+
+ h->n_direct_entries = 0;
+ reset_direct_storage(h);
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+ lh->iterate_list_head = lh->iterate_list_tail = IDX_NIL;
+ }
+}
+
+void internal_hashmap_clear_free(HashmapBase *h) {
+ unsigned idx;
if (!h)
return;
- while ((p = hashmap_steal_first(h)))
- free(p);
+ for (idx = skip_free_buckets(h, 0); idx != IDX_NIL;
+ idx = skip_free_buckets(h, idx + 1))
+ free(entry_value(h, bucket_at(h, idx)));
+
+ internal_hashmap_clear(h);
}
-static struct hashmap_entry *hash_scan(Hashmap *h, unsigned hash, const void *key) {
- struct hashmap_entry *e;
- assert(h);
- assert(hash < h->n_buckets);
+void hashmap_clear_free_free(Hashmap *h) {
+ unsigned idx;
- for (e = h->buckets[hash]; e; e = e->bucket_next)
- if (h->hash_ops->compare(e->key, key) == 0)
- return e;
+ if (!h)
+ return;
+
+ for (idx = skip_free_buckets(HASHMAP_BASE(h), 0); idx != IDX_NIL;
+ idx = skip_free_buckets(HASHMAP_BASE(h), idx + 1)) {
+ struct plain_hashmap_entry *e = plain_bucket_at(h, idx);
+ free((void*)e->b.key);
+ free(e->value);
+ }
- return NULL;
+ internal_hashmap_clear(HASHMAP_BASE(h));
}
-static int resize_buckets(Hashmap *h, unsigned entries_add) {
- struct hashmap_entry **n, *i;
- unsigned m, new_n_entries, new_n_buckets;
- uint8_t nkey[HASH_KEY_SIZE];
+static int resize_buckets(HashmapBase *h, unsigned entries_add);
- assert(h);
+/*
+ * Finds an empty bucket to put an entry into, starting the scan at 'idx'.
+ * Performs Robin Hood swaps as it goes. The entry to put must be placed
+ * by the caller into swap slot IDX_PUT.
+ * If used for in-place resizing, may leave a displaced entry in swap slot
+ * IDX_PUT. Caller must rehash it next.
+ * Returns: true if it left a displaced entry to rehash next in IDX_PUT,
+ * false otherwise.
+ */
+static bool hashmap_put_robin_hood(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap) {
+ dib_raw_t raw_dib, *dibs;
+ unsigned dib, distance;
- new_n_entries = h->n_entries + entries_add;
+#ifdef ENABLE_HASHMAP_DEBUG
+ h->debug.put_count++;
+#endif
- /* overflow? */
- if (_unlikely_(new_n_entries < entries_add || new_n_entries > UINT_MAX / 4))
- return -ENOMEM;
+ dibs = dib_raw_ptr(h);
- new_n_buckets = new_n_entries * 4 / 3;
+ for (distance = 0; ; distance++) {
+ raw_dib = dibs[idx];
+ if (raw_dib == DIB_RAW_FREE || raw_dib == DIB_RAW_REHASH) {
+ if (raw_dib == DIB_RAW_REHASH)
+ bucket_move_entry(h, swap, idx, IDX_TMP);
- if (_likely_(new_n_buckets <= h->n_buckets))
- return 0;
+ if (h->has_indirect && h->indirect.idx_lowest_entry > idx)
+ h->indirect.idx_lowest_entry = idx;
- /* Increase by four at least */
- m = MAX((h->n_entries+1)*4-1, new_n_buckets);
+ bucket_set_dib(h, idx, distance);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ if (raw_dib == DIB_RAW_REHASH) {
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
+ return true;
+ }
- /* If we hit OOM we simply risk packed hashmaps... */
- n = new0(struct hashmap_entry*, m);
- if (!n)
- return -ENOMEM;
+ return false;
+ }
- /* Let's use a different randomized hash key for the
- * extension, so that people cannot guess what we are using
- * here forever */
- get_hash_key(nkey, false);
+ dib = bucket_calculate_dib(h, idx, raw_dib);
- for (i = h->iterate_list_head; i; i = i->iterate_next) {
- unsigned long old_bucket, new_bucket;
+ if (dib < distance) {
+ /* Found a wealthier entry. Go Robin Hood! */
- old_bucket = h->hash_ops->hash(i->key, h->hash_key) % h->n_buckets;
+ bucket_set_dib(h, idx, distance);
- /* First, drop from old bucket table */
- if (i->bucket_next)
- i->bucket_next->bucket_previous = i->bucket_previous;
+ /* swap the entries */
+ bucket_move_entry(h, swap, idx, IDX_TMP);
+ bucket_move_entry(h, swap, IDX_PUT, idx);
+ bucket_move_entry(h, swap, IDX_TMP, IDX_PUT);
- if (i->bucket_previous)
- i->bucket_previous->bucket_next = i->bucket_next;
- else
- h->buckets[old_bucket] = i->bucket_next;
+ distance = dib;
+ }
- /* Then, add to new backet table */
- new_bucket = h->hash_ops->hash(i->key, nkey) % m;
+ idx = next_idx(h, idx);
+ }
+}
- i->bucket_next = n[new_bucket];
- i->bucket_previous = NULL;
- if (n[new_bucket])
- n[new_bucket]->bucket_previous = i;
- n[new_bucket] = i;
+/*
+ * Puts an entry into a hashmap, boldly - no check whether key already exists.
+ * The caller must place the entry (only its key and value, not link indexes)
+ * in swap slot IDX_PUT.
+ * Caller must ensure: the key does not exist yet in the hashmap.
+ * that resize is not needed if !may_resize.
+ * Returns: 1 if entry was put successfully.
+ * -ENOMEM if may_resize==true and resize failed with -ENOMEM.
+ * Cannot return -ENOMEM if !may_resize.
+ */
+static int hashmap_base_put_boldly(HashmapBase *h, unsigned idx,
+ struct swap_entries *swap, bool may_resize) {
+ struct ordered_hashmap_entry *new_entry;
+ int r;
+
+ assert(idx < n_buckets(h));
+
+ new_entry = bucket_at_swap(swap, IDX_PUT);
+
+ if (may_resize) {
+ r = resize_buckets(h, 1);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ idx = bucket_hash(h, new_entry->p.b.key);
}
+ assert(n_entries(h) < n_buckets(h));
+
+ if (h->type == HASHMAP_TYPE_ORDERED) {
+ OrderedHashmap *lh = (OrderedHashmap*) h;
+
+ new_entry->iterate_next = IDX_NIL;
+ new_entry->iterate_previous = lh->iterate_list_tail;
+
+ if (lh->iterate_list_tail != IDX_NIL) {
+ struct ordered_hashmap_entry *old_tail;
- if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
- free(h->buckets);
+ old_tail = ordered_bucket_at(lh, lh->iterate_list_tail);
+ assert(old_tail->iterate_next == IDX_NIL);
+ old_tail->iterate_next = IDX_PUT;
+ }
- h->buckets = n;
- h->n_buckets = m;
+ lh->iterate_list_tail = IDX_PUT;
+ if (lh->iterate_list_head == IDX_NIL)
+ lh->iterate_list_head = IDX_PUT;
+ }
+
+ assert_se(hashmap_put_robin_hood(h, idx, swap) == false);
- memcpy(h->hash_key, nkey, HASH_KEY_SIZE);
+ n_entries_inc(h);
+#ifdef ENABLE_HASHMAP_DEBUG
+ h->debug.max_entries = MAX(h->debug.max_entries, n_entries(h));
+#endif
return 1;
}
+#define hashmap_put_boldly(h, idx, swap, may_resize) \
+ hashmap_base_put_boldly(HASHMAP_BASE(h), idx, swap, may_resize)
+
+/*
+ * Returns 0 if resize is not needed.
+ * 1 if succesfully resized.
+ * -ENOMEM on allocation failure.
+ */
+static int resize_buckets(HashmapBase *h, unsigned entries_add) {
+ struct swap_entries swap;
+ char *new_storage;
+ dib_raw_t *old_dibs, *new_dibs;
+ const struct hashmap_type_info *hi;
+ unsigned idx, optimal_idx;
+ unsigned old_n_buckets, new_n_buckets, n_rehashed, new_n_entries;
+ uint8_t new_shift;
+ bool rehash_next;
-static int __hashmap_put(Hashmap *h, const void *key, void *value, unsigned hash) {
- /* For when we know no such entry exists yet */
+ assert(h);
- struct hashmap_entry *e;
+ hi = &hashmap_type_info[h->type];
+ new_n_entries = n_entries(h) + entries_add;
- if (resize_buckets(h, 1) > 0)
- hash = bucket_hash(h, key);
+ /* overflow? */
+ if (_unlikely_(new_n_entries < entries_add))
+ return -ENOMEM;
- if (h->from_pool)
- e = mempool_alloc_tile(&hashmap_entry_pool);
- else
- e = new(struct hashmap_entry, 1);
+ /* For direct storage we allow 100% load, because it's tiny. */
+ if (!h->has_indirect && new_n_entries <= hi->n_direct_buckets)
+ return 0;
- if (!e)
+ /*
+ * Load factor = n/m = 1 - (1/INV_KEEP_FREE).
+ * From it follows: m = n + n/(INV_KEEP_FREE - 1)
+ */
+ new_n_buckets = new_n_entries + new_n_entries / (INV_KEEP_FREE - 1);
+ /* overflow? */
+ if (_unlikely_(new_n_buckets < new_n_entries))
return -ENOMEM;
- e->key = key;
- e->value = value;
+ if (_unlikely_(new_n_buckets > UINT_MAX / (hi->entry_size + sizeof(dib_raw_t))))
+ return -ENOMEM;
- link_entry(h, e, hash);
+ old_n_buckets = n_buckets(h);
+
+ if (_likely_(new_n_buckets <= old_n_buckets))
+ return 0;
+
+ new_shift = log2u_round_up(MAX(
+ new_n_buckets * (hi->entry_size + sizeof(dib_raw_t)),
+ 2 * sizeof(struct direct_storage)));
+
+ /* Realloc storage (buckets and DIB array). */
+ new_storage = realloc(h->has_indirect ? h->indirect.storage : NULL,
+ 1U << new_shift);
+ if (!new_storage)
+ return -ENOMEM;
+
+ /* Must upgrade direct to indirect storage. */
+ if (!h->has_indirect) {
+ memcpy(new_storage, h->direct.storage,
+ old_n_buckets * (hi->entry_size + sizeof(dib_raw_t)));
+ h->indirect.n_entries = h->n_direct_entries;
+ h->indirect.idx_lowest_entry = 0;
+ h->n_direct_entries = 0;
+ }
+
+ /* Get a new hash key. If we've just upgraded to indirect storage,
+ * allow reusing a previously generated key. It's still a different key
+ * from the shared one that we used for direct storage. */
+ get_hash_key(h->indirect.hash_key, !h->has_indirect);
+
+ h->has_indirect = true;
+ h->indirect.storage = new_storage;
+ h->indirect.n_buckets = (1U << new_shift) /
+ (hi->entry_size + sizeof(dib_raw_t));
+
+ old_dibs = (dib_raw_t*)(new_storage + hi->entry_size * old_n_buckets);
+ new_dibs = dib_raw_ptr(h);
+
+ /*
+ * Move the DIB array to the new place, replacing valid DIB values with
+ * DIB_RAW_REHASH to indicate all of the used buckets need rehashing.
+ * Note: Overlap is not possible, because we have at least doubled the
+ * number of buckets and dib_raw_t is smaller than any entry type.
+ */
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ assert(old_dibs[idx] != DIB_RAW_REHASH);
+ new_dibs[idx] = old_dibs[idx] == DIB_RAW_FREE ? DIB_RAW_FREE
+ : DIB_RAW_REHASH;
+ }
+
+ /* Zero the area of newly added entries (including the old DIB area) */
+ memset(bucket_at(h, old_n_buckets), 0,
+ (n_buckets(h) - old_n_buckets) * hi->entry_size);
+
+ /* The upper half of the new DIB array needs initialization */
+ memset(&new_dibs[old_n_buckets], DIB_RAW_INIT,
+ (n_buckets(h) - old_n_buckets) * sizeof(dib_raw_t));
+
+ /* Rehash entries that need it */
+ n_rehashed = 0;
+ for (idx = 0; idx < old_n_buckets; idx++) {
+ if (new_dibs[idx] != DIB_RAW_REHASH)
+ continue;
+
+ optimal_idx = bucket_hash(h, bucket_at(h, idx)->key);
+
+ /*
+ * Not much to do if by luck the entry hashes to its current
+ * location. Just set its DIB.
+ */
+ if (optimal_idx == idx) {
+ new_dibs[idx] = 0;
+ n_rehashed++;
+ continue;
+ }
+
+ new_dibs[idx] = DIB_RAW_FREE;
+ bucket_move_entry(h, &swap, idx, IDX_PUT);
+ /* bucket_move_entry does not clear the source */
+ memset(bucket_at(h, idx), 0, hi->entry_size);
+
+ do {
+ /*
+ * Find the new bucket for the current entry. This may make
+ * another entry homeless and load it into IDX_PUT.
+ */
+ rehash_next = hashmap_put_robin_hood(h, optimal_idx, &swap);
+ n_rehashed++;
+
+ /* Did the current entry displace another one? */
+ if (rehash_next)
+ optimal_idx = bucket_hash(h, bucket_at_swap(&swap, IDX_PUT)->p.b.key);
+ } while (rehash_next);
+ }
+
+ assert(n_rehashed == n_entries(h));
return 1;
}
+/*
+ * Finds an entry with a matching key
+ * Returns: index of the found entry, or IDX_NIL if not found.
+ */
+static unsigned base_bucket_scan(HashmapBase *h, unsigned idx, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned dib, distance;
+ dib_raw_t *dibs = dib_raw_ptr(h);
+
+ assert(idx < n_buckets(h));
+
+ for (distance = 0; ; distance++) {
+ if (dibs[idx] == DIB_RAW_FREE)
+ return IDX_NIL;
+
+ dib = bucket_calculate_dib(h, idx, dibs[idx]);
+
+ if (dib < distance)
+ return IDX_NIL;
+ if (dib == distance) {
+ e = bucket_at(h, idx);
+ if (h->hash_ops->compare(e->key, key) == 0)
+ return idx;
+ }
+
+ idx = next_idx(h, idx);
+ }
+}
+#define bucket_scan(h, idx, key) base_bucket_scan(HASHMAP_BASE(h), idx, key)
+
int hashmap_put(Hashmap *h, const void *key, void *value) {
- struct hashmap_entry *e;
- unsigned hash;
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
assert(h);
hash = bucket_hash(h, key);
- e = hash_scan(h, hash, key);
- if (e) {
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
if (e->value == value)
return 0;
return -EEXIST;
}
- return __hashmap_put(h, key, value, hash);
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
}
-void* hashmap_get(Hashmap *h, const void *key) {
- unsigned hash;
- struct hashmap_entry *e;
+int set_put(Set *s, const void *key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+
+ assert(s);
+
+ hash = bucket_hash(s, key);
+ idx = bucket_scan(s, hash, key);
+ if (idx != IDX_NIL)
+ return 0;
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = key;
+ return hashmap_put_boldly(s, hash, &swap, true);
+}
+
+int hashmap_replace(Hashmap *h, const void *key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx != IDX_NIL) {
+ e = plain_bucket_at(h, idx);
+#ifdef ENABLE_HASHMAP_DEBUG
+ /* Although the key is equal, the key pointer may have changed,
+ * and this would break our assumption for iterating. So count
+ * this operation as incompatible with iteration. */
+ if (e->b.key != key) {
+ h->b.debug.put_count++;
+ h->b.debug.rem_count++;
+ h->b.debug.last_rem_idx = idx;
+ }
+#endif
+ e->b.key = key;
+ e->value = value;
+ return 0;
+ }
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = key;
+ e->value = value;
+ return hashmap_put_boldly(h, hash, &swap, true);
+}
+
+int hashmap_update(Hashmap *h, const void *key, void *value) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(h);
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = plain_bucket_at(h, idx);
+ e->value = value;
+ return 0;
+}
+
+void *internal_hashmap_get(HashmapBase *h, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
if (!h)
return NULL;
hash = bucket_hash(h, key);
- e = hash_scan(h, hash, key);
- if (!e)
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
return NULL;
- return e->value;
+ e = bucket_at(h, idx);
+ return entry_value(h, e);
}
-void* hashmap_get2(Hashmap *h, const void *key, void **key2) {
- unsigned hash;
- struct hashmap_entry *e;
+void *hashmap_get2(Hashmap *h, const void *key, void **key2) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
if (!h)
return NULL;
hash = bucket_hash(h, key);
- e = hash_scan(h, hash, key);
- if (!e)
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
return NULL;
+ e = plain_bucket_at(h, idx);
if (key2)
- *key2 = (void*) e->key;
+ *key2 = (void*) e->b.key;
return e->value;
}
-bool hashmap_contains(Hashmap *h, const void *key) {
+bool internal_hashmap_contains(HashmapBase *h, const void *key) {
unsigned hash;
if (!h)
return false;
hash = bucket_hash(h, key);
- return !!hash_scan(h, hash, key);
+ return bucket_scan(h, hash, key) != IDX_NIL;
}
-void *hashmap_iterate(Hashmap *h, Iterator *i, const void **key) {
- struct hashmap_entry *e;
+void *internal_hashmap_remove(HashmapBase *h, const void *key) {
+ struct hashmap_base_entry *e;
+ unsigned hash, idx;
+ void *data;
- assert(i);
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ remove_entry(h, idx);
+
+ return data;
+}
+
+void *hashmap_remove2(Hashmap *h, const void *key, void **rkey) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
+ void *data;
+
+ if (!h) {
+ if (rkey)
+ *rkey = NULL;
+ return NULL;
+ }
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL) {
+ if (rkey)
+ *rkey = NULL;
+ return NULL;
+ }
+
+ e = plain_bucket_at(h, idx);
+ data = e->value;
+ if (rkey)
+ *rkey = (void*) e->b.key;
+
+ remove_entry(h, idx);
+
+ return data;
+}
+
+int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx;
if (!h)
- goto at_end;
+ return -ENOENT;
- if (*i == ITERATOR_LAST)
- goto at_end;
+ old_hash = bucket_hash(h, old_key);
+ idx = bucket_scan(h, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
- if (*i == ITERATOR_FIRST && !h->iterate_list_head)
- goto at_end;
+ new_hash = bucket_hash(h, new_key);
+ if (bucket_scan(h, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
- e = *i == ITERATOR_FIRST ? h->iterate_list_head : (struct hashmap_entry*) *i;
+ remove_entry(h, idx);
- if (e->iterate_next)
- *i = (Iterator) e->iterate_next;
- else
- *i = ITERATOR_LAST;
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
- if (key)
- *key = e->key;
+ return 0;
+}
- return e->value;
+int set_remove_and_put(Set *s, const void *old_key, const void *new_key) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e;
+ unsigned old_hash, new_hash, idx;
-at_end:
- *i = ITERATOR_LAST;
+ if (!s)
+ return -ENOENT;
- if (key)
- *key = NULL;
+ old_hash = bucket_hash(s, old_key);
+ idx = bucket_scan(s, old_hash, old_key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ new_hash = bucket_hash(s, new_key);
+ if (bucket_scan(s, new_hash, new_key) != IDX_NIL)
+ return -EEXIST;
+
+ remove_entry(s, idx);
- return NULL;
+ e = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ e->key = new_key;
+ assert_se(hashmap_put_boldly(s, new_hash, &swap, false) == 1);
+
+ return 0;
}
-void* hashmap_steal_first(Hashmap *h) {
- void *data;
+int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value) {
+ struct swap_entries swap;
+ struct plain_hashmap_entry *e;
+ unsigned old_hash, new_hash, idx_old, idx_new;
+
+ if (!h)
+ return -ENOENT;
+
+ old_hash = bucket_hash(h, old_key);
+ idx_old = bucket_scan(h, old_hash, old_key);
+ if (idx_old == IDX_NIL)
+ return -ENOENT;
+
+ old_key = bucket_at(HASHMAP_BASE(h), idx_old)->key;
+
+ new_hash = bucket_hash(h, new_key);
+ idx_new = bucket_scan(h, new_hash, new_key);
+ if (idx_new != IDX_NIL)
+ if (idx_old != idx_new) {
+ remove_entry(h, idx_new);
+ /* Compensate for a possible backward shift. */
+ if (old_key != bucket_at(HASHMAP_BASE(h), idx_old)->key)
+ idx_old = prev_idx(HASHMAP_BASE(h), idx_old);
+ assert(old_key == bucket_at(HASHMAP_BASE(h), idx_old)->key);
+ }
+
+ remove_entry(h, idx_old);
+
+ e = &bucket_at_swap(&swap, IDX_PUT)->p;
+ e->b.key = new_key;
+ e->value = value;
+ assert_se(hashmap_put_boldly(h, new_hash, &swap, false) == 1);
+
+ return 0;
+}
+
+void *hashmap_remove_value(Hashmap *h, const void *key, void *value) {
+ struct plain_hashmap_entry *e;
+ unsigned hash, idx;
if (!h)
return NULL;
- if (!h->iterate_list_head)
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
return NULL;
- data = h->iterate_list_head->value;
- remove_entry(h, h->iterate_list_head);
+ e = plain_bucket_at(h, idx);
+ if (e->value != value)
+ return NULL;
+
+ remove_entry(h, idx);
+
+ return value;
+}
+
+static unsigned find_first_entry(HashmapBase *h) {
+ Iterator i = ITERATOR_FIRST;
+
+ if (!h || !n_entries(h))
+ return IDX_NIL;
+
+ return hashmap_iterate_entry(h, &i);
+}
+
+void *internal_hashmap_first(HashmapBase *h) {
+ unsigned idx;
+
+ idx = find_first_entry(h);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ return entry_value(h, bucket_at(h, idx));
+}
+
+void *internal_hashmap_first_key(HashmapBase *h) {
+ struct hashmap_base_entry *e;
+ unsigned idx;
+
+ idx = find_first_entry(h);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ return (void*) e->key;
+}
+
+void *internal_hashmap_steal_first(HashmapBase *h) {
+ struct hashmap_base_entry *e;
+ void *data;
+ unsigned idx;
+
+ idx = find_first_entry(h);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ data = entry_value(h, e);
+ remove_entry(h, idx);
return data;
}
-unsigned hashmap_size(Hashmap *h) {
+void *internal_hashmap_steal_first_key(HashmapBase *h) {
+ struct hashmap_base_entry *e;
+ void *key;
+ unsigned idx;
+
+ idx = find_first_entry(h);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = bucket_at(h, idx);
+ key = (void*) e->key;
+ remove_entry(h, idx);
+
+ return key;
+}
+
+unsigned internal_hashmap_size(HashmapBase *h) {
if (!h)
return 0;
- return h->n_entries;
+ return n_entries(h);
+}
+
+unsigned internal_hashmap_buckets(HashmapBase *h) {
+
+ if (!h)
+ return 0;
+
+ return n_buckets(h);
+}
+
+int internal_hashmap_merge(Hashmap *h, Hashmap *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(h);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct plain_hashmap_entry *pe = plain_bucket_at(other, idx);
+ int r;
+
+ r = hashmap_put(h, pe->b.key, pe->value);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ return 0;
+}
+
+int set_merge(Set *s, Set *other) {
+ Iterator i;
+ unsigned idx;
+
+ assert(s);
+
+ HASHMAP_FOREACH_IDX(idx, HASHMAP_BASE(other), i) {
+ struct set_entry *se = set_bucket_at(other, idx);
+ int r;
+
+ r = set_put(s, se->b.key);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
}
-int hashmap_reserve(Hashmap *h, unsigned entries_add) {
+int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add) {
int r;
assert(h);
@@ -496,20 +1651,200 @@ int hashmap_reserve(Hashmap *h, unsigned entries_add) {
return 0;
}
-char **hashmap_get_strv(Hashmap *h) {
+/*
+ * The same as hashmap_merge(), but every new item from other is moved to h.
+ * Keys already in h are skipped and stay in other.
+ * Returns: 0 on success.
+ * -ENOMEM on alloc failure, in which case no move has been done.
+ */
+int internal_hashmap_move(HashmapBase *h, HashmapBase *other) {
+ struct swap_entries swap;
+ struct hashmap_base_entry *e, *n;
+ Iterator i;
+ unsigned idx;
+ int r;
+
+ assert(h);
+
+ if (!other)
+ return 0;
+
+ assert(other->type == h->type);
+
+ /*
+ * This reserves buckets for the worst case, where none of other's
+ * entries are yet present in h. This is preferable to risking
+ * an allocation failure in the middle of the moving and having to
+ * rollback or return a partial result.
+ */
+ r = resize_buckets(h, n_entries(other));
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_IDX(idx, other, i) {
+ unsigned h_hash;
+
+ e = bucket_at(other, idx);
+ h_hash = bucket_hash(h, e->key);
+ if (bucket_scan(h, h_hash, e->key) != IDX_NIL)
+ continue;
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ assert_se(hashmap_put_boldly(h, h_hash, &swap, false) == 1);
+
+ remove_entry(other, idx);
+ }
+
+ return 0;
+}
+
+int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key) {
+ struct swap_entries swap;
+ unsigned h_hash, other_hash, idx;
+ struct hashmap_base_entry *e, *n;
+ int r;
+
+ assert(h);
+
+ h_hash = bucket_hash(h, key);
+ if (bucket_scan(h, h_hash, key) != IDX_NIL)
+ return -EEXIST;
+
+ if (!other)
+ return -ENOENT;
+
+ assert(other->type == h->type);
+
+ other_hash = bucket_hash(other, key);
+ idx = bucket_scan(other, other_hash, key);
+ if (idx == IDX_NIL)
+ return -ENOENT;
+
+ e = bucket_at(other, idx);
+
+ n = &bucket_at_swap(&swap, IDX_PUT)->p.b;
+ n->key = e->key;
+ if (h->type != HASHMAP_TYPE_SET)
+ ((struct plain_hashmap_entry*) n)->value =
+ ((struct plain_hashmap_entry*) e)->value;
+ r = hashmap_put_boldly(h, h_hash, &swap, true);
+ if (r < 0)
+ return r;
+
+ remove_entry(other, idx);
+ return 0;
+}
+
+HashmapBase *internal_hashmap_copy(HashmapBase *h) {
+ HashmapBase *copy;
+ int r;
+
+ assert(h);
+
+ copy = hashmap_base_new(h->hash_ops, h->type HASHMAP_DEBUG_SRC_ARGS);
+ if (!copy)
+ return NULL;
+
+ switch (h->type) {
+ case HASHMAP_TYPE_PLAIN:
+ case HASHMAP_TYPE_ORDERED:
+ r = hashmap_merge((Hashmap*)copy, (Hashmap*)h);
+ break;
+ case HASHMAP_TYPE_SET:
+ r = set_merge((Set*)copy, (Set*)h);
+ break;
+ default:
+ assert_not_reached("Unknown hashmap type");
+ }
+
+ if (r < 0) {
+ internal_hashmap_free(copy);
+ return NULL;
+ }
+
+ return copy;
+}
+
+char **internal_hashmap_get_strv(HashmapBase *h) {
char **sv;
- Iterator it;
- char *item;
- int n;
+ Iterator i;
+ unsigned idx, n;
- sv = new(char*, h->n_entries+1);
+ sv = new(char*, n_entries(h)+1);
if (!sv)
return NULL;
n = 0;
- HASHMAP_FOREACH(item, h, it)
- sv[n++] = item;
+ HASHMAP_FOREACH_IDX(idx, h, i)
+ sv[n++] = entry_value(h, bucket_at(h, idx));
sv[n] = NULL;
return sv;
}
+
+void *ordered_hashmap_next(OrderedHashmap *h, const void *key) {
+ struct ordered_hashmap_entry *e;
+ unsigned hash, idx;
+
+ assert(key);
+
+ if (!h)
+ return NULL;
+
+ hash = bucket_hash(h, key);
+ idx = bucket_scan(h, hash, key);
+ if (idx == IDX_NIL)
+ return NULL;
+
+ e = ordered_bucket_at(h, idx);
+ if (e->iterate_next == IDX_NIL)
+ return NULL;
+ return ordered_bucket_at(h, e->iterate_next)->p.value;
+}
+
+int set_consume(Set *s, void *value) {
+ int r;
+
+ r = set_put(s, value);
+ if (r < 0)
+ free(value);
+
+ return r;
+}
+
+int set_put_strdup(Set *s, const char *p) {
+ char *c;
+ int r;
+
+ assert(s);
+ assert(p);
+
+ c = strdup(p);
+ if (!c)
+ return -ENOMEM;
+
+ r = set_consume(s, c);
+ if (r == -EEXIST)
+ return 0;
+
+ return r;
+}
+
+int set_put_strdupv(Set *s, char **l) {
+ int n = 0, r;
+ char **i;
+
+ STRV_FOREACH(i, l) {
+ r = set_put_strdup(s, *i);
+ if (r < 0)
+ return r;
+
+ n += r;
+ }
+
+ return n;
+}