diff options
author | Michal Schmidt <mschmidt@redhat.com> | 2014-10-31 13:28:12 -0400 |
---|---|---|
committer | Anthony G. Basile <blueness@gentoo.org> | 2014-10-31 13:28:12 -0400 |
commit | 03221aa40a8529c0a2d1262d7a0ee54349ff38be (patch) | |
tree | 90eca73059405f7eacc32b0892df71c5d8d6cb37 /src/shared/hashmap.h | |
parent | 11c32d3baa60f9919b7eba4f58841db0039403f2 (diff) |
hashmap: rewrite the implementation
We reintroduce hashmap.{h,c}, list.h and set.h verbatim from upstream,
before we punt dead code. The following is the upstream message:
This is a rewrite of the hashmap implementation. Its advantage is lower
memory usage.
It uses open addressing (entries are stored in an array, as opposed to
linked lists). Hash collisions are resolved with linear probing and
Robin Hood displacement policy. See the references in hashmap.c.
Some fun empirical findings about hashmap usage in systemd on my laptop:
- 98 % of allocated hashmaps are Sets.
- Sets contain 78 % of all entries, plain Hashmaps 17 %, and
OrderedHashmaps 5 %.
- 60 % of allocated hashmaps contain only 1 entry.
- 90 % of allocated hashmaps contain 5 or fewer entries.
- 75 % of all entries are in hashmaps that use trivial_hash_ops.
Clearly it makes sense to:
- store entries in distinct entry types. Especially for Sets - their
entries are the most numerous and they require the least information
to store an entry.
- have a way to store small numbers of entries directly in the hashmap
structs, and only allocate the usual entry arrays when the direct
storage is full.
The implementation has an optional debugging feature (enabled by
defining the ENABLE_HASHMAP_DEBUG macro), where it:
- tracks all allocated hashmaps in a linked list so that one can
easily find them in gdb,
- tracks which function/line allocated a given hashmap, and
- checks for invalid mixing of hashmap iteration and modification.
Since entries are not allocated one-by-one anymore, mempools are not
used for entries. Originally I meant to drop mempools entirely, but it's
still worth it to use them for the hashmap structs. My testing indicates
that it makes loading of units about 5 % faster (a test with 10000 units
where more than 200000 hashmaps are allocated - pure malloc: 449±4 ms,
mempools: 427±7 ms).
Here are some memory usage numbers, taken on my laptop with a more or
less normal Fedora setup after booting with SELinux disabled (SELinux
increases systemd's memory usage significantly):
systemd (PID 1) Original New Change
dirty memory (from pmap -x 1) [KiB] 2152 1264 -41 %
total heap allocations (from gdb-heap) [KiB] 1623 756 -53 %
Signed-off-by: Anthony G. Basile <blueness@gentoo.org>
Diffstat (limited to 'src/shared/hashmap.h')
-rw-r--r-- | src/shared/hashmap.h | 385 |
1 files changed, 360 insertions, 25 deletions
diff --git a/src/shared/hashmap.h b/src/shared/hashmap.h index 22bda37437..9c6e0cab18 100644 --- a/src/shared/hashmap.h +++ b/src/shared/hashmap.h @@ -1,7 +1,12 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#pragma once + /*** - This file is part of eudev, forked from systemd. + This file is part of systemd. Copyright 2010 Lennart Poettering + Copyright 2014 Michal Schmidt systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -17,26 +22,50 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -#pragma once - #include <stdbool.h> #include "macro.h" #include "util.h" -/* Pretty straightforward hash table implementation. As a minor - * optimization a NULL hashmap object will be treated as empty hashmap - * for all read operations. That way it is not necessary to - * instantiate an object for each Hashmap use. */ +/* + * A hash table implementation. As a minor optimization a NULL hashmap object + * will be treated as empty hashmap for all read operations. That way it is not + * necessary to instantiate an object for each Hashmap use. + * + * If ENABLE_HASHMAP_DEBUG is defined (by configuring with --enable-hashmap-debug), + * the implemention will: + * - store extra data for debugging and statistics (see tools/gdb-sd_dump_hashmaps.py) + * - perform extra checks for invalid use of iterators + */ #define HASH_KEY_SIZE 16 -typedef struct Hashmap Hashmap; -typedef struct _IteratorStruct _IteratorStruct; -typedef _IteratorStruct* Iterator; +/* The base type for all hashmap and set types. Many functions in the + * implementation take (HashmapBase*) parameters and are run-time polymorphic, + * though the API is not meant to be polymorphic (do not call functions + * prefixed with two underscores directly). 
*/ +typedef struct HashmapBase HashmapBase; + +/* Specific hashmap/set types */ +typedef struct Hashmap Hashmap; /* Maps keys to values */ +typedef struct OrderedHashmap OrderedHashmap; /* Like Hashmap, but also remembers entry insertion order */ +typedef struct Set Set; /* Stores just keys */ -#define ITERATOR_FIRST ((Iterator) 0) -#define ITERATOR_LAST ((Iterator) -1) +/* Ideally the Iterator would be an opaque struct, but it is instantiated + * by hashmap users, so the definition has to be here. Do not use its fields + * directly. */ +typedef struct { + unsigned idx; /* index of an entry to be iterated next */ + const void *next_key; /* expected value of that entry's key pointer */ +#ifdef ENABLE_HASHMAP_DEBUG + unsigned put_count; /* hashmap's put_count recorded at start of iteration */ + unsigned rem_count; /* hashmap's rem_count in previous iteration */ + unsigned prev_idx; /* idx in previous iteration */ +#endif +} Iterator; + +#define _IDX_ITERATOR_FIRST (UINT_MAX - 1) +#define ITERATOR_FIRST ((Iterator) { .idx = _IDX_ITERATOR_FIRST, .next_key = NULL }) typedef unsigned long (*hash_func_t)(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]); typedef int (*compare_func_t)(const void *a, const void *b); @@ -57,29 +86,335 @@ unsigned long trivial_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_S int trivial_compare_func(const void *a, const void *b) _const_; extern const struct hash_ops trivial_hash_ops; -Hashmap *hashmap_new(const struct hash_ops *hash_ops); -void hashmap_free(Hashmap *h); -void hashmap_free_free(Hashmap *h); +/* 32bit values we can always just embedd in the pointer itself, but + * in order to support 32bit archs we need store 64bit values + * indirectly, since they don't fit in a pointer. 
*/ +unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_; +int uint64_compare_func(const void *a, const void *b) _pure_; +extern const struct hash_ops uint64_hash_ops; + +/* On some archs dev_t is 32bit, and on others 64bit. And sometimes + * it's 64bit on 32bit archs, and sometimes 32bit on 64bit archs. Yuck! */ +#if SIZEOF_DEV_T != 8 +unsigned long devt_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_; +int devt_compare_func(const void *a, const void *b) _pure_; +/* Declaration only, like trivial_hash_ops and uint64_hash_ops: an initializer + * here would define the object in every file including this header. The one + * definition belongs in a single .c file (presumably hashmap.c - confirm). */ +extern const struct hash_ops devt_hash_ops; +#else +#define devt_hash_func uint64_hash_func +#define devt_compare_func uint64_compare_func +#define devt_hash_ops uint64_hash_ops +#endif + +/* Macros for type checking */ +#define PTR_COMPATIBLE_WITH_HASHMAP_BASE(h) \ + (__builtin_types_compatible_p(typeof(h), HashmapBase*) || \ + __builtin_types_compatible_p(typeof(h), Hashmap*) || \ + __builtin_types_compatible_p(typeof(h), OrderedHashmap*) || \ + __builtin_types_compatible_p(typeof(h), Set*)) + +#define PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h) \ + (__builtin_types_compatible_p(typeof(h), Hashmap*) || \ + __builtin_types_compatible_p(typeof(h), OrderedHashmap*)) \ + +#define HASHMAP_BASE(h) \ + __builtin_choose_expr(PTR_COMPATIBLE_WITH_HASHMAP_BASE(h), \ + (HashmapBase*)(h), \ + (void)0) + +#define PLAIN_HASHMAP(h) \ + __builtin_choose_expr(PTR_COMPATIBLE_WITH_PLAIN_HASHMAP(h), \ + (Hashmap*)(h), \ + (void)0) + +#ifdef ENABLE_HASHMAP_DEBUG +# define HASHMAP_DEBUG_PARAMS , const char *func, const char *file, int line +# define HASHMAP_DEBUG_SRC_ARGS , __func__, __FILE__, __LINE__ +# define HASHMAP_DEBUG_PASS_ARGS , func, file, line +#else +# define HASHMAP_DEBUG_PARAMS +# define HASHMAP_DEBUG_SRC_ARGS +# define HASHMAP_DEBUG_PASS_ARGS +#endif + +Hashmap *internal_hashmap_new(const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +OrderedHashmap *internal_ordered_hashmap_new(const struct 
hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define hashmap_new(ops) internal_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS) +#define ordered_hashmap_new(ops) internal_ordered_hashmap_new(ops HASHMAP_DEBUG_SRC_ARGS) + +void internal_hashmap_free(HashmapBase *h); +static inline void hashmap_free(Hashmap *h) { + internal_hashmap_free(HASHMAP_BASE(h)); +} +static inline void ordered_hashmap_free(OrderedHashmap *h) { + internal_hashmap_free(HASHMAP_BASE(h)); +} + +void internal_hashmap_free_free(HashmapBase *h); +static inline void hashmap_free_free(Hashmap *h) { + internal_hashmap_free_free(HASHMAP_BASE(h)); +} +static inline void ordered_hashmap_free_free(OrderedHashmap *h) { + internal_hashmap_free_free(HASHMAP_BASE(h)); +} + +void hashmap_free_free_free(Hashmap *h); +static inline void ordered_hashmap_free_free_free(OrderedHashmap *h) { + hashmap_free_free_free(PLAIN_HASHMAP(h)); +} + +HashmapBase *internal_hashmap_copy(HashmapBase *h); +static inline Hashmap *hashmap_copy(Hashmap *h) { + return (Hashmap*) internal_hashmap_copy(HASHMAP_BASE(h)); +} +static inline OrderedHashmap *ordered_hashmap_copy(OrderedHashmap *h) { + return (OrderedHashmap*) internal_hashmap_copy(HASHMAP_BASE(h)); +} + +int internal_hashmap_ensure_allocated(Hashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +int internal_ordered_hashmap_ensure_allocated(OrderedHashmap **h, const struct hash_ops *hash_ops HASHMAP_DEBUG_PARAMS); +#define hashmap_ensure_allocated(h, ops) internal_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) +#define ordered_hashmap_ensure_allocated(h, ops) internal_ordered_hashmap_ensure_allocated(h, ops HASHMAP_DEBUG_SRC_ARGS) int hashmap_put(Hashmap *h, const void *key, void *value); -void *hashmap_get(Hashmap *h, const void *key); +static inline int ordered_hashmap_put(OrderedHashmap *h, const void *key, void *value) { + return hashmap_put(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_update(Hashmap *h, const void *key, void *value); +static inline int 
ordered_hashmap_update(OrderedHashmap *h, const void *key, void *value) { + return hashmap_update(PLAIN_HASHMAP(h), key, value); +} + +int hashmap_replace(Hashmap *h, const void *key, void *value); +static inline int ordered_hashmap_replace(OrderedHashmap *h, const void *key, void *value) { + return hashmap_replace(PLAIN_HASHMAP(h), key, value); +} + +void *internal_hashmap_get(HashmapBase *h, const void *key); +static inline void *hashmap_get(Hashmap *h, const void *key) { + return internal_hashmap_get(HASHMAP_BASE(h), key); +} +static inline void *ordered_hashmap_get(OrderedHashmap *h, const void *key) { + return internal_hashmap_get(HASHMAP_BASE(h), key); +} + void *hashmap_get2(Hashmap *h, const void *key, void **rkey); -bool hashmap_contains(Hashmap *h, const void *key); -int hashmap_reserve(Hashmap *h, unsigned entries_add); +static inline void *ordered_hashmap_get2(OrderedHashmap *h, const void *key, void **rkey) { + return hashmap_get2(PLAIN_HASHMAP(h), key, rkey); +} -unsigned hashmap_size(Hashmap *h) _pure_; +bool internal_hashmap_contains(HashmapBase *h, const void *key); +static inline bool hashmap_contains(Hashmap *h, const void *key) { + return internal_hashmap_contains(HASHMAP_BASE(h), key); +} +static inline bool ordered_hashmap_contains(OrderedHashmap *h, const void *key) { + return internal_hashmap_contains(HASHMAP_BASE(h), key); +} -void *hashmap_iterate(Hashmap *h, Iterator *i, const void **key); +void *internal_hashmap_remove(HashmapBase *h, const void *key); +static inline void *hashmap_remove(Hashmap *h, const void *key) { + return internal_hashmap_remove(HASHMAP_BASE(h), key); +} +static inline void *ordered_hashmap_remove(OrderedHashmap *h, const void *key) { + return internal_hashmap_remove(HASHMAP_BASE(h), key); +} -void hashmap_clear(Hashmap *h); -void hashmap_clear_free(Hashmap *h); +void *hashmap_remove2(Hashmap *h, const void *key, void **rkey); +static inline void *ordered_hashmap_remove2(OrderedHashmap *h, const void *key, void 
**rkey) { + return hashmap_remove2(PLAIN_HASHMAP(h), key, rkey); +} -void *hashmap_steal_first(Hashmap *h); +void *hashmap_remove_value(Hashmap *h, const void *key, void *value); +static inline void *ordered_hashmap_remove_value(OrderedHashmap *h, const void *key, void *value) { + return hashmap_remove_value(PLAIN_HASHMAP(h), key, value); +} -char **hashmap_get_strv(Hashmap *h); +int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key, void *value); +static inline int ordered_hashmap_remove_and_put(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) { + return hashmap_remove_and_put(PLAIN_HASHMAP(h), old_key, new_key, value); +} +int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_key, void *value); +static inline int ordered_hashmap_remove_and_replace(OrderedHashmap *h, const void *old_key, const void *new_key, void *value) { + return hashmap_remove_and_replace(PLAIN_HASHMAP(h), old_key, new_key, value); +} + +/* Since merging data from a OrderedHashmap into a Hashmap or vice-versa + * should just work, allow this by having looser type-checking here. */ +int internal_hashmap_merge(Hashmap *h, Hashmap *other); +#define hashmap_merge(h, other) internal_hashmap_merge(PLAIN_HASHMAP(h), PLAIN_HASHMAP(other)) +#define ordered_hashmap_merge(h, other) hashmap_merge(h, other) + +int internal_hashmap_reserve(HashmapBase *h, unsigned entries_add); +static inline int hashmap_reserve(Hashmap *h, unsigned entries_add) { + return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add); +} +static inline int ordered_hashmap_reserve(OrderedHashmap *h, unsigned entries_add) { + return internal_hashmap_reserve(HASHMAP_BASE(h), entries_add); +} + +int internal_hashmap_move(HashmapBase *h, HashmapBase *other); +/* Unlike hashmap_merge, hashmap_move does not allow mixing the types. 
*/ +static inline int hashmap_move(Hashmap *h, Hashmap *other) { + return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other)); +} +static inline int ordered_hashmap_move(OrderedHashmap *h, OrderedHashmap *other) { + return internal_hashmap_move(HASHMAP_BASE(h), HASHMAP_BASE(other)); +} + +int internal_hashmap_move_one(HashmapBase *h, HashmapBase *other, const void *key); +static inline int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) { + return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key); +} +static inline int ordered_hashmap_move_one(OrderedHashmap *h, OrderedHashmap *other, const void *key) { + return internal_hashmap_move_one(HASHMAP_BASE(h), HASHMAP_BASE(other), key); +} + +unsigned internal_hashmap_size(HashmapBase *h) _pure_; +static inline unsigned hashmap_size(Hashmap *h) { + return internal_hashmap_size(HASHMAP_BASE(h)); +} +static inline unsigned ordered_hashmap_size(OrderedHashmap *h) { + return internal_hashmap_size(HASHMAP_BASE(h)); +} + +static inline bool hashmap_isempty(Hashmap *h) { + return hashmap_size(h) == 0; +} +static inline bool ordered_hashmap_isempty(OrderedHashmap *h) { + return ordered_hashmap_size(h) == 0; +} + +unsigned internal_hashmap_buckets(HashmapBase *h) _pure_; +static inline unsigned hashmap_buckets(Hashmap *h) { + return internal_hashmap_buckets(HASHMAP_BASE(h)); +} +static inline unsigned ordered_hashmap_buckets(OrderedHashmap *h) { + return internal_hashmap_buckets(HASHMAP_BASE(h)); +} + +void *internal_hashmap_iterate(HashmapBase *h, Iterator *i, const void **key); +static inline void *hashmap_iterate(Hashmap *h, Iterator *i, const void **key) { + return internal_hashmap_iterate(HASHMAP_BASE(h), i, key); +} +static inline void *ordered_hashmap_iterate(OrderedHashmap *h, Iterator *i, const void **key) { + return internal_hashmap_iterate(HASHMAP_BASE(h), i, key); +} + +void internal_hashmap_clear(HashmapBase *h); +static inline void hashmap_clear(Hashmap *h) { + 
internal_hashmap_clear(HASHMAP_BASE(h)); +} +static inline void ordered_hashmap_clear(OrderedHashmap *h) { + internal_hashmap_clear(HASHMAP_BASE(h)); +} + +void internal_hashmap_clear_free(HashmapBase *h); +static inline void hashmap_clear_free(Hashmap *h) { + internal_hashmap_clear_free(HASHMAP_BASE(h)); +} +static inline void ordered_hashmap_clear_free(OrderedHashmap *h) { + internal_hashmap_clear_free(HASHMAP_BASE(h)); +} + +void hashmap_clear_free_free(Hashmap *h); +static inline void ordered_hashmap_clear_free_free(OrderedHashmap *h) { + hashmap_clear_free_free(PLAIN_HASHMAP(h)); +} + +/* + * Note about all *_first*() functions + * + * For plain Hashmaps and Sets the order of entries is undefined. + * The functions find whatever entry is first in the implementation + * internal order. + * + * Only for OrderedHashmaps the order is well defined and finding + * the first entry is O(1). + */ + +void *internal_hashmap_steal_first(HashmapBase *h); +static inline void *hashmap_steal_first(Hashmap *h) { + return internal_hashmap_steal_first(HASHMAP_BASE(h)); +} +static inline void *ordered_hashmap_steal_first(OrderedHashmap *h) { + return internal_hashmap_steal_first(HASHMAP_BASE(h)); +} + +void *internal_hashmap_steal_first_key(HashmapBase *h); +static inline void *hashmap_steal_first_key(Hashmap *h) { + return internal_hashmap_steal_first_key(HASHMAP_BASE(h)); +} +static inline void *ordered_hashmap_steal_first_key(OrderedHashmap *h) { + return internal_hashmap_steal_first_key(HASHMAP_BASE(h)); +} + +void *internal_hashmap_first_key(HashmapBase *h) _pure_; +static inline void *hashmap_first_key(Hashmap *h) { + return internal_hashmap_first_key(HASHMAP_BASE(h)); +} +static inline void *ordered_hashmap_first_key(OrderedHashmap *h) { + return internal_hashmap_first_key(HASHMAP_BASE(h)); +} + +void *internal_hashmap_first(HashmapBase *h) _pure_; +static inline void *hashmap_first(Hashmap *h) { + return internal_hashmap_first(HASHMAP_BASE(h)); +} +static inline void 
*ordered_hashmap_first(OrderedHashmap *h) { + return internal_hashmap_first(HASHMAP_BASE(h)); +} + +/* no hashmap_next */ +void *ordered_hashmap_next(OrderedHashmap *h, const void *key); + +char **internal_hashmap_get_strv(HashmapBase *h); +static inline char **hashmap_get_strv(Hashmap *h) { + return internal_hashmap_get_strv(HASHMAP_BASE(h)); +} +static inline char **ordered_hashmap_get_strv(OrderedHashmap *h) { + return internal_hashmap_get_strv(HASHMAP_BASE(h)); +} + +/* + * Hashmaps are iterated in unpredictable order. + * OrderedHashmaps are an exception to this. They are iterated in the order + * the entries were inserted. + * It is safe to remove the current entry. + */ #define HASHMAP_FOREACH(e, h, i) \ - for ((i) = ITERATOR_FIRST, (e) = hashmap_iterate((h), &(i), NULL); (e); (e) = hashmap_iterate((h), &(i), NULL)) + for ((i) = ITERATOR_FIRST, (e) = hashmap_iterate((h), &(i), NULL); \ + (e); \ + (e) = hashmap_iterate((h), &(i), NULL)) + +#define ORDERED_HASHMAP_FOREACH(e, h, i) \ + for ((i) = ITERATOR_FIRST, (e) = ordered_hashmap_iterate((h), &(i), NULL); \ + (e); \ + (e) = ordered_hashmap_iterate((h), &(i), NULL)) + +#define HASHMAP_FOREACH_KEY(e, k, h, i) \ + for ((i) = ITERATOR_FIRST, (e) = hashmap_iterate((h), &(i), (const void**) &(k)); \ + (e); \ + (e) = hashmap_iterate((h), &(i), (const void**) &(k))) + +#define ORDERED_HASHMAP_FOREACH_KEY(e, k, h, i) \ + for ((i) = ITERATOR_FIRST, (e) = ordered_hashmap_iterate((h), &(i), (const void**) &(k)); \ + (e); \ + (e) = ordered_hashmap_iterate((h), &(i), (const void**) &(k))) DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(Hashmap*, hashmap_free_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(OrderedHashmap*, ordered_hashmap_free_free_free); + #define _cleanup_hashmap_free_ 
_cleanup_(hashmap_freep) +#define _cleanup_hashmap_free_free_ _cleanup_(hashmap_free_freep) +#define _cleanup_hashmap_free_free_free_ _cleanup_(hashmap_free_free_freep) +#define _cleanup_ordered_hashmap_free_ _cleanup_(ordered_hashmap_freep) +#define _cleanup_ordered_hashmap_free_free_ _cleanup_(ordered_hashmap_free_freep) +#define _cleanup_ordered_hashmap_free_free_free_ _cleanup_(ordered_hashmap_free_free_freep) |