From 9bf3b53533cdc9b95c921b71da755401f223f765 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 22 Dec 2013 19:59:12 +0100 Subject: shared: switch our hash table implementation over to SipHash SipHash appears to be the new gold standard for hashing smaller strings for hashtables these days, so let's make use of it. --- src/shared/ask-password-api.c | 2 +- src/shared/hashmap.c | 104 +++++++++++++++----------------- src/shared/hashmap.h | 10 ++-- src/shared/siphash24.c | 135 ++++++++++++++++++++++++++++++++++++++++++ src/shared/siphash24.h | 6 ++ src/shared/util.c | 60 +++++++++++-------- src/shared/util.h | 15 ++++- 7 files changed, 245 insertions(+), 87 deletions(-) create mode 100644 src/shared/siphash24.c create mode 100644 src/shared/siphash24.h (limited to 'src/shared') diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c index 755abf0b5e..c9c82b2520 100644 --- a/src/shared/ask-password-api.c +++ b/src/shared/ask-password-api.c @@ -262,7 +262,7 @@ static int create_socket(char **name) { return -errno; } - snprintf(sa.un.sun_path, sizeof(sa.un.sun_path)-1, "/run/systemd/ask-password/sck.%llu", random_ull()); + snprintf(sa.un.sun_path, sizeof(sa.un.sun_path)-1, "/run/systemd/ask-password/sck.%" PRIx64, random_u64()); RUN_WITH_UMASK(0177) { r = bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path)); diff --git a/src/shared/hashmap.c b/src/shared/hashmap.c index 3762e3ab0d..b1dccaf4e7 100644 --- a/src/shared/hashmap.c +++ b/src/shared/hashmap.c @@ -24,13 +24,10 @@ #include #include -#ifdef HAVE_SYS_AUXV_H -#include -#endif - #include "util.h" #include "hashmap.h" #include "macro.h" +#include "siphash24.h" #define INITIAL_N_BUCKETS 31 @@ -50,8 +47,8 @@ struct Hashmap { struct hashmap_entry ** buckets; unsigned n_buckets, n_entries; - unsigned random_xor; - bool from_pool; + uint8_t hash_key[HASH_KEY_SIZE]; + bool from_pool:1; }; struct pool { @@ -134,51 +131,60 @@ __attribute__((destructor)) static void cleanup_pool(void) { #endif -unsigned string_hash_func(const void *p) { - unsigned hash = 5381; - const signed char *c; - - /* DJB's hash function */ - - for (c = p; *c; c++) - hash = (hash << 5) + hash + (unsigned) *c; - - return hash; +unsigned long string_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) { + uint64_t u; + siphash24((uint8_t*) &u, p, strlen(p), hash_key); + return (unsigned long) u; } int string_compare_func(const void *a, const void *b) { return strcmp(a, b); } -unsigned trivial_hash_func(const void *p) { - return PTR_TO_UINT(p); +unsigned long trivial_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) { + uint64_t u; + siphash24((uint8_t*) &u, &p, sizeof(p), hash_key); + return (unsigned long) u; } int trivial_compare_func(const void *a, const void *b) { return a < b ? -1 : (a > b ? 1 : 0); } -unsigned uint64_hash_func(const void *p) { +unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) { uint64_t u; - - assert_cc(sizeof(uint64_t) == 2*sizeof(unsigned)); - - u = *(const uint64_t*) p; - - return (unsigned) ((u >> 32) ^ u); + siphash24((uint8_t*) &u, p, sizeof(uint64_t), hash_key); + return (unsigned long) u; } int uint64_compare_func(const void *_a, const void *_b) { uint64_t a, b; - a = *(const uint64_t*) _a; b = *(const uint64_t*) _b; - return a < b ? -1 : (a > b ? 1 : 0); } static unsigned bucket_hash(Hashmap *h, const void *p) { - return (h->hash_func(p) ^ h->random_xor) % h->n_buckets; + return (unsigned) (h->hash_func(p, h->hash_key) % h->n_buckets); +} + +static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) { + static uint8_t current[HASH_KEY_SIZE]; + static bool current_initialized = false; + + /* Returns a hash function key to use. In order to keep things + * fast we will not generate a new key each time we allocate a + * new hash table. Instead, we'll just reuse the most recently + * generated one, except if we never generated one or when we + * are rehashing an entire hash table because we reached a + * fill level */ + + if (!current_initialized || !reuse_is_ok) { + random_bytes(current, sizeof(current)); + current_initialized = true; + } + + memcpy(hash_key, current, sizeof(current)); } Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) { @@ -214,21 +220,7 @@ Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) { h->from_pool = b; - /* Let's randomize our hash functions a bit so that they are - * harder to guess for clients. For this, start out by cheaply - * using some bits the kernel passed into the process using - * the auxiliary vector. If the hashmap grows later on we will - * rehash everything using a new random XOR mask from - * /dev/random. */ -#ifdef HAVE_SYS_AUXV_H - { - void *auxv; - auxv = (void*) getauxval(AT_RANDOM); - h->random_xor = auxv ? *(unsigned*) auxv : random_u(); - } -#else - h->random_xor = random_u(); -#endif + get_hash_key(h->hash_key, true); return h; } @@ -407,7 +399,8 @@ static struct hashmap_entry *hash_scan(Hashmap *h, unsigned hash, const void *ke static bool resize_buckets(Hashmap *h) { struct hashmap_entry **n, *i; - unsigned m, nxor; + unsigned m; + uint8_t nkey[HASH_KEY_SIZE]; assert(h); @@ -422,15 +415,15 @@ static bool resize_buckets(Hashmap *h) { if (!n) return false; - /* Let's use a different randomized xor value for the + /* Let's use a different randomized hash key for the * extension, so that people cannot guess what we are using * here forever */ - nxor = random_u(); + get_hash_key(nkey, false); for (i = h->iterate_list_head; i; i = i->iterate_next) { - unsigned hash, x; + unsigned long old_bucket, new_bucket; - hash = h->hash_func(i->key); + old_bucket = h->hash_func(i->key, h->hash_key) % h->n_buckets; /* First, drop from old bucket table */ if (i->bucket_next) @@ -439,16 +432,16 @@ static bool resize_buckets(Hashmap *h) { if (i->bucket_previous) i->bucket_previous->bucket_next = i->bucket_next; else - h->buckets[(hash ^ h->random_xor) % h->n_buckets] = i->bucket_next; + h->buckets[old_bucket] = i->bucket_next; /* Then, add to new backet table */ - x = (hash ^ nxor) % m; + new_bucket = h->hash_func(i->key, nkey) % m; - i->bucket_next = n[x]; + i->bucket_next = n[new_bucket]; i->bucket_previous = NULL; - if (n[x]) - n[x]->bucket_previous = i; - n[x] = i; + if (n[new_bucket]) + n[new_bucket]->bucket_previous = i; + n[new_bucket] = i; } if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap)))) @@ -456,7 +449,8 @@ static bool resize_buckets(Hashmap *h) { h->buckets = n; h->n_buckets = m; - h->random_xor = nxor; + + memcpy(h->hash_key, nkey, HASH_KEY_SIZE); return true; } diff --git a/src/shared/hashmap.h b/src/shared/hashmap.h index b912af8d8f..154f68eaf0 100644 --- a/src/shared/hashmap.h +++ b/src/shared/hashmap.h @@ -31,6 +31,8 @@ * for all read operations. That way it is not necessary to * instantiate an object for each Hashmap use. */ +#define HASH_KEY_SIZE 16 + typedef struct Hashmap Hashmap; typedef struct _IteratorStruct _IteratorStruct; typedef _IteratorStruct* Iterator; @@ -38,19 +40,19 @@ typedef _IteratorStruct* Iterator; #define ITERATOR_FIRST ((Iterator) 0) #define ITERATOR_LAST ((Iterator) -1) -typedef unsigned (*hash_func_t)(const void *p); +typedef unsigned long (*hash_func_t)(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]); typedef int (*compare_func_t)(const void *a, const void *b); -unsigned string_hash_func(const void *p) _pure_; +unsigned long string_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_; int string_compare_func(const void *a, const void *b) _pure_; /* This will compare the passed pointers directly, and will not * dereference them. This is hence not useful for strings or * suchlike. */ -unsigned trivial_hash_func(const void *p) _const_; +unsigned long trivial_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_; int trivial_compare_func(const void *a, const void *b) _const_; -unsigned uint64_hash_func(const void *p) _pure_; +unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_; int uint64_compare_func(const void *a, const void *b) _pure_; Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func); diff --git a/src/shared/siphash24.c b/src/shared/siphash24.c new file mode 100644 index 0000000000..f68bd283a1 --- /dev/null +++ b/src/shared/siphash24.c @@ -0,0 +1,135 @@ +/* + SipHash reference C implementation + + Written in 2012 by + Jean-Philippe Aumasson + Daniel J. Bernstein + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . + + (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd) +*/ +#include +#include +#include + +#include "siphash24.h" + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint8_t u8; + +#define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) + +#define U32TO8_LE(p, v) \ + (p)[0] = (u8)((v) ); (p)[1] = (u8)((v) >> 8); \ + (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (u32)((v) )); \ + U32TO8_LE((p) + 4, (u32)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((u64)((p)[0]) ) | \ + ((u64)((p)[1]) << 8) | \ + ((u64)((p)[2]) << 16) | \ + ((u64)((p)[3]) << 24) | \ + ((u64)((p)[4]) << 32) | \ + ((u64)((p)[5]) << 40) | \ + ((u64)((p)[6]) << 48) | \ + ((u64)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ + v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ + v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ + v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ + } while(0) + +/* SipHash-2-4 */ +void siphash24(uint8_t out[8], const void *_in, size_t inlen, const uint8_t k[16]) +{ + /* "somepseudorandomlygeneratedbytes" */ + u64 v0 = 0x736f6d6570736575ULL; + u64 v1 = 0x646f72616e646f6dULL; + u64 v2 = 0x6c7967656e657261ULL; + u64 v3 = 0x7465646279746573ULL; + u64 b; + u64 k0 = U8TO64_LE( k ); + u64 k1 = U8TO64_LE( k + 8 ); + u64 m; + const u8 *in = _in; + const u8 *end = in + inlen - ( inlen % sizeof( u64 ) ); + const int left = inlen & 7; + b = ( ( u64 )inlen ) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for ( ; in != end; in += 8 ) + { + m = U8TO64_LE( in ); +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); + printf( "(%3d) compress %08x %08x\n", ( int )inlen, ( u32 )( m >> 32 ), ( u32 )m ); +#endif + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } + + switch( left ) + { + case 7: b |= ( ( u64 )in[ 6] ) << 48; + + case 6: b |= ( ( u64 )in[ 5] ) << 40; + + case 5: b |= ( ( u64 )in[ 4] ) << 32; + + case 4: b |= ( ( u64 )in[ 3] ) << 24; + + case 3: b |= ( ( u64 )in[ 2] ) << 16; + + case 2: b |= ( ( u64 )in[ 1] ) << 8; + + case 1: b |= ( ( u64 )in[ 0] ); break; + + case 0: break; + } + +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); + printf( "(%3d) padding %08x %08x\n", ( int )inlen, ( u32 )( b >> 32 ), ( u32 )b ); +#endif + v3 ^= b; + SIPROUND; + SIPROUND; + v0 ^= b; +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); +#endif + v2 ^= 0xff; + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE( out, b ); +} diff --git a/src/shared/siphash24.h b/src/shared/siphash24.h new file mode 100644 index 0000000000..62e1168a79 --- /dev/null +++ b/src/shared/siphash24.h @@ -0,0 +1,6 @@ +#pragma once + +#include +#include + +void siphash24(uint8_t out[8], const void *in, size_t inlen, const uint8_t k[16]); diff --git a/src/shared/util.c b/src/shared/util.c index 481c17245d..5c9d0bb730 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -61,6 +61,10 @@ #include #undef basename +#ifdef HAVE_SYS_AUXV_H +#include +#endif + #include "macro.h" #include "util.h" #include "ioprio.h" @@ -2345,42 +2349,48 @@ char* dirname_malloc(const char *path) { return dir; } -unsigned long long random_ull(void) { +void random_bytes(void *p, size_t n) { + static bool srand_called = false; _cleanup_close_ int fd; - uint64_t ull; - ssize_t r; + ssize_t k; + uint8_t *q; fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); if (fd < 0) goto fallback; - r = loop_read(fd, &ull, sizeof(ull), true); - if (r != sizeof(ull)) + k = loop_read(fd, p, n, true); + if (k < 0 || (size_t) k != n) goto fallback; - return ull; + return; fallback: - return random() * RAND_MAX + random(); -} -unsigned random_u(void) { - _cleanup_close_ int fd; - unsigned u; - ssize_t r; + if (!srand_called) { - fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); - if (fd < 0) - goto fallback; +#ifdef HAVE_SYS_AUXV_H + /* The kernel provides us with a bit of entropy in + * auxv, so let's try to make use of that to seed the + * pseudo-random generator. It's better than + * nothing... */ - r = loop_read(fd, &u, sizeof(u), true); - if (r != sizeof(u)) - goto fallback; + void *auxv; - return u; + auxv = (void*) getauxval(AT_RANDOM); + if (auxv) + srand(*(unsigned*) auxv); + else +#endif + srand(time(NULL) + gettid()); -fallback: - return random() * RAND_MAX + random(); + srand_called = true; + } + + /* If some idiot made /dev/urandom unavailable to us, he'll + * get a PRNG instead. */ + for (q = p; q < (uint8_t*) p + n; q ++) + *q = rand(); } void rename_process(const char name[8]) { @@ -4137,7 +4147,7 @@ int symlink_atomic(const char *from, const char *to) { _cleanup_free_ char *t; const char *fn; size_t k; - unsigned long long ull; + uint64_t u; unsigned i; int r; @@ -4154,10 +4164,10 @@ int symlink_atomic(const char *from, const char *to) { t[k] = '.'; x = stpcpy(t+k+1, fn); - ull = random_ull(); + u = random_u64(); for (i = 0; i < 16; i++) { - *(x++) = hexchar(ull & 0xF); - ull >>= 4; + *(x++) = hexchar(u & 0xF); + u >>= 4; } *x = 0; diff --git a/src/shared/util.h b/src/shared/util.h index 488ce3ba6d..338d79c7ac 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -249,8 +249,19 @@ int make_stdio(int fd); int make_null_stdio(void); int make_console_stdio(void); -unsigned long long random_ull(void); -unsigned random_u(void); +void random_bytes(void *p, size_t n); + +static inline uint64_t random_u64(void) { + uint64_t u; + random_bytes(&u, sizeof(u)); + return u; +} + +static inline uint32_t random_u32(void) { + uint32_t u; + random_bytes(&u, sizeof(u)); + return u; +} /* For basic lookup tables with strictly enumerated entries */ #define __DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \ -- cgit v1.2.3-54-g00ecf