From 9bf3b53533cdc9b95c921b71da755401f223f765 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Sun, 22 Dec 2013 19:59:12 +0100
Subject: shared: switch our hash table implementation over to SipHash

SipHash appears to be the new gold standard for hashing smaller strings
for hashtables these days, so let's make use of it.
---
 src/shared/ask-password-api.c |   2 +-
 src/shared/hashmap.c          | 104 +++++++++++++++-----------------
 src/shared/hashmap.h          |  10 ++--
 src/shared/siphash24.c        | 135 ++++++++++++++++++++++++++++++++++++++++++
 src/shared/siphash24.h        |   6 ++
 src/shared/util.c             |  60 +++++++++++--------
 src/shared/util.h             |  15 ++++-
 7 files changed, 245 insertions(+), 87 deletions(-)
 create mode 100644 src/shared/siphash24.c
 create mode 100644 src/shared/siphash24.h

(limited to 'src/shared')

diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c
index 755abf0b5e..c9c82b2520 100644
--- a/src/shared/ask-password-api.c
+++ b/src/shared/ask-password-api.c
@@ -262,7 +262,7 @@ static int create_socket(char **name) {
                 return -errno;
         }
 
-        snprintf(sa.un.sun_path, sizeof(sa.un.sun_path)-1, "/run/systemd/ask-password/sck.%llu", random_ull());
+        snprintf(sa.un.sun_path, sizeof(sa.un.sun_path)-1, "/run/systemd/ask-password/sck.%" PRIx64, random_u64());
 
         RUN_WITH_UMASK(0177) {
                 r = bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
diff --git a/src/shared/hashmap.c b/src/shared/hashmap.c
index 3762e3ab0d..b1dccaf4e7 100644
--- a/src/shared/hashmap.c
+++ b/src/shared/hashmap.c
@@ -24,13 +24,10 @@
 #include <string.h>
 #include <errno.h>
 
-#ifdef HAVE_SYS_AUXV_H
-#include <sys/auxv.h>
-#endif
-
 #include "util.h"
 #include "hashmap.h"
 #include "macro.h"
+#include "siphash24.h"
 
 #define INITIAL_N_BUCKETS 31
 
@@ -50,8 +47,8 @@ struct Hashmap {
         struct hashmap_entry ** buckets;
         unsigned n_buckets, n_entries;
 
-        unsigned random_xor;
-        bool from_pool;
+        uint8_t hash_key[HASH_KEY_SIZE];
+        bool from_pool:1;
 };
 
 struct pool {
@@ -134,51 +131,60 @@ __attribute__((destructor)) static void cleanup_pool(void) {
 
 #endif
 
-unsigned string_hash_func(const void *p) {
-        unsigned hash = 5381;
-        const signed char *c;
-
-        /* DJB's hash function */
-
-        for (c = p; *c; c++)
-                hash = (hash << 5) + hash + (unsigned) *c;
-
-        return hash;
+unsigned long string_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
+        uint64_t u;
+        siphash24((uint8_t*) &u, p, strlen(p), hash_key);
+        return (unsigned long) u;
 }
 
 int string_compare_func(const void *a, const void *b) {
         return strcmp(a, b);
 }
 
-unsigned trivial_hash_func(const void *p) {
-        return PTR_TO_UINT(p);
+unsigned long trivial_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
+        uint64_t u;
+        siphash24((uint8_t*) &u, &p, sizeof(p), hash_key);
+        return (unsigned long) u;
 }
 
 int trivial_compare_func(const void *a, const void *b) {
         return a < b ? -1 : (a > b ? 1 : 0);
 }
 
-unsigned uint64_hash_func(const void *p) {
+unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) {
         uint64_t u;
-
-        assert_cc(sizeof(uint64_t) == 2*sizeof(unsigned));
-
-        u = *(const uint64_t*) p;
-
-        return (unsigned) ((u >> 32) ^ u);
+        siphash24((uint8_t*) &u, p, sizeof(uint64_t), hash_key);
+        return (unsigned long) u;
 }
 
 int uint64_compare_func(const void *_a, const void *_b) {
         uint64_t a, b;
-
         a = *(const uint64_t*) _a;
         b = *(const uint64_t*) _b;
-
         return a < b ? -1 : (a > b ? 1 : 0);
 }
 
 static unsigned bucket_hash(Hashmap *h, const void *p) {
-        return (h->hash_func(p) ^ h->random_xor) % h->n_buckets;
+        return (unsigned) (h->hash_func(p, h->hash_key) % h->n_buckets);
+}
+
+static void get_hash_key(uint8_t hash_key[HASH_KEY_SIZE], bool reuse_is_ok) {
+        static uint8_t current[HASH_KEY_SIZE];
+        static bool current_initialized = false;
+
+        /* Returns a hash function key to use. In order to keep things
+         * fast we will not generate a new key each time we allocate a
+         * new hash table. Instead, we'll just reuse the most recently
+         * generated one, except if we never generated one, or when we
+         * are rehashing an entire hash table because it reached its
+         * fill-level threshold. */
+
+        if (!current_initialized || !reuse_is_ok) {
+                random_bytes(current, sizeof(current));
+                current_initialized = true;
+        }
+
+        memcpy(hash_key, current, sizeof(current));
 }
 
 Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) {
@@ -214,21 +220,7 @@ Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) {
 
         h->from_pool = b;
 
-        /* Let's randomize our hash functions a bit so that they are
-         * harder to guess for clients. For this, start out by cheaply
-         * using some bits the kernel passed into the process using
-         * the auxiliary vector. If the hashmap grows later on we will
-         * rehash everything using a new random XOR mask from
-         * /dev/random. */
-#ifdef HAVE_SYS_AUXV_H
-        {
-                void *auxv;
-                auxv = (void*) getauxval(AT_RANDOM);
-                h->random_xor = auxv ? *(unsigned*) auxv : random_u();
-        }
-#else
-        h->random_xor = random_u();
-#endif
+        get_hash_key(h->hash_key, true);
 
         return h;
 }
@@ -407,7 +399,8 @@ static struct hashmap_entry *hash_scan(Hashmap *h, unsigned hash, const void *ke
 
 static bool resize_buckets(Hashmap *h) {
         struct hashmap_entry **n, *i;
-        unsigned m, nxor;
+        unsigned m;
+        uint8_t nkey[HASH_KEY_SIZE];
 
         assert(h);
 
@@ -422,15 +415,15 @@ static bool resize_buckets(Hashmap *h) {
         if (!n)
                 return false;
 
-        /* Let's use a different randomized xor value for the
+        /* Let's use a different randomized hash key for the
          * extension, so that people cannot guess what we are using
          * here forever */
-        nxor = random_u();
+        get_hash_key(nkey, false);
 
         for (i = h->iterate_list_head; i; i = i->iterate_next) {
-                unsigned hash, x;
+                unsigned long old_bucket, new_bucket;
 
-                hash = h->hash_func(i->key);
+                old_bucket = h->hash_func(i->key, h->hash_key) % h->n_buckets;
 
                 /* First, drop from old bucket table */
                 if (i->bucket_next)
@@ -439,16 +432,16 @@ static bool resize_buckets(Hashmap *h) {
                 if (i->bucket_previous)
                         i->bucket_previous->bucket_next = i->bucket_next;
                 else
-                        h->buckets[(hash ^ h->random_xor) % h->n_buckets] = i->bucket_next;
+                        h->buckets[old_bucket] = i->bucket_next;
 
                 /* Then, add to new backet table */
-                x = (hash ^ nxor) % m;
+                new_bucket = h->hash_func(i->key, nkey)  % m;
 
-                i->bucket_next = n[x];
+                i->bucket_next = n[new_bucket];
                 i->bucket_previous = NULL;
-                if (n[x])
-                        n[x]->bucket_previous = i;
-                n[x] = i;
+                if (n[new_bucket])
+                        n[new_bucket]->bucket_previous = i;
+                n[new_bucket] = i;
         }
 
         if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
@@ -456,7 +449,8 @@ static bool resize_buckets(Hashmap *h) {
 
         h->buckets = n;
         h->n_buckets = m;
-        h->random_xor = nxor;
+
+        memcpy(h->hash_key, nkey, HASH_KEY_SIZE);
 
         return true;
 }
diff --git a/src/shared/hashmap.h b/src/shared/hashmap.h
index b912af8d8f..154f68eaf0 100644
--- a/src/shared/hashmap.h
+++ b/src/shared/hashmap.h
@@ -31,6 +31,8 @@
  * for all read operations. That way it is not necessary to
  * instantiate an object for each Hashmap use. */
 
+#define HASH_KEY_SIZE 16
+
 typedef struct Hashmap Hashmap;
 typedef struct _IteratorStruct _IteratorStruct;
 typedef _IteratorStruct* Iterator;
@@ -38,19 +40,19 @@ typedef _IteratorStruct* Iterator;
 #define ITERATOR_FIRST ((Iterator) 0)
 #define ITERATOR_LAST ((Iterator) -1)
 
-typedef unsigned (*hash_func_t)(const void *p);
+typedef unsigned long (*hash_func_t)(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]);
 typedef int (*compare_func_t)(const void *a, const void *b);
 
-unsigned string_hash_func(const void *p) _pure_;
+unsigned long string_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_;
 int string_compare_func(const void *a, const void *b) _pure_;
 
 /* This will compare the passed pointers directly, and will not
  * dereference them. This is hence not useful for strings or
  * suchlike. */
-unsigned trivial_hash_func(const void *p) _const_;
+unsigned long trivial_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_;
 int trivial_compare_func(const void *a, const void *b) _const_;
 
-unsigned uint64_hash_func(const void *p) _pure_;
+unsigned long uint64_hash_func(const void *p, const uint8_t hash_key[HASH_KEY_SIZE]) _pure_;
 int uint64_compare_func(const void *a, const void *b) _pure_;
 
 Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func);
diff --git a/src/shared/siphash24.c b/src/shared/siphash24.c
new file mode 100644
index 0000000000..f68bd283a1
--- /dev/null
+++ b/src/shared/siphash24.c
@@ -0,0 +1,135 @@
+/*
+   SipHash reference C implementation
+
+   Written in 2012 by
+   Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+   Daniel J. Bernstein <djb@cr.yp.to>
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along with
+   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+   (Minimal changes made by Lennart Poettering, to make clean for inclusion in systemd)
+*/
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "siphash24.h"
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint8_t u8;
+
+#define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define U32TO8_LE(p, v)         \
+    (p)[0] = (u8)((v)      ); (p)[1] = (u8)((v) >>  8); \
+    (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24);
+
+#define U64TO8_LE(p, v)         \
+  U32TO8_LE((p),     (u32)((v)      ));   \
+  U32TO8_LE((p) + 4, (u32)((v) >> 32));
+
+#define U8TO64_LE(p) \
+  (((u64)((p)[0])      ) | \
+   ((u64)((p)[1]) <<  8) | \
+   ((u64)((p)[2]) << 16) | \
+   ((u64)((p)[3]) << 24) | \
+   ((u64)((p)[4]) << 32) | \
+   ((u64)((p)[5]) << 40) | \
+   ((u64)((p)[6]) << 48) | \
+   ((u64)((p)[7]) << 56))
+
+#define SIPROUND            \
+  do {              \
+    v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \
+    v2 += v3; v3=ROTL(v3,16); v3 ^= v2;     \
+    v0 += v3; v3=ROTL(v3,21); v3 ^= v0;     \
+    v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \
+  } while(0)
+
+/* SipHash-2-4 */
+void siphash24(uint8_t out[8], const void *_in, size_t inlen, const uint8_t k[16])
+{
+  /* "somepseudorandomlygeneratedbytes" */
+  u64 v0 = 0x736f6d6570736575ULL;
+  u64 v1 = 0x646f72616e646f6dULL;
+  u64 v2 = 0x6c7967656e657261ULL;
+  u64 v3 = 0x7465646279746573ULL;
+  u64 b;
+  u64 k0 = U8TO64_LE( k );
+  u64 k1 = U8TO64_LE( k + 8 );
+  u64 m;
+  const u8 *in = _in;
+  const u8 *end = in + inlen - ( inlen % sizeof( u64 ) );
+  const int left = inlen & 7;
+  b = ( ( u64 )inlen ) << 56;
+  v3 ^= k1;
+  v2 ^= k0;
+  v1 ^= k1;
+  v0 ^= k0;
+
+  for ( ; in != end; in += 8 )
+  {
+    m = U8TO64_LE( in );
+#ifdef DEBUG
+    printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 );
+    printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 );
+    printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 );
+    printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 );
+    printf( "(%3d) compress %08x %08x\n", ( int )inlen, ( u32 )( m >> 32 ), ( u32 )m );
+#endif
+    v3 ^= m;
+    SIPROUND;
+    SIPROUND;
+    v0 ^= m;
+  }
+
+  switch( left )
+  {
+  case 7: b |= ( ( u64 )in[ 6] )  << 48;
+
+  case 6: b |= ( ( u64 )in[ 5] )  << 40;
+
+  case 5: b |= ( ( u64 )in[ 4] )  << 32;
+
+  case 4: b |= ( ( u64 )in[ 3] )  << 24;
+
+  case 3: b |= ( ( u64 )in[ 2] )  << 16;
+
+  case 2: b |= ( ( u64 )in[ 1] )  <<  8;
+
+  case 1: b |= ( ( u64 )in[ 0] ); break;
+
+  case 0: break;
+  }
+
+#ifdef DEBUG
+  printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 );
+  printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 );
+  printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 );
+  printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 );
+  printf( "(%3d) padding   %08x %08x\n", ( int )inlen, ( u32 )( b >> 32 ), ( u32 )b );
+#endif
+  v3 ^= b;
+  SIPROUND;
+  SIPROUND;
+  v0 ^= b;
+#ifdef DEBUG
+  printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 );
+  printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 );
+  printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 );
+  printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 );
+#endif
+  v2 ^= 0xff;
+  SIPROUND;
+  SIPROUND;
+  SIPROUND;
+  SIPROUND;
+  b = v0 ^ v1 ^ v2  ^ v3;
+  U64TO8_LE( out, b );
+}
diff --git a/src/shared/siphash24.h b/src/shared/siphash24.h
new file mode 100644
index 0000000000..62e1168a79
--- /dev/null
+++ b/src/shared/siphash24.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+void siphash24(uint8_t out[8], const void *in, size_t inlen, const uint8_t k[16]);
diff --git a/src/shared/util.c b/src/shared/util.c
index 481c17245d..5c9d0bb730 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -61,6 +61,10 @@
 #include <libgen.h>
 #undef basename
 
+#ifdef HAVE_SYS_AUXV_H
+#include <sys/auxv.h>
+#endif
+
 #include "macro.h"
 #include "util.h"
 #include "ioprio.h"
@@ -2345,42 +2349,48 @@ char* dirname_malloc(const char *path) {
         return dir;
 }
 
-unsigned long long random_ull(void) {
+void random_bytes(void *p, size_t n) {
+        static bool srand_called = false;
         _cleanup_close_ int fd;
-        uint64_t ull;
-        ssize_t r;
+        ssize_t k;
+        uint8_t *q;
 
         fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
         if (fd < 0)
                 goto fallback;
 
-        r = loop_read(fd, &ull, sizeof(ull), true);
-        if (r != sizeof(ull))
+        k = loop_read(fd, p, n, true);
+        if (k < 0 || (size_t) k != n)
                 goto fallback;
 
-        return ull;
+        return;
 
 fallback:
-        return random() * RAND_MAX + random();
-}
 
-unsigned random_u(void) {
-        _cleanup_close_ int fd;
-        unsigned u;
-        ssize_t r;
+        if (!srand_called) {
 
-        fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
-        if (fd < 0)
-                goto fallback;
+#ifdef HAVE_SYS_AUXV_H
+                /* The kernel provides us with a bit of entropy in
+                 * auxv, so let's try to make use of that to seed the
+                 * pseudo-random generator. It's better than
+                 * nothing... */
 
-        r = loop_read(fd, &u, sizeof(u), true);
-        if (r != sizeof(u))
-                goto fallback;
+                void *auxv;
 
-        return u;
+                auxv = (void*) getauxval(AT_RANDOM);
+                if (auxv)
+                        srand(*(unsigned*) auxv);
+                else
+#endif
+                        srand(time(NULL) + gettid());
 
-fallback:
-        return random() * RAND_MAX + random();
+                srand_called = true;
+        }
+
+        /* If /dev/urandom is unavailable to us, fall back to the libc
+         * rand() PRNG, which is not cryptographically strong. */
+        for (q = p; q < (uint8_t*) p + n; q ++)
+                *q = rand();
 }
 
 void rename_process(const char name[8]) {
@@ -4137,7 +4147,7 @@ int symlink_atomic(const char *from, const char *to) {
         _cleanup_free_ char *t;
         const char *fn;
         size_t k;
-        unsigned long long ull;
+        uint64_t u;
         unsigned i;
         int r;
 
@@ -4154,10 +4164,10 @@ int symlink_atomic(const char *from, const char *to) {
         t[k] = '.';
         x = stpcpy(t+k+1, fn);
 
-        ull = random_ull();
+        u = random_u64();
         for (i = 0; i < 16; i++) {
-                *(x++) = hexchar(ull & 0xF);
-                ull >>= 4;
+                *(x++) = hexchar(u & 0xF);
+                u >>= 4;
         }
 
         *x = 0;
diff --git a/src/shared/util.h b/src/shared/util.h
index 488ce3ba6d..338d79c7ac 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -249,8 +249,19 @@ int make_stdio(int fd);
 int make_null_stdio(void);
 int make_console_stdio(void);
 
-unsigned long long random_ull(void);
-unsigned random_u(void);
+void random_bytes(void *p, size_t n);
+
+static inline uint64_t random_u64(void) {
+        uint64_t u;
+        random_bytes(&u, sizeof(u));
+        return u;
+}
+
+static inline uint32_t random_u32(void) {
+        uint32_t u;
+        random_bytes(&u, sizeof(u));
+        return u;
+}
 
 /* For basic lookup tables with strictly enumerated entries */
 #define __DEFINE_STRING_TABLE_LOOKUP(name,type,scope)                   \
-- 
cgit v1.2.3-54-g00ecf