#include <stdio.h>
#include <pthread.h>
-#define BUCKET_SIZE_RESIZE_THRESHOLD 32
-#define MAX_NR_BUCKETS 1048576 /* 1M buckets */
+#define DEBUG	/* Enable dbg_printf() output; remove for production builds */
+
+#ifdef DEBUG
+#define dbg_printf(args...) printf(args)
+#else
+#define dbg_printf(args...)
+#endif
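/*
 * Editor's note: the "args..." named variadic form above is a GNU
 * extension. A C99-portable sketch of the same macro pair would be:
 *
 *	#define dbg_printf(...)	printf(__VA_ARGS__)
 *	#define dbg_printf(...)	do { } while (0)
 */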
+
+#define CHAIN_LEN_TARGET 1
+#define CHAIN_LEN_RESIZE_THRESHOLD 2
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
struct rcu_table {
unsigned long size; /* always a power of 2 */
+ unsigned long resize_target;	/* resize target size; only ever raised, via CAS max */
+ int resize_initiated;	/* non-zero once a resize has been queued or started */
struct rcu_head head;
struct rcu_ht_node *tbl[0];
};
struct rcu_ht {
struct rcu_table *t; /* shared */
ht_hash_fct hash_fct;
- void *hashseed;
+ ht_compare_fct compare_fct;
+ unsigned long hash_seed;
pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */
- unsigned long target_size;
void (*ht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
};
struct rcu_resize_work {
	struct rcu_head head;
	struct rcu_ht *ht;
};
-static
-void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth);
-
-static
-void check_resize(struct rcu_ht *ht, struct rcu_table *t,
- unsigned long chain_len)
-{
- //printf("check resize chain len %lu\n", chain_len);
- if (chain_len >= BUCKET_SIZE_RESIZE_THRESHOLD)
- ht_resize_lazy(ht, t, chain_len / BUCKET_SIZE_RESIZE_THRESHOLD);
-}
-
/*
* Algorithm to reverse bits in a word by lookup table, extended to
* 64-bit words.
- * ref.
+ * Source:
* http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
+ * Originally in the public domain.
*/
static const uint8_t BitReverseTable256[256] =
#endif
}
+/*
+ * Algorithm to find the log2 of a 32-bit unsigned integer.
+ * Source:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
+ * Originally in the public domain.
+ */
+static const char LogTable256[256] =
+{
+#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
+ -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+ LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
+ LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
+};
+
+static
+uint32_t log2_u32(uint32_t v)
+{
+ uint32_t t, tt;
+
+ if ((tt = (v >> 16)))
+ return (t = (tt >> 8))
+ ? 24 + LogTable256[t]
+ : 16 + LogTable256[tt];
+ else
+ return (t = (v >> 8))
+ ? 8 + LogTable256[t]
+ : LogTable256[v];
+}
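/*
 * Sanity examples (editor's addition): log2_u32() computes the floor of
 * the base-2 logarithm, e.g.:
 *
 *	log2_u32(1) == 0	log2_u32(3) == 1
 *	log2_u32(1024) == 10	log2_u32(0x80000000) == 31
 *
 * Note that log2_u32(0) would read LogTable256[0] == -1, so callers
 * must pass v >= 1.
 */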
+
+static
+void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth);
+
+static
+void check_resize(struct rcu_ht *ht, struct rcu_table *t,
+ uint32_t chain_len)
+{
+ /* chain_len >= threshold > CHAIN_LEN_TARGET - 1, so log2_u32() never sees 0 */
+ if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD)
+ ht_resize_lazy(ht, t,
+ log2_u32(chain_len - (CHAIN_LEN_TARGET - 1)));
+}
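/*
 * Worked example (editor's addition), using the constants above: an add
 * traversing a chain of length 2 requests a growth of
 * log2_u32(2 - (1 - 1)) == 1 order (the table doubles); a chain of
 * length 8 requests log2_u32(8) == 3 orders (an 8x growth target).
 */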
+
static
struct rcu_ht_node *clear_flag(struct rcu_ht_node *node)
{
	/* Strip the low-order "removed" flag bit from the pointer */
	return (struct rcu_ht_node *) (((unsigned long) node) & ~0x1UL);
}
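/*
 * Companion helpers assumed by the code below (editor's sketch of the
 * elided definitions; the exact flag encoding is an assumption, chosen
 * to match clear_flag() above):
 */
static
int is_removed(struct rcu_ht_node *node)
{
	/* A set low-order bit marks a node as logically removed */
	return ((unsigned long) node) & 0x1UL;
}

static
struct rcu_ht_node *flag_removed(struct rcu_ht_node *node)
{
	return (struct rcu_ht_node *) (((unsigned long) node) | 0x1UL);
}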
static
-void _uatomic_max(unsigned long *ptr, unsigned long v)
+unsigned long _uatomic_max(unsigned long *ptr, unsigned long v)
{
unsigned long old1, old2;

old1 = uatomic_read(ptr);
do {
old2 = old1;
if (old2 >= v)
- break;
+ return old2;
} while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2);
+ return v;
}
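/*
 * Usage sketch (editor's addition): _uatomic_max() atomically raises
 * *ptr to v, never lowers it, and returns the resulting maximum even
 * when racing with concurrent updaters:
 *
 *	(void) _uatomic_max(&t->resize_target, t->size << 2);
 *	// t->resize_target is now >= 4 * t->size, whatever other
 *	// threads published concurrently.
 */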
+/*
+ * Remove, from a bucket's chain, all logically deleted nodes whose
+ * reverse hash does not exceed the reverse hash of "node".
+ */
static
-void _ht_add(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
+void _ht_gc_bucket(struct rcu_ht_node *dummy, struct rcu_ht_node *node)
{
- struct rcu_ht_node *iter_prev = NULL, *iter = NULL;
+ struct rcu_ht_node *iter_prev, *iter, *next;
- if (!t->size)
- return;
for (;;) {
- unsigned long chain_len = 0;
-
- iter_prev = rcu_dereference(t->tbl[node->hash & (t->size - 1)]);
- //printf("iter prev %p hash %lu bucket %lu\n", iter_prev,
- // node->hash, node->hash & (t->size - 1));
- assert(iter_prev);
+ iter_prev = dummy;
+ /* We can always skip the dummy node initially */
+ iter = rcu_dereference(iter_prev->next);
assert(iter_prev->reverse_hash <= node->reverse_hash);
for (;;) {
- iter = clear_flag(rcu_dereference(iter_prev->next));
if (unlikely(!iter))
- break;
- if (iter->reverse_hash < node->reverse_hash)
+ return;
+ if (clear_flag(iter)->reverse_hash > node->reverse_hash)
+ return;
+ next = rcu_dereference(clear_flag(iter)->next);
+ if (is_removed(next))
break;
iter_prev = iter;
- check_resize(ht, t, ++chain_len);
+ iter = next;
}
- /* add in iter_prev->next */
- if (is_removed(iter))
- continue;
- assert(node != iter);
- node->next = iter;
- assert(iter_prev != node);
- if (uatomic_cmpxchg(&iter_prev->next, iter, node) != iter)
- continue;
- break;
+ assert(!is_removed(iter));
+ (void) uatomic_cmpxchg(&iter_prev->next, iter, clear_flag(next));
}
}
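/*
 * Illustration (editor's addition): collecting a bucket where node B is
 * logically removed (B's next pointer carries the flag):
 *
 *	dummy -> A -> B(rm) -> C
 *	         cmpxchg(&A->next, B, C) unlinks B
 *	dummy -> A -> C
 *
 * The cmpxchg may fail if a concurrent update changed A->next; the
 * outer loop then rescans from the dummy head and retries.
 */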
static
-int _ht_remove(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
+struct rcu_ht_node *_ht_add(struct rcu_ht *ht, struct rcu_table *t,
+ struct rcu_ht_node *node, int unique)
{
- struct rcu_ht_node *iter_prev, *iter, *next, *old;
- unsigned long chain_len;
- int found, ret = 0;
- int flagged = 0;
+ struct rcu_ht_node *iter_prev, *dummy, *iter, *next;
+ unsigned long hash;
-retry:
- chain_len = 0;
- found = 0;
- iter_prev = rcu_dereference(t->tbl[node->hash & (t->size - 1)]);
- assert(iter_prev);
- assert(iter_prev->reverse_hash <= node->reverse_hash);
+ if (!t->size) {
+ assert(node->dummy);
+ return node; /* Initial first add (head) */
+ }
+ hash = bit_reverse_ulong(node->reverse_hash);
for (;;) {
- iter = clear_flag(rcu_dereference(iter_prev->next));
- if (unlikely(!iter))
- break;
- if (iter->reverse_hash < node->reverse_hash)
- break;
- if (iter == node) {
- found = 1;
- break;
+ uint32_t chain_len = 0;
+
+ /*
+ * iter_prev points to the non-removed node prior to the
+ * insert location.
+ */
+ iter_prev = rcu_dereference(t->tbl[hash & (t->size - 1)]);
+ /* We can always skip the dummy node initially */
+ iter = rcu_dereference(iter_prev->next);
+ assert(iter_prev->reverse_hash <= node->reverse_hash);
+ for (;;) {
+ if (unlikely(!iter))
+ goto insert;
+ if (clear_flag(iter)->reverse_hash > node->reverse_hash)
+ goto insert;
+ next = rcu_dereference(clear_flag(iter)->next);
+ if (is_removed(next))
+ goto gc_node;
+ if (unique
+ && !clear_flag(iter)->dummy
+ && !ht->compare_fct(node->key, node->key_len,
+ clear_flag(iter)->key,
+ clear_flag(iter)->key_len))
+ return clear_flag(iter);
+ /* Only account for identical reverse hash once */
+ if (iter_prev->reverse_hash != clear_flag(iter)->reverse_hash)
+ check_resize(ht, t, ++chain_len);
+ iter_prev = clear_flag(iter);
+ iter = next;
}
- iter_prev = iter;
- }
- if (!found) {
- ret = -ENOENT;
- goto end;
+ insert:
+ assert(node != clear_flag(iter));
+ assert(!is_removed(iter_prev));
+ assert(iter_prev != node);
+ node->next = iter;
+ if (uatomic_cmpxchg(&iter_prev->next, iter,
+ node) != iter)
+ continue; /* retry */
+ else
+ goto gc_end;
+ gc_node:
+ assert(!is_removed(iter));
+ (void) uatomic_cmpxchg(&iter_prev->next, iter, clear_flag(next));
+ /* retry */
}
- next = rcu_dereference(iter->next);
- if (!flagged) {
- if (is_removed(next)) {
- ret = -ENOENT;
+gc_end:
+ /* Garbage collect logically removed nodes in the bucket */
+ dummy = rcu_dereference(t->tbl[hash & (t->size - 1)]);
+ _ht_gc_bucket(dummy, node);
+ return node;
+}
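/*
 * Ordering example (editor's addition): nodes are kept sorted by
 * bit-reversed hash, so each bucket's dummy node immediately precedes
 * the nodes hashing into it. With 8-bit hashes and a 4-bucket table:
 *
 *	bucket 0 dummy: reverse_hash 00000000
 *	bucket 2 dummy: reverse_hash 01000000
 *	bucket 1 dummy: reverse_hash 10000000
 *	bucket 3 dummy: reverse_hash 11000000
 *
 * A node with hash 5 (00000101) belongs to bucket 1 (5 & 3) and sorts
 * between the bucket 1 and bucket 3 dummies (reverse_hash 10100000), so
 * growing the table only ever splits buckets, never moves nodes.
 */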
+
+static
+int _ht_remove(struct rcu_ht *ht, struct rcu_table *t, struct rcu_ht_node *node)
+{
+ struct rcu_ht_node *dummy, *next, *old;
+ int flagged = 0;
+ unsigned long hash;
+
+ /* logically delete the node */
+ old = rcu_dereference(node->next);
+ do {
+ next = old;
+ if (is_removed(next))
goto end;
- }
- /* set deletion flag */
- if ((old = uatomic_cmpxchg(&iter->next, next,
- flag_removed(next))) != next) {
- if (old == flag_removed(next)) {
- ret = -ENOENT;
- goto end;
- } else {
- goto retry;
- }
- }
- flagged = 1;
- }
+ assert(!node->dummy);
+ old = uatomic_cmpxchg(&node->next, next,
+ flag_removed(next));
+ } while (old != next);
+
+ /* We performed the (logical) deletion. */
+ flagged = 1;
+
/*
- * Remove the element from the list. Retry if there has been a
- * concurrent add (there cannot be a concurrent delete, because
- * we won the deletion flag cmpxchg).
+ * Ensure that the node is no longer visible to readers: look up the
+ * node and remove it (along with any other logically removed nodes)
+ * if found.
*/
- if (uatomic_cmpxchg(&iter_prev->next, iter, clear_flag(next)) != iter)
- goto retry;
+ hash = bit_reverse_ulong(node->reverse_hash);
+ dummy = rcu_dereference(t->tbl[hash & (t->size - 1)]);
+ _ht_gc_bucket(dummy, node);
end:
- return ret;
+ /*
+ * Only the thread that won the flagging cmpxchg above (and no
+ * other) actually removed the node from the hash table.
+ */
+ if (flagged) {
+ assert(is_removed(rcu_dereference(node->next)));
+ return 0;
+ } else
+ return -ENOENT;
}
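/*
 * Concurrency note (editor's addition): when several threads race to
 * remove the same node, the flagging cmpxchg loop above guarantees that
 * exactly one of them returns 0; every other caller observes the
 * removed flag and gets -ENOENT.
 */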
static
t->size = i;
t->tbl[i] = calloc(1, sizeof(struct rcu_ht_node));
t->tbl[i]->dummy = 1;
- t->tbl[i]->hash = i;
t->tbl[i]->reverse_hash = bit_reverse_ulong(i);
- _ht_add(ht, t, t->tbl[i]);
+ (void) _ht_add(ht, t, t->tbl[i], 0);
}
- t->size = end;
+ t->resize_target = t->size = end;
+ t->resize_initiated = 0;
}
struct rcu_ht *ht_new(ht_hash_fct hash_fct,
- void *hashseed,
+ ht_compare_fct compare_fct,
+ unsigned long hash_seed,
unsigned long init_size,
void (*ht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head)))
ht = calloc(1, sizeof(struct rcu_ht));
ht->hash_fct = hash_fct;
- ht->hashseed = hashseed;
+ ht->compare_fct = compare_fct;
+ ht->hash_seed = hash_seed;
ht->ht_call_rcu = ht_call_rcu;
/* this mutex must not nest inside a read-side critical section */
pthread_mutex_init(&ht->resize_mutex, NULL);
pthread_mutex_lock(&ht->resize_mutex);
init_table(ht, ht->t, 0, max(init_size, 1));
pthread_mutex_unlock(&ht->resize_mutex);
- ht->target_size = ht->t->size;
return ht;
}
-struct rcu_ht_node *ht_lookup(struct rcu_ht *ht, void *key)
+struct rcu_ht_node *ht_lookup(struct rcu_ht *ht, void *key, size_t key_len)
{
struct rcu_table *t;
struct rcu_ht_node *node;
unsigned long hash, reverse_hash;
- hash = ht->hash_fct(ht->hashseed, key);
+ hash = ht->hash_fct(key, key_len, ht->hash_seed);
reverse_hash = bit_reverse_ulong(hash);
t = rcu_dereference(ht->t);
node = rcu_dereference(t->tbl[hash & (t->size - 1)]);
for (;;) {
if (unlikely(!node))
break;
- if (node->reverse_hash > reverse_hash) {
+ if (unlikely(node->reverse_hash > reverse_hash)) {
node = NULL;
break;
}
- if (node->key == key) {
- if (is_removed(rcu_dereference(node->next)))
- node = NULL;
- break;
+ if (likely(!is_removed(rcu_dereference(node->next)))
+ && !node->dummy
+ && likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) {
+ break;
}
node = clear_flag(rcu_dereference(node->next));
}
+ assert(!node || !node->dummy);
return node;
}
void ht_add(struct rcu_ht *ht, struct rcu_ht_node *node)
{
struct rcu_table *t;
+ unsigned long hash;
+
+ hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
+ node->reverse_hash = bit_reverse_ulong((unsigned long) hash);
+
+ t = rcu_dereference(ht->t);
+ (void) _ht_add(ht, t, node, 0);
+}
+
+struct rcu_ht_node *ht_add_unique(struct rcu_ht *ht, struct rcu_ht_node *node)
+{
+ struct rcu_table *t;
+ unsigned long hash;
- node->hash = ht->hash_fct(ht->hashseed, node->key);
- node->reverse_hash = bit_reverse_ulong((unsigned long) node->hash);
+ hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
+ node->reverse_hash = bit_reverse_ulong((unsigned long) hash);
t = rcu_dereference(ht->t);
- _ht_add(ht, t, node);
+ return _ht_add(ht, t, node, 1);
}
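/*
 * Usage sketch (editor's addition; assumes callers invoke updates from
 * within an RCU read-side critical section, and that "ht" and "node"
 * are set up as elsewhere in this file):
 *
 *	struct rcu_ht_node *ret;
 *
 *	rcu_read_lock();
 *	ret = ht_add_unique(ht, node);
 *	if (ret != node) {
 *		// key already present: "ret" is the existing node,
 *		// "node" was not inserted and may be freed immediately.
 *	}
 *	rcu_read_unlock();
 */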
int ht_remove(struct rcu_ht *ht, struct rcu_ht_node *node)
if (!node->dummy)
return -EPERM;
node = node->next;
+ assert(!is_removed(node));
} while (node);
/* Internal sanity check: all nodes left should be dummy */
for (i = 0; i < t->size; i++) {
return ret;
}
+void ht_count_nodes(struct rcu_ht *ht,
+ unsigned long *count,
+ unsigned long *removed)
+{
+ struct rcu_table *t;
+ struct rcu_ht_node *node, *next;
+
+ *count = 0;
+ *removed = 0;
+
+ t = rcu_dereference(ht->t);
+ /* Count live and logically removed nodes over the whole table */
+ node = rcu_dereference(t->tbl[0]);
+ do {
+ next = rcu_dereference(node->next);
+ if (is_removed(next)) {
+ assert(!node->dummy);
+ (*removed)++;
+ } else if (!node->dummy)
+ (*count)++;
+ node = clear_flag(next);
+ } while (node);
+}
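/*
 * Usage sketch (editor's addition): debug accounting with the counters
 * filled in by ht_count_nodes():
 *
 *	unsigned long count, removed;
 *
 *	ht_count_nodes(ht, &count, &removed);
 *	dbg_printf("rculfhash: %lu nodes, %lu logically removed\n",
 *		   count, removed);
 */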
+
static
void ht_free_table_cb(struct rcu_head *head)
{
unsigned long new_size, old_size;
struct rcu_table *new_t, *old_t;
- //return; //TEST
-
old_t = ht->t;
old_size = old_t->size;
- new_size = CMM_LOAD_SHARED(ht->target_size);
+ new_size = CMM_LOAD_SHARED(old_t->resize_target);
+ dbg_printf("rculfhash: resize from %lu to %lu buckets\n",
+ old_size, new_size);
if (old_size == new_size)
return;
new_t = malloc(sizeof(struct rcu_table)
	+ (new_size * sizeof(struct rcu_ht_node *)));
memcpy(&new_t->tbl, &old_t->tbl,
old_size * sizeof(struct rcu_ht_node *));
init_table(ht, new_t, old_size, new_size - old_size);
- new_t->size = new_size;
/* Changing table and size atomically wrt lookups */
rcu_assign_pointer(ht->t, new_t);
ht->ht_call_rcu(&old_t->head, ht_free_table_cb);
}
static
-void resize_target_update(struct rcu_ht *ht, struct rcu_table *t,
- int growth_order)
+unsigned long resize_target_update(struct rcu_table *t,
+ int growth_order)
{
- unsigned long new_size = t->size << growth_order;
-
- if (new_size > MAX_NR_BUCKETS)
- new_size = MAX_NR_BUCKETS;
- //printf("resize update prevtarget %lu current %lu order %d\n",
- // ht->target_size, t->size, growth_order);
- _uatomic_max(&ht->target_size, new_size);
+ return _uatomic_max(&t->resize_target,
+ t->size << growth_order);
}
void ht_resize(struct rcu_ht *ht, int growth)
{
- resize_target_update(ht, rcu_dereference(ht->t), growth);
- pthread_mutex_lock(&ht->resize_mutex);
- _do_ht_resize(ht);
- pthread_mutex_unlock(&ht->resize_mutex);
+ struct rcu_table *t = rcu_dereference(ht->t);
+ unsigned long target_size;
+
+ target_size = resize_target_update(t, growth);
+ if (t->size < target_size) {
+ CMM_STORE_SHARED(t->resize_initiated, 1);
+ pthread_mutex_lock(&ht->resize_mutex);
+ _do_ht_resize(ht);
+ pthread_mutex_unlock(&ht->resize_mutex);
+ }
}
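/*
 * Example (editor's addition): "growth" is expressed in powers of two.
 * On a 1024-bucket table, ht_resize(ht, 2) raises the target to
 * 1024 << 2 == 4096 buckets and performs the resize synchronously,
 * holding resize_mutex across _do_ht_resize().
 */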
static
void ht_resize_lazy(struct rcu_ht *ht, struct rcu_table *t, int growth)
{
struct rcu_resize_work *work;
+ unsigned long target_size;
- work = malloc(sizeof(*work));
- work->ht = ht;
- resize_target_update(ht, t, growth);
- ht->ht_call_rcu(&work->head, do_resize_cb);
+ target_size = resize_target_update(t, growth);
+ if (!CMM_LOAD_SHARED(t->resize_initiated) && t->size < target_size) {
+ work = malloc(sizeof(*work));
+ work->ht = ht;
+ ht->ht_call_rcu(&work->head, do_resize_cb);
+ CMM_STORE_SHARED(t->resize_initiated, 1);
+ }
}