/* Value of the end pointer. Should not interact with flags. */
#define END_VALUE NULL
+/*
+ * ht_items_count: Split-counters counting the number of node addition
+ * and removal in the table. Only used if the CDS_LFHT_ACCOUNTING flag
+ * is set at hash table creation.
+ *
+ * These are free-running counters, never reset to zero. They count the
+ * number of add/remove, and trigger every (1 << COUNT_COMMIT_ORDER)
+ * operations to update the global counter. We choose a power-of-2 value
+ * for the trigger to deal with 32 or 64-bit overflow of the counter.
+ */
struct ht_items_count {
unsigned long add, del;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
+/*
+ * rcu_level: Contains the per order-index-level dummy node table. The
+ * size of each dummy node table is half the number of hashes contained
+ * in this order (except for order 0). The minimum allocation size
+ * parameter allows combining the dummy node arrays of the lowermost
+ * levels to improve cache locality for small index orders.
+ */
struct rcu_level {
/* Note: manually update allocation length when adding a field */
struct _cds_lfht_node nodes[0];
};
+/*
+ * rcu_table: Contains the size and desired new size if a resize
+ * operation is in progress, as well as the statically-sized array of
+ * rcu_level pointers.
+ */
struct rcu_table {
unsigned long size; /* always a power of 2, shared (RCU) */
unsigned long resize_target;
struct rcu_level *tbl[MAX_TABLE_ORDER];
};
+/*
+ * cds_lfht: Top-level data structure representing a lock-free hash
+ * table. Defined in the implementation file to make it be an opaque
+ * cookie to users.
+ */
struct cds_lfht {
struct rcu_table t;
- cds_lfht_hash_fct hash_fct;
- cds_lfht_compare_fct compare_fct;
unsigned long min_alloc_order;
unsigned long min_alloc_size;
- unsigned long hash_seed;
int flags;
/*
* We need to put the work threads offline (QSBR) when taking this
struct ht_items_count *split_count; /* split item count */
};
+/*
+ * rcu_resize_work: Contains arguments passed to RCU worker thread
+ * responsible for performing lazy resize.
+ */
struct rcu_resize_work {
struct rcu_head head;
struct cds_lfht *ht;
};
+/*
+ * partition_resize_work: Contains arguments passed to worker threads
+ * executing the hash table resize on partitions of the hash table
+ * assigned to each processor's worker thread.
+ */
struct partition_resize_work {
pthread_t thread_id;
struct cds_lfht *ht;
static
void _cds_lfht_add(struct cds_lfht *ht,
+ cds_lfht_match_fct match,
unsigned long size,
struct cds_lfht_node *node,
struct cds_lfht_iter *unique_ret,
unsigned int fls_ulong(unsigned long x)
{
-#if (CAA_BITS_PER_lONG == 32)
+#if (CAA_BITS_PER_LONG == 32)
return fls_u32(x);
#else
return fls_u64(x);
#endif
static
-void cds_lfht_resize_lazy(struct cds_lfht *ht, unsigned long size, int growth);
+void cds_lfht_resize_lazy_grow(struct cds_lfht *ht, unsigned long size, int growth);
static
void cds_lfht_resize_lazy_count(struct cds_lfht *ht, unsigned long size,
assert(split_count_mask >= 0);
cpu = sched_getcpu();
- if (unlikely(cpu < 0))
+ if (caa_unlikely(cpu < 0))
return hash & split_count_mask;
else
return cpu & split_count_mask;
unsigned long split_count;
int index;
- if (unlikely(!ht->split_count))
+ if (caa_unlikely(!ht->split_count))
return;
index = ht_get_split_count_index(hash);
split_count = uatomic_add_return(&ht->split_count[index].add, 1);
- if (unlikely(!(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) {
+ if (caa_unlikely(!(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) {
long count;
dbg_printf("add split count %lu\n", split_count);
unsigned long split_count;
int index;
- if (unlikely(!ht->split_count))
+ if (caa_unlikely(!ht->split_count))
return;
index = ht_get_split_count_index(hash);
split_count = uatomic_add_return(&ht->split_count[index].del, 1);
- if (unlikely(!(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) {
+ if (caa_unlikely(!(split_count & ((1UL << COUNT_COMMIT_ORDER) - 1)))) {
long count;
dbg_printf("del split count %lu\n", split_count);
dbg_printf("WARNING: large chain length: %u.\n",
chain_len);
if (chain_len >= CHAIN_LEN_RESIZE_THRESHOLD)
- cds_lfht_resize_lazy(ht, size,
+ cds_lfht_resize_lazy_grow(ht, size,
get_count_order_u32(chain_len - (CHAIN_LEN_TARGET - 1)));
}
}
static
-unsigned long _uatomic_max(unsigned long *ptr, unsigned long v)
+unsigned long _uatomic_xchg_monotonic_increase(unsigned long *ptr,
+ unsigned long v)
{
unsigned long old1, old2;
if (old2 >= v)
return old2;
} while ((old1 = uatomic_cmpxchg(ptr, old2, v)) != old2);
- return v;
+ return old2;
}
static
*/
assert(dummy != node);
for (;;) {
- if (unlikely(is_end(iter)))
+ if (caa_unlikely(is_end(iter)))
return;
- if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
+ if (caa_likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
return;
next = rcu_dereference(clear_flag(iter)->p.next);
- if (likely(is_removed(next)))
+ if (caa_likely(is_removed(next)))
break;
iter_prev = clear_flag(iter);
iter = next;
*/
static
void _cds_lfht_add(struct cds_lfht *ht,
+ cds_lfht_match_fct match,
unsigned long size,
struct cds_lfht_node *node,
struct cds_lfht_iter *unique_ret,
iter = rcu_dereference(iter_prev->p.next);
assert(iter_prev->p.reverse_hash <= node->p.reverse_hash);
for (;;) {
- if (unlikely(is_end(iter)))
+ if (caa_unlikely(is_end(iter)))
goto insert;
- if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
+ if (caa_likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
goto insert;
/* dummy node is the first node of the identical-hash-value chain */
goto insert;
next = rcu_dereference(clear_flag(iter)->p.next);
- if (unlikely(is_removed(next)))
+ if (caa_unlikely(is_removed(next)))
goto gc_node;
/* uniquely add */
* (including observe one node by one node
* by forward iterations)
*/
- cds_lfht_next_duplicate(ht, &d_iter);
+ cds_lfht_next_duplicate(ht, match, &d_iter);
if (!d_iter.node)
goto insert;
struct cds_lfht_node *new_next;
next = old;
- if (unlikely(is_removed(next)))
+ if (caa_unlikely(is_removed(next)))
return -ENOENT;
if (dummy_removal)
assert(is_dummy(next));
i, j, (1UL << (i - 1)) + j);
new_node->p.reverse_hash =
bit_reverse_ulong((1UL << (i - 1)) + j);
- _cds_lfht_add(ht, 1UL << (i - 1),
+ _cds_lfht_add(ht, NULL, 1UL << (i - 1),
new_node, NULL, 1);
}
ht->cds_lfht_rcu_read_unlock();
}
}
-struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct,
- cds_lfht_compare_fct compare_fct,
- unsigned long hash_seed,
- unsigned long init_size,
+struct cds_lfht *_cds_lfht_new(unsigned long init_size,
unsigned long min_alloc_size,
int flags,
void (*cds_lfht_call_rcu)(struct rcu_head *head,
init_size = max(init_size, min_alloc_size);
ht = calloc(1, sizeof(struct cds_lfht));
assert(ht);
- ht->hash_fct = hash_fct;
- ht->compare_fct = compare_fct;
- ht->hash_seed = hash_seed;
+ ht->flags = flags;
ht->cds_lfht_call_rcu = cds_lfht_call_rcu;
ht->cds_lfht_synchronize_rcu = cds_lfht_synchronize_rcu;
ht->cds_lfht_rcu_read_lock = cds_lfht_rcu_read_lock;
alloc_split_items_count(ht);
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
- ht->flags = flags;
order = get_count_order_ulong(init_size);
ht->t.resize_target = 1UL << order;
- cds_lfht_create_dummy(ht, 1UL << order);
- ht->t.size = 1UL << order;
ht->min_alloc_size = min_alloc_size;
ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
+ cds_lfht_create_dummy(ht, 1UL << order);
+ ht->t.size = 1UL << order;
return ht;
}
-void cds_lfht_lookup(struct cds_lfht *ht, void *key, size_t key_len,
- struct cds_lfht_iter *iter)
+void cds_lfht_lookup(struct cds_lfht *ht, cds_lfht_match_fct match,
+ unsigned long hash, void *key, struct cds_lfht_iter *iter)
{
struct cds_lfht_node *node, *next, *dummy_node;
struct _cds_lfht_node *lookup;
- unsigned long hash, reverse_hash, size;
+ unsigned long reverse_hash, size;
- hash = ht->hash_fct(key, key_len, ht->hash_seed);
reverse_hash = bit_reverse_ulong(hash);
size = rcu_dereference(ht->t.size);
node = rcu_dereference(dummy_node->p.next);
node = clear_flag(node);
for (;;) {
- if (unlikely(is_end(node))) {
+ if (caa_unlikely(is_end(node))) {
node = next = NULL;
break;
}
- if (unlikely(node->p.reverse_hash > reverse_hash)) {
+ if (caa_unlikely(node->p.reverse_hash > reverse_hash)) {
node = next = NULL;
break;
}
next = rcu_dereference(node->p.next);
- if (likely(!is_removed(next))
+ assert(node == clear_flag(node));
+ if (caa_likely(!is_removed(next))
&& !is_dummy(next)
- && clear_flag(node)->p.reverse_hash == reverse_hash
- && likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) {
+ && node->p.reverse_hash == reverse_hash
+ && caa_likely(match(node, key))) {
break;
}
node = clear_flag(next);
iter->next = next;
}
-void cds_lfht_next_duplicate(struct cds_lfht *ht, struct cds_lfht_iter *iter)
+void cds_lfht_next_duplicate(struct cds_lfht *ht, cds_lfht_match_fct match,
+ struct cds_lfht_iter *iter)
{
struct cds_lfht_node *node, *next;
unsigned long reverse_hash;
void *key;
- size_t key_len;
node = iter->node;
reverse_hash = node->p.reverse_hash;
key = node->key;
- key_len = node->key_len;
next = iter->next;
node = clear_flag(next);
for (;;) {
- if (unlikely(is_end(node))) {
+ if (caa_unlikely(is_end(node))) {
node = next = NULL;
break;
}
- if (unlikely(node->p.reverse_hash > reverse_hash)) {
+ if (caa_unlikely(node->p.reverse_hash > reverse_hash)) {
node = next = NULL;
break;
}
next = rcu_dereference(node->p.next);
- if (likely(!is_removed(next))
+ if (caa_likely(!is_removed(next))
&& !is_dummy(next)
- && likely(!ht->compare_fct(node->key, node->key_len, key, key_len))) {
+ && caa_likely(match(node->key, key))) {
break;
}
node = clear_flag(next);
node = clear_flag(iter->next);
for (;;) {
- if (unlikely(is_end(node))) {
+ if (caa_unlikely(is_end(node))) {
node = next = NULL;
break;
}
next = rcu_dereference(node->p.next);
- if (likely(!is_removed(next))
+ if (caa_likely(!is_removed(next))
&& !is_dummy(next)) {
break;
}
cds_lfht_next(ht, iter);
}
-void cds_lfht_add(struct cds_lfht *ht, struct cds_lfht_node *node)
+void cds_lfht_add(struct cds_lfht *ht, unsigned long hash,
+ struct cds_lfht_node *node)
{
- unsigned long hash, size;
+ unsigned long size;
- hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);
-
size = rcu_dereference(ht->t.size);
- _cds_lfht_add(ht, size, node, NULL, 0);
+ _cds_lfht_add(ht, NULL, size, node, NULL, 0);
ht_count_add(ht, size, hash);
}
struct cds_lfht_node *cds_lfht_add_unique(struct cds_lfht *ht,
+ cds_lfht_match_fct match,
+ unsigned long hash,
struct cds_lfht_node *node)
{
- unsigned long hash, size;
+ unsigned long size;
struct cds_lfht_iter iter;
- hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);
-
size = rcu_dereference(ht->t.size);
- _cds_lfht_add(ht, size, node, &iter, 0);
+ _cds_lfht_add(ht, match, size, node, &iter, 0);
if (iter.node == node)
ht_count_add(ht, size, hash);
return iter.node;
}
struct cds_lfht_node *cds_lfht_add_replace(struct cds_lfht *ht,
+ cds_lfht_match_fct match,
+ unsigned long hash,
struct cds_lfht_node *node)
{
- unsigned long hash, size;
+ unsigned long size;
struct cds_lfht_iter iter;
- hash = ht->hash_fct(node->key, node->key_len, ht->hash_seed);
node->p.reverse_hash = bit_reverse_ulong((unsigned long) hash);
-
size = rcu_dereference(ht->t.size);
for (;;) {
- _cds_lfht_add(ht, size, node, &iter, 0);
+ _cds_lfht_add(ht, match, size, node, &iter, 0);
if (iter.node == node) {
ht_count_add(ht, size, hash);
return NULL;
}
static
-unsigned long resize_target_update(struct cds_lfht *ht, unsigned long size,
- int growth_order)
+unsigned long resize_target_grow(struct cds_lfht *ht, unsigned long new_size)
{
- return _uatomic_max(&ht->t.resize_target,
- size << growth_order);
+ return _uatomic_xchg_monotonic_increase(&ht->t.resize_target, new_size);
}
static
}
static
-void cds_lfht_resize_lazy(struct cds_lfht *ht, unsigned long size, int growth)
+void __cds_lfht_resize_lazy_launch(struct cds_lfht *ht)
{
struct rcu_resize_work *work;
- unsigned long target_size;
- target_size = resize_target_update(ht, size, growth);
/* Store resize_target before read resize_initiated */
cmm_smp_mb();
- if (!CMM_LOAD_SHARED(ht->t.resize_initiated) && size < target_size) {
+ if (!CMM_LOAD_SHARED(ht->t.resize_initiated)) {
uatomic_inc(&ht->in_progress_resize);
cmm_smp_mb(); /* increment resize count before load destroy */
if (CMM_LOAD_SHARED(ht->in_progress_destroy)) {
}
}
+static
+void cds_lfht_resize_lazy_grow(struct cds_lfht *ht, unsigned long size, int growth)
+{
+ unsigned long target_size = size << growth;
+
+ if (resize_target_grow(ht, target_size) >= target_size)
+ return;
+
+ __cds_lfht_resize_lazy_launch(ht);
+}
+
+/*
+ * We favor grow operations over shrink. A shrink operation never occurs
+ * if a grow operation is queued for lazy execution. A grow operation
+ * cancels any pending shrink lazy execution.
+ */
static
void cds_lfht_resize_lazy_count(struct cds_lfht *ht, unsigned long size,
unsigned long count)
{
- struct rcu_resize_work *work;
-
if (!(ht->flags & CDS_LFHT_AUTO_RESIZE))
return;
- resize_target_update_count(ht, count);
- /* Store resize_target before read resize_initiated */
- cmm_smp_mb();
- if (!CMM_LOAD_SHARED(ht->t.resize_initiated)) {
- uatomic_inc(&ht->in_progress_resize);
- cmm_smp_mb(); /* increment resize count before load destroy */
- if (CMM_LOAD_SHARED(ht->in_progress_destroy)) {
- uatomic_dec(&ht->in_progress_resize);
+ count = max(count, ht->min_alloc_size);
+ if (count == size)
+ return; /* Already the right size, no resize needed */
+ if (count > size) { /* lazy grow */
+ if (resize_target_grow(ht, count) >= count)
return;
+ } else { /* lazy shrink */
+ for (;;) {
+ unsigned long s;
+
+ s = uatomic_cmpxchg(&ht->t.resize_target, size, count);
+ if (s == size)
+ break; /* no resize needed */
+ if (s > size)
+ return; /* growing is/(was just) in progress */
+ if (s <= count)
+ return; /* some other thread do shrink */
+ size = s;
}
- work = malloc(sizeof(*work));
- work->ht = ht;
- ht->cds_lfht_call_rcu(&work->head, do_resize_cb);
- CMM_STORE_SHARED(ht->t.resize_initiated, 1);
}
+ __cds_lfht_resize_lazy_launch(ht);
}