* the "dummy node" tables.
* - There is one dummy node table per hash index order. The size of
* each dummy node table is half the number of hashes contained in
- * this order.
- * - call_rcu is used to garbage-collect the old order table.
+ * this order (except for order 0).
+ * - synchronize_rcu is used to garbage-collect the old dummy node table.
* - The per-order dummy node tables contain a compact version of the
* hash table nodes. These tables are invariant after they are
* populated into the hash table.
- *
+ *
+ * Dummy node tables:
+ *
+ * hash table   hash table   the last       all dummy node tables
+ * order        size         dummy node     0   1   2   3   4   5   6(index)
+ *                           table size
+ * 0            1            1              1
+ * 1            2            1              1   1
+ * 2            4            2              1   1   2
+ * 3            8            4              1   1   2   4
+ * 4            16           8              1   1   2   4   8
+ * 5            32           16             1   1   2   4   8   16
+ * 6            64           32             1   1   2   4   8   16  32
+ *
+ * When growing/shrinking, we only focus on the last dummy node table,
+ * whose size is (!order ? 1 : (1UL << (order - 1))); see the standalone
+ * sketch after this hunk for the index-to-order mapping.
+ *
+ * Example for growing/shrinking:
+ * grow hash table from order 5 to 6: init the index=6 dummy node table
+ * shrink hash table from order 6 to 5: fini the index=6 dummy node table
+ *
* A bit of ascii art explanation:
*
* The order index is off by one compared to the actual power of 2 because
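To make the order arithmetic above concrete, here is a standalone sketch (editorial example, not part of the patch): sketch_fls_ulong and the use of __builtin_clzl are stand-ins for the file's fls_ulong(); it prints which per-order dummy node table and offset each bucket index maps to.

#include <stdio.h>

static unsigned long sketch_fls_ulong(unsigned long x)
{
        /* position of the most significant set bit, 0 for x == 0 */
        return x ? sizeof(unsigned long) * 8 - __builtin_clzl(x) : 0;
}

int main(void)
{
        unsigned long index;

        for (index = 0; index < 8; index++) {
                unsigned long order = sketch_fls_ulong(index);
                unsigned long mask = !order ? 0 : (1UL << (order - 1)) - 1;

                /* index 0 -> table 0; 2,3 -> table 2; 4..7 -> table 3 */
                printf("index %lu -> order table %lu, offset %lu\n",
                        index, order, index & mask);
        }
        return 0;
}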
struct rcu_table t;
cds_lfht_hash_fct hash_fct;
cds_lfht_compare_fct compare_fct;
+ unsigned long min_alloc_order;
+ unsigned long min_alloc_size;
unsigned long hash_seed;
int flags;
/*
assert(size > 0);
index = hash & (size - 1);
+
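+ /*
+ * Buckets below min_alloc_size all sit in the flat tbl[0] area
+ * (the lower-order tables alias into it), so they can be indexed
+ * directly without computing the order.
+ */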
+ if (index < ht->min_alloc_size) {
+ dbg_printf("lookup hash %lu index %lu order 0 aridx 0\n",
+ hash, index);
+ return &ht->t.tbl[0]->nodes[index];
+ }
/*
* equivalent to get_count_order_ulong(index + 1), but optimizes
* away the non-existing 0 special-case for
* get_count_order_ulong.
*/
order = fls_ulong(index);
-
dbg_printf("lookup hash %lu index %lu order %lu aridx %lu\n",
- hash, index, order, index & (!order ? 0 : ((1UL << (order - 1)) - 1)));
-
- return &ht->t.tbl[order]->nodes[index & (!order ? 0 : ((1UL << (order - 1)) - 1))];
+ hash, index, order, index & ((1UL << (order - 1)) - 1));
+ return &ht->t.tbl[order]->nodes[index & ((1UL << (order - 1)) - 1)];
}
/*
assert(!is_dummy(node));
assert(!is_removed(node));
- if (!size) {
- assert(dummy);
- assert(!unique_ret);
- node->p.next = flag_dummy(get_end());
- return; /* Initial first add (head) */
- }
lookup = lookup_bucket(ht, size, bit_reverse_ulong(node->p.reverse_hash));
for (;;) {
uint32_t chain_len = 0;
goto insert;
if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
goto insert;
+
/* dummy node is the first node of the identical-hash-value chain */
if (dummy && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash)
goto insert;
+
next = rcu_dereference(clear_flag(iter)->p.next);
if (unlikely(is_removed(next)))
goto gc_node;
+
+ /* uniquely add */
if (unique_ret
&& !is_dummy(next)
- && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash
- && !ht->compare_fct(node->key, node->key_len,
- clear_flag(iter)->key,
- clear_flag(iter)->key_len)) {
- unique_ret->node = clear_flag(iter);
- unique_ret->next = next;
+ && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash) {
+ struct cds_lfht_iter d_iter = { .node = node, .next = iter, };
+
+ /*
+ * uniquely adding inserts the node as the first
+ * node of the identical-hash-value node chain.
+ *
+ * This semantic ensures that no duplicated keys
+ * can ever be observed in the table, even when
+ * iterating forward one node at a time.
+ */
+ cds_lfht_next_duplicate(ht, &d_iter);
+ if (!d_iter.node)
+ goto insert;
+
+ *unique_ret = d_iter;
return;
}
+
/* Only account for identical reverse hash once */
if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash
&& !is_dummy(next))
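A caller-side sketch of the unique-add contract described in the comment above (editorial example; it assumes the convention, implemented outside this hunk, that _cds_lfht_add() reports the added node back through unique_ret on success):

struct cds_lfht_iter ret_iter;

_cds_lfht_add(ht, size, node, &ret_iter, 0);
if (ret_iter.node != node) {
        /*
         * An equal-key node was already present: 'node' was not
         * inserted, and ret_iter points at the existing duplicate.
         */
}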
{
unsigned long j;
+ assert(i > ht->min_alloc_order);
ht->cds_lfht_rcu_read_lock();
for (j = start; j < start + len; j++) {
struct cds_lfht_node *new_node =
(struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j];
dbg_printf("init populate: i %lu j %lu hash %lu\n",
- i, j, !i ? 0 : (1UL << (i - 1)) + j);
+ i, j, (1UL << (i - 1)) + j);
new_node->p.reverse_hash =
- bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j);
- _cds_lfht_add(ht, !i ? 0 : (1UL << (i - 1)),
+ bit_reverse_ulong((1UL << (i - 1)) + j);
+ _cds_lfht_add(ht, 1UL << (i - 1),
new_node, NULL, 1);
}
ht->cds_lfht_rcu_read_unlock();
static
void init_table(struct cds_lfht *ht,
- unsigned long first_order, unsigned long len_order)
+ unsigned long first_order, unsigned long last_order)
{
- unsigned long i, end_order;
+ unsigned long i;
- dbg_printf("init table: first_order %lu end_order %lu\n",
- first_order, first_order + len_order);
- end_order = first_order + len_order;
- for (i = first_order; i < end_order; i++) {
+ dbg_printf("init table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ assert(first_order > ht->min_alloc_order);
+ for (i = first_order; i <= last_order; i++) {
unsigned long len;
- len = !i ? 1 : 1UL << (i - 1);
+ len = 1UL << (i - 1);
dbg_printf("init order %lu len: %lu\n", i, len);
/* Stop expand if the resize target changes under us */
- if (CMM_LOAD_SHARED(ht->t.resize_target) < (!i ? 1 : (1UL << i)))
+ if (CMM_LOAD_SHARED(ht->t.resize_target) < (1UL << i))
break;
ht->t.tbl[i] = calloc(1, len * sizeof(struct _cds_lfht_node));
* Update table size.
*/
cmm_smp_wmb(); /* populate data before RCU size */
- CMM_STORE_SHARED(ht->t.size, !i ? 1 : (1UL << i));
+ CMM_STORE_SHARED(ht->t.size, 1UL << i);
- dbg_printf("init new size: %lu\n", !i ? 1 : (1UL << i));
+ dbg_printf("init new size: %lu\n", 1UL << i);
if (CMM_LOAD_SHARED(ht->in_progress_destroy))
break;
}
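The ordering contract behind the cmm_smp_wmb() above, restated as a sketch (the reader-side lines are an assumption based on the usual RCU publication pattern, not code from this patch; readers also rely on address dependencies when chasing bucket pointers):

/* writer (init_table): make bucket nodes visible before the size */
cmm_smp_wmb();
CMM_STORE_SHARED(ht->t.size, 1UL << i);

/* reader (lookup path): once the new size is observed ... */
unsigned long size = CMM_LOAD_SHARED(ht->t.size);
/* ... every bucket with index < size is fully populated. */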
{
unsigned long j;
+ assert(i > ht->min_alloc_order);
ht->cds_lfht_rcu_read_lock();
for (j = start; j < start + len; j++) {
struct cds_lfht_node *fini_node =
(struct cds_lfht_node *) &ht->t.tbl[i]->nodes[j];
dbg_printf("remove entry: i %lu j %lu hash %lu\n",
- i, j, !i ? 0 : (1UL << (i - 1)) + j);
+ i, j, (1UL << (i - 1)) + j);
fini_node->p.reverse_hash =
- bit_reverse_ulong(!i ? 0 : (1UL << (i - 1)) + j);
- (void) _cds_lfht_del(ht, !i ? 0 : (1UL << (i - 1)),
- fini_node, 1);
+ bit_reverse_ulong((1UL << (i - 1)) + j);
+ (void) _cds_lfht_del(ht, 1UL << (i - 1), fini_node, 1);
}
ht->cds_lfht_rcu_read_unlock();
}
static
void fini_table(struct cds_lfht *ht,
- unsigned long first_order, unsigned long len_order)
+ unsigned long first_order, unsigned long last_order)
{
- long i, end_order;
+ long i;
void *free_by_rcu = NULL;
- dbg_printf("fini table: first_order %lu end_order %lu\n",
- first_order, first_order + len_order);
- end_order = first_order + len_order;
- assert(first_order > 0);
- for (i = end_order - 1; i >= first_order; i--) {
+ dbg_printf("fini table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ assert(first_order > ht->min_alloc_order);
+ for (i = last_order; i >= first_order; i--) {
unsigned long len;
- len = !i ? 1 : 1UL << (i - 1);
+ len = 1UL << (i - 1);
dbg_printf("fini order %lu len: %lu\n", i, len);
/* Stop shrink if the resize target changes under us */
}
}
+static
+void cds_lfht_create_dummy(struct cds_lfht *ht, unsigned long size)
+{
+ struct _cds_lfht_node *prev, *node;
+ unsigned long order, len, i, j;
+
+ ht->t.tbl[0] = calloc(1, ht->min_alloc_size * sizeof(struct _cds_lfht_node));
+ assert(ht->t.tbl[0]);
+
+ dbg_printf("create dummy: order %lu index %lu hash %lu\n", 0, 0, 0);
+ ht->t.tbl[0]->nodes[0].next = flag_dummy(get_end());
+ ht->t.tbl[0]->nodes[0].reverse_hash = 0;
+
+ for (order = 1; order < get_count_order_ulong(size) + 1; order++) {
+ len = 1UL << (order - 1);
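+ /*
+ * For order <= min_alloc_order, the dummy node table is not
+ * allocated separately: it aliases nodes[len .. 2*len - 1]
+ * within the tbl[0] block, which covers min_alloc_size nodes.
+ */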
+ if (order <= ht->min_alloc_order) {
+ ht->t.tbl[order] = (void *)(ht->t.tbl[0]->nodes + len);
+ } else {
+ ht->t.tbl[order] = calloc(1, len * sizeof(struct _cds_lfht_node));
+ assert(ht->t.tbl[order]);
+ }
+
+ i = 0;
+ prev = ht->t.tbl[i]->nodes;
+ for (j = 0; j < len; j++) {
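+ /*
+ * prev walks the previously-created dummy nodes in
+ * increasing hash order (0 .. len - 1) across the
+ * lower-order tables, so at step j it always points
+ * to the dummy node of hash j.
+ */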
+ if (j & (j - 1)) { /* Between powers of 2 */
+ prev++;
+ } else if (j) { /* At each power of 2 */
+ i++;
+ prev = ht->t.tbl[i]->nodes;
+ }
+
+ node = &ht->t.tbl[order]->nodes[j];
+ dbg_printf("create dummy: order %lu index %lu hash %lu\n",
+ order, j, j + len);
+ node->next = prev->next;
+ assert(is_dummy(node->next));
+ node->reverse_hash = bit_reverse_ulong(j + len);
+ prev->next = flag_dummy((struct cds_lfht_node *)node);
+ }
+ }
+}
+
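For illustration, a standalone sketch (editorial example, not part of the patch) that enumerates the splice operations cds_lfht_create_dummy() performs for a table of size 8: each new dummy node of hash j + len is linked right after the existing dummy node of hash j, yielding the split-ordered (bit-reverse-sorted) list.

#include <stdio.h>

int main(void)
{
        unsigned long order, len, j;

        printf("order 0: head is hash 0\n");
        for (order = 1; order <= 3; order++) {
                len = 1UL << (order - 1);
                for (j = 0; j < len; j++)
                        printf("order %lu: link hash %lu after hash %lu\n",
                                order, j + len, j);
        }
        return 0;
}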
struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct,
cds_lfht_compare_fct compare_fct,
unsigned long hash_seed,
unsigned long init_size,
+ unsigned long min_alloc_size,
int flags,
void (*cds_lfht_call_rcu)(struct rcu_head *head,
void (*func)(struct rcu_head *head)),
struct cds_lfht *ht;
unsigned long order;
+ /* min_alloc_size must be power of two */
+ if (!min_alloc_size || (min_alloc_size & (min_alloc_size - 1)))
+ return NULL;
/* init_size must be power of two */
- if (init_size && (init_size & (init_size - 1)))
+ if (!init_size || (init_size & (init_size - 1)))
return NULL;
+ min_alloc_size = max(min_alloc_size, MIN_TABLE_SIZE);
+ init_size = max(init_size, min_alloc_size);
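+ /* invariant from here on: MIN_TABLE_SIZE <= min_alloc_size <= init_size */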
ht = calloc(1, sizeof(struct cds_lfht));
assert(ht);
ht->hash_fct = hash_fct;
ht->percpu_count = alloc_per_cpu_items_count();
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
- order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE)) + 1;
ht->flags = flags;
- ht->cds_lfht_rcu_thread_offline();
- pthread_mutex_lock(&ht->resize_mutex);
- ht->t.resize_target = 1UL << (order - 1);
- init_table(ht, 0, order);
- pthread_mutex_unlock(&ht->resize_mutex);
- ht->cds_lfht_rcu_thread_online();
+ order = get_count_order_ulong(init_size);
+ ht->t.resize_target = 1UL << order;
+ /* cds_lfht_create_dummy() reads the min_alloc fields: set them first */
+ ht->min_alloc_size = min_alloc_size;
+ ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
+ cds_lfht_create_dummy(ht, 1UL << order);
+ ht->t.size = 1UL << order;
return ht;
}
bit_reverse_ulong(ht->t.tbl[order]->nodes[i].reverse_hash));
assert(is_dummy(ht->t.tbl[order]->nodes[i].next));
}
- poison_free(ht->t.tbl[order]);
+
+ if (order == ht->min_alloc_order)
+ poison_free(ht->t.tbl[0]);
+ else if (order > ht->min_alloc_order)
+ poison_free(ht->t.tbl[order]);
+ /* Nothing to delete for order < ht->min_alloc_order */
}
return 0;
}
{
unsigned long old_order, new_order;
- old_order = get_count_order_ulong(old_size) + 1;
- new_order = get_count_order_ulong(new_size) + 1;
+ old_order = get_count_order_ulong(old_size);
+ new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
old_size, old_order, new_size, new_order);
assert(new_size > old_size);
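+ /*
+ * Example: growing from 32 to 64 buckets gives old_order = 5 and
+ * new_order = 6, so only the index-6 dummy node table is
+ * initialized, matching the "Dummy node tables" comment above.
+ */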
- init_table(ht, old_order, new_order - old_order);
+ init_table(ht, old_order + 1, new_order);
}
/* called with resize mutex held */
{
unsigned long old_order, new_order;
- new_size = max(new_size, MIN_TABLE_SIZE);
- old_order = get_count_order_ulong(old_size) + 1;
- new_order = get_count_order_ulong(new_size) + 1;
+ new_size = max(new_size, ht->min_alloc_size);
+ old_order = get_count_order_ulong(old_size);
+ new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
old_size, old_order, new_size, new_order);
assert(new_size < old_size);
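+ /*
+ * Example: shrinking from 64 to 32 buckets gives old_order = 6 and
+ * new_order = 5, so only the index-6 dummy node table is removed.
+ */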
/* Remove and unlink all dummy nodes to remove. */
- fini_table(ht, new_order, old_order - new_order);
+ fini_table(ht, new_order + 1, old_order);
}
void resize_target_update_count(struct cds_lfht *ht,
unsigned long count)
{
- count = max(count, MIN_TABLE_SIZE);
+ count = max(count, ht->min_alloc_size);
uatomic_set(&ht->t.resize_target, count);
}