* the "dummy node" tables.
* - There is one dummy node table per hash index order. The size of
* each dummy node table is half the number of hashes contained in
- * this order.
- * - call_rcu is used to garbage-collect the old order table.
+ * this order (except for order 0).
+ *   - synchronize_rcu is used to garbage-collect the old dummy node table.
* - The per-order dummy node tables contain a compact version of the
* hash table nodes. These tables are invariant after they are
* populated into the hash table.
- *
+ *
+ * Dummy node tables:
+ *
+ * hash table hash table the last all dummy node tables
+ * order size dummy node 0 1 2 3 4 5 6(index)
+ * table size
+ * 0 1 1 1
+ * 1 2 1 1 1
+ * 2 4 2 1 1 2
+ * 3 8 4 1 1 2 4
+ * 4 16 8 1 1 2 4 8
+ * 5 32 16 1 1 2 4 8 16
+ * 6 64 32 1 1 2 4 8 16 32
+ *
+ * When growing/shrinking, we only focus on the last dummy node table
+ * whose size is (!order ? 1 : (1 << (order - 1))).
+ *
+ * Example for growing/shrinking:
+ * grow hash table from order 5 to 6: init the index=6 dummy node table
+ * shrink hash table from order 6 to 5: fini the index=6 dummy node table
+ *
* A bit of ascii art explanation:
*
* Order index is the off-by-one compare to the actual power of 2 because
*
* order bits reverse
* 0 0 000 000
- * |
- * 1 | 1 001 100 <- <-
- * | | | |
- * 2 | | 2 010 010 | |
+ * 1 | 1 001 100 <-
+ * 2 | | 2 010 010 <- |
* | | | 3 011 110 | <- |
- * | | | | | | |
* 3 -> | | | 4 100 001 | |
* -> | | 5 101 101 |
* -> | 6 110 011
{
struct cds_lfht_node *dummy, *ret_next;
struct _cds_lfht_node *lookup;
- int flagged = 0;
if (!old_node) /* Return -ENOENT if asked to replace NULL node */
- goto end;
+ return -ENOENT;
assert(!is_removed(old_node));
assert(!is_dummy(old_node));
* Too late, the old node has been removed under us
* between lookup and replace. Fail.
*/
- goto end;
+ return -ENOENT;
}
assert(!is_dummy(old_next));
assert(new_node != clear_flag(old_next));
ret_next = uatomic_cmpxchg(&old_node->p.next,
old_next, flag_removed(new_node));
if (ret_next == old_next)
- break;
+ break; /* We performed the replacement. */
old_next = ret_next;
}
- /* We performed the replacement. */
- flagged = 1;
-
/*
* Ensure that the old node is not visible to readers anymore:
* lookup for the node, and remove it (along with any other
lookup = lookup_bucket(ht, size, bit_reverse_ulong(old_node->p.reverse_hash));
dummy = (struct cds_lfht_node *) lookup;
_cds_lfht_gc_bucket(dummy, new_node);
-end:
- /*
- * Only the flagging action indicated that we (and no other)
- * replaced the node from the hash table.
- */
- if (flagged) {
- assert(is_removed(rcu_dereference(old_node->p.next)));
- return 0;
- } else {
- return -ENOENT;
- }
+
+ assert(is_removed(rcu_dereference(old_node->p.next)));
+ return 0;
}
/*
goto insert;
if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
goto insert;
+
/* dummy node is the first node of the identical-hash-value chain */
if (dummy && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash)
goto insert;
+
next = rcu_dereference(clear_flag(iter)->p.next);
if (unlikely(is_removed(next)))
goto gc_node;
+
+ /* uniquely add */
if (unique_ret
&& !is_dummy(next)
- && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash
- && !ht->compare_fct(node->key, node->key_len,
- clear_flag(iter)->key,
- clear_flag(iter)->key_len)) {
- unique_ret->node = clear_flag(iter);
- unique_ret->next = next;
+ && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash) {
+ struct cds_lfht_iter d_iter = { .node = node, .next = iter, };
+
+ /*
+ * uniquely adding inserts the node as the first
+ * node of the identical-hash-value node chain.
+ *
+		 * This semantic ensures that no duplicate keys
+		 * should ever be observable in the table
+		 * (even when observing the nodes one by one
+		 * during a forward iteration)
+ */
+ cds_lfht_next_duplicate(ht, &d_iter);
+ if (!d_iter.node)
+ goto insert;
+
+ *unique_ret = d_iter;
return;
}
+
/* Only account for identical reverse hash once */
if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash
&& !is_dummy(next))
{
struct cds_lfht_node *dummy, *next, *old;
struct _cds_lfht_node *lookup;
- int flagged = 0;
if (!node) /* Return -ENOENT if asked to delete NULL node */
- goto end;
+ return -ENOENT;
/* logically delete the node */
assert(!is_dummy(node));
next = old;
if (unlikely(is_removed(next)))
- goto end;
+ return -ENOENT;
if (dummy_removal)
assert(is_dummy(next));
else
new_next = flag_removed(next);
old = uatomic_cmpxchg(&node->p.next, next, new_next);
} while (old != next);
-
/* We performed the (logical) deletion. */
- flagged = 1;
/*
* Ensure that the node is not visible to readers anymore: lookup for
lookup = lookup_bucket(ht, size, bit_reverse_ulong(node->p.reverse_hash));
dummy = (struct cds_lfht_node *) lookup;
_cds_lfht_gc_bucket(dummy, node);
-end:
- /*
- * Only the flagging action indicated that we (and no other)
- * removed the node from the hash.
- */
- if (flagged) {
- assert(is_removed(rcu_dereference(node->p.next)));
- return 0;
- } else {
- return -ENOENT;
- }
+
+ assert(is_removed(rcu_dereference(node->p.next)));
+ return 0;
}
static
static
void init_table(struct cds_lfht *ht,
- unsigned long first_order, unsigned long len_order)
+ unsigned long first_order, unsigned long last_order)
{
- unsigned long i, end_order;
+ unsigned long i;
- dbg_printf("init table: first_order %lu end_order %lu\n",
- first_order, first_order + len_order);
- end_order = first_order + len_order;
- for (i = first_order; i < end_order; i++) {
+ dbg_printf("init table: first_order %lu last_order %lu\n",
+ first_order, last_order);
+ for (i = first_order; i <= last_order; i++) {
unsigned long len;
len = !i ? 1 : 1UL << (i - 1);
static
void fini_table(struct cds_lfht *ht,
- unsigned long first_order, unsigned long len_order)
+ unsigned long first_order, unsigned long last_order)
{
- long i, end_order;
+ long i;
void *free_by_rcu = NULL;
- dbg_printf("fini table: first_order %lu end_order %lu\n",
- first_order, first_order + len_order);
- end_order = first_order + len_order;
+ dbg_printf("fini table: first_order %lu last_order %lu\n",
+ first_order, last_order);
assert(first_order > 0);
- for (i = end_order - 1; i >= first_order; i--) {
+ for (i = last_order; i >= first_order; i--) {
unsigned long len;
len = !i ? 1 : 1UL << (i - 1);
ht->percpu_count = alloc_per_cpu_items_count();
/* this mutex should not nest in read-side C.S. */
pthread_mutex_init(&ht->resize_mutex, NULL);
- order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE)) + 1;
+ order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE));
ht->flags = flags;
ht->cds_lfht_rcu_thread_offline();
pthread_mutex_lock(&ht->resize_mutex);
- ht->t.resize_target = 1UL << (order - 1);
+ ht->t.resize_target = 1UL << order;
init_table(ht, 0, order);
pthread_mutex_unlock(&ht->resize_mutex);
ht->cds_lfht_rcu_thread_online();
{
unsigned long old_order, new_order;
- old_order = get_count_order_ulong(old_size) + 1;
- new_order = get_count_order_ulong(new_size) + 1;
+ old_order = get_count_order_ulong(old_size);
+ new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
old_size, old_order, new_size, new_order);
assert(new_size > old_size);
- init_table(ht, old_order, new_order - old_order);
+ init_table(ht, old_order + 1, new_order);
}
/* called with resize mutex held */
unsigned long old_order, new_order;
new_size = max(new_size, MIN_TABLE_SIZE);
- old_order = get_count_order_ulong(old_size) + 1;
- new_order = get_count_order_ulong(new_size) + 1;
+ old_order = get_count_order_ulong(old_size);
+ new_order = get_count_order_ulong(new_size);
dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
old_size, old_order, new_size, new_order);
assert(new_size < old_size);
/* Remove and unlink all dummy nodes to remove. */
- fini_table(ht, new_order, old_order - new_order);
+ fini_table(ht, new_order + 1, old_order);
}