rculfhash: fix uniquely add vs cds_lfht_next observation semantic
[urcu.git] / rculfhash.c
index 0f36899371dc2d0958306023b78ae1c3834e92da..b67acc820a078011f81963608f7c366fc1a3d2ec 100644 (file)
  *   the "dummy node" tables.
  * - There is one dummy node table per hash index order. The size of
  *   each dummy node table is half the number of hashes contained in
- *   this order.
- * - call_rcu is used to garbage-collect the old order table.
+ *   this order (except for order 0).
+ * - synchronize_rcu is used to garbage-collect the old dummy node table.
  * - The per-order dummy node tables contain a compact version of the
  *   hash table nodes. These tables are invariant after they are
  *   populated into the hash table.
- * 
+ *
+ * Dummy node tables:
+ *
+ * hash table  hash table      the last        all dummy node tables
+ * order       size            dummy node      0   1   2   3   4   5   6(index)
+ *                             table size
+ * 0           1               1               1
+ * 1           2               1               1   1
+ * 2           4               2               1   1   2
+ * 3           8               4               1   1   2   4
+ * 4           16              8               1   1   2   4   8
+ * 5           32              16              1   1   2   4   8  16
+ * 6           64              32              1   1   2   4   8  16  32
+ *
+ * When growing/shrinking, we only focus on the last dummy node table,
+ * whose size is (!order ? 1 : (1 << (order - 1))).
+ *
+ * Example for growing/shrinking:
+ * grow hash table from order 5 to 6: init the index=6 dummy node table
+ * shrink hash table from order 6 to 5: fini the index=6 dummy node table
+ *
  * A bit of ascii art explanation:
  * 
  * Order index is the off-by-one compare to the actual power of 2 because 
  * 
  * order              bits       reverse
  * 0               0  000        000
- *                 |
- * 1               |  1  001        100       <-    <-
- *                 |  |                        |     |
- * 2               |  |  2  010        010     |     |
+ * 1               |  1  001        100             <-
+ * 2               |  |  2  010        010    <-     |
  *                 |  |  |  3  011        110  | <-  |
- *                 |  |  |  |                  |  |  |
  * 3               -> |  |  |  4  100        001  |  |
  *                    -> |  |     5  101        101  |
  *                       -> |        6  110        011
@@ -877,22 +894,38 @@ void _cds_lfht_add(struct cds_lfht *ht,
                                goto insert;
                        if (likely(clear_flag(iter)->p.reverse_hash > node->p.reverse_hash))
                                goto insert;
+
                        /* dummy node is the first node of the identical-hash-value chain */
                        if (dummy && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash)
                                goto insert;
+
                        next = rcu_dereference(clear_flag(iter)->p.next);
                        if (unlikely(is_removed(next)))
                                goto gc_node;
+
+                       /* uniquely add */
                        if (unique_ret
                            && !is_dummy(next)
-                           && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash
-                           && !ht->compare_fct(node->key, node->key_len,
-                                               clear_flag(iter)->key,
-                                               clear_flag(iter)->key_len)) {
-                               unique_ret->node = clear_flag(iter);
-                               unique_ret->next = next;
+                           && clear_flag(iter)->p.reverse_hash == node->p.reverse_hash) {
+                               struct cds_lfht_iter d_iter = { .node = node, .next = iter, };
+
+                               /*
+                                * A unique add inserts the node as the first
+                                * node of the identical-hash-value node chain.
+                                *
+                                * This semantic ensures that no duplicated keys
+                                * are ever observable in the table, including
+                                * when the table is traversed node by node
+                                * through forward iteration.
+                                */
+                               cds_lfht_next_duplicate(ht, &d_iter);
+                               if (!d_iter.node)
+                                       goto insert;
+
+                               *unique_ret = d_iter;
                                return;
                        }
+
                        /* Only account for identical reverse hash once */
                        if (iter_prev->p.reverse_hash != clear_flag(iter)->p.reverse_hash
                            && !is_dummy(next))
@@ -945,10 +978,9 @@ int _cds_lfht_del(struct cds_lfht *ht, unsigned long size,
 {
        struct cds_lfht_node *dummy, *next, *old;
        struct _cds_lfht_node *lookup;
-       int flagged = 0;
 
        if (!node)      /* Return -ENOENT if asked to delete NULL node */
-               goto end;
+               return -ENOENT;
 
        /* logically delete the node */
        assert(!is_dummy(node));
@@ -959,7 +991,7 @@ int _cds_lfht_del(struct cds_lfht *ht, unsigned long size,
 
                next = old;
                if (unlikely(is_removed(next)))
-                       goto end;
+                       return -ENOENT;
                if (dummy_removal)
                        assert(is_dummy(next));
                else
@@ -967,9 +999,7 @@ int _cds_lfht_del(struct cds_lfht *ht, unsigned long size,
                new_next = flag_removed(next);
                old = uatomic_cmpxchg(&node->p.next, next, new_next);
        } while (old != next);
-
        /* We performed the (logical) deletion. */
-       flagged = 1;
 
        /*
         * Ensure that the node is not visible to readers anymore: lookup for
@@ -979,17 +1009,9 @@ int _cds_lfht_del(struct cds_lfht *ht, unsigned long size,
        lookup = lookup_bucket(ht, size, bit_reverse_ulong(node->p.reverse_hash));
        dummy = (struct cds_lfht_node *) lookup;
        _cds_lfht_gc_bucket(dummy, node);
-end:
-       /*
-        * Only the flagging action indicated that we (and no other)
-        * removed the node from the hash.
-        */
-       if (flagged) {
-               assert(is_removed(rcu_dereference(node->p.next)));
-               return 0;
-       } else {
-               return -ENOENT;
-       }
+
+       assert(is_removed(rcu_dereference(node->p.next)));
+       return 0;
 }
 
 static
@@ -1093,14 +1115,13 @@ void init_table_populate(struct cds_lfht *ht, unsigned long i,
 
 static
 void init_table(struct cds_lfht *ht,
-               unsigned long first_order, unsigned long len_order)
+               unsigned long first_order, unsigned long last_order)
 {
-       unsigned long i, end_order;
+       unsigned long i;
 
-       dbg_printf("init table: first_order %lu end_order %lu\n",
-                  first_order, first_order + len_order);
-       end_order = first_order + len_order;
-       for (i = first_order; i < end_order; i++) {
+       dbg_printf("init table: first_order %lu last_order %lu\n",
+                  first_order, last_order);
+       for (i = first_order; i <= last_order; i++) {
                unsigned long len;
 
                len = !i ? 1 : 1UL << (i - 1);
@@ -1193,16 +1214,15 @@ void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len)
 
 static
 void fini_table(struct cds_lfht *ht,
-               unsigned long first_order, unsigned long len_order)
+               unsigned long first_order, unsigned long last_order)
 {
-       long i, end_order;
+       long i;
        void *free_by_rcu = NULL;
 
-       dbg_printf("fini table: first_order %lu end_order %lu\n",
-                  first_order, first_order + len_order);
-       end_order = first_order + len_order;
+       dbg_printf("fini table: first_order %lu last_order %lu\n",
+                  first_order, last_order);
        assert(first_order > 0);
-       for (i = end_order - 1; i >= first_order; i--) {
+       for (i = last_order; i >= first_order; i--) {
                unsigned long len;
 
                len = !i ? 1 : 1UL << (i - 1);
@@ -1285,11 +1305,11 @@ struct cds_lfht *_cds_lfht_new(cds_lfht_hash_fct hash_fct,
        ht->percpu_count = alloc_per_cpu_items_count();
        /* this mutex should not nest in read-side C.S. */
        pthread_mutex_init(&ht->resize_mutex, NULL);
-       order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE)) + 1;
+       order = get_count_order_ulong(max(init_size, MIN_TABLE_SIZE));
        ht->flags = flags;
        ht->cds_lfht_rcu_thread_offline();
        pthread_mutex_lock(&ht->resize_mutex);
-       ht->t.resize_target = 1UL << (order - 1);
+       ht->t.resize_target = 1UL << order;
        init_table(ht, 0, order);
        pthread_mutex_unlock(&ht->resize_mutex);
        ht->cds_lfht_rcu_thread_online();
@@ -1596,12 +1616,12 @@ void _do_cds_lfht_grow(struct cds_lfht *ht,
 {
        unsigned long old_order, new_order;
 
-       old_order = get_count_order_ulong(old_size) + 1;
-       new_order = get_count_order_ulong(new_size) + 1;
+       old_order = get_count_order_ulong(old_size);
+       new_order = get_count_order_ulong(new_size);
        dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
                   old_size, old_order, new_size, new_order);
        assert(new_size > old_size);
-       init_table(ht, old_order, new_order - old_order);
+       init_table(ht, old_order + 1, new_order);
 }
 
 /* called with resize mutex held */
@@ -1612,14 +1632,14 @@ void _do_cds_lfht_shrink(struct cds_lfht *ht,
        unsigned long old_order, new_order;
 
        new_size = max(new_size, MIN_TABLE_SIZE);
-       old_order = get_count_order_ulong(old_size) + 1;
-       new_order = get_count_order_ulong(new_size) + 1;
+       old_order = get_count_order_ulong(old_size);
+       new_order = get_count_order_ulong(new_size);
        dbg_printf("resize from %lu (order %lu) to %lu (order %lu) buckets\n",
                   old_size, old_order, new_size, new_order);
        assert(new_size < old_size);
 
        /* Remove and unlink all dummy nodes to remove. */
-       fini_table(ht, new_order, old_order - new_order);
+       fini_table(ht, new_order + 1, old_order);
 }
 
 
This page took 0.027647 seconds and 4 git commands to generate.