rcuja: implement 1 dimension pool distribution
[urcu.git] / rcuja / rcuja.c
index e6e3a3e6bcc5a10dcc6f0f24354ac5d2788c0573..34f3191a39037c24a058d33f0ebfabc405c0a82c 100644 (file)
@@ -24,6 +24,7 @@
 #include <stdint.h>
 #include <errno.h>
 #include <limits.h>
+#include <string.h>
 #include <urcu/rcuja.h>
 #include <urcu/compiler.h>
 #include <urcu/arch.h>
 #include "rcuja-internal.h"
 #include "bitfield.h"
 
+/*
+ * abs_int: absolute value of an expression converted to int.
+ * The argument is expanded several times: only pass expressions
+ * without side effects.
+ */
+#ifndef abs_int
+#define abs_int(a)     ((int) (a) > 0 ? (int) (a) : -((int) (a)))
+#endif
+
 enum cds_ja_type_class {
        RCU_JA_LINEAR = 0,      /* Type A */
                        /* 32-bit: 1 to 25 children, 8 to 128 bytes */
@@ -120,8 +125,8 @@ const struct cds_ja_type ja_types[] = {
        { .type_class = RCU_JA_POOL, .min_child = 45, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 9, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 7, },
 
        /*
-        * TODO: Upon node removal below min_child, if child pool is
-        * filled beyond capacity, we need to roll back to pigeon.
+        * Upon node removal below min_child, if child pool is filled
+        * beyond capacity, we roll back to pigeon.
         */
        { .type_class = RCU_JA_PIGEON, .min_child = 89, .max_child = ja_type_7_max_child, .order = 10, },
 
@@ -168,8 +173,8 @@ const struct cds_ja_type ja_types[] = {
        { .type_class = RCU_JA_POOL, .min_child = 51, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 10, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 8, },
 
        /*
-        * TODO: Upon node removal below min_child, if child pool is
-        * filled beyond capacity, we need to roll back to pigeon.
+        * Upon node removal below min_child, if child pool is filled
+        * beyond capacity, we roll back to pigeon.
         */
        { .type_class = RCU_JA_PIGEON, .min_child = 101, .max_child = ja_type_7_max_child, .order = 11, },
 
@@ -239,9 +244,60 @@ enum ja_recompact {
        JA_RECOMPACT_DEL,
 };
 
+/*
+ * Keep only the JA_PTR_MASK bits of a flagged node pointer.
+ */
+static
+struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node)
+{
+       unsigned long raw = (unsigned long) node;
+
+       return (struct cds_ja_inode *) (raw & JA_PTR_MASK);
+}
+
+/*
+ * Extract the type index carried in the JA_TYPE_MASK bits of a flagged
+ * node pointer. Returns NODE_INDEX_NULL when the JA_PTR_MASK part of
+ * the pointer is NULL.
+ */
+unsigned long ja_node_type(struct cds_ja_inode_flag *node)
+{
+       unsigned long index;
+
+       if (!_ja_node_mask_ptr(node))
+               return NODE_INDEX_NULL;
+       index = (unsigned int) ((unsigned long) node & JA_TYPE_MASK);
+       assert(index < (1UL << JA_TYPE_BITS));
+       return index;
+}
+
+/*
+ * Return the node address carried by a flagged node pointer, with its
+ * tag bits cleared. Which bits are cleared depends on the node type:
+ * pool nodes additionally clear their pool selector bits
+ * (JA_POOL_1D_MASK, plus JA_POOL_2D_MASK when nr_pool_order is 2).
+ * Returns NULL for a pool type with an unsupported nr_pool_order.
+ */
+struct cds_ja_inode *ja_node_ptr(struct cds_ja_inode_flag *node)
+{
+       unsigned long type_index = ja_node_type(node);
+       const struct cds_ja_type *type;
+
+       type = &ja_types[type_index];
+       if (type->type_class != RCU_JA_POOL) {
+               /* Linear, pigeon, null and any other class. */
+               return _ja_node_mask_ptr(node);
+       }
+       switch (type->nr_pool_order) {
+       case 1:
+               return (struct cds_ja_inode *) (((unsigned long) node) & ~(JA_POOL_1D_MASK | JA_TYPE_MASK));
+       case 2:
+               return (struct cds_ja_inode *) (((unsigned long) node) & ~(JA_POOL_2D_MASK | JA_POOL_1D_MASK | JA_TYPE_MASK));
+       default:
+               assert(0);
+               /*
+                * Do not run off the end of a non-void function when
+                * assertions are compiled out.
+                */
+               return NULL;
+       }
+}
+
+/*
+ * Allocate a zero-filled node of (1 << order) bytes for the given
+ * type, aligned on its own size. Returns NULL on allocation failure.
+ * NOTE(review): the size alignment presumably keeps the low-order
+ * pointer bits free for the type and pool selector tags -- confirm
+ * against the JA_*_MASK definitions.
+ */
 struct cds_ja_inode *alloc_cds_ja_node(const struct cds_ja_type *ja_type)
 {
-       return calloc(1U << ja_type->order, sizeof(char));
+       size_t len = 1U << ja_type->order;
+       void *p;
+       int ret;
+
+       /* len is used both as alignment and as allocation size. */
+       ret = posix_memalign(&p, len, len);
+       if (ret || !p) {
+               return NULL;
+       }
+       memset(p, 0, len);
+       return p;
 }
 
 void free_cds_ja_node(struct cds_ja_inode *node)
@@ -277,6 +333,7 @@ static
 struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag_v,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
@@ -307,6 +364,8 @@ struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
        ptr = rcu_dereference(pointers[i]);
        if (caa_unlikely(child_node_flag_ptr) && ptr)
                *child_node_flag_ptr = &pointers[i];
+       if (caa_unlikely(child_node_flag_v) && ptr)
+               *child_node_flag_v = ptr;
        if (caa_unlikely(node_flag_ptr))
                *node_flag_ptr = &pointers[i];
        return ptr;
@@ -334,21 +393,43 @@ void ja_linear_node_get_ith_pos(const struct cds_ja_type *type,
+/*
+ * Look up the child for key byte n in a pool node: select the linear
+ * sub-node (pool) that n belongs to, then delegate the lookup to
+ * ja_linear_node_get_nth() on that pool.
+ *
+ * node_flag is the flagged pointer of this node; for nr_pool_order 1
+ * the bit selector is read from it with ja_node_pool_1d_bitsel().
+ */
 static
 struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag_v,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
        struct cds_ja_inode *linear;
 
        assert(type->type_class == RCU_JA_POOL);
-       /*
-        * TODO: currently, we select the pool by highest bits. We
-        * should support various encodings.
-        */
-       linear = (struct cds_ja_inode *)
-               &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
+
+       switch (type->nr_pool_order) {
+       case 1:
+       {
+               unsigned long bitsel, index;
+
+               bitsel = ja_node_pool_1d_bitsel(node_flag);
+               assert(bitsel < CHAR_BIT);
+               /*
+                * Pool index is bit "bitsel" of n. The mask is written
+                * as nr_pool_order, which equals 1 in this case.
+                */
+               index = ((unsigned long) n >> bitsel) & type->nr_pool_order;
+               linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
+               break;
+       }
+       case 2:
+       {
+               /*
+                * TODO: currently, we select the pool by highest bits. We
+                * should support various encodings.
+                */
+               linear = (struct cds_ja_inode *)
+                       &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
+               break;
+       }
+       default:
+               /*
+                * NOTE(review): with assertions compiled out, a NULL
+                * pool is passed down below -- confirm this branch is
+                * unreachable.
+                */
+               linear = NULL;
+               assert(0);
+       }
        return ja_linear_node_get_nth(type, linear, child_node_flag_ptr,
-               node_flag_ptr, n);
+               child_node_flag_v, node_flag_ptr, n);
 }
 
 static
@@ -365,20 +446,25 @@ static
 struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag_v,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
+       /*
+        * Pigeon nodes are indexed directly by n: the child slot is the
+        * n-th pointer stored in node->u.data. The slot is read once
+        * through rcu_dereference(), and that single value is used for
+        * the checks, the optional outputs and the return value.
+        * child_node_flag_ptr and child_node_flag_v are only filled when
+        * the slot is non-NULL; node_flag_ptr, when provided, always
+        * receives the slot address.
+        */
        struct cds_ja_inode_flag **child_node_flag;
+       struct cds_ja_inode_flag *child_node_flag_read;
 
        assert(type->type_class == RCU_JA_PIGEON);
        child_node_flag = &((struct cds_ja_inode_flag **) node->u.data)[n];
+       child_node_flag_read = rcu_dereference(*child_node_flag);
        dbg_printf("ja_pigeon_node_get_nth child_node_flag_ptr %p\n",
                child_node_flag);
-       if (caa_unlikely(child_node_flag_ptr) && *child_node_flag)
+       if (caa_unlikely(child_node_flag_ptr) && child_node_flag_read)
                *child_node_flag_ptr = child_node_flag;
+       if (caa_unlikely(child_node_flag_v) && child_node_flag_read)
+               *child_node_flag_v = child_node_flag_read;
        if (caa_unlikely(node_flag_ptr))
                *node_flag_ptr = child_node_flag;
-       return rcu_dereference(*child_node_flag);
+       return child_node_flag_read;
 }
 
 static
@@ -386,7 +472,7 @@ struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *t
                struct cds_ja_inode *node,
                uint8_t i)
 {
-       return ja_pigeon_node_get_nth(type, node, NULL, NULL, i);
+       return ja_pigeon_node_get_nth(type, node, NULL, NULL, NULL, i);
 }
 
 /*
@@ -394,8 +480,9 @@ struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *t
  * node_flag is already rcu_dereference'd.
  */
 static
-struct cds_ja_inode_flag * ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
+struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
@@ -411,13 +498,16 @@ struct cds_ja_inode_flag * ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
        switch (type->type_class) {
        case RCU_JA_LINEAR:
                return ja_linear_node_get_nth(type, node,
-                               child_node_flag_ptr, node_flag_ptr, n);
+                               child_node_flag_ptr, child_node_flag,
+                               node_flag_ptr, n);
        case RCU_JA_POOL:
-               return ja_pool_node_get_nth(type, node,
-                               child_node_flag_ptr, node_flag_ptr, n);
+               return ja_pool_node_get_nth(type, node, node_flag,
+                               child_node_flag_ptr, child_node_flag,
+                               node_flag_ptr, n);
        case RCU_JA_PIGEON:
                return ja_pigeon_node_get_nth(type, node,
-                               child_node_flag_ptr, node_flag_ptr, n);
+                               child_node_flag_ptr, child_node_flag,
+                               node_flag_ptr, n);
        default:
                assert(0);
                return (void *) -1UL;
@@ -485,6 +575,7 @@ int ja_linear_node_set_nth(const struct cds_ja_type *type,
 static
 int ja_pool_node_set_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_ja_shadow_node *shadow_node,
                uint8_t n,
                struct cds_ja_inode_flag *child_node_flag)
@@ -492,8 +583,33 @@ int ja_pool_node_set_nth(const struct cds_ja_type *type,
        struct cds_ja_inode *linear;
 
        assert(type->type_class == RCU_JA_POOL);
-       linear = (struct cds_ja_inode *)
-               &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
+
+       switch (type->nr_pool_order) {
+       case 1:
+       {
+               unsigned long bitsel, index;
+
+               bitsel = ja_node_pool_1d_bitsel(node_flag);
+               assert(bitsel < CHAR_BIT);
+               index = ((unsigned long) n >> bitsel) & type->nr_pool_order;
+               linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
+               break;
+       }
+       case 2:
+       {
+               /*
+                * TODO: currently, we select the pool by highest bits. We
+                * should support various encodings.
+                */
+               linear = (struct cds_ja_inode *)
+                       &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
+               break;
+       }
+       default:
+               linear = NULL;
+               assert(0);
+       }
+
        return ja_linear_node_set_nth(type, linear, shadow_node,
                        n, child_node_flag);
 }
@@ -523,6 +639,7 @@ int ja_pigeon_node_set_nth(const struct cds_ja_type *type,
 static
 int _ja_node_set_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_ja_shadow_node *shadow_node,
                uint8_t n,
                struct cds_ja_inode_flag *child_node_flag)
@@ -532,7 +649,7 @@ int _ja_node_set_nth(const struct cds_ja_type *type,
                return ja_linear_node_set_nth(type, node, shadow_node, n,
                                child_node_flag);
        case RCU_JA_POOL:
-               return ja_pool_node_set_nth(type, node, shadow_node, n,
+               return ja_pool_node_set_nth(type, node, node_flag, shadow_node, n,
                                child_node_flag);
        case RCU_JA_PIGEON:
                return ja_pigeon_node_set_nth(type, node, shadow_node, n,
@@ -645,9 +762,147 @@ int _ja_node_clear_ptr(const struct cds_ja_type *type,
        return 0;
 }
 
+/*
+ * Account one child key in the per-bit population counts: for each bit
+ * set in key, increment the matching nr_one[] slot.
+ */
+static
+void ja_distribution_1d_count_key(uint8_t *nr_one, uint8_t key)
+{
+       unsigned int bit;
+
+       for (bit = 0; bit < JA_BITS_PER_BYTE; bit++) {
+               if (key & (1U << bit))
+                       nr_one[bit]++;
+       }
+}
+
+/*
+ * Calculate bit distribution. Returns the bit (0 to 7) that splits the
+ * children into two sub-distributions whose sizes are as close to each
+ * other as possible.
+ *
+ * The distribution covers the children of the node passed as "node"
+ * (skipping the child designated by *nullify_node_flag_ptr when mode
+ * is JA_RECOMPACT_DEL), plus the key n when mode is JA_RECOMPACT_ADD.
+ * When several bits are equally good, the lowest one is returned.
+ */
+static
+unsigned int ja_node_sum_distribution_1d(enum ja_recompact mode,
+               struct cds_ja *ja,
+               unsigned int type_index,
+               const struct cds_ja_type *type,
+               struct cds_ja_inode *node,
+               struct cds_ja_shadow_node *shadow_node,
+               uint8_t n,
+               struct cds_ja_inode_flag *child_node_flag,
+               struct cds_ja_inode_flag **nullify_node_flag_ptr)
+{
+       uint8_t nr_one[JA_BITS_PER_BYTE];
+       unsigned int bitsel = 0, bit_i, overall_best_distance = UINT_MAX;
+       unsigned int distrib_nr_child = 0;
+
+       memset(nr_one, 0, sizeof(nr_one));
+
+       switch (type->type_class) {
+       case RCU_JA_LINEAR:
+       {
+               uint8_t nr_child =
+                       ja_linear_node_get_nr_child(type, node);
+               unsigned int i;
+
+               for (i = 0; i < nr_child; i++) {
+                       struct cds_ja_inode_flag *iter;
+                       uint8_t v;
+
+                       ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       ja_distribution_1d_count_key(nr_one, v);
+                       distrib_nr_child++;
+               }
+               break;
+       }
+       case RCU_JA_POOL:
+       {
+               unsigned int pool_nr;
+
+               for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
+                       struct cds_ja_inode *pool =
+                               ja_pool_node_get_ith_pool(type,
+                                       node, pool_nr);
+                       uint8_t nr_child =
+                               ja_linear_node_get_nr_child(type, pool);
+                       unsigned int j;
+
+                       for (j = 0; j < nr_child; j++) {
+                               struct cds_ja_inode_flag *iter;
+                               uint8_t v;
+
+                               ja_linear_node_get_ith_pos(type, pool,
+                                               j, &v, &iter);
+                               if (!iter)
+                                       continue;
+                               if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                                       continue;
+                               ja_distribution_1d_count_key(nr_one, v);
+                               distrib_nr_child++;
+                       }
+               }
+               break;
+       }
+       case RCU_JA_PIGEON:
+       {
+               unsigned int i;
+
+               assert(mode == JA_RECOMPACT_DEL);
+               /*
+                * Pigeon slots are indexed by key value, they are not
+                * packed: scan every slot rather than only the first
+                * nr_child ones, otherwise children stored at higher
+                * key values are left out of the distribution.
+                */
+               for (i = 0; i < (1U << JA_BITS_PER_BYTE); i++) {
+                       struct cds_ja_inode_flag *iter;
+
+                       iter = ja_pigeon_node_get_ith_pos(type, node, i);
+                       if (!iter)
+                               continue;
+                       if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
+                               continue;
+                       ja_distribution_1d_count_key(nr_one, i);
+                       distrib_nr_child++;
+               }
+               break;
+       }
+       case RCU_JA_NULL:
+               assert(mode == JA_RECOMPACT_ADD);
+               break;
+       default:
+               assert(0);
+               break;
+       }
+
+       if (mode == JA_RECOMPACT_ADD) {
+               /* The child being added is part of the distribution. */
+               ja_distribution_1d_count_key(nr_one, n);
+               distrib_nr_child++;
+       }
+
+       /*
+        * The best bit selector is that for which the number of ones is
+        * closest to half of the number of children in the
+        * distribution.
+        */
+       for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
+               unsigned int distance_to_best;
+
+               /* Subtract as int so a negative difference stays signed. */
+               distance_to_best = abs_int((int) nr_one[bit_i] - (int) (distrib_nr_child >> 1U));
+               if (distance_to_best < overall_best_distance) {
+                       overall_best_distance = distance_to_best;
+                       bitsel = bit_i;
+               }
+       }
+       dbg_printf("1 dimension pool bit selection: (%u)\n", bitsel);
+       return bitsel;
+}
+
 /*
  * ja_node_recompact_add: recompact a node, adding a new child.
- * TODO: for pool type, take selection bit(s) into account.
  * Return 0 on success, -EAGAIN if need to retry, or other negative
  * error value otherwise.
  */
@@ -702,7 +957,38 @@ retry:             /* for fallback */
                new_node = alloc_cds_ja_node(new_type);
                if (!new_node)
                        return -ENOMEM;
-               new_node_flag = ja_node_flag(new_node, new_type_index);
+
+               if (new_type->type_class == RCU_JA_POOL) {
+                       switch (new_type->nr_pool_order) {
+                       case 1:
+                       {
+                               unsigned int node_distrib_bitsel = 0;
+                               node_distrib_bitsel =
+                                       ja_node_sum_distribution_1d(mode, ja,
+                                               old_type_index, old_type,
+                                               old_node, shadow_node,
+                                               n, child_node_flag,
+                                               nullify_node_flag_ptr);
+                               assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
+                               new_node_flag = ja_node_flag_pool_1d(new_node,
+                                       new_type_index, node_distrib_bitsel);
+                               break;
+                       }
+                       case 2:
+                       {
+                               /* TODO: pool order 2 in 2d */
+                               assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
+                               assert(!((unsigned long) new_node & JA_POOL_2D_MASK));
+                               new_node_flag = ja_node_flag(new_node, new_type_index);
+                               break;
+                       }
+                       default:
+                               assert(0);
+                       }
+               } else {
+                       new_node_flag = ja_node_flag(new_node, new_type_index);
+               }
+
                dbg_printf("Recompact inherit lock from %p\n", shadow_node);
                new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja);
                if (!new_shadow_node) {
@@ -738,7 +1024,7 @@ retry:             /* for fallback */
                                continue;
                        if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
                                continue;
-                       ret = _ja_node_set_nth(new_type, new_node,
+                       ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
                                        new_shadow_node,
                                        v, iter);
                        if (new_type->type_class == RCU_JA_POOL && ret) {
@@ -770,7 +1056,7 @@ retry:             /* for fallback */
                                        continue;
                                if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
                                        continue;
-                               ret = _ja_node_set_nth(new_type, new_node,
+                               ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
                                                new_shadow_node,
                                                v, iter);
                                if (new_type->type_class == RCU_JA_POOL
@@ -800,7 +1086,7 @@ retry:             /* for fallback */
                                continue;
                        if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
                                continue;
-                       ret = _ja_node_set_nth(new_type, new_node,
+                       ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
                                        new_shadow_node,
                                        i, iter);
                        if (new_type->type_class == RCU_JA_POOL && ret) {
@@ -819,9 +1105,12 @@ skip_copy:
 
        if (mode == JA_RECOMPACT_ADD) {
                /* add node */
-               ret = _ja_node_set_nth(new_type, new_node,
+               ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
                                new_shadow_node,
                                n, child_node_flag);
+               if (new_type->type_class == RCU_JA_POOL && ret) {
+                       goto fallback_toosmall;
+               }
                assert(!ret);
        }
        /* Return pointer to new recompacted node through old_node_flag_ptr */
@@ -882,7 +1171,7 @@ int ja_node_set_nth(struct cds_ja *ja,
        node = ja_node_ptr(*node_flag);
        type_index = ja_node_type(*node_flag);
        type = &ja_types[type_index];
-       ret = _ja_node_set_nth(type, node, shadow_node,
+       ret = _ja_node_set_nth(type, node, *node_flag, shadow_node,
                        n, child_node_flag);
        switch (ret) {
        case -ENOSPC:
@@ -950,7 +1239,7 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key)
                uint8_t iter_key;
 
                iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
-               node_flag = ja_node_get_nth(node_flag, NULL, NULL,
+               node_flag = ja_node_get_nth(node_flag, NULL, NULL, NULL,
                        iter_key);
                dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
                                (unsigned int) iter_key, node_flag);
@@ -978,6 +1267,7 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key)
 static
 int ja_attach_node(struct cds_ja *ja,
                struct cds_ja_inode_flag **attach_node_flag_ptr,
+               struct cds_ja_inode_flag *attach_node_flag,
                struct cds_ja_inode_flag **node_flag_ptr,
                struct cds_ja_inode_flag *node_flag,
                struct cds_ja_inode_flag *parent_node_flag,
@@ -1015,9 +1305,20 @@ int ja_attach_node(struct cds_ja *ja,
 
        if (node_flag_ptr && ja_node_ptr(*node_flag_ptr)) {
                /*
-                * Target node is non-NULL: it has been updated between
-                * RCU lookup and lock acquisition. We need to re-try
-                * lookup and attach.
+                * Target node has been updated between RCU lookup and
+                * lock acquisition. We need to re-try lookup and
+                * attach.
+                */
+               ret = -EAGAIN;
+               goto unlock_parent;
+       }
+
+       if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) !=
+                       ja_node_ptr(attach_node_flag)) {
+               /*
+                * Target node has been updated between RCU lookup and
+                * lock acquisition. We need to re-try lookup and
+                * attach.
                 */
                ret = -EAGAIN;
                goto unlock_parent;
@@ -1105,6 +1406,7 @@ static
 int ja_chain_node(struct cds_ja *ja,
                struct cds_ja_inode_flag *parent_node_flag,
                struct cds_ja_inode_flag **node_flag_ptr,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_hlist_head *head,
                struct cds_ja_node *node)
 {
@@ -1115,7 +1417,7 @@ int ja_chain_node(struct cds_ja *ja,
        if (!shadow_node) {
                return -EAGAIN;
        }
-       if (!ja_node_ptr(*node_flag_ptr)) {
+       if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
                ret = -EAGAIN;
                goto end;
        }
@@ -1133,7 +1435,8 @@ int cds_ja_add(struct cds_ja *ja, uint64_t key,
                **node_flag_ptr;
        struct cds_ja_inode_flag *node_flag,
                *parent_node_flag,
-               *parent2_node_flag;
+               *parent2_node_flag,
+               *attach_node_flag;
        int ret;
 
        if (caa_unlikely(key > ja->key_max)) {
@@ -1148,6 +1451,7 @@ retry:
        parent_node_flag =
                (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
        attach_node_flag_ptr = &ja->root;
+       attach_node_flag = rcu_dereference(ja->root);
        node_flag_ptr = &ja->root;
        node_flag = rcu_dereference(ja->root);
 
@@ -1156,11 +1460,13 @@ retry:
                uint8_t iter_key;
 
                dbg_printf("cds_ja_add iter attach_node_flag_ptr %p node_flag_ptr %p node_flag %p\n",
-                               *attach_node_flag_ptr, *node_flag_ptr, node_flag);
+                               attach_node_flag_ptr, node_flag_ptr, node_flag);
                if (!ja_node_ptr(node_flag)) {
                        ret = ja_attach_node(ja, attach_node_flag_ptr,
+                                       attach_node_flag,
                                        node_flag_ptr,
-                                       parent_node_flag, parent2_node_flag,
+                                       parent_node_flag,
+                                       parent2_node_flag,
                                        key, i, new_node);
                        if (ret == -EAGAIN || ret == -EEXIST)
                                goto retry;
@@ -1172,12 +1478,13 @@ retry:
                parent_node_flag = node_flag;
                node_flag = ja_node_get_nth(node_flag,
                        &attach_node_flag_ptr,
+                       &attach_node_flag,
                        &node_flag_ptr,
                        iter_key);
                dbg_printf("cds_ja_add iter key lookup %u finds node_flag %p attach_node_flag_ptr %p node_flag_ptr %p\n",
                                (unsigned int) iter_key, node_flag,
-                               *attach_node_flag_ptr,
-                               *node_flag_ptr);
+                               attach_node_flag_ptr,
+                               node_flag_ptr);
        }
 
        /*
@@ -1185,15 +1492,17 @@ retry:
         * level, or chain it if key is already present.
         */
        if (!ja_node_ptr(node_flag)) {
-               dbg_printf("cds_ja_add last attach_node_flag_ptr %p node_flag_ptr %p node_flag %p\n",
-                               *attach_node_flag_ptr, *node_flag_ptr, node_flag);
+               dbg_printf("cds_ja_add attach_node_flag_ptr %p node_flag_ptr %p node_flag %p\n",
+                               attach_node_flag_ptr, node_flag_ptr, node_flag);
                ret = ja_attach_node(ja, attach_node_flag_ptr,
+                               attach_node_flag,
                                node_flag_ptr, parent_node_flag,
                                parent2_node_flag, key, i, new_node);
        } else {
                ret = ja_chain_node(ja,
                        parent_node_flag,
                        node_flag_ptr,
+                       node_flag,
                        (struct cds_hlist_head *) attach_node_flag_ptr,
                        new_node);
        }
@@ -1249,8 +1558,20 @@ int ja_detach_node(struct cds_ja *ja,
                        ret = -EAGAIN;
                        goto end;
                }
-               assert(shadow_node->nr_child > 0);
                shadow_nodes[nr_shadow++] = shadow_node;
+
+               /*
+                * Check if node has been removed between RCU
+                * lookup and lock acquisition.
+                */
+               assert(snapshot_ptr[i + 1]);
+               if (ja_node_ptr(*snapshot_ptr[i + 1])
+                               != ja_node_ptr(snapshot[i + 1])) {
+                       ret = -ENOENT;
+                       goto end;
+               }
+
+               assert(shadow_node->nr_child > 0);
                if (shadow_node->nr_child == 1 && i > 1)
                        nr_clear++;
                nr_branch++;
@@ -1263,10 +1584,23 @@ int ja_detach_node(struct cds_ja *ja,
                                goto end;
                        }
                        shadow_nodes[nr_shadow++] = shadow_node;
+
+                       /*
+                        * Check if node has been removed between RCU
+                        * lookup and lock acquisition.
+                        */
+                       assert(snapshot_ptr[i]);
+                       if (ja_node_ptr(*snapshot_ptr[i])
+                                       != ja_node_ptr(snapshot[i])) {
+                               ret = -ENOENT;
+                               goto end;
+                       }
+
                        node_flag_ptr = snapshot_ptr[i + 1];
                        n = snapshot_n[i + 1];
                        parent_node_flag_ptr = snapshot_ptr[i];
                        parent_node_flag = snapshot[i];
+
                        if (i > 1) {
                                /*
                                 * Lock parent's parent, in case we need
@@ -1279,27 +1613,29 @@ int ja_detach_node(struct cds_ja *ja,
                                        goto end;
                                }
                                shadow_nodes[nr_shadow++] = shadow_node;
+
+                               /*
+                                * Check if node has been removed between RCU
+                                * lookup and lock acquisition.
+                                */
+                               assert(snapshot_ptr[i - 1]);
+                               if (ja_node_ptr(*snapshot_ptr[i - 1])
+                                               != ja_node_ptr(snapshot[i - 1])) {
+                                       ret = -ENOENT;
+                                       goto end;
+                               }
                        }
+
                        break;
                }
        }
 
-       /*
-        * Check if node has been removed between RCU lookup and lock
-        * acquisition.
-        */
-       assert(node_flag_ptr);
-       if (!ja_node_ptr(*node_flag_ptr)) {
-               ret = -ENOENT;
-               goto end;
-       }
-
        /*
         * At this point, we want to delete all nodes that are about to
         * be removed from shadow_nodes (except the last one, which is
         * either the root or the parent of the upmost node with 1
-        * child). OK to as to free lock here, because RCU read lock is
-        * held, and free only performed in call_rcu.
+        * child). OK to free lock here, because RCU read lock is held,
+        * and free only performed in call_rcu.
         */
 
        for (i = 0; i < nr_clear; i++) {
@@ -1336,33 +1672,47 @@ static
 int ja_unchain_node(struct cds_ja *ja,
                struct cds_ja_inode_flag *parent_node_flag,
                struct cds_ja_inode_flag **node_flag_ptr,
-               struct cds_hlist_head *head,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_ja_node *node)
 {
+       /*
+        * Remove "node" from the duplicate list whose first element is
+        * designated by node_flag, under the lock of the parent's shadow
+        * node. Returns 0 on success, or -EAGAIN when the parent shadow
+        * node cannot be locked, when *node_flag_ptr no longer matches
+        * node_flag, when "node" is not found in the list, or when the
+        * list holds a single element.
+        */
        struct cds_ja_shadow_node *shadow_node;
        struct cds_hlist_node *hlist_node;
-       int ret = 0, count = 0;
+       struct cds_hlist_head hlist_head;
+       int ret = 0, count = 0, found = 0;
 
        shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
        if (!shadow_node)
                return -EAGAIN;
-       if (!ja_node_ptr(*node_flag_ptr)) {
+       /* The slot changed since the caller read node_flag: retry. */
+       if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
                ret = -EAGAIN;
                goto end;
        }
+       /* Local list head pointing at the node address held in node_flag. */
+       hlist_head.next = (struct cds_hlist_node *) ja_node_ptr(node_flag);
        /*
         * Retry if another thread removed all but one of duplicates
         * since check (this check was performed without lock).
+        * Ensure that the node we are about to remove is still in the
+        * list (while holding lock).
         */
-       cds_hlist_for_each_rcu(hlist_node, head, list) {
+       cds_hlist_for_each_rcu(hlist_node, &hlist_head) {
+               if (count == 0) {
+                       /* FIXME: currently a work-around */
+                       hlist_node->prev = (struct cds_hlist_node *) node_flag_ptr;
+               }
                count++;
+               if (hlist_node == &node->list)
+                       found++;
        }
-
-       if (count == 1) {
+       assert(found <= 1);
+       if (!found || count == 1) {
                ret = -EAGAIN;
                goto end;
        }
        cds_hlist_del_rcu(&node->list);
+       /*
+        * Validate that we indeed removed the node from linked list.
+        */
+       assert(ja_node_ptr(*node_flag_ptr) != (struct cds_ja_inode *) node);
 end:
        rcuja_shadow_unlock(shadow_node);
        return ret;
 }
@@ -1417,13 +1767,13 @@ retry:
                snapshot[nr_snapshot++] = node_flag;
                node_flag = ja_node_get_nth(node_flag,
                        &prev_node_flag_ptr,
+                       NULL,
                        &node_flag_ptr,
                        iter_key);
                dbg_printf("cds_ja_del iter key lookup %u finds node_flag %p, prev_node_flag_ptr %p\n",
                                (unsigned int) iter_key, node_flag,
                                prev_node_flag_ptr);
        }
-
        /*
         * We reached bottom of tree, try to find the node we are trying
         * to remove. Fail if we cannot find it.
@@ -1465,7 +1815,7 @@ retry:
                                        snapshot_n, nr_snapshot, key, node);
                } else {
                        ret = ja_unchain_node(ja, snapshot[nr_snapshot - 1],
-                               node_flag_ptr, &hlist_head, match);
+                               node_flag_ptr, node_flag, match);
                }
        }
        /*
This page took 0.032582 seconds and 4 git commands to generate.