Fix rcuja: handle concurrent updates
[urcu.git] / rcuja / rcuja.c
index 12cce1d1d42e8af6fc3efe2a20e5cf0b2652e373..97381437368d295380de5230bc9c72ed9217a9ca 100644 (file)
@@ -599,6 +599,7 @@ int ja_pigeon_node_clear_ptr(const struct cds_ja_type *type,
                struct cds_ja_inode_flag **node_flag_ptr)
 {
        assert(type->type_class == RCU_JA_PIGEON);
+       dbg_printf("ja_pigeon_node_clear_ptr: clearing ptr: %p\n", *node_flag_ptr);
        rcu_assign_pointer(*node_flag_ptr, NULL);
        shadow_node->nr_child--;
        return 0;
@@ -804,7 +805,7 @@ retry:              /* for fallback */
        }
 skip_copy:
 
-       if (JA_RECOMPACT_ADD) {
+       if (mode == JA_RECOMPACT_ADD) {
                /* add node */
                ret = _ja_node_set_nth(new_type, new_node,
                                new_shadow_node,
@@ -999,6 +1000,16 @@ int ja_attach_node(struct cds_ja *ja,
                }
        }
 
+       if (*node_flag_ptr != NULL) {
+               /*
+                * Attach point is non-NULL: it has been updated between
+                * RCU lookup and lock acquisition. We need to re-try
+                * lookup and attach.
+                */
+               ret = -EAGAIN;
+               goto unlock_parent;
+       }
+
        /* Create new branch, starting from bottom */
        CDS_INIT_HLIST_HEAD(&head);
        cds_hlist_add_head_rcu(&child_node->list, &head);
@@ -1061,6 +1072,7 @@ check_error:
                        assert(!tmpret);
                }
        }
+unlock_parent:
        if (parent_shadow_node)
                rcuja_shadow_unlock(parent_shadow_node);
 unlock_shadow:
@@ -1085,8 +1097,10 @@ int ja_chain_node(struct cds_ja *ja,
        struct cds_ja_shadow_node *shadow_node;
 
        shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
-       if (!shadow_node)
+       if (!shadow_node) {
+               dbg_printf("AGAIN3\n");
                return -EAGAIN;
+       }
        cds_hlist_add_head_rcu(&node->list, head);
        rcuja_shadow_unlock(shadow_node);
        return 0;
@@ -1102,8 +1116,9 @@ int cds_ja_add(struct cds_ja *ja, uint64_t key,
                *parent2_node_flag;
        int ret;
 
-       if (caa_unlikely(key > ja->key_max))
+       if (caa_unlikely(key > ja->key_max)) {
                return -EINVAL;
+       }
        tree_depth = ja->tree_depth;
 
 retry:
@@ -1155,7 +1170,7 @@ retry:
                        (struct cds_hlist_head *) node_flag_ptr,
                        new_node);
        }
-       if (ret == -EAGAIN)
+       if (ret == -EAGAIN || ret == -EEXIST)
                goto retry;
 end:
        return ret;
@@ -1168,6 +1183,9 @@ end:
  * ensure that when a match value -> pointer is found in a node, it is
  * _NEVER_ changed for that node without recompaction, and recompaction
  * reallocates the node.
+ * However, when a child is removed from "linear" nodes, its pointer
+ * is set to NULL. We therefore check, while holding the locks, if this
+ * pointer is NULL, and return -ENOENT to the caller if it is the case.
  */
 static
 int ja_detach_node(struct cds_ja *ja,
@@ -1183,10 +1201,10 @@ int ja_detach_node(struct cds_ja *ja,
                        *parent_node_flag = NULL,
                        **parent_node_flag_ptr = NULL;
        struct cds_ja_inode_flag *iter_node_flag;
-       int ret, i, nr_shadow = 0, nr_clear = 0;
-       uint8_t n;
+       int ret, i, nr_shadow = 0, nr_clear = 0, nr_branch = 0;
+       uint8_t n = 0;
 
-       assert(nr_snapshot == ja->tree_depth - 1);
+       assert(nr_snapshot == ja->tree_depth + 1);
 
        /*
         * From the last internal level node going up, get the node
@@ -1195,7 +1213,7 @@ int ja_detach_node(struct cds_ja *ja,
         * which has more that one child left, we lock the parent, and
         * proceed to the node deletion (removing its children too).
         */
-       for (i = nr_snapshot - 1; i >= 1; i--) {
+       for (i = nr_snapshot - 2; i >= 1; i--) {
                struct cds_ja_shadow_node *shadow_node;
 
                shadow_node = rcuja_shadow_lookup_lock(ja->ht,
@@ -1206,17 +1224,9 @@ int ja_detach_node(struct cds_ja *ja,
                }
                assert(shadow_node->nr_child > 0);
                shadow_nodes[nr_shadow++] = shadow_node;
-               nr_clear++;
-               if (i == nr_snapshot - 1) {
-                       /*
-                        * Re-check that last internal node level has
-                        * only one child, else trigger a retry.
-                        */
-                       if (shadow_node->nr_child != 1) {
-                               ret = -EAGAIN;
-                               goto end;
-                       }
-               }
+               if (shadow_node->nr_child == 1)
+                       nr_clear++;
+               nr_branch++;
                if (shadow_node->nr_child > 1 || i == 1) {
                        /* Lock parent and break */
                        shadow_node = rcuja_shadow_lookup_lock(ja->ht,
@@ -1226,10 +1236,10 @@ int ja_detach_node(struct cds_ja *ja,
                                goto end;
                        }
                        shadow_nodes[nr_shadow++] = shadow_node;
-                       node_flag_ptr = snapshot_ptr[i];
-                       n = snapshot_n[i];
-                       parent_node_flag_ptr = snapshot_ptr[i - 1];
-                       parent_node_flag = snapshot[i - 1];
+                       node_flag_ptr = snapshot_ptr[i + 1];
+                       n = snapshot_n[i + 1];
+                       parent_node_flag_ptr = snapshot_ptr[i];
+                       parent_node_flag = snapshot[i];
                        if (i > 1) {
                                /*
                                 * Lock parent's parent, in case we need
@@ -1248,11 +1258,20 @@ int ja_detach_node(struct cds_ja *ja,
        }
 
        /*
-        * At this point, we want to delete all nodes in shadow_nodes
-        * (except the last one, which is either the root or the parent
-        * of the upmost node with 1 child). OK to as to free lock here,
-        * because RCU read lock is held, and free only performed in
-        * call_rcu.
+        * Check if node has been removed between RCU lookup and lock
+        * acquisition.
+        */
+       if (!*node_flag_ptr) {
+               ret = -ENOENT;
+               goto end;
+       }
+
+       /*
+        * At this point, we want to delete all nodes that are about to
+        * be removed from shadow_nodes (except the last one, which is
+        * either the root or the parent of the upmost node with 1
+        * child). It is OK to free the lock here, because the RCU read
+        * lock is held, and the free is only performed in call_rcu.
         */
 
        for (i = 0; i < nr_clear; i++) {
@@ -1269,9 +1288,13 @@ int ja_detach_node(struct cds_ja *ja,
        ret = ja_node_clear_ptr(ja,
                node_flag_ptr,          /* Pointer to location to nullify */
                &iter_node_flag,        /* Old new parent ptr in its parent */
-               shadow_nodes[nr_clear], /* of parent */
+               shadow_nodes[nr_branch - 1],    /* of parent */
                n);
+       if (ret)
+               goto end;
 
+       dbg_printf("ja_detach_node: publish %p instead of %p\n",
+               iter_node_flag, *parent_node_flag_ptr);
        /* Update address of parent ptr in its parent */
        rcu_assign_pointer(*parent_node_flag_ptr, iter_node_flag);
 
@@ -1284,19 +1307,25 @@ end:
 static
 int ja_unchain_node(struct cds_ja *ja,
                struct cds_ja_inode_flag *parent_node_flag,
+               struct cds_hlist_head *head,
                struct cds_ja_node *node)
 {
        struct cds_ja_shadow_node *shadow_node;
-       int ret = 0;
+       struct cds_hlist_node *hlist_node;
+       int ret = 0, count = 0;
 
        shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
        if (!shadow_node)
                return -EAGAIN;
        /*
         * Retry if another thread removed all but one of duplicates
-        * since check.
+        * since check (that was performed without lock).
         */
-       if (shadow_node->nr_child == 1) {
+       cds_hlist_for_each_rcu(hlist_node, head, list) {
+               count++;
+       }
+
+       if (count == 1) {
                ret = -EAGAIN;
                goto end;
        }
@@ -1318,7 +1347,7 @@ int cds_ja_del(struct cds_ja *ja, uint64_t key,
        uint8_t snapshot_n[JA_MAX_DEPTH];
        struct cds_ja_inode_flag *node_flag;
        struct cds_ja_inode_flag **prev_node_flag_ptr;
-       int nr_snapshot = 0;
+       int nr_snapshot;
        int ret;
 
        if (caa_unlikely(key > ja->key_max))
@@ -1326,11 +1355,13 @@ int cds_ja_del(struct cds_ja *ja, uint64_t key,
        tree_depth = ja->tree_depth;
 
 retry:
+       nr_snapshot = 0;
        dbg_printf("cds_ja_del attempt: key %" PRIu64 ", node %p\n",
                key, node);
 
        /* snapshot for level 0 is only for shadow node lookup */
-       snapshot_n[nr_snapshot] = 0;
+       snapshot_n[0] = 0;
+       snapshot_n[1] = 0;
        snapshot_ptr[nr_snapshot] = NULL;
        snapshot[nr_snapshot++] = (struct cds_ja_inode_flag *) &ja->root;
        node_flag = rcu_dereference(ja->root);
@@ -1346,11 +1377,7 @@ retry:
                        return -ENOENT;
                }
                iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
-               if (nr_snapshot <= 1)
-                       snapshot_n[nr_snapshot] = 0;
-               else
-                       snapshot_n[nr_snapshot - 1] = iter_key;
-
+               snapshot_n[nr_snapshot + 1] = iter_key;
                snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
                snapshot[nr_snapshot++] = node_flag;
                node_flag = ja_node_get_nth(node_flag,
@@ -1366,38 +1393,51 @@ retry:
         * to remove. Fail if we cannot find it.
         */
        if (!ja_node_ptr(node_flag)) {
+               dbg_printf("cds_ja_del: no node found for key %" PRIu64 "\n",
+                               key);
                return -ENOENT;
        } else {
-               struct cds_hlist_head *hlist_head;
+               struct cds_hlist_head hlist_head;
                struct cds_hlist_node *hlist_node;
                struct cds_ja_node *entry, *match = NULL;
                int count = 0;
 
-               hlist_head = (struct cds_hlist_head *) ja_node_ptr(node_flag);
+               hlist_head.next =
+                       (struct cds_hlist_node *) ja_node_ptr(node_flag);
                cds_hlist_for_each_entry_rcu(entry,
                                hlist_node,
-                               hlist_head,
+                               &hlist_head,
                                list) {
+                       dbg_printf("cds_ja_del: compare %p with entry %p\n", node, entry);
                        if (entry == node)
                                match = entry;
                        count++;
                }
-               if (!match)
+               if (!match) {
+                       dbg_printf("cds_ja_del: no node match for node %p key %" PRIu64 "\n", node, key);
                        return -ENOENT;
+               }
                assert(count > 0);
                if (count == 1) {
                        /*
-                        * Removing last of duplicates.
+                        * Removing last of duplicates. Last snapshot
+                        * does not have a shadow node (external leaves).
                         */
                        snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
                        snapshot[nr_snapshot++] = node_flag;
                        ret = ja_detach_node(ja, snapshot, snapshot_ptr,
                                        snapshot_n, nr_snapshot, key, node);
                } else {
-                       ret = ja_unchain_node(ja, node_flag, entry);
+                       ret = ja_unchain_node(ja, snapshot[nr_snapshot - 1],
+                               &hlist_head, match);
                }
        }
-       if (ret == -EAGAIN)
+       /*
+        * Explanation of -ENOENT handling: caused by concurrent delete
+        * between RCU lookup and actual removal. Need to re-do the
+        * lookup and removal attempt.
+        */
+       if (ret == -EAGAIN || ret == -ENOENT)
                goto retry;
        return ret;
 }
@@ -1415,13 +1455,13 @@ struct cds_ja *_cds_ja_new(unsigned int key_bits,
 
        switch (key_bits) {
        case 8:
-               ja->key_max = UINT8_MAX;
-               break;
        case 16:
-               ja->key_max = UINT16_MAX;
-               break;
+       case 24:
        case 32:
-               ja->key_max = UINT32_MAX;
+       case 40:
+       case 48:
+       case 56:
+               ja->key_max = (1ULL << key_bits) - 1;
                break;
        case 64:
                ja->key_max = UINT64_MAX;
This page took 0.027531 seconds and 4 git commands to generate.