Fix rcuja: fallback when adding element to full pool
[userspace-rcu.git] / rcuja / rcuja.c
index 67964b65da65bb4abee4db5b9379f270bda06f47..fac1eb75d56a09210e87296a35b14664a09724ec 100644 (file)
@@ -277,6 +277,7 @@ static
 struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag_v,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
@@ -307,6 +308,8 @@ struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
        ptr = rcu_dereference(pointers[i]);
        if (caa_unlikely(child_node_flag_ptr) && ptr)
                *child_node_flag_ptr = &pointers[i];
+       if (caa_unlikely(child_node_flag_v) && ptr)
+               *child_node_flag_v = ptr;
        if (caa_unlikely(node_flag_ptr))
                *node_flag_ptr = &pointers[i];
        return ptr;
@@ -335,6 +338,7 @@ static
 struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag_v,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
@@ -348,7 +352,7 @@ struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
        linear = (struct cds_ja_inode *)
                &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
        return ja_linear_node_get_nth(type, linear, child_node_flag_ptr,
-               node_flag_ptr, n);
+               child_node_flag_v, node_flag_ptr, n);
 }
 
 static
@@ -365,20 +369,25 @@ static
 struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
                struct cds_ja_inode *node,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag_v,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
        struct cds_ja_inode_flag **child_node_flag;
+       struct cds_ja_inode_flag *child_node_flag_read;
 
        assert(type->type_class == RCU_JA_PIGEON);
        child_node_flag = &((struct cds_ja_inode_flag **) node->u.data)[n];
+       child_node_flag_read = rcu_dereference(*child_node_flag);
        dbg_printf("ja_pigeon_node_get_nth child_node_flag_ptr %p\n",
                child_node_flag);
-       if (caa_unlikely(child_node_flag_ptr) && *child_node_flag)
+       if (caa_unlikely(child_node_flag_ptr) && child_node_flag_read)
                *child_node_flag_ptr = child_node_flag;
+       if (caa_unlikely(child_node_flag_v) && child_node_flag_read)
+               *child_node_flag_v = child_node_flag_read;
        if (caa_unlikely(node_flag_ptr))
                *node_flag_ptr = child_node_flag;
-       return rcu_dereference(*child_node_flag);
+       return child_node_flag_read;
 }
 
 static
@@ -386,7 +395,7 @@ struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *t
                struct cds_ja_inode *node,
                uint8_t i)
 {
-       return ja_pigeon_node_get_nth(type, node, NULL, NULL, i);
+       return ja_pigeon_node_get_nth(type, node, NULL, NULL, NULL, i);
 }
 
 /*
@@ -394,8 +403,9 @@ struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *t
  * node_flag is already rcu_dereference'd.
  */
 static
-struct cds_ja_inode_flag * ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
+struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
                struct cds_ja_inode_flag ***child_node_flag_ptr,
+               struct cds_ja_inode_flag **child_node_flag,
                struct cds_ja_inode_flag ***node_flag_ptr,
                uint8_t n)
 {
@@ -411,13 +421,16 @@ struct cds_ja_inode_flag * ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
        switch (type->type_class) {
        case RCU_JA_LINEAR:
                return ja_linear_node_get_nth(type, node,
-                               child_node_flag_ptr, node_flag_ptr, n);
+                               child_node_flag_ptr, child_node_flag,
+                               node_flag_ptr, n);
        case RCU_JA_POOL:
                return ja_pool_node_get_nth(type, node,
-                               child_node_flag_ptr, node_flag_ptr, n);
+                               child_node_flag_ptr, child_node_flag,
+                               node_flag_ptr, n);
        case RCU_JA_PIGEON:
                return ja_pigeon_node_get_nth(type, node,
-                               child_node_flag_ptr, node_flag_ptr, n);
+                               child_node_flag_ptr, child_node_flag,
+                               node_flag_ptr, n);
        default:
                assert(0);
                return (void *) -1UL;
@@ -822,6 +835,9 @@ skip_copy:
                ret = _ja_node_set_nth(new_type, new_node,
                                new_shadow_node,
                                n, child_node_flag);
+               if (new_type->type_class == RCU_JA_POOL && ret) {
+                       goto fallback_toosmall;
+               }
                assert(!ret);
        }
        /* Return pointer to new recompacted node through old_node_flag_ptr */
@@ -950,7 +966,7 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key)
                uint8_t iter_key;
 
                iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
-               node_flag = ja_node_get_nth(node_flag, NULL, NULL,
+               node_flag = ja_node_get_nth(node_flag, NULL, NULL, NULL,
                        iter_key);
                dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
                                (unsigned int) iter_key, node_flag);
@@ -978,6 +994,7 @@ struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key)
 static
 int ja_attach_node(struct cds_ja *ja,
                struct cds_ja_inode_flag **attach_node_flag_ptr,
+               struct cds_ja_inode_flag *attach_node_flag,
                struct cds_ja_inode_flag **node_flag_ptr,
                struct cds_ja_inode_flag *node_flag,
                struct cds_ja_inode_flag *parent_node_flag,
@@ -1013,11 +1030,22 @@ int ja_attach_node(struct cds_ja *ja,
                }
        }
 
-       if (node_flag_ptr && ja_node_ptr(*node_flag_ptr) != NULL) {
+       if (node_flag_ptr && ja_node_ptr(*node_flag_ptr)) {
+               /*
+                * Target node has been updated between RCU lookup and
+                * lock acquisition. We need to re-try lookup and
+                * attach.
+                */
+               ret = -EAGAIN;
+               goto unlock_parent;
+       }
+
+       if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) !=
+                       ja_node_ptr(attach_node_flag)) {
                /*
-                * Attach point is non-NULL: it has been updated between
-                * RCU lookup and lock acquisition. We need to re-try
-                * lookup and attach.
+                * Attach point no longer matches the value read
+                * during RCU lookup: it changed before lock
+                * acquisition. We need to re-try lookup and attach.
                 */
                ret = -EAGAIN;
                goto unlock_parent;
@@ -1104,18 +1132,26 @@ end:
 static
 int ja_chain_node(struct cds_ja *ja,
                struct cds_ja_inode_flag *parent_node_flag,
+               struct cds_ja_inode_flag **node_flag_ptr,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_hlist_head *head,
                struct cds_ja_node *node)
 {
        struct cds_ja_shadow_node *shadow_node;
+       int ret = 0;
 
        shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
        if (!shadow_node) {
                return -EAGAIN;
        }
+       if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
+               ret = -EAGAIN;
+               goto end;
+       }
        cds_hlist_add_head_rcu(&node->list, head);
+end:
        rcuja_shadow_unlock(shadow_node);
-       return 0;
+       return ret;
 }
 
 int cds_ja_add(struct cds_ja *ja, uint64_t key,
@@ -1126,7 +1162,8 @@ int cds_ja_add(struct cds_ja *ja, uint64_t key,
                **node_flag_ptr;
        struct cds_ja_inode_flag *node_flag,
                *parent_node_flag,
-               *parent2_node_flag;
+               *parent2_node_flag,
+               *attach_node_flag;
        int ret;
 
        if (caa_unlikely(key > ja->key_max)) {
@@ -1141,6 +1178,7 @@ retry:
        parent_node_flag =
                (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
        attach_node_flag_ptr = &ja->root;
+       attach_node_flag = rcu_dereference(ja->root);
        node_flag_ptr = &ja->root;
        node_flag = rcu_dereference(ja->root);
 
@@ -1149,11 +1187,13 @@ retry:
                uint8_t iter_key;
 
                dbg_printf("cds_ja_add iter attach_node_flag_ptr %p node_flag_ptr %p node_flag %p\n",
-                               *attach_node_flag_ptr, *node_flag_ptr, node_flag);
+                               attach_node_flag_ptr, node_flag_ptr, node_flag);
                if (!ja_node_ptr(node_flag)) {
                        ret = ja_attach_node(ja, attach_node_flag_ptr,
+                                       attach_node_flag,
                                        node_flag_ptr,
-                                       parent_node_flag, parent2_node_flag,
+                                       parent_node_flag,
+                                       parent2_node_flag,
                                        key, i, new_node);
                        if (ret == -EAGAIN || ret == -EEXIST)
                                goto retry;
@@ -1165,12 +1205,13 @@ retry:
                parent_node_flag = node_flag;
                node_flag = ja_node_get_nth(node_flag,
                        &attach_node_flag_ptr,
+                       &attach_node_flag,
                        &node_flag_ptr,
                        iter_key);
                dbg_printf("cds_ja_add iter key lookup %u finds node_flag %p attach_node_flag_ptr %p node_flag_ptr %p\n",
                                (unsigned int) iter_key, node_flag,
-                               *attach_node_flag_ptr,
-                               *node_flag_ptr);
+                               attach_node_flag_ptr,
+                               node_flag_ptr);
        }
 
        /*
@@ -1178,14 +1219,17 @@ retry:
         * level, or chain it if key is already present.
         */
        if (!ja_node_ptr(node_flag)) {
-               dbg_printf("cds_ja_add last attach_node_flag_ptr %p node_flag_ptr %p node_flag %p\n",
-                               *attach_node_flag_ptr, *node_flag_ptr, node_flag);
+               dbg_printf("cds_ja_add attach_node_flag_ptr %p node_flag_ptr %p node_flag %p\n",
+                               attach_node_flag_ptr, node_flag_ptr, node_flag);
                ret = ja_attach_node(ja, attach_node_flag_ptr,
+                               attach_node_flag,
                                node_flag_ptr, parent_node_flag,
                                parent2_node_flag, key, i, new_node);
        } else {
                ret = ja_chain_node(ja,
                        parent_node_flag,
+                       node_flag_ptr,
+                       node_flag,
                        (struct cds_hlist_head *) attach_node_flag_ptr,
                        new_node);
        }
@@ -1241,9 +1285,21 @@ int ja_detach_node(struct cds_ja *ja,
                        ret = -EAGAIN;
                        goto end;
                }
-               assert(shadow_node->nr_child > 0);
                shadow_nodes[nr_shadow++] = shadow_node;
-               if (shadow_node->nr_child == 1)
+
+               /*
+                * Check if node has been removed between RCU
+                * lookup and lock acquisition.
+                */
+               assert(snapshot_ptr[i + 1]);
+               if (ja_node_ptr(*snapshot_ptr[i + 1])
+                               != ja_node_ptr(snapshot[i + 1])) {
+                       ret = -ENOENT;
+                       goto end;
+               }
+
+               assert(shadow_node->nr_child > 0);
+               if (shadow_node->nr_child == 1 && i > 1)
                        nr_clear++;
                nr_branch++;
                if (shadow_node->nr_child > 1 || i == 1) {
@@ -1255,10 +1311,23 @@ int ja_detach_node(struct cds_ja *ja,
                                goto end;
                        }
                        shadow_nodes[nr_shadow++] = shadow_node;
+
+                       /*
+                        * Check if node has been removed between RCU
+                        * lookup and lock acquisition.
+                        */
+                       assert(snapshot_ptr[i]);
+                       if (ja_node_ptr(*snapshot_ptr[i])
+                                       != ja_node_ptr(snapshot[i])) {
+                               ret = -ENOENT;
+                               goto end;
+                       }
+
                        node_flag_ptr = snapshot_ptr[i + 1];
                        n = snapshot_n[i + 1];
                        parent_node_flag_ptr = snapshot_ptr[i];
                        parent_node_flag = snapshot[i];
+
                        if (i > 1) {
                                /*
                                 * Lock parent's parent, in case we need
@@ -1271,27 +1340,29 @@ int ja_detach_node(struct cds_ja *ja,
                                        goto end;
                                }
                                shadow_nodes[nr_shadow++] = shadow_node;
+
+                               /*
+                                * Check if node has been removed between RCU
+                                * lookup and lock acquisition.
+                                */
+                               assert(snapshot_ptr[i - 1]);
+                               if (ja_node_ptr(*snapshot_ptr[i - 1])
+                                               != ja_node_ptr(snapshot[i - 1])) {
+                                       ret = -ENOENT;
+                                       goto end;
+                               }
                        }
+
                        break;
                }
        }
 
-       /*
-        * Check if node has been removed between RCU lookup and lock
-        * acquisition.
-        */
-       assert(node_flag_ptr);
-       if (ja_node_ptr(*node_flag_ptr) == NULL) {
-               ret = -ENOENT;
-               goto end;
-       }
-
        /*
         * At this point, we want to delete all nodes that are about to
         * be removed from shadow_nodes (except the last one, which is
         * either the root or the parent of the upmost node with 1
-        * child). OK to as to free lock here, because RCU read lock is
-        * held, and free only performed in call_rcu.
+        * child). OK to free lock here, because RCU read lock is held,
+        * and free only performed in call_rcu.
         */
 
        for (i = 0; i < nr_clear; i++) {
@@ -1327,29 +1398,48 @@ end:
 static
 int ja_unchain_node(struct cds_ja *ja,
                struct cds_ja_inode_flag *parent_node_flag,
-               struct cds_hlist_head *head,
+               struct cds_ja_inode_flag **node_flag_ptr,
+               struct cds_ja_inode_flag *node_flag,
                struct cds_ja_node *node)
 {
        struct cds_ja_shadow_node *shadow_node;
        struct cds_hlist_node *hlist_node;
-       int ret = 0, count = 0;
+       struct cds_hlist_head hlist_head;
+       int ret = 0, count = 0, found = 0;
 
        shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
        if (!shadow_node)
                return -EAGAIN;
+       if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
+               ret = -EAGAIN;
+               goto end;
+       }
+       hlist_head.next = (struct cds_hlist_node *) ja_node_ptr(node_flag);
        /*
         * Retry if another thread removed all but one of duplicates
-        * since check (that was performed without lock).
+        * since check (this check was performed without lock).
+        * Ensure that the node we are about to remove is still in the
+        * list (while holding lock).
         */
-       cds_hlist_for_each_rcu(hlist_node, head, list) {
+       cds_hlist_for_each_rcu(hlist_node, &hlist_head) {
+               if (count == 0) {
+                       /* FIXME: work-around: hlist_head is a local copy; aim first node's prev at node_flag_ptr */
+                       hlist_node->prev = (struct cds_hlist_node *) node_flag_ptr;
+               }
                count++;
+               if (hlist_node == &node->list)
+                       found++;
        }
-
-       if (count == 1) {
+       assert(found <= 1);
+       if (!found || count == 1) {
                ret = -EAGAIN;
                goto end;
        }
        cds_hlist_del_rcu(&node->list);
+       /*
+        * Validate that we indeed removed the node from linked list.
+        */
+       assert(ja_node_ptr(*node_flag_ptr) != (struct cds_ja_inode *) node);
 end:
        rcuja_shadow_unlock(shadow_node);
        return ret;
@@ -1366,7 +1456,8 @@ int cds_ja_del(struct cds_ja *ja, uint64_t key,
        struct cds_ja_inode_flag **snapshot_ptr[JA_MAX_DEPTH];
        uint8_t snapshot_n[JA_MAX_DEPTH];
        struct cds_ja_inode_flag *node_flag;
-       struct cds_ja_inode_flag **prev_node_flag_ptr;
+       struct cds_ja_inode_flag **prev_node_flag_ptr,
+               **node_flag_ptr;
        int nr_snapshot;
        int ret;
 
@@ -1386,6 +1477,7 @@ retry:
        snapshot[nr_snapshot++] = (struct cds_ja_inode_flag *) &ja->root;
        node_flag = rcu_dereference(ja->root);
        prev_node_flag_ptr = &ja->root;
+       node_flag_ptr = &ja->root;
 
        /* Iterate on all internal levels */
        for (i = 1; i < tree_depth; i++) {
@@ -1403,12 +1495,12 @@ retry:
                node_flag = ja_node_get_nth(node_flag,
                        &prev_node_flag_ptr,
                        NULL,
+                       &node_flag_ptr,
                        iter_key);
                dbg_printf("cds_ja_del iter key lookup %u finds node_flag %p, prev_node_flag_ptr %p\n",
                                (unsigned int) iter_key, node_flag,
                                prev_node_flag_ptr);
        }
-
        /*
         * We reached bottom of tree, try to find the node we are trying
         * to remove. Fail if we cannot find it.
@@ -1450,7 +1542,7 @@ retry:
                                        snapshot_n, nr_snapshot, key, node);
                } else {
                        ret = ja_unchain_node(ja, snapshot[nr_snapshot - 1],
-                               &hlist_head, match);
+                               node_flag_ptr, node_flag, match);
                }
        }
        /*
This page took 0.029354 seconds and 4 git commands to generate.