+
+ if (*node_flag_ptr != NULL) {
+ /*
+ * Attach point is non-NULL: it has been updated between
+ * RCU lookup and lock acquisition. We need to re-try
+ * lookup and attach.
+ */
+ ret = -EAGAIN;
+ goto unlock_parent;
+ }
+
+ /* Create new branch, starting from bottom */
+ CDS_INIT_HLIST_HEAD(&head);
+ cds_hlist_add_head_rcu(&child_node->list, &head);
+ iter_node_flag = (struct cds_ja_inode_flag *) head.next;
+
+ for (i = ja->tree_depth; i > (int) level; i--) {
+ uint8_t iter_key;
+
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - i)));
+ dbg_printf("branch creation level %d, key %u\n",
+ i - 1, (unsigned int) iter_key);
+ iter_dest_node_flag = NULL;
+ ret = ja_node_set_nth(ja, &iter_dest_node_flag,
+ iter_key,
+ iter_node_flag,
+ NULL);
+ if (ret)
+ goto check_error;
+ created_nodes[nr_created_nodes++] = iter_dest_node_flag;
+ iter_node_flag = iter_dest_node_flag;
+ }
+
+ if (level > 1) {
+ uint8_t iter_key;
+
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
+ /* We need to use set_nth on the previous level. */
+ iter_dest_node_flag = node_flag;
+ ret = ja_node_set_nth(ja, &iter_dest_node_flag,
+ iter_key,
+ iter_node_flag,
+ shadow_node);
+ if (ret)
+ goto check_error;
+ created_nodes[nr_created_nodes++] = iter_dest_node_flag;
+ iter_node_flag = iter_dest_node_flag;
+ }
+
+ /* Publish new branch */
+ dbg_printf("Publish branch %p, replacing %p\n",
+ iter_node_flag, *node_flag_ptr);
+ rcu_assign_pointer(*node_flag_ptr, iter_node_flag);
+
+ /* Success */
+ ret = 0;
+
+check_error:
+ if (ret) {
+ for (i = 0; i < nr_created_nodes; i++) {
+ int tmpret;
+ int flags;
+
+ flags = RCUJA_SHADOW_CLEAR_FREE_LOCK;
+ if (i)
+ flags |= RCUJA_SHADOW_CLEAR_FREE_NODE;
+ tmpret = rcuja_shadow_clear(ja->ht,
+ created_nodes[i],
+ NULL,
+ flags);
+ assert(!tmpret);
+ }
+ }
+unlock_parent:
+ if (parent_shadow_node)
+ rcuja_shadow_unlock(parent_shadow_node);
+unlock_shadow:
+ if (shadow_node)
+ rcuja_shadow_unlock(shadow_node);
+end:
+ return ret;
+}
+
+/*
+ * Lock the parent containing the hlist head pointer, and add node to list of
+ * duplicates. Failure can happen if concurrent update changes the
+ * parent before we get the lock. We return -EAGAIN in that case.
+ * Return 0 on success, negative error value on failure.
+ */
+static
+int ja_chain_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag *parent_node_flag,
+ struct cds_hlist_head *head,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_shadow_node *shadow_node;
+
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
+ if (!shadow_node) {
+ dbg_printf("AGAIN3\n");
+ return -EAGAIN;
+ }
+ cds_hlist_add_head_rcu(&node->list, head);
+ rcuja_shadow_unlock(shadow_node);
+ return 0;
+}
+
+int cds_ja_add(struct cds_ja *ja, uint64_t key,
+ struct cds_ja_node *new_node)
+{
+ unsigned int tree_depth, i;
+ struct cds_ja_inode_flag **node_flag_ptr; /* in parent */
+ struct cds_ja_inode_flag *node_flag,
+ *parent_node_flag,
+ *parent2_node_flag;
+ int ret;
+
+ if (caa_unlikely(key > ja->key_max)) {
+ return -EINVAL;
+ }
+ tree_depth = ja->tree_depth;
+
+retry:
+ dbg_printf("cds_ja_add attempt: key %" PRIu64 ", node %p\n",
+ key, new_node);
+ parent2_node_flag = NULL;
+ parent_node_flag =
+ (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
+ node_flag_ptr = &ja->root;
+ node_flag = rcu_dereference(ja->root);
+
+ /* Iterate on all internal levels */
+ for (i = 1; i < tree_depth; i++) {
+ uint8_t iter_key;
+
+ dbg_printf("cds_ja_add iter node_flag_ptr %p node_flag %p\n",
+ *node_flag_ptr, node_flag);
+ if (!ja_node_ptr(node_flag)) {
+ ret = ja_attach_node(ja, node_flag_ptr,
+ parent_node_flag, parent2_node_flag,
+ key, i, new_node);
+ if (ret == -EAGAIN || ret == -EEXIST)
+ goto retry;
+ else
+ goto end;
+ }
+ iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
+ parent2_node_flag = parent_node_flag;
+ parent_node_flag = node_flag;
+ node_flag = ja_node_get_nth(node_flag,
+ &node_flag_ptr,
+ iter_key);
+ dbg_printf("cds_ja_add iter key lookup %u finds node_flag %p node_flag_ptr %p\n",
+ (unsigned int) iter_key, node_flag, *node_flag_ptr);
+ }
+
+ /*
+ * We reached bottom of tree, simply add node to last internal
+ * level, or chain it if key is already present.
+ */
+ if (!ja_node_ptr(node_flag)) {
+ dbg_printf("cds_ja_add last node_flag_ptr %p node_flag %p\n",
+ *node_flag_ptr, node_flag);
+ ret = ja_attach_node(ja, node_flag_ptr, parent_node_flag,
+ parent2_node_flag, key, i, new_node);
+ } else {
+ ret = ja_chain_node(ja,
+ parent_node_flag,
+ (struct cds_hlist_head *) node_flag_ptr,
+ new_node);
+ }
+ if (ret == -EAGAIN || ret == -EEXIST)
+ goto retry;
+end:
+ return ret;
+}
+
+/*
+ * Note: there is no need to lookup the pointer address associated with
+ * each node's nth item after taking the lock: it's already been done by
+ * cds_ja_del while holding the rcu read-side lock, and our node rules
+ * ensure that when a match value -> pointer is found in a node, it is
+ * _NEVER_ changed for that node without recompaction, and recompaction
+ * reallocates the node.
+ * However, when a child is removed from "linear" nodes, its pointer
+ * is set to NULL. We therefore check, while holding the locks, if this
+ * pointer is NULL, and return -ENOENT to the caller if it is the case.
+ */
+static
+int ja_detach_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag **snapshot,
+ struct cds_ja_inode_flag ***snapshot_ptr,
+ uint8_t *snapshot_n,
+ int nr_snapshot,
+ uint64_t key,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_shadow_node *shadow_nodes[JA_MAX_DEPTH];
+ struct cds_ja_inode_flag **node_flag_ptr = NULL,
+ *parent_node_flag = NULL,
+ **parent_node_flag_ptr = NULL;
+ struct cds_ja_inode_flag *iter_node_flag;
+ int ret, i, nr_shadow = 0, nr_clear = 0, nr_branch = 0;
+ uint8_t n = 0;
+
+ assert(nr_snapshot == ja->tree_depth + 1);
+
+ /*
+ * From the last internal level node going up, get the node
+ * lock, check if the node has only one child left. If it is the
+ * case, we continue iterating upward. When we reach a node
+ * which has more that one child left, we lock the parent, and
+ * proceed to the node deletion (removing its children too).
+ */
+ for (i = nr_snapshot - 2; i >= 1; i--) {
+ struct cds_ja_shadow_node *shadow_node;
+
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ snapshot[i]);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ assert(shadow_node->nr_child > 0);
+ shadow_nodes[nr_shadow++] = shadow_node;
+ if (shadow_node->nr_child == 1)
+ nr_clear++;
+ nr_branch++;
+ if (shadow_node->nr_child > 1 || i == 1) {
+ /* Lock parent and break */
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ snapshot[i - 1]);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ shadow_nodes[nr_shadow++] = shadow_node;
+ node_flag_ptr = snapshot_ptr[i + 1];
+ n = snapshot_n[i + 1];
+ parent_node_flag_ptr = snapshot_ptr[i];
+ parent_node_flag = snapshot[i];
+ if (i > 1) {
+ /*
+ * Lock parent's parent, in case we need
+ * to recompact parent.
+ */
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht,
+ snapshot[i - 2]);
+ if (!shadow_node) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ shadow_nodes[nr_shadow++] = shadow_node;
+ }
+ break;
+ }
+ }
+
+ /*
+ * Check if node has been removed between RCU lookup and lock
+ * acquisition.
+ */
+ if (!*node_flag_ptr) {
+ ret = -ENOENT;
+ goto end;
+ }
+
+ /*
+ * At this point, we want to delete all nodes that are about to
+ * be removed from shadow_nodes (except the last one, which is
+ * either the root or the parent of the upmost node with 1
+ * child). OK to as to free lock here, because RCU read lock is
+ * held, and free only performed in call_rcu.
+ */
+
+ for (i = 0; i < nr_clear; i++) {
+ ret = rcuja_shadow_clear(ja->ht,
+ shadow_nodes[i]->node_flag,
+ shadow_nodes[i],
+ RCUJA_SHADOW_CLEAR_FREE_NODE
+ | RCUJA_SHADOW_CLEAR_FREE_LOCK);
+ assert(!ret);
+ }
+
+ iter_node_flag = parent_node_flag;
+ /* Remove from parent */
+ ret = ja_node_clear_ptr(ja,
+ node_flag_ptr, /* Pointer to location to nullify */
+ &iter_node_flag, /* Old new parent ptr in its parent */
+ shadow_nodes[nr_branch - 1], /* of parent */
+ n);
+ if (ret)
+ goto end;
+
+ dbg_printf("ja_detach_node: publish %p instead of %p\n",
+ iter_node_flag, *parent_node_flag_ptr);
+ /* Update address of parent ptr in its parent */
+ rcu_assign_pointer(*parent_node_flag_ptr, iter_node_flag);
+
+end:
+ for (i = 0; i < nr_shadow; i++)
+ rcuja_shadow_unlock(shadow_nodes[i]);
+ return ret;
+}
+
+static
+int ja_unchain_node(struct cds_ja *ja,
+ struct cds_ja_inode_flag *parent_node_flag,
+ struct cds_hlist_head *head,
+ struct cds_ja_node *node)
+{
+ struct cds_ja_shadow_node *shadow_node;
+ struct cds_hlist_node *hlist_node;
+ int ret = 0, count = 0;
+
+ shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
+ if (!shadow_node)
+ return -EAGAIN;
+ /*
+ * Retry if another thread removed all but one of duplicates
+ * since check (that was performed without lock).
+ */
+ cds_hlist_for_each_rcu(hlist_node, head, list) {
+ count++;
+ }
+
+ if (count == 1) {
+ ret = -EAGAIN;
+ goto end;
+ }
+ cds_hlist_del_rcu(&node->list);
+end: