rcuja: free all leaf nodes at destruction
[userspace-rcu.git] / rcuja / rcuja.c
CommitLineData
61009379
MD
1/*
2 * rcuja/rcuja.c
3 *
4 * Userspace RCU library - RCU Judy Array
5 *
6 * Copyright 2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
195e72d3 23#define _LGPL_SOURCE
e5227865 24#include <stdint.h>
8e519e3c 25#include <errno.h>
d68c6810 26#include <limits.h>
61009379 27#include <urcu/rcuja.h>
d68c6810
MD
28#include <urcu/compiler.h>
29#include <urcu/arch.h>
30#include <assert.h>
8e519e3c 31#include <urcu-pointer.h>
f07b240f 32#include <urcu/uatomic.h>
b4540e8a 33#include <stdint.h>
8e519e3c 34
61009379 35#include "rcuja-internal.h"
d68c6810 36#include "bitfield.h"
61009379 37
/*
 * Node layout classes. The first three are real, encodable node
 * layouts; RCU_JA_NR_TYPES counts them and RCU_JA_NULL follows it.
 * Leaf nodes are implicit from their height in the tree.
 */
enum cds_ja_type_class {
	/*
	 * Type A.
	 * 32-bit: 1 to 25 children, 8 to 128 bytes
	 * 64-bit: 1 to 28 children, 16 to 256 bytes
	 */
	RCU_JA_LINEAR = 0,

	/*
	 * Type B.
	 * 32-bit: 26 to 100 children, 256 to 512 bytes
	 * 64-bit: 29 to 112 children, 512 to 1024 bytes
	 */
	RCU_JA_POOL = 1,

	/*
	 * Type C.
	 * 32-bit: 101 to 256 children, 1024 bytes
	 * 64-bit: 113 to 256 children, 2048 bytes
	 */
	RCU_JA_PIGEON = 2,

	RCU_JA_NR_TYPES,

	/* Not an encoded type, but keeps code regular. */
	RCU_JA_NULL,
};
53
d96bfb0d
MD
54struct cds_ja_type {
55 enum cds_ja_type_class type_class;
8e519e3c
MD
56 uint16_t min_child; /* minimum number of children: 1 to 256 */
57 uint16_t max_child; /* maximum number of children: 1 to 256 */
58 uint16_t max_linear_child; /* per-pool max nr. children: 1 to 256 */
59 uint16_t order; /* node size is (1 << order), in bytes */
fd800776
MD
60 uint16_t nr_pool_order; /* number of pools */
61 uint16_t pool_size_order; /* pool size */
e5227865
MD
62};
63
64/*
65 * Iteration on the array to find the right node size for the number of
d68c6810 66 * children stops when it reaches .max_child == 256 (this is the largest
e5227865 67 * possible node size, which contains 256 children).
d68c6810
MD
68 * The min_child overlaps with the previous max_child to provide an
69 * hysteresis loop to reallocation for patterns of cyclic add/removal
70 * within the same node.
 * The node type index within the following arrays is represented on 3
 * bits. It identifies the node type, min/max number of children, and
 * the size order.
3d45251f
MD
74 * The max_child values for the RCU_JA_POOL below result from
75 * statistical approximation: over million populations, the max_child
76 * covers between 97% and 99% of the populations generated. Therefore, a
77 * fallback should exist to cover the rare extreme population unbalance
78 * cases, but it will not have a major impact on speed nor space
79 * consumption, since those are rare cases.
e5227865 80 */
e5227865 81
d68c6810
MD
82#if (CAA_BITS_PER_LONG < 64)
83/* 32-bit pointers */
1db4943c
MD
84enum {
85 ja_type_0_max_child = 1,
86 ja_type_1_max_child = 3,
87 ja_type_2_max_child = 6,
88 ja_type_3_max_child = 12,
89 ja_type_4_max_child = 25,
90 ja_type_5_max_child = 48,
91 ja_type_6_max_child = 92,
92 ja_type_7_max_child = 256,
e1db2db5 93 ja_type_8_max_child = 0, /* NULL */
1db4943c
MD
94};
95
8e519e3c
MD
96enum {
97 ja_type_0_max_linear_child = 1,
98 ja_type_1_max_linear_child = 3,
99 ja_type_2_max_linear_child = 6,
100 ja_type_3_max_linear_child = 12,
101 ja_type_4_max_linear_child = 25,
102 ja_type_5_max_linear_child = 24,
103 ja_type_6_max_linear_child = 23,
104};
105
1db4943c
MD
106enum {
107 ja_type_5_nr_pool_order = 1,
108 ja_type_6_nr_pool_order = 2,
109};
110
d96bfb0d 111const struct cds_ja_type ja_types[] = {
8e519e3c
MD
112 { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 3, },
113 { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 4, },
114 { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 5, },
115 { .type_class = RCU_JA_LINEAR, .min_child = 4, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 6, },
116 { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 7, },
e5227865 117
fd800776 118 /* Pools may fill sooner than max_child */
8e519e3c
MD
119 { .type_class = RCU_JA_POOL, .min_child = 20, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 8, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 7, },
120 { .type_class = RCU_JA_POOL, .min_child = 45, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 9, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 7, },
3d45251f
MD
121
122 /*
123 * TODO: Upon node removal below min_child, if child pool is
124 * filled beyond capacity, we need to roll back to pigeon.
125 */
1db4943c 126 { .type_class = RCU_JA_PIGEON, .min_child = 89, .max_child = ja_type_7_max_child, .order = 10, },
e1db2db5
MD
127
128 { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
d68c6810 129};
d68c6810
MD
130#else /* !(CAA_BITS_PER_LONG < 64) */
131/* 64-bit pointers */
1db4943c
MD
132enum {
133 ja_type_0_max_child = 1,
134 ja_type_1_max_child = 3,
135 ja_type_2_max_child = 7,
136 ja_type_3_max_child = 14,
137 ja_type_4_max_child = 28,
138 ja_type_5_max_child = 54,
139 ja_type_6_max_child = 104,
140 ja_type_7_max_child = 256,
e1db2db5 141 ja_type_8_max_child = 256,
1db4943c
MD
142};
143
8e519e3c
MD
144enum {
145 ja_type_0_max_linear_child = 1,
146 ja_type_1_max_linear_child = 3,
147 ja_type_2_max_linear_child = 7,
148 ja_type_3_max_linear_child = 14,
149 ja_type_4_max_linear_child = 28,
150 ja_type_5_max_linear_child = 27,
151 ja_type_6_max_linear_child = 26,
152};
153
1db4943c
MD
154enum {
155 ja_type_5_nr_pool_order = 1,
156 ja_type_6_nr_pool_order = 2,
157};
158
d96bfb0d 159const struct cds_ja_type ja_types[] = {
8e519e3c
MD
160 { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 4, },
161 { .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 5, },
162 { .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 6, },
163 { .type_class = RCU_JA_LINEAR, .min_child = 5, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 7, },
164 { .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 8, },
e5227865 165
3d45251f 166 /* Pools may fill sooner than max_child. */
8e519e3c
MD
167 { .type_class = RCU_JA_POOL, .min_child = 22, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 9, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 8, },
168 { .type_class = RCU_JA_POOL, .min_child = 51, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 10, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 8, },
e5227865 169
3d45251f
MD
170 /*
171 * TODO: Upon node removal below min_child, if child pool is
172 * filled beyond capacity, we need to roll back to pigeon.
173 */
1db4943c 174 { .type_class = RCU_JA_PIGEON, .min_child = 101, .max_child = ja_type_7_max_child, .order = 11, },
e1db2db5
MD
175
176 { .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
e5227865 177};
d68c6810 178#endif /* !(BITS_PER_LONG < 64) */
e5227865 179
1db4943c
MD
180static inline __attribute__((unused))
181void static_array_size_check(void)
182{
e1db2db5 183 CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) < JA_TYPE_MAX_NR);
1db4943c
MD
184}
185
e5227865 186/*
d96bfb0d 187 * The cds_ja_node contains the compressed node data needed for
1db4943c
MD
188 * read-side. For linear and pool node configurations, it starts with a
189 * byte counting the number of children in the node. Then, the
190 * node-specific data is placed.
 * The node mutex, if any is needed, protecting concurrent updates of
 * each node is placed in a separate hash table indexed by node address.
193 * For the pigeon configuration, the number of children is also kept in
194 * a separate hash table, indexed by node address, because it is only
195 * required for updates.
e5227865 196 */
1db4943c 197
ff38c745
MD
198#define DECLARE_LINEAR_NODE(index) \
199 struct { \
200 uint8_t nr_child; \
201 uint8_t child_value[ja_type_## index ##_max_linear_child]; \
b4540e8a 202 struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
ff38c745
MD
203 }
204
205#define DECLARE_POOL_NODE(index) \
206 struct { \
207 struct { \
208 uint8_t nr_child; \
209 uint8_t child_value[ja_type_## index ##_max_linear_child]; \
b4540e8a 210 struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
ff38c745
MD
211 } linear[1U << ja_type_## index ##_nr_pool_order]; \
212 }
1db4943c 213
b4540e8a 214struct cds_ja_inode {
1db4943c
MD
215 union {
216 /* Linear configuration */
217 DECLARE_LINEAR_NODE(0) conf_0;
218 DECLARE_LINEAR_NODE(1) conf_1;
219 DECLARE_LINEAR_NODE(2) conf_2;
220 DECLARE_LINEAR_NODE(3) conf_3;
221 DECLARE_LINEAR_NODE(4) conf_4;
222
223 /* Pool configuration */
224 DECLARE_POOL_NODE(5) conf_5;
225 DECLARE_POOL_NODE(6) conf_6;
226
227 /* Pigeon configuration */
228 struct {
b4540e8a 229 struct cds_ja_inode_flag *child[ja_type_7_max_child];
1db4943c
MD
230 } conf_7;
231 /* data aliasing nodes for computed accesses */
b4540e8a 232 uint8_t data[sizeof(struct cds_ja_inode_flag *) * ja_type_7_max_child];
1db4943c 233 } u;
e5227865
MD
234};
235
2e313670
MD
/*
 * Recompaction request passed to ja_node_recompact(), which uses it to
 * pick the type index of the rebuilt node.
 */
enum ja_recompact {
	JA_RECOMPACT = 0,	/* rebuild with the same type index */
	JA_RECOMPACT_ADD = 1,	/* rebuild while adding a child */
	JA_RECOMPACT_DEL = 2,	/* rebuild while removing a child */
};
241
b4540e8a 242struct cds_ja_inode *alloc_cds_ja_node(const struct cds_ja_type *ja_type)
e5227865 243{
1db4943c 244 return calloc(1U << ja_type->order, sizeof(char));
e5227865
MD
245}
246
/*
 * Release a node obtained from alloc_cds_ja_node(). The memory is
 * handed straight to free(), so a NULL node is accepted.
 */
void free_cds_ja_node(struct cds_ja_inode *node)
{
	void *mem = node;

	free(mem);
}
251
d68c6810
MD
/*
 * Power-of-two alignment helpers. "align" must be a power of two; the
 * mask is converted to the type of "v" so that the complement covers
 * every bit of "v". __typeof__ is used rather than typeof so the macros
 * also build in strict ISO C modes.
 */
#define __JA_ALIGN_MASK(v, mask)	(((v) + (mask)) & ~(mask))
#define JA_ALIGN(v, align)		__JA_ALIGN_MASK(v, (__typeof__(v)) (align) - 1)
#define __JA_FLOOR_MASK(v, mask)	((v) & ~(mask))
#define JA_FLOOR(v, align)		__JA_FLOOR_MASK(v, (__typeof__(v)) (align) - 1)

/*
 * Round ptr up to the next address that is a multiple of
 * sizeof(void *). An already aligned pointer is returned unchanged.
 * The arithmetic is done on uintptr_t, the integer type meant to hold
 * a pointer value.
 */
static
uint8_t *align_ptr_size(uint8_t *ptr)
{
	return (uint8_t *) JA_ALIGN((uintptr_t) ptr, sizeof(void *));
}
262
11c5e016 263static
d96bfb0d 264uint8_t ja_linear_node_get_nr_child(const struct cds_ja_type *type,
b4540e8a 265 struct cds_ja_inode *node)
11c5e016
MD
266{
267 assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
2e313670 268 return rcu_dereference(node->u.data[0]);
11c5e016
MD
269}
270
13a7f5a6
MD
271/*
272 * The order in which values and pointers are does does not matter: if
273 * a value is missing, we return NULL. If a value is there, but its
274 * associated pointers is still NULL, we return NULL too.
275 */
d68c6810 276static
b4540e8a
MD
277struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
278 struct cds_ja_inode *node,
5a9a87dd 279 struct cds_ja_inode_flag ***child_node_flag_ptr,
8e519e3c 280 uint8_t n)
d68c6810
MD
281{
282 uint8_t nr_child;
283 uint8_t *values;
b4540e8a
MD
284 struct cds_ja_inode_flag **pointers;
285 struct cds_ja_inode_flag *ptr;
d68c6810
MD
286 unsigned int i;
287
8e519e3c 288 assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
d68c6810 289
11c5e016 290 nr_child = ja_linear_node_get_nr_child(type, node);
13a7f5a6 291 cmm_smp_rmb(); /* read nr_child before values and pointers */
8e519e3c
MD
292 assert(nr_child <= type->max_linear_child);
293 assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);
d68c6810 294
1db4943c 295 values = &node->u.data[1];
d68c6810 296 for (i = 0; i < nr_child; i++) {
13a7f5a6 297 if (CMM_LOAD_SHARED(values[i]) == n)
d68c6810
MD
298 break;
299 }
300 if (i >= nr_child)
301 return NULL;
b4540e8a 302 pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
13a7f5a6 303 ptr = rcu_dereference(pointers[i]);
2e313670
MD
304 if (caa_unlikely(child_node_flag_ptr) && ptr)
305 *child_node_flag_ptr = &pointers[i];
d68c6810
MD
306 return ptr;
307}
308
11c5e016 309static
5a9a87dd 310void ja_linear_node_get_ith_pos(const struct cds_ja_type *type,
b4540e8a 311 struct cds_ja_inode *node,
11c5e016
MD
312 uint8_t i,
313 uint8_t *v,
b4540e8a 314 struct cds_ja_inode_flag **iter)
11c5e016
MD
315{
316 uint8_t *values;
b4540e8a 317 struct cds_ja_inode_flag **pointers;
11c5e016
MD
318
319 assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
320 assert(i < ja_linear_node_get_nr_child(type, node));
321
322 values = &node->u.data[1];
323 *v = values[i];
b4540e8a 324 pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
11c5e016
MD
325 *iter = pointers[i];
326}
327
d68c6810 328static
b4540e8a
MD
329struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
330 struct cds_ja_inode *node,
5a9a87dd 331 struct cds_ja_inode_flag ***child_node_flag_ptr,
8e519e3c 332 uint8_t n)
d68c6810 333{
b4540e8a 334 struct cds_ja_inode *linear;
d68c6810 335
fd800776 336 assert(type->type_class == RCU_JA_POOL);
e1db2db5
MD
337 /*
338 * TODO: currently, we select the pool by highest bits. We
339 * should support various encodings.
340 */
b4540e8a 341 linear = (struct cds_ja_inode *)
1db4943c 342 &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
5a9a87dd 343 return ja_linear_node_get_nth(type, linear, child_node_flag_ptr, n);
d68c6810
MD
344}
345
11c5e016 346static
b4540e8a
MD
347struct cds_ja_inode *ja_pool_node_get_ith_pool(const struct cds_ja_type *type,
348 struct cds_ja_inode *node,
11c5e016
MD
349 uint8_t i)
350{
351 assert(type->type_class == RCU_JA_POOL);
b4540e8a 352 return (struct cds_ja_inode *)
11c5e016
MD
353 &node->u.data[(unsigned int) i << type->pool_size_order];
354}
355
d68c6810 356static
b4540e8a
MD
357struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
358 struct cds_ja_inode *node,
5a9a87dd 359 struct cds_ja_inode_flag ***child_node_flag_ptr,
8e519e3c 360 uint8_t n)
d68c6810 361{
5a9a87dd
MD
362 struct cds_ja_inode_flag **child_node_flag;
363
d68c6810 364 assert(type->type_class == RCU_JA_PIGEON);
5a9a87dd 365 child_node_flag = &((struct cds_ja_inode_flag **) node->u.data)[n];
582a6ade
MD
366 dbg_printf("ja_pigeon_node_get_nth child_node_flag_ptr %p\n",
367 child_node_flag);
368 if (caa_unlikely(child_node_flag_ptr) && *child_node_flag)
5a9a87dd
MD
369 *child_node_flag_ptr = child_node_flag;
370 return rcu_dereference(*child_node_flag);
d68c6810
MD
371}
372
2e313670
MD
/*
 * Return the content of slot i of a pigeon node. In a pigeon node the
 * position and the key are the same thing, so this is a plain lookup
 * that does not report the slot address.
 */
static
struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		uint8_t i)
{
	struct cds_ja_inode_flag *child;

	child = ja_pigeon_node_get_nth(type, node, NULL, i);
	return child;
}
380
13a7f5a6
MD
381/*
382 * ja_node_get_nth: get nth item from a node.
383 * node_flag is already rcu_dereference'd.
384 */
d68c6810 385static
41975c12 386struct cds_ja_inode_flag * ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
5a9a87dd 387 struct cds_ja_inode_flag ***child_node_flag_ptr,
8e519e3c 388 uint8_t n)
d68c6810
MD
389{
390 unsigned int type_index;
b4540e8a 391 struct cds_ja_inode *node;
d96bfb0d 392 const struct cds_ja_type *type;
d68c6810 393
d68c6810 394 node = ja_node_ptr(node_flag);
5a9a87dd 395 assert(node != NULL);
d68c6810
MD
396 type_index = ja_node_type(node_flag);
397 type = &ja_types[type_index];
398
399 switch (type->type_class) {
400 case RCU_JA_LINEAR:
5a9a87dd
MD
401 return ja_linear_node_get_nth(type, node,
402 child_node_flag_ptr, n);
fd800776 403 case RCU_JA_POOL:
5a9a87dd
MD
404 return ja_pool_node_get_nth(type, node,
405 child_node_flag_ptr, n);
d68c6810 406 case RCU_JA_PIGEON:
5a9a87dd
MD
407 return ja_pigeon_node_get_nth(type, node,
408 child_node_flag_ptr, n);
d68c6810
MD
409 default:
410 assert(0);
411 return (void *) -1UL;
412 }
413}
414
8e519e3c 415static
d96bfb0d 416int ja_linear_node_set_nth(const struct cds_ja_type *type,
b4540e8a 417 struct cds_ja_inode *node,
d96bfb0d 418 struct cds_ja_shadow_node *shadow_node,
8e519e3c 419 uint8_t n,
b4540e8a 420 struct cds_ja_inode_flag *child_node_flag)
8e519e3c
MD
421{
422 uint8_t nr_child;
423 uint8_t *values, *nr_child_ptr;
b4540e8a 424 struct cds_ja_inode_flag **pointers;
2e313670 425 unsigned int i, unused = 0;
8e519e3c
MD
426
427 assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
428
429 nr_child_ptr = &node->u.data[0];
a2a7ff59 430 dbg_printf("linear set nth: nr_child_ptr %p\n", nr_child_ptr);
8e519e3c
MD
431 nr_child = *nr_child_ptr;
432 assert(nr_child <= type->max_linear_child);
8e519e3c
MD
433
434 values = &node->u.data[1];
2e313670
MD
435 pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
436 /* Check if node value is already populated */
8e519e3c 437 for (i = 0; i < nr_child; i++) {
2e313670
MD
438 if (values[i] == n) {
439 if (pointers[i])
440 return -EEXIST;
441 else
442 break;
443 } else {
444 if (!pointers[i])
445 unused++;
446 }
8e519e3c 447 }
2e313670
MD
448 if (i == nr_child && nr_child >= type->max_linear_child) {
449 if (unused)
450 return -ERANGE; /* recompact node */
451 else
452 return -ENOSPC; /* No space left in this node type */
453 }
454
455 assert(pointers[i] == NULL);
456 rcu_assign_pointer(pointers[i], child_node_flag);
457 /* If we expanded the nr_child, increment it */
458 if (i == nr_child) {
459 CMM_STORE_SHARED(values[nr_child], n);
460 /* write pointer and value before nr_child */
461 cmm_smp_wmb();
462 CMM_STORE_SHARED(*nr_child_ptr, nr_child + 1);
8e519e3c 463 }
e1db2db5 464 shadow_node->nr_child++;
a2a7ff59
MD
465 dbg_printf("linear set nth: %u child, shadow: %u child, for node %p shadow %p\n",
466 (unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
467 (unsigned int) shadow_node->nr_child,
468 node, shadow_node);
469
8e519e3c
MD
470 return 0;
471}
472
473static
d96bfb0d 474int ja_pool_node_set_nth(const struct cds_ja_type *type,
b4540e8a 475 struct cds_ja_inode *node,
d96bfb0d 476 struct cds_ja_shadow_node *shadow_node,
8e519e3c 477 uint8_t n,
b4540e8a 478 struct cds_ja_inode_flag *child_node_flag)
8e519e3c 479{
b4540e8a 480 struct cds_ja_inode *linear;
8e519e3c
MD
481
482 assert(type->type_class == RCU_JA_POOL);
b4540e8a 483 linear = (struct cds_ja_inode *)
8e519e3c 484 &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
e1db2db5
MD
485 return ja_linear_node_set_nth(type, linear, shadow_node,
486 n, child_node_flag);
8e519e3c
MD
487}
488
489static
d96bfb0d 490int ja_pigeon_node_set_nth(const struct cds_ja_type *type,
b4540e8a 491 struct cds_ja_inode *node,
d96bfb0d 492 struct cds_ja_shadow_node *shadow_node,
8e519e3c 493 uint8_t n,
b4540e8a 494 struct cds_ja_inode_flag *child_node_flag)
8e519e3c 495{
b4540e8a 496 struct cds_ja_inode_flag **ptr;
8e519e3c
MD
497
498 assert(type->type_class == RCU_JA_PIGEON);
b4540e8a 499 ptr = &((struct cds_ja_inode_flag **) node->u.data)[n];
5a9a87dd 500 if (*ptr)
8e519e3c
MD
501 return -EEXIST;
502 rcu_assign_pointer(*ptr, child_node_flag);
e1db2db5 503 shadow_node->nr_child++;
8e519e3c
MD
504 return 0;
505}
506
d68c6810 507/*
7a0b2331 508 * _ja_node_set_nth: set nth item within a node. Return an error
8e519e3c 509 * (negative error value) if it is already there.
d68c6810 510 */
8e519e3c 511static
d96bfb0d 512int _ja_node_set_nth(const struct cds_ja_type *type,
b4540e8a 513 struct cds_ja_inode *node,
d96bfb0d 514 struct cds_ja_shadow_node *shadow_node,
e1db2db5 515 uint8_t n,
b4540e8a 516 struct cds_ja_inode_flag *child_node_flag)
8e519e3c 517{
8e519e3c
MD
518 switch (type->type_class) {
519 case RCU_JA_LINEAR:
e1db2db5 520 return ja_linear_node_set_nth(type, node, shadow_node, n,
8e519e3c
MD
521 child_node_flag);
522 case RCU_JA_POOL:
e1db2db5 523 return ja_pool_node_set_nth(type, node, shadow_node, n,
8e519e3c
MD
524 child_node_flag);
525 case RCU_JA_PIGEON:
e1db2db5 526 return ja_pigeon_node_set_nth(type, node, shadow_node, n,
8e519e3c 527 child_node_flag);
e1db2db5
MD
528 case RCU_JA_NULL:
529 return -ENOSPC;
8e519e3c
MD
530 default:
531 assert(0);
532 return -EINVAL;
533 }
534
535 return 0;
536}
7a0b2331 537
2e313670 538static
af3cbd45 539int ja_linear_node_clear_ptr(const struct cds_ja_type *type,
2e313670
MD
540 struct cds_ja_inode *node,
541 struct cds_ja_shadow_node *shadow_node,
af3cbd45 542 struct cds_ja_inode_flag **node_flag_ptr)
2e313670
MD
543{
544 uint8_t nr_child;
af3cbd45 545 uint8_t *nr_child_ptr;
2e313670
MD
546
547 assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
548
549 nr_child_ptr = &node->u.data[0];
af3cbd45 550 dbg_printf("linear clear ptr: nr_child_ptr %p\n", nr_child_ptr);
2e313670
MD
551 nr_child = *nr_child_ptr;
552 assert(nr_child <= type->max_linear_child);
553
2e313670
MD
554 if (shadow_node->fallback_removal_count) {
555 shadow_node->fallback_removal_count--;
556 } else {
557 if (shadow_node->nr_child <= type->min_child) {
558 /* We need to try recompacting the node */
559 return -EFBIG;
560 }
561 }
af3cbd45
MD
562 assert(*node_flag_ptr != NULL);
563 rcu_assign_pointer(*node_flag_ptr, NULL);
2e313670
MD
564 /*
565 * Value and nr_child are never changed (would cause ABA issue).
566 * Instead, we leave the pointer to NULL and recompact the node
567 * once in a while. It is allowed to set a NULL pointer to a new
568 * value without recompaction though.
569 * Only update the shadow node accounting.
570 */
571 shadow_node->nr_child--;
af3cbd45 572 dbg_printf("linear clear ptr: %u child, shadow: %u child, for node %p shadow %p\n",
2e313670
MD
573 (unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
574 (unsigned int) shadow_node->nr_child,
575 node, shadow_node);
576
577 return 0;
578}
579
580static
af3cbd45 581int ja_pool_node_clear_ptr(const struct cds_ja_type *type,
2e313670
MD
582 struct cds_ja_inode *node,
583 struct cds_ja_shadow_node *shadow_node,
af3cbd45 584 struct cds_ja_inode_flag **node_flag_ptr,
2e313670
MD
585 uint8_t n)
586{
587 struct cds_ja_inode *linear;
588
589 assert(type->type_class == RCU_JA_POOL);
590 linear = (struct cds_ja_inode *)
591 &node->u.data[((unsigned long) n >> (CHAR_BIT - type->nr_pool_order)) << type->pool_size_order];
af3cbd45 592 return ja_linear_node_clear_ptr(type, linear, shadow_node, node_flag_ptr);
2e313670
MD
593}
594
595static
af3cbd45 596int ja_pigeon_node_clear_ptr(const struct cds_ja_type *type,
2e313670
MD
597 struct cds_ja_inode *node,
598 struct cds_ja_shadow_node *shadow_node,
af3cbd45 599 struct cds_ja_inode_flag **node_flag_ptr)
2e313670 600{
2e313670 601 assert(type->type_class == RCU_JA_PIGEON);
af3cbd45 602 rcu_assign_pointer(*node_flag_ptr, NULL);
2e313670
MD
603 shadow_node->nr_child--;
604 return 0;
605}
606
607/*
af3cbd45 608 * _ja_node_clear_ptr: clear ptr item within a node. Return an error
2e313670
MD
609 * (negative error value) if it is not found (-ENOENT).
610 */
611static
af3cbd45 612int _ja_node_clear_ptr(const struct cds_ja_type *type,
2e313670
MD
613 struct cds_ja_inode *node,
614 struct cds_ja_shadow_node *shadow_node,
af3cbd45 615 struct cds_ja_inode_flag **node_flag_ptr,
2e313670
MD
616 uint8_t n)
617{
618 switch (type->type_class) {
619 case RCU_JA_LINEAR:
af3cbd45 620 return ja_linear_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
2e313670 621 case RCU_JA_POOL:
af3cbd45 622 return ja_pool_node_clear_ptr(type, node, shadow_node, node_flag_ptr, n);
2e313670 623 case RCU_JA_PIGEON:
af3cbd45 624 return ja_pigeon_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
2e313670
MD
625 case RCU_JA_NULL:
626 return -ENOENT;
627 default:
628 assert(0);
629 return -EINVAL;
630 }
631
632 return 0;
633}
634
7a0b2331
MD
635/*
636 * ja_node_recompact_add: recompact a node, adding a new child.
e1db2db5 637 * TODO: for pool type, take selection bit(s) into account.
2e313670 638 * Return 0 on success, -EAGAIN if need to retry, or other negative
5a9a87dd 639 * error value otherwise.
7a0b2331
MD
640 */
641static
2e313670
MD
642int ja_node_recompact(enum ja_recompact mode,
643 struct cds_ja *ja,
e1db2db5 644 unsigned int old_type_index,
d96bfb0d 645 const struct cds_ja_type *old_type,
b4540e8a 646 struct cds_ja_inode *old_node,
5a9a87dd 647 struct cds_ja_shadow_node *shadow_node,
3d8fe307 648 struct cds_ja_inode_flag **old_node_flag_ptr, uint8_t n,
af3cbd45
MD
649 struct cds_ja_inode_flag *child_node_flag,
650 struct cds_ja_inode_flag **nullify_node_flag_ptr)
7a0b2331 651{
e1db2db5 652 unsigned int new_type_index;
b4540e8a 653 struct cds_ja_inode *new_node;
af3cbd45 654 struct cds_ja_shadow_node *new_shadow_node = NULL;
d96bfb0d 655 const struct cds_ja_type *new_type;
3d8fe307 656 struct cds_ja_inode_flag *new_node_flag, *old_node_flag;
7a0b2331 657 int ret;
f07b240f 658 int fallback = 0;
7a0b2331 659
3d8fe307
MD
660 old_node_flag = *old_node_flag_ptr;
661
2e313670
MD
662 switch (mode) {
663 case JA_RECOMPACT:
664 new_type_index = old_type_index;
665 break;
666 case JA_RECOMPACT_ADD:
667 if (!shadow_node || old_type_index == NODE_INDEX_NULL) {
668 new_type_index = 0;
669 } else {
670 new_type_index = old_type_index + 1;
671 }
672 break;
673 case JA_RECOMPACT_DEL:
674 if (old_type_index == 0) {
675 new_type_index = NODE_INDEX_NULL;
676 } else {
677 new_type_index = old_type_index - 1;
678 }
679 break;
680 default:
681 assert(0);
7a0b2331 682 }
a2a7ff59 683
f07b240f 684retry: /* for fallback */
582a6ade
MD
685 dbg_printf("Recompact from type %d to type %d\n",
686 old_type_index, new_type_index);
7a0b2331 687 new_type = &ja_types[new_type_index];
2e313670
MD
688 if (new_type_index != NODE_INDEX_NULL) {
689 new_node = alloc_cds_ja_node(new_type);
690 if (!new_node)
691 return -ENOMEM;
692 new_node_flag = ja_node_flag(new_node, new_type_index);
693 dbg_printf("Recompact inherit lock from %p\n", shadow_node);
3d8fe307 694 new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja);
2e313670
MD
695 if (!new_shadow_node) {
696 free(new_node);
697 return -ENOMEM;
698 }
699 if (fallback)
700 new_shadow_node->fallback_removal_count =
701 JA_FALLBACK_REMOVAL_COUNT;
702 } else {
703 new_node = NULL;
704 new_node_flag = NULL;
e1db2db5 705 }
11c5e016 706
2e313670
MD
707 assert(mode != JA_RECOMPACT_ADD || old_type->type_class != RCU_JA_PIGEON);
708
709 if (new_type_index == NODE_INDEX_NULL)
710 goto skip_copy;
711
11c5e016
MD
712 switch (old_type->type_class) {
713 case RCU_JA_LINEAR:
714 {
715 uint8_t nr_child =
716 ja_linear_node_get_nr_child(old_type, old_node);
717 unsigned int i;
718
719 for (i = 0; i < nr_child; i++) {
b4540e8a 720 struct cds_ja_inode_flag *iter;
11c5e016
MD
721 uint8_t v;
722
723 ja_linear_node_get_ith_pos(old_type, old_node, i, &v, &iter);
724 if (!iter)
725 continue;
af3cbd45 726 if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
2e313670 727 continue;
f07b240f
MD
728 ret = _ja_node_set_nth(new_type, new_node,
729 new_shadow_node,
11c5e016 730 v, iter);
f07b240f
MD
731 if (new_type->type_class == RCU_JA_POOL && ret) {
732 goto fallback_toosmall;
733 }
11c5e016
MD
734 assert(!ret);
735 }
736 break;
737 }
738 case RCU_JA_POOL:
739 {
740 unsigned int pool_nr;
741
742 for (pool_nr = 0; pool_nr < (1U << old_type->nr_pool_order); pool_nr++) {
b4540e8a 743 struct cds_ja_inode *pool =
11c5e016
MD
744 ja_pool_node_get_ith_pool(old_type,
745 old_node, pool_nr);
746 uint8_t nr_child =
747 ja_linear_node_get_nr_child(old_type, pool);
748 unsigned int j;
749
750 for (j = 0; j < nr_child; j++) {
b4540e8a 751 struct cds_ja_inode_flag *iter;
11c5e016
MD
752 uint8_t v;
753
754 ja_linear_node_get_ith_pos(old_type, pool,
755 j, &v, &iter);
756 if (!iter)
757 continue;
af3cbd45 758 if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
2e313670 759 continue;
f07b240f
MD
760 ret = _ja_node_set_nth(new_type, new_node,
761 new_shadow_node,
11c5e016 762 v, iter);
f07b240f
MD
763 if (new_type->type_class == RCU_JA_POOL
764 && ret) {
765 goto fallback_toosmall;
766 }
11c5e016
MD
767 assert(!ret);
768 }
769 }
770 break;
7a0b2331 771 }
a2a7ff59 772 case RCU_JA_NULL:
2e313670 773 assert(mode == JA_RECOMPACT_ADD);
a2a7ff59 774 break;
11c5e016 775 case RCU_JA_PIGEON:
2e313670
MD
776 {
777 uint8_t nr_child;
778 unsigned int i;
779
780 assert(mode == JA_RECOMPACT_DEL);
781 nr_child = shadow_node->nr_child;
782 for (i = 0; i < nr_child; i++) {
783 struct cds_ja_inode_flag *iter;
784
785 iter = ja_pigeon_node_get_ith_pos(old_type, old_node, i);
786 if (!iter)
787 continue;
af3cbd45 788 if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
2e313670
MD
789 continue;
790 ret = _ja_node_set_nth(new_type, new_node,
791 new_shadow_node,
792 i, iter);
793 if (new_type->type_class == RCU_JA_POOL && ret) {
794 goto fallback_toosmall;
795 }
796 assert(!ret);
797 }
798 break;
799 }
11c5e016
MD
800 default:
801 assert(0);
5a9a87dd 802 ret = -EINVAL;
f07b240f 803 goto end;
11c5e016 804 }
2e313670 805skip_copy:
11c5e016 806
2e313670
MD
807 if (JA_RECOMPACT_ADD) {
808 /* add node */
809 ret = _ja_node_set_nth(new_type, new_node,
810 new_shadow_node,
811 n, child_node_flag);
812 assert(!ret);
813 }
3d8fe307
MD
814 /* Return pointer to new recompacted node through old_node_flag_ptr */
815 *old_node_flag_ptr = new_node_flag;
a2a7ff59 816 if (old_node) {
2e313670
MD
817 int flags;
818
819 flags = RCUJA_SHADOW_CLEAR_FREE_NODE;
820 /*
821 * It is OK to free the lock associated with a node
822 * going to NULL, since we are holding the parent lock.
823 * This synchronizes removal with re-add of that node.
824 */
825 if (new_type_index == NODE_INDEX_NULL)
826 flags = RCUJA_SHADOW_CLEAR_FREE_LOCK;
3d8fe307 827 ret = rcuja_shadow_clear(ja->ht, old_node_flag, shadow_node,
2e313670 828 flags);
a2a7ff59
MD
829 assert(!ret);
830 }
5a9a87dd
MD
831
832 ret = 0;
f07b240f 833end:
5a9a87dd 834 return ret;
f07b240f
MD
835
836fallback_toosmall:
837 /* fallback if next pool is too small */
af3cbd45 838 assert(new_shadow_node);
3d8fe307 839 ret = rcuja_shadow_clear(ja->ht, new_node_flag, new_shadow_node,
f07b240f
MD
840 RCUJA_SHADOW_CLEAR_FREE_NODE);
841 assert(!ret);
842
2e313670 843 /* Choose fallback type: pigeon */
f07b240f
MD
844 new_type_index = (1UL << JA_TYPE_BITS) - 1;
845 dbg_printf("Fallback to type %d\n", new_type_index);
846 uatomic_inc(&ja->nr_fallback);
847 fallback = 1;
848 goto retry;
7a0b2331
MD
849}
850
5a9a87dd 851/*
2e313670 852 * Return 0 on success, -EAGAIN if need to retry, or other negative
5a9a87dd
MD
853 * error value otherwise.
854 */
7a0b2331 855static
d96bfb0d 856int ja_node_set_nth(struct cds_ja *ja,
b4540e8a 857 struct cds_ja_inode_flag **node_flag, uint8_t n,
5a9a87dd
MD
858 struct cds_ja_inode_flag *child_node_flag,
859 struct cds_ja_shadow_node *shadow_node)
7a0b2331
MD
860{
861 int ret;
e1db2db5 862 unsigned int type_index;
d96bfb0d 863 const struct cds_ja_type *type;
b4540e8a 864 struct cds_ja_inode *node;
7a0b2331 865
a2a7ff59
MD
866 dbg_printf("ja_node_set_nth for n=%u, node %p, shadow %p\n",
867 (unsigned int) n, ja_node_ptr(*node_flag), shadow_node);
868
e1db2db5
MD
869 node = ja_node_ptr(*node_flag);
870 type_index = ja_node_type(*node_flag);
871 type = &ja_types[type_index];
e1db2db5
MD
872 ret = _ja_node_set_nth(type, node, shadow_node,
873 n, child_node_flag);
2e313670
MD
874 switch (ret) {
875 case -ENOSPC:
e1db2db5 876 /* Not enough space in node, need to recompact. */
2e313670 877 ret = ja_node_recompact(JA_RECOMPACT_ADD, ja, type_index, type, node,
af3cbd45 878 shadow_node, node_flag, n, child_node_flag, NULL);
2e313670
MD
879 break;
880 case -ERANGE:
881 /* Node needs to be recompacted. */
882 ret = ja_node_recompact(JA_RECOMPACT, ja, type_index, type, node,
af3cbd45 883 shadow_node, node_flag, n, child_node_flag, NULL);
2e313670
MD
884 break;
885 }
886 return ret;
887}
888
889/*
890 * Return 0 on success, -EAGAIN if need to retry, or other negative
891 * error value otherwise.
892 */
893static
af3cbd45
MD
894int ja_node_clear_ptr(struct cds_ja *ja,
895 struct cds_ja_inode_flag **node_flag_ptr, /* Pointer to location to nullify */
896 struct cds_ja_inode_flag **parent_node_flag_ptr, /* Address of parent ptr in its parent */
897 struct cds_ja_shadow_node *shadow_node, /* of parent */
898 uint8_t n)
2e313670
MD
899{
900 int ret;
901 unsigned int type_index;
902 const struct cds_ja_type *type;
903 struct cds_ja_inode *node;
904
af3cbd45
MD
905 dbg_printf("ja_node_clear_ptr for node %p, shadow %p, target ptr %p\n",
906 ja_node_ptr(*parent_node_flag_ptr), shadow_node, node_flag_ptr);
2e313670 907
af3cbd45
MD
908 node = ja_node_ptr(*parent_node_flag_ptr);
909 type_index = ja_node_type(*parent_node_flag_ptr);
2e313670 910 type = &ja_types[type_index];
af3cbd45 911 ret = _ja_node_clear_ptr(type, node, shadow_node, node_flag_ptr, n);
2e313670
MD
912 if (ret == -EFBIG) {
913 /* Should to try recompaction. */
914 ret = ja_node_recompact(JA_RECOMPACT_DEL, ja, type_index, type, node,
af3cbd45
MD
915 shadow_node, parent_node_flag_ptr, n, NULL,
916 node_flag_ptr);
7a0b2331
MD
917 }
918 return ret;
919}
be9a7474 920
af3cbd45 921struct cds_hlist_head cds_ja_lookup(struct cds_ja *ja, uint64_t key)
b4540e8a 922{
41975c12
MD
923 unsigned int tree_depth, i;
924 struct cds_ja_inode_flag *node_flag;
af3cbd45 925 struct cds_hlist_head head = { NULL };
41975c12
MD
926
927 if (caa_unlikely(key > ja->key_max))
af3cbd45 928 return head;
41975c12 929 tree_depth = ja->tree_depth;
5a9a87dd 930 node_flag = rcu_dereference(ja->root);
41975c12 931
5a9a87dd
MD
932 /* level 0: root node */
933 if (!ja_node_ptr(node_flag))
af3cbd45 934 return head;
5a9a87dd
MD
935
936 for (i = 1; i < tree_depth; i++) {
79b41067
MD
937 uint8_t iter_key;
938
939 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
5a9a87dd 940 node_flag = ja_node_get_nth(node_flag, NULL,
79b41067 941 iter_key);
582a6ade
MD
942 dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
943 (unsigned int) iter_key, node_flag);
41975c12 944 if (!ja_node_ptr(node_flag))
af3cbd45 945 return head;
41975c12
MD
946 }
947
5a9a87dd 948 /* Last level lookup succeded. We got an actual match. */
af3cbd45
MD
949 head.next = (struct cds_hlist_node *) node_flag;
950 return head;
5a9a87dd
MD
951}
952
953/*
954 * We reached an unpopulated node. Create it and the children we need,
955 * and then attach the entire branch to the current node. This may
956 * trigger recompaction of the current node. Locks needed: node lock
957 * (for add), and, possibly, parent node lock (to update pointer due to
958 * node recompaction).
959 *
960 * First take node lock, check if recompaction is needed, then take
961 * parent lock (if needed). Then we can proceed to create the new
962 * branch. Publish the new branch, and release locks.
963 * TODO: we currently always take the parent lock even when not needed.
964 */
965static
966int ja_attach_node(struct cds_ja *ja,
967 struct cds_ja_inode_flag **node_flag_ptr,
968 struct cds_ja_inode_flag *node_flag,
969 struct cds_ja_inode_flag *parent_node_flag,
970 uint64_t key,
79b41067 971 unsigned int level,
5a9a87dd
MD
972 struct cds_ja_node *child_node)
973{
974 struct cds_ja_shadow_node *shadow_node = NULL,
af3cbd45 975 *parent_shadow_node = NULL;
5a9a87dd
MD
976 struct cds_ja_inode *node = ja_node_ptr(node_flag);
977 struct cds_ja_inode *parent_node = ja_node_ptr(parent_node_flag);
978 struct cds_hlist_head head;
979 struct cds_ja_inode_flag *iter_node_flag, *iter_dest_node_flag;
980 int ret, i;
a2a7ff59 981 struct cds_ja_inode_flag *created_nodes[JA_MAX_DEPTH];
5a9a87dd
MD
982 int nr_created_nodes = 0;
983
582a6ade
MD
984 dbg_printf("Attach node at level %u (node %p, node_flag %p)\n",
985 level, node, node_flag);
a2a7ff59 986
5a9a87dd 987 assert(node);
3d8fe307 988 shadow_node = rcuja_shadow_lookup_lock(ja->ht, node_flag);
5a9a87dd 989 if (!shadow_node) {
2e313670 990 ret = -EAGAIN;
5a9a87dd
MD
991 goto end;
992 }
993 if (parent_node) {
994 parent_shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 995 parent_node_flag);
5a9a87dd 996 if (!parent_shadow_node) {
2e313670 997 ret = -EAGAIN;
5a9a87dd
MD
998 goto unlock_shadow;
999 }
1000 }
1001
a2a7ff59 1002 /* Create new branch, starting from bottom */
5a9a87dd
MD
1003 CDS_INIT_HLIST_HEAD(&head);
1004 cds_hlist_add_head_rcu(&child_node->list, &head);
a2a7ff59 1005 iter_node_flag = (struct cds_ja_inode_flag *) head.next;
5a9a87dd 1006
79b41067
MD
1007 for (i = ja->tree_depth; i > (int) level; i--) {
1008 uint8_t iter_key;
1009
1010 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - i)));
1011 dbg_printf("branch creation level %d, key %u\n",
1012 i - 1, (unsigned int) iter_key);
5a9a87dd
MD
1013 iter_dest_node_flag = NULL;
1014 ret = ja_node_set_nth(ja, &iter_dest_node_flag,
79b41067 1015 iter_key,
5a9a87dd
MD
1016 iter_node_flag,
1017 NULL);
1018 if (ret)
1019 goto check_error;
1020 created_nodes[nr_created_nodes++] = iter_dest_node_flag;
1021 iter_node_flag = iter_dest_node_flag;
1022 }
1023
79b41067
MD
1024 if (level > 1) {
1025 uint8_t iter_key;
1026
1027 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
a2a7ff59
MD
1028 /* We need to use set_nth on the previous level. */
1029 iter_dest_node_flag = node_flag;
1030 ret = ja_node_set_nth(ja, &iter_dest_node_flag,
79b41067 1031 iter_key,
a2a7ff59
MD
1032 iter_node_flag,
1033 shadow_node);
1034 if (ret)
1035 goto check_error;
1036 created_nodes[nr_created_nodes++] = iter_dest_node_flag;
1037 iter_node_flag = iter_dest_node_flag;
1038 }
1039
5a9a87dd 1040 /* Publish new branch */
a2a7ff59
MD
1041 dbg_printf("Publish branch %p, replacing %p\n",
1042 iter_node_flag, *node_flag_ptr);
5a9a87dd
MD
1043 rcu_assign_pointer(*node_flag_ptr, iter_node_flag);
1044
1045 /* Success */
1046 ret = 0;
1047
1048check_error:
1049 if (ret) {
1050 for (i = 0; i < nr_created_nodes; i++) {
1051 int tmpret;
a2a7ff59
MD
1052 int flags;
1053
1054 flags = RCUJA_SHADOW_CLEAR_FREE_LOCK;
1055 if (i)
1056 flags |= RCUJA_SHADOW_CLEAR_FREE_NODE;
5a9a87dd 1057 tmpret = rcuja_shadow_clear(ja->ht,
3d8fe307 1058 created_nodes[i],
a2a7ff59
MD
1059 NULL,
1060 flags);
5a9a87dd
MD
1061 assert(!tmpret);
1062 }
1063 }
5a9a87dd
MD
1064 if (parent_shadow_node)
1065 rcuja_shadow_unlock(parent_shadow_node);
1066unlock_shadow:
1067 if (shadow_node)
1068 rcuja_shadow_unlock(shadow_node);
1069end:
1070 return ret;
1071}
1072
1073/*
af3cbd45
MD
1074 * Lock the parent containing the hlist head pointer, and add node to list of
1075 * duplicates. Failure can happen if concurrent update changes the
1076 * parent before we get the lock. We return -EAGAIN in that case.
5a9a87dd
MD
1077 * Return 0 on success, negative error value on failure.
1078 */
1079static
1080int ja_chain_node(struct cds_ja *ja,
af3cbd45 1081 struct cds_ja_inode_flag *parent_node_flag,
5a9a87dd
MD
1082 struct cds_hlist_head *head,
1083 struct cds_ja_node *node)
1084{
1085 struct cds_ja_shadow_node *shadow_node;
1086
3d8fe307 1087 shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
5a9a87dd 1088 if (!shadow_node)
2e313670 1089 return -EAGAIN;
5a9a87dd
MD
1090 cds_hlist_add_head_rcu(&node->list, head);
1091 rcuja_shadow_unlock(shadow_node);
1092 return 0;
1093}
1094
1095int cds_ja_add(struct cds_ja *ja, uint64_t key,
1096 struct cds_ja_node *new_node)
1097{
1098 unsigned int tree_depth, i;
5a9a87dd
MD
1099 struct cds_ja_inode_flag **node_flag_ptr; /* in parent */
1100 struct cds_ja_inode_flag *node_flag,
1101 *parent_node_flag,
1102 *parent2_node_flag;
1103 int ret;
1104
1105 if (caa_unlikely(key > ja->key_max))
1106 return -EINVAL;
1107 tree_depth = ja->tree_depth;
1108
1109retry:
a2a7ff59
MD
1110 dbg_printf("cds_ja_add attempt: key %" PRIu64 ", node %p\n",
1111 key, new_node);
5a9a87dd 1112 parent2_node_flag = NULL;
b0f74e47
MD
1113 parent_node_flag =
1114 (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
5a9a87dd 1115 node_flag_ptr = &ja->root;
35170a44 1116 node_flag = rcu_dereference(ja->root);
5a9a87dd
MD
1117
1118 /* Iterate on all internal levels */
a2a7ff59 1119 for (i = 1; i < tree_depth; i++) {
79b41067
MD
1120 uint8_t iter_key;
1121
582a6ade
MD
1122 dbg_printf("cds_ja_add iter node_flag_ptr %p node_flag %p\n",
1123 *node_flag_ptr, node_flag);
5a9a87dd
MD
1124 if (!ja_node_ptr(node_flag)) {
1125 ret = ja_attach_node(ja, node_flag_ptr,
1126 parent_node_flag, parent2_node_flag,
1127 key, i, new_node);
2e313670 1128 if (ret == -EAGAIN || ret == -EEXIST)
5a9a87dd
MD
1129 goto retry;
1130 else
1131 goto end;
1132 }
79b41067 1133 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
5a9a87dd
MD
1134 parent2_node_flag = parent_node_flag;
1135 parent_node_flag = node_flag;
1136 node_flag = ja_node_get_nth(node_flag,
1137 &node_flag_ptr,
79b41067 1138 iter_key);
582a6ade
MD
1139 dbg_printf("cds_ja_add iter key lookup %u finds node_flag %p node_flag_ptr %p\n",
1140 (unsigned int) iter_key, node_flag, *node_flag_ptr);
5a9a87dd
MD
1141 }
1142
1143 /*
1144 * We reached bottom of tree, simply add node to last internal
1145 * level, or chain it if key is already present.
1146 */
1147 if (!ja_node_ptr(node_flag)) {
582a6ade
MD
1148 dbg_printf("cds_ja_add last node_flag_ptr %p node_flag %p\n",
1149 *node_flag_ptr, node_flag);
5a9a87dd
MD
1150 ret = ja_attach_node(ja, node_flag_ptr, parent_node_flag,
1151 parent2_node_flag, key, i, new_node);
1152 } else {
1153 ret = ja_chain_node(ja,
af3cbd45
MD
1154 parent_node_flag,
1155 (struct cds_hlist_head *) node_flag_ptr,
5a9a87dd
MD
1156 new_node);
1157 }
2e313670 1158 if (ret == -EAGAIN)
5a9a87dd
MD
1159 goto retry;
1160end:
1161 return ret;
b4540e8a
MD
1162}
1163
af3cbd45
MD
1164/*
1165 * Note: there is no need to lookup the pointer address associated with
1166 * each node's nth item after taking the lock: it's already been done by
1167 * cds_ja_del while holding the rcu read-side lock, and our node rules
1168 * ensure that when a match value -> pointer is found in a node, it is
1169 * _NEVER_ changed for that node without recompaction, and recompaction
1170 * reallocates the node.
1171 */
35170a44
MD
1172static
1173int ja_detach_node(struct cds_ja *ja,
1174 struct cds_ja_inode_flag **snapshot,
af3cbd45
MD
1175 struct cds_ja_inode_flag ***snapshot_ptr,
1176 uint8_t *snapshot_n,
35170a44
MD
1177 int nr_snapshot,
1178 uint64_t key,
1179 struct cds_ja_node *node)
1180{
af3cbd45
MD
1181 struct cds_ja_shadow_node *shadow_nodes[JA_MAX_DEPTH];
1182 struct cds_ja_inode_flag **node_flag_ptr = NULL,
1183 *parent_node_flag = NULL,
1184 **parent_node_flag_ptr = NULL;
1185 struct cds_ja_inode_flag *iter_node_flag;
1186 int ret, i, nr_shadow = 0, nr_clear = 0;
1187 uint8_t n;
35170a44
MD
1188
1189 assert(nr_snapshot == ja->tree_depth - 1);
1190
af3cbd45
MD
1191 /*
1192 * From the last internal level node going up, get the node
1193 * lock, check if the node has only one child left. If it is the
1194 * case, we continue iterating upward. When we reach a node
1195 * which has more that one child left, we lock the parent, and
1196 * proceed to the node deletion (removing its children too).
1197 */
1198 for (i = nr_snapshot - 1; i >= 1; i--) {
1199 struct cds_ja_shadow_node *shadow_node;
1200
1201 shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 1202 snapshot[i]);
af3cbd45
MD
1203 if (!shadow_node) {
1204 ret = -EAGAIN;
1205 goto end;
1206 }
1207 assert(shadow_node->nr_child > 0);
1208 shadow_nodes[nr_shadow++] = shadow_node;
1209 nr_clear++;
1210 if (i == nr_snapshot - 1) {
1211 /*
1212 * Re-check that last internal node level has
1213 * only one child, else trigger a retry.
1214 */
1215 if (shadow_node->nr_child != 1) {
1216 ret = -EAGAIN;
1217 goto end;
1218 }
1219 }
1220 if (shadow_node->nr_child > 1 || i == 1) {
1221 /* Lock parent and break */
1222 shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 1223 snapshot[i - 1]);
af3cbd45
MD
1224 if (!shadow_node) {
1225 ret = -EAGAIN;
1226 goto end;
1227 }
1228 shadow_nodes[nr_shadow++] = shadow_node;
1229 node_flag_ptr = snapshot_ptr[i];
1230 n = snapshot_n[i];
1231 parent_node_flag_ptr = snapshot_ptr[i - 1];
1232 parent_node_flag = snapshot[i - 1];
1233 if (i > 1) {
1234 /*
1235 * Lock parent's parent, in case we need
1236 * to recompact parent.
1237 */
1238 shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 1239 snapshot[i - 2]);
af3cbd45
MD
1240 if (!shadow_node) {
1241 ret = -EAGAIN;
1242 goto end;
1243 }
1244 shadow_nodes[nr_shadow++] = shadow_node;
1245 }
1246 break;
1247 }
1248 }
1249
1250 /*
1251 * At this point, we want to delete all nodes in shadow_nodes
1252 * (except the last one, which is either the root or the parent
1253 * of the upmost node with 1 child). OK to as to free lock here,
1254 * because RCU read lock is held, and free only performed in
1255 * call_rcu.
1256 */
1257
1258 for (i = 0; i < nr_clear; i++) {
1259 ret = rcuja_shadow_clear(ja->ht,
3d8fe307 1260 shadow_nodes[i]->node_flag,
af3cbd45
MD
1261 shadow_nodes[i],
1262 RCUJA_SHADOW_CLEAR_FREE_NODE
1263 | RCUJA_SHADOW_CLEAR_FREE_LOCK);
1264 assert(!ret);
1265 }
1266
1267 iter_node_flag = parent_node_flag;
1268 /* Remove from parent */
1269 ret = ja_node_clear_ptr(ja,
1270 node_flag_ptr, /* Pointer to location to nullify */
1271 &iter_node_flag, /* Old new parent ptr in its parent */
1272 shadow_nodes[nr_clear], /* of parent */
1273 n);
1274
1275 /* Update address of parent ptr in its parent */
1276 rcu_assign_pointer(*parent_node_flag_ptr, iter_node_flag);
1277
1278end:
1279 for (i = 0; i < nr_shadow; i++)
1280 rcuja_shadow_unlock(shadow_nodes[i]);
35170a44
MD
1281 return ret;
1282}
1283
af3cbd45
MD
1284static
1285int ja_unchain_node(struct cds_ja *ja,
1286 struct cds_ja_inode_flag *parent_node_flag,
1287 struct cds_ja_node *node)
1288{
1289 struct cds_ja_shadow_node *shadow_node;
1290 int ret = 0;
1291
3d8fe307 1292 shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
af3cbd45
MD
1293 if (!shadow_node)
1294 return -EAGAIN;
1295 /*
1296 * Retry if another thread removed all but one of duplicates
1297 * since check.
1298 */
1299 if (shadow_node->nr_child == 1) {
1300 ret = -EAGAIN;
1301 goto end;
1302 }
1303 cds_hlist_del_rcu(&node->list);
1304end:
1305 rcuja_shadow_unlock(shadow_node);
1306 return ret;
1307}
1308
1309/*
1310 * Called with RCU read lock held.
1311 */
35170a44
MD
1312int cds_ja_del(struct cds_ja *ja, uint64_t key,
1313 struct cds_ja_node *node)
1314{
1315 unsigned int tree_depth, i;
1316 struct cds_ja_inode_flag *snapshot[JA_MAX_DEPTH];
af3cbd45
MD
1317 struct cds_ja_inode_flag **snapshot_ptr[JA_MAX_DEPTH];
1318 uint8_t snapshot_n[JA_MAX_DEPTH];
35170a44 1319 struct cds_ja_inode_flag *node_flag;
af3cbd45 1320 struct cds_ja_inode_flag **prev_node_flag_ptr;
35170a44
MD
1321 int nr_snapshot = 0;
1322 int ret;
1323
1324 if (caa_unlikely(key > ja->key_max))
1325 return -EINVAL;
1326 tree_depth = ja->tree_depth;
1327
1328retry:
1329 dbg_printf("cds_ja_del attempt: key %" PRIu64 ", node %p\n",
1330 key, node);
1331
1332 /* snapshot for level 0 is only for shadow node lookup */
af3cbd45
MD
1333 snapshot_n[nr_snapshot] = 0;
1334 snapshot_ptr[nr_snapshot] = NULL;
35170a44
MD
1335 snapshot[nr_snapshot++] = (struct cds_ja_inode_flag *) &ja->root;
1336 node_flag = rcu_dereference(ja->root);
af3cbd45 1337 prev_node_flag_ptr = &ja->root;
35170a44
MD
1338
1339 /* Iterate on all internal levels */
1340 for (i = 1; i < tree_depth; i++) {
1341 uint8_t iter_key;
1342
1343 dbg_printf("cds_ja_del iter node_flag %p\n",
1344 node_flag);
1345 if (!ja_node_ptr(node_flag)) {
1346 return -ENOENT;
1347 }
35170a44 1348 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
af3cbd45
MD
1349 if (nr_snapshot <= 1)
1350 snapshot_n[nr_snapshot] = 0;
1351 else
1352 snapshot_n[nr_snapshot - 1] = iter_key;
1353
1354 snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
1355 snapshot[nr_snapshot++] = node_flag;
35170a44 1356 node_flag = ja_node_get_nth(node_flag,
af3cbd45 1357 &prev_node_flag_ptr,
35170a44 1358 iter_key);
af3cbd45
MD
1359 dbg_printf("cds_ja_del iter key lookup %u finds node_flag %p, prev_node_flag_ptr %p\n",
1360 (unsigned int) iter_key, node_flag,
1361 prev_node_flag_ptr);
35170a44
MD
1362 }
1363
1364 /*
1365 * We reached bottom of tree, try to find the node we are trying
1366 * to remove. Fail if we cannot find it.
1367 */
1368 if (!ja_node_ptr(node_flag)) {
1369 return -ENOENT;
1370 } else {
1371 struct cds_hlist_head *hlist_head;
1372 struct cds_hlist_node *hlist_node;
af3cbd45
MD
1373 struct cds_ja_node *entry, *match = NULL;
1374 int count = 0;
35170a44
MD
1375
1376 hlist_head = (struct cds_hlist_head *) ja_node_ptr(node_flag);
af3cbd45 1377 cds_hlist_for_each_entry_rcu(entry,
35170a44
MD
1378 hlist_node,
1379 hlist_head,
1380 list) {
af3cbd45
MD
1381 if (entry == node)
1382 match = entry;
1383 count++;
35170a44 1384 }
af3cbd45 1385 if (!match)
35170a44 1386 return -ENOENT;
af3cbd45
MD
1387 assert(count > 0);
1388 if (count == 1) {
1389 /*
1390 * Removing last of duplicates.
1391 */
1392 snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
1393 snapshot[nr_snapshot++] = node_flag;
1394 ret = ja_detach_node(ja, snapshot, snapshot_ptr,
1395 snapshot_n, nr_snapshot, key, node);
1396 } else {
1397 ret = ja_unchain_node(ja, node_flag, entry);
1398 }
35170a44
MD
1399 }
1400 if (ret == -EAGAIN)
1401 goto retry;
1402 return ret;
1403}
1404
b4540e8a
MD
1405struct cds_ja *_cds_ja_new(unsigned int key_bits,
1406 const struct rcu_flavor_struct *flavor)
be9a7474
MD
1407{
1408 struct cds_ja *ja;
b0f74e47 1409 int ret;
f07b240f 1410 struct cds_ja_shadow_node *root_shadow_node;
be9a7474
MD
1411
1412 ja = calloc(sizeof(*ja), 1);
1413 if (!ja)
1414 goto ja_error;
b4540e8a
MD
1415
1416 switch (key_bits) {
1417 case 8:
1418 ja->key_max = UINT8_MAX;
1419 break;
1420 case 16:
1421 ja->key_max = UINT16_MAX;
1422 break;
1423 case 32:
1424 ja->key_max = UINT32_MAX;
1425 break;
1426 case 64:
1427 ja->key_max = UINT64_MAX;
1428 break;
1429 default:
1430 goto check_error;
1431 }
1432
be9a7474 1433 /* ja->root is NULL */
5a9a87dd 1434 /* tree_depth 0 is for pointer to root node */
582a6ade 1435 ja->tree_depth = (key_bits >> JA_LOG2_BITS_PER_BYTE) + 1;
a2a7ff59 1436 assert(ja->tree_depth <= JA_MAX_DEPTH);
be9a7474
MD
1437 ja->ht = rcuja_create_ht(flavor);
1438 if (!ja->ht)
1439 goto ht_error;
b0f74e47
MD
1440
1441 /*
1442 * Note: we should not free this node until judy array destroy.
1443 */
f07b240f 1444 root_shadow_node = rcuja_shadow_set(ja->ht,
3d8fe307
MD
1445 (struct cds_ja_inode_flag *) &ja->root,
1446 NULL, ja);
f07b240f
MD
1447 if (!root_shadow_node) {
1448 ret = -ENOMEM;
b0f74e47 1449 goto ht_node_error;
f07b240f 1450 }
3d8fe307 1451 root_shadow_node->level = 0;
b0f74e47 1452
be9a7474
MD
1453 return ja;
1454
b0f74e47
MD
1455ht_node_error:
1456 ret = rcuja_delete_ht(ja->ht);
1457 assert(!ret);
be9a7474 1458ht_error:
b4540e8a 1459check_error:
be9a7474
MD
1460 free(ja);
1461ja_error:
1462 return NULL;
1463}
1464
3d8fe307
MD
1465/*
1466 * Called from RCU read-side CS.
1467 */
1468__attribute__((visibility("protected")))
1469void rcuja_free_all_children(struct cds_ja_shadow_node *shadow_node,
1470 struct cds_ja_inode_flag *node_flag,
1471 void (*free_node_cb)(struct rcu_head *head))
1472{
1473 const struct rcu_flavor_struct *flavor;
1474 unsigned int type_index;
1475 struct cds_ja_inode *node;
1476 const struct cds_ja_type *type;
1477
1478 flavor = cds_lfht_rcu_flavor(shadow_node->ja->ht);
1479 node = ja_node_ptr(node_flag);
1480 assert(node != NULL);
1481 type_index = ja_node_type(node_flag);
1482 type = &ja_types[type_index];
1483
1484 switch (type->type_class) {
1485 case RCU_JA_LINEAR:
1486 {
1487 uint8_t nr_child =
1488 ja_linear_node_get_nr_child(type, node);
1489 unsigned int i;
1490
1491 for (i = 0; i < nr_child; i++) {
1492 struct cds_ja_inode_flag *iter;
1493 struct cds_hlist_head head;
1494 struct cds_ja_node *entry;
1495 struct cds_hlist_node *pos;
1496 uint8_t v;
1497
1498 ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
1499 if (!iter)
1500 continue;
1501 head.next = (struct cds_hlist_node *) iter;
1502 cds_hlist_for_each_entry_rcu(entry, pos, &head, list) {
1503 flavor->update_call_rcu(&entry->head, free_node_cb);
1504 }
1505 }
1506 break;
1507 }
1508 case RCU_JA_POOL:
1509 {
1510 unsigned int pool_nr;
1511
1512 for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
1513 struct cds_ja_inode *pool =
1514 ja_pool_node_get_ith_pool(type, node, pool_nr);
1515 uint8_t nr_child =
1516 ja_linear_node_get_nr_child(type, pool);
1517 unsigned int j;
1518
1519 for (j = 0; j < nr_child; j++) {
1520 struct cds_ja_inode_flag *iter;
1521 struct cds_hlist_head head;
1522 struct cds_ja_node *entry;
1523 struct cds_hlist_node *pos;
1524 uint8_t v;
1525
1526 ja_linear_node_get_ith_pos(type, node, j, &v, &iter);
1527 if (!iter)
1528 continue;
1529 head.next = (struct cds_hlist_node *) iter;
1530 cds_hlist_for_each_entry_rcu(entry, pos, &head, list) {
1531 flavor->update_call_rcu(&entry->head, free_node_cb);
1532 }
1533 }
1534 }
1535 break;
1536 }
1537 case RCU_JA_NULL:
1538 break;
1539 case RCU_JA_PIGEON:
1540 {
1541 uint8_t nr_child;
1542 unsigned int i;
1543
1544 nr_child = shadow_node->nr_child;
1545 for (i = 0; i < nr_child; i++) {
1546 struct cds_ja_inode_flag *iter;
1547 struct cds_hlist_head head;
1548 struct cds_ja_node *entry;
1549 struct cds_hlist_node *pos;
1550
1551 iter = ja_pigeon_node_get_ith_pos(type, node, i);
1552 if (!iter)
1553 continue;
1554 head.next = (struct cds_hlist_node *) iter;
1555 cds_hlist_for_each_entry_rcu(entry, pos, &head, list) {
1556 flavor->update_call_rcu(&entry->head, free_node_cb);
1557 }
1558 }
1559 break;
1560 }
1561 default:
1562 assert(0);
1563 }
1564}
1565
be9a7474
MD
1566/*
1567 * There should be no more concurrent add to the judy array while it is
1568 * being destroyed (ensured by the caller).
1569 */
3d8fe307
MD
1570int cds_ja_destroy(struct cds_ja *ja,
1571 void (*free_node_cb)(struct rcu_head *head))
be9a7474 1572{
b4540e8a
MD
1573 int ret;
1574
be9a7474 1575 rcuja_shadow_prune(ja->ht,
3d8fe307
MD
1576 RCUJA_SHADOW_CLEAR_FREE_NODE | RCUJA_SHADOW_CLEAR_FREE_LOCK,
1577 free_node_cb);
b4540e8a
MD
1578 ret = rcuja_delete_ht(ja->ht);
1579 if (ret)
1580 return ret;
f07b240f
MD
1581 if (uatomic_read(&ja->nr_fallback))
1582 fprintf(stderr,
1583 "[warning] RCU Judy Array used %lu fallback node(s)\n",
1584 uatomic_read(&ja->nr_fallback));
b4540e8a 1585 free(ja);
41975c12 1586 return 0;
be9a7474 1587}
This page took 0.097574 seconds and 4 git commands to generate.