/*
 * rcuja/rcuja.c
 *
 * Userspace RCU library - RCU Judy Array
 *
 * Copyright (C) 2000 - 2002 Hewlett-Packard Company
 * Copyright 2012-2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <stdint.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>	/* posix_memalign(), free() */
#include <assert.h>
#include <urcu/rcuja.h>
#include <urcu/compiler.h>
#include <urcu/arch.h>
#include <urcu-pointer.h>
#include <urcu/uatomic.h>

#include "rcuja-internal.h"

#ifndef abs
#define abs_int(a)	((int) (a) > 0 ? (int) (a) : -((int) (a)))
#endif

enum cds_ja_type_class {
	RCU_JA_LINEAR = 0,	/* Type A */
			/* 32-bit: 1 to 25 children, 8 to 128 bytes */
			/* 64-bit: 1 to 28 children, 16 to 256 bytes */
	RCU_JA_POOL = 1,	/* Type B */
			/* 32-bit: 26 to 100 children, 256 to 512 bytes */
			/* 64-bit: 29 to 112 children, 512 to 1024 bytes */
	RCU_JA_PIGEON = 2,	/* Type C */
			/* 32-bit: 101 to 256 children, 1024 bytes */
			/* 64-bit: 113 to 256 children, 2048 bytes */
			/* Leaf nodes are implicit from their height in the tree */
	RCU_JA_NR_TYPES,

	RCU_JA_NULL,	/* not an encoded type, but keeps code regular */
};

struct cds_ja_type {
	enum cds_ja_type_class type_class;
	uint16_t min_child;		/* minimum number of children: 1 to 256 */
	uint16_t max_child;		/* maximum number of children: 1 to 256 */
	uint16_t max_linear_child;	/* per-pool max nr. children: 1 to 256 */
	uint16_t order;			/* node size is (1 << order), in bytes */
	uint16_t nr_pool_order;		/* number of pools */
	uint16_t pool_size_order;	/* pool size */
};

/*
 * Iteration on the array to find the right node size for the number of
 * children stops when it reaches .max_child == 256 (this is the largest
 * possible node size, which contains 256 children).
 * The min_child overlaps with the previous max_child to provide a
 * hysteresis loop to reallocation for patterns of cyclic add/removal
 * within the same node.
 * The node index within the following arrays is represented on 3
 * bits. It identifies the node type, min/max number of children, and
 * the size order.
 * The max_child values for the RCU_JA_POOL below result from
 * statistical approximation: over a million populations, the max_child
 * covers between 97% and 99% of the populations generated. Therefore, a
 * fallback should exist to cover the rare extreme population unbalance
 * cases, but it will not have a major impact on speed nor space
 * consumption, since those are rare cases.
 */

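/*
 * Illustration (editor's sketch, not used by the code): with the 64-bit
 * table below, a node holding 30 children falls within the first
 * RCU_JA_POOL entry (min_child 22, max_child 54), so growth from the
 * largest linear type goes through that pool type before reaching the
 * pigeon type. See find_nearest_type_index() further down for the actual
 * selection loop.
 */
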
#if (CAA_BITS_PER_LONG < 64)
/* 32-bit pointers */
enum {
	ja_type_0_max_child = 1,
	ja_type_1_max_child = 3,
	ja_type_2_max_child = 6,
	ja_type_3_max_child = 12,
	ja_type_4_max_child = 25,
	ja_type_5_max_child = 48,
	ja_type_6_max_child = 92,
	ja_type_7_max_child = 256,
	ja_type_8_max_child = 0,	/* NULL */
};

enum {
	ja_type_0_max_linear_child = 1,
	ja_type_1_max_linear_child = 3,
	ja_type_2_max_linear_child = 6,
	ja_type_3_max_linear_child = 12,
	ja_type_4_max_linear_child = 25,
	ja_type_5_max_linear_child = 24,
	ja_type_6_max_linear_child = 23,
};

enum {
	ja_type_5_nr_pool_order = 1,
	ja_type_6_nr_pool_order = 2,
};

const struct cds_ja_type ja_types[] = {
	{ .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 3, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 4, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 5, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 4, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 6, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 7, },

	/* Pools may fill sooner than max_child */
	/* This pool is hardcoded at index 5. See ja_node_ptr(). */
	{ .type_class = RCU_JA_POOL, .min_child = 20, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 8, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 7, },
	/* This pool is hardcoded at index 6. See ja_node_ptr(). */
	{ .type_class = RCU_JA_POOL, .min_child = 45, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 9, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 7, },

	/*
	 * Upon node removal below min_child, if child pool is filled
	 * beyond capacity, we roll back to pigeon.
	 */
	{ .type_class = RCU_JA_PIGEON, .min_child = 83, .max_child = ja_type_7_max_child, .order = 10, },

	{ .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
};
#else /* !(CAA_BITS_PER_LONG < 64) */
/* 64-bit pointers */
enum {
	ja_type_0_max_child = 1,
	ja_type_1_max_child = 3,
	ja_type_2_max_child = 7,
	ja_type_3_max_child = 14,
	ja_type_4_max_child = 28,
	ja_type_5_max_child = 54,
	ja_type_6_max_child = 104,
	ja_type_7_max_child = 256,
	ja_type_8_max_child = 256,
};

enum {
	ja_type_0_max_linear_child = 1,
	ja_type_1_max_linear_child = 3,
	ja_type_2_max_linear_child = 7,
	ja_type_3_max_linear_child = 14,
	ja_type_4_max_linear_child = 28,
	ja_type_5_max_linear_child = 27,
	ja_type_6_max_linear_child = 26,
};

enum {
	ja_type_5_nr_pool_order = 1,
	ja_type_6_nr_pool_order = 2,
};

const struct cds_ja_type ja_types[] = {
	{ .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_0_max_child, .max_linear_child = ja_type_0_max_linear_child, .order = 4, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 1, .max_child = ja_type_1_max_child, .max_linear_child = ja_type_1_max_linear_child, .order = 5, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 3, .max_child = ja_type_2_max_child, .max_linear_child = ja_type_2_max_linear_child, .order = 6, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 5, .max_child = ja_type_3_max_child, .max_linear_child = ja_type_3_max_linear_child, .order = 7, },
	{ .type_class = RCU_JA_LINEAR, .min_child = 10, .max_child = ja_type_4_max_child, .max_linear_child = ja_type_4_max_linear_child, .order = 8, },

	/* Pools may fill sooner than max_child. */
	/* This pool is hardcoded at index 5. See ja_node_ptr(). */
	{ .type_class = RCU_JA_POOL, .min_child = 22, .max_child = ja_type_5_max_child, .max_linear_child = ja_type_5_max_linear_child, .order = 9, .nr_pool_order = ja_type_5_nr_pool_order, .pool_size_order = 8, },
	/* This pool is hardcoded at index 6. See ja_node_ptr(). */
	{ .type_class = RCU_JA_POOL, .min_child = 51, .max_child = ja_type_6_max_child, .max_linear_child = ja_type_6_max_linear_child, .order = 10, .nr_pool_order = ja_type_6_nr_pool_order, .pool_size_order = 8, },

	/*
	 * Upon node removal below min_child, if child pool is filled
	 * beyond capacity, we roll back to pigeon.
	 */
	{ .type_class = RCU_JA_PIGEON, .min_child = 95, .max_child = ja_type_7_max_child, .order = 11, },

	{ .type_class = RCU_JA_NULL, .min_child = 0, .max_child = ja_type_8_max_child, },
};
#endif /* !(CAA_BITS_PER_LONG < 64) */

static inline __attribute__((unused))
void static_array_size_check(void)
{
	CAA_BUILD_BUG_ON(CAA_ARRAY_SIZE(ja_types) < JA_TYPE_MAX_NR);
}

/*
 * The cds_ja_node contains the compressed node data needed for
 * read-side. For linear and pool node configurations, it starts with a
 * byte counting the number of children in the node. Then, the
 * node-specific data is placed.
 * The node mutex, if any is needed, protecting concurrent updates of
 * each node is placed in a separate hash table indexed by node address.
 * For the pigeon configuration, the number of children is also kept in
 * a separate hash table, indexed by node address, because it is only
 * required for updates.
 */

#define DECLARE_LINEAR_NODE(index) \
	struct { \
		uint8_t nr_child; \
		uint8_t child_value[ja_type_## index ##_max_linear_child]; \
		struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
	}

#define DECLARE_POOL_NODE(index) \
	struct { \
		struct { \
			uint8_t nr_child; \
			uint8_t child_value[ja_type_## index ##_max_linear_child]; \
			struct cds_ja_inode_flag *child_ptr[ja_type_## index ##_max_linear_child]; \
		} linear[1U << ja_type_## index ##_nr_pool_order]; \
	}

struct cds_ja_inode {
	union {
		/* Linear configuration */
		DECLARE_LINEAR_NODE(0) conf_0;
		DECLARE_LINEAR_NODE(1) conf_1;
		DECLARE_LINEAR_NODE(2) conf_2;
		DECLARE_LINEAR_NODE(3) conf_3;
		DECLARE_LINEAR_NODE(4) conf_4;

		/* Pool configuration */
		DECLARE_POOL_NODE(5) conf_5;
		DECLARE_POOL_NODE(6) conf_6;

		/* Pigeon configuration */
		struct {
			struct cds_ja_inode_flag *child[ja_type_7_max_child];
		} conf_7;
		/* data aliasing nodes for computed accesses */
		uint8_t data[sizeof(struct cds_ja_inode_flag *) * ja_type_7_max_child];
	} u;
};

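/*
 * Editor's sketch (assumption: the pigeon configuration conf_7 and the
 * data[] aliasing array are the largest union members, so they dictate
 * the node size). Unused helper; it only documents how u.data[] aliases
 * the whole node for computed accesses.
 */
static inline __attribute__((unused))
void ja_inode_size_check(void)
{
	CAA_BUILD_BUG_ON(sizeof(struct cds_ja_inode) !=
		sizeof(struct cds_ja_inode_flag *) * ja_type_7_max_child);
}
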
enum ja_recompact {
	JA_RECOMPACT_ADD_SAME,
	JA_RECOMPACT_ADD_NEXT,
	JA_RECOMPACT_DEL,
};

enum ja_lookup_inequality {
	JA_LOOKUP_BE,
	JA_LOOKUP_AE,
};

enum ja_direction {
	JA_LEFT,
	JA_RIGHT,
	JA_LEFTMOST,
	JA_RIGHTMOST,
};

static
struct cds_ja_inode *_ja_node_mask_ptr(struct cds_ja_inode_flag *node)
{
	return (struct cds_ja_inode *) (((unsigned long) node) & JA_PTR_MASK);
}

unsigned long ja_node_type(struct cds_ja_inode_flag *node)
{
	unsigned long type;

	if (_ja_node_mask_ptr(node) == NULL) {
		return NODE_INDEX_NULL;
	}
	type = (unsigned int) ((unsigned long) node & JA_TYPE_MASK);
	assert(type < (1UL << JA_TYPE_BITS));
	return type;
}

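/*
 * Editor's note (sketch based on the masks used above, which are defined
 * in rcuja-internal.h): a cds_ja_inode_flag pointer carries the node type
 * index in its low JA_TYPE_BITS bits and, for pool nodes, the bit
 * selector in the JA_POOL_1D_MASK/JA_POOL_2D_MASK bits, so ja_node_ptr()
 * and ja_node_type() simply mask the same word differently.
 */
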
static
struct cds_ja_inode *alloc_cds_ja_node(struct cds_ja *ja,
		const struct cds_ja_type *ja_type)
{
	size_t len = 1U << ja_type->order;
	void *p;
	int ret;

	ret = posix_memalign(&p, len, len);
	if (ret || !p) {
		return NULL;
	}
	memset(p, 0, len);
	uatomic_inc(&ja->nr_nodes_allocated);
	return p;
}

void free_cds_ja_node(struct cds_ja *ja, struct cds_ja_inode *node)
{
	free(node);
	if (node)
		uatomic_inc(&ja->nr_nodes_freed);
}

#define __JA_ALIGN_MASK(v, mask)	(((v) + (mask)) & ~(mask))
#define JA_ALIGN(v, align)		__JA_ALIGN_MASK(v, (typeof(v)) (align) - 1)
#define __JA_FLOOR_MASK(v, mask)	((v) & ~(mask))
#define JA_FLOOR(v, align)		__JA_FLOOR_MASK(v, (typeof(v)) (align) - 1)

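/* Example (editor's note): JA_ALIGN(13, 8) == 16 and JA_FLOOR(13, 8) == 8. */
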
static
uint8_t *align_ptr_size(uint8_t *ptr)
{
	return (uint8_t *) JA_ALIGN((unsigned long) ptr, sizeof(void *));
}

static
uint8_t ja_linear_node_get_nr_child(const struct cds_ja_type *type,
		struct cds_ja_inode *node)
{
	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
	return rcu_dereference(node->u.data[0]);
}

/*
 * The order in which values and pointers are read does not matter: if
 * a value is missing, we return NULL. If a value is there, but its
 * associated pointer is still NULL, we return NULL too.
 */
static
struct cds_ja_inode_flag *ja_linear_node_get_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag ***node_flag_ptr,
		uint8_t n)
{
	uint8_t nr_child;
	uint8_t *values;
	struct cds_ja_inode_flag **pointers;
	struct cds_ja_inode_flag *ptr;
	unsigned int i;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);

	nr_child = ja_linear_node_get_nr_child(type, node);
	cmm_smp_rmb();	/* read nr_child before values and pointers */
	assert(nr_child <= type->max_linear_child);
	assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);

	values = &node->u.data[1];
	for (i = 0; i < nr_child; i++) {
		if (CMM_LOAD_SHARED(values[i]) == n)
			break;
	}
	if (i >= nr_child) {
		if (caa_unlikely(node_flag_ptr))
			*node_flag_ptr = NULL;
		return NULL;
	}
	pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
	ptr = rcu_dereference(pointers[i]);
	if (caa_unlikely(node_flag_ptr))
		*node_flag_ptr = &pointers[i];
	return ptr;
}

static
struct cds_ja_inode_flag *ja_linear_node_get_direction(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		int n, uint8_t *result_key,
		enum ja_direction dir)
{
	uint8_t nr_child;
	uint8_t *values;
	struct cds_ja_inode_flag **pointers;
	struct cds_ja_inode_flag *ptr, *match_ptr = NULL;
	unsigned int i;
	int match_v;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
	assert(dir == JA_LEFT || dir == JA_RIGHT);

	if (dir == JA_LEFT) {
		match_v = -1;
	} else {
		match_v = JA_ENTRY_PER_NODE;
	}

	nr_child = ja_linear_node_get_nr_child(type, node);
	cmm_smp_rmb();	/* read nr_child before values and pointers */
	assert(nr_child <= type->max_linear_child);
	assert(type->type_class != RCU_JA_LINEAR || nr_child >= type->min_child);

	values = &node->u.data[1];
	pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
	for (i = 0; i < nr_child; i++) {
		unsigned int v;

		v = CMM_LOAD_SHARED(values[i]);
		ptr = CMM_LOAD_SHARED(pointers[i]);
		if (!ptr)
			continue;
		if (dir == JA_LEFT) {
			if ((int) v < n && (int) v > match_v) {
				match_v = v;
				match_ptr = ptr;
			}
		} else {
			if ((int) v > n && (int) v < match_v) {
				match_v = v;
				match_ptr = ptr;
			}
		}
	}

	if (!match_ptr) {
		return NULL;
	}
	assert(match_v >= 0 && match_v < JA_ENTRY_PER_NODE);

	*result_key = (uint8_t) match_v;
	return match_ptr;
}

static
void ja_linear_node_get_ith_pos(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		uint8_t i,
		uint8_t *v,
		struct cds_ja_inode_flag **iter)
{
	uint8_t *values;
	struct cds_ja_inode_flag **pointers;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);
	assert(i < ja_linear_node_get_nr_child(type, node));

	values = &node->u.data[1];
	*v = values[i];
	pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
	*iter = pointers[i];
}

static
struct cds_ja_inode_flag *ja_pool_node_get_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag *node_flag,
		struct cds_ja_inode_flag ***node_flag_ptr,
		uint8_t n)
{
	struct cds_ja_inode *linear;

	assert(type->type_class == RCU_JA_POOL);

	switch (type->nr_pool_order) {
	case 1:
	{
		unsigned long bitsel, index;

		bitsel = ja_node_pool_1d_bitsel(node_flag);
		assert(bitsel < CHAR_BIT);
		index = ((unsigned long) n >> bitsel) & 0x1;
		linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
		break;
	}
	case 2:
	{
		unsigned long bitsel[2], index[2], rindex;

		ja_node_pool_2d_bitsel(node_flag, bitsel);
		assert(bitsel[0] < CHAR_BIT);
		assert(bitsel[1] < CHAR_BIT);
		index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
		index[0] <<= 1;
		index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
		rindex = index[0] | index[1];
		linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
		break;
	}
	default:
		linear = NULL;
		assert(0);
	}
	return ja_linear_node_get_nth(type, linear, node_flag_ptr, n);
}

static
struct cds_ja_inode *ja_pool_node_get_ith_pool(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		uint8_t i)
{
	assert(type->type_class == RCU_JA_POOL);
	return (struct cds_ja_inode *)
		&node->u.data[(unsigned int) i << type->pool_size_order];
}

static
struct cds_ja_inode_flag *ja_pool_node_get_direction(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		int n, uint8_t *result_key,
		enum ja_direction dir)
{
	unsigned int pool_nr;
	int match_v;
	struct cds_ja_inode_flag *match_node_flag = NULL;

	assert(type->type_class == RCU_JA_POOL);
	assert(dir == JA_LEFT || dir == JA_RIGHT);

	if (dir == JA_LEFT) {
		match_v = -1;
	} else {
		match_v = JA_ENTRY_PER_NODE;
	}

	for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
		struct cds_ja_inode *pool =
			ja_pool_node_get_ith_pool(type,
				node, pool_nr);
		uint8_t nr_child =
			ja_linear_node_get_nr_child(type, pool);
		unsigned int j;

		for (j = 0; j < nr_child; j++) {
			struct cds_ja_inode_flag *iter;
			uint8_t v;

			ja_linear_node_get_ith_pos(type, pool,
					j, &v, &iter);
			if (!iter)
				continue;
			if (dir == JA_LEFT) {
				if ((int) v < n && (int) v > match_v) {
					match_v = v;
					match_node_flag = iter;
				}
			} else {
				if ((int) v > n && (int) v < match_v) {
					match_v = v;
					match_node_flag = iter;
				}
			}
		}
	}
	if (match_node_flag)
		*result_key = (uint8_t) match_v;
	return match_node_flag;
}

static
struct cds_ja_inode_flag *ja_pigeon_node_get_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag ***node_flag_ptr,
		uint8_t n)
{
	struct cds_ja_inode_flag **child_node_flag_ptr;
	struct cds_ja_inode_flag *child_node_flag;

	assert(type->type_class == RCU_JA_PIGEON);
	child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[n];
	child_node_flag = rcu_dereference(*child_node_flag_ptr);
	dbg_printf("ja_pigeon_node_get_nth child_node_flag_ptr %p\n",
		child_node_flag_ptr);
	if (caa_unlikely(node_flag_ptr))
		*node_flag_ptr = child_node_flag_ptr;
	return child_node_flag;
}

static
struct cds_ja_inode_flag *ja_pigeon_node_get_direction(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		int n, uint8_t *result_key,
		enum ja_direction dir)
{
	struct cds_ja_inode_flag **child_node_flag_ptr;
	struct cds_ja_inode_flag *child_node_flag;
	int i;

	assert(type->type_class == RCU_JA_PIGEON);
	assert(dir == JA_LEFT || dir == JA_RIGHT);

	if (dir == JA_LEFT) {
		/* n - 1 is first value left of n */
		for (i = n - 1; i >= 0; i--) {
			child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
			child_node_flag = rcu_dereference(*child_node_flag_ptr);
			if (child_node_flag) {
				dbg_printf("ja_pigeon_node_get_left child_node_flag %p\n",
					child_node_flag);
				*result_key = (uint8_t) i;
				return child_node_flag;
			}
		}
	} else {
		/* n + 1 is first value right of n */
		for (i = n + 1; i < JA_ENTRY_PER_NODE; i++) {
			child_node_flag_ptr = &((struct cds_ja_inode_flag **) node->u.data)[i];
			child_node_flag = rcu_dereference(*child_node_flag_ptr);
			if (child_node_flag) {
				dbg_printf("ja_pigeon_node_get_right child_node_flag %p\n",
					child_node_flag);
				*result_key = (uint8_t) i;
				return child_node_flag;
			}
		}
	}
	return NULL;
}

static
struct cds_ja_inode_flag *ja_pigeon_node_get_ith_pos(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		uint8_t i)
{
	return ja_pigeon_node_get_nth(type, node, NULL, i);
}

/*
 * ja_node_get_nth: get nth item from a node.
 * node_flag is already rcu_dereference'd.
 */
static
struct cds_ja_inode_flag *ja_node_get_nth(struct cds_ja_inode_flag *node_flag,
		struct cds_ja_inode_flag ***node_flag_ptr,
		uint8_t n)
{
	unsigned int type_index;
	struct cds_ja_inode *node;
	const struct cds_ja_type *type;

	node = ja_node_ptr(node_flag);
	assert(node != NULL);
	type_index = ja_node_type(node_flag);
	type = &ja_types[type_index];

	switch (type->type_class) {
	case RCU_JA_LINEAR:
		return ja_linear_node_get_nth(type, node,
				node_flag_ptr, n);
	case RCU_JA_POOL:
		return ja_pool_node_get_nth(type, node, node_flag,
				node_flag_ptr, n);
	case RCU_JA_PIGEON:
		return ja_pigeon_node_get_nth(type, node,
				node_flag_ptr, n);
	default:
		assert(0);
		return (void *) -1UL;
	}
}

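/*
 * Usage sketch (editor's illustration, hypothetical caller): descending
 * one level of the tree for key byte 'k' amounts to:
 *
 *	struct cds_ja_inode_flag *child;
 *
 *	child = ja_node_get_nth(node_flag, NULL, k);
 *
 * where node_flag has already been rcu_dereference'd by the caller, as
 * noted above, and a NULL return means no child is populated for that
 * byte.
 */
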
static
struct cds_ja_inode_flag *ja_node_get_direction(struct cds_ja_inode_flag *node_flag,
		int n, uint8_t *result_key,
		enum ja_direction dir)
{
	unsigned int type_index;
	struct cds_ja_inode *node;
	const struct cds_ja_type *type;

	node = ja_node_ptr(node_flag);
	assert(node != NULL);
	type_index = ja_node_type(node_flag);
	type = &ja_types[type_index];

	switch (type->type_class) {
	case RCU_JA_LINEAR:
		return ja_linear_node_get_direction(type, node, n, result_key, dir);
	case RCU_JA_POOL:
		return ja_pool_node_get_direction(type, node, n, result_key, dir);
	case RCU_JA_PIGEON:
		return ja_pigeon_node_get_direction(type, node, n, result_key, dir);
	default:
		assert(0);
		return (void *) -1UL;
	}
}

static
struct cds_ja_inode_flag *ja_node_get_leftright(struct cds_ja_inode_flag *node_flag,
		unsigned int n, uint8_t *result_key,
		enum ja_direction dir)
{
	return ja_node_get_direction(node_flag, n, result_key, dir);
}

static
struct cds_ja_inode_flag *ja_node_get_minmax(struct cds_ja_inode_flag *node_flag,
		uint8_t *result_key,
		enum ja_direction dir)
{
	switch (dir) {
	case JA_LEFTMOST:
		return ja_node_get_direction(node_flag,
				-1, result_key, JA_RIGHT);
	case JA_RIGHTMOST:
		return ja_node_get_direction(node_flag,
				JA_ENTRY_PER_NODE, result_key, JA_LEFT);
	default:
		assert(0);
	}
}

static
int ja_linear_node_set_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag)
{
	uint8_t nr_child;
	uint8_t *values, *nr_child_ptr;
	struct cds_ja_inode_flag **pointers;
	unsigned int i, unused = 0;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);

	nr_child_ptr = &node->u.data[0];
	dbg_printf("linear set nth: n %u, nr_child_ptr %p\n",
		(unsigned int) n, nr_child_ptr);
	nr_child = *nr_child_ptr;
	assert(nr_child <= type->max_linear_child);

	values = &node->u.data[1];
	pointers = (struct cds_ja_inode_flag **) align_ptr_size(&values[type->max_linear_child]);
	/* Check if node value is already populated */
	for (i = 0; i < nr_child; i++) {
		if (values[i] == n) {
			if (pointers[i])
				return -EEXIST;
			else
				break;
		} else {
			if (!pointers[i])
				unused++;
		}
	}
	if (i == nr_child && nr_child >= type->max_linear_child) {
		if (unused)
			return -ERANGE;	/* recompact node */
		else
			return -ENOSPC;	/* No space left in this node type */
	}

	assert(pointers[i] == NULL);
	rcu_assign_pointer(pointers[i], child_node_flag);
	/* If we expanded the nr_child, increment it */
	if (i == nr_child) {
		CMM_STORE_SHARED(values[nr_child], n);
		/* write pointer and value before nr_child */
		cmm_smp_wmb();
		CMM_STORE_SHARED(*nr_child_ptr, nr_child + 1);
	}
	shadow_node->nr_child++;
	dbg_printf("linear set nth: %u child, shadow: %u child, for node %p shadow %p\n",
		(unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
		(unsigned int) shadow_node->nr_child,
		node, shadow_node);

	return 0;
}

static
int ja_pool_node_set_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag *node_flag,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag)
{
	struct cds_ja_inode *linear;

	assert(type->type_class == RCU_JA_POOL);

	switch (type->nr_pool_order) {
	case 1:
	{
		unsigned long bitsel, index;

		bitsel = ja_node_pool_1d_bitsel(node_flag);
		assert(bitsel < CHAR_BIT);
		index = ((unsigned long) n >> bitsel) & 0x1;
		linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
		break;
	}
	case 2:
	{
		unsigned long bitsel[2], index[2], rindex;

		ja_node_pool_2d_bitsel(node_flag, bitsel);
		assert(bitsel[0] < CHAR_BIT);
		assert(bitsel[1] < CHAR_BIT);
		index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
		index[0] <<= 1;
		index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
		rindex = index[0] | index[1];
		linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
		break;
	}
	default:
		linear = NULL;
		assert(0);
	}

	return ja_linear_node_set_nth(type, linear, shadow_node,
			n, child_node_flag);
}

static
int ja_pigeon_node_set_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag)
{
	struct cds_ja_inode_flag **ptr;

	assert(type->type_class == RCU_JA_PIGEON);
	ptr = &((struct cds_ja_inode_flag **) node->u.data)[n];
	if (*ptr)
		return -EEXIST;
	rcu_assign_pointer(*ptr, child_node_flag);
	shadow_node->nr_child++;
	return 0;
}

/*
 * _ja_node_set_nth: set nth item within a node. Return an error
 * (negative error value) if it is already there.
 */
static
int _ja_node_set_nth(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag *node_flag,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag)
{
	switch (type->type_class) {
	case RCU_JA_LINEAR:
		return ja_linear_node_set_nth(type, node, shadow_node, n,
				child_node_flag);
	case RCU_JA_POOL:
		return ja_pool_node_set_nth(type, node, node_flag, shadow_node, n,
				child_node_flag);
	case RCU_JA_PIGEON:
		return ja_pigeon_node_set_nth(type, node, shadow_node, n,
				child_node_flag);
	case RCU_JA_NULL:
		return -ENOSPC;
	default:
		assert(0);
		return -EINVAL;
	}

	return 0;
}

static
int ja_linear_node_clear_ptr(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **node_flag_ptr)
{
	uint8_t nr_child;
	uint8_t *nr_child_ptr;

	assert(type->type_class == RCU_JA_LINEAR || type->type_class == RCU_JA_POOL);

	nr_child_ptr = &node->u.data[0];
	nr_child = *nr_child_ptr;
	assert(nr_child <= type->max_linear_child);

	if (type->type_class == RCU_JA_LINEAR) {
		assert(!shadow_node->fallback_removal_count);
		if (shadow_node->nr_child <= type->min_child) {
			/* We need to try recompacting the node */
			return -EFBIG;
		}
	}
	dbg_printf("linear clear ptr: nr_child_ptr %p\n", nr_child_ptr);
	assert(*node_flag_ptr != NULL);
	rcu_assign_pointer(*node_flag_ptr, NULL);
	/*
	 * Value and nr_child are never changed (would cause ABA issue).
	 * Instead, we leave the pointer to NULL and recompact the node
	 * once in a while. It is allowed to set a NULL pointer to a new
	 * value without recompaction though.
	 * Only update the shadow node accounting.
	 */
	shadow_node->nr_child--;
	dbg_printf("linear clear ptr: %u child, shadow: %u child, for node %p shadow %p\n",
		(unsigned int) CMM_LOAD_SHARED(*nr_child_ptr),
		(unsigned int) shadow_node->nr_child,
		node, shadow_node);
	return 0;
}

static
int ja_pool_node_clear_ptr(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag *node_flag,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **node_flag_ptr,
		uint8_t n)
{
	struct cds_ja_inode *linear;

	assert(type->type_class == RCU_JA_POOL);

	if (shadow_node->fallback_removal_count) {
		shadow_node->fallback_removal_count--;
	} else {
		/* We should try recompacting the node */
		if (shadow_node->nr_child <= type->min_child)
			return -EFBIG;
	}

	switch (type->nr_pool_order) {
	case 1:
	{
		unsigned long bitsel, index;

		bitsel = ja_node_pool_1d_bitsel(node_flag);
		assert(bitsel < CHAR_BIT);
		index = ((unsigned long) n >> bitsel) & type->nr_pool_order;
		linear = (struct cds_ja_inode *) &node->u.data[index << type->pool_size_order];
		break;
	}
	case 2:
	{
		unsigned long bitsel[2], index[2], rindex;

		ja_node_pool_2d_bitsel(node_flag, bitsel);
		assert(bitsel[0] < CHAR_BIT);
		assert(bitsel[1] < CHAR_BIT);
		index[0] = ((unsigned long) n >> bitsel[0]) & 0x1;
		index[0] <<= 1;
		index[1] = ((unsigned long) n >> bitsel[1]) & 0x1;
		rindex = index[0] | index[1];
		linear = (struct cds_ja_inode *) &node->u.data[rindex << type->pool_size_order];
		break;
	}
	default:
		linear = NULL;
		assert(0);
	}

	return ja_linear_node_clear_ptr(type, linear, shadow_node, node_flag_ptr);
}

static
int ja_pigeon_node_clear_ptr(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **node_flag_ptr)
{
	assert(type->type_class == RCU_JA_PIGEON);

	if (shadow_node->fallback_removal_count) {
		shadow_node->fallback_removal_count--;
	} else {
		/* We should try recompacting the node */
		if (shadow_node->nr_child <= type->min_child)
			return -EFBIG;
	}
	dbg_printf("ja_pigeon_node_clear_ptr: clearing ptr: %p\n", *node_flag_ptr);
	rcu_assign_pointer(*node_flag_ptr, NULL);
	shadow_node->nr_child--;
	return 0;
}

/*
 * _ja_node_clear_ptr: clear ptr item within a node. Return an error
 * (negative error value) if it is not found (-ENOENT).
 */
static
int _ja_node_clear_ptr(const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_inode_flag *node_flag,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **node_flag_ptr,
		uint8_t n)
{
	switch (type->type_class) {
	case RCU_JA_LINEAR:
		return ja_linear_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
	case RCU_JA_POOL:
		return ja_pool_node_clear_ptr(type, node, node_flag, shadow_node, node_flag_ptr, n);
	case RCU_JA_PIGEON:
		return ja_pigeon_node_clear_ptr(type, node, shadow_node, node_flag_ptr);
	case RCU_JA_NULL:
		return -ENOENT;
	default:
		assert(0);
		return -EINVAL;
	}

	return 0;
}

/*
 * Calculate bit distribution. Returns the bit (0 to 7) that splits the
 * distribution into two sub-distributions containing as equal a number
 * of elements as possible.
 */
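/*
 * Worked example (editor's illustration): for children at byte values
 * { 0x00, 0x01, 0x80, 0x81 }, bit 0 and bit 7 each split the set 2/2 and
 * are ideal selectors, while bits 1 to 6 split it 4/0 and are rejected as
 * farthest from the half/half target.
 */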
static
unsigned int ja_node_sum_distribution_1d(enum ja_recompact mode,
		struct cds_ja *ja,
		unsigned int type_index,
		const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag,
		struct cds_ja_inode_flag **nullify_node_flag_ptr)
{
	uint8_t nr_one[JA_BITS_PER_BYTE];
	unsigned int bitsel = 0, bit_i, overall_best_distance = UINT_MAX;
	unsigned int distrib_nr_child = 0;

	memset(nr_one, 0, sizeof(nr_one));

	switch (type->type_class) {
	case RCU_JA_LINEAR:
	{
		uint8_t nr_child =
			ja_linear_node_get_nr_child(type, node);
		unsigned int i;

		for (i = 0; i < nr_child; i++) {
			struct cds_ja_inode_flag *iter;
			uint8_t v;

			ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				if (v & (1U << bit_i))
					nr_one[bit_i]++;
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_POOL:
	{
		unsigned int pool_nr;

		for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
			struct cds_ja_inode *pool =
				ja_pool_node_get_ith_pool(type,
					node, pool_nr);
			uint8_t nr_child =
				ja_linear_node_get_nr_child(type, pool);
			unsigned int j;

			for (j = 0; j < nr_child; j++) {
				struct cds_ja_inode_flag *iter;
				uint8_t v;

				ja_linear_node_get_ith_pos(type, pool,
						j, &v, &iter);
				if (!iter)
					continue;
				if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
					continue;
				for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
					if (v & (1U << bit_i))
						nr_one[bit_i]++;
				}
				distrib_nr_child++;
			}
		}
		break;
	}
	case RCU_JA_PIGEON:
	{
		unsigned int i;

		assert(mode == JA_RECOMPACT_DEL);
		for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
			struct cds_ja_inode_flag *iter;

			iter = ja_pigeon_node_get_ith_pos(type, node, i);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				if (i & (1U << bit_i))
					nr_one[bit_i]++;
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_NULL:
		assert(mode == JA_RECOMPACT_ADD_NEXT);
		break;
	default:
		assert(0);
		break;
	}

	if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
		for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
			if (n & (1U << bit_i))
				nr_one[bit_i]++;
		}
		distrib_nr_child++;
	}

	/*
	 * The best bit selector is that for which the number of ones is
	 * closest to half of the number of children in the
	 * distribution. We calculate the distance using the double of
	 * the sub-distribution sizes to eliminate truncation error.
	 */
	for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
		unsigned int distance_to_best;

		distance_to_best = abs_int(((unsigned int) nr_one[bit_i] << 1U) - distrib_nr_child);
		if (distance_to_best < overall_best_distance) {
			overall_best_distance = distance_to_best;
			bitsel = bit_i;
		}
	}
	dbg_printf("1 dimension pool bit selection: (%u)\n", bitsel);
	return bitsel;
}

/*
 * Calculate bit distribution in two dimensions. Returns the two bits
 * (each 0 to 7) that split the distribution into four sub-distributions
 * containing as equal a number of elements as possible.
 */
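/*
 * Worked example (editor's illustration): for children at byte values
 * { 0x00, 0x01, 0x02, 0x03 }, selecting bits (1, 0) yields exactly one
 * child per (bit_i, bit_j) combination, the ideal split for a 4-pool node.
 */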
static
void ja_node_sum_distribution_2d(enum ja_recompact mode,
		struct cds_ja *ja,
		unsigned int type_index,
		const struct cds_ja_type *type,
		struct cds_ja_inode *node,
		struct cds_ja_shadow_node *shadow_node,
		uint8_t n,
		struct cds_ja_inode_flag *child_node_flag,
		struct cds_ja_inode_flag **nullify_node_flag_ptr,
		unsigned int *_bitsel)
{
	uint8_t nr_2d_11[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
		nr_2d_10[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
		nr_2d_01[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE],
		nr_2d_00[JA_BITS_PER_BYTE][JA_BITS_PER_BYTE];
	unsigned int bitsel[2] = { 0, 1 };
	unsigned int bit_i, bit_j;
	int overall_best_distance = INT_MAX;
	unsigned int distrib_nr_child = 0;

	memset(nr_2d_11, 0, sizeof(nr_2d_11));
	memset(nr_2d_10, 0, sizeof(nr_2d_10));
	memset(nr_2d_01, 0, sizeof(nr_2d_01));
	memset(nr_2d_00, 0, sizeof(nr_2d_00));

	switch (type->type_class) {
	case RCU_JA_LINEAR:
	{
		uint8_t nr_child =
			ja_linear_node_get_nr_child(type, node);
		unsigned int i;

		for (i = 0; i < nr_child; i++) {
			struct cds_ja_inode_flag *iter;
			uint8_t v;

			ja_linear_node_get_ith_pos(type, node, i, &v, &iter);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				for (bit_j = 0; bit_j < bit_i; bit_j++) {
					if (v & (1U << bit_i)) {
						if (v & (1U << bit_j)) {
							nr_2d_11[bit_i][bit_j]++;
						} else {
							nr_2d_10[bit_i][bit_j]++;
						}
					} else {
						if (v & (1U << bit_j)) {
							nr_2d_01[bit_i][bit_j]++;
						} else {
							nr_2d_00[bit_i][bit_j]++;
						}
					}
				}
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_POOL:
	{
		unsigned int pool_nr;

		for (pool_nr = 0; pool_nr < (1U << type->nr_pool_order); pool_nr++) {
			struct cds_ja_inode *pool =
				ja_pool_node_get_ith_pool(type,
					node, pool_nr);
			uint8_t nr_child =
				ja_linear_node_get_nr_child(type, pool);
			unsigned int j;

			for (j = 0; j < nr_child; j++) {
				struct cds_ja_inode_flag *iter;
				uint8_t v;

				ja_linear_node_get_ith_pos(type, pool,
						j, &v, &iter);
				if (!iter)
					continue;
				if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
					continue;
				for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
					for (bit_j = 0; bit_j < bit_i; bit_j++) {
						if (v & (1U << bit_i)) {
							if (v & (1U << bit_j)) {
								nr_2d_11[bit_i][bit_j]++;
							} else {
								nr_2d_10[bit_i][bit_j]++;
							}
						} else {
							if (v & (1U << bit_j)) {
								nr_2d_01[bit_i][bit_j]++;
							} else {
								nr_2d_00[bit_i][bit_j]++;
							}
						}
					}
				}
				distrib_nr_child++;
			}
		}
		break;
	}
	case RCU_JA_PIGEON:
	{
		unsigned int i;

		assert(mode == JA_RECOMPACT_DEL);
		for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
			struct cds_ja_inode_flag *iter;

			iter = ja_pigeon_node_get_ith_pos(type, node, i);
			if (!iter)
				continue;
			if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
				continue;
			for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
				for (bit_j = 0; bit_j < bit_i; bit_j++) {
					if (i & (1U << bit_i)) {
						if (i & (1U << bit_j)) {
							nr_2d_11[bit_i][bit_j]++;
						} else {
							nr_2d_10[bit_i][bit_j]++;
						}
					} else {
						if (i & (1U << bit_j)) {
							nr_2d_01[bit_i][bit_j]++;
						} else {
							nr_2d_00[bit_i][bit_j]++;
						}
					}
				}
			}
			distrib_nr_child++;
		}
		break;
	}
	case RCU_JA_NULL:
		assert(mode == JA_RECOMPACT_ADD_NEXT);
		break;
	default:
		assert(0);
		break;
	}

	if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
		for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
			for (bit_j = 0; bit_j < bit_i; bit_j++) {
				if (n & (1U << bit_i)) {
					if (n & (1U << bit_j)) {
						nr_2d_11[bit_i][bit_j]++;
					} else {
						nr_2d_10[bit_i][bit_j]++;
					}
				} else {
					if (n & (1U << bit_j)) {
						nr_2d_01[bit_i][bit_j]++;
					} else {
						nr_2d_00[bit_i][bit_j]++;
					}
				}
			}
		}
		distrib_nr_child++;
	}

	/*
	 * The best bit selector is that for which the number of nodes
	 * in each sub-class is closest to one-fourth of the number of
	 * children in the distribution. We calculate the distance using
	 * 4 times the size of the sub-distribution to eliminate
	 * truncation error.
	 */
	for (bit_i = 0; bit_i < JA_BITS_PER_BYTE; bit_i++) {
		for (bit_j = 0; bit_j < bit_i; bit_j++) {
			int distance_to_best[4];

			distance_to_best[0] = ((unsigned int) nr_2d_11[bit_i][bit_j] << 2U) - distrib_nr_child;
			distance_to_best[1] = ((unsigned int) nr_2d_10[bit_i][bit_j] << 2U) - distrib_nr_child;
			distance_to_best[2] = ((unsigned int) nr_2d_01[bit_i][bit_j] << 2U) - distrib_nr_child;
			distance_to_best[3] = ((unsigned int) nr_2d_00[bit_i][bit_j] << 2U) - distrib_nr_child;

			/* Consider worse distance above best */
			if (distance_to_best[1] > 0 && distance_to_best[1] > distance_to_best[0])
				distance_to_best[0] = distance_to_best[1];
			if (distance_to_best[2] > 0 && distance_to_best[2] > distance_to_best[0])
				distance_to_best[0] = distance_to_best[2];
			if (distance_to_best[3] > 0 && distance_to_best[3] > distance_to_best[0])
				distance_to_best[0] = distance_to_best[3];

			/*
			 * If our worse distance is better than overall,
			 * we become new best candidate.
			 */
			if (distance_to_best[0] < overall_best_distance) {
				overall_best_distance = distance_to_best[0];
				bitsel[0] = bit_i;
				bitsel[1] = bit_j;
			}
		}
	}

	dbg_printf("2 dimensions pool bit selection: (%u,%u)\n", bitsel[0], bitsel[1]);

	/* Return our bit selection */
	_bitsel[0] = bitsel[0];
	_bitsel[1] = bitsel[1];
}

1349
48cbe001
MD
1350static
1351unsigned int find_nearest_type_index(unsigned int type_index,
1352 unsigned int nr_nodes)
1353{
1354 const struct cds_ja_type *type;
1355
1356 assert(type_index != NODE_INDEX_NULL);
1357 if (nr_nodes == 0)
1358 return NODE_INDEX_NULL;
1359 for (;;) {
1360 type = &ja_types[type_index];
1361 if (nr_nodes < type->min_child)
1362 type_index--;
1363 else if (nr_nodes > type->max_child)
1364 type_index++;
1365 else
1366 break;
1367 }
1368 return type_index;
1369}
1370
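/*
 * Example (editor's note, using the 64-bit ja_types[] above):
 * find_nearest_type_index(5, 30) stays at index 5 (22 <= 30 <= 54), while
 * find_nearest_type_index(5, 60) moves up to index 6 (51 <= 60 <= 104).
 */
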
/*
 * ja_node_recompact: recompact a node, adding a new child.
 * Return 0 on success, -EAGAIN if need to retry, or other negative
 * error value otherwise.
 */
static
int ja_node_recompact(enum ja_recompact mode,
		struct cds_ja *ja,
		unsigned int old_type_index,
		const struct cds_ja_type *old_type,
		struct cds_ja_inode *old_node,
		struct cds_ja_shadow_node *shadow_node,
		struct cds_ja_inode_flag **old_node_flag_ptr, uint8_t n,
		struct cds_ja_inode_flag *child_node_flag,
		struct cds_ja_inode_flag **nullify_node_flag_ptr,
		int level)
{
	unsigned int new_type_index;
	struct cds_ja_inode *new_node;
	struct cds_ja_shadow_node *new_shadow_node = NULL;
	const struct cds_ja_type *new_type;
	struct cds_ja_inode_flag *new_node_flag, *old_node_flag;
	int ret;
	int fallback = 0;

	old_node_flag = *old_node_flag_ptr;

	/*
	 * Need to find nearest type index even for ADD_SAME, because
	 * this recompaction, when applied to linear nodes, will garbage
	 * collect dummy (NULL) entries, and can therefore cause a few
	 * linear representations to be skipped.
	 */
	switch (mode) {
	case JA_RECOMPACT_ADD_SAME:
		new_type_index = find_nearest_type_index(old_type_index,
				shadow_node->nr_child + 1);
		dbg_printf("Recompact for node with %u children\n",
				shadow_node->nr_child + 1);
		break;
	case JA_RECOMPACT_ADD_NEXT:
		if (!shadow_node || old_type_index == NODE_INDEX_NULL) {
			new_type_index = 0;
			dbg_printf("Recompact for NULL\n");
		} else {
			new_type_index = find_nearest_type_index(old_type_index,
					shadow_node->nr_child + 1);
			dbg_printf("Recompact for node with %u children\n",
					shadow_node->nr_child + 1);
		}
		break;
	case JA_RECOMPACT_DEL:
		new_type_index = find_nearest_type_index(old_type_index,
				shadow_node->nr_child - 1);
		dbg_printf("Recompact for node with %u children\n",
				shadow_node->nr_child - 1);
		break;
	default:
		assert(0);
	}

retry:	/* for fallback */
	dbg_printf("Recompact from type %d to type %d\n",
			old_type_index, new_type_index);
	new_type = &ja_types[new_type_index];
	if (new_type_index != NODE_INDEX_NULL) {
		new_node = alloc_cds_ja_node(ja, new_type);
		if (!new_node)
			return -ENOMEM;

		if (new_type->type_class == RCU_JA_POOL) {
			switch (new_type->nr_pool_order) {
			case 1:
			{
				unsigned int node_distrib_bitsel;

				node_distrib_bitsel =
					ja_node_sum_distribution_1d(mode, ja,
						old_type_index, old_type,
						old_node, shadow_node,
						n, child_node_flag,
						nullify_node_flag_ptr);
				assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
				new_node_flag = ja_node_flag_pool_1d(new_node,
						new_type_index, node_distrib_bitsel);
				break;
			}
			case 2:
			{
				unsigned int node_distrib_bitsel[2];

				ja_node_sum_distribution_2d(mode, ja,
					old_type_index, old_type,
					old_node, shadow_node,
					n, child_node_flag,
					nullify_node_flag_ptr,
					node_distrib_bitsel);
				assert(!((unsigned long) new_node & JA_POOL_1D_MASK));
				assert(!((unsigned long) new_node & JA_POOL_2D_MASK));
				new_node_flag = ja_node_flag_pool_2d(new_node,
						new_type_index, node_distrib_bitsel);
				break;
			}
			default:
				assert(0);
			}
		} else {
			new_node_flag = ja_node_flag(new_node, new_type_index);
		}

		dbg_printf("Recompact inherit lock from %p\n", shadow_node);
		new_shadow_node = rcuja_shadow_set(ja->ht, new_node_flag, shadow_node, ja, level);
		if (!new_shadow_node) {
			free_cds_ja_node(ja, new_node);
			return -ENOMEM;
		}
		if (fallback)
			new_shadow_node->fallback_removal_count =
				JA_FALLBACK_REMOVAL_COUNT;
	} else {
		new_node = NULL;
		new_node_flag = NULL;
	}

	assert(mode != JA_RECOMPACT_ADD_NEXT || old_type->type_class != RCU_JA_PIGEON);

	if (new_type_index == NODE_INDEX_NULL)
		goto skip_copy;

11c5e016
MD
1500 switch (old_type->type_class) {
1501 case RCU_JA_LINEAR:
1502 {
1503 uint8_t nr_child =
1504 ja_linear_node_get_nr_child(old_type, old_node);
1505 unsigned int i;
1506
1507 for (i = 0; i < nr_child; i++) {
b4540e8a 1508 struct cds_ja_inode_flag *iter;
11c5e016
MD
1509 uint8_t v;
1510
1511 ja_linear_node_get_ith_pos(old_type, old_node, i, &v, &iter);
1512 if (!iter)
1513 continue;
af3cbd45 1514 if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
2e313670 1515 continue;
b1a90ce3 1516 ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
f07b240f 1517 new_shadow_node,
11c5e016 1518 v, iter);
f07b240f
MD
1519 if (new_type->type_class == RCU_JA_POOL && ret) {
1520 goto fallback_toosmall;
1521 }
11c5e016
MD
1522 assert(!ret);
1523 }
1524 break;
1525 }
1526 case RCU_JA_POOL:
1527 {
1528 unsigned int pool_nr;
1529
1530 for (pool_nr = 0; pool_nr < (1U << old_type->nr_pool_order); pool_nr++) {
b4540e8a 1531 struct cds_ja_inode *pool =
11c5e016
MD
1532 ja_pool_node_get_ith_pool(old_type,
1533 old_node, pool_nr);
1534 uint8_t nr_child =
1535 ja_linear_node_get_nr_child(old_type, pool);
1536 unsigned int j;
1537
1538 for (j = 0; j < nr_child; j++) {
b4540e8a 1539 struct cds_ja_inode_flag *iter;
11c5e016
MD
1540 uint8_t v;
1541
1542 ja_linear_node_get_ith_pos(old_type, pool,
1543 j, &v, &iter);
1544 if (!iter)
1545 continue;
af3cbd45 1546 if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
2e313670 1547 continue;
b1a90ce3 1548 ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
f07b240f 1549 new_shadow_node,
11c5e016 1550 v, iter);
f07b240f
MD
1551 if (new_type->type_class == RCU_JA_POOL
1552 && ret) {
1553 goto fallback_toosmall;
1554 }
11c5e016
MD
1555 assert(!ret);
1556 }
1557 }
1558 break;
7a0b2331 1559 }
a2a7ff59 1560 case RCU_JA_NULL:
19ddcd04 1561 assert(mode == JA_RECOMPACT_ADD_NEXT);
a2a7ff59 1562 break;
11c5e016 1563 case RCU_JA_PIGEON:
2e313670 1564 {
2e313670
MD
1565 unsigned int i;
1566
1567 assert(mode == JA_RECOMPACT_DEL);
48cbe001 1568 for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
2e313670
MD
1569 struct cds_ja_inode_flag *iter;
1570
1571 iter = ja_pigeon_node_get_ith_pos(old_type, old_node, i);
1572 if (!iter)
1573 continue;
af3cbd45 1574 if (mode == JA_RECOMPACT_DEL && *nullify_node_flag_ptr == iter)
2e313670 1575 continue;
b1a90ce3 1576 ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
2e313670
MD
1577 new_shadow_node,
1578 i, iter);
1579 if (new_type->type_class == RCU_JA_POOL && ret) {
1580 goto fallback_toosmall;
1581 }
1582 assert(!ret);
1583 }
1584 break;
1585 }
11c5e016
MD
1586 default:
1587 assert(0);
5a9a87dd 1588 ret = -EINVAL;
f07b240f 1589 goto end;
11c5e016 1590 }
2e313670 1591skip_copy:
11c5e016 1592
	if (mode == JA_RECOMPACT_ADD_NEXT || mode == JA_RECOMPACT_ADD_SAME) {
		/* add node */
		ret = _ja_node_set_nth(new_type, new_node, new_node_flag,
				new_shadow_node,
				n, child_node_flag);
		if (new_type->type_class == RCU_JA_POOL && ret) {
			goto fallback_toosmall;
		}
		assert(!ret);
	}

	if (fallback) {
		dbg_printf("Using fallback for %u children, node type index: %u, mode %s\n",
			new_shadow_node->nr_child, old_type_index, mode == JA_RECOMPACT_ADD_NEXT ? "add_next" :
				(mode == JA_RECOMPACT_DEL ? "del" : "add_same"));
		uatomic_inc(&ja->node_fallback_count_distribution[new_shadow_node->nr_child]);
	}

	/* Return pointer to new recompacted node through old_node_flag_ptr */
	*old_node_flag_ptr = new_node_flag;
	if (old_node) {
		int flags;

		flags = RCUJA_SHADOW_CLEAR_FREE_NODE;
		/*
		 * It is OK to free the lock associated with a node
		 * going to NULL, since we are holding the parent lock.
		 * This synchronizes removal with re-add of that node.
		 */
		if (new_type_index == NODE_INDEX_NULL)
			flags |= RCUJA_SHADOW_CLEAR_FREE_LOCK;
		ret = rcuja_shadow_clear(ja->ht, old_node_flag, shadow_node,
				flags);
		assert(!ret);
	}

	ret = 0;
end:
	return ret;

fallback_toosmall:
	/* fallback if next pool is too small */
	assert(new_shadow_node);
	ret = rcuja_shadow_clear(ja->ht, new_node_flag, new_shadow_node,
			RCUJA_SHADOW_CLEAR_FREE_NODE);
	assert(!ret);

	switch (mode) {
	case JA_RECOMPACT_ADD_SAME:
		/*
		 * JA_RECOMPACT_ADD_SAME is only triggered if a linear
		 * node within a pool has unused entries. It should
		 * therefore _never_ be too small.
		 */
		assert(0);

		/* Fall-through */
	case JA_RECOMPACT_ADD_NEXT:
	{
		const struct cds_ja_type *next_type;

		/*
		 * Recompaction attempt on add failed. Should only
		 * happen if target node type is pool. Caused by
		 * hard-to-split distribution. Recompact using the next
		 * distribution size.
		 */
		assert(new_type->type_class == RCU_JA_POOL);
		next_type = &ja_types[new_type_index + 1];
		/*
		 * Try going to the next pool size if our population
		 * fits within its range. This is not flagged as a
		 * fallback.
		 */
		if (shadow_node->nr_child + 1 >= next_type->min_child
				&& shadow_node->nr_child + 1 <= next_type->max_child) {
			new_type_index++;
			goto retry;
		} else {
			new_type_index++;
			dbg_printf("Add fallback to type %d\n", new_type_index);
			uatomic_inc(&ja->nr_fallback);
			fallback = 1;
			goto retry;
		}
		break;
	}
	case JA_RECOMPACT_DEL:
		/*
		 * Recompaction attempt on delete failed. Should only
		 * happen if target node type is pool. This is caused by
		 * a hard-to-split distribution. Recompact on same node
		 * size, but flag current node as "fallback" to ensure
		 * we don't attempt recompaction before some activity
		 * has reshuffled our node.
		 */
		assert(new_type->type_class == RCU_JA_POOL);
		new_type_index = old_type_index;
		dbg_printf("Delete fallback keeping type %d\n", new_type_index);
		uatomic_inc(&ja->nr_fallback);
		fallback = 1;
		goto retry;
	default:
		assert(0);
		return -EINVAL;
	}

	/*
	 * Last resort fallback: pigeon.
	 */
	new_type_index = (1UL << JA_TYPE_BITS) - 1;
	dbg_printf("Fallback to type %d\n", new_type_index);
	uatomic_inc(&ja->nr_fallback);
	fallback = 1;
	goto retry;
}

2e313670 1711 * Return 0 on success, -EAGAIN if a retry is needed, or other negative
5a9a87dd
MD
1712 * error value otherwise.
1713 */
7a0b2331 1714static
d96bfb0d 1715int ja_node_set_nth(struct cds_ja *ja,
b4540e8a 1716 struct cds_ja_inode_flag **node_flag, uint8_t n,
5a9a87dd 1717 struct cds_ja_inode_flag *child_node_flag,
48cbe001
MD
1718 struct cds_ja_shadow_node *shadow_node,
1719 int level)
7a0b2331
MD
1720{
1721 int ret;
e1db2db5 1722 unsigned int type_index;
d96bfb0d 1723 const struct cds_ja_type *type;
b4540e8a 1724 struct cds_ja_inode *node;
7a0b2331 1725
a2a7ff59
MD
1726 dbg_printf("ja_node_set_nth for n=%u, node %p, shadow %p\n",
1727 (unsigned int) n, ja_node_ptr(*node_flag), shadow_node);
1728
e1db2db5
MD
1729 node = ja_node_ptr(*node_flag);
1730 type_index = ja_node_type(*node_flag);
1731 type = &ja_types[type_index];
b1a90ce3 1732 ret = _ja_node_set_nth(type, node, *node_flag, shadow_node,
e1db2db5 1733 n, child_node_flag);
2e313670
MD
1734 switch (ret) {
1735 case -ENOSPC:
19ddcd04
MD
1736 /* Not enough space in node, need to recompact to next type. */
1737 ret = ja_node_recompact(JA_RECOMPACT_ADD_NEXT, ja, type_index, type, node,
48cbe001 1738 shadow_node, node_flag, n, child_node_flag, NULL, level);
2e313670
MD
1739 break;
1740 case -ERANGE:
1741 /* Node needs to be recompacted. */
19ddcd04 1742 ret = ja_node_recompact(JA_RECOMPACT_ADD_SAME, ja, type_index, type, node,
48cbe001 1743 shadow_node, node_flag, n, child_node_flag, NULL, level);
2e313670
MD
1744 break;
1745 }
1746 return ret;
1747}
1748
1749/*
1750 * Return 0 on success, -EAGAIN if a retry is needed, or other negative
1751 * error value otherwise.
1752 */
1753static
af3cbd45
MD
1754int ja_node_clear_ptr(struct cds_ja *ja,
1755 struct cds_ja_inode_flag **node_flag_ptr, /* Pointer to location to nullify */
1756 struct cds_ja_inode_flag **parent_node_flag_ptr, /* Address of parent ptr in its parent */
1757 struct cds_ja_shadow_node *shadow_node, /* of parent */
48cbe001 1758 uint8_t n, int level)
2e313670
MD
1759{
1760 int ret;
1761 unsigned int type_index;
1762 const struct cds_ja_type *type;
1763 struct cds_ja_inode *node;
1764
af3cbd45
MD
1765 dbg_printf("ja_node_clear_ptr for node %p, shadow %p, target ptr %p\n",
1766 ja_node_ptr(*parent_node_flag_ptr), shadow_node, node_flag_ptr);
2e313670 1767
af3cbd45
MD
1768 node = ja_node_ptr(*parent_node_flag_ptr);
1769 type_index = ja_node_type(*parent_node_flag_ptr);
2e313670 1770 type = &ja_types[type_index];
19ddcd04 1771 ret = _ja_node_clear_ptr(type, node, *parent_node_flag_ptr, shadow_node, node_flag_ptr, n);
2e313670 1772 if (ret == -EFBIG) {
19ddcd04 1773 /* Should try recompaction. */
2e313670 1774 ret = ja_node_recompact(JA_RECOMPACT_DEL, ja, type_index, type, node,
af3cbd45 1775 shadow_node, parent_node_flag_ptr, n, NULL,
48cbe001 1776 node_flag_ptr, level);
7a0b2331
MD
1777 }
1778 return ret;
1779}
be9a7474 1780
03ec1aeb 1781struct cds_ja_node *cds_ja_lookup(struct cds_ja *ja, uint64_t key)
b4540e8a 1782{
41975c12
MD
1783 unsigned int tree_depth, i;
1784 struct cds_ja_inode_flag *node_flag;
1785
102b3189 1786 if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
03ec1aeb 1787 return NULL;
41975c12 1788 tree_depth = ja->tree_depth;
5a9a87dd 1789 node_flag = rcu_dereference(ja->root);
41975c12 1790
5a9a87dd
MD
1791 /* level 0: root node */
1792 if (!ja_node_ptr(node_flag))
03ec1aeb 1793 return NULL;
5a9a87dd
MD
1794
1795 for (i = 1; i < tree_depth; i++) {
79b41067
MD
1796 uint8_t iter_key;
1797
1798 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
48cbe001 1799 node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
582a6ade
MD
1800 dbg_printf("cds_ja_lookup iter key lookup %u finds node_flag %p\n",
1801 (unsigned int) iter_key, node_flag);
41975c12 1802 if (!ja_node_ptr(node_flag))
03ec1aeb 1803 return NULL;
41975c12
MD
1804 }
1805
5a9a87dd 1806	/* Last level lookup succeeded. We got an actual match. */
03ec1aeb 1807 return (struct cds_ja_node *) node_flag;
5a9a87dd
MD
1808}
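
/*
 * Illustrative sketch (not part of the library): callers typically embed
 * struct cds_ja_node inside their own structure and look entries up under
 * the RCU read-side lock. The struct my_item type and my_item_lookup()
 * helper below are hypothetical, and assume the caller's translation unit
 * pulls in its chosen urcu flavor for the read-side lock.
 */
struct my_item {
	struct cds_ja_node ja_node;	/* rcuja linkage, must be embedded */
	uint64_t key;
	/* ... user payload ... */
};

static __attribute__((unused))
struct my_item *my_item_lookup(struct cds_ja *ja, uint64_t key)
{
	struct cds_ja_node *node;

	/* Caller holds the RCU read-side lock. */
	node = cds_ja_lookup(ja, key);
	if (!node)
		return NULL;
	/* Head of the duplicate list; see cds_ja_for_each_duplicate_rcu(). */
	return caa_container_of(node, struct my_item, ja_node);
}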
1809
b023ba9f
MD
1810static
1811struct cds_ja_node *cds_ja_lookup_inequality(struct cds_ja *ja, uint64_t key,
36305a3d 1812 uint64_t *result_key, enum ja_lookup_inequality mode)
291b2543
MD
1813{
1814 int tree_depth, level;
1815 struct cds_ja_inode_flag *node_flag, *cur_node_depth[JA_MAX_DEPTH];
36305a3d
MD
1816 uint8_t cur_key[JA_MAX_DEPTH];
1817 uint64_t _result_key = 0;
b023ba9f 1818 enum ja_direction dir;
291b2543 1819
b023ba9f
MD
1820 switch (mode) {
1821 case JA_LOOKUP_BE:
b023ba9f 1822 case JA_LOOKUP_AE:
102b3189 1823 if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
b023ba9f
MD
1824 return NULL;
1825 break;
1826 default:
03ec1aeb 1827 return NULL;
b023ba9f 1828 }
291b2543
MD
1829
1830 memset(cur_node_depth, 0, sizeof(cur_node_depth));
36305a3d 1831 memset(cur_key, 0, sizeof(cur_key));
291b2543
MD
1832 tree_depth = ja->tree_depth;
1833 node_flag = rcu_dereference(ja->root);
1834 cur_node_depth[0] = node_flag;
1835
1836 /* level 0: root node */
1837 if (!ja_node_ptr(node_flag))
03ec1aeb 1838 return NULL;
291b2543
MD
1839
1840 for (level = 1; level < tree_depth; level++) {
1841 uint8_t iter_key;
1842
1843 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
1844 node_flag = ja_node_get_nth(node_flag, NULL, iter_key);
1845 if (!ja_node_ptr(node_flag))
1846 break;
36305a3d 1847 cur_key[level - 1] = iter_key;
291b2543 1848 cur_node_depth[level] = node_flag;
b023ba9f 1849 dbg_printf("cds_ja_lookup_inequality iter key lookup %u finds node_flag %p\n",
291b2543
MD
1850 (unsigned int) iter_key, node_flag);
1851 }
1852
1853 if (level == tree_depth) {
1854		/* Last level lookup succeeded. We got an equal match. */
36305a3d
MD
1855 if (result_key)
1856 *result_key = key;
03ec1aeb 1857 return (struct cds_ja_node *) node_flag;
291b2543
MD
1858 }
1859
1860 /*
b023ba9f 1861	 * For JA_LOOKUP_BE, find the highest key left of the current node;
291b2543 1862	 * for JA_LOOKUP_AE, the lowest key right of it. Current node is cur_node_depth[level].
b023ba9f
MD
1863	 * Start at current level. If we cannot find any key left/right
1864	 * of ours, go one level up, seek the highest/lowest value
1865	 * left/right of the current one (recursively), and when we find
1866	 * one, get the rightmost/leftmost child of its rightmost/leftmost
1867	 * child (recursively).
291b2543 1868 */
b023ba9f
MD
1869 switch (mode) {
1870 case JA_LOOKUP_BE:
1871 dir = JA_LEFT;
1872 break;
1873 case JA_LOOKUP_AE:
1874 dir = JA_RIGHT;
1875 break;
1876 default:
1877 assert(0);
1878 }
291b2543
MD
1879 for (; level > 0; level--) {
1880 uint8_t iter_key;
1881
1882 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - level - 1)));
b023ba9f 1883 node_flag = ja_node_get_leftright(cur_node_depth[level - 1],
36305a3d 1884 iter_key, &cur_key[level - 1], dir);
86d700e9
MD
1885 dbg_printf("cds_ja_lookup_inequality find sibling from %u at %u finds node_flag %p\n",
1886 (unsigned int) iter_key, (unsigned int) cur_key[level - 1],
1887 node_flag);
36305a3d 1888 /* If found left/right sibling, find rightmost/leftmost child. */
291b2543
MD
1889 if (ja_node_ptr(node_flag))
1890 break;
1891 }
1892
1893 if (!level) {
b023ba9f 1894 /* Reached the root and could not find a left/right sibling. */
03ec1aeb 1895 return NULL;
291b2543
MD
1896 }
1897
1898 level++;
3c52f0f9
MD
1899
1900 /*
4cef6f97 1901 * From this point, we are guaranteed to be able to find a
b023ba9f
MD
1902 * "below than"/"above than" match. ja_attach_node() and
1903 * ja_detach_node() both guarantee that it is not possible for a
1904 * lookup to reach a dead-end.
3c52f0f9
MD
1905 */
1906
b023ba9f
MD
1907 /*
1908 * Find rightmost/leftmost child of rightmost/leftmost child
1909 * (recursively).
1910 */
1911 switch (mode) {
1912 case JA_LOOKUP_BE:
1913 dir = JA_RIGHTMOST;
1914 break;
1915 case JA_LOOKUP_AE:
1916 dir = JA_LEFTMOST;
1917 break;
1918 default:
1919 assert(0);
1920 }
291b2543 1921 for (; level < tree_depth; level++) {
36305a3d 1922 node_flag = ja_node_get_minmax(node_flag, &cur_key[level - 1], dir);
86d700e9
MD
1923 dbg_printf("cds_ja_lookup_inequality find minmax at %u finds node_flag %p\n",
1924 (unsigned int) cur_key[level - 1],
1925 node_flag);
291b2543
MD
1926 if (!ja_node_ptr(node_flag))
1927 break;
1928 }
1929
4cef6f97 1930 assert(level == tree_depth);
291b2543 1931
36305a3d
MD
1932 if (result_key) {
1933 for (level = 1; level < tree_depth; level++) {
1934 _result_key |= ((uint64_t) cur_key[level - 1])
1935 << (JA_BITS_PER_BYTE * (tree_depth - level - 1));
1936 }
1937 *result_key = _result_key;
1938 }
03ec1aeb 1939 return (struct cds_ja_node *) node_flag;
291b2543
MD
1940}
1941
36305a3d
MD
1942struct cds_ja_node *cds_ja_lookup_below_equal(struct cds_ja *ja,
1943 uint64_t key, uint64_t *result_key)
b023ba9f 1944{
86d700e9 1945 dbg_printf("cds_ja_lookup_below_equal key %" PRIu64 "\n", key);
36305a3d 1946 return cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_BE);
b023ba9f
MD
1947}
1948
36305a3d
MD
1949struct cds_ja_node *cds_ja_lookup_above_equal(struct cds_ja *ja,
1950 uint64_t key, uint64_t *result_key)
b023ba9f 1951{
86d700e9 1952 dbg_printf("cds_ja_lookup_above_equal key %" PRIu64 "\n", key);
36305a3d 1953 return cds_ja_lookup_inequality(ja, key, result_key, JA_LOOKUP_AE);
b023ba9f
MD
1954}
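
/*
 * Illustrative sketch (hypothetical helper, not part of the API): the
 * below/above-equal lookups also report, through *result_key, the key that
 * was actually found, which makes simple range scans possible. Assumes the
 * caller holds the RCU read-side lock for the whole scan.
 */
static __attribute__((unused))
void example_scan_range(struct cds_ja *ja, uint64_t start, uint64_t end)
{
	uint64_t found_key = start;
	struct cds_ja_node *node;

	for (;;) {
		node = cds_ja_lookup_above_equal(ja, found_key, &found_key);
		if (!node || found_key > end)
			break;
		/* ... process node (and its duplicates) here ... */
		if (found_key == UINT64_MAX)
			break;
		found_key++;	/* resume scan after the key just found */
	}
}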
1955
5a9a87dd
MD
1956/*
1957 * We reached an unpopulated node. Create it and the children we need,
1958 * and then attach the entire branch to the current node. This may
1959 * trigger recompaction of the current node. Locks needed: node lock
1960 * (for add), and, possibly, parent node lock (to update pointer due to
1961 * node recompaction).
1962 *
1963 * First take node lock, check if recompaction is needed, then take
1964 * parent lock (if needed). Then we can proceed to create the new
1965 * branch. Publish the new branch, and release locks.
1966 * TODO: we currently always take the parent lock even when not needed.
47d2eab3
MD
1967 *
1968 * ja_attach_node() ensures that a lookup will _never_ see a branch that
1969 * leads to a dead-end: the entire content of the new branch is
1970 * populated first, thus creating a cluster, and only then is the
1971 * cluster attached to the rest of the tree, thus making it visible
1972 * to lookups.
5a9a87dd
MD
1973 */
1974static
1975int ja_attach_node(struct cds_ja *ja,
b0ca2d21 1976 struct cds_ja_inode_flag **attach_node_flag_ptr,
b62a8d0c 1977 struct cds_ja_inode_flag *attach_node_flag,
48cbe001
MD
1978 struct cds_ja_inode_flag *parent_attach_node_flag,
1979 struct cds_ja_inode_flag **old_node_flag_ptr,
1980 struct cds_ja_inode_flag *old_node_flag,
5a9a87dd 1981 uint64_t key,
79b41067 1982 unsigned int level,
5a9a87dd
MD
1983 struct cds_ja_node *child_node)
1984{
1985 struct cds_ja_shadow_node *shadow_node = NULL,
af3cbd45 1986 *parent_shadow_node = NULL;
5a9a87dd
MD
1987 struct cds_ja_inode_flag *iter_node_flag, *iter_dest_node_flag;
1988 int ret, i;
a2a7ff59 1989 struct cds_ja_inode_flag *created_nodes[JA_MAX_DEPTH];
5a9a87dd
MD
1990 int nr_created_nodes = 0;
1991
48cbe001
MD
1992 dbg_printf("Attach node at level %u (old_node_flag %p, attach_node_flag_ptr %p attach_node_flag %p, parent_attach_node_flag %p)\n",
1993 level, old_node_flag, attach_node_flag_ptr, attach_node_flag, parent_attach_node_flag);
a2a7ff59 1994
48cbe001
MD
1995 assert(!old_node_flag);
1996 if (attach_node_flag) {
1997 shadow_node = rcuja_shadow_lookup_lock(ja->ht, attach_node_flag);
1998 if (!shadow_node) {
1999 ret = -EAGAIN;
2000 goto end;
2001 }
5a9a87dd 2002 }
48cbe001 2003 if (parent_attach_node_flag) {
5a9a87dd 2004 parent_shadow_node = rcuja_shadow_lookup_lock(ja->ht,
48cbe001 2005 parent_attach_node_flag);
5a9a87dd 2006 if (!parent_shadow_node) {
2e313670 2007 ret = -EAGAIN;
5a9a87dd
MD
2008 goto unlock_shadow;
2009 }
2010 }
2011
48cbe001 2012 if (old_node_flag_ptr && ja_node_ptr(*old_node_flag_ptr)) {
b306a0fe 2013 /*
c112acaa
MD
2014 * Target node has been updated between RCU lookup and
2015 * lock acquisition. We need to re-try lookup and
2016 * attach.
2017 */
2018 ret = -EAGAIN;
2019 goto unlock_parent;
2020 }
2021
9be99d4a
MD
2022 /*
2023 * Perform a lookup query to handle the case where
2024 * old_node_flag_ptr is NULL. We cannot use it to check if the
2025 * node has been populated between RCU lookup and mutex
2026 * acquisition.
2027 */
2028 if (!old_node_flag_ptr) {
2029 uint8_t iter_key;
2030 struct cds_ja_inode_flag *lookup_node_flag;
2031 struct cds_ja_inode_flag **lookup_node_flag_ptr;
2032
2033 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
2034 lookup_node_flag = ja_node_get_nth(attach_node_flag,
2035 &lookup_node_flag_ptr,
2036 iter_key);
2037 if (lookup_node_flag) {
2038 ret = -EEXIST;
2039 goto unlock_parent;
2040 }
2041 }
2042
c112acaa 2043 if (attach_node_flag_ptr && ja_node_ptr(*attach_node_flag_ptr) !=
b62a8d0c 2044 ja_node_ptr(attach_node_flag)) {
c112acaa
MD
2045 /*
2046 * Target node has been updated between RCU lookup and
2047 * lock acquisition. We need to re-try lookup and
2048 * attach.
b306a0fe
MD
2049 */
2050 ret = -EAGAIN;
2051 goto unlock_parent;
2052 }
2053
a2a7ff59 2054 /* Create new branch, starting from bottom */
03ec1aeb 2055 iter_node_flag = (struct cds_ja_inode_flag *) child_node;
5a9a87dd 2056
48cbe001 2057 for (i = ja->tree_depth - 1; i >= (int) level; i--) {
79b41067
MD
2058 uint8_t iter_key;
2059
48cbe001 2060 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - i - 1)));
79b41067 2061 dbg_printf("branch creation level %d, key %u\n",
48cbe001 2062 i, (unsigned int) iter_key);
5a9a87dd
MD
2063 iter_dest_node_flag = NULL;
2064 ret = ja_node_set_nth(ja, &iter_dest_node_flag,
79b41067 2065 iter_key,
5a9a87dd 2066 iter_node_flag,
48cbe001 2067 NULL, i);
9be99d4a
MD
2068 if (ret) {
2069 dbg_printf("branch creation error %d\n", ret);
5a9a87dd 2070 goto check_error;
9be99d4a 2071 }
5a9a87dd
MD
2072 created_nodes[nr_created_nodes++] = iter_dest_node_flag;
2073 iter_node_flag = iter_dest_node_flag;
2074 }
48cbe001 2075 assert(level > 0);
5a9a87dd 2076
48cbe001
MD
2077 /* Publish branch */
2078 if (level == 1) {
2079 /*
2080 * Attaching to root node.
2081 */
2082 rcu_assign_pointer(ja->root, iter_node_flag);
2083 } else {
79b41067
MD
2084 uint8_t iter_key;
2085
2086 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (ja->tree_depth - level)));
48cbe001
MD
2087 dbg_printf("publish branch at level %d, key %u\n",
2088 level - 1, (unsigned int) iter_key);
a2a7ff59 2089 /* We need to use set_nth on the previous level. */
48cbe001 2090 iter_dest_node_flag = attach_node_flag;
a2a7ff59 2091 ret = ja_node_set_nth(ja, &iter_dest_node_flag,
79b41067 2092 iter_key,
a2a7ff59 2093 iter_node_flag,
48cbe001 2094 shadow_node, level - 1);
9be99d4a
MD
2095 if (ret) {
2096 dbg_printf("branch publish error %d\n", ret);
a2a7ff59 2097 goto check_error;
9be99d4a 2098 }
48cbe001
MD
2099 /*
2100 * Attach branch
2101 */
2102 rcu_assign_pointer(*attach_node_flag_ptr, iter_dest_node_flag);
a2a7ff59
MD
2103 }
2104
5a9a87dd
MD
2105 /* Success */
2106 ret = 0;
2107
2108check_error:
2109 if (ret) {
2110 for (i = 0; i < nr_created_nodes; i++) {
2111 int tmpret;
a2a7ff59
MD
2112 int flags;
2113
2114 flags = RCUJA_SHADOW_CLEAR_FREE_LOCK;
2115 if (i)
2116 flags |= RCUJA_SHADOW_CLEAR_FREE_NODE;
5a9a87dd 2117 tmpret = rcuja_shadow_clear(ja->ht,
3d8fe307 2118 created_nodes[i],
a2a7ff59
MD
2119 NULL,
2120 flags);
5a9a87dd
MD
2121 assert(!tmpret);
2122 }
2123 }
b306a0fe 2124unlock_parent:
5a9a87dd
MD
2125 if (parent_shadow_node)
2126 rcuja_shadow_unlock(parent_shadow_node);
2127unlock_shadow:
2128 if (shadow_node)
2129 rcuja_shadow_unlock(shadow_node);
2130end:
2131 return ret;
2132}
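
/*
 * Illustrative sketch of the publication ordering ja_attach_node() relies
 * on (generic RCU publish pattern, hypothetical names): the new cluster of
 * nodes is fully populated before a single rcu_assign_pointer() makes it
 * reachable, so concurrent lookups observe either the previous content or
 * the complete new branch, never a partially built one.
 */
static __attribute__((unused))
void example_publish_branch(struct cds_ja_inode_flag **slot,
		struct cds_ja_inode_flag *new_branch)
{
	/*
	 * All stores initializing new_branch and its children are assumed
	 * to be done at this point. rcu_assign_pointer() orders those
	 * stores before the pointer publication.
	 */
	rcu_assign_pointer(*slot, new_branch);
}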
2133
2134/*
03ec1aeb
MD
2135 * Lock the parent containing the pointer to the list of duplicates, and
2136 * add the node to this list. Failure can happen if a concurrent update
2137 * changes the parent before we get the lock. We return -EAGAIN in that case.
5a9a87dd
MD
2138 * Return 0 on success, negative error value on failure.
2139 */
2140static
2141int ja_chain_node(struct cds_ja *ja,
af3cbd45 2142 struct cds_ja_inode_flag *parent_node_flag,
fa112799 2143 struct cds_ja_inode_flag **node_flag_ptr,
c112acaa 2144 struct cds_ja_inode_flag *node_flag,
28fd1038 2145 struct cds_ja_node *last_node,
5a9a87dd
MD
2146 struct cds_ja_node *node)
2147{
2148 struct cds_ja_shadow_node *shadow_node;
28fd1038
MD
2149 struct cds_ja_node *iter_node;
2150 int ret = 0, found = 0;
5a9a87dd 2151
3d8fe307 2152 shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
b306a0fe 2153 if (!shadow_node) {
2e313670 2154 return -EAGAIN;
b306a0fe 2155 }
28fd1038
MD
2156 /*
2157 * Ensure that previous node is still there at end of list.
2158 */
2159 iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
2160 if ((struct cds_ja_node *) ja_node_ptr(*node_flag_ptr) != iter_node) {
2161 ret = -EAGAIN;
2162 goto end;
2163 }
2164 cds_ja_for_each_duplicate(iter_node) {
2165 if (iter_node == last_node)
2166 found = 1;
2167 }
2168 if (!found) {
fa112799
MD
2169 ret = -EAGAIN;
2170 goto end;
2171 }
03ec1aeb 2172 /*
28fd1038
MD
2173 * Add node to tail of list to ensure that RCU traversals will
2174	 * always see either the prior node or the newly added one if
2175 * executed concurrently with a sequence of add followed by del
2176 * on the same key. Safe against concurrent RCU read traversals.
03ec1aeb 2177 */
28fd1038
MD
2178 node->next = NULL;
2179 rcu_assign_pointer(last_node->next, node);
fa112799 2180end:
5a9a87dd 2181 rcuja_shadow_unlock(shadow_node);
fa112799 2182 return ret;
5a9a87dd
MD
2183}
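
/*
 * Illustrative sketch: entries added more than once under the same key form
 * a singly-linked duplicate list headed by the node that cds_ja_lookup()
 * returns. A reader walks it with cds_ja_for_each_duplicate_rcu() while
 * holding the RCU read-side lock. Hypothetical helper below.
 */
static __attribute__((unused))
unsigned int example_count_duplicates(struct cds_ja *ja, uint64_t key)
{
	struct cds_ja_node *iter;
	unsigned int count = 0;

	/* Caller holds the RCU read-side lock. */
	iter = cds_ja_lookup(ja, key);
	cds_ja_for_each_duplicate_rcu(iter)
		count++;
	return count;
}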
2184
75d573aa
MD
2185static
2186int _cds_ja_add(struct cds_ja *ja, uint64_t key,
6475613c 2187 struct cds_ja_node *node,
75d573aa 2188 struct cds_ja_node **unique_node_ret)
5a9a87dd
MD
2189{
2190 unsigned int tree_depth, i;
48cbe001 2191 struct cds_ja_inode_flag *attach_node_flag,
5a9a87dd 2192 *parent_node_flag,
b62a8d0c 2193 *parent2_node_flag,
48cbe001
MD
2194 *node_flag,
2195 *parent_attach_node_flag;
2196 struct cds_ja_inode_flag **attach_node_flag_ptr,
2197 **parent_node_flag_ptr,
2198 **node_flag_ptr;
5a9a87dd
MD
2199 int ret;
2200
102b3189 2201 if (caa_unlikely(key > ja->key_max || key == UINT64_MAX)) {
5a9a87dd 2202 return -EINVAL;
b306a0fe 2203 }
5a9a87dd
MD
2204 tree_depth = ja->tree_depth;
2205
2206retry:
a2a7ff59 2207 dbg_printf("cds_ja_add attempt: key %" PRIu64 ", node %p\n",
6475613c 2208 key, node);
5a9a87dd 2209 parent2_node_flag = NULL;
b0f74e47
MD
2210 parent_node_flag =
2211 (struct cds_ja_inode_flag *) &ja->root; /* Use root ptr address as key for mutex */
48cbe001 2212 parent_node_flag_ptr = NULL;
35170a44 2213 node_flag = rcu_dereference(ja->root);
48cbe001 2214 node_flag_ptr = &ja->root;
5a9a87dd
MD
2215
2216 /* Iterate on all internal levels */
a2a7ff59 2217 for (i = 1; i < tree_depth; i++) {
79b41067
MD
2218 uint8_t iter_key;
2219
48cbe001
MD
2220 if (!ja_node_ptr(node_flag))
2221 break;
2222 dbg_printf("cds_ja_add iter parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
2223 parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
79b41067 2224 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
5a9a87dd
MD
2225 parent2_node_flag = parent_node_flag;
2226 parent_node_flag = node_flag;
48cbe001 2227 parent_node_flag_ptr = node_flag_ptr;
5a9a87dd
MD
2228 node_flag = ja_node_get_nth(node_flag,
2229 &node_flag_ptr,
79b41067 2230 iter_key);
5a9a87dd
MD
2231 }
2232
2233 /*
48cbe001
MD
2234	 * We reached either the bottom of the tree or an internal NULL node;
2235	 * simply add the node to the last internal level, or chain it if the
2236	 * key is already present.
5a9a87dd
MD
2237 */
2238 if (!ja_node_ptr(node_flag)) {
48cbe001
MD
2239 dbg_printf("cds_ja_add NULL parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
2240 parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
75d573aa 2241
48cbe001
MD
2242 attach_node_flag = parent_node_flag;
2243 attach_node_flag_ptr = parent_node_flag_ptr;
2244 parent_attach_node_flag = parent2_node_flag;
2245
b0ca2d21 2246 ret = ja_attach_node(ja, attach_node_flag_ptr,
b62a8d0c 2247 attach_node_flag,
48cbe001
MD
2248 parent_attach_node_flag,
2249 node_flag_ptr,
2250 node_flag,
6475613c 2251 key, i, node);
5a9a87dd 2252 } else {
28fd1038
MD
2253 struct cds_ja_node *iter_node, *last_node = NULL;
2254
75d573aa
MD
2255 if (unique_node_ret) {
2256 *unique_node_ret = (struct cds_ja_node *) ja_node_ptr(node_flag);
2257 return -EEXIST;
2258 }
2259
28fd1038
MD
2260 /* Find last duplicate */
2261 iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
2262 cds_ja_for_each_duplicate_rcu(iter_node)
2263 last_node = iter_node;
2264
48cbe001
MD
2265 dbg_printf("cds_ja_add duplicate parent2_node_flag %p parent_node_flag %p node_flag_ptr %p node_flag %p\n",
2266 parent2_node_flag, parent_node_flag, node_flag_ptr, node_flag);
75d573aa 2267
48cbe001
MD
2268 attach_node_flag = node_flag;
2269 attach_node_flag_ptr = node_flag_ptr;
2270 parent_attach_node_flag = parent_node_flag;
2271
5a9a87dd 2272 ret = ja_chain_node(ja,
48cbe001
MD
2273 parent_attach_node_flag,
2274 attach_node_flag_ptr,
2275 attach_node_flag,
28fd1038 2276 last_node,
6475613c 2277 node);
5a9a87dd 2278 }
b306a0fe 2279 if (ret == -EAGAIN || ret == -EEXIST)
5a9a87dd 2280 goto retry;
48cbe001 2281
5a9a87dd 2282 return ret;
b4540e8a
MD
2283}
2284
75d573aa 2285int cds_ja_add(struct cds_ja *ja, uint64_t key,
6475613c 2286 struct cds_ja_node *node)
75d573aa 2287{
6475613c 2288 return _cds_ja_add(ja, key, node, NULL);
75d573aa
MD
2289}
2290
2291struct cds_ja_node *cds_ja_add_unique(struct cds_ja *ja, uint64_t key,
6475613c 2292 struct cds_ja_node *node)
75d573aa
MD
2293{
2294 int ret;
2295 struct cds_ja_node *ret_node;
2296
6475613c 2297 ret = _cds_ja_add(ja, key, node, &ret_node);
75d573aa
MD
2298 if (ret == -EEXIST)
2299 return ret_node;
2300 else
6475613c 2301 return node;
75d573aa
MD
2302}
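
/*
 * Illustrative sketch (hypothetical caller code): cds_ja_add_unique()
 * returns the node that ends up associated with the key, so comparing the
 * return value with the node we tried to insert tells whether our insertion
 * won or an existing entry was found. As for cds_ja_add(), this is assumed
 * to run under the RCU read-side lock.
 */
static __attribute__((unused))
int example_add_unique(struct cds_ja *ja, uint64_t key,
		struct cds_ja_node *node)
{
	struct cds_ja_node *ret_node;

	ret_node = cds_ja_add_unique(ja, key, node);
	if (ret_node != node)
		return -EEXIST;	/* key already present: ret_node is the existing entry */
	return 0;
}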
2303
af3cbd45
MD
2304/*
2305 * Note: there is no need to lookup the pointer address associated with
2306 * each node's nth item after taking the lock: it's already been done by
2307 * cds_ja_del while holding the rcu read-side lock, and our node rules
2308 * ensure that when a match value -> pointer is found in a node, it is
2309 * _NEVER_ changed for that node without recompaction, and recompaction
2310 * reallocates the node.
b306a0fe
MD
2311 * However, when a child is removed from "linear" nodes, its pointer
2312 * is set to NULL. We therefore check, while holding the locks, if this
2313 * pointer is NULL, and return -ENOENT to the caller if it is the case.
4cef6f97
MD
2314 *
2315 * ja_detach_node() ensures that a lookup will _never_ see a branch that
2316 * leads to a dead-end: when removing a branch, it makes sure to perform
2317 * the "cut" at the highest node that has only one child, effectively
2318 * replacing it with a NULL pointer.
af3cbd45 2319 */
35170a44
MD
2320static
2321int ja_detach_node(struct cds_ja *ja,
2322 struct cds_ja_inode_flag **snapshot,
af3cbd45
MD
2323 struct cds_ja_inode_flag ***snapshot_ptr,
2324 uint8_t *snapshot_n,
35170a44
MD
2325 int nr_snapshot,
2326 uint64_t key,
2327 struct cds_ja_node *node)
2328{
af3cbd45
MD
2329 struct cds_ja_shadow_node *shadow_nodes[JA_MAX_DEPTH];
2330 struct cds_ja_inode_flag **node_flag_ptr = NULL,
2331 *parent_node_flag = NULL,
2332 **parent_node_flag_ptr = NULL;
b62a8d0c 2333 struct cds_ja_inode_flag *iter_node_flag;
4d6ef45e
MD
2334 int ret, i, nr_shadow = 0, nr_clear = 0, nr_branch = 0;
2335 uint8_t n = 0;
35170a44 2336
4d6ef45e 2337 assert(nr_snapshot == ja->tree_depth + 1);
35170a44 2338
af3cbd45
MD
2339 /*
2340 * From the last internal level node going up, get the node
2341 * lock, check if the node has only one child left. If it is the
2342 * case, we continue iterating upward. When we reach a node
2343	 * which has more than one child left, we lock the parent, and
2344 * proceed to the node deletion (removing its children too).
2345 */
4d6ef45e 2346 for (i = nr_snapshot - 2; i >= 1; i--) {
af3cbd45
MD
2347 struct cds_ja_shadow_node *shadow_node;
2348
2349 shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 2350 snapshot[i]);
af3cbd45
MD
2351 if (!shadow_node) {
2352 ret = -EAGAIN;
2353 goto end;
2354 }
af3cbd45 2355 shadow_nodes[nr_shadow++] = shadow_node;
b62a8d0c
MD
2356
2357 /*
2358 * Check if node has been removed between RCU
2359 * lookup and lock acquisition.
2360 */
2361 assert(snapshot_ptr[i + 1]);
2362 if (ja_node_ptr(*snapshot_ptr[i + 1])
2363 != ja_node_ptr(snapshot[i + 1])) {
2364 ret = -ENOENT;
2365 goto end;
2366 }
2367
2368 assert(shadow_node->nr_child > 0);
d810c97f 2369 if (shadow_node->nr_child == 1 && i > 1)
4d6ef45e
MD
2370 nr_clear++;
2371 nr_branch++;
af3cbd45
MD
2372 if (shadow_node->nr_child > 1 || i == 1) {
2373 /* Lock parent and break */
2374 shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 2375 snapshot[i - 1]);
af3cbd45
MD
2376 if (!shadow_node) {
2377 ret = -EAGAIN;
2378 goto end;
2379 }
2380 shadow_nodes[nr_shadow++] = shadow_node;
b62a8d0c 2381
c112acaa
MD
2382 /*
2383 * Check if node has been removed between RCU
2384 * lookup and lock acquisition.
2385 */
b62a8d0c
MD
2386 assert(snapshot_ptr[i]);
2387 if (ja_node_ptr(*snapshot_ptr[i])
2388 != ja_node_ptr(snapshot[i])) {
c112acaa
MD
2389 ret = -ENOENT;
2390 goto end;
2391 }
2392
b62a8d0c 2393 node_flag_ptr = snapshot_ptr[i + 1];
4d6ef45e
MD
2394 n = snapshot_n[i + 1];
2395 parent_node_flag_ptr = snapshot_ptr[i];
2396 parent_node_flag = snapshot[i];
c112acaa 2397
af3cbd45
MD
2398 if (i > 1) {
2399 /*
2400 * Lock parent's parent, in case we need
2401 * to recompact parent.
2402 */
2403 shadow_node = rcuja_shadow_lookup_lock(ja->ht,
3d8fe307 2404 snapshot[i - 2]);
af3cbd45
MD
2405 if (!shadow_node) {
2406 ret = -EAGAIN;
2407 goto end;
2408 }
2409 shadow_nodes[nr_shadow++] = shadow_node;
b62a8d0c
MD
2410
2411 /*
2412 * Check if node has been removed between RCU
2413 * lookup and lock acquisition.
2414 */
2415 assert(snapshot_ptr[i - 1]);
2416 if (ja_node_ptr(*snapshot_ptr[i - 1])
2417 != ja_node_ptr(snapshot[i - 1])) {
2418 ret = -ENOENT;
2419 goto end;
2420 }
af3cbd45 2421 }
b62a8d0c 2422
af3cbd45
MD
2423 break;
2424 }
2425 }
2426
2427 /*
4d6ef45e
MD
2428 * At this point, we want to delete all nodes that are about to
2429 * be removed from shadow_nodes (except the last one, which is
2430	 * either the root or the parent of the uppermost node with one
b62a8d0c
MD
2431	 * child). It is OK to free the lock here, because the RCU read lock
2432	 * is held, and the free is only performed from call_rcu.
af3cbd45
MD
2433 */
2434
2435 for (i = 0; i < nr_clear; i++) {
2436 ret = rcuja_shadow_clear(ja->ht,
3d8fe307 2437 shadow_nodes[i]->node_flag,
af3cbd45
MD
2438 shadow_nodes[i],
2439 RCUJA_SHADOW_CLEAR_FREE_NODE
2440 | RCUJA_SHADOW_CLEAR_FREE_LOCK);
2441 assert(!ret);
2442 }
2443
2444 iter_node_flag = parent_node_flag;
2445 /* Remove from parent */
2446 ret = ja_node_clear_ptr(ja,
2447 node_flag_ptr, /* Pointer to location to nullify */
2448 &iter_node_flag, /* Old new parent ptr in its parent */
4d6ef45e 2449 shadow_nodes[nr_branch - 1], /* of parent */
48cbe001 2450 n, nr_branch - 1);
b306a0fe
MD
2451 if (ret)
2452 goto end;
af3cbd45 2453
4d6ef45e
MD
2454 dbg_printf("ja_detach_node: publish %p instead of %p\n",
2455 iter_node_flag, *parent_node_flag_ptr);
af3cbd45
MD
2456 /* Update address of parent ptr in its parent */
2457 rcu_assign_pointer(*parent_node_flag_ptr, iter_node_flag);
2458
2459end:
2460 for (i = 0; i < nr_shadow; i++)
2461 rcuja_shadow_unlock(shadow_nodes[i]);
35170a44
MD
2462 return ret;
2463}
2464
af3cbd45
MD
2465static
2466int ja_unchain_node(struct cds_ja *ja,
2467 struct cds_ja_inode_flag *parent_node_flag,
fa112799 2468 struct cds_ja_inode_flag **node_flag_ptr,
013a6083 2469 struct cds_ja_inode_flag *node_flag,
af3cbd45
MD
2470 struct cds_ja_node *node)
2471{
2472 struct cds_ja_shadow_node *shadow_node;
03ec1aeb 2473 struct cds_ja_node *iter_node, **iter_node_ptr, **prev_node_ptr = NULL;
013a6083 2474 int ret = 0, count = 0, found = 0;
af3cbd45 2475
3d8fe307 2476 shadow_node = rcuja_shadow_lookup_lock(ja->ht, parent_node_flag);
af3cbd45
MD
2477 if (!shadow_node)
2478 return -EAGAIN;
013a6083 2479 if (ja_node_ptr(*node_flag_ptr) != ja_node_ptr(node_flag)) {
fa112799
MD
2480 ret = -EAGAIN;
2481 goto end;
2482 }
af3cbd45 2483 /*
03ec1aeb
MD
2484 * Find the previous node's next pointer pointing to our node,
2485	 * so we can update it. Retry if another thread removed all but
2486	 * one of the duplicates since our check (that check was performed
2487	 * without the lock). Ensure that the node we are about to remove is
2488 * still in the list (while holding lock). No need for RCU
2489 * traversal here since we hold the lock on the parent.
af3cbd45 2490 */
03ec1aeb
MD
2491 iter_node_ptr = (struct cds_ja_node **) node_flag_ptr;
2492 iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
2493 cds_ja_for_each_duplicate(iter_node) {
f2758d14 2494 count++;
03ec1aeb
MD
2495 if (iter_node == node) {
2496 prev_node_ptr = iter_node_ptr;
013a6083 2497 found++;
03ec1aeb
MD
2498 }
2499 iter_node_ptr = &iter_node->next;
f2758d14 2500 }
013a6083
MD
2501 assert(found <= 1);
2502 if (!found || count == 1) {
af3cbd45
MD
2503 ret = -EAGAIN;
2504 goto end;
2505 }
03ec1aeb 2506 CMM_STORE_SHARED(*prev_node_ptr, node->next);
ade342cb
MD
2507 /*
2508 * Validate that we indeed removed the node from linked list.
2509 */
2510 assert(ja_node_ptr(*node_flag_ptr) != (struct cds_ja_inode *) node);
af3cbd45
MD
2511end:
2512 rcuja_shadow_unlock(shadow_node);
2513 return ret;
2514}
2515
2516/*
2517 * Called with RCU read lock held.
2518 */
35170a44
MD
2519int cds_ja_del(struct cds_ja *ja, uint64_t key,
2520 struct cds_ja_node *node)
2521{
2522 unsigned int tree_depth, i;
2523 struct cds_ja_inode_flag *snapshot[JA_MAX_DEPTH];
af3cbd45
MD
2524 struct cds_ja_inode_flag **snapshot_ptr[JA_MAX_DEPTH];
2525 uint8_t snapshot_n[JA_MAX_DEPTH];
35170a44 2526 struct cds_ja_inode_flag *node_flag;
fa112799
MD
2527 struct cds_ja_inode_flag **prev_node_flag_ptr,
2528 **node_flag_ptr;
4d6ef45e 2529 int nr_snapshot;
35170a44
MD
2530 int ret;
2531
102b3189 2532 if (caa_unlikely(key > ja->key_max || key == UINT64_MAX))
35170a44
MD
2533 return -EINVAL;
2534 tree_depth = ja->tree_depth;
2535
2536retry:
4d6ef45e 2537 nr_snapshot = 0;
35170a44
MD
2538 dbg_printf("cds_ja_del attempt: key %" PRIu64 ", node %p\n",
2539 key, node);
2540
2541 /* snapshot for level 0 is only for shadow node lookup */
4d6ef45e
MD
2542 snapshot_n[0] = 0;
2543 snapshot_n[1] = 0;
af3cbd45 2544 snapshot_ptr[nr_snapshot] = NULL;
35170a44
MD
2545 snapshot[nr_snapshot++] = (struct cds_ja_inode_flag *) &ja->root;
2546 node_flag = rcu_dereference(ja->root);
af3cbd45 2547 prev_node_flag_ptr = &ja->root;
fa112799 2548 node_flag_ptr = &ja->root;
35170a44
MD
2549
2550 /* Iterate on all internal levels */
2551 for (i = 1; i < tree_depth; i++) {
2552 uint8_t iter_key;
2553
2554 dbg_printf("cds_ja_del iter node_flag %p\n",
2555 node_flag);
2556 if (!ja_node_ptr(node_flag)) {
2557 return -ENOENT;
2558 }
35170a44 2559 iter_key = (uint8_t) (key >> (JA_BITS_PER_BYTE * (tree_depth - i - 1)));
4d6ef45e 2560 snapshot_n[nr_snapshot + 1] = iter_key;
af3cbd45
MD
2561 snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
2562 snapshot[nr_snapshot++] = node_flag;
35170a44 2563 node_flag = ja_node_get_nth(node_flag,
fa112799 2564 &node_flag_ptr,
35170a44 2565 iter_key);
48cbe001
MD
2566 if (node_flag)
2567 prev_node_flag_ptr = node_flag_ptr;
af3cbd45
MD
2568 dbg_printf("cds_ja_del iter key lookup %u finds node_flag %p, prev_node_flag_ptr %p\n",
2569 (unsigned int) iter_key, node_flag,
2570 prev_node_flag_ptr);
35170a44 2571 }
35170a44
MD
2572 /*
2573	 * We reached the bottom of the tree; try to find the node we are trying
2574 * to remove. Fail if we cannot find it.
2575 */
2576 if (!ja_node_ptr(node_flag)) {
4d6ef45e
MD
2577 dbg_printf("cds_ja_del: no node found for key %" PRIu64 "\n",
2578 key);
35170a44
MD
2579 return -ENOENT;
2580 } else {
03ec1aeb 2581 struct cds_ja_node *iter_node, *match = NULL;
af3cbd45 2582 int count = 0;
35170a44 2583
03ec1aeb
MD
2584 iter_node = (struct cds_ja_node *) ja_node_ptr(node_flag);
2585 cds_ja_for_each_duplicate_rcu(iter_node) {
2586 dbg_printf("cds_ja_del: compare %p with iter_node %p\n", node, iter_node);
2587 if (iter_node == node)
2588 match = iter_node;
af3cbd45 2589 count++;
35170a44 2590 }
03ec1aeb 2591
4d6ef45e
MD
2592 if (!match) {
2593 dbg_printf("cds_ja_del: no node match for node %p key %" PRIu64 "\n", node, key);
35170a44 2594 return -ENOENT;
4d6ef45e 2595 }
af3cbd45
MD
2596 assert(count > 0);
2597 if (count == 1) {
2598 /*
4d6ef45e
MD
2599 * Removing last of duplicates. Last snapshot
2600			 * does not have a shadow node (external leaves).
af3cbd45
MD
2601 */
2602 snapshot_ptr[nr_snapshot] = prev_node_flag_ptr;
2603 snapshot[nr_snapshot++] = node_flag;
2604 ret = ja_detach_node(ja, snapshot, snapshot_ptr,
2605 snapshot_n, nr_snapshot, key, node);
2606 } else {
f2758d14 2607 ret = ja_unchain_node(ja, snapshot[nr_snapshot - 1],
013a6083 2608 node_flag_ptr, node_flag, match);
af3cbd45 2609 }
35170a44 2610 }
b306a0fe
MD
2611 /*
2612 * Explanation of -ENOENT handling: caused by concurrent delete
2613 * between RCU lookup and actual removal. Need to re-do the
2614 * lookup and removal attempt.
2615 */
2616 if (ret == -EAGAIN || ret == -ENOENT)
35170a44
MD
2617 goto retry;
2618 return ret;
2619}
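
/*
 * Illustrative sketch (hypothetical caller code, reusing the struct my_item
 * example above): after a successful cds_ja_del() the caller still owns the
 * node memory, but concurrent RCU readers may still reference it, so
 * reclamation must be deferred past a grace period. This sketch assumes the
 * caller's translation unit includes its chosen urcu flavor header, which
 * provides rcu_read_lock(), rcu_read_unlock() and synchronize_rcu().
 */
static __attribute__((unused))
int example_remove_item(struct cds_ja *ja, struct my_item *item)
{
	int ret;

	rcu_read_lock();
	ret = cds_ja_del(ja, item->key, &item->ja_node);
	rcu_read_unlock();
	if (ret)
		return ret;	/* e.g. -ENOENT if the node was not found */
	synchronize_rcu();	/* wait for readers before reclaiming */
	free(item);
	return 0;
}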
2620
b4540e8a
MD
2621struct cds_ja *_cds_ja_new(unsigned int key_bits,
2622 const struct rcu_flavor_struct *flavor)
be9a7474
MD
2623{
2624 struct cds_ja *ja;
b0f74e47 2625 int ret;
f07b240f 2626 struct cds_ja_shadow_node *root_shadow_node;
be9a7474
MD
2627
2628 ja = calloc(sizeof(*ja), 1);
2629 if (!ja)
2630 goto ja_error;
b4540e8a
MD
2631
2632 switch (key_bits) {
2633 case 8:
b4540e8a 2634 case 16:
1216b3d2 2635 case 24:
b4540e8a 2636 case 32:
1216b3d2
MD
2637 case 40:
2638 case 48:
2639 case 56:
2640 ja->key_max = (1ULL << key_bits) - 1;
b4540e8a
MD
2641 break;
2642 case 64:
2643 ja->key_max = UINT64_MAX;
2644 break;
2645 default:
2646 goto check_error;
2647 }
2648
be9a7474 2649 /* ja->root is NULL */
5a9a87dd 2650 /* tree_depth 0 is for pointer to root node */
582a6ade 2651 ja->tree_depth = (key_bits >> JA_LOG2_BITS_PER_BYTE) + 1;
a2a7ff59 2652 assert(ja->tree_depth <= JA_MAX_DEPTH);
be9a7474
MD
2653 ja->ht = rcuja_create_ht(flavor);
2654 if (!ja->ht)
2655 goto ht_error;
b0f74e47
MD
2656
2657 /*
2658 * Note: we should not free this node until judy array destroy.
2659 */
f07b240f 2660 root_shadow_node = rcuja_shadow_set(ja->ht,
3d8fe307 2661 (struct cds_ja_inode_flag *) &ja->root,
48cbe001 2662 NULL, ja, 0);
f07b240f
MD
2663 if (!root_shadow_node) {
2664 ret = -ENOMEM;
b0f74e47 2665 goto ht_node_error;
f07b240f 2666 }
b0f74e47 2667
be9a7474
MD
2668 return ja;
2669
b0f74e47
MD
2670ht_node_error:
2671 ret = rcuja_delete_ht(ja->ht);
2672 assert(!ret);
be9a7474 2673ht_error:
b4540e8a 2674check_error:
be9a7474
MD
2675 free(ja);
2676ja_error:
2677 return NULL;
2678}
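
/*
 * Worked example (illustrative): with the key_bits values accepted above,
 * the resulting limits are, assuming the public cds_ja_new() wrapper simply
 * forwards to _cds_ja_new() with the default urcu flavor:
 *
 *	key_bits = 16: key_max = 65535,          tree_depth = 3
 *	key_bits = 32: key_max = 4294967295,     tree_depth = 5
 *	key_bits = 64: key_max = UINT64_MAX,     tree_depth = 9
 *
 * (tree_depth = key_bits / 8 + 1, the extra level being the root pointer.)
 */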
2679
19ddcd04 2680static
354981c2 2681void print_debug_fallback_distribution(struct cds_ja *ja)
19ddcd04
MD
2682{
2683 int i;
2684
2685 fprintf(stderr, "Fallback node distribution:\n");
2686 for (i = 0; i < JA_ENTRY_PER_NODE; i++) {
354981c2 2687 if (!ja->node_fallback_count_distribution[i])
19ddcd04
MD
2688 continue;
2689 fprintf(stderr, " %3u: %4lu\n",
354981c2 2690 i, ja->node_fallback_count_distribution[i]);
19ddcd04
MD
2691 }
2692}
2693
021c72c0 2694static
19a748d9 2695int ja_final_checks(struct cds_ja *ja)
021c72c0
MD
2696{
2697 double fallback_ratio;
2698 unsigned long na, nf, nr_fallback;
19a748d9 2699 int ret = 0;
021c72c0
MD
2700
2701 fallback_ratio = (double) uatomic_read(&ja->nr_fallback);
2702 fallback_ratio /= (double) uatomic_read(&ja->nr_nodes_allocated);
2703 nr_fallback = uatomic_read(&ja->nr_fallback);
2704 if (nr_fallback)
2705 fprintf(stderr,
2706 "[warning] RCU Judy Array used %lu fallback node(s) (ratio: %g)\n",
2707 uatomic_read(&ja->nr_fallback),
2708 fallback_ratio);
2709
2710 na = uatomic_read(&ja->nr_nodes_allocated);
2711 nf = uatomic_read(&ja->nr_nodes_freed);
19a748d9
MD
2712 dbg_printf("Nodes allocated: %lu, Nodes freed: %lu.\n", na, nf);
2713 if (nr_fallback)
2714 print_debug_fallback_distribution(ja);
2715
021c72c0
MD
2716 if (na != nf) {
2717 fprintf(stderr, "[error] Judy array leaked %ld nodes. Allocated: %lu, freed: %lu.\n",
2718 (long) na - nf, na, nf);
19a748d9 2719 ret = -1;
021c72c0 2720 }
19a748d9 2721 return ret;
021c72c0
MD
2722}
2723
be9a7474 2724/*
dc0e9798
MD
2725 * There should be no more concurrent add, delete, or look-up performed
2726 * on the Judy array while it is being destroyed (ensured by the
2727 * caller).
be9a7474 2728 */
99e6e3dc 2729int cds_ja_destroy(struct cds_ja *ja)
be9a7474 2730{
48cbe001 2731 const struct rcu_flavor_struct *flavor;
b4540e8a
MD
2732 int ret;
2733
48cbe001 2734 flavor = cds_lfht_rcu_flavor(ja->ht);
be9a7474 2735 rcuja_shadow_prune(ja->ht,
99e6e3dc 2736 RCUJA_SHADOW_CLEAR_FREE_NODE | RCUJA_SHADOW_CLEAR_FREE_LOCK);
48cbe001 2737 flavor->thread_offline();
b4540e8a
MD
2738 ret = rcuja_delete_ht(ja->ht);
2739 if (ret)
2740 return ret;
f2ae7af7
MD
2741
2742 /* Wait for in-flight call_rcu free to complete. */
2743 flavor->barrier();
2744
48cbe001 2745 flavor->thread_online();
19a748d9 2746 ret = ja_final_checks(ja);
b4540e8a 2747 free(ja);
19a748d9 2748 return ret;
be9a7474 2749}
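
/*
 * Illustrative sketch (hypothetical caller code): teardown assumes all
 * entries have already been removed and reclaimed by the caller and that
 * no other thread accesses the array anymore.
 */
static __attribute__((unused))
void example_teardown(struct cds_ja *ja)
{
	int ret;

	ret = cds_ja_destroy(ja);
	if (ret)
		fprintf(stderr, "cds_ja_destroy error %d\n", ret);
}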