rbtree API change: handle node memory allocation internally
[urcu.git] / urcu-rbtree.c
1 /*
2 * urcu-rbtree.c
3 *
4 * Userspace RCU library - Red-Black Tree
5 *
6 * Copyright (c) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 *
22 * Implementation of RCU-adapted data structures and operations based on the RB
23 * tree algorithms found in chapter 12 of:
24 *
25 * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and
26 * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT
27 * Press, September 2009.
28 */
29
30 #define _BSD_SOURCE
31 #define _LGPL_SOURCE
32
33 #include <stdio.h>
34 #include <pthread.h>
35 #include <assert.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include <urcu/rcurbtree.h>
40 #include <urcu-pointer.h>
41 #include <urcu-call-rcu.h>
42 #include <urcu/compiler.h>
43
44 /*
45 * Explanation of next/prev walk coherency and search coherency when
46 * performed concurrently with updates.
47 *
48 * next/prev walk coherency with respect to concurrent updates:
49 *
50 * There are 3 scenarios for which we need to model and validate this:
51 * rotation, transplant and "teleportation" (the latter being a remote
52 * transplant in a remove non-nil case).
53 *
54 * - rotation left (right is symmetric)
55 * xl and yr point to the same parent nodes before/after left
56 * rotation. yll and ylr also point to the same parent node
57 * before/after left rotation.
58 * As we are copying x, y and yl (the 3 nodes which parent/child
59 * relationship are changed) to "new" version of this node cluster,
60 * all external references to the cluster either point to the old
61 * cluster or the new one. If we take this cluster as a "black box"
62 * from the point of view of next/prev traversal, all we have to
63 * ensure is that the old and the new cluster behave in the exact same
64 * way with respect to traversal order.
65 *
66 * - transplant
67 * In this operation, we transplant a copy of "v" into its parent
68 * location (u), thus replacing it. The children of "v", vl and vr,
69 * still point to v (new version) after the transplant, so it does not
70 * change the behavior when considering the next/prev traversal. "v"
71 * being copied to a new version ensures that the parent pointers of v
72 * are pointing to its new parent (parent of u) before it is published
73 * to readers (by setting the child pointer of u's parent to the new
74 * copy of v).
75 *
76 * - teleportation
77 * This one is probably the most tricky and will require some ascii
78 * art to explain.
79 *
80 * We want to remove z from this tree:
81 *
82 * zp
83 * \
84 * z
85 * / \
86 * zl zr
87 * /
88 * a
89 * / \
90 * b ar
91 * / \
92 * y br
93 * \
94 * yr
95 * / \
96 * yrl yrr
97 *
98 * What we are going to do is to "teleport" y into z's location,
99 * reparenting yr to b. We are taking care to create a new cluster
100 * copy that is isolated from any reader. We will represent the new
101 * members of the cluster with capital letters.
102 *
103 * zp
104 * \
105 * Y
106 * / \
107 * zl ZR
108 * /
109 * A
110 * / \
111 * B ar
112 * / \
113 * YR br
114 * / \
115 * yrl yrr
116 *
117 * In this transient state, we notice that the pointers within the
118 * cluster all point to the new cluster nodes, and they point to the
119 * correct external nodes. However, no external pointer point to the
120 * cluster (yet). The first pointer to point to this cluster will be
121 * "zp->right". It will therefore make the cluster visible for search.
122 *
123 * In this intermediate state, we can walk through the new cluster
124 * when coming from the top (in a next/prev traversal), but can come
125 * back to the old cluster when going back up from the children nodes.
126 * All we have to ensure is that the two clusters, taken as a black
127 * box from a next/prev traversal perspective, yield to the exact same
128 * result.
129 *
130 * Search coherency with concurrent updates:
131 *
132 * Simple "search" (only going down the tree) is also handled by this
133 * cluster scheme. The explanation is a subset of the prev/next
134 * explanation, where we don't have to care about the intermediate
135 * stages where the children point to the old cluster, because we only
136 * ever use the top level pointers to go down into the children nodes,
137 * we never go back up. So by simply making sure that all the cluster
138 * internal nodes pointers are setup correctly before making the
139 * cluster visible to the readers (by setting the parent pointer to
140 * the topmost new node in the cluster), we are sure that readers will
141 * see a coherent view of the cluster at all times.
142 */
143
144 #ifdef DEBUG
145 #define dbg_printf(args...) printf(args)
146 #define dbg_usleep(usecs) usleep(usecs)
147 #else
148 #define dbg_printf(args...)
149 #define dbg_usleep(usecs)
150 #endif
151
152 /*
153 * Undefine this to enable the non-RCU rotate and transplant functions
154 * (for debugging). Note that these versions don't support the tree
155 * max_end updates, so lookups must be performed with
156 * rcu_rbtree_search_begin_key when using this debug facility.
157 */
158 #define RBTREE_RCU_SUPPORT_ROTATE_LEFT
159 #define RBTREE_RCU_SUPPORT_ROTATE_RIGHT
160 #define RBTREE_RCU_SUPPORT_TRANSPLANT
161
162 #ifdef EXTRA_DEBUG
163 static pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER;
164 static pthread_mutex_t outer_mutex = PTHREAD_MUTEX_INITIALIZER;
165
166 static
167 void lock_outer_mutex(void)
168 {
169 pthread_mutex_lock(&outer_mutex);
170 }
171
172 static
173 void unlock_outer_mutex(void)
174 {
175 pthread_mutex_unlock(&outer_mutex);
176 }
177
178 static
179 void lock_test_mutex(void)
180 {
181 pthread_mutex_lock(&test_mutex);
182 }
183
184 static
185 void unlock_test_mutex(void)
186 {
187 pthread_mutex_unlock(&test_mutex);
188 }
189 #endif
190
191 static
192 void set_parent(struct rcu_rbtree_node *node,
193 struct rcu_rbtree_node *parent,
194 unsigned int pos)
195 {
196 _CMM_STORE_SHARED(node->parent, ((unsigned long) parent) | pos);
197 }
198
199 static
200 struct rcu_rbtree_node *get_parent(struct rcu_rbtree_node *node)
201 {
202 return (struct rcu_rbtree_node *) (node->parent & ~1UL);
203 }
204
205 static
206 unsigned int get_pos(struct rcu_rbtree_node *node)
207 {
208 return (unsigned int) (node->parent & 1UL);
209 }
210
211 static
212 struct rcu_rbtree_node *get_parent_and_pos(struct rcu_rbtree_node *node,
213 unsigned int *pos)
214 {
215 unsigned long parent_pos = rcu_dereference(node->parent);
216
217 *pos = (unsigned int) (parent_pos & 1UL);
218 return (struct rcu_rbtree_node *) (parent_pos & ~1UL);
219 }
220
221 static
222 void set_decay(struct rcu_rbtree_node *x, struct rcu_rbtree_node *xc)
223 {
224 x->decay_next = xc;
225 }
226
227 static
228 struct rcu_rbtree_node *get_decay(struct rcu_rbtree_node *x)
229 {
230 if (!x)
231 return NULL;
232 while (x->decay_next)
233 x = x->decay_next;
234 return x;
235 }
236
237 static
238 struct rcu_rbtree_node *is_decay(struct rcu_rbtree_node *x)
239 {
240 return x->decay_next;
241 }
242
243 static
244 void _rcu_rbtree_free(struct rcu_head *head)
245 {
246 struct rcu_rbtree_node *node =
247 caa_container_of(head, struct rcu_rbtree_node, head);
248 node->rbtree->rbfree(node);
249 }
250
251 static
252 struct rcu_rbtree_node *dup_decay_node(struct rcu_rbtree *rbtree,
253 struct rcu_rbtree_node *x)
254 {
255 struct rcu_rbtree_node *xc;
256
257 if (rcu_rbtree_is_nil(rbtree, x))
258 return x;
259
260 xc = rbtree->rballoc(sizeof(*xc));
261 memcpy(xc, x, sizeof(*xc));
262 xc->decay_next = NULL;
263 set_decay(x, xc);
264 call_rcu(&x->head, _rcu_rbtree_free);
265 return xc;
266 }
267
268 /*
269 * Info for range lookups:
270 * Range lookup information is only valid when used when searching for
271 * ranges. It should never be used in next/prev traversal because the
272 * pointers to parents are not in sync with the parent vision of the
273 * children range.
274 */
275 static
276 void set_left(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
277 struct rcu_rbtree_node *left)
278 {
279 node->_left = left;
280 }
281
282 static
283 void set_right(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
284 struct rcu_rbtree_node *right)
285 {
286 node->_right = right;
287 }
288
289 static
290 void *calculate_node_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node)
291 {
292 void *max_end;
293
294 max_end = node->end;
295 if (!rcu_rbtree_is_nil(rbtree, node->_right)) {
296 if (rbtree->comp(max_end, node->_right->max_end) < 0)
297 max_end = node->_right->max_end;
298 }
299 if (!rcu_rbtree_is_nil(rbtree, node->_left)) {
300 if (rbtree->comp(max_end, node->_left->max_end) < 0)
301 max_end = node->_left->max_end;
302 }
303 return max_end;
304 }
305
306 /*
307 * TODO
308 * Deal with memory allocation errors.
309 * Can be ensured by reserving a pool of memory entries before doing the
310 * insertion, which will have to be function of number of
311 * transplantations/rotations required for the operation (which is a
312 * multiple of the tree height).
313 */
314
315 #ifdef DEBUG
316 static
317 void show_tree(struct rcu_rbtree *rbtree)
318 {
319 struct rcu_rbtree_node *node;
320
321 node = rcu_rbtree_min(rbtree, rbtree->root);
322 while (!rcu_rbtree_is_nil(rbtree, node)) {
323 assert(!is_decay(node));
324 printf("{ b:%lX e:%lX pb: %lX r:%lX l:%lX %s %s %s} ",
325 (unsigned long) node->begin,
326 (unsigned long) node->end,
327 (unsigned long) get_parent(node)->begin,
328 (unsigned long) node->_right->begin,
329 (unsigned long) node->_left->begin,
330 node->color ? "red" : "black",
331 get_pos(node) ? "right" : "left",
332 rcu_rbtree_is_nil(rbtree, node) ? "nil" : "");
333 node = rcu_rbtree_next(rbtree, node);
334 }
335 printf("\n");
336 }
337
338 #define check_max_end(rbtree, x) \
339 do { \
340 if (rcu_rbtree_is_nil(rbtree, x)) \
341 break; \
342 assert(rbtree->comp(x->max_end, \
343 calculate_node_max_end(rbtree, x)) == 0); \
344 } while (0)
345
346 #else /* DEBUG */
347 static
348 void show_tree(struct rcu_rbtree *rbtree)
349 {
350 }
351
352 static
353 void check_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *x)
354 {
355 }
356 #endif /* DEBUG */
357
358 static
359 struct rcu_rbtree_node *make_nil(struct rcu_rbtree *rbtree)
360 {
361 return &rbtree->nil_node;
362 }
363
364 /*
365 * Iterative rbtree search.
366 */
367 struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree,
368 struct rcu_rbtree_node *x,
369 void *point)
370 {
371 struct rcu_rbtree_node *xl;
372
373 dbg_printf("searching point 0x%lx\n", (unsigned long) point);
374 x = rcu_dereference(x);
375
376 while (!rcu_rbtree_is_nil(rbtree, x)) {
377 dbg_usleep(10);
378 xl = rcu_dereference(x->_left);
379 dbg_printf("search x %lx x_end %lx x_max_end %lx\n", (unsigned long) x->begin,
380 (unsigned long) x->end, (unsigned long) x->max_end);
381 dbg_printf("search xl %lx xl_end %lx xl_max_end %lx\n", (unsigned long) xl->begin,
382 (unsigned long) xl->end, (unsigned long) xl->max_end);
383 if (!rcu_rbtree_is_nil(rbtree, xl)
384 && (rbtree->comp(xl->max_end, point) > 0)) {
385 dbg_printf("go left\n");
386 x = xl;
387 } else if (rbtree->comp(x->begin, point) <= 0
388 && rbtree->comp(point, x->end) < 0) {
389 dbg_printf("got it!\n");
390 break;
391 } else if (rbtree->comp(point, x->begin) > 0) {
392 dbg_printf("go right\n");
393 x = rcu_dereference(x->_right);
394 } else {
395 dbg_printf("not found!\n");
396 x = make_nil(rbtree);
397 }
398 }
399 if (rcu_rbtree_is_nil(rbtree, x))
400 dbg_printf("Reached bottom of tree.\n");
401
402 return x;
403 }
404
405 struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree,
406 struct rcu_rbtree_node *x,
407 void *begin, void *end)
408 {
409 struct rcu_rbtree_node *node;
410
411 node = rcu_rbtree_search(rbtree, x, begin);
412 if (rcu_rbtree_is_nil(rbtree, node))
413 return node;
414 if (rbtree->comp(node->end, end) < 0)
415 return NULL; /* High is outside lookup range */
416 return node;
417 }
418
419 /*
420 * Search by exact range start value.
421 */
422 struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree,
423 struct rcu_rbtree_node *x,
424 void *k)
425 {
426 x = rcu_dereference(x);
427 int comp;
428
429 while (!rcu_rbtree_is_nil(rbtree, x) && (comp = rbtree->comp(k, x->begin)) != 0) {
430 dbg_usleep(10);
431 if (comp < 0)
432 x = rcu_dereference(x->_left);
433 else
434 x = rcu_dereference(x->_right);
435 }
436 return x;
437 }
438
439 static
440 struct rcu_rbtree_node *rcu_rbtree_min_dup_decay(struct rcu_rbtree *rbtree,
441 struct rcu_rbtree_node *x,
442 struct rcu_rbtree_node **zr)
443 {
444 struct rcu_rbtree_node *xl;
445
446 x = rcu_dereference(x);
447
448 if (rcu_rbtree_is_nil(rbtree, x)) {
449 *zr = x;
450 return x;
451 } else
452 *zr = x = dup_decay_node(rbtree, x);
453
454 while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
455 x = dup_decay_node(rbtree, xl);
456 set_parent(x, get_decay(get_parent(x)), get_pos(x));
457 get_parent(x)->_left = get_decay(get_parent(x)->_left);
458 }
459 return x;
460 }
461
462 static
463 struct rcu_rbtree_node *rcu_rbtree_min_update_decay(struct rcu_rbtree *rbtree,
464 struct rcu_rbtree_node *x)
465 {
466 struct rcu_rbtree_node *xl;
467
468 x = rcu_dereference(x);
469
470 if (rcu_rbtree_is_nil(rbtree, x))
471 return x;
472 else {
473 set_parent(x->_right, get_decay(get_parent(x->_right)),
474 get_pos(x->_right));
475 set_parent(x->_left, get_decay(get_parent(x->_left)),
476 get_pos(x->_left));
477 }
478
479 while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
480 x = xl;
481 set_parent(x->_right, get_decay(get_parent(x->_right)),
482 get_pos(x->_right));
483 set_parent(x->_left, get_decay(get_parent(x->_left)),
484 get_pos(x->_left));
485 }
486 return x;
487 }
488
489 struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree,
490 struct rcu_rbtree_node *x)
491 {
492 struct rcu_rbtree_node *xl;
493
494 x = rcu_dereference(x);
495
496 if (rcu_rbtree_is_nil(rbtree, x))
497 return x;
498
499 while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left)))
500 x = xl;
501 return x;
502 }
503
504 struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree,
505 struct rcu_rbtree_node *x)
506 {
507 struct rcu_rbtree_node *xr;
508
509 x = rcu_dereference(x);
510
511 if (rcu_rbtree_is_nil(rbtree, x))
512 return x;
513
514 while (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right)))
515 x = xr;
516 return x;
517 }
518
519 /*
520 * RCU read lock must be held across the next/prev calls to ensure validity of
521 * the returned node.
522 */
523 struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree,
524 struct rcu_rbtree_node *x)
525 {
526 struct rcu_rbtree_node *xr, *y;
527 unsigned int x_pos;
528
529 x = rcu_dereference(x);
530
531 if (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right)))
532 return rcu_rbtree_min(rbtree, xr);
533 y = get_parent_and_pos(x, &x_pos);
534 while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_RIGHT) {
535 x = y;
536 y = get_parent_and_pos(y, &x_pos);
537 }
538 return y;
539 }
540
541 struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree,
542 struct rcu_rbtree_node *x)
543 {
544 struct rcu_rbtree_node *xl, *y;
545 unsigned int x_pos;
546
547 x = rcu_dereference(x);
548
549 if (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left)))
550 return rcu_rbtree_max(rbtree, xl);
551 y = get_parent_and_pos(x, &x_pos);
552 while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_LEFT) {
553 x = y;
554 y = get_parent_and_pos(y, &x_pos);
555 }
556 return y;
557 }
558
559 /*
560 * "node" needs to be non-visible by readers.
561 */
562 static
563 void populate_node_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
564 unsigned int copy_parents, struct rcu_rbtree_node *stop)
565 {
566 struct rcu_rbtree_node *prev = NULL, *orig_node = node, *top;
567
568 do {
569 void *max_end;
570
571 assert(node);
572 assert(!rcu_rbtree_is_nil(rbtree, node));
573
574 if (prev && copy_parents) {
575 node = dup_decay_node(rbtree, node);
576 if (get_pos(prev) == IS_RIGHT)
577 node->_right = prev;
578 else
579 node->_left = prev;
580 set_parent(prev, node, get_pos(prev));
581 }
582
583 max_end = calculate_node_max_end(rbtree, node);
584 /*
585 * Compare the node max_end keys to make sure we replace
586 * references to a key belonging to a node we remove
587 * from the tree. Otherwise we would still be using this
588 * pointer as an invalid reference after garbage
589 * collection of the node and of its associated
590 * begin/end pointers.
591 */
592 if (max_end != node->max_end) {
593 node->max_end = max_end;
594 } else {
595 top = get_parent(node);
596 cmm_smp_wmb(); /* write into node before publish */
597 /* make new branch visible to readers */
598 if (rcu_rbtree_is_nil(rbtree, top))
599 _CMM_STORE_SHARED(rbtree->root, node);
600 if (get_pos(node) == IS_LEFT)
601 _CMM_STORE_SHARED(top->_left, node);
602 else
603 _CMM_STORE_SHARED(top->_right, node);
604 goto end;
605 }
606
607 /* Check for propagation stop */
608 if (node == stop)
609 return;
610
611 prev = node;
612 node = get_parent(node);
613 } while (!rcu_rbtree_is_nil(rbtree, node));
614
615 top = node; /* nil */
616 cmm_smp_wmb(); /* write into node before publish */
617 /* make new branch visible to readers */
618 _CMM_STORE_SHARED(rbtree->root, prev);
619
620 end:
621 if (!copy_parents)
622 return;
623 /* update children */
624 node = orig_node;
625 do {
626 assert(!rcu_rbtree_is_nil(rbtree, node));
627 set_parent(node->_left, get_decay(get_parent(node->_left)), IS_LEFT);
628 set_parent(node->_right, get_decay(get_parent(node->_right)), IS_RIGHT);
629 } while ((node = get_parent(node)) != top);
630 }
631
632 /*
633 * We have to ensure these assumptions are correct for prev/next
634 * traversal:
635 *
636 * with x being a right child, the assumption that:
637 * get_parent(x)->_right == x
638 * or if x is a left child, the assumption that:
639 * get_parent(x)->_left == x
640 *
641 * This explains why we have to allocate a vc copy of the node for left_rotate,
642 * right_rotate and transplant operations.
643 *
644 * We always ensure that the right/left child and correct parent is set in the
645 * node copies *before* we reparent the children and make the upper-level point
646 * to the copy.
647 */
648
649 /* RCU: copy x and y, atomically point to new versions. GC old. */
650 /* Should be eventually followed by a cmm_smp_wmc() */
651
652 #ifdef RBTREE_RCU_SUPPORT_ROTATE_LEFT
653
654 static
655 void left_rotate(struct rcu_rbtree *rbtree,
656 struct rcu_rbtree_node *x)
657 {
658 struct rcu_rbtree_node *y, *y_left;
659
660 dbg_printf("left rotate %lx\n", (unsigned long) x->begin);
661
662 y = x->_right;
663 y_left = y->_left;
664
665 /* Now operate on new copy, decay old versions */
666 x = dup_decay_node(rbtree, x);
667 y = dup_decay_node(rbtree, y);
668 y_left = dup_decay_node(rbtree, y_left);
669
670 check_max_end(rbtree, get_parent(x));
671 check_max_end(rbtree, x);
672 check_max_end(rbtree, y);
673
674 /* Internal node modifications */
675 set_parent(y, get_parent(x), get_pos(x));
676 set_parent(x, y, IS_LEFT);
677 set_left(rbtree, y, x);
678 set_right(rbtree, x, y_left);
679
680 if (!rcu_rbtree_is_nil(rbtree, y_left))
681 set_parent(y_left, x, IS_RIGHT);
682
683 /*
684 * We only changed the relative position of x and y wrt their
685 * children, and reparented y (but are keeping the same nodes in
686 * place, so its parent does not need to have end value
687 * recalculated).
688 */
689 x->max_end = calculate_node_max_end(rbtree, x);
690 y->max_end = calculate_node_max_end(rbtree, y);
691
692 cmm_smp_wmb(); /* write into node before publish */
693
694 /* External references update (visible by readers) */
695 if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
696 _CMM_STORE_SHARED(rbtree->root, y);
697 else if (get_pos(y) == IS_LEFT)
698 _CMM_STORE_SHARED(get_parent(y)->_left, y);
699 else
700 _CMM_STORE_SHARED(get_parent(y)->_right, y);
701
702 /* Point children to new copy (parent only used by updates/next/prev) */
703 set_parent(x->_left, get_decay(get_parent(x->_left)),
704 get_pos(x->_left));
705 set_parent(y->_right, get_decay(get_parent(y->_right)),
706 get_pos(y->_right));
707 if (!rcu_rbtree_is_nil(rbtree, y_left)) {
708 set_parent(y_left->_right,
709 get_decay(get_parent(y_left->_right)),
710 get_pos(y_left->_right));
711 set_parent(y_left->_left,
712 get_decay(get_parent(y_left->_left)),
713 get_pos(y_left->_left));
714 }
715
716 /* Sanity checks */
717 assert(y == rbtree->root || get_parent(y)->_left == y
718 || get_parent(y)->_right == y);
719 assert(x == rbtree->root || get_parent(x)->_left == x
720 || get_parent(x)->_right == x);
721 assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
722 assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
723 assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
724 assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
725 assert(!is_decay(rbtree->root));
726 assert(!is_decay(x));
727 assert(!is_decay(y));
728 assert(!is_decay(x->_right));
729 assert(!is_decay(x->_left));
730 assert(!is_decay(y->_right));
731 assert(!is_decay(y->_left));
732 check_max_end(rbtree, get_parent(y));
733 check_max_end(rbtree, x);
734 check_max_end(rbtree, y);
735 }
736
737 #else
738
739 /* non-rcu version */
740 static
741 void left_rotate(struct rcu_rbtree *rbtree,
742 struct rcu_rbtree_node *x)
743 {
744 struct rcu_rbtree_node *y;
745
746 lock_test_mutex();
747 y = x->_right;
748 x->_right = y->_left;
749 if (!rcu_rbtree_is_nil(rbtree, y->_left))
750 set_parent(y->_left, x, IS_RIGHT);
751 set_parent(y, get_parent(x), get_pos(x));
752 if (rcu_rbtree_is_nil(rbtree, get_parent(x)))
753 rbtree->root = y;
754 else if (x == get_parent(x)->_left) {
755 get_parent(x)->_left = y;
756 } else {
757 get_parent(x)->_right = y;
758 }
759 y->_left = x;
760 set_parent(x, y, IS_LEFT);
761 unlock_test_mutex();
762 }
763
764 #endif
765
766 #ifdef RBTREE_RCU_SUPPORT_ROTATE_RIGHT
767 static
768 void right_rotate(struct rcu_rbtree *rbtree,
769 struct rcu_rbtree_node *x)
770 {
771 struct rcu_rbtree_node *y, *y_right;
772
773 dbg_printf("right rotate %lx\n", (unsigned long) x->begin);
774
775 y = x->_left;
776 y_right = y->_right;
777
778 /* Now operate on new copy, decay old versions */
779 x = dup_decay_node(rbtree, x);
780 y = dup_decay_node(rbtree, y);
781 y_right = dup_decay_node(rbtree, y_right);
782
783 check_max_end(rbtree, get_parent(x));
784 check_max_end(rbtree, x);
785 check_max_end(rbtree, y);
786
787 /* Internal node modifications */
788 set_parent(y, get_parent(x), get_pos(x));
789 set_parent(x, y, IS_RIGHT);
790 set_right(rbtree, y, x);
791 set_left(rbtree, x, y_right);
792
793 if (!rcu_rbtree_is_nil(rbtree, y_right))
794 set_parent(y_right, x, IS_LEFT);
795
796 /*
797 * We only changed the relative position of x and y wrt their
798 * children, and reparented y (but are keeping the same nodes in
799 * place, so its parent does not need to have end value
800 * recalculated).
801 */
802 x->max_end = calculate_node_max_end(rbtree, x);
803 y->max_end = calculate_node_max_end(rbtree, y);
804
805 cmm_smp_wmb(); /* write into node before publish */
806
807 /* External references update (visible by readers) */
808 if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
809 _CMM_STORE_SHARED(rbtree->root, y);
810 else if (get_pos(y) == IS_RIGHT)
811 _CMM_STORE_SHARED(get_parent(y)->_right, y);
812 else
813 _CMM_STORE_SHARED(get_parent(y)->_left, y);
814
815 /* Point children to new copy (parent only used by updates/next/prev) */
816 set_parent(x->_right, get_decay(get_parent(x->_right)),
817 get_pos(x->_right));
818 set_parent(y->_left, get_decay(get_parent(y->_left)),
819 get_pos(y->_left));
820 if (!rcu_rbtree_is_nil(rbtree, y_right)) {
821 set_parent(y_right->_left,
822 get_decay(get_parent(y_right->_left)),
823 get_pos(y_right->_left));
824 set_parent(y_right->_right,
825 get_decay(get_parent(y_right->_right)),
826 get_pos(y_right->_right));
827 }
828
829 /* Sanity checks */
830 assert(y == rbtree->root || get_parent(y)->_right == y
831 || get_parent(y)->_left == y);
832 assert(x == rbtree->root || get_parent(x)->_right == x
833 || get_parent(x)->_left == x);
834 assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
835 assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
836 assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
837 assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
838 assert(!is_decay(rbtree->root));
839 assert(!is_decay(x));
840 assert(!is_decay(y));
841 assert(!is_decay(x->_left));
842 assert(!is_decay(x->_right));
843 assert(!is_decay(y->_left));
844 assert(!is_decay(y->_right));
845 check_max_end(rbtree, x);
846 check_max_end(rbtree, y);
847 check_max_end(rbtree, get_parent(y));
848 }
849
850 #else
851
852 /* non-rcu version */
853 static
854 void right_rotate(struct rcu_rbtree *rbtree,
855 struct rcu_rbtree_node *x)
856 {
857 struct rcu_rbtree_node *y;
858
859 lock_test_mutex();
860 y = x->_left;
861 x->_left = y->_right;
862 if (!rcu_rbtree_is_nil(rbtree, y->_right))
863 set_parent(y->_right, x, IS_LEFT);
864 set_parent(y, get_parent(x), get_pos(x));
865 if (rcu_rbtree_is_nil(rbtree, get_parent(x)))
866 rbtree->root = y;
867 else if (x == get_parent(x)->_right) {
868 get_parent(x)->_right = y;
869 } else {
870 get_parent(x)->_left = y;
871 }
872 y->_right = x;
873 set_parent(x, y, IS_RIGHT);
874 unlock_test_mutex();
875 }
876
877 #endif
878
879 static void rcu_rbtree_insert_fixup(struct rcu_rbtree *rbtree,
880 struct rcu_rbtree_node *z)
881 {
882 struct rcu_rbtree_node *y;
883
884 dbg_printf("insert fixup %p\n", z->begin);
885 assert(!is_decay(rbtree->root));
886
887 while (get_parent(z)->color == COLOR_RED) {
888 if (get_parent(z) == get_parent(get_parent(z))->_left) {
889 y = get_parent(get_parent(z))->_right;
890 if (y->color == COLOR_RED) {
891 get_parent(z)->color = COLOR_BLACK;
892 y->color = COLOR_BLACK;
893 get_parent(get_parent(z))->color = COLOR_RED;
894 z = get_parent(get_parent(z));
895 } else {
896 if (z == get_parent(z)->_right) {
897 z = get_parent(z);
898 left_rotate(rbtree, z);
899 z = get_decay(z);
900 assert(!is_decay(rbtree->root));
901 }
902 get_parent(z)->color = COLOR_BLACK;
903 get_parent(get_parent(z))->color = COLOR_RED;
904 assert(!is_decay(z));
905 assert(!is_decay(get_parent(z)));
906 assert(!is_decay(get_parent(get_parent(z))));
907 right_rotate(rbtree, get_parent(get_parent(z)));
908 assert(!is_decay(z));
909 assert(!is_decay(rbtree->root));
910 }
911 } else {
912 y = get_parent(get_parent(z))->_left;
913 if (y->color == COLOR_RED) {
914 get_parent(z)->color = COLOR_BLACK;
915 y->color = COLOR_BLACK;
916 get_parent(get_parent(z))->color = COLOR_RED;
917 z = get_parent(get_parent(z));
918 } else {
919 if (z == get_parent(z)->_left) {
920 z = get_parent(z);
921 right_rotate(rbtree, z);
922 z = get_decay(z);
923 assert(!is_decay(rbtree->root));
924 }
925 get_parent(z)->color = COLOR_BLACK;
926 get_parent(get_parent(z))->color = COLOR_RED;
927 left_rotate(rbtree, get_parent(get_parent(z)));
928 assert(!is_decay(z));
929 assert(!is_decay(rbtree->root));
930 }
931 }
932 }
933 rbtree->root->color = COLOR_BLACK;
934 }
935
936 /*
937 * rcu_rbtree_insert - Insert a node in the RCU rbtree
938 *
939 * Returns 0 on success, or < 0 on error.
940 */
941 int rcu_rbtree_insert(struct rcu_rbtree *rbtree,
942 void *begin, void *end)
943 {
944 struct rcu_rbtree_node *x, *y, *z;
945
946 z = rbtree->rballoc(sizeof(*z));
947 z->begin = begin;
948 z->end = end;
949
950 dbg_printf("insert %p\n", z->begin);
951 assert(!is_decay(rbtree->root));
952
953 y = make_nil(rbtree);
954 x = rbtree->root;
955 while (!rcu_rbtree_is_nil(rbtree, x)) {
956 y = x;
957 if (rbtree->comp(z->begin, x->begin) < 0)
958 x = x->_left;
959 else
960 x = x->_right;
961 }
962
963 z->_left = make_nil(rbtree);
964 z->_right = make_nil(rbtree);
965 z->color = COLOR_RED;
966 z->decay_next = NULL;
967 z->max_end = z->end;
968 z->rbtree = rbtree;
969
970 if (rcu_rbtree_is_nil(rbtree, y)) {
971 set_parent(z, y, IS_RIGHT); /* pos arbitrary for root node */
972 /*
973 * Order stores to z (children/parents) before stores
974 * that will make it visible to the rest of the tree.
975 */
976 cmm_smp_wmb();
977 _CMM_STORE_SHARED(rbtree->root, z);
978 } else if (rbtree->comp(z->begin, y->begin) < 0) {
979 y = dup_decay_node(rbtree, y);
980 set_parent(z, y, IS_LEFT);
981 if (get_pos(z) == IS_LEFT)
982 _CMM_STORE_SHARED(y->_left, z);
983 else
984 _CMM_STORE_SHARED(y->_right, z);
985 populate_node_end(rbtree, y, 1, NULL);
986 } else {
987 y = dup_decay_node(rbtree, y);
988 set_parent(z, y, IS_RIGHT);
989 if (get_pos(z) == IS_LEFT)
990 _CMM_STORE_SHARED(y->_left, z);
991 else
992 _CMM_STORE_SHARED(y->_right, z);
993 populate_node_end(rbtree, y, 1, NULL);
994 }
995 rcu_rbtree_insert_fixup(rbtree, z);
996 /*
997 * Make sure to commit all _CMM_STORE_SHARED() for non-coherent caches.
998 */
999 cmm_smp_wmc();
1000 show_tree(rbtree);
1001 check_max_end(rbtree, z);
1002 check_max_end(rbtree, y);
1003
1004 return 0;
1005 }
1006
1007 /*
1008 * Transplant v into u position.
1009 */
1010
1011 #ifdef RBTREE_RCU_SUPPORT_TRANSPLANT
1012
1013 static
1014 void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
1015 struct rcu_rbtree_node *u,
1016 struct rcu_rbtree_node *v,
1017 unsigned int copy_parents,
1018 struct rcu_rbtree_node *stop)
1019 {
1020 dbg_printf("transplant %p\n", v->begin);
1021
1022 if (!rcu_rbtree_is_nil(rbtree, v))
1023 v = dup_decay_node(rbtree, v);
1024
1025 if (rcu_rbtree_is_nil(rbtree, get_parent(u))) {
1026 /* pos is arbitrary for root node */
1027 set_parent(v, get_parent(u), IS_RIGHT);
1028 cmm_smp_wmb(); /* write into node before publish */
1029 _CMM_STORE_SHARED(rbtree->root, v);
1030 } else {
1031 struct rcu_rbtree_node *vp;
1032
1033 vp = get_parent(u);
1034 if (copy_parents)
1035 vp = dup_decay_node(rbtree, vp);
1036 set_parent(v, vp, get_pos(u));
1037 if (get_pos(v) == IS_LEFT)
1038 _CMM_STORE_SHARED(vp->_left, v);
1039 else
1040 _CMM_STORE_SHARED(vp->_right, v);
1041 populate_node_end(rbtree, vp, copy_parents, stop);
1042 check_max_end(rbtree, vp);
1043 }
1044
1045 /* Point children to new copy (parent only used by updates/next/prev) */
1046 if (!rcu_rbtree_is_nil(rbtree, v)) {
1047 set_parent(v->_right, get_decay(get_parent(v->_right)),
1048 get_pos(v->_right));
1049 set_parent(v->_left, get_decay(get_parent(v->_left)),
1050 get_pos(v->_left));
1051 }
1052 assert(!is_decay(rbtree->root));
1053 check_max_end(rbtree, v);
1054 }
1055
1056 #else
1057
1058 /* Non-RCU version */
1059 static
1060 void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
1061 struct rcu_rbtree_node *u,
1062 struct rcu_rbtree_node *v,
1063 unsigned int copy_parents,
1064 struct rcu_rbtree_node *stop)
1065 {
1066 dbg_printf("transplant %p\n", v->begin);
1067
1068 lock_test_mutex();
1069 if (rcu_rbtree_is_nil(rbtree, get_parent(u)))
1070 rbtree->root = v;
1071 else if (u == get_parent(u)->_left)
1072 get_parent(u)->_left = v;
1073 else
1074 get_parent(u)->_right = v;
1075 set_parent(v, get_parent(u), get_pos(u));
1076 unlock_test_mutex();
1077 }
1078
1079 #endif
1080
1081 static void rcu_rbtree_remove_fixup(struct rcu_rbtree *rbtree,
1082 struct rcu_rbtree_node *x)
1083 {
1084 dbg_printf("remove fixup %p\n", x->begin);
1085
1086 while (x != rbtree->root && x->color == COLOR_BLACK) {
1087 assert(!is_decay(get_parent(x)));
1088 assert(!is_decay(get_parent(x)->_left));
1089 if (x == get_parent(x)->_left) {
1090 struct rcu_rbtree_node *w;
1091
1092 w = get_parent(x)->_right;
1093
1094 if (w->color == COLOR_RED) {
1095 w->color = COLOR_BLACK;
1096 get_parent(x)->color = COLOR_RED;
1097 left_rotate(rbtree, get_parent(x));
1098 x = get_decay(x);
1099 assert(!is_decay(rbtree->root));
1100 w = get_parent(x)->_right;
1101 }
1102 if (w->_left->color == COLOR_BLACK
1103 && w->_right->color == COLOR_BLACK) {
1104 w->color = COLOR_RED;
1105 x = get_parent(x);
1106 assert(!is_decay(rbtree->root));
1107 assert(!is_decay(x));
1108 } else {
1109 if (w->_right->color == COLOR_BLACK) {
1110 w->_left->color = COLOR_BLACK;
1111 w->color = COLOR_RED;
1112 right_rotate(rbtree, w);
1113 assert(!is_decay(rbtree->root));
1114 x = get_decay(x);
1115 w = get_parent(x)->_right;
1116 }
1117 w->color = get_parent(x)->color;
1118 get_parent(x)->color = COLOR_BLACK;
1119 w->_right->color = COLOR_BLACK;
1120 left_rotate(rbtree, get_parent(x));
1121 assert(!is_decay(rbtree->root));
1122 x = rbtree->root;
1123 }
1124 } else {
1125 struct rcu_rbtree_node *w;
1126
1127 w = get_parent(x)->_left;
1128
1129 if (w->color == COLOR_RED) {
1130 w->color = COLOR_BLACK;
1131 get_parent(x)->color = COLOR_RED;
1132 right_rotate(rbtree, get_parent(x));
1133 assert(!is_decay(rbtree->root));
1134 x = get_decay(x);
1135 w = get_parent(x)->_left;
1136 }
1137 if (w->_right->color == COLOR_BLACK
1138 && w->_left->color == COLOR_BLACK) {
1139 w->color = COLOR_RED;
1140 x = get_parent(x);
1141 assert(!is_decay(rbtree->root));
1142 assert(!is_decay(x));
1143 } else {
1144 if (w->_left->color == COLOR_BLACK) {
1145 w->_right->color = COLOR_BLACK;
1146 w->color = COLOR_RED;
1147 left_rotate(rbtree, w);
1148 assert(!is_decay(rbtree->root));
1149 x = get_decay(x);
1150 w = get_parent(x)->_left;
1151 }
1152 w->color = get_parent(x)->color;
1153 get_parent(x)->color = COLOR_BLACK;
1154 w->_left->color = COLOR_BLACK;
1155 right_rotate(rbtree, get_parent(x));
1156 assert(!is_decay(rbtree->root));
1157 x = rbtree->root;
1158 }
1159 }
1160 }
1161 x->color = COLOR_BLACK;
1162 }
1163
1164 /*
1165 * Delete z. All non-copied children left/right positions are unchanged.
1166 */
1167 static
1168 void rcu_rbtree_remove_nonil(struct rcu_rbtree *rbtree,
1169 struct rcu_rbtree_node *z,
1170 struct rcu_rbtree_node *y)
1171 {
1172 struct rcu_rbtree_node *x;
1173
1174 dbg_printf("remove nonil %p\n", z->begin);
1175 show_tree(rbtree);
1176
1177 assert(!is_decay(z));
1178 assert(!is_decay(y));
1179 assert(!is_decay(y->_right));
1180 assert(!is_decay(get_parent(y)));
1181 x = y->_right;
1182 assert(!is_decay(x));
1183 if (get_parent(y) == z) {
1184 y = dup_decay_node(rbtree, y);
1185 set_parent(x, y, get_pos(x)); /* parent for nil */
1186 /* y is z's right node */
1187 set_left(rbtree, y, z->_left);
1188 y->max_end = calculate_node_max_end(rbtree, y);
1189 rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
1190 } else {
1191 struct rcu_rbtree_node *oy_right, *z_right;
1192
1193 /*
1194 * Need to make sure y is always visible by readers.
1195 */
1196 y = rcu_rbtree_min_dup_decay(rbtree, z->_right, &z_right);
1197 assert(!is_decay(y));
1198 assert(!is_decay(z));
1199 oy_right = y->_right;
1200
1201 /*
1202 * The max child begin of z_right does not change, because
1203 * we're only changing its left children.
1204 */
1205 y->_right = z_right;
1206 set_parent(y->_right, y, IS_RIGHT);
1207 assert(!is_decay(z->_left));
1208 y->_left = z->_left;
1209 assert(!is_decay(oy_right));
1210 /*
1211 * Transplant of oy_right to old y's location will only
1212 * trigger a "end" value update of the already copied branch
1213 * (which is not visible yet). We are transplanting
1214 * oy_right as a left child of old y's parent, so the
1215 * min values update propagated upward necessarily stops
1216 * at z_right.
1217 */
1218 rcu_rbtree_transplant(rbtree, y, oy_right, 0, y);
1219 y->max_end = calculate_node_max_end(rbtree, y);
1220 rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
1221 /* Update children */
1222 (void) rcu_rbtree_min_update_decay(rbtree, y->_right);
1223 }
1224 y = get_decay(y);
1225 assert(!is_decay(z));
1226 assert(!is_decay(z->_left));
1227 y->color = z->color;
1228 set_parent(y->_left, y, IS_LEFT);
1229 set_parent(y->_right, get_decay(get_parent(y->_right)), IS_RIGHT);
1230 assert(!is_decay(y->_left));
1231 assert(!is_decay(y->_right));
1232 }
1233
1234 int rcu_rbtree_remove(struct rcu_rbtree *rbtree,
1235 struct rcu_rbtree_node *z)
1236 {
1237 struct rcu_rbtree_node *x, *y;
1238 unsigned int y_original_color;
1239
1240 assert(!is_decay(rbtree->root));
1241 dbg_printf("remove %p\n", z->begin);
1242 show_tree(rbtree);
1243
1244 assert(!is_decay(z));
1245 y = z;
1246 y_original_color = y->color;
1247
1248 if (rcu_rbtree_is_nil(rbtree, z->_left)) {
1249 rcu_rbtree_transplant(rbtree, z, z->_right, 1, NULL);
1250 assert(!is_decay(z));
1251 x = get_decay(z->_right);
1252 show_tree(rbtree);
1253 } else if (rcu_rbtree_is_nil(rbtree, z->_right)) {
1254 rcu_rbtree_transplant(rbtree, z, z->_left, 1, NULL);
1255 assert(!is_decay(z));
1256 x = get_decay(z->_left);
1257 show_tree(rbtree);
1258 } else {
1259 y = rcu_rbtree_min(rbtree, z->_right);
1260 assert(!is_decay(y));
1261 y_original_color = y->color;
1262 x = y->_right;
1263 rcu_rbtree_remove_nonil(rbtree, z, y);
1264 x = get_decay(x);
1265 show_tree(rbtree);
1266 }
1267 if (y_original_color == COLOR_BLACK)
1268 rcu_rbtree_remove_fixup(rbtree, x);
1269 show_tree(rbtree);
1270 check_max_end(rbtree, x);
1271 check_max_end(rbtree, get_decay(y));
1272 /*
1273 * Commit all _CMM_STORE_SHARED().
1274 */
1275 cmm_smp_wmc();
1276 call_rcu(&z->head, _rcu_rbtree_free);
1277
1278 return 0;
1279 }
This page took 0.053779 seconds and 5 git commands to generate.