Add explanation about reader vs writer concurrency behavior
[urcu.git] / urcu-rbtree.c
1 /*
2 * urcu-rbtree.c
3 *
4 * Userspace RCU library - Red-Black Tree
5 *
6 * Copyright (c) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 *
22 * Implementation of RCU-adapted data structures and operations based on the RB
23 * tree algorithms found in chapter 12 of:
24 *
25 * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and
26 * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT
27 * Press, September 2009.
28 */
29
30 #define _BSD_SOURCE
31 #define _LGPL_SOURCE
32
33 #include <stdio.h>
34 #include <pthread.h>
35 #include <assert.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include <urcu/rcurbtree.h>
40 #include <urcu-pointer.h>
41 #include <urcu-call-rcu.h>
42 #include <urcu/compiler.h>
43
44 /*
45 * Explanation of next/prev walk coherency and search coherency when
46 * performed concurrently with updates.
47 *
48 * next/prev walk coherency with respect to concurrent updates:
49 *
50 * There are 3 scenarios for which we need to model and validate this:
51 * rotation, transplant and "teleportation" (the latter being a remote
52 * transplant in a remove non-nil case).
53 *
54 * - rotation left (right is symmetric)
55 * xl and yr point to the same parent nodes before/after left
56 * rotation. yll and ylr also point to the same parent node
57 * before/after left rotation.
58 * As we are copying x, y and yl (the 3 nodes which parent/child
59 * relationship are changed) to "new" version of this node cluster,
60 * all external references to the cluster either point to the old
61 * cluster or the new one. If we take this cluster as a "black box"
62 * from the point of view of next/prev traversal, all we have to
63 * ensure is that the old and the new cluster behave in the exact same
64 * way with respect to traversal order.
65 *
66 * - transplant
67 * In this operation, we transplant a copy of "v" into its parent
68 * location (u), thus replacing it. The children of "v", vl and vr,
69 * still point to v (new version) after the transplant, so it does not
70 * change the behavior when considering the next/prev traversal. "v"
71 * being copied to a new version ensures that the parent pointers of v
72 * are pointing to its new parent (parent of u) before it is published
73 * to readers (by setting the child pointer of u's parent to the new
74 * copy of v).
75 *
76 * - teleportation
77 * This one is probably the most tricky and will require some ascii
78 * art to explain.
79 *
80 * We want to remove z from this tree:
81 *
82 * zp
83 * \
84 * z
85 * / \
86 * zl zr
87 * /
88 * a
89 * / \
90 * b ar
91 * / \
92 * y br
93 * \
94 * yr
95 * / \
96 * yrl yrr
97 *
98 * What we are going to do is to "teleport" y into z's location,
99 * reparenting yr to b. We are taking care to create a new cluster
100 * copy that is isolated from any reader. We will represent the new
101 * members of the cluster with capital letters.
102 *
103 * zp
104 * \
105 * Y
106 * / \
107 * zl ZR
108 * /
109 * A
110 * / \
111 * B ar
112 * / \
113 * YR br
114 * / \
115 * yrl yrr
116 *
117 * In this transient state, we notice that the pointers within the
118 * cluster all point to the new cluster nodes, and they point to the
119 * correct external nodes. However, no external pointer point to the
120 * cluster (yet). The first pointer to point to this cluster will be
121 * "zp->right". It will therefore make the cluster visible for search.
122 *
123 * In this intermediate state, we can walk through the new cluster
124 * when coming from the top (in a next/prev traversal), but can come
125 * back to the old cluster when going back up from the children nodes.
126 * All we have to ensure is that the two clusters, taken as a black
127 * box from a next/prev traversal perspective, yield to the exact same
128 * result.
129 *
130 * Search coherency with concurrent updates:
131 *
132 * Simple "search" (only going down the tree) is also handled by this
133 * cluster scheme. The explanation is a subset of the prev/next
134 * explanation, where we don't have to care about the intermediate
135 * stages where the children point to the old cluster, because we only
136 * ever use the top level pointers to go down into the children nodes,
137 * we never go back up. So by simply making sure that all the cluster
138 * internal nodes pointers are setup correctly before making the
139 * cluster visible to the readers (by setting the parent pointer to
140 * the topmost new node in the cluster), we are sure that readers will
141 * see a coherent view of the cluster at all times.
142 */
143
144 #ifdef DEBUG
145 #define dbg_printf(args...) printf(args)
146 #define dbg_usleep(usecs) usleep(usecs)
147 #else
148 #define dbg_printf(args...)
149 #define dbg_usleep(usecs)
150 #endif
151
152 /*
153 * Undefine this to enable the non-RCU rotate and transplant functions
154 * (for debugging).
155 */
156 #define RBTREE_RCU_SUPPORT_ROTATE_LEFT
157 #define RBTREE_RCU_SUPPORT_ROTATE_RIGHT
158 #define RBTREE_RCU_SUPPORT_TRANSPLANT
159
160 #ifdef EXTRA_DEBUG
161 static pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER;
162 static pthread_mutex_t outer_mutex = PTHREAD_MUTEX_INITIALIZER;
163
164 static
165 void lock_outer_mutex(void)
166 {
167 pthread_mutex_lock(&outer_mutex);
168 }
169
170 static
171 void unlock_outer_mutex(void)
172 {
173 pthread_mutex_unlock(&outer_mutex);
174 }
175
176 static
177 void lock_test_mutex(void)
178 {
179 pthread_mutex_lock(&test_mutex);
180 }
181
182 static
183 void unlock_test_mutex(void)
184 {
185 pthread_mutex_unlock(&test_mutex);
186 }
187 #endif
188
189 static
190 void set_parent(struct rcu_rbtree_node *node,
191 struct rcu_rbtree_node *parent,
192 unsigned int pos)
193 {
194 _CMM_STORE_SHARED(node->parent, ((unsigned long) parent) | pos);
195 }
196
197 static
198 struct rcu_rbtree_node *get_parent(struct rcu_rbtree_node *node)
199 {
200 return (struct rcu_rbtree_node *) (node->parent & ~1UL);
201 }
202
203 static
204 unsigned int get_pos(struct rcu_rbtree_node *node)
205 {
206 return (unsigned int) (node->parent & 1UL);
207 }
208
209 static
210 struct rcu_rbtree_node *get_parent_and_pos(struct rcu_rbtree_node *node,
211 unsigned int *pos)
212 {
213 unsigned long parent_pos = rcu_dereference(node->parent);
214
215 *pos = (unsigned int) (parent_pos & 1UL);
216 return (struct rcu_rbtree_node *) (parent_pos & ~1UL);
217 }
218
219 static
220 void set_decay(struct rcu_rbtree_node *x, struct rcu_rbtree_node *xc)
221 {
222 x->decay_next = xc;
223 }
224
225 static
226 struct rcu_rbtree_node *get_decay(struct rcu_rbtree_node *x)
227 {
228 if (!x)
229 return NULL;
230 while (x->decay_next)
231 x = x->decay_next;
232 return x;
233 }
234
235 static
236 struct rcu_rbtree_node *is_decay(struct rcu_rbtree_node *x)
237 {
238 return x->decay_next;
239 }
240
241 static
242 struct rcu_rbtree_node *dup_decay_node(struct rcu_rbtree *rbtree,
243 struct rcu_rbtree_node *x)
244 {
245 struct rcu_rbtree_node *xc;
246
247 if (rcu_rbtree_is_nil(rbtree, x))
248 return x;
249
250 xc = rbtree->rballoc();
251 memcpy(xc, x, sizeof(struct rcu_rbtree_node));
252 xc->decay_next = NULL;
253 set_decay(x, xc);
254 call_rcu(&x->head, rbtree->rbfree);
255 return xc;
256 }
257
258 /*
259 * Info for range lookups:
260 * Range lookup information is only valid when used when searching for
261 * ranges. It should never be used in next/prev traversal because the
262 * pointers to parents are not in sync with the parent vision of the
263 * children range.
264 */
265 static
266 void set_left(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
267 struct rcu_rbtree_node *left)
268 {
269 node->_left = left;
270 }
271
272 static
273 void set_right(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
274 struct rcu_rbtree_node *right)
275 {
276 node->_right = right;
277 }
278
279 static
280 void *calculate_node_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node)
281 {
282 void *max_end;
283
284 max_end = node->end;
285 if (!rcu_rbtree_is_nil(rbtree, node->_right)) {
286 if (rbtree->comp(max_end, node->_right->max_end) < 0)
287 max_end = node->_right->max_end;
288 }
289 if (!rcu_rbtree_is_nil(rbtree, node->_left)) {
290 if (rbtree->comp(max_end, node->_left->max_end) < 0)
291 max_end = node->_left->max_end;
292 }
293 return max_end;
294 }
295
296 /*
297 * TODO
298 * Deal with memory allocation errors.
299 * Can be ensured by reserving a pool of memory entries before doing the
300 * insertion, which will have to be function of number of
301 * transplantations/rotations required for the operation (which is a
302 * multiple of the tree height).
303 */
304
305 #ifdef DEBUG
306 static
307 void show_tree(struct rcu_rbtree *rbtree)
308 {
309 struct rcu_rbtree_node *node;
310
311 node = rcu_rbtree_min(rbtree, rbtree->root);
312 while (!rcu_rbtree_is_nil(rbtree, node)) {
313 assert(!is_decay(node));
314 printf("{ b:%lX e:%lX pb: %lX r:%lX l:%lX %s %s %s} ",
315 (unsigned long) node->begin,
316 (unsigned long) node->end,
317 (unsigned long) get_parent(node)->begin,
318 (unsigned long) node->_right->begin,
319 (unsigned long) node->_left->begin,
320 node->color ? "red" : "black",
321 get_pos(node) ? "right" : "left",
322 rcu_rbtree_is_nil(rbtree, node) ? "nil" : "");
323 node = rcu_rbtree_next(rbtree, node);
324 }
325 printf("\n");
326 }
327
328 #define check_max_end(rbtree, x) \
329 do { \
330 if (rcu_rbtree_is_nil(rbtree, x)) \
331 break; \
332 assert(rbtree->comp(x->max_end, \
333 calculate_node_max_end(rbtree, x)) == 0); \
334 } while (0)
335
336 #else /* DEBUG */
337 static
338 void show_tree(struct rcu_rbtree *rbtree)
339 {
340 }
341
342 static
343 void check_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *x)
344 {
345 }
346 #endif /* DEBUG */
347
348 static
349 struct rcu_rbtree_node *make_nil(struct rcu_rbtree *rbtree)
350 {
351 return &rbtree->nil_node;
352 }
353
354 /*
355 * Iterative rbtree search.
356 */
357 struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree,
358 struct rcu_rbtree_node *x,
359 void *point)
360 {
361 struct rcu_rbtree_node *xl;
362
363 dbg_printf("searching point 0x%lx\n", (unsigned long) point);
364 x = rcu_dereference(x);
365
366 while (!rcu_rbtree_is_nil(rbtree, x)) {
367 dbg_usleep(10);
368 xl = rcu_dereference(x->_left);
369 dbg_printf("search x %lx x_end %lx x_max_end %lx\n", (unsigned long) x->begin,
370 (unsigned long) x->end, (unsigned long) x->max_end);
371 dbg_printf("search xl %lx xl_end %lx xl_max_end %lx\n", (unsigned long) xl->begin,
372 (unsigned long) xl->end, (unsigned long) xl->max_end);
373 if (!rcu_rbtree_is_nil(rbtree, xl)
374 && (rbtree->comp(xl->max_end, point) > 0)) {
375 dbg_printf("go left\n");
376 x = xl;
377 } else if (rbtree->comp(x->begin, point) >= 0
378 && rbtree->comp(point, x->end) < 0) {
379 dbg_printf("got it!\n");
380 break;
381 } else if (rbtree->comp(point, x->begin) > 0) {
382 dbg_printf("go right\n");
383 x = rcu_dereference(x->_right);
384 } else {
385 dbg_printf("not found!\n");
386 x = make_nil(rbtree);
387 }
388 }
389 if (rcu_rbtree_is_nil(rbtree, x))
390 dbg_printf("Reached bottom of tree.\n");
391
392 return x;
393 }
394
395 struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree,
396 struct rcu_rbtree_node *x,
397 void *begin, void *end)
398 {
399 struct rcu_rbtree_node *node;
400
401 node = rcu_rbtree_search(rbtree, x, begin);
402 if (rcu_rbtree_is_nil(rbtree, node))
403 return node;
404 if (rbtree->comp(node->end, end) < 0)
405 return NULL; /* High is outside lookup range */
406 return node;
407 }
408
409 /*
410 * Search by exact range start value.
411 */
412 struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree,
413 struct rcu_rbtree_node *x,
414 void *k)
415 {
416 x = rcu_dereference(x);
417 int comp;
418
419 while (!rcu_rbtree_is_nil(rbtree, x) && (comp = rbtree->comp(k, x->begin)) != 0) {
420 dbg_usleep(10);
421 if (comp < 0)
422 x = rcu_dereference(x->_left);
423 else
424 x = rcu_dereference(x->_right);
425 }
426 return x;
427 }
428
429 static
430 struct rcu_rbtree_node *rcu_rbtree_min_dup_decay(struct rcu_rbtree *rbtree,
431 struct rcu_rbtree_node *x,
432 struct rcu_rbtree_node **zr)
433 {
434 struct rcu_rbtree_node *xl;
435
436 x = rcu_dereference(x);
437
438 if (rcu_rbtree_is_nil(rbtree, x)) {
439 *zr = x;
440 return x;
441 } else
442 *zr = x = dup_decay_node(rbtree, x);
443
444 while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
445 x = dup_decay_node(rbtree, xl);
446 set_parent(x, get_decay(get_parent(x)), get_pos(x));
447 get_parent(x)->_left = get_decay(get_parent(x)->_left);
448 }
449 return x;
450 }
451
452 static
453 struct rcu_rbtree_node *rcu_rbtree_min_update_decay(struct rcu_rbtree *rbtree,
454 struct rcu_rbtree_node *x)
455 {
456 struct rcu_rbtree_node *xl;
457
458 x = rcu_dereference(x);
459
460 if (rcu_rbtree_is_nil(rbtree, x))
461 return x;
462 else {
463 set_parent(x->_right, get_decay(get_parent(x->_right)),
464 get_pos(x->_right));
465 set_parent(x->_left, get_decay(get_parent(x->_left)),
466 get_pos(x->_left));
467 }
468
469 while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
470 x = xl;
471 set_parent(x->_right, get_decay(get_parent(x->_right)),
472 get_pos(x->_right));
473 set_parent(x->_left, get_decay(get_parent(x->_left)),
474 get_pos(x->_left));
475 }
476 return x;
477 }
478
479 struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree,
480 struct rcu_rbtree_node *x)
481 {
482 struct rcu_rbtree_node *xl;
483
484 x = rcu_dereference(x);
485
486 if (rcu_rbtree_is_nil(rbtree, x))
487 return x;
488
489 while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left)))
490 x = xl;
491 return x;
492 }
493
494 struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree,
495 struct rcu_rbtree_node *x)
496 {
497 struct rcu_rbtree_node *xr;
498
499 x = rcu_dereference(x);
500
501 if (rcu_rbtree_is_nil(rbtree, x))
502 return x;
503
504 while (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right)))
505 x = xr;
506 return x;
507 }
508
509 /*
510 * RCU read lock must be held across the next/prev calls to ensure validity of
511 * the returned node.
512 */
513 struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree,
514 struct rcu_rbtree_node *x)
515 {
516 struct rcu_rbtree_node *xr, *y;
517 unsigned int x_pos;
518
519 x = rcu_dereference(x);
520
521 if (!rcu_rbtree_is_nil(rbtree, xr = rcu_dereference(x->_right)))
522 return rcu_rbtree_min(rbtree, xr);
523 y = get_parent_and_pos(x, &x_pos);
524 while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_RIGHT) {
525 x = y;
526 y = get_parent_and_pos(y, &x_pos);
527 }
528 return y;
529 }
530
531 struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree,
532 struct rcu_rbtree_node *x)
533 {
534 struct rcu_rbtree_node *xl, *y;
535 unsigned int x_pos;
536
537 x = rcu_dereference(x);
538
539 if (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left)))
540 return rcu_rbtree_max(rbtree, xl);
541 y = get_parent_and_pos(x, &x_pos);
542 while (!rcu_rbtree_is_nil(rbtree, y) && x_pos == IS_LEFT) {
543 x = y;
544 y = get_parent_and_pos(y, &x_pos);
545 }
546 return y;
547 }
548
549 /*
550 * "node" needs to be non-visible by readers.
551 */
552 static
553 void populate_node_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
554 unsigned int copy_parents, struct rcu_rbtree_node *stop)
555 {
556 struct rcu_rbtree_node *prev = NULL, *orig_node = node, *top;
557
558 do {
559 void *max_end;
560
561 assert(node);
562 assert(!rcu_rbtree_is_nil(rbtree, node));
563
564 if (prev && copy_parents) {
565 node = dup_decay_node(rbtree, node);
566 if (get_pos(prev) == IS_RIGHT)
567 node->_right = prev;
568 else
569 node->_left = prev;
570 set_parent(prev, node, get_pos(prev));
571 }
572
573 max_end = calculate_node_max_end(rbtree, node);
574 /*
575 * Compare the node max_end keys to make sure we replace
576 * references to a key belonging to a node we remove
577 * from the tree. Otherwise we would still be using this
578 * pointer as an invalid reference after garbage
579 * collection of the node and of its associated
580 * begin/end pointers.
581 */
582 if (max_end != node->max_end) {
583 node->max_end = max_end;
584 } else {
585 top = get_parent(node);
586 cmm_smp_wmb(); /* write into node before publish */
587 /* make new branch visible to readers */
588 if (rcu_rbtree_is_nil(rbtree, top))
589 _CMM_STORE_SHARED(rbtree->root, node);
590 if (get_pos(node) == IS_LEFT)
591 _CMM_STORE_SHARED(top->_left, node);
592 else
593 _CMM_STORE_SHARED(top->_right, node);
594 goto end;
595 }
596
597 /* Check for propagation stop */
598 if (node == stop)
599 return;
600
601 prev = node;
602 node = get_parent(node);
603 } while (!rcu_rbtree_is_nil(rbtree, node));
604
605 top = node; /* nil */
606 cmm_smp_wmb(); /* write into node before publish */
607 /* make new branch visible to readers */
608 _CMM_STORE_SHARED(rbtree->root, prev);
609
610 end:
611 if (!copy_parents)
612 return;
613 /* update children */
614 node = orig_node;
615 do {
616 assert(!rcu_rbtree_is_nil(rbtree, node));
617 set_parent(node->_left, get_decay(get_parent(node->_left)), IS_LEFT);
618 set_parent(node->_right, get_decay(get_parent(node->_right)), IS_RIGHT);
619 } while ((node = get_parent(node)) != top);
620 }
621
622 /*
623 * We have to ensure these assumptions are correct for prev/next
624 * traversal:
625 *
626 * with x being a right child, the assumption that:
627 * get_parent(x)->_right == x
628 * or if x is a left child, the assumption that:
629 * get_parent(x)->_left == x
630 *
631 * This explains why we have to allocate a vc copy of the node for left_rotate,
632 * right_rotate and transplant operations.
633 *
634 * We always ensure that the right/left child and correct parent is set in the
635 * node copies *before* we reparent the children and make the upper-level point
636 * to the copy.
637 */
638
639 /* RCU: copy x and y, atomically point to new versions. GC old. */
640 /* Should be eventually followed by a cmm_smp_wmc() */
641
642 #ifdef RBTREE_RCU_SUPPORT_ROTATE_LEFT
643
644 static
645 void left_rotate(struct rcu_rbtree *rbtree,
646 struct rcu_rbtree_node *x)
647 {
648 struct rcu_rbtree_node *y, *y_left;
649
650 dbg_printf("left rotate %lx\n", (unsigned long) x->begin);
651
652 y = x->_right;
653 y_left = y->_left;
654
655 /* Now operate on new copy, decay old versions */
656 x = dup_decay_node(rbtree, x);
657 y = dup_decay_node(rbtree, y);
658 y_left = dup_decay_node(rbtree, y_left);
659
660 check_max_end(rbtree, get_parent(x));
661 check_max_end(rbtree, x);
662 check_max_end(rbtree, y);
663
664 /* Internal node modifications */
665 set_parent(y, get_parent(x), get_pos(x));
666 set_parent(x, y, IS_LEFT);
667 set_left(rbtree, y, x);
668 set_right(rbtree, x, y_left);
669
670 if (!rcu_rbtree_is_nil(rbtree, y_left))
671 set_parent(y_left, x, IS_RIGHT);
672
673 /*
674 * We only changed the relative position of x and y wrt their
675 * children, and reparented y (but are keeping the same nodes in
676 * place, so its parent does not need to have end value
677 * recalculated).
678 */
679 x->max_end = calculate_node_max_end(rbtree, x);
680 y->max_end = calculate_node_max_end(rbtree, y);
681
682 cmm_smp_wmb(); /* write into node before publish */
683
684 /* External references update (visible by readers) */
685 if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
686 _CMM_STORE_SHARED(rbtree->root, y);
687 else if (get_pos(y) == IS_LEFT)
688 _CMM_STORE_SHARED(get_parent(y)->_left, y);
689 else
690 _CMM_STORE_SHARED(get_parent(y)->_right, y);
691
692 /* Point children to new copy (parent only used by updates/next/prev) */
693 set_parent(x->_left, get_decay(get_parent(x->_left)),
694 get_pos(x->_left));
695 set_parent(y->_right, get_decay(get_parent(y->_right)),
696 get_pos(y->_right));
697 if (!rcu_rbtree_is_nil(rbtree, y_left)) {
698 set_parent(y_left->_right,
699 get_decay(get_parent(y_left->_right)),
700 get_pos(y_left->_right));
701 set_parent(y_left->_left,
702 get_decay(get_parent(y_left->_left)),
703 get_pos(y_left->_left));
704 }
705
706 /* Sanity checks */
707 assert(y == rbtree->root || get_parent(y)->_left == y
708 || get_parent(y)->_right == y);
709 assert(x == rbtree->root || get_parent(x)->_left == x
710 || get_parent(x)->_right == x);
711 assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
712 assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
713 assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
714 assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
715 assert(!is_decay(rbtree->root));
716 assert(!is_decay(x));
717 assert(!is_decay(y));
718 assert(!is_decay(x->_right));
719 assert(!is_decay(x->_left));
720 assert(!is_decay(y->_right));
721 assert(!is_decay(y->_left));
722 check_max_end(rbtree, get_parent(y));
723 check_max_end(rbtree, x);
724 check_max_end(rbtree, y);
725 }
726
727 #else
728
729 /* non-rcu version */
730 static
731 void left_rotate(struct rcu_rbtree *rbtree,
732 struct rcu_rbtree_node *x)
733 {
734 struct rcu_rbtree_node *y;
735
736 lock_test_mutex();
737 y = x->_right;
738 x->_right = y->_left;
739 if (!rcu_rbtree_is_nil(rbtree, y->_left))
740 set_parent(y->_left, x, IS_RIGHT);
741 set_parent(y, get_parent(x), get_pos(x));
742 if (rcu_rbtree_is_nil(rbtree, get_parent(x)))
743 rbtree->root = y;
744 else if (x == get_parent(x)->_left) {
745 get_parent(x)->_left = y;
746 } else {
747 get_parent(x)->_right = y;
748 }
749 y->_left = x;
750 set_parent(x, y, IS_LEFT);
751 unlock_test_mutex();
752 }
753
754 #endif
755
756 #ifdef RBTREE_RCU_SUPPORT_ROTATE_RIGHT
757 static
758 void right_rotate(struct rcu_rbtree *rbtree,
759 struct rcu_rbtree_node *x)
760 {
761 struct rcu_rbtree_node *y, *y_right;
762
763 dbg_printf("right rotate %lx\n", (unsigned long) x->begin);
764
765 y = x->_left;
766 y_right = y->_right;
767
768 /* Now operate on new copy, decay old versions */
769 x = dup_decay_node(rbtree, x);
770 y = dup_decay_node(rbtree, y);
771 y_right = dup_decay_node(rbtree, y_right);
772
773 check_max_end(rbtree, get_parent(x));
774 check_max_end(rbtree, x);
775 check_max_end(rbtree, y);
776
777 /* Internal node modifications */
778 set_parent(y, get_parent(x), get_pos(x));
779 set_parent(x, y, IS_RIGHT);
780 set_right(rbtree, y, x);
781 set_left(rbtree, x, y_right);
782
783 if (!rcu_rbtree_is_nil(rbtree, y_right))
784 set_parent(y_right, x, IS_LEFT);
785
786 /*
787 * We only changed the relative position of x and y wrt their
788 * children, and reparented y (but are keeping the same nodes in
789 * place, so its parent does not need to have end value
790 * recalculated).
791 */
792 x->max_end = calculate_node_max_end(rbtree, x);
793 y->max_end = calculate_node_max_end(rbtree, y);
794
795 cmm_smp_wmb(); /* write into node before publish */
796
797 /* External references update (visible by readers) */
798 if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
799 _CMM_STORE_SHARED(rbtree->root, y);
800 else if (get_pos(y) == IS_RIGHT)
801 _CMM_STORE_SHARED(get_parent(y)->_right, y);
802 else
803 _CMM_STORE_SHARED(get_parent(y)->_left, y);
804
805 /* Point children to new copy (parent only used by updates/next/prev) */
806 set_parent(x->_right, get_decay(get_parent(x->_right)),
807 get_pos(x->_right));
808 set_parent(y->_left, get_decay(get_parent(y->_left)),
809 get_pos(y->_left));
810 if (!rcu_rbtree_is_nil(rbtree, y_right)) {
811 set_parent(y_right->_left,
812 get_decay(get_parent(y_right->_left)),
813 get_pos(y_right->_left));
814 set_parent(y_right->_right,
815 get_decay(get_parent(y_right->_right)),
816 get_pos(y_right->_right));
817 }
818
819 /* Sanity checks */
820 assert(y == rbtree->root || get_parent(y)->_right == y
821 || get_parent(y)->_left == y);
822 assert(x == rbtree->root || get_parent(x)->_right == x
823 || get_parent(x)->_left == x);
824 assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
825 assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
826 assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
827 assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
828 assert(!is_decay(rbtree->root));
829 assert(!is_decay(x));
830 assert(!is_decay(y));
831 assert(!is_decay(x->_left));
832 assert(!is_decay(x->_right));
833 assert(!is_decay(y->_left));
834 assert(!is_decay(y->_right));
835 check_max_end(rbtree, x);
836 check_max_end(rbtree, y);
837 check_max_end(rbtree, get_parent(y));
838 }
839
840 #else
841
842 /* non-rcu version */
843 static
844 void right_rotate(struct rcu_rbtree *rbtree,
845 struct rcu_rbtree_node *x)
846 {
847 struct rcu_rbtree_node *y;
848
849 lock_test_mutex();
850 y = x->_left;
851 x->_left = y->_right;
852 if (!rcu_rbtree_is_nil(rbtree, y->_right))
853 set_parent(y->_right, x, IS_LEFT);
854 set_parent(y, get_parent(x), get_pos(x));
855 if (rcu_rbtree_is_nil(rbtree, get_parent(x)))
856 rbtree->root = y;
857 else if (x == get_parent(x)->_right) {
858 get_parent(x)->_right = y;
859 } else {
860 get_parent(x)->_left = y;
861 }
862 y->_right = x;
863 set_parent(x, y, IS_RIGHT);
864 unlock_test_mutex();
865 }
866
867 #endif
868
869 static void rcu_rbtree_insert_fixup(struct rcu_rbtree *rbtree,
870 struct rcu_rbtree_node *z)
871 {
872 struct rcu_rbtree_node *y;
873
874 dbg_printf("insert fixup %p\n", z->begin);
875 assert(!is_decay(rbtree->root));
876
877 while (get_parent(z)->color == COLOR_RED) {
878 if (get_parent(z) == get_parent(get_parent(z))->_left) {
879 y = get_parent(get_parent(z))->_right;
880 if (y->color == COLOR_RED) {
881 get_parent(z)->color = COLOR_BLACK;
882 y->color = COLOR_BLACK;
883 get_parent(get_parent(z))->color = COLOR_RED;
884 z = get_parent(get_parent(z));
885 } else {
886 if (z == get_parent(z)->_right) {
887 z = get_parent(z);
888 left_rotate(rbtree, z);
889 z = get_decay(z);
890 assert(!is_decay(rbtree->root));
891 }
892 get_parent(z)->color = COLOR_BLACK;
893 get_parent(get_parent(z))->color = COLOR_RED;
894 assert(!is_decay(z));
895 assert(!is_decay(get_parent(z)));
896 assert(!is_decay(get_parent(get_parent(z))));
897 right_rotate(rbtree, get_parent(get_parent(z)));
898 assert(!is_decay(z));
899 assert(!is_decay(rbtree->root));
900 }
901 } else {
902 y = get_parent(get_parent(z))->_left;
903 if (y->color == COLOR_RED) {
904 get_parent(z)->color = COLOR_BLACK;
905 y->color = COLOR_BLACK;
906 get_parent(get_parent(z))->color = COLOR_RED;
907 z = get_parent(get_parent(z));
908 } else {
909 if (z == get_parent(z)->_left) {
910 z = get_parent(z);
911 right_rotate(rbtree, z);
912 z = get_decay(z);
913 assert(!is_decay(rbtree->root));
914 }
915 get_parent(z)->color = COLOR_BLACK;
916 get_parent(get_parent(z))->color = COLOR_RED;
917 left_rotate(rbtree, get_parent(get_parent(z)));
918 assert(!is_decay(z));
919 assert(!is_decay(rbtree->root));
920 }
921 }
922 }
923 rbtree->root->color = COLOR_BLACK;
924 }
925
926 /*
927 * rcu_rbtree_insert - Insert a node in the RCU rbtree
928 *
929 * Returns 0 on success, or < 0 on error.
930 */
931 int rcu_rbtree_insert(struct rcu_rbtree *rbtree,
932 struct rcu_rbtree_node *z)
933 {
934 struct rcu_rbtree_node *x, *y;
935
936 dbg_printf("insert %p\n", z->begin);
937 assert(!is_decay(rbtree->root));
938
939 y = make_nil(rbtree);
940 x = rbtree->root;
941 while (!rcu_rbtree_is_nil(rbtree, x)) {
942 y = x;
943 if (rbtree->comp(z->begin, x->begin) < 0)
944 x = x->_left;
945 else
946 x = x->_right;
947 }
948
949 z->_left = make_nil(rbtree);
950 z->_right = make_nil(rbtree);
951 z->color = COLOR_RED;
952 z->decay_next = NULL;
953 z->max_end = z->end;
954
955 if (rcu_rbtree_is_nil(rbtree, y)) {
956 set_parent(z, y, IS_RIGHT); /* pos arbitrary for root node */
957 /*
958 * Order stores to z (children/parents) before stores
959 * that will make it visible to the rest of the tree.
960 */
961 cmm_smp_wmb();
962 _CMM_STORE_SHARED(rbtree->root, z);
963 } else if (rbtree->comp(z->begin, y->begin) < 0) {
964 y = dup_decay_node(rbtree, y);
965 set_parent(z, y, IS_LEFT);
966 if (get_pos(z) == IS_LEFT)
967 _CMM_STORE_SHARED(y->_left, z);
968 else
969 _CMM_STORE_SHARED(y->_right, z);
970 populate_node_end(rbtree, y, 1, NULL);
971 } else {
972 y = dup_decay_node(rbtree, y);
973 set_parent(z, y, IS_RIGHT);
974 if (get_pos(z) == IS_LEFT)
975 _CMM_STORE_SHARED(y->_left, z);
976 else
977 _CMM_STORE_SHARED(y->_right, z);
978 populate_node_end(rbtree, y, 1, NULL);
979 }
980 rcu_rbtree_insert_fixup(rbtree, z);
981 /*
982 * Make sure to commit all _CMM_STORE_SHARED() for non-coherent caches.
983 */
984 cmm_smp_wmc();
985 show_tree(rbtree);
986 check_max_end(rbtree, z);
987 check_max_end(rbtree, y);
988
989 return 0;
990 }
991
992 /*
993 * Transplant v into u position.
994 */
995
996 #ifdef RBTREE_RCU_SUPPORT_TRANSPLANT
997
998 static
999 void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
1000 struct rcu_rbtree_node *u,
1001 struct rcu_rbtree_node *v,
1002 unsigned int copy_parents,
1003 struct rcu_rbtree_node *stop)
1004 {
1005 dbg_printf("transplant %p\n", v->begin);
1006
1007 if (!rcu_rbtree_is_nil(rbtree, v))
1008 v = dup_decay_node(rbtree, v);
1009
1010 if (rcu_rbtree_is_nil(rbtree, get_parent(u))) {
1011 /* pos is arbitrary for root node */
1012 set_parent(v, get_parent(u), IS_RIGHT);
1013 cmm_smp_wmb(); /* write into node before publish */
1014 _CMM_STORE_SHARED(rbtree->root, v);
1015 } else {
1016 struct rcu_rbtree_node *vp;
1017
1018 vp = get_parent(u);
1019 if (copy_parents)
1020 vp = dup_decay_node(rbtree, vp);
1021 set_parent(v, vp, get_pos(u));
1022 if (get_pos(v) == IS_LEFT)
1023 _CMM_STORE_SHARED(vp->_left, v);
1024 else
1025 _CMM_STORE_SHARED(vp->_right, v);
1026 populate_node_end(rbtree, vp, copy_parents, stop);
1027 check_max_end(rbtree, vp);
1028 }
1029
1030 /* Point children to new copy (parent only used by updates/next/prev) */
1031 if (!rcu_rbtree_is_nil(rbtree, v)) {
1032 set_parent(v->_right, get_decay(get_parent(v->_right)),
1033 get_pos(v->_right));
1034 set_parent(v->_left, get_decay(get_parent(v->_left)),
1035 get_pos(v->_left));
1036 }
1037 assert(!is_decay(rbtree->root));
1038 check_max_end(rbtree, v);
1039 }
1040
1041 #else
1042
1043 /* Non-RCU version */
1044 static
1045 void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
1046 struct rcu_rbtree_node *u,
1047 struct rcu_rbtree_node *v,
1048 unsigned int copy_parents,
1049 struct rcu_rbtree_node *stop)
1050 {
1051 dbg_printf("transplant %p\n", v->begin);
1052
1053 lock_test_mutex();
1054 if (rcu_rbtree_is_nil(rbtree, get_parent(u)))
1055 rbtree->root = v;
1056 else if (u == get_parent(u)->_left)
1057 get_parent(u)->_left = v;
1058 else
1059 get_parent(u)->_right = v;
1060 set_parent(v, get_parent(u), get_pos(u));
1061 unlock_test_mutex();
1062 }
1063
1064 #endif
1065
1066 static void rcu_rbtree_remove_fixup(struct rcu_rbtree *rbtree,
1067 struct rcu_rbtree_node *x)
1068 {
1069 dbg_printf("remove fixup %p\n", x->begin);
1070
1071 while (x != rbtree->root && x->color == COLOR_BLACK) {
1072 assert(!is_decay(get_parent(x)));
1073 assert(!is_decay(get_parent(x)->_left));
1074 if (x == get_parent(x)->_left) {
1075 struct rcu_rbtree_node *w;
1076
1077 w = get_parent(x)->_right;
1078
1079 if (w->color == COLOR_RED) {
1080 w->color = COLOR_BLACK;
1081 get_parent(x)->color = COLOR_RED;
1082 left_rotate(rbtree, get_parent(x));
1083 x = get_decay(x);
1084 assert(!is_decay(rbtree->root));
1085 w = get_parent(x)->_right;
1086 }
1087 if (w->_left->color == COLOR_BLACK
1088 && w->_right->color == COLOR_BLACK) {
1089 w->color = COLOR_RED;
1090 x = get_parent(x);
1091 assert(!is_decay(rbtree->root));
1092 assert(!is_decay(x));
1093 } else {
1094 if (w->_right->color == COLOR_BLACK) {
1095 w->_left->color = COLOR_BLACK;
1096 w->color = COLOR_RED;
1097 right_rotate(rbtree, w);
1098 assert(!is_decay(rbtree->root));
1099 x = get_decay(x);
1100 w = get_parent(x)->_right;
1101 }
1102 w->color = get_parent(x)->color;
1103 get_parent(x)->color = COLOR_BLACK;
1104 w->_right->color = COLOR_BLACK;
1105 left_rotate(rbtree, get_parent(x));
1106 assert(!is_decay(rbtree->root));
1107 x = rbtree->root;
1108 }
1109 } else {
1110 struct rcu_rbtree_node *w;
1111
1112 w = get_parent(x)->_left;
1113
1114 if (w->color == COLOR_RED) {
1115 w->color = COLOR_BLACK;
1116 get_parent(x)->color = COLOR_RED;
1117 right_rotate(rbtree, get_parent(x));
1118 assert(!is_decay(rbtree->root));
1119 x = get_decay(x);
1120 w = get_parent(x)->_left;
1121 }
1122 if (w->_right->color == COLOR_BLACK
1123 && w->_left->color == COLOR_BLACK) {
1124 w->color = COLOR_RED;
1125 x = get_parent(x);
1126 assert(!is_decay(rbtree->root));
1127 assert(!is_decay(x));
1128 } else {
1129 if (w->_left->color == COLOR_BLACK) {
1130 w->_right->color = COLOR_BLACK;
1131 w->color = COLOR_RED;
1132 left_rotate(rbtree, w);
1133 assert(!is_decay(rbtree->root));
1134 x = get_decay(x);
1135 w = get_parent(x)->_left;
1136 }
1137 w->color = get_parent(x)->color;
1138 get_parent(x)->color = COLOR_BLACK;
1139 w->_left->color = COLOR_BLACK;
1140 right_rotate(rbtree, get_parent(x));
1141 assert(!is_decay(rbtree->root));
1142 x = rbtree->root;
1143 }
1144 }
1145 }
1146 x->color = COLOR_BLACK;
1147 }
1148
1149 /*
1150 * Delete z. All non-copied children left/right positions are unchanged.
1151 */
1152 static
1153 void rcu_rbtree_remove_nonil(struct rcu_rbtree *rbtree,
1154 struct rcu_rbtree_node *z,
1155 struct rcu_rbtree_node *y)
1156 {
1157 struct rcu_rbtree_node *x;
1158
1159 dbg_printf("remove nonil %p\n", z->begin);
1160 show_tree(rbtree);
1161
1162 assert(!is_decay(z));
1163 assert(!is_decay(y));
1164 assert(!is_decay(y->_right));
1165 assert(!is_decay(get_parent(y)));
1166 x = y->_right;
1167 assert(!is_decay(x));
1168 if (get_parent(y) == z) {
1169 y = dup_decay_node(rbtree, y);
1170 set_parent(x, y, get_pos(x)); /* parent for nil */
1171 /* y is z's right node */
1172 set_left(rbtree, y, z->_left);
1173 y->max_end = calculate_node_max_end(rbtree, y);
1174 rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
1175 } else {
1176 struct rcu_rbtree_node *oy_right, *z_right;
1177
1178 /*
1179 * Need to make sure y is always visible by readers.
1180 */
1181 y = rcu_rbtree_min_dup_decay(rbtree, z->_right, &z_right);
1182 assert(!is_decay(y));
1183 assert(!is_decay(z));
1184 oy_right = y->_right;
1185
1186 /*
1187 * The max child begin of z_right does not change, because
1188 * we're only changing its left children.
1189 */
1190 y->_right = z_right;
1191 set_parent(y->_right, y, IS_RIGHT);
1192 assert(!is_decay(z->_left));
1193 y->_left = z->_left;
1194 assert(!is_decay(oy_right));
1195 /*
1196 * Transplant of oy_right to old y's location will only
1197 * trigger a "end" value update of the already copied branch
1198 * (which is not visible yet). We are transplanting
1199 * oy_right as a left child of old y's parent, so the
1200 * min values update propagated upward necessarily stops
1201 * at z_right.
1202 */
1203 rcu_rbtree_transplant(rbtree, y, oy_right, 0, y);
1204 y->max_end = calculate_node_max_end(rbtree, y);
1205 rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
1206 /* Update children */
1207 (void) rcu_rbtree_min_update_decay(rbtree, y->_right);
1208 }
1209 y = get_decay(y);
1210 assert(!is_decay(z));
1211 assert(!is_decay(z->_left));
1212 y->color = z->color;
1213 set_parent(y->_left, y, IS_LEFT);
1214 set_parent(y->_right, get_decay(get_parent(y->_right)), IS_RIGHT);
1215 assert(!is_decay(y->_left));
1216 assert(!is_decay(y->_right));
1217 }
1218
1219 int rcu_rbtree_remove(struct rcu_rbtree *rbtree,
1220 struct rcu_rbtree_node *z)
1221 {
1222 struct rcu_rbtree_node *x, *y;
1223 unsigned int y_original_color;
1224
1225 assert(!is_decay(rbtree->root));
1226 dbg_printf("remove %p\n", z->begin);
1227 show_tree(rbtree);
1228
1229 assert(!is_decay(z));
1230 y = z;
1231 y_original_color = y->color;
1232
1233 if (rcu_rbtree_is_nil(rbtree, z->_left)) {
1234 rcu_rbtree_transplant(rbtree, z, z->_right, 1, NULL);
1235 assert(!is_decay(z));
1236 x = get_decay(z->_right);
1237 show_tree(rbtree);
1238 } else if (rcu_rbtree_is_nil(rbtree, z->_right)) {
1239 rcu_rbtree_transplant(rbtree, z, z->_left, 1, NULL);
1240 assert(!is_decay(z));
1241 x = get_decay(z->_left);
1242 show_tree(rbtree);
1243 } else {
1244 y = rcu_rbtree_min(rbtree, z->_right);
1245 assert(!is_decay(y));
1246 y_original_color = y->color;
1247 x = y->_right;
1248 rcu_rbtree_remove_nonil(rbtree, z, y);
1249 x = get_decay(x);
1250 show_tree(rbtree);
1251 }
1252 if (y_original_color == COLOR_BLACK)
1253 rcu_rbtree_remove_fixup(rbtree, x);
1254 show_tree(rbtree);
1255 check_max_end(rbtree, x);
1256 check_max_end(rbtree, get_decay(y));
1257 /*
1258 * Commit all _CMM_STORE_SHARED().
1259 */
1260 cmm_smp_wmc();
1261
1262 return 0;
1263 }
This page took 0.073733 seconds and 5 git commands to generate.