Merge branch 'master' into rbtree2
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sat, 3 Sep 2011 14:44:43 +0000 (10:44 -0400)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sat, 3 Sep 2011 14:44:43 +0000 (10:44 -0400)
Conflicts:
Makefile.am

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Makefile.am
tests/Makefile.am
tests/test_urcu_rbtree.c [new file with mode: 0644]
urcu-rbtree.c [new file with mode: 0644]
urcu/rcurbtree.h [new file with mode: 0644]

index 0cde84acc6515a405062e62ea93f72b756ee66df..8f06cf947f8e8b2b9788e537715f1688b0d11198 100644 (file)
@@ -14,7 +14,8 @@ nobase_dist_include_HEADERS = urcu/compiler.h urcu/hlist.h urcu/list.h \
                urcu/uatomic/generic.h urcu/arch/generic.h urcu/wfstack.h \
                urcu/wfqueue.h urcu/rculfstack.h urcu/rculfqueue.h \
                urcu/ref.h urcu/map/*.h urcu/static/*.h urcu/cds.h \
-               urcu/urcu_ref.h urcu/urcu-futex.h urcu/uatomic_arch.h
+               urcu/urcu_ref.h urcu/urcu-futex.h urcu/uatomic_arch.h \
+               urcu/rcurbtree.h
 nobase_nodist_include_HEADERS = urcu/arch.h urcu/uatomic.h urcu/config.h
 
 EXTRA_DIST = $(top_srcdir)/urcu/arch/*.h $(top_srcdir)/urcu/uatomic/*.h \
@@ -36,7 +37,7 @@ endif
 lib_LTLIBRARIES = liburcu-common.la \
                liburcu.la liburcu-qsbr.la \
                liburcu-mb.la liburcu-signal.la liburcu-bp.la \
-               liburcu-cds.la
+               liburcu-cds.la liburcu-rbtree.la
 
 #
 # liburcu-common contains wait-free queues (needed by call_rcu) as well
@@ -64,6 +65,8 @@ liburcu_bp_la_LIBADD = liburcu-common.la
 liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c $(COMPAT)
 liburcu_cds_la_LIBADD = liburcu-common.la
 
+liburcu_rbtree_la_SOURCES = urcu-rbtree.c $(COMPAT)
+
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = liburcu-cds.pc liburcu.pc liburcu-bp.pc liburcu-qsbr.pc \
        liburcu-signal.pc liburcu-mb.pc
index 399fe9cad7ebac30df8de2f106ea45b6d463a741..51e961ce82b8816689eccbf61e03b3692b15ddad 100644 (file)
@@ -15,7 +15,8 @@ noinst_PROGRAMS = test_urcu test_urcu_dynamic_link test_urcu_timing \
         test_urcu_bp test_urcu_bp_dynamic_link test_cycles_per_loop \
        test_urcu_lfq test_urcu_wfq test_urcu_lfs test_urcu_wfs \
        test_urcu_wfq_dynlink test_urcu_wfs_dynlink \
-       test_urcu_lfq_dynlink test_urcu_lfs_dynlink
+       test_urcu_lfq_dynlink test_urcu_lfs_dynlink \
+       test_urcu_rbtree
 noinst_HEADERS = rcutorture.h
 
 if COMPAT_ARCH
@@ -45,6 +46,8 @@ URCU_SIGNAL_LIB=$(top_builddir)/liburcu-signal.la
 URCU_BP_LIB=$(top_builddir)/liburcu-bp.la
 URCU_CDS_LIB=$(top_builddir)/liburcu-cds.la
 
+URCU_RBTREE=$(URCU) $(top_srcdir)/urcu-rbtree.c
+
 EXTRA_DIST = $(top_srcdir)/tests/api.h runall.sh
 
 test_urcu_SOURCES = test_urcu.c $(URCU)
@@ -175,6 +178,8 @@ test_urcu_wfs_dynlink_SOURCES = test_urcu_wfs.c
 test_urcu_wfs_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS)
 test_urcu_wfs_dynlink_LDADD = $(URCU_COMMON_LIB)
 
+test_urcu_rbtree_SOURCES = test_urcu_rbtree.c $(URCU_RBTREE)
+
 urcutorture.c: api.h
 
 check-am:
diff --git a/tests/test_urcu_rbtree.c b/tests/test_urcu_rbtree.c
new file mode 100644 (file)
index 0000000..0d80f0c
--- /dev/null
@@ -0,0 +1,611 @@
+/*
+ * test_urcu_rbtree.c
+ *
+ * Userspace RCU library - test program for RB tree
+ *
+ * Copyright February 2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../config.h"
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+#include <errno.h>
+#include <time.h>
+
+#include <urcu/arch.h>
+
+extern int __thread disable_debug;
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/* number of insert/delete */
+#define NR_RAND 6
+//#define NR_RAND 7
+
+/*
+ * gettid() wrapper, selected at preprocessing time: use the _syscall0
+ * stub generator when it is defined, otherwise issue the raw __NR_gettid
+ * system call, and as a last resort fall back to the process id (a
+ * compile-time warning is emitted in that case).
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+       return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+       return getpid();
+}
+#endif
+
+#include <urcu.h>
+#include <urcu/rcurbtree.h>
+#include <urcu-defer.h>
+
+/*
+ * Three-way comparison of two keys, each interpreted as an unsigned long.
+ * Returns -1 when a sorts before b, 1 when a sorts after b, 0 when equal.
+ */
+int tree_comp(void *a, void *b)
+{
+       const unsigned long lhs = (unsigned long) a;
+       const unsigned long rhs = (unsigned long) b;
+
+       if (lhs < rhs)
+               return -1;
+       return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * Tree exercised by the test; tree_comp, malloc, free and call_rcu are
+ * handed to DEFINE_RCU_RBTREE.
+ */
+static DEFINE_RCU_RBTREE(rbtree, tree_comp, malloc, free, call_rcu);
+
+/* start/stop flags: written by main(), polled by the worker threads */
+static volatile int test_go, test_stop;
+
+/* pause between writer iterations, in loops (passed to loop_sleep()) */
+static unsigned long wdelay;
+
+/* test run time, in seconds (passed to sleep()) */
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+/* write-side C.S. duration, in loops */
+static unsigned long wduration;
+
+/* Busy-wait for l iterations, calling caa_cpu_relax() once per iteration. */
+static inline void loop_sleep(unsigned long l)
+{
+       for (; l != 0; l--)
+               caa_cpu_relax();
+}
+
+static int verbose_mode;
+
+/*
+ * Print a printf-style message only when verbose_mode is set.
+ * "## args" drops the preceding comma when no variadic argument is given,
+ * so the macro can also be invoked with a format string alone.
+ */
+#define printf_verbose(fmt, args...)           \
+       do {                                    \
+               if (verbose_mode)               \
+                       printf(fmt, ## args);   \
+       } while (0)
+
+/* CPU numbers collected from the "-a" command line options, in order */
+static unsigned int cpu_affinities[NR_CPUS];
+/* next cpu_affinities[] slot: filled by main(), consumed by set_affinity() */
+static unsigned int next_aff = 0;
+/* non-zero once at least one "-a" option has been given */
+static int use_affinity = 0;
+
+/* serializes the next_aff increment done in set_affinity() */
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* minimal single-word cpu_set_t replacement when the platform has none */
+#ifndef HAVE_CPU_SET_T
+typedef unsigned long cpu_set_t;
+# define CPU_ZERO(cpuset) do { *(cpuset) = 0; } while(0)
+# define CPU_SET(cpu, cpuset) do { *(cpuset) |= (1UL << (cpu)); } while(0)
+#endif
+
+/*
+ * Pin the calling thread to the next CPU listed in cpu_affinities[].
+ *
+ * Does nothing unless use_affinity is set. The next_aff cursor is
+ * advanced under affinity_mutex so that concurrent callers each pick
+ * their own slot. Exits the process if the mutex cannot be taken or
+ * released. The result of sched_setaffinity() itself is not checked.
+ */
+static void set_affinity(void)
+{
+#if HAVE_SCHED_SETAFFINITY
+       /* Only needed (and only used) when affinity support is built in. */
+       cpu_set_t mask;
+       int cpu;
+       int ret;
+#endif
+
+       if (!use_affinity)
+               return;
+
+#if HAVE_SCHED_SETAFFINITY
+       ret = pthread_mutex_lock(&affinity_mutex);
+       if (ret) {
+               /* pthread calls return the error number; errno is not set. */
+               fprintf(stderr, "Error in pthread mutex lock: %s\n",
+                       strerror(ret));
+               exit(-1);
+       }
+       cpu = cpu_affinities[next_aff++];
+       ret = pthread_mutex_unlock(&affinity_mutex);
+       if (ret) {
+               fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+                       strerror(ret));
+               exit(-1);
+       }
+
+       CPU_ZERO(&mask);
+       CPU_SET(cpu, &mask);
+#if SCHED_SETAFFINITY_ARGS == 2
+       sched_setaffinity(0, &mask);
+#else
+       sched_setaffinity(0, sizeof(mask), &mask);
+#endif
+#endif /* HAVE_SCHED_SETAFFINITY */
+}
+
+/*
+ * Writer-side run condition: non-zero while the test must keep going,
+ * 0 once test_stop has been raised.
+ */
+static int test_duration_write(void)
+{
+       return test_stop == 0;
+}
+
+/*
+ * Reader-side run condition: non-zero while the test must keep going,
+ * 0 once test_stop has been raised.
+ */
+static int test_duration_read(void)
+{
+       return test_stop == 0;
+}
+
+/* per-thread iteration counters, copied to the caller's slot at thread exit */
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+/* thread counts, parsed from argv[1] and argv[2] */
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+/* number of entries in global_key[], set with the "-g" option */
+static unsigned long global_items;
+/* keys inserted by main() before the test starts and looked up by readers */
+static void **global_key = NULL;
+
+/* serializes the writers' insert and remove phases */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Take rcu_copy_mutex. Exits the process with an error message if the
+ * lock cannot be acquired.
+ */
+void rcu_copy_mutex_lock(void)
+{
+       int ret;
+
+       ret = pthread_mutex_lock(&rcu_copy_mutex);
+       if (ret) {
+               /* pthread calls return the error number; errno is not set. */
+               fprintf(stderr, "Error in pthread mutex lock: %s\n",
+                       strerror(ret));
+               exit(-1);
+       }
+}
+
+/*
+ * Release rcu_copy_mutex. Exits the process with an error message if the
+ * unlock fails.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+       int ret;
+
+       ret = pthread_mutex_unlock(&rcu_copy_mutex);
+       if (ret) {
+               /* pthread calls return the error number; errno is not set. */
+               fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+                       strerror(ret));
+               exit(-1);
+       }
+}
+
+/*
+ * Record that "node" was seen during a traversal.
+ *
+ * Marks the first not-yet-hit entry of global_key[] whose value equals
+ * node->begin. Several entries may hold the same key, in which case each
+ * matching node visited marks one more of them.
+ *
+ * node:       node reached by the traversal
+ * lookup_hit: array of global_items flags, one per global_key[] entry
+ */
+static
+void set_lookup_index(struct rcu_rbtree_node *node,
+               char *lookup_hit)
+{
+       unsigned long i;        /* same type as global_items */
+
+       for (i = 0; i < global_items; i++) {
+               if (node->begin == global_key[i]
+                   && !lookup_hit[i]) {
+                       lookup_hit[i] = 1;
+                       break;
+               }
+       }
+}
+
+/*
+ * Reader thread body.
+ *
+ * Waits for test_go, then repeatedly looks up every global key with each
+ * lookup flavour (search at range start, search near range end, range
+ * search, begin-key search) and walks the whole tree forward (min + next)
+ * and backward (max + prev), asserting that every global key is found.
+ * Stops once test_stop is raised.
+ *
+ * _count: points to the unsigned long long slot receiving the number of
+ *         completed iterations.
+ * Returns (void *) 1.
+ */
+void *thr_reader(void *_count)
+{
+       unsigned long long *count = _count;
+       struct rcu_rbtree_node *node;
+       unsigned long i;        /* same type as global_items */
+       char *lookup_hit;
+
+       printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+                       "reader", pthread_self(), (unsigned long)gettid());
+
+       set_affinity();
+
+       rcu_register_thread();
+
+       lookup_hit = malloc(sizeof(*lookup_hit) * global_items);
+       /* A NULL result is acceptable only for a zero-sized request. */
+       if (!lookup_hit && global_items) {
+               perror("malloc");
+               exit(-1);
+       }
+
+       /* Spin until main() has populated the tree and released the test. */
+       while (!test_go)
+       {
+       }
+       cmm_smp_mb();
+
+       for (;;) {
+               /* search bottom of range */
+               for (i = 0; i < global_items; i++) {
+                       rcu_read_lock();
+                       node = rcu_rbtree_search(&rbtree,
+                                                rcu_dereference(rbtree.root),
+                                                global_key[i]);
+                       assert(!rcu_rbtree_is_nil(&rbtree, node));
+                       rcu_read_unlock();
+               }
+
+               /* search end of range */
+               for (i = 0; i < global_items; i++) {
+                       rcu_read_lock();
+                       node = rcu_rbtree_search(&rbtree,
+                                                rcu_dereference(rbtree.root),
+                                                (void*) ((unsigned long) global_key[i] + 3));
+                       assert(!rcu_rbtree_is_nil(&rbtree, node));
+                       rcu_read_unlock();
+               }
+
+               /* search range (middle) */
+               for (i = 0; i < global_items; i++) {
+                       rcu_read_lock();
+                       node = rcu_rbtree_search_range(&rbtree,
+                                                rcu_dereference(rbtree.root),
+                                                (void*) ((unsigned long) global_key[i] + 1),
+                                                (void*) ((unsigned long) global_key[i] + 2));
+                       assert(!rcu_rbtree_is_nil(&rbtree, node));
+                       rcu_read_unlock();
+               }
+
+               /* search begin key */
+               for (i = 0; i < global_items; i++) {
+                       rcu_read_lock();
+                       node = rcu_rbtree_search_begin_key(&rbtree,
+                                                rcu_dereference(rbtree.root),
+                                                global_key[i]);
+                       assert(!rcu_rbtree_is_nil(&rbtree, node));
+                       rcu_read_unlock();
+               }
+
+               /* min + next */
+               memset(lookup_hit, 0, sizeof(*lookup_hit) * global_items);
+
+               rcu_read_lock();
+               node = rcu_rbtree_min(&rbtree,
+                                     rcu_dereference(rbtree.root));
+               while (!rcu_rbtree_is_nil(&rbtree, node)) {
+                       set_lookup_index(node, lookup_hit);
+                       node = rcu_rbtree_next(&rbtree, node);
+               }
+               rcu_read_unlock();
+
+               for (i = 0; i < global_items; i++)
+                       assert(lookup_hit[i]);
+
+               /* max + prev */
+               memset(lookup_hit, 0, sizeof(*lookup_hit) * global_items);
+
+               rcu_read_lock();
+               node = rcu_rbtree_max(&rbtree,
+                                     rcu_dereference(rbtree.root));
+               while (!rcu_rbtree_is_nil(&rbtree, node)) {
+                       set_lookup_index(node, lookup_hit);
+                       node = rcu_rbtree_prev(&rbtree, node);
+               }
+               rcu_read_unlock();
+
+               for (i = 0; i < global_items; i++)
+                       assert(lookup_hit[i]);
+
+               debug_yield_read();
+               if (unlikely(rduration))
+                       loop_sleep(rduration);
+               nr_reads++;
+               if (unlikely(!test_duration_read()))
+                       break;
+       }
+
+       rcu_unregister_thread();
+
+       /* test extra thread registration */
+       rcu_register_thread();
+       rcu_unregister_thread();
+
+       free(lookup_hit);
+
+       *count = nr_reads;
+       printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+                       "reader", pthread_self(), (unsigned long)gettid());
+       return ((void*)1);
+
+}
+
+/*
+ * Writer thread body.
+ *
+ * Waits for test_go, then loops: insert NR_RAND random ranges
+ * [key, key + 4) while holding rcu_copy_mutex, optionally spin for
+ * wduration loops, then look up and remove those same ranges, again under
+ * rcu_copy_mutex. Stops once test_stop is raised; spins for wdelay loops
+ * between iterations.
+ *
+ * _count: points to the unsigned long long slot receiving the number of
+ *         completed iterations.
+ * Returns (void *) 2.
+ */
+void *thr_writer(void *_count)
+{
+       unsigned long long *count = _count;
+       struct rcu_rbtree_node *node;
+       void *key[NR_RAND];
+       int i;
+
+       printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+                       "writer", pthread_self(), (unsigned long)gettid());
+
+       set_affinity();
+
+       //disable_debug = 1;
+
+       rcu_register_thread();
+
+       /* Spin until main() releases the test. */
+       while (!test_go)
+       {
+       }
+       cmm_smp_mb();
+
+       for (;;) {
+               rcu_copy_mutex_lock();
+
+               for (i = 0; i < NR_RAND; i++) {
+                       //key[i] = (void *)(unsigned long)(rand() % 2048);
+                       /* keys are multiples of 4 in [0, 2048) */
+                       key[i] = (void *)(unsigned long)(((unsigned long) rand() * 4) % 2048);
+                       //For more collisions
+                       //key[i] = (void *)(unsigned long)(rand() % 6);
+                       //node->begin = key[i];
+                       //node->end = (void *)((unsigned long) key[i] + 1);
+                       //node->end = (void *)((unsigned long) key[i] + 4);
+                       rcu_read_lock();
+                       rcu_rbtree_insert(&rbtree, key[i],
+                                         (void *)((unsigned long) key[i] + 4));
+                       rcu_read_unlock();
+               }
+               rcu_copy_mutex_unlock();
+
+               if (unlikely(wduration))
+                       loop_sleep(wduration);
+
+               rcu_copy_mutex_lock();
+               for (i = 0; i < NR_RAND; i++) {
+                       /* disabled tree dump, kept as a debugging aid */
+#if 0
+                       node = rcu_rbtree_min(rbtree, rbtree->root);
+                       while (!rcu_rbtree_is_nil(&rbtree, node)) {
+                               printf("{ 0x%lX p:%lX r:%lX l:%lX %s %s %s} ",
+                                       (unsigned long)node->key,
+                                       node->p->key,
+                                       node->right->key,
+                                       node->left->key,
+                                       node->color ? "red" : "black",
+                                       node->pos ? "right" : "left",
+                                       node->nil ? "nil" : "");
+                               node = rcu_rbtree_next(rbtree, node);
+                       }
+                       printf("\n");
+#endif
+                       /* every key inserted above must still be found */
+                       rcu_read_lock();
+                       node = rcu_rbtree_search(&rbtree, rbtree.root, key[i]);
+                       assert(!rcu_rbtree_is_nil(&rbtree, node));
+                       rcu_rbtree_remove(&rbtree, node);
+                       rcu_read_unlock();
+               }
+
+               rcu_copy_mutex_unlock();
+               nr_writes++;
+               if (unlikely(!test_duration_write()))
+                       break;
+               if (unlikely(wdelay))
+                       loop_sleep(wdelay);
+       }
+
+       rcu_unregister_thread();
+
+       printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+                       "writer", pthread_self(), (unsigned long)gettid());
+       *count = nr_writes;
+       return ((void*)2);
+}
+
+/*
+ * Print the command line synopsis on standard output.
+ *
+ * argc: unused, kept for symmetry with main().
+ * argv: argv[0] is printed as the program name.
+ */
+void show_usage(int argc, char **argv)
+{
+       printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+       printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+       /* The delay is consumed by loop_sleep(), hence counted in loops. */
+       printf(" [-d delay] (writer period (in loops))");
+       printf(" [-c duration] (reader C.S. duration (in loops))");
+       printf(" [-e duration] (writer C.S. duration (in loops))");
+       printf(" [-v] (verbose output)");
+       printf(" [-a cpu#] [-a cpu#]... (affinity)");
+       printf(" [-g items] (number of global items looked up by readers)");
+       printf("\n");
+}
+
+/*
+ * Allocate a zero-filled array of nmemb elements of "size" bytes each.
+ * Exits the process on allocation failure; a NULL result is accepted only
+ * for a zero-element request.
+ */
+static void *test_alloc_array(size_t nmemb, size_t size)
+{
+       void *ptr = calloc(nmemb, size);
+
+       if (!ptr && nmemb) {
+               perror("calloc");
+               exit(-1);
+       }
+       return ptr;
+}
+
+/*
+ * Test entry point.
+ *
+ * Parses "nr_readers nr_writers duration" followed by optional flags,
+ * starts the reader and writer threads, inserts global_items random
+ * ranges for the readers to look up, lets the test run for "duration"
+ * seconds, then joins the threads, removes the global ranges and prints
+ * a one-line summary.
+ *
+ * Returns 0 on success and -1 on a command line error; exits with status
+ * 1 if a thread cannot be created or joined.
+ */
+int main(int argc, char **argv)
+{
+       int err;
+       pthread_t *tid_reader, *tid_writer;
+       void *tret;
+       unsigned long long *count_reader, *count_writer;
+       unsigned long long tot_reads = 0, tot_writes = 0;
+       int i, a;
+       unsigned int t;         /* thread index, same type as nr_readers */
+       unsigned long k;        /* key index, same type as global_items */
+       struct rcu_rbtree_node *node;
+
+       if (argc < 4) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       err = sscanf(argv[1], "%u", &nr_readers);
+       if (err != 1) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       err = sscanf(argv[2], "%u", &nr_writers);
+       if (err != 1) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       err = sscanf(argv[3], "%lu", &duration);
+       if (err != 1) {
+               show_usage(argc, argv);
+               return -1;
+       }
+
+       for (i = 4; i < argc; i++) {
+               if (argv[i][0] != '-')
+                       continue;
+               switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+               case 'r':
+                       yield_active |= YIELD_READ;
+                       break;
+               case 'w':
+                       yield_active |= YIELD_WRITE;
+                       break;
+#endif
+               case 'a':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       /* Do not write past the end of cpu_affinities[]. */
+                       if (next_aff >= NR_CPUS) {
+                               fprintf(stderr,
+                                       "Too many -a options (max %d)\n",
+                                       NR_CPUS);
+                               return -1;
+                       }
+                       a = atoi(argv[++i]);
+                       cpu_affinities[next_aff++] = a;
+                       use_affinity = 1;
+                       printf_verbose("Adding CPU %d affinity\n", a);
+                       break;
+               case 'c':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       rduration = atol(argv[++i]);
+                       break;
+               case 'd':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       wdelay = atol(argv[++i]);
+                       break;
+               case 'e':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       wduration = atol(argv[++i]);
+                       break;
+               case 'v':
+                       verbose_mode = 1;
+                       break;
+               case 'g':
+                       if (argc < i + 2) {
+                               show_usage(argc, argv);
+                               return -1;
+                       }
+                       global_items = atol(argv[++i]);
+                       break;
+               default:
+                       /* unknown options are ignored */
+                       break;
+               }
+       }
+
+       printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+               duration, nr_readers, nr_writers);
+       printf_verbose("Writer delay : %lu loops.\n", wdelay);
+       printf_verbose("Reader duration : %lu loops.\n", rduration);
+       printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+                       "main", pthread_self(), (unsigned long)gettid());
+
+       tid_reader = test_alloc_array(nr_readers, sizeof(*tid_reader));
+       tid_writer = test_alloc_array(nr_writers, sizeof(*tid_writer));
+       count_reader = test_alloc_array(nr_readers, sizeof(*count_reader));
+       count_writer = test_alloc_array(nr_writers, sizeof(*count_writer));
+       global_key = test_alloc_array(global_items, sizeof(*global_key));
+
+       srand(time(NULL));
+
+       err = create_all_cpu_call_rcu_data(0);
+       assert(!err);
+
+       /* Rewind the cursor: threads consume the affinities in order. */
+       next_aff = 0;
+
+       for (t = 0; t < nr_readers; t++) {
+               err = pthread_create(&tid_reader[t], NULL, thr_reader,
+                                    &count_reader[t]);
+               if (err != 0)
+                       exit(1);
+       }
+       for (t = 0; t < nr_writers; t++) {
+               err = pthread_create(&tid_writer[t], NULL, thr_writer,
+                                    &count_writer[t]);
+               if (err != 0)
+                       exit(1);
+       }
+
+       rcu_register_thread();
+       rcu_read_lock();
+       /* Insert items looked up by readers */
+       for (k = 0; k < global_items; k++) {
+               /* keys are multiples of 4 in [0, 2048) */
+               global_key[k] = (void *)(unsigned long)(((unsigned long) rand() * 4) % 2048);
+               rcu_rbtree_insert(&rbtree, global_key[k],
+                                 (void *)((unsigned long) global_key[k] + 4));
+       }
+       rcu_read_unlock();
+
+       /* Make the populated tree visible before releasing the threads. */
+       cmm_smp_mb();
+
+       test_go = 1;
+
+       sleep(duration);
+
+       test_stop = 1;
+
+       for (t = 0; t < nr_readers; t++) {
+               err = pthread_join(tid_reader[t], &tret);
+               if (err != 0)
+                       exit(1);
+               tot_reads += count_reader[t];
+       }
+       for (t = 0; t < nr_writers; t++) {
+               err = pthread_join(tid_writer[t], &tret);
+               if (err != 0)
+                       exit(1);
+               tot_writes += count_writer[t];
+       }
+
+       /* All workers are gone: remove the ranges inserted for readers. */
+       rcu_read_lock();
+       for (k = 0; k < global_items; k++) {
+               node = rcu_rbtree_search(&rbtree, rbtree.root, global_key[k]);
+               assert(!rcu_rbtree_is_nil(&rbtree, node));
+               rcu_rbtree_remove(&rbtree, node);
+       }
+       rcu_read_unlock();
+       rcu_unregister_thread();
+
+       printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+              tot_writes);
+       printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu wdur %6lu "
+               "nr_writers %3u "
+               "wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu "
+               "global_items %6lu\n",
+               argv[0], duration, nr_readers, rduration, wduration,
+               nr_writers, wdelay, tot_reads, tot_writes,
+               tot_reads + tot_writes, global_items);
+       free(tid_reader);
+       free(tid_writer);
+       free(count_reader);
+       free(count_writer);
+       free(global_key);
+       return 0;
+}
diff --git a/urcu-rbtree.c b/urcu-rbtree.c
new file mode 100644 (file)
index 0000000..01733f0
--- /dev/null
@@ -0,0 +1,1288 @@
+/*
+ * urcu-rbtree.c
+ *
+ * Userspace RCU library - Red-Black Tree
+ *
+ * Copyright (c) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Implementation of RCU-adapted data structures and operations based on the RB
+ * tree algorithms found in chapter 12 of:
+ *
+ * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and
+ * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT
+ * Press, September 2009.
+ */
+
+#define _BSD_SOURCE
+#define _LGPL_SOURCE
+
+#include <stdio.h>
+#include <pthread.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <urcu/rcurbtree.h>
+#include <urcu-pointer.h>
+#include <urcu-call-rcu.h>
+#include <urcu/compiler.h>
+
+/*
+ * Explanation of next/prev walk coherency and search coherency when
+ * performed concurrently with updates.
+ *
+ * next/prev walk coherency with respect to concurrent updates:
+ *
+ * There are 3 scenarios for which we need to model and validate this:
+ * rotation, transplant and "teleportation" (the latter being a remote
+ * transplant in a remove non-nil case).
+ *
+ * - rotation left (right is symmetric)
+ *   xl and yr point to the same parent nodes before/after left
+ *   rotation. yll and ylr also point to the same parent node
+ *   before/after left rotation.
+ *   As we are copying x, y and yl (the 3 nodes whose parent/child
+ *   relationships are changed) to a "new" version of this node cluster,
+ *   all external references to the cluster either point to the old
+ *   cluster or the new one. If we take this cluster as a "black box"
+ *   from the point of view of next/prev traversal, all we have to
+ *   ensure is that the old and the new cluster behave in the exact same
+ *   way with respect to traversal order.
+ *
+ * - transplant
+ *   In this operation, we transplant a copy of "v" into its parent
+ *   location (u), thus replacing it. The children of "v", vl and vr,
+ *   still point to v (new version) after the transplant, so it does not
+ *   change the behavior when considering the next/prev traversal. "v"
+ *   being copied to a new version ensures that the parent pointers of v
+ *   are pointing to its new parent (parent of u) before it is published
+ *   to readers (by setting the child pointer of u's parent to the new
+ *   copy of v).
+ *
+ * - teleportation
+ *   This one is probably the most tricky and will require some ascii
+ *   art to explain.
+ *
+ *   We want to remove z from this tree:
+ *
+ *                zp
+ *                 \
+ *                  z
+ *                 /  \
+ *                zl   zr
+ *                    /
+ *                   a
+ *                  / \
+ *                 b   ar
+ *                / \
+ *               y   br
+ *                \
+ *                 yr
+ *                /  \
+ *               yrl yrr
+ *
+ *   What we are going to do is to "teleport" y into z's location,
+ *   reparenting yr to b. We are taking care to create a new cluster
+ *   copy that is isolated from any reader. We will represent the new
+ *   members of the cluster with capital letters.
+ *
+ *                zp
+ *                 \
+ *                  Y
+ *                 /  \
+ *                zl   ZR
+ *                    /
+ *                   A
+ *                  / \
+ *                 B   ar
+ *                / \
+ *               YR  br
+ *              /  \
+ *             yrl yrr
+ *
+ *   In this transient state, we notice that the pointers within the
+ *   cluster all point to the new cluster nodes, and they point to the
+ *   correct external nodes. However, no external pointer points to the
+ *   cluster (yet). The first pointer to point to this cluster will be
+ *   "zp->right". It will therefore make the cluster visible for search.
+ *
+ *   In this intermediate state, we can walk through the new cluster
+ *   when coming from the top (in a next/prev traversal), but can come
+ *   back to the old cluster when going back up from the children nodes.
+ *   All we have to ensure is that the two clusters, taken as a black
+ *   box from a next/prev traversal perspective, yield the exact same
+ *   result.
+ *
+ *   Search coherency with concurrent updates:
+ *
+ *   Simple "search" (only going down the tree) is also handled by this
+ *   cluster scheme. The explanation is a subset of the prev/next
+ *   explanation, where we don't have to care about the intermediate
+ *   stages where the children point to the old cluster, because we only
+ *   ever use the top level pointers to go down into the children nodes,
+ *   we never go back up. So by simply making sure that all the cluster
+ *   internal nodes pointers are setup correctly before making the
+ *   cluster visible to the readers (by setting the parent pointer to
+ *   the topmost new node in the cluster), we are sure that readers will
+ *   see a coherent view of the cluster at all times.
+ */
+
+/*
+ * Debug helpers: forward to printf()/usleep() when DEBUG is defined,
+ * expand to nothing otherwise.
+ */
+#ifdef DEBUG
+#define dbg_printf(args...)    printf(args)
+#define dbg_usleep(usecs)      usleep(usecs)
+#else
+#define dbg_printf(args...)
+#define dbg_usleep(usecs)
+#endif
+
+/*
+ * Undefine this to enable the non-RCU rotate and transplant functions
+ * (for debugging). Note that these versions don't support the tree
+ * max_end updates, so lookups must be performed with
+ * rcu_rbtree_search_begin_key when using this debug facility.
+ */
+#define RBTREE_RCU_SUPPORT_ROTATE_LEFT
+#define RBTREE_RCU_SUPPORT_ROTATE_RIGHT
+#define RBTREE_RCU_SUPPORT_TRANSPLANT
+
+/*
+ * Two mutexes with lock/unlock wrappers, only built when EXTRA_DEBUG is
+ * defined. The return values of the pthread calls are not checked.
+ */
+#ifdef EXTRA_DEBUG
+static pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t outer_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Acquire outer_mutex. */
+static
+void lock_outer_mutex(void)
+{
+       pthread_mutex_lock(&outer_mutex);
+}
+
+/* Release outer_mutex. */
+static
+void unlock_outer_mutex(void)
+{
+       pthread_mutex_unlock(&outer_mutex);
+}
+
+/* Acquire test_mutex. */
+static
+void lock_test_mutex(void)
+{
+       pthread_mutex_lock(&test_mutex);
+}
+
+/* Release test_mutex. */
+static
+void unlock_test_mutex(void)
+{
+       pthread_mutex_unlock(&test_mutex);
+}
+#endif
+
+/*
+ * Store the parent pointer of "node" together with the position of the
+ * node under that parent; "pos" is OR-ed into the pointer value and both
+ * are written with a single shared store.
+ */
+static
+void set_parent(struct rcu_rbtree_node *node,
+               struct rcu_rbtree_node *parent,
+               unsigned int pos)
+{
+       unsigned long packed = (unsigned long) parent;
+
+       packed |= pos;
+       _CMM_STORE_SHARED(node->parent, packed);
+}
+
+/* Return the parent pointer of "node", with the position bit masked out. */
+static
+struct rcu_rbtree_node *get_parent(struct rcu_rbtree_node *node)
+{
+       unsigned long packed = node->parent;
+
+       packed &= ~1UL;
+       return (struct rcu_rbtree_node *) packed;
+}
+
+/* Return the position bit (bit 0) stored in the parent field of "node". */
+static
+unsigned int get_pos(struct rcu_rbtree_node *node)
+{
+       unsigned long packed = node->parent;
+
+       return (unsigned int) (packed & 1UL);
+}
+
+/*
+ * Read the parent field of "node" once through rcu_dereference() and
+ * split it: the position bit is stored in *pos, the parent pointer is
+ * returned. Both values therefore come from the same snapshot.
+ */
+static
+struct rcu_rbtree_node *get_parent_and_pos(struct rcu_rbtree_node *node,
+                               unsigned int *pos)
+{
+       const unsigned long packed = rcu_dereference(node->parent);
+       struct rcu_rbtree_node *parent;
+
+       parent = (struct rcu_rbtree_node *) (packed & ~1UL);
+       *pos = (unsigned int) (packed & 1UL);
+       return parent;
+}
+
+/* Link "x" to "xc" through x->decay_next (xc being the copy replacing x). */
+static
+void set_decay(struct rcu_rbtree_node *x, struct rcu_rbtree_node *xc)
+{
+       x->decay_next = xc;
+}
+
+/*
+ * Follow the decay_next chain starting at "x" and return its last node,
+ * i.e. the one whose decay_next is NULL. Returns NULL when x is NULL.
+ */
+static
+struct rcu_rbtree_node *get_decay(struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *latest;
+
+       if (x == NULL)
+               return NULL;
+       for (latest = x; latest->decay_next != NULL; )
+               latest = latest->decay_next;
+       return latest;
+}
+
+/*
+ * Return x->decay_next: non-NULL when "x" has been linked to a
+ * replacement through set_decay(), NULL otherwise.
+ */
+static
+struct rcu_rbtree_node *is_decay(struct rcu_rbtree_node *x)
+{
+       return x->decay_next;
+}
+
+/*
+ * Allocate storage for one node through the tree's rballoc callback.
+ * The callback's result is returned as is, without a NULL check.
+ */
+static
+struct rcu_rbtree_node *_rcu_rbtree_alloc_node(struct rcu_rbtree *rbtree)
+{
+       return rbtree->rballoc(sizeof(struct rcu_rbtree_node));
+}
+
+/*
+ * Callback handed to the tree's call_rcu hook: recover the node embedding
+ * "head" and release it through the rbfree callback of its owning tree.
+ */
+static
+void _rcu_rbtree_free_node(struct rcu_head *head)
+{
+       struct rcu_rbtree_node *node =
+               caa_container_of(head, struct rcu_rbtree_node, head);
+       node->rbtree->rbfree(node);
+}
+
+/*
+ * Replace "x" by a fresh copy.
+ *
+ * A nil node is returned unchanged. Otherwise a new node is allocated,
+ * filled with a byte copy of x, x is linked to the copy through
+ * set_decay(), and x is queued for release through the tree's call_rcu
+ * hook. Returns the copy.
+ *
+ * NOTE(review): the allocation result is not checked before memcpy(), so
+ * an rballoc failure is dereferenced here.
+ */
+static
+struct rcu_rbtree_node *dup_decay_node(struct rcu_rbtree *rbtree,
+                               struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *xc;
+
+       if (rcu_rbtree_is_nil(rbtree, x))
+               return x;
+
+       xc = _rcu_rbtree_alloc_node(rbtree);
+       memcpy(xc, x, sizeof(*xc));
+       /* the copy starts a new chain: drop any decay link copied from x */
+       xc->decay_next = NULL;
+       set_decay(x, xc);
+       rbtree->call_rcu(&x->head, _rcu_rbtree_free_node);
+       return xc;
+}
+
+/*
+ * Info for range lookups:
+ * Range lookup information is only valid when searching for ranges.
+ * It should never be used in next/prev traversal, because the pointers
+ * to parents are not kept in sync with the parents' view of their
+ * children's range.
+ */
+/*
+ * Plain store of node's left child pointer. "rbtree" is not used.
+ */
+static
+void set_left(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
+                       struct rcu_rbtree_node *left)
+{
+       (void) rbtree;
+       node->_left = left;
+}
+
+/*
+ * Plain store of node's right child pointer. "rbtree" is not used.
+ */
+static
+void set_right(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
+                       struct rcu_rbtree_node *right)
+{
+       (void) rbtree;
+       node->_right = right;
+}
+
+/*
+ * Compute the max_end value node should carry: the greatest, according
+ * to the tree's comp callback, of node->end and the max_end of each
+ * non-nil child. The right child is examined before the left one.
+ * Nothing is written; the caller stores the result.
+ */
+static
+void *calculate_node_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node)
+{
+       struct rcu_rbtree_node *right = node->_right;
+       struct rcu_rbtree_node *left = node->_left;
+       void *best = node->end;
+
+       if (!rcu_rbtree_is_nil(rbtree, right)
+           && rbtree->comp(best, right->max_end) < 0)
+               best = right->max_end;
+       if (!rcu_rbtree_is_nil(rbtree, left)
+           && rbtree->comp(best, left->max_end) < 0)
+               best = left->max_end;
+       return best;
+}
+
+/*
+ * TODO
+ * Deal with memory allocation errors.
+ * This can be ensured by reserving a pool of memory entries before
+ * doing the insertion. The size of the pool will have to be a function
+ * of the number of transplantations/rotations required for the
+ * operation (which is a multiple of the tree height).
+ */
+
+#ifdef DEBUG
+/*
+ * Debug helper: visit the nodes in rcu_rbtree_min()/rcu_rbtree_next()
+ * order and print one "{ ... }" record per node, all on one line.
+ * Asserts that no visited node has been superseded by a copy.
+ */
+static
+void show_tree(struct rcu_rbtree *rbtree)
+{
+       struct rcu_rbtree_node *node;
+
+       for (node = rcu_rbtree_min(rbtree, rbtree->root);
+            !rcu_rbtree_is_nil(rbtree, node);
+            node = rcu_rbtree_next(rbtree, node)) {
+               const char *color_str, *pos_str, *nil_str;
+
+               assert(!is_decay(node));
+               color_str = node->color ? "red" : "black";
+               pos_str = get_pos(node) ? "right" : "left";
+               nil_str = rcu_rbtree_is_nil(rbtree, node) ? "nil" : "";
+               printf("{ b:%lX e:%lX pb: %lX r:%lX l:%lX %s %s %s} ",
+                       (unsigned long) node->begin,
+                       (unsigned long) node->end,
+                       (unsigned long) get_parent(node)->begin,
+                       (unsigned long) node->_right->begin,
+                       (unsigned long) node->_left->begin,
+                       color_str, pos_str, nil_str);
+       }
+       printf("\n");
+}
+
+/*
+ * Debug check: assert that x->max_end compares equal (per the tree's
+ * comp callback) to the value recomputed by calculate_node_max_end().
+ * Nil nodes are skipped.
+ *
+ * Kept as a macro so that a failing assert reports the call site. Both
+ * arguments are captured in locals, so each is evaluated exactly once
+ * and may be an arbitrary expression, as with the function of the same
+ * name used when DEBUG is not defined.
+ */
+#define check_max_end(rbtree, x)                                       \
+       do {                                                            \
+               struct rcu_rbtree *cme_tree_ = (rbtree);                \
+               struct rcu_rbtree_node *cme_node_ = (x);                \
+                                                                       \
+               if (rcu_rbtree_is_nil(cme_tree_, cme_node_))            \
+                       break;                                          \
+               assert(cme_tree_->comp(cme_node_->max_end,              \
+                       calculate_node_max_end(cme_tree_, cme_node_)) == 0); \
+       } while (0)
+
+#else /* DEBUG */
+/* No-op replacement for show_tree() when DEBUG is not defined. */
+static
+void show_tree(struct rcu_rbtree *rbtree)
+{
+}
+
+/* No-op replacement for check_max_end() when DEBUG is not defined. */
+static
+void check_max_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *x)
+{
+}
+#endif /* DEBUG */
+
+/*
+ * Return the tree's nil sentinel, i.e. the address of the nil_node
+ * embedded in the tree structure. Nothing is allocated.
+ */
+static
+struct rcu_rbtree_node *make_nil(struct rcu_rbtree *rbtree)
+{
+       struct rcu_rbtree_node *nil = &rbtree->nil_node;
+
+       return nil;
+}
+
+/*
+ * Iterative rbtree search.
+ *
+ * Starting from subtree x, look for a node whose range contains
+ * "point", i.e. begin <= point < end according to the tree's comp
+ * callback. The max_end value of the left child is used to decide
+ * whether the left subtree can still contain the point.
+ *
+ * Returns the matching node, or the nil node when there is none.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree,
+                                         struct rcu_rbtree_node *x,
+                                         void *point)
+{
+       struct rcu_rbtree_node *xl;
+
+       dbg_printf("searching point 0x%lx\n", (unsigned long) point);
+       x = rcu_dereference(x);
+
+       while (!rcu_rbtree_is_nil(rbtree, x)) {
+               dbg_usleep(10);
+               xl = rcu_dereference(x->_left);
+               dbg_printf("search x %lx x_end %lx x_max_end %lx\n", (unsigned long) x->begin,
+                                               (unsigned long) x->end, (unsigned long) x->max_end);
+               dbg_printf("search xl %lx xl_end %lx xl_max_end %lx\n", (unsigned long) xl->begin,
+                       (unsigned long) xl->end, (unsigned long) xl->max_end);
+               /* some range in the left subtree ends after point: look there */
+               if (!rcu_rbtree_is_nil(rbtree, xl)
+                   && (rbtree->comp(xl->max_end, point) > 0)) {
+                       dbg_printf("go left\n");
+                       x = xl;
+               /* x itself covers point */
+               } else if (rbtree->comp(x->begin, point) <= 0
+                          && rbtree->comp(point, x->end) < 0) {
+                       dbg_printf("got it!\n");
+                       break;
+               /* point lies after x's begin: only the right subtree remains */
+               } else if (rbtree->comp(point, x->begin) > 0) {
+                       dbg_printf("go right\n");
+                       x = rcu_dereference(x->_right);
+               } else {
+                       /* point lies before x's begin: give up, the nil node ends the loop */
+                       dbg_printf("not found!\n");
+                       x = make_nil(rbtree);
+               }
+       }
+       if (rcu_rbtree_is_nil(rbtree, x))
+               dbg_printf("Reached bottom of tree.\n");
+
+       return x;
+}
+
+/*
+ * Look up the node whose range contains "begin" (see
+ * rcu_rbtree_search()) and check that this node also extends at least
+ * up to "end".
+ *
+ * Returns the matching node. Returns the tree's nil node when no node
+ * contains "begin", or when the node found ends before "end". The
+ * result is tested with rcu_rbtree_is_nil(), exactly like the result
+ * of rcu_rbtree_search(); NULL is never returned.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree,
+                                         struct rcu_rbtree_node *x,
+                                         void *begin, void *end)
+{
+       struct rcu_rbtree_node *node;
+
+       node = rcu_rbtree_search(rbtree, x, begin);
+       if (rcu_rbtree_is_nil(rbtree, node))
+               return node;
+       /*
+        * High is outside lookup range. Report the miss with the nil
+        * node rather than NULL, so that both "not found" cases look the
+        * same to callers that test the result with rcu_rbtree_is_nil().
+        */
+       if (rbtree->comp(node->end, end) < 0)
+               return make_nil(rbtree);
+       return node;
+}
+
+/*
+ * Search by exact range start value.
+ *
+ * Plain binary search on the "begin" key, starting from subtree x.
+ * Returns the node whose begin compares equal to k, or the nil node if
+ * the search falls off the tree.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree,
+                                         struct rcu_rbtree_node *x,
+                                         void *k)
+{
+       int cmp;
+
+       x = rcu_dereference(x);
+
+       while (!rcu_rbtree_is_nil(rbtree, x)) {
+               cmp = rbtree->comp(k, x->begin);
+               if (cmp == 0)
+                       break;
+               dbg_usleep(10);
+               if (cmp < 0)
+                       x = rcu_dereference(x->_left);
+               else
+                       x = rcu_dereference(x->_right);
+       }
+       return x;
+}
+
+/*
+ * Find the minimum of subtree x while replacing every node on the way
+ * down the left spine by a copy (dup_decay_node()).
+ *
+ * *zr receives the copy of the subtree root x (or x itself if x is the
+ * nil node). Each copied left child gets its parent field pointed to
+ * the newest version of its parent, and that parent's _left is updated
+ * to the newest version of the child. Returns the copy of the leftmost
+ * node, or the nil node if x is nil.
+ */
+static
+struct rcu_rbtree_node *rcu_rbtree_min_dup_decay(struct rcu_rbtree *rbtree,
+                                                struct rcu_rbtree_node *x,
+                                                struct rcu_rbtree_node **zr)
+{
+       struct rcu_rbtree_node *xl;
+
+       x = rcu_dereference(x);
+
+       if (rcu_rbtree_is_nil(rbtree, x)) {
+               *zr = x;
+               return x;
+       } else
+               *zr = x = dup_decay_node(rbtree, x);
+
+       while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
+               x = dup_decay_node(rbtree, xl);
+               /* the copy still records the old parent: switch to its newest version */
+               set_parent(x, get_decay(get_parent(x)), get_pos(x));
+               /* and make that parent point down to the copy */
+               get_parent(x)->_left = get_decay(get_parent(x)->_left);
+       }
+       return x;
+}
+
+/*
+ * Walk down the left spine of subtree x. For x and for every node
+ * visited, re-point the parent field of both children to the newest
+ * version (get_decay()) of the parent they currently record, keeping
+ * their position bit.
+ *
+ * Returns the leftmost node, or the nil node if x is nil.
+ */
+static
+struct rcu_rbtree_node *rcu_rbtree_min_update_decay(struct rcu_rbtree *rbtree,
+                                                   struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *xl;
+
+       x = rcu_dereference(x);
+
+       if (rcu_rbtree_is_nil(rbtree, x))
+               return x;
+       else {
+               set_parent(x->_right, get_decay(get_parent(x->_right)),
+                          get_pos(x->_right));
+               set_parent(x->_left, get_decay(get_parent(x->_left)),
+                          get_pos(x->_left));
+       }
+
+       while (!rcu_rbtree_is_nil(rbtree, xl = rcu_dereference(x->_left))) {
+               x = xl;
+               set_parent(x->_right, get_decay(get_parent(x->_right)),
+                          get_pos(x->_right));
+               set_parent(x->_left, get_decay(get_parent(x->_left)),
+                          get_pos(x->_left));
+       }
+       return x;
+}
+
+/*
+ * Return the leftmost node of subtree x, following _left pointers
+ * through rcu_dereference(). A nil x is returned unchanged.
+ */
+struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree,
+                                      struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *cur, *left;
+
+       cur = rcu_dereference(x);
+       if (rcu_rbtree_is_nil(rbtree, cur))
+               return cur;
+
+       for (;;) {
+               left = rcu_dereference(cur->_left);
+               if (rcu_rbtree_is_nil(rbtree, left))
+                       break;
+               cur = left;
+       }
+       return cur;
+}
+
+/*
+ * Return the rightmost node of subtree x, following _right pointers
+ * through rcu_dereference(). A nil x is returned unchanged.
+ */
+struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree,
+                                      struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *cur, *right;
+
+       cur = rcu_dereference(x);
+       if (rcu_rbtree_is_nil(rbtree, cur))
+               return cur;
+
+       for (;;) {
+               right = rcu_dereference(cur->_right);
+               if (rcu_rbtree_is_nil(rbtree, right))
+                       break;
+               cur = right;
+       }
+       return cur;
+}
+
+/*
+ * RCU read lock must be held across the next/prev calls to ensure validity of
+ * the returned node.
+ *
+ * Successor of x: the minimum of the right subtree when there is one,
+ * otherwise the first ancestor reached by climbing out of a left
+ * child. Returns the nil node when the climb reaches the top.
+ */
+struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree,
+                                       struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *right, *up;
+       unsigned int pos;
+
+       x = rcu_dereference(x);
+
+       right = rcu_dereference(x->_right);
+       if (!rcu_rbtree_is_nil(rbtree, right))
+               return rcu_rbtree_min(rbtree, right);
+
+       /* climb for as long as we arrive from a right child */
+       for (up = get_parent_and_pos(x, &pos);
+            !rcu_rbtree_is_nil(rbtree, up) && pos == IS_RIGHT;
+            up = get_parent_and_pos(up, &pos))
+               ;
+       return up;
+}
+
+/*
+ * Predecessor of x: the maximum of the left subtree when there is one,
+ * otherwise the first ancestor reached by climbing out of a right
+ * child. Returns the nil node when the climb reaches the top.
+ */
+struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree,
+                                       struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *left, *up;
+       unsigned int pos;
+
+       x = rcu_dereference(x);
+
+       left = rcu_dereference(x->_left);
+       if (!rcu_rbtree_is_nil(rbtree, left))
+               return rcu_rbtree_max(rbtree, left);
+
+       /* climb for as long as we arrive from a left child */
+       for (up = get_parent_and_pos(x, &pos);
+            !rcu_rbtree_is_nil(rbtree, up) && pos == IS_LEFT;
+            up = get_parent_and_pos(up, &pos))
+               ;
+       return up;
+}
+
+/*
+ * Propagate max_end from "node" towards the root.
+ *
+ * "node" needs to be non-visible by readers.
+ *
+ * For node and then each of its ancestors, max_end is recomputed with
+ * calculate_node_max_end(). When copy_parents is non-zero, every
+ * ancestor reached is first replaced by a copy (dup_decay_node()) which
+ * is linked to the node handled just before it. The walk ends:
+ *  - when a node's recomputed max_end is the very same pointer as the
+ *    one it already holds: that node is then published into its parent
+ *    (or as root);
+ *  - when "stop" has been processed: return immediately, without
+ *    publishing anything;
+ *  - when the nil node is reached: the last node processed is
+ *    published as the new root.
+ * After publication, if copy_parents is set, the children of every node
+ * from the original "node" up to the publication point get their parent
+ * field re-pointed to the newest version of their parent.
+ */
+static
+void populate_node_end(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node,
+               unsigned int copy_parents, struct rcu_rbtree_node *stop)
+{
+       struct rcu_rbtree_node *prev = NULL, *orig_node = node, *top;
+
+       do {
+               void *max_end;
+
+               assert(node);
+               assert(!rcu_rbtree_is_nil(rbtree, node));
+
+               if (prev && copy_parents) {
+                       /* copy the ancestor and hang the lower level below it */
+                       node = dup_decay_node(rbtree, node);
+                       if (get_pos(prev) == IS_RIGHT)
+                               node->_right = prev;
+                       else
+                               node->_left = prev;
+                       set_parent(prev, node, get_pos(prev));
+               }
+
+               max_end = calculate_node_max_end(rbtree, node);
+               /*
+                * Compare the node max_end keys to make sure we replace
+                * references to a key belonging to a node we remove
+                * from the tree. Otherwise we would still be using this
+                * pointer as an invalid reference after garbage
+                * collection of the node and of its associated
+                * begin/end pointers.
+                */
+               if (max_end != node->max_end) {
+                       node->max_end = max_end;
+               } else {
+                       top = get_parent(node);
+                       cmm_smp_wmb();  /* write into node before publish */
+                       /*
+                        * Make new branch visible to readers. A root
+                        * node has the nil node as parent: only the
+                        * root pointer is updated in that case, the
+                        * children of the nil node are left alone (same
+                        * publish sequence as left_rotate() and
+                        * right_rotate()).
+                        */
+                       if (rcu_rbtree_is_nil(rbtree, top))
+                               _CMM_STORE_SHARED(rbtree->root, node);
+                       else if (get_pos(node) == IS_LEFT)
+                               _CMM_STORE_SHARED(top->_left, node);
+                       else
+                               _CMM_STORE_SHARED(top->_right, node);
+                       goto end;
+               }
+
+               /* Check for propagation stop */
+               if (node == stop)
+                       return;
+
+               prev = node;
+               node = get_parent(node);
+       } while (!rcu_rbtree_is_nil(rbtree, node));
+
+       top = node;     /* nil */
+       cmm_smp_wmb();  /* write into node before publish */
+       /* make new branch visible to readers */
+       _CMM_STORE_SHARED(rbtree->root, prev);
+
+end:
+       if (!copy_parents)
+               return;
+       /* update children */
+       node = orig_node;
+       do {
+               assert(!rcu_rbtree_is_nil(rbtree, node));
+               set_parent(node->_left, get_decay(get_parent(node->_left)), IS_LEFT);
+               set_parent(node->_right, get_decay(get_parent(node->_right)), IS_RIGHT);
+       } while ((node = get_parent(node)) != top);
+}
+
+/*
+ * We have to ensure these assumptions are correct for prev/next
+ * traversal:
+ *
+ * with x being a right child, the assumption that:
+ *   get_parent(x)->_right == x
+ * or if x is a left child, the assumption that:
+ *   get_parent(x)->_left == x
+ *
+ * This explains why we have to allocate a copy of the node for the
+ * left_rotate, right_rotate and transplant operations.
+ *
+ * We always ensure that the right/left child and correct parent is set in the
+ * node copies *before* we reparent the children and make the upper-level point
+ * to the copy.
+ */
+
+/* RCU: copy x and y, atomically point to new versions. GC old. */
+/* Should be eventually followed by a cmm_smp_wmc() */
+
+#ifdef RBTREE_RCU_SUPPORT_ROTATE_LEFT
+
+/*
+ * RCU left rotation around x: y = x->_right moves up, x becomes the
+ * left child of y and the former left subtree of y becomes the right
+ * subtree of x.
+ *
+ * x, y and y's left child are first replaced by copies
+ * (dup_decay_node()). The copies are wired together and get their
+ * max_end recomputed, then, after a write barrier, the parent link (or
+ * rbtree->root) is switched to the copy of y. Finally the parent field
+ * of the surrounding children is re-pointed to the newest node
+ * versions.
+ *
+ * A caller that still holds the old x or y has to refresh it with
+ * get_decay().
+ */
+static
+void left_rotate(struct rcu_rbtree *rbtree,
+                struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *y, *y_left;
+
+       dbg_printf("left rotate %lx\n", (unsigned long) x->begin);
+
+       y = x->_right;
+       y_left = y->_left;
+
+       /* Now operate on new copy, decay old versions */
+       x = dup_decay_node(rbtree, x);
+       y = dup_decay_node(rbtree, y);
+       y_left = dup_decay_node(rbtree, y_left);
+
+       check_max_end(rbtree, get_parent(x));
+       check_max_end(rbtree, x);
+       check_max_end(rbtree, y);
+
+       /* Internal node modifications */
+       set_parent(y, get_parent(x), get_pos(x));
+       set_parent(x, y, IS_LEFT);
+       set_left(rbtree, y, x);
+       set_right(rbtree, x, y_left);
+
+       if (!rcu_rbtree_is_nil(rbtree, y_left))
+               set_parent(y_left, x, IS_RIGHT);
+
+       /*
+        * We only changed the relative position of x and y wrt their
+        * children, and reparented y (but are keeping the same nodes in
+        * place, so its parent does not need to have end value
+        * recalculated).
+        */
+       x->max_end = calculate_node_max_end(rbtree, x);
+       y->max_end = calculate_node_max_end(rbtree, y);
+
+       cmm_smp_wmb();  /* write into node before publish */
+
+       /* External references update (visible by readers) */
+       if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
+               _CMM_STORE_SHARED(rbtree->root, y);
+       else if (get_pos(y) == IS_LEFT)
+               _CMM_STORE_SHARED(get_parent(y)->_left, y);
+       else
+               _CMM_STORE_SHARED(get_parent(y)->_right, y);
+
+       /* Point children to new copy (parent only used by updates/next/prev) */
+       set_parent(x->_left, get_decay(get_parent(x->_left)),
+               get_pos(x->_left));
+       set_parent(y->_right, get_decay(get_parent(y->_right)),
+               get_pos(y->_right));
+       if (!rcu_rbtree_is_nil(rbtree, y_left)) {
+               set_parent(y_left->_right,
+                       get_decay(get_parent(y_left->_right)),
+                       get_pos(y_left->_right));
+               set_parent(y_left->_left,
+                       get_decay(get_parent(y_left->_left)),
+                       get_pos(y_left->_left));
+       }
+
+       /* Sanity checks */
+       assert(y == rbtree->root || get_parent(y)->_left == y
+               || get_parent(y)->_right == y);
+       assert(x == rbtree->root || get_parent(x)->_left == x
+               || get_parent(x)->_right == x);
+       assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
+       assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
+       assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
+       assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
+       assert(!is_decay(rbtree->root));
+       assert(!is_decay(x));
+       assert(!is_decay(y));
+       assert(!is_decay(x->_right));
+       assert(!is_decay(x->_left));
+       assert(!is_decay(y->_right));
+       assert(!is_decay(y->_left));
+       check_max_end(rbtree, get_parent(y));
+       check_max_end(rbtree, x);
+       check_max_end(rbtree, y);
+}
+
+#else
+
+/*
+ * non-rcu version
+ *
+ * In-place left rotation around x, done between lock_test_mutex() and
+ * unlock_test_mutex(): y = x->_right takes the place of x, x becomes
+ * the left child of y and the former left subtree of y becomes the
+ * right subtree of x. No node is copied and max_end is not touched.
+ */
+static
+void left_rotate(struct rcu_rbtree *rbtree,
+                struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *y, *inner, *xp;
+
+       lock_test_mutex();
+       y = x->_right;
+       inner = y->_left;
+
+       /* hand y's left subtree over to x */
+       x->_right = inner;
+       if (!rcu_rbtree_is_nil(rbtree, inner))
+               set_parent(inner, x, IS_RIGHT);
+
+       /* y takes x's place below x's parent (or as root) */
+       xp = get_parent(x);
+       set_parent(y, xp, get_pos(x));
+       if (rcu_rbtree_is_nil(rbtree, xp))
+               rbtree->root = y;
+       else if (xp->_left == x)
+               xp->_left = y;
+       else
+               xp->_right = y;
+
+       /* x goes below y */
+       y->_left = x;
+       set_parent(x, y, IS_LEFT);
+       unlock_test_mutex();
+}
+
+#endif
+
+#ifdef RBTREE_RCU_SUPPORT_ROTATE_RIGHT
+/*
+ * RCU right rotation around x, mirror image of left_rotate():
+ * y = x->_left moves up, x becomes the right child of y and the former
+ * right subtree of y becomes the left subtree of x.
+ *
+ * x, y and y's right child are first replaced by copies
+ * (dup_decay_node()). The copies are wired together and get their
+ * max_end recomputed, then, after a write barrier, the parent link (or
+ * rbtree->root) is switched to the copy of y. Finally the parent field
+ * of the surrounding children is re-pointed to the newest node
+ * versions.
+ *
+ * A caller that still holds the old x or y has to refresh it with
+ * get_decay().
+ */
+static
+void right_rotate(struct rcu_rbtree *rbtree,
+                 struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *y, *y_right;
+
+       dbg_printf("right rotate %lx\n", (unsigned long) x->begin);
+
+       y = x->_left;
+       y_right = y->_right;
+
+       /* Now operate on new copy, decay old versions */
+       x = dup_decay_node(rbtree, x);
+       y = dup_decay_node(rbtree, y);
+       y_right = dup_decay_node(rbtree, y_right);
+
+       check_max_end(rbtree, get_parent(x));
+       check_max_end(rbtree, x);
+       check_max_end(rbtree, y);
+
+       /* Internal node modifications */
+       set_parent(y, get_parent(x), get_pos(x));
+       set_parent(x, y, IS_RIGHT);
+       set_right(rbtree, y, x);
+       set_left(rbtree, x, y_right);
+
+       if (!rcu_rbtree_is_nil(rbtree, y_right))
+               set_parent(y_right, x, IS_LEFT);
+
+       /*
+        * We only changed the relative position of x and y wrt their
+        * children, and reparented y (but are keeping the same nodes in
+        * place, so its parent does not need to have end value
+        * recalculated).
+        */
+       x->max_end = calculate_node_max_end(rbtree, x);
+       y->max_end = calculate_node_max_end(rbtree, y);
+
+       cmm_smp_wmb();  /* write into node before publish */
+
+       /* External references update (visible by readers) */
+       if (rcu_rbtree_is_nil(rbtree, get_parent(y)))
+               _CMM_STORE_SHARED(rbtree->root, y);
+       else if (get_pos(y) == IS_RIGHT)
+               _CMM_STORE_SHARED(get_parent(y)->_right, y);
+       else
+               _CMM_STORE_SHARED(get_parent(y)->_left, y);
+
+       /* Point children to new copy (parent only used by updates/next/prev) */
+       set_parent(x->_right, get_decay(get_parent(x->_right)),
+               get_pos(x->_right));
+       set_parent(y->_left, get_decay(get_parent(y->_left)),
+               get_pos(y->_left));
+       if (!rcu_rbtree_is_nil(rbtree, y_right)) {
+               set_parent(y_right->_left,
+                       get_decay(get_parent(y_right->_left)),
+                       get_pos(y_right->_left));
+               set_parent(y_right->_right,
+                       get_decay(get_parent(y_right->_right)),
+                       get_pos(y_right->_right));
+       }
+
+       /* Sanity checks */
+       assert(y == rbtree->root || get_parent(y)->_right == y
+               || get_parent(y)->_left == y);
+       assert(x == rbtree->root || get_parent(x)->_right == x
+               || get_parent(x)->_left == x);
+       assert(rcu_rbtree_is_nil(rbtree, x->_left) || get_parent(x->_left) == x);
+       assert(rcu_rbtree_is_nil(rbtree, x->_right) || get_parent(x->_right) == x);
+       assert(rcu_rbtree_is_nil(rbtree, y->_left) || get_parent(y->_left) == y);
+       assert(rcu_rbtree_is_nil(rbtree, y->_right) || get_parent(y->_right) == y);
+       assert(!is_decay(rbtree->root));
+       assert(!is_decay(x));
+       assert(!is_decay(y));
+       assert(!is_decay(x->_left));
+       assert(!is_decay(x->_right));
+       assert(!is_decay(y->_left));
+       assert(!is_decay(y->_right));
+       check_max_end(rbtree, x);
+       check_max_end(rbtree, y);
+       check_max_end(rbtree, get_parent(y));
+}
+
+#else
+
+/*
+ * non-rcu version
+ *
+ * In-place right rotation around x, done between lock_test_mutex() and
+ * unlock_test_mutex(): y = x->_left takes the place of x, x becomes
+ * the right child of y and the former right subtree of y becomes the
+ * left subtree of x. No node is copied and max_end is not touched.
+ */
+static
+void right_rotate(struct rcu_rbtree *rbtree,
+                 struct rcu_rbtree_node *x)
+{
+       struct rcu_rbtree_node *y, *inner, *xp;
+
+       lock_test_mutex();
+       y = x->_left;
+       inner = y->_right;
+
+       /* hand y's right subtree over to x */
+       x->_left = inner;
+       if (!rcu_rbtree_is_nil(rbtree, inner))
+               set_parent(inner, x, IS_LEFT);
+
+       /* y takes x's place below x's parent (or as root) */
+       xp = get_parent(x);
+       set_parent(y, xp, get_pos(x));
+       if (rcu_rbtree_is_nil(rbtree, xp))
+               rbtree->root = y;
+       else if (xp->_right == x)
+               xp->_right = y;
+       else
+               xp->_left = y;
+
+       /* x goes below y */
+       y->_right = x;
+       set_parent(x, y, IS_RIGHT);
+       unlock_test_mutex();
+}
+
+#endif
+
+/*
+ * Restore the red/black colouring after z has been linked in as a red
+ * node.
+ *
+ * Loops for as long as the parent of z is red. y is the sibling of z's
+ * parent: when y is red the colours are flipped and z moves two levels
+ * up, otherwise one or two rotations are done. After the first
+ * rotation z is refreshed with get_decay(), because a rotation may
+ * replace the nodes it touches by copies. The root is coloured black
+ * on exit.
+ */
+static void rcu_rbtree_insert_fixup(struct rcu_rbtree *rbtree,
+                                   struct rcu_rbtree_node *z)
+{
+       struct rcu_rbtree_node *y;
+
+       dbg_printf("insert fixup %p\n", z->begin);
+       assert(!is_decay(rbtree->root));
+
+       while (get_parent(z)->color == COLOR_RED) {
+               if (get_parent(z) == get_parent(get_parent(z))->_left) {
+                       /* z's parent is a left child */
+                       y = get_parent(get_parent(z))->_right;
+                       if (y->color == COLOR_RED) {
+                               get_parent(z)->color = COLOR_BLACK;
+                               y->color = COLOR_BLACK;
+                               get_parent(get_parent(z))->color = COLOR_RED;
+                               z = get_parent(get_parent(z));
+                       } else {
+                               if (z == get_parent(z)->_right) {
+                                       z = get_parent(z);
+                                       left_rotate(rbtree, z);
+                                       z = get_decay(z);
+                                       assert(!is_decay(rbtree->root));
+                               }
+                               get_parent(z)->color = COLOR_BLACK;
+                               get_parent(get_parent(z))->color = COLOR_RED;
+                               assert(!is_decay(z));
+                               assert(!is_decay(get_parent(z)));
+                               assert(!is_decay(get_parent(get_parent(z))));
+                               right_rotate(rbtree, get_parent(get_parent(z)));
+                               assert(!is_decay(z));
+                               assert(!is_decay(rbtree->root));
+                       }
+               } else {
+                       /* mirror case: z's parent is a right child */
+                       y = get_parent(get_parent(z))->_left;
+                       if (y->color == COLOR_RED) {
+                               get_parent(z)->color = COLOR_BLACK;
+                               y->color = COLOR_BLACK;
+                               get_parent(get_parent(z))->color = COLOR_RED;
+                               z = get_parent(get_parent(z));
+                       } else {
+                               if (z == get_parent(z)->_left) {
+                                       z = get_parent(z);
+                                       right_rotate(rbtree, z);
+                                       z = get_decay(z);
+                                       assert(!is_decay(rbtree->root));
+                               }
+                               get_parent(z)->color = COLOR_BLACK;
+                               get_parent(get_parent(z))->color = COLOR_RED;
+                               left_rotate(rbtree, get_parent(get_parent(z)));
+                               assert(!is_decay(z));
+                               assert(!is_decay(rbtree->root));
+                       }
+               }
+       }
+       rbtree->root->color = COLOR_BLACK;
+}
+
+/*
+ * rcu_rbtree_insert - Insert a node in the RCU rbtree
+ *
+ * A node is allocated for the range starting at "begin" and ending at
+ * "end", placed according to the comparison of "begin" with the begin
+ * keys already in the tree, published, and the tree is then
+ * rebalanced with rcu_rbtree_insert_fixup().
+ *
+ * Returns 0 on success, or < 0 on error (-ENOMEM when the node cannot
+ * be allocated).
+ */
+int rcu_rbtree_insert(struct rcu_rbtree *rbtree,
+                     void *begin, void *end)
+{
+       struct rcu_rbtree_node *cur, *parent, *z;
+
+       z = _rcu_rbtree_alloc_node(rbtree);
+       if (z == NULL)
+               return -ENOMEM;
+       z->begin = begin;
+       z->end = end;
+
+       dbg_printf("insert %p\n", z->begin);
+       assert(!is_decay(rbtree->root));
+
+       /* Descend to the insertion point; "parent" trails one level behind. */
+       parent = make_nil(rbtree);
+       for (cur = rbtree->root; !rcu_rbtree_is_nil(rbtree, cur); ) {
+               parent = cur;
+               if (rbtree->comp(z->begin, cur->begin) < 0)
+                       cur = cur->_left;
+               else
+                       cur = cur->_right;
+       }
+
+       /* Finish the initialization of z while it is still private. */
+       z->_left = make_nil(rbtree);
+       z->_right = make_nil(rbtree);
+       z->color = COLOR_RED;
+       z->decay_next = NULL;
+       z->max_end = z->end;
+       z->rbtree = rbtree;
+
+       if (rcu_rbtree_is_nil(rbtree, parent)) {
+               set_parent(z, parent, IS_RIGHT); /* pos arbitrary for root node */
+               /*
+                * Order stores to z (children/parents) before stores
+                * that will make it visible to the rest of the tree.
+                */
+               cmm_smp_wmb();
+               _CMM_STORE_SHARED(rbtree->root, z);
+       } else {
+               unsigned int side;
+
+               /* The side is decided against the current version of the parent. */
+               if (rbtree->comp(z->begin, parent->begin) < 0)
+                       side = IS_LEFT;
+               else
+                       side = IS_RIGHT;
+
+               /* z is attached to a copy of its parent, not yet published. */
+               parent = dup_decay_node(rbtree, parent);
+               set_parent(z, parent, side);
+               if (get_pos(z) == IS_LEFT)
+                       _CMM_STORE_SHARED(parent->_left, z);
+               else
+                       _CMM_STORE_SHARED(parent->_right, z);
+               populate_node_end(rbtree, parent, 1, NULL);
+       }
+       rcu_rbtree_insert_fixup(rbtree, z);
+       /*
+        * Make sure to commit all _CMM_STORE_SHARED() for non-coherent caches.
+        */
+       cmm_smp_wmc();
+       show_tree(rbtree);
+       check_max_end(rbtree, z);
+       check_max_end(rbtree, parent);
+
+       return 0;
+}
+
+/*
+ * Transplant v into u position.
+ */
+
+#ifdef RBTREE_RCU_SUPPORT_TRANSPLANT
+
+/*
+ * RCU version: put (a copy of) v where u currently hangs.
+ *
+ * A non-nil v is first replaced by a copy. If u is the root, the copy
+ * becomes the new root. Otherwise v is attached, on u's side, to u's
+ * parent (itself replaced by a copy when copy_parents is set) and
+ * populate_node_end() is run from that parent, with copy_parents and
+ * stop passed through. Finally the parent field of v's children is
+ * re-pointed to the newest version of their parent.
+ */
+static
+void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
+                       struct rcu_rbtree_node *u,
+                       struct rcu_rbtree_node *v,
+                       unsigned int copy_parents,
+                       struct rcu_rbtree_node *stop)
+{
+       dbg_printf("transplant %p\n", v->begin);
+
+       if (!rcu_rbtree_is_nil(rbtree, v))
+               v = dup_decay_node(rbtree, v);
+
+       if (rcu_rbtree_is_nil(rbtree, get_parent(u))) {
+               /* pos is arbitrary for root node */
+               set_parent(v, get_parent(u), IS_RIGHT);
+               cmm_smp_wmb();  /* write into node before publish */
+               _CMM_STORE_SHARED(rbtree->root, v);
+       } else {
+               struct rcu_rbtree_node *vp;
+
+               vp = get_parent(u);
+               if (copy_parents)
+                       vp = dup_decay_node(rbtree, vp);
+               set_parent(v, vp, get_pos(u));
+               if (get_pos(v) == IS_LEFT)
+                       _CMM_STORE_SHARED(vp->_left, v);
+               else
+                       _CMM_STORE_SHARED(vp->_right, v);
+               populate_node_end(rbtree, vp, copy_parents, stop);
+               check_max_end(rbtree, vp);
+       }
+
+       /* Point children to new copy (parent only used by updates/next/prev) */
+       if (!rcu_rbtree_is_nil(rbtree, v)) {
+               set_parent(v->_right, get_decay(get_parent(v->_right)),
+                       get_pos(v->_right));
+               set_parent(v->_left, get_decay(get_parent(v->_left)),
+                       get_pos(v->_left));
+       }
+       assert(!is_decay(rbtree->root));
+       check_max_end(rbtree, v);
+}
+
+#else
+
+/*
+ * Non-RCU version
+ *
+ * In-place transplant, done between lock_test_mutex() and
+ * unlock_test_mutex(): the link that leads to u (root pointer or child
+ * pointer of u's parent) is redirected to v, and v takes over u's
+ * parent and position. copy_parents and stop are not used.
+ */
+static
+void rcu_rbtree_transplant(struct rcu_rbtree *rbtree,
+                          struct rcu_rbtree_node *u,
+                          struct rcu_rbtree_node *v,
+                          unsigned int copy_parents,
+                          struct rcu_rbtree_node *stop)
+{
+       struct rcu_rbtree_node *up;
+
+       dbg_printf("transplant %p\n", v->begin);
+
+       lock_test_mutex();
+       up = get_parent(u);
+       if (rcu_rbtree_is_nil(rbtree, up))
+               rbtree->root = v;
+       else if (up->_left == u)
+               up->_left = v;
+       else
+               up->_right = v;
+       set_parent(v, get_parent(u), get_pos(u));
+       unlock_test_mutex();
+}
+
+#endif
+
+/*
+ * Restore the red/black colouring after a removal, starting at x.
+ *
+ * Loops for as long as x is not the root and is black; w is the
+ * sibling of x. A rotation may replace the nodes it touches by copies,
+ * so after a rotation that is followed by further use of x, x is
+ * refreshed with get_decay() and w is read again. x is coloured black
+ * on exit.
+ */
+static void rcu_rbtree_remove_fixup(struct rcu_rbtree *rbtree,
+                                   struct rcu_rbtree_node *x)
+{
+       dbg_printf("remove fixup %p\n", x->begin);
+
+       while (x != rbtree->root && x->color == COLOR_BLACK) {
+               assert(!is_decay(get_parent(x)));
+               assert(!is_decay(get_parent(x)->_left));
+               if (x == get_parent(x)->_left) {
+                       /* x is a left child, its sibling w is on the right */
+                       struct rcu_rbtree_node *w;
+
+                       w = get_parent(x)->_right;
+
+                       if (w->color == COLOR_RED) {
+                               w->color = COLOR_BLACK;
+                               get_parent(x)->color = COLOR_RED;
+                               left_rotate(rbtree, get_parent(x));
+                               x = get_decay(x);
+                               assert(!is_decay(rbtree->root));
+                               w = get_parent(x)->_right;
+                       }
+                       if (w->_left->color == COLOR_BLACK
+                           && w->_right->color == COLOR_BLACK) {
+                               w->color = COLOR_RED;
+                               x = get_parent(x);
+                               assert(!is_decay(rbtree->root));
+                               assert(!is_decay(x));
+                       } else {
+                               if (w->_right->color == COLOR_BLACK) {
+                                       w->_left->color = COLOR_BLACK;
+                                       w->color = COLOR_RED;
+                                       right_rotate(rbtree, w);
+                                       assert(!is_decay(rbtree->root));
+                                       x = get_decay(x);
+                                       w = get_parent(x)->_right;
+                               }
+                               w->color = get_parent(x)->color;
+                               get_parent(x)->color = COLOR_BLACK;
+                               w->_right->color = COLOR_BLACK;
+                               left_rotate(rbtree, get_parent(x));
+                               assert(!is_decay(rbtree->root));
+                               /* done: moving x to the root ends the loop */
+                               x = rbtree->root;
+                       }
+               } else {
+                       /* mirror case: x is a right child, w is on the left */
+                       struct rcu_rbtree_node *w;
+
+                       w = get_parent(x)->_left;
+
+                       if (w->color == COLOR_RED) {
+                               w->color = COLOR_BLACK;
+                               get_parent(x)->color = COLOR_RED;
+                               right_rotate(rbtree, get_parent(x));
+                               assert(!is_decay(rbtree->root));
+                               x = get_decay(x);
+                               w = get_parent(x)->_left;
+                       }
+                       if (w->_right->color == COLOR_BLACK
+                           && w->_left->color == COLOR_BLACK) {
+                               w->color = COLOR_RED;
+                               x = get_parent(x);
+                               assert(!is_decay(rbtree->root));
+                               assert(!is_decay(x));
+                       } else {
+                               if (w->_left->color == COLOR_BLACK) {
+                                       w->_right->color = COLOR_BLACK;
+                                       w->color = COLOR_RED;
+                                       left_rotate(rbtree, w);
+                                       assert(!is_decay(rbtree->root));
+                                       x = get_decay(x);
+                                       w = get_parent(x)->_left;
+                               }
+                               w->color = get_parent(x)->color;
+                               get_parent(x)->color = COLOR_BLACK;
+                               w->_left->color = COLOR_BLACK;
+                               right_rotate(rbtree, get_parent(x));
+                               assert(!is_decay(rbtree->root));
+                               /* done: moving x to the root ends the loop */
+                               x = rbtree->root;
+                       }
+               }
+       }
+       x->color = COLOR_BLACK;
+}
+
+/*
+ * Delete z. All non-copied children left/right positions are unchanged.
+ *
+ * Helper for rcu_rbtree_remove(), which calls it when neither child of z
+ * is nil, passing y = rcu_rbtree_min(rbtree, z->_right), i.e. the node
+ * that takes z's place in the tree.
+ *
+ * Two cases are handled:
+ * - y is the direct child of z: y is replaced by the node returned by
+ *   dup_decay_node(), receives z's left subtree, has its max_end
+ *   recomputed and is transplanted over z.
+ * - otherwise: rcu_rbtree_min_dup_decay() is run on z's right subtree
+ *   and hands back both y and the node to use as y's right child
+ *   (z_right); y's former right child is transplanted into y's old
+ *   position, then y is transplanted over z.
+ *
+ * In both cases y finally takes z's color and the parent pointers of its
+ * two children are refreshed. z is only read here: it is neither freed
+ * nor recolored.
+ *
+ * NOTE(review): dup_decay_node(), rcu_rbtree_min_dup_decay(), get_decay()
+ * and rcu_rbtree_transplant() are defined outside this excerpt; presumably
+ * the "decay" helpers copy nodes so that readers keep seeing a consistent
+ * older version -- confirm against their definitions.
+ */
+static
+void rcu_rbtree_remove_nonil(struct rcu_rbtree *rbtree,
+                            struct rcu_rbtree_node *z,
+                            struct rcu_rbtree_node *y)
+{
+       struct rcu_rbtree_node *x;
+
+       dbg_printf("remove nonil %p\n", z->begin);
+       show_tree(rbtree);
+
+       assert(!is_decay(z));
+       assert(!is_decay(y));
+       assert(!is_decay(y->_right));
+       assert(!is_decay(get_parent(y)));
+       x = y->_right;
+       assert(!is_decay(x));
+       if (get_parent(y) == z) {
+               y = dup_decay_node(rbtree, y);
+               set_parent(x, y, get_pos(x));   /* parent for nil */
+               /* y is z's right node */
+               set_left(rbtree, y, z->_left);
+               y->max_end = calculate_node_max_end(rbtree, y);
+               rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
+       } else {
+               struct rcu_rbtree_node *oy_right, *z_right;
+
+               /*
+                * Need to make sure y is always visible by readers.
+                */
+               y = rcu_rbtree_min_dup_decay(rbtree, z->_right, &z_right);
+               assert(!is_decay(y));
+               assert(!is_decay(z));
+               oy_right = y->_right;
+
+               /*
+                * The max child begin of z_right does not change, because
+                * we're only changing its left children.
+                */
+               y->_right = z_right;
+               set_parent(y->_right, y, IS_RIGHT);
+               assert(!is_decay(z->_left));
+               y->_left = z->_left;
+               assert(!is_decay(oy_right));
+               /*
+                * Transplant of oy_right to old y's location will only
+                * trigger a "end" value update of the already copied branch
+                * (which is not visible yet). We are transplanting
+                * oy_right as a left child of old y's parent, so the
+                * min values update propagated upward necessarily stops
+                * at z_right.
+                */
+               rcu_rbtree_transplant(rbtree, y, oy_right, 0, y);
+               y->max_end = calculate_node_max_end(rbtree, y);
+               rcu_rbtree_transplant(rbtree, z, y, 1, NULL);
+               /* Update children */
+               (void) rcu_rbtree_min_update_decay(rbtree, y->_right);
+       }
+       y = get_decay(y);
+       assert(!is_decay(z));
+       assert(!is_decay(z->_left));
+       y->color = z->color;
+       set_parent(y->_left, y, IS_LEFT);
+       set_parent(y->_right, get_decay(get_parent(y->_right)), IS_RIGHT);
+       assert(!is_decay(y->_left));
+       assert(!is_decay(y->_right));
+}
+
+int rcu_rbtree_remove(struct rcu_rbtree *rbtree,
+                     struct rcu_rbtree_node *z)
+{      /*
+        * Remove node z from rbtree and hand it to the tree's call_rcu
+        * callback.
+        *
+        * Three cases are distinguished:
+        * - z's left child is nil: z's right child is transplanted over z;
+        * - z's right child is nil: z's left child is transplanted over z;
+        * - otherwise: y, the minimum of z's right subtree, replaces z
+        *   (see rcu_rbtree_remove_nonil()).
+        * x tracks the node that moved into the vacated position; it is
+        * where rcu_rbtree_remove_fixup() starts when the color taken out
+        * of the tree (y_original_color) was black.
+        *
+        * z and the tree root must not be decayed nodes (asserted below).
+        * Every path visible in this function returns 0.
+        *
+        * NOTE(review): get_decay() is presumably used to re-read x and y
+        * after helpers that may have replaced them by copies -- confirm
+        * against its definition.
+        */
+       struct rcu_rbtree_node *x, *y;
+       unsigned int y_original_color;
+
+       assert(!is_decay(rbtree->root));
+       dbg_printf("remove %p\n", z->begin);
+       show_tree(rbtree);
+
+       assert(!is_decay(z));
+       y = z;
+       y_original_color = y->color;
+
+       if (rcu_rbtree_is_nil(rbtree, z->_left)) {
+               rcu_rbtree_transplant(rbtree, z, z->_right, 1, NULL);
+               assert(!is_decay(z));
+               x = get_decay(z->_right);
+               show_tree(rbtree);
+       } else if (rcu_rbtree_is_nil(rbtree, z->_right)) {
+               rcu_rbtree_transplant(rbtree, z, z->_left, 1, NULL);
+               assert(!is_decay(z));
+               x = get_decay(z->_left);
+               show_tree(rbtree);
+       } else {
+               y = rcu_rbtree_min(rbtree, z->_right);
+               assert(!is_decay(y));
+               y_original_color = y->color;
+               x = y->_right;
+               rcu_rbtree_remove_nonil(rbtree, z, y);
+               x = get_decay(x);
+               show_tree(rbtree);
+       }
+       if (y_original_color == COLOR_BLACK)
+               rcu_rbtree_remove_fixup(rbtree, x);
+       show_tree(rbtree);
+       check_max_end(rbtree, x);
+       check_max_end(rbtree, get_decay(y));
+       /*
+        * Commit all _CMM_STORE_SHARED().
+        * z is then passed to the tree's call_rcu callback, with
+        * _rcu_rbtree_free_node as the function to invoke on it.
+        */
+       cmm_smp_wmc();
+       rbtree->call_rcu(&z->head, _rcu_rbtree_free_node);
+
+       return 0;
+}
diff --git a/urcu/rcurbtree.h b/urcu/rcurbtree.h
new file mode 100644 (file)
index 0000000..4ee639e
--- /dev/null
@@ -0,0 +1,206 @@
+#ifndef URCU_RBTREE_H
+#define URCU_RBTREE_H
+
+/*
+ * urcu/rcurbtree.h
+ *
+ * Userspace RCU library - Red-Black Tree
+ *
+ * Copyright (c) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Implementation of RCU-adapted data structures and operations based on the RB
+ * tree algorithms found in chapter 12 of:
+ *
+ * Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and
+ * Clifford Stein. Introduction to Algorithms, Third Edition. The MIT
+ * Press, September 2009.
+ */
+
+#include <pthread.h>
+#include <urcu-call-rcu.h>
+
+#define COLOR_BLACK    0       /* values of the 1-bit "color" node field */
+#define COLOR_RED      1
+
+#define IS_LEFT                0       /* child position, low bit of "parent" */
+#define IS_RIGHT       1
+
+/*
+ * Node key comparison function.
+ * < 0 : a lower than b.
+ * > 0 : a greater than b.
+ * == 0 : a equals b.
+ *
+ * Stored in struct rcu_rbtree as "comp".
+ * NOTE(review): a and b are presumably range bounds or search keys of the
+ * same kind as the node "begin"/"end" values -- confirm against callers.
+ */
+typedef int (*rcu_rbtree_comp)(void *a, void *b);
+
+/*
+ * Allocation and deletion functions.
+ *
+ * Stored in struct rcu_rbtree as "rballoc" and "rbfree".
+ * rcu_rbtree_alloc takes a size in bytes and returns a pointer;
+ * rcu_rbtree_free takes the pointer to release.
+ */
+typedef void *(*rcu_rbtree_alloc)(size_t size);
+typedef void (*rcu_rbtree_free)(void *ptr);
+
+/*
+ * Tree node describing the range starting at "begin" (inclusive) and
+ * finishing at "end" (exclusive).
+ *
+ * struct rcu_rbtree_node must be aligned at least on 2 bytes.
+ * Lowest bit reserved for position (left/right) in pointer to parent.
+ *
+ * Set "end" to key + 1 (with "begin" set to key) to insert single-value
+ * nodes.
+ */
+struct rcu_rbtree_node {
+       /* useful node information returned by search */
+       void *begin;            /* Start of range (inclusive) */
+       void *end;              /* range end (exclusive) */
+       /* internally reserved */
+       void *max_end;          /* max range end of node and children */
+       /* parent uses low bit for "0 -> is left, 1 -> is right" */
+       unsigned long parent;
+       /* _left and _right must be updated with set_left(), set_right() */
+       struct rcu_rbtree_node *_left, *_right;
+       struct rcu_rbtree_node *decay_next;     /* presumably next copy of a decayed node -- TODO confirm */
+       struct rcu_rbtree *rbtree;      /* presumably the owning tree -- TODO confirm */
+       struct rcu_head head;           /* For delayed free */
+       unsigned int color:1;           /* COLOR_BLACK or COLOR_RED */
+};
+
+struct rcu_rbtree {    /* tree handle: root, nil sentinel and user callbacks */
+       struct rcu_rbtree_node *root;   /* &nil_node when set up by DEFINE_RCU_RBTREE() */
+       struct rcu_rbtree_node nil_node;        /* sentinel, see rcu_rbtree_is_nil() */
+       rcu_rbtree_comp comp;           /* key comparison callback */
+       rcu_rbtree_alloc rballoc;       /* allocation callback */
+       rcu_rbtree_free rbfree;         /* deallocation callback */
+       void (*call_rcu)(struct rcu_head *head,
+                        void (*func)(struct rcu_head *head)); /* rcu_rbtree_remove() passes removed nodes to it */
+};
+
+#define DEFINE_RCU_RBTREE(x, _comp, _rballoc, _rbfree, _call_rcu) \
+       struct rcu_rbtree x =   /* defines and initializes a tree named x */ \
+               {                                       \
+                       .comp = _comp,                  \
+                       .rballoc = _rballoc,            \
+                       .rbfree = _rbfree,              \
+                       .call_rcu = _call_rcu,          \
+                       .nil_node = {                   \
+                               .color = COLOR_BLACK,   /* sentinel is black */ \
+                       },                              \
+                       .root = &(x).nil_node,          /* root starts as the sentinel */ \
+               };      /* NOTE(review): expansion already ends with ';' -- a ';' at the use site adds an empty declaration */
+
+/*
+ * Each of the search primitives and the "prev"/"next" iterations must be
+ * called with the RCU read-side lock held.
+ *
+ * Insertion and removal must be protected by a mutex. At the moment, insertion
+ * and removal use defer_rcu, so calling them with rcu read-lock held is
+ * prohibited.
+ *
+ * NOTE(review): the per-function comments below instead ask callers to hold
+ * the RCU read-side lock, and struct rcu_rbtree carries a call_rcu callback
+ * rather than defer_rcu; the paragraph above looks stale -- confirm which
+ * rule applies.
+ */
+
+/*
+ * Node insertion. Returns 0 on success. May fail with -ENOMEM.
+ * Caller must have exclusive write access and hold RCU read-side lock.
+ *
+ * The inserted range starts at "begin" (inclusive) and finishes at "end"
+ * (exclusive), matching the fields of struct rcu_rbtree_node.
+ */
+int rcu_rbtree_insert(struct rcu_rbtree *rbtree,
+                     void *begin, void *end);
+
+/*
+ * Remove node from tree.
+ * Must wait for a grace period after removal before performing deletion of the
+ * node. Note: it is illegal to re-use the same node pointer passed to "insert"
+ * also to "remove", because it may have been copied and garbage-collected since
+ * the insertion. A "search" for the key in the tree should be done to get
+ * "node".
+ * Returns 0 on success. May fail with -ENOMEM.
+ *
+ * Caller must have exclusive write access and hold RCU read-side lock
+ * across "search" and "remove".
+ */
+int rcu_rbtree_remove(struct rcu_rbtree *rbtree,
+                     struct rcu_rbtree_node *node);
+
+/* RCU read-side */
+
+/*
+ * For all these read-side primitives, the RCU read-side lock must be held
+ * across the duration for which the search is done and the returned
+ * rbtree node is expected to be valid.
+ */
+
+/*
+ * Search point in range starting from node x (node x is typically the
+ * rbtree root node). Returns nil node if not found.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search(struct rcu_rbtree *rbtree,
+                                         struct rcu_rbtree_node *x,
+                                         void *point);
+
+/*
+ * Search range starting from node x (typically the rbtree root node).
+ * Returns the first range containing the range specified as parameters.
+ * Returns nil node if not found.
+ *
+ * Note: ranges in the rbtree should not partially overlap when this search
+ * range function is used. Otherwise, a range matching the low value (but not
+ * containing the high value) could hide a range that would match this query.
+ * It is OK for the ranges to overlap entirely though.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_range(struct rcu_rbtree *rbtree,
+                                         struct rcu_rbtree_node *x,
+                                         void *begin, void *end);
+
+/*
+ * Search exact range begin value starting from node x (typically rbtree
+ * root node). Returns nil node if not found.
+ * This function is only useful if you do not use the range feature at
+ * all and only care about range begin values.
+ */
+struct rcu_rbtree_node *rcu_rbtree_search_begin_key(struct rcu_rbtree *rbtree,
+                                         struct rcu_rbtree_node *x,
+                                         void *key);
+
+/*
+ * Search for minimum node of the tree under node x.
+ */
+struct rcu_rbtree_node *rcu_rbtree_min(struct rcu_rbtree *rbtree,
+                                      struct rcu_rbtree_node *x);
+
+/*
+ * Search for maximum node of the tree under node x.
+ */
+struct rcu_rbtree_node *rcu_rbtree_max(struct rcu_rbtree *rbtree,
+                                      struct rcu_rbtree_node *x);
+
+/*
+ * Get next node after node x.
+ */
+struct rcu_rbtree_node *rcu_rbtree_next(struct rcu_rbtree *rbtree,
+                                       struct rcu_rbtree_node *x);
+
+/*
+ * Get previous node before node x.
+ */
+struct rcu_rbtree_node *rcu_rbtree_prev(struct rcu_rbtree *rbtree,
+                                       struct rcu_rbtree_node *x);
+
+/*
+ * Sentinel (bottom nodes).
+ * Don't care about p, left, right, pos and key values.
+ *
+ * Returns non-zero when node is the nil sentinel of rbtree (that is,
+ * &rbtree->nil_node), zero otherwise. Only the pointer is compared;
+ * node is never dereferenced.
+ *
+ * Declared inline: a plain static function defined in a header triggers
+ * unused-function warnings in every file that includes the header without
+ * calling it.
+ */
+static inline
+int rcu_rbtree_is_nil(struct rcu_rbtree *rbtree, struct rcu_rbtree_node *node)
+{
+       return node == &rbtree->nil_node;
+}
+
+#endif /* URCU_RBTREE_H */
This page took 0.046777 seconds and 4 git commands to generate.