urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #include <stdio.h>
  25 #include <pthread.h>
  26 #include <signal.h>
  27 #include <assert.h>
  28 #include <stdlib.h>
  29 #include <stdint.h>
  30 #include <string.h>
  31 #include <errno.h>
  32 #include <poll.h>
  33 #include <sys/time.h>
  34 #include <unistd.h>
  35 #include <sched.h>
  36
  37 #include "config.h"
  38 #include "urcu/wfqueue.h"
  39 #include "urcu-call-rcu.h"
  40 #include "urcu-pointer.h"
  41 #include "urcu/list.h"
  42 #include "urcu/futex.h"
  43
/*
 * Data structure that identifies a call_rcu thread.  One instance is
 * allocated by call_rcu_data_init() for each call_rcu worker thread.
 */

struct call_rcu_data {
        struct cds_wfq_queue cbs;       /* Queue of pending callbacks. */
        unsigned long flags;            /* URCU_CALL_RCU_* flag bits. */
        int32_t futex;                  /* Holds -1 while a non-RT worker may sleep; reset to 0 by wakers. */
        unsigned long qlen; /* maintained for debugging. */
        pthread_t tid;                  /* Worker thread, filled in by pthread_create(). */
        int cpu_affinity;               /* CPU to bind the worker to; negative means no binding. */
        struct cds_list_head list;      /* Link in call_rcu_data_list. */
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  55
/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Protected by call_rcu_mutex.
 */

CDS_LIST_HEAD(call_rcu_data_list);

/*
 * Link a thread using call_rcu() to its call_rcu thread.  This is
 * per-thread storage; each call_rcu worker also points it at its own
 * structure.
 */

static __thread struct call_rcu_data *thread_call_rcu_data;

/*
 * Guard call_rcu thread creation.  Also held around updates to the
 * per-CPU pointer array and to call_rcu_data_list.
 */

static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/* If a given thread does not have its own call_rcu thread, this is default. */

static struct call_rcu_data *default_call_rcu_data;
  74
  75 /*
  76  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
  77  * available, then we can have call_rcu threads assigned to individual
  78  * CPUs rather than only to specific threads.
  79  */
  80
  81 #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)
  82
/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
 * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
 * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
 * without mutex. The call_rcu_mutex protects updates.
 *
 * maxcpus is 0 until alloc_cpu_call_rcu_data() has queried the CPU count.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;
  93
/*
 * Forget the cached CPU count so that the next call to
 * alloc_cpu_call_rcu_data() queries it again and reallocates the array.
 */
static void maxcpus_reset(void)
{
        maxcpus = 0;
}
  98
  99 /* Allocate the array if it has not already been allocated. */
 100
 101 static void alloc_cpu_call_rcu_data(void)
 102 {
 103         struct call_rcu_data **p;
 104         static int warned = 0;
 105
 106         if (maxcpus != 0)
 107                 return;
 108         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 109         if (maxcpus <= 0) {
 110                 return;
 111         }
 112         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 113         if (p != NULL) {
 114                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 115                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 116         } else {
 117                 if (!warned) {
 118                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 119                 }
 120                 warned = 1;
 121         }
 122 }
 123
 124 #else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
 125
 126 /*
 127  * per_cpu_call_rcu_data should be constant, but some functions below, used both
 * for cases where cpu number is available and not available, assume it is not
 129  * constant.
 130  */
/* No per-CPU support in this configuration: the array is never allocated. */
static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
/* Negative CPU count makes every "maxcpus > 0" test in this file fail. */
static const long maxcpus = -1;
 133
/* Nothing to reset: maxcpus is a constant in this configuration. */
static void maxcpus_reset(void)
{
}
 137
/* Nothing to allocate: there is no per-CPU array in this configuration. */
static void alloc_cpu_call_rcu_data(void)
{
}
 141
/*
 * Stand-in used when the per-CPU code path is compiled out; always
 * reports -1, which get_cpu_call_rcu_data() treats as out of range.
 */
static int sched_getcpu(void)
{
        return -1;
}
 146
 147 #endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
 148
 149 /* Acquire the specified pthread mutex. */
 150
 151 static void call_rcu_lock(pthread_mutex_t *pmp)
 152 {
 153         if (pthread_mutex_lock(pmp) != 0) {
 154                 perror("pthread_mutex_lock");
 155                 exit(-1);
 156         }
 157 }
 158
 159 /* Release the specified pthread mutex. */
 160
 161 static void call_rcu_unlock(pthread_mutex_t *pmp)
 162 {
 163         if (pthread_mutex_unlock(pmp) != 0) {
 164                 perror("pthread_mutex_unlock");
 165                 exit(-1);
 166         }
 167 }
 168
 169 #if HAVE_SCHED_SETAFFINITY
 170 static
 171 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 172 {
 173         cpu_set_t mask;
 174
 175         if (crdp->cpu_affinity < 0)
 176                 return 0;
 177
 178         CPU_ZERO(&mask);
 179         CPU_SET(crdp->cpu_affinity, &mask);
 180 #if SCHED_SETAFFINITY_ARGS == 2
 181         return sched_setaffinity(0, &mask);
 182 #else
 183         return sched_setaffinity(0, sizeof(mask), &mask);
 184 #endif
 185 }
 186 #else
 187 static
 188 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 189 {
 190         return 0;
 191 }
 192 #endif
 193
/*
 * Put the calling call_rcu thread to sleep on crdp->futex, but only if
 * the futex word still holds -1.  call_rcu_wake_up() resets the word to
 * 0 before waking, so a wake-up that already happened is not missed.
 */
static void call_rcu_wait(struct call_rcu_data *crdp)
{
        /* Read call_rcu list before read futex */
        cmm_smp_mb();
        if (uatomic_read(&crdp->futex) == -1)
                futex_async(&crdp->futex, FUTEX_WAIT, -1,
                      NULL, NULL, 0);
}
 202
/*
 * Wake the call_rcu thread owning crdp if it has announced (futex == -1)
 * that it is about to sleep or is sleeping.  The futex word is reset to
 * 0 before issuing the wake so that a concurrent call_rcu_wait() does
 * not block.
 */
static void call_rcu_wake_up(struct call_rcu_data *crdp)
{
        /* Write to call_rcu list before reading/writing futex */
        cmm_smp_mb();
        if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
                uatomic_set(&crdp->futex, 0);
                futex_async(&crdp->futex, FUTEX_WAKE, 1,
                      NULL, NULL, 0);
        }
}
 213
/*
 * This is the code run by each call_rcu thread.
 *
 * arg is the struct call_rcu_data this worker serves.  The worker
 * repeatedly detaches the whole pending callback list, waits for a grace
 * period, invokes every callback, and then sleeps.  Workers created with
 * URCU_CALL_RCU_RT only poll; other workers also block on the futex
 * when the queue is empty.  The loop ends once URCU_CALL_RCU_STOP is set
 * in flags, after which URCU_CALL_RCU_STOPPED is published.
 * Always returns NULL.
 */

static void *call_rcu_thread(void *arg)
{
        unsigned long cbcount;
        struct cds_wfq_node *cbs;
        struct cds_wfq_node **cbs_tail;
        struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
        struct rcu_head *rhp;
        int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);

        /*
         * NOTE(review): the perror() label below names
         * pthread_setaffinity_np, but the helper in this file calls
         * sched_setaffinity().
         */
        if (set_thread_cpu_affinity(crdp) != 0) {
                perror("pthread_setaffinity_np");
                exit(-1);
        }

        /*
         * If callbacks take a read-side lock, we need to be registered.
         */
        rcu_register_thread();

        /* call_rcu() issued from a callback queues onto this worker. */
        thread_call_rcu_data = crdp;
        if (!rt) {
                /* Announce (futex 0 -> -1) that this worker may go to sleep. */
                uatomic_dec(&crdp->futex);
                /* Decrement futex before reading call_rcu list */
                cmm_smp_mb();
        }
        for (;;) {
                /* tail pointing back at head means the queue is empty. */
                if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
                        /* An enqueuer swapped tail but has not linked head yet. */
                        while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
                                poll(NULL, 0, 1);
                        /* Detach the whole list and reset the queue to empty. */
                        _CMM_STORE_SHARED(crdp->cbs.head, NULL);
                        cbs_tail = (struct cds_wfq_node **)
                                uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
                        /* One grace period covers every detached callback. */
                        synchronize_rcu();
                        cbcount = 0;
                        do {
                                /*
                                 * A NULL next on a node other than the
                                 * detached tail means its enqueuer has not
                                 * finished linking; wait for it.
                                 */
                                while (cbs->next == NULL &&
                                       &cbs->next != cbs_tail)
                                        poll(NULL, 0, 1);
                                /* The queue's dummy node carries no callback. */
                                if (cbs == &crdp->cbs.dummy) {
                                        cbs = cbs->next;
                                        continue;
                                }
                                /* presumably the node is the first member of struct rcu_head — TODO confirm */
                                rhp = (struct rcu_head *)cbs;
                                /* Read next first: the callback may free rhp. */
                                cbs = cbs->next;
                                rhp->func(rhp);
                                cbcount++;
                        } while (cbs != NULL);
                        uatomic_sub(&crdp->qlen, cbcount);
                }
                if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
                        break;
                /* Go offline while sleeping; back online before the next pass. */
                rcu_thread_offline();
                if (!rt) {
                        if (&crdp->cbs.head
                            == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
                                call_rcu_wait(crdp);
                                poll(NULL, 0, 10);
                                uatomic_dec(&crdp->futex);
                                /*
                                 * Decrement futex before reading
                                 * call_rcu list.
                                 */
                                cmm_smp_mb();
                        } else {
                                poll(NULL, 0, 10);
                        }
                } else {
                        poll(NULL, 0, 10);
                }
                rcu_thread_online();
        }
        if (!rt) {
                /*
                 * Read call_rcu list before write futex.
                 */
                cmm_smp_mb();
                uatomic_set(&crdp->futex, 0);
        }
        /* call_rcu_data_free() polls for this flag before freeing crdp. */
        uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
        rcu_unregister_thread();
        return NULL;
}
 298
 299 /*
 300  * Create both a call_rcu thread and the corresponding call_rcu_data
 301  * structure, linking the structure in as specified.  Caller must hold
 302  * call_rcu_mutex.
 303  */
 304
 305 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 306                                unsigned long flags,
 307                                int cpu_affinity)
 308 {
 309         struct call_rcu_data *crdp;
 310
 311         crdp = malloc(sizeof(*crdp));
 312         if (crdp == NULL) {
 313                 fprintf(stderr, "Out of memory.\n");
 314                 exit(-1);
 315         }
 316         memset(crdp, '\0', sizeof(*crdp));
 317         cds_wfq_init(&crdp->cbs);
 318         crdp->qlen = 0;
 319         crdp->futex = 0;
 320         crdp->flags = flags;
 321         cds_list_add(&crdp->list, &call_rcu_data_list);
 322         crdp->cpu_affinity = cpu_affinity;
 323         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 324         *crdpp = crdp;
 325         if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
 326                 perror("pthread_create");
 327                 exit(-1);
 328         }
 329 }
 330
 331 /*
 332  * Return a pointer to the call_rcu_data structure for the specified
 333  * CPU, returning NULL if there is none.  We cannot automatically
 334  * created it because the platform we are running on might not define
 335  * sched_getcpu().
 336  *
 337  * The call to this function and use of the returned call_rcu_data
 338  * should be protected by RCU read-side lock.
 339  */
 340
 341 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 342 {
 343         static int warned = 0;
 344         struct call_rcu_data **pcpu_crdp;
 345
 346         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 347         if (pcpu_crdp == NULL)
 348                 return NULL;
 349         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 350                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 351                 warned = 1;
 352         }
 353         if (cpu < 0 || maxcpus <= cpu)
 354                 return NULL;
 355         return rcu_dereference(pcpu_crdp[cpu]);
 356 }
 357
/*
 * Return the tid corresponding to the call_rcu thread whose
 * call_rcu_data structure is specified.  crdp is dereferenced
 * unconditionally, so it must not be NULL.
 */

pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
{
        return crdp->tid;
}
 367
 368 /*
 369  * Create a call_rcu_data structure (with thread) and return a pointer.
 370  */
 371
 372 static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
 373                                                     int cpu_affinity)
 374 {
 375         struct call_rcu_data *crdp;
 376
 377         call_rcu_data_init(&crdp, flags, cpu_affinity);
 378         return crdp;
 379 }
 380
 381 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 382                                            int cpu_affinity)
 383 {
 384         struct call_rcu_data *crdp;
 385
 386         call_rcu_lock(&call_rcu_mutex);
 387         crdp = __create_call_rcu_data(flags, cpu_affinity);
 388         call_rcu_unlock(&call_rcu_mutex);
 389         return crdp;
 390 }
 391
 392 /*
 393  * Set the specified CPU to use the specified call_rcu_data structure.
 394  *
 395  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 396  * the caller's responsibility to dispose of the removed structure.
 397  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 398  * (prior to NULLing it out, of course).
 399  *
 400  * The caller must wait for a grace-period to pass between return from
 401  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 402  * previous call rcu data as argument.
 403  */
 404
 405 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 406 {
 407         static int warned = 0;
 408
 409         call_rcu_lock(&call_rcu_mutex);
 410         alloc_cpu_call_rcu_data();
 411         if (cpu < 0 || maxcpus <= cpu) {
 412                 if (!warned) {
 413                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 414                         warned = 1;
 415                 }
 416                 call_rcu_unlock(&call_rcu_mutex);
 417                 errno = EINVAL;
 418                 return -EINVAL;
 419         }
 420
 421         if (per_cpu_call_rcu_data == NULL) {
 422                 call_rcu_unlock(&call_rcu_mutex);
 423                 errno = ENOMEM;
 424                 return -ENOMEM;
 425         }
 426
 427         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 428                 call_rcu_unlock(&call_rcu_mutex);
 429                 errno = EEXIST;
 430                 return -EEXIST;
 431         }
 432
 433         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 434         call_rcu_unlock(&call_rcu_mutex);
 435         return 0;
 436 }
 437
 438 /*
 439  * Return a pointer to the default call_rcu_data structure, creating
 440  * one if need be.  Because we never free call_rcu_data structures,
 441  * we don't need to be in an RCU read-side critical section.
 442  */
 443
 444 struct call_rcu_data *get_default_call_rcu_data(void)
 445 {
 446         if (default_call_rcu_data != NULL)
 447                 return rcu_dereference(default_call_rcu_data);
 448         call_rcu_lock(&call_rcu_mutex);
 449         if (default_call_rcu_data != NULL) {
 450                 call_rcu_unlock(&call_rcu_mutex);
 451                 return default_call_rcu_data;
 452         }
 453         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 454         call_rcu_unlock(&call_rcu_mutex);
 455         return default_call_rcu_data;
 456 }
 457
 458 /*
 459  * Return the call_rcu_data structure that applies to the currently
 460  * running thread.  Any call_rcu_data structure assigned specifically
 461  * to this thread has first priority, followed by any call_rcu_data
 462  * structure assigned to the CPU on which the thread is running,
 463  * followed by the default call_rcu_data structure.  If there is not
 464  * yet a default call_rcu_data structure, one will be created.
 465  *
 466  * Calls to this function and use of the returned call_rcu_data should
 467  * be protected by RCU read-side lock.
 468  */
 469 struct call_rcu_data *get_call_rcu_data(void)
 470 {
 471         struct call_rcu_data *crd;
 472
 473         if (thread_call_rcu_data != NULL)
 474                 return thread_call_rcu_data;
 475
 476         if (maxcpus > 0) {
 477                 crd = get_cpu_call_rcu_data(sched_getcpu());
 478                 if (crd)
 479                         return crd;
 480         }
 481
 482         return get_default_call_rcu_data();
 483 }
 484
/*
 * Return a pointer to this task's call_rcu_data if there is one,
 * NULL otherwise.  Reads the per-thread pointer only; the per-CPU and
 * default structures are not consulted.
 */

struct call_rcu_data *get_thread_call_rcu_data(void)
{
        return thread_call_rcu_data;
}
 493
/*
 * Set this task's call_rcu_data structure as specified, regardless
 * of whether or not this task already had one.  (This allows switching
 * to and from real-time call_rcu threads, for example.)
 *
 * Use NULL to remove a thread's call_rcu_data structure, but it is
 * the caller's responsibility to dispose of the removed structure.
 * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 * (prior to NULLing it out, of course).
 *
 * Only the calling thread's per-thread pointer is written.
 */

void set_thread_call_rcu_data(struct call_rcu_data *crdp)
{
        thread_call_rcu_data = crdp;
}
 509
 510 /*
 511  * Create a separate call_rcu thread for each CPU.  This does not
 512  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 513  * function if you want that behavior. Should be paired with
 514  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 515  * threads.
 516  */
 517
 518 int create_all_cpu_call_rcu_data(unsigned long flags)
 519 {
 520         int i;
 521         struct call_rcu_data *crdp;
 522         int ret;
 523
 524         call_rcu_lock(&call_rcu_mutex);
 525         alloc_cpu_call_rcu_data();
 526         call_rcu_unlock(&call_rcu_mutex);
 527         if (maxcpus <= 0) {
 528                 errno = EINVAL;
 529                 return -EINVAL;
 530         }
 531         if (per_cpu_call_rcu_data == NULL) {
 532                 errno = ENOMEM;
 533                 return -ENOMEM;
 534         }
 535         for (i = 0; i < maxcpus; i++) {
 536                 call_rcu_lock(&call_rcu_mutex);
 537                 if (get_cpu_call_rcu_data(i)) {
 538                         call_rcu_unlock(&call_rcu_mutex);
 539                         continue;
 540                 }
 541                 crdp = __create_call_rcu_data(flags, i);
 542                 if (crdp == NULL) {
 543                         call_rcu_unlock(&call_rcu_mutex);
 544                         errno = ENOMEM;
 545                         return -ENOMEM;
 546                 }
 547                 call_rcu_unlock(&call_rcu_mutex);
 548                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 549                         call_rcu_data_free(crdp);
 550
 551                         /* it has been created by other thread */
 552                         if (ret == -EEXIST)
 553                                 continue;
 554
 555                         return ret;
 556                 }
 557         }
 558         return 0;
 559 }
 560
/*
 * Wake up the call_rcu thread corresponding to the specified
 * call_rcu_data structure.  Workers flagged URCU_CALL_RCU_RT never
 * sleep on the futex (they only poll), so no wake-up is issued for them.
 */
static void wake_call_rcu_thread(struct call_rcu_data *crdp)
{
        if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
                call_rcu_wake_up(crdp);
}
 570
/*
 * Schedule a function to be invoked after a following grace period.
 * This is the only function that must be called -- the others are
 * only present to allow applications to tune their use of RCU for
 * maximum performance.
 *
 * Note that unless a call_rcu thread has already been created,
 * the first invocation of call_rcu() will create one.  So, if you
 * need the first invocation of call_rcu() to be fast, make sure
 * to create a call_rcu thread first.  One way to accomplish this is
 * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 *
 * call_rcu must be called by registered RCU read-side threads.
 *
 * head: callback node; func is stored in it and later invoked by the
 *       call_rcu thread with head as its argument.
 */

void call_rcu(struct rcu_head *head,
              void (*func)(struct rcu_head *head))
{
        struct call_rcu_data *crdp;

        cds_wfq_node_init(&head->next);
        head->func = func;
        /* Holding rcu read-side lock across use of per-cpu crdp */
        rcu_read_lock();
        crdp = get_call_rcu_data();
        cds_wfq_enqueue(&crdp->cbs, &head->next);
        uatomic_inc(&crdp->qlen);
        /* Wake only after enqueueing so the worker finds the callback. */
        wake_call_rcu_thread(crdp);
        rcu_read_unlock();
}
 601
 602 /*
 603  * Free up the specified call_rcu_data structure, terminating the
 604  * associated call_rcu thread.  The caller must have previously
 605  * removed the call_rcu_data structure from per-thread or per-CPU
 606  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 607  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 608  * per-thread call_rcu_data structures.
 609  *
 610  * We silently refuse to free up the default call_rcu_data structure
 611  * because that is where we put any leftover callbacks.  Note that
 612  * the possibility of self-spawning callbacks makes it impossible
 613  * to execute all the callbacks in finite time without putting any
 614  * newly spawned callbacks somewhere else.  The "somewhere else" of
 615  * last resort is the default call_rcu_data structure.
 616  *
 617  * We also silently refuse to free NULL pointers.  This simplifies
 618  * the calling code.
 619  *
 620  * The caller must wait for a grace-period to pass between return from
 621  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 622  * previous call rcu data as argument.
 623  */
 624 void call_rcu_data_free(struct call_rcu_data *crdp)
 625 {
 626         struct cds_wfq_node *cbs;
 627         struct cds_wfq_node **cbs_tail;
 628         struct cds_wfq_node **cbs_endprev;
 629
 630         if (crdp == NULL || crdp == default_call_rcu_data) {
 631                 return;
 632         }
 633         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 634                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 635                 wake_call_rcu_thread(crdp);
 636                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 637                         poll(NULL, 0, 1);
 638         }
 639         if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 640                 while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 641                         poll(NULL, 0, 1);
 642                 _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 643                 cbs_tail = (struct cds_wfq_node **)
 644                         uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
 645                 /* Create default call rcu data if need be */
 646                 (void) get_default_call_rcu_data();
 647                 cbs_endprev = (struct cds_wfq_node **)
 648                         uatomic_xchg(&default_call_rcu_data, cbs_tail);
 649                 *cbs_endprev = cbs;
 650                 uatomic_add(&default_call_rcu_data->qlen,
 651                             uatomic_read(&crdp->qlen));
 652                 wake_call_rcu_thread(default_call_rcu_data);
 653         }
 654
 655         call_rcu_lock(&call_rcu_mutex);
 656         cds_list_del(&crdp->list);
 657         call_rcu_unlock(&call_rcu_mutex);
 658
 659         free(crdp);
 660 }
 661
 662 /*
 663  * Clean up all the per-CPU call_rcu threads.
 664  */
 665 void free_all_cpu_call_rcu_data(void)
 666 {
 667         int cpu;
 668         struct call_rcu_data **crdp;
 669         static int warned = 0;
 670
 671         if (maxcpus <= 0)
 672                 return;
 673
 674         crdp = malloc(sizeof(*crdp) * maxcpus);
 675         if (!crdp) {
 676                 if (!warned) {
 677                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 678                 }
 679                 warned = 1;
 680                 return;
 681         }
 682
 683         for (cpu = 0; cpu < maxcpus; cpu++) {
 684                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 685                 if (crdp[cpu] == NULL)
 686                         continue;
 687                 set_cpu_call_rcu_data(cpu, NULL);
 688         }
 689         /*
 690          * Wait for call_rcu sites acting as RCU readers of the
 691          * call_rcu_data to become quiescent.
 692          */
 693         synchronize_rcu();
 694         for (cpu = 0; cpu < maxcpus; cpu++) {
 695                 if (crdp[cpu] == NULL)
 696                         continue;
 697                 call_rcu_data_free(crdp[cpu]);
 698         }
 699         free(crdp);
 700 }
 701
/*
 * Acquire the call_rcu_mutex in order to ensure that the child sees
 * all of the call_rcu() data structures in a consistent state.
 * Suitable for pthread_atfork() and friends.
 *
 * The mutex is released by call_rcu_after_fork_parent() in the parent
 * and by call_rcu_after_fork_child() in the child.
 */
void call_rcu_before_fork(void)
{
        call_rcu_lock(&call_rcu_mutex);
}
 711
/*
 * Release the call_rcu_mutex taken by call_rcu_before_fork() in the
 * parent of a successful fork().  No other cleanup is done here.
 * Suitable for pthread_atfork() and friends.
 */
void call_rcu_after_fork_parent(void)
{
        call_rcu_unlock(&call_rcu_mutex);
}
 721
/*
 * Clean up call_rcu data structures in the child of a successful fork()
 * that is not followed by exec().  Suitable for pthread_atfork() and
 * friends.
 *
 * Releases the mutex taken by call_rcu_before_fork(), creates a fresh
 * default call_rcu_data (with a new worker thread), drops the per-CPU
 * array and the per-thread pointer, and disposes of every other
 * structure on call_rcu_data_list.
 */
void call_rcu_after_fork_child(void)
{
        struct call_rcu_data *crdp, *next;

        /* Release the mutex. */
        call_rcu_unlock(&call_rcu_mutex);

        /* Do nothing when call_rcu() has not been used */
        if (cds_list_empty(&call_rcu_data_list))
                return;

        /*
         * Allocate a new default call_rcu_data structure in order
         * to get a working call_rcu thread to go with it.
         */
        default_call_rcu_data = NULL;
        (void)get_default_call_rcu_data();

        /* Cleanup call_rcu_data pointers before use */
        maxcpus_reset();
        free(per_cpu_call_rcu_data);
        rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
        thread_call_rcu_data = NULL;

        /* Dispose of all of the rest of the call_rcu_data structures. */
        cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
                if (crdp == default_call_rcu_data)
                        continue;
                /*
                 * Mark as already stopped so call_rcu_data_free() does not
                 * wait for a worker thread; presumably those threads do
                 * not exist in the child — TODO confirm.
                 */
                uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
                call_rcu_data_free(crdp);
        }
}
 758 }