urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #define _LGPL_SOURCE
  25 #include <stdio.h>
  26 #include <pthread.h>
  27 #include <signal.h>
  28 #include <assert.h>
  29 #include <stdlib.h>
  30 #include <stdint.h>
  31 #include <string.h>
  32 #include <errno.h>
  33 #include <poll.h>
  34 #include <sys/time.h>
  35 #include <unistd.h>
  36 #include <sched.h>
  37
  38 #include "config.h"
  39 #include "urcu/wfcqueue.h"
  40 #include "urcu-call-rcu.h"
  41 #include "urcu-pointer.h"
  42 #include "urcu/list.h"
  43 #include "urcu/futex.h"
  44 #include "urcu/tls-compat.h"
  45 #include "urcu-die.h"
  46
   47 /* Per-worker state for one call_rcu thread: callback queue, control flags and wake-up futex. */
   48
   49 struct call_rcu_data {
   50         /*
   51          * We do not align head on a different cache-line than tail
   52          * mainly because call_rcu callback-invocation threads use
   53          * batching ("splice") to get an entire list of callbacks, which
   54          * effectively empties the queue, and requires to touch the tail
   55          * anyway.
   56          */
   57         struct cds_wfcq_tail cbs_tail; /* tail of the pending-callback queue */
   58         struct cds_wfcq_head cbs_head; /* head of the pending-callback queue */
   59         unsigned long flags; /* URCU_CALL_RCU_* bits tested in this file: RT, STOP, STOPPED */
   60         int32_t futex; /* decremented to -1 by the worker before it waits; reset to 0 by call_rcu_wake_up() */
   61         unsigned long qlen; /* callbacks enqueued and not yet invoked; maintained for debugging. */
   62         pthread_t tid; /* worker thread id, filled in by pthread_create() */
   63         int cpu_affinity; /* CPU the worker pins itself to; negative means no pinning */
   64         struct cds_list_head list; /* link in call_rcu_data_list */
   65 } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  66
   67 /*
   68  * List of all call_rcu_data structures to keep valgrind happy.
   69  * Also walked by call_rcu_after_fork_child() to dispose of stale entries.
   69  * Protected by call_rcu_mutex.
   70  */
   71
   72 static CDS_LIST_HEAD(call_rcu_data_list);
   73
   74 /* Per-thread call_rcu_data; get_call_rcu_data() checks it before the per-CPU and default ones. */
   75
   76 static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);
   77
   78 /* Guards call_rcu thread creation, call_rcu_data_list and per-CPU pointer updates. */
   79
   80 static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
   81
   82 /* Fallback used when a thread has neither a per-thread nor a per-CPU call_rcu thread; created lazily by get_default_call_rcu_data(). */
   83
   84 static struct call_rcu_data *default_call_rcu_data;
  85
  86 /*
  87  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
  88  * available, then we can have call_rcu threads assigned to individual
  89  * CPUs rather than only to specific threads.
  90  */
  91
  92 #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF)
  93
  94 /*
  95  * Pointer to array of pointers to per-CPU call_rcu_data structures
  96  * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
  97  * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
  98  * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
  99  * without mutex. The call_rcu_mutex protects updates.
 100  */
 101
 102 static struct call_rcu_data **per_cpu_call_rcu_data;
 103 static long maxcpus;
 104
 105 static void maxcpus_reset(void)
 106 {
 107         maxcpus = 0;
 108 }
 109
 110 /* Allocate the array if it has not already been allocated. */
 111
 112 static void alloc_cpu_call_rcu_data(void)
 113 {
 114         struct call_rcu_data **p;
 115         static int warned = 0;
 116
 117         if (maxcpus != 0)
 118                 return;
 119         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 120         if (maxcpus <= 0) {
 121                 return;
 122         }
 123         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 124         if (p != NULL) {
 125                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 126                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 127         } else {
 128                 if (!warned) {
 129                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 130                 }
 131                 warned = 1;
 132         }
 133 }
 134
 135 #else /* #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
 136
 137 /*
 138  * per_cpu_call_rcu_data should be constant, but some functions below, used both
 139  * for cases where cpu number is available and not available, assume it it not
 140  * constant.
 141  */
 142 static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
 143 static const long maxcpus = -1;
 144
 145 static void maxcpus_reset(void)
 146 {
 147 }
 148
 149 static void alloc_cpu_call_rcu_data(void)
 150 {
 151 }
 152
 153 static int sched_getcpu(void)
 154 {
 155         return -1;
 156 }
 157
 158 #endif /* #else #if defined(HAVE_SCHED_GETCPU) && defined(HAVE_SYSCONF) */
 159
 160 /* Acquire the specified pthread mutex. */
 161
 162 static void call_rcu_lock(pthread_mutex_t *pmp)
 163 {
 164         int ret;
 165
 166         ret = pthread_mutex_lock(pmp);
 167         if (ret)
 168                 urcu_die(ret);
 169 }
 170
 171 /* Release the specified pthread mutex. */
 172
 173 static void call_rcu_unlock(pthread_mutex_t *pmp)
 174 {
 175         int ret;
 176
 177         ret = pthread_mutex_unlock(pmp);
 178         if (ret)
 179                 urcu_die(ret);
 180 }
 181
 182 #if HAVE_SCHED_SETAFFINITY
 183 static
 184 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 185 {
 186         cpu_set_t mask;
 187
 188         if (crdp->cpu_affinity < 0)
 189                 return 0;
 190
 191         CPU_ZERO(&mask);
 192         CPU_SET(crdp->cpu_affinity, &mask);
 193 #if SCHED_SETAFFINITY_ARGS == 2
 194         return sched_setaffinity(0, &mask);
 195 #else
 196         return sched_setaffinity(0, sizeof(mask), &mask);
 197 #endif
 198 }
 199 #else
 200 static
 201 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 202 {
 203         return 0;
 204 }
 205 #endif
 206
 207 static void call_rcu_wait(struct call_rcu_data *crdp)
 208 {
 209         /* Read call_rcu list before read futex */
 210         cmm_smp_mb();
 211         if (uatomic_read(&crdp->futex) == -1)
 212                 futex_async(&crdp->futex, FUTEX_WAIT, -1,
 213                       NULL, NULL, 0);
 214 }
 215
 216 static void call_rcu_wake_up(struct call_rcu_data *crdp)
 217 {
 218         /* Write to call_rcu list before reading/writing futex */
 219         cmm_smp_mb();
 220         if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
 221                 uatomic_set(&crdp->futex, 0);
 222                 futex_async(&crdp->futex, FUTEX_WAKE, 1,
 223                       NULL, NULL, 0);
 224         }
 225 }
 226
 227 /* This is the code run by each call_rcu thread. */
 228
 229 static void *call_rcu_thread(void *arg)
 230 {
 231         unsigned long cbcount;
 232         struct call_rcu_data *crdp = (struct call_rcu_data *) arg;
 233         int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
 234         int ret;
 235
 236         ret = set_thread_cpu_affinity(crdp);
 237         if (ret)
 238                 urcu_die(errno);
 239
 240         /*
 241          * If callbacks take a read-side lock, we need to be registered.
 242          */
 243         rcu_register_thread();
 244
 245         URCU_TLS(thread_call_rcu_data) = crdp;
 246         if (!rt) {
 247                 uatomic_dec(&crdp->futex);
 248                 /* Decrement futex before reading call_rcu list */
 249                 cmm_smp_mb();
 250         }
 251         for (;;) {
 252                 struct cds_wfcq_head cbs_tmp_head;
 253                 struct cds_wfcq_tail cbs_tmp_tail;
 254                 struct cds_wfcq_node *cbs, *cbs_tmp_n;
 255                 enum cds_wfcq_ret splice_ret;
 256
 257                 cds_wfcq_init(&cbs_tmp_head, &cbs_tmp_tail);
 258                 splice_ret = __cds_wfcq_splice_blocking(&cbs_tmp_head,
 259                         &cbs_tmp_tail, &crdp->cbs_head, &crdp->cbs_tail);
 260                 assert(splice_ret != CDS_WFCQ_RET_WOULDBLOCK);
 261                 assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
 262                 if (splice_ret != CDS_WFCQ_RET_SRC_EMPTY) {
 263                         synchronize_rcu();
 264                         cbcount = 0;
 265                         __cds_wfcq_for_each_blocking_safe(&cbs_tmp_head,
 266                                         &cbs_tmp_tail, cbs, cbs_tmp_n) {
 267                                 struct rcu_head *rhp;
 268
 269                                 rhp = caa_container_of(cbs,
 270                                         struct rcu_head, next);
 271                                 rhp->func(rhp);
 272                                 cbcount++;
 273                         }
 274                         uatomic_sub(&crdp->qlen, cbcount);
 275                 }
 276                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
 277                         break;
 278                 rcu_thread_offline();
 279                 if (!rt) {
 280                         if (cds_wfcq_empty(&crdp->cbs_head,
 281                                         &crdp->cbs_tail)) {
 282                                 call_rcu_wait(crdp);
 283                                 poll(NULL, 0, 10);
 284                                 uatomic_dec(&crdp->futex);
 285                                 /*
 286                                  * Decrement futex before reading
 287                                  * call_rcu list.
 288                                  */
 289                                 cmm_smp_mb();
 290                         } else {
 291                                 poll(NULL, 0, 10);
 292                         }
 293                 } else {
 294                         poll(NULL, 0, 10);
 295                 }
 296                 rcu_thread_online();
 297         }
 298         if (!rt) {
 299                 /*
 300                  * Read call_rcu list before write futex.
 301                  */
 302                 cmm_smp_mb();
 303                 uatomic_set(&crdp->futex, 0);
 304         }
 305         uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
 306         rcu_unregister_thread();
 307         return NULL;
 308 }
 309
 310 /*
 311  * Create both a call_rcu thread and the corresponding call_rcu_data
 312  * structure, linking the structure in as specified.  Caller must hold
 313  * call_rcu_mutex.
 314  */
 315
 316 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 317                                unsigned long flags,
 318                                int cpu_affinity)
 319 {
 320         struct call_rcu_data *crdp;
 321         int ret;
 322
 323         crdp = malloc(sizeof(*crdp));
 324         if (crdp == NULL)
 325                 urcu_die(errno);
 326         memset(crdp, '\0', sizeof(*crdp));
 327         cds_wfcq_init(&crdp->cbs_head, &crdp->cbs_tail);
 328         crdp->qlen = 0;
 329         crdp->futex = 0;
 330         crdp->flags = flags;
 331         cds_list_add(&crdp->list, &call_rcu_data_list);
 332         crdp->cpu_affinity = cpu_affinity;
 333         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 334         *crdpp = crdp;
 335         ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
 336         if (ret)
 337                 urcu_die(ret);
 338 }
 339
 340 /*
 341  * Return a pointer to the call_rcu_data structure for the specified
 342  * CPU, returning NULL if there is none.  We cannot automatically
 343  * created it because the platform we are running on might not define
 344  * sched_getcpu().
 345  *
 346  * The call to this function and use of the returned call_rcu_data
 347  * should be protected by RCU read-side lock.
 348  */
 349
 350 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 351 {
 352         static int warned = 0;
 353         struct call_rcu_data **pcpu_crdp;
 354
 355         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 356         if (pcpu_crdp == NULL)
 357                 return NULL;
 358         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 359                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 360                 warned = 1;
 361         }
 362         if (cpu < 0 || maxcpus <= cpu)
 363                 return NULL;
 364         return rcu_dereference(pcpu_crdp[cpu]);
 365 }
 366
 367 /*
 368  * Return the tid corresponding to the call_rcu thread whose
 369  * call_rcu_data structure is specified.
 370  */
 371
 372 pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
 373 {
 374         return crdp->tid;
 375 }
 376
 377 /*
 378  * Create a call_rcu_data structure (with thread) and return a pointer.
 379  */
 380
 381 static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
 382                                                     int cpu_affinity)
 383 {
 384         struct call_rcu_data *crdp;
 385
 386         call_rcu_data_init(&crdp, flags, cpu_affinity);
 387         return crdp;
 388 }
 389
 390 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 391                                            int cpu_affinity)
 392 {
 393         struct call_rcu_data *crdp;
 394
 395         call_rcu_lock(&call_rcu_mutex);
 396         crdp = __create_call_rcu_data(flags, cpu_affinity);
 397         call_rcu_unlock(&call_rcu_mutex);
 398         return crdp;
 399 }
 400
 401 /*
 402  * Set the specified CPU to use the specified call_rcu_data structure.
 403  *
 404  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 405  * the caller's responsibility to dispose of the removed structure.
 406  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 407  * (prior to NULLing it out, of course).
 408  *
 409  * The caller must wait for a grace-period to pass between return from
 410  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 411  * previous call rcu data as argument.
 412  */
 413
 414 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 415 {
 416         static int warned = 0;
 417
 418         call_rcu_lock(&call_rcu_mutex);
 419         alloc_cpu_call_rcu_data();
 420         if (cpu < 0 || maxcpus <= cpu) {
 421                 if (!warned) {
 422                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 423                         warned = 1;
 424                 }
 425                 call_rcu_unlock(&call_rcu_mutex);
 426                 errno = EINVAL;
 427                 return -EINVAL;
 428         }
 429
 430         if (per_cpu_call_rcu_data == NULL) {
 431                 call_rcu_unlock(&call_rcu_mutex);
 432                 errno = ENOMEM;
 433                 return -ENOMEM;
 434         }
 435
 436         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 437                 call_rcu_unlock(&call_rcu_mutex);
 438                 errno = EEXIST;
 439                 return -EEXIST;
 440         }
 441
 442         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 443         call_rcu_unlock(&call_rcu_mutex);
 444         return 0;
 445 }
 446
 447 /*
 448  * Return a pointer to the default call_rcu_data structure, creating
 449  * one if need be.  Because we never free call_rcu_data structures,
 450  * we don't need to be in an RCU read-side critical section.
 451  */
 452
 453 struct call_rcu_data *get_default_call_rcu_data(void)
 454 {
 455         if (default_call_rcu_data != NULL)
 456                 return rcu_dereference(default_call_rcu_data);
 457         call_rcu_lock(&call_rcu_mutex);
 458         if (default_call_rcu_data != NULL) {
 459                 call_rcu_unlock(&call_rcu_mutex);
 460                 return default_call_rcu_data;
 461         }
 462         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 463         call_rcu_unlock(&call_rcu_mutex);
 464         return default_call_rcu_data;
 465 }
 466
 467 /*
 468  * Return the call_rcu_data structure that applies to the currently
 469  * running thread.  Any call_rcu_data structure assigned specifically
 470  * to this thread has first priority, followed by any call_rcu_data
 471  * structure assigned to the CPU on which the thread is running,
 472  * followed by the default call_rcu_data structure.  If there is not
 473  * yet a default call_rcu_data structure, one will be created.
 474  *
 475  * Calls to this function and use of the returned call_rcu_data should
 476  * be protected by RCU read-side lock.
 477  */
 478 struct call_rcu_data *get_call_rcu_data(void)
 479 {
 480         struct call_rcu_data *crd;
 481
 482         if (URCU_TLS(thread_call_rcu_data) != NULL)
 483                 return URCU_TLS(thread_call_rcu_data);
 484
 485         if (maxcpus > 0) {
 486                 crd = get_cpu_call_rcu_data(sched_getcpu());
 487                 if (crd)
 488                         return crd;
 489         }
 490
 491         return get_default_call_rcu_data();
 492 }
 493
 494 /*
 495  * Return a pointer to this task's call_rcu_data if there is one.
 496  */
 497
 498 struct call_rcu_data *get_thread_call_rcu_data(void)
 499 {
 500         return URCU_TLS(thread_call_rcu_data);
 501 }
 502
 503 /*
 504  * Set this task's call_rcu_data structure as specified, regardless
 505  * of whether or not this task already had one.  (This allows switching
 506  * to and from real-time call_rcu threads, for example.)
 507  *
 508  * Use NULL to remove a thread's call_rcu_data structure, but it is
 509  * the caller's responsibility to dispose of the removed structure.
 510  * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 511  * (prior to NULLing it out, of course).
 512  */
 513
 514 void set_thread_call_rcu_data(struct call_rcu_data *crdp)
 515 {
 516         URCU_TLS(thread_call_rcu_data) = crdp;
 517 }
 518
 519 /*
 520  * Create a separate call_rcu thread for each CPU.  This does not
 521  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 522  * function if you want that behavior. Should be paired with
 523  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 524  * threads.
 525  */
 526
 527 int create_all_cpu_call_rcu_data(unsigned long flags)
 528 {
 529         int i;
 530         struct call_rcu_data *crdp;
 531         int ret;
 532
 533         call_rcu_lock(&call_rcu_mutex);
 534         alloc_cpu_call_rcu_data();
 535         call_rcu_unlock(&call_rcu_mutex);
 536         if (maxcpus <= 0) {
 537                 errno = EINVAL;
 538                 return -EINVAL;
 539         }
 540         if (per_cpu_call_rcu_data == NULL) {
 541                 errno = ENOMEM;
 542                 return -ENOMEM;
 543         }
 544         for (i = 0; i < maxcpus; i++) {
 545                 call_rcu_lock(&call_rcu_mutex);
 546                 if (get_cpu_call_rcu_data(i)) {
 547                         call_rcu_unlock(&call_rcu_mutex);
 548                         continue;
 549                 }
 550                 crdp = __create_call_rcu_data(flags, i);
 551                 if (crdp == NULL) {
 552                         call_rcu_unlock(&call_rcu_mutex);
 553                         errno = ENOMEM;
 554                         return -ENOMEM;
 555                 }
 556                 call_rcu_unlock(&call_rcu_mutex);
 557                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 558                         call_rcu_data_free(crdp);
 559
 560                         /* it has been created by other thread */
 561                         if (ret == -EEXIST)
 562                                 continue;
 563
 564                         return ret;
 565                 }
 566         }
 567         return 0;
 568 }
 569
 570 /*
 571  * Wake up the call_rcu thread corresponding to the specified
 572  * call_rcu_data structure.
 573  */
 574 static void wake_call_rcu_thread(struct call_rcu_data *crdp)
 575 {
 576         if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
 577                 call_rcu_wake_up(crdp);
 578 }
 579
 580 /*
 581  * Schedule a function to be invoked after a following grace period.
 582  * This is the only function that must be called -- the others are
 583  * only present to allow applications to tune their use of RCU for
 584  * maximum performance.
 585  *
 586  * Note that unless a call_rcu thread has not already been created,
 587  * the first invocation of call_rcu() will create one.  So, if you
 588  * need the first invocation of call_rcu() to be fast, make sure
 589  * to create a call_rcu thread first.  One way to accomplish this is
 590  * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 591  *
 592  * call_rcu must be called by registered RCU read-side threads.
 593  */
 594
 595 void call_rcu(struct rcu_head *head,
 596               void (*func)(struct rcu_head *head))
 597 {
 598         struct call_rcu_data *crdp;
 599
 600         cds_wfcq_node_init(&head->next);
 601         head->func = func;
 602         /* Holding rcu read-side lock across use of per-cpu crdp */
 603         rcu_read_lock();
 604         crdp = get_call_rcu_data();
 605         cds_wfcq_enqueue(&crdp->cbs_head, &crdp->cbs_tail, &head->next);
 606         uatomic_inc(&crdp->qlen);
 607         wake_call_rcu_thread(crdp);
 608         rcu_read_unlock();
 609 }
 610
 611 /*
 612  * Free up the specified call_rcu_data structure, terminating the
 613  * associated call_rcu thread.  The caller must have previously
 614  * removed the call_rcu_data structure from per-thread or per-CPU
 615  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 616  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 617  * per-thread call_rcu_data structures.
 618  *
 619  * We silently refuse to free up the default call_rcu_data structure
 620  * because that is where we put any leftover callbacks.  Note that
 621  * the possibility of self-spawning callbacks makes it impossible
 622  * to execute all the callbacks in finite time without putting any
 623  * newly spawned callbacks somewhere else.  The "somewhere else" of
 624  * last resort is the default call_rcu_data structure.
 625  *
 626  * We also silently refuse to free NULL pointers.  This simplifies
 627  * the calling code.
 628  *
 629  * The caller must wait for a grace-period to pass between return from
 630  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 631  * previous call rcu data as argument.
 632  *
 633  * Note: introducing __cds_wfcq_splice_blocking() in this function fixed
 634  * a list corruption bug in the 0.7.x series. The equivalent fix
 635  * appeared in 0.6.8 for the stable-0.6 branch.
 636  */
 637 void call_rcu_data_free(struct call_rcu_data *crdp)
 638 {
 639         if (crdp == NULL || crdp == default_call_rcu_data) {
 640                 return;
 641         }
 642         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 643                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 644                 wake_call_rcu_thread(crdp);
 645                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 646                         poll(NULL, 0, 1);
 647         }
 648         if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
 649                 /* Create default call rcu data if need be */
 650                 (void) get_default_call_rcu_data();
 651                 __cds_wfcq_splice_blocking(&default_call_rcu_data->cbs_head,
 652                         &default_call_rcu_data->cbs_tail,
 653                         &crdp->cbs_head, &crdp->cbs_tail);
 654                 uatomic_add(&default_call_rcu_data->qlen,
 655                             uatomic_read(&crdp->qlen));
 656                 wake_call_rcu_thread(default_call_rcu_data);
 657         }
 658
 659         call_rcu_lock(&call_rcu_mutex);
 660         cds_list_del(&crdp->list);
 661         call_rcu_unlock(&call_rcu_mutex);
 662
 663         free(crdp);
 664 }
 665
 666 /*
 667  * Clean up all the per-CPU call_rcu threads.
 668  */
 669 void free_all_cpu_call_rcu_data(void)
 670 {
 671         int cpu;
 672         struct call_rcu_data **crdp;
 673         static int warned = 0;
 674
 675         if (maxcpus <= 0)
 676                 return;
 677
 678         crdp = malloc(sizeof(*crdp) * maxcpus);
 679         if (!crdp) {
 680                 if (!warned) {
 681                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 682                 }
 683                 warned = 1;
 684                 return;
 685         }
 686
 687         for (cpu = 0; cpu < maxcpus; cpu++) {
 688                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 689                 if (crdp[cpu] == NULL)
 690                         continue;
 691                 set_cpu_call_rcu_data(cpu, NULL);
 692         }
 693         /*
 694          * Wait for call_rcu sites acting as RCU readers of the
 695          * call_rcu_data to become quiescent.
 696          */
 697         synchronize_rcu();
 698         for (cpu = 0; cpu < maxcpus; cpu++) {
 699                 if (crdp[cpu] == NULL)
 700                         continue;
 701                 call_rcu_data_free(crdp[cpu]);
 702         }
 703         free(crdp);
 704 }
 705
 706 /*
 707  * Acquire the call_rcu_mutex in order to ensure that the child sees
 708  * all of the call_rcu() data structures in a consistent state.
 709  * Suitable for pthread_atfork() and friends.
 710  */
 711 void call_rcu_before_fork(void)
 712 {
 713         call_rcu_lock(&call_rcu_mutex);
 714 }
 715
 716 /*
 717  * Clean up call_rcu data structures in the parent of a successful fork()
 718  * that is not followed by exec() in the child.  Suitable for
 719  * pthread_atfork() and friends.
 720  */
 721 void call_rcu_after_fork_parent(void)
 722 {
 723         call_rcu_unlock(&call_rcu_mutex);
 724 }
 725
 726 /*
 727  * Clean up call_rcu data structures in the child of a successful fork()
 728  * that is not followed by exec().  Suitable for pthread_atfork() and
 729  * friends.
 730  */
 731 void call_rcu_after_fork_child(void)
 732 {
 733         struct call_rcu_data *crdp, *next;
 734
 735         /* Release the mutex. */
 736         call_rcu_unlock(&call_rcu_mutex);
 737
 738         /* Do nothing when call_rcu() has not been used */
 739         if (cds_list_empty(&call_rcu_data_list))
 740                 return;
 741
 742         /*
 743          * Allocate a new default call_rcu_data structure in order
 744          * to get a working call_rcu thread to go with it.
 745          */
 746         default_call_rcu_data = NULL;
 747         (void)get_default_call_rcu_data();
 748
 749         /* Cleanup call_rcu_data pointers before use */
 750         maxcpus_reset();
 751         free(per_cpu_call_rcu_data);
 752         rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
 753         URCU_TLS(thread_call_rcu_data) = NULL;
 754
 755         /* Dispose of all of the rest of the call_rcu_data structures. */
 756         cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 757                 if (crdp == default_call_rcu_data)
 758                         continue;
 759                 uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 760                 call_rcu_data_free(crdp);
 761         }
 762 }