urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #include <stdio.h>
  25 #include <pthread.h>
  26 #include <signal.h>
  27 #include <assert.h>
  28 #include <stdlib.h>
  29 #include <stdint.h>
  30 #include <string.h>
  31 #include <errno.h>
  32 #include <poll.h>
  33 #include <sys/time.h>
  34 #include <unistd.h>
  35 #include <sched.h>
  36
  37 #include "config.h"
  38 #include "urcu/wfqueue.h"
  39 #include "urcu-call-rcu.h"
  40 #include "urcu-pointer.h"
  41 #include "urcu/list.h"
  42 #include "urcu/futex.h"
  43 #include "urcu/tls-compat.h"
  44 #include "urcu-die.h"
  45
  46 #define SET_AFFINITY_CHECK_PERIOD               (1U << 8)       /* 256 */
  47 #define SET_AFFINITY_CHECK_PERIOD_MASK          (SET_AFFINITY_CHECK_PERIOD - 1)
  48
  49 /* Data structure that identifies a call_rcu thread. */
  50
  51 struct call_rcu_data {
  52         struct cds_wfq_queue cbs;
  53         unsigned long flags;
  54         int32_t futex;
  55         unsigned long qlen; /* maintained for debugging. */
  56         pthread_t tid;
  57         int cpu_affinity;
  58         unsigned long gp_count;
  59         struct cds_list_head list;
  60 } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  61
  62 /*
  63  * List of all call_rcu_data structures to keep valgrind happy.
  64  * Protected by call_rcu_mutex.
  65  */
  66
  67 CDS_LIST_HEAD(call_rcu_data_list);
  68
  69 /* Link a thread using call_rcu() to its call_rcu thread. */
  70
  71 static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);
  72
  73 /*
  74  * Guard call_rcu thread creation and atfork handlers.
  75  */
  76 static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
  77
  78 /* If a given thread does not have its own call_rcu thread, this is default. */
  79
  80 static struct call_rcu_data *default_call_rcu_data;
  81
  82 /*
  83  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
  84  * available, then we can have call_rcu threads assigned to individual
  85  * CPUs rather than only to specific threads.
  86  */
  87
  88 #ifdef HAVE_SCHED_GETCPU
  89
  90 static int urcu_sched_getcpu(void)
  91 {
  92         return sched_getcpu();
  93 }
  94
  95 #else /* #ifdef HAVE_SCHED_GETCPU */
  96
  97 static int urcu_sched_getcpu(void)
  98 {
  99         return -1;
 100 }
 101
 102 #endif /* #else #ifdef HAVE_SCHED_GETCPU */
 103
 104 #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU)
 105
 106 /*
 107  * Pointer to array of pointers to per-CPU call_rcu_data structures
 108  * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
 109  * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
 110  * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
 111  * without mutex. The call_rcu_mutex protects updates.
 112  */
 113
 114 static struct call_rcu_data **per_cpu_call_rcu_data;
 115 static long maxcpus;
 116
 117 static void maxcpus_reset(void)
 118 {
 119         maxcpus = 0;
 120 }
 121
 122 /* Allocate the array if it has not already been allocated. */
 123
 124 static void alloc_cpu_call_rcu_data(void)
 125 {
 126         struct call_rcu_data **p;
 127         static int warned = 0;
 128
 129         if (maxcpus != 0)
 130                 return;
 131         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 132         if (maxcpus <= 0) {
 133                 return;
 134         }
 135         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 136         if (p != NULL) {
 137                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 138                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 139         } else {
 140                 if (!warned) {
 141                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 142                 }
 143                 warned = 1;
 144         }
 145 }
 146
 147 #else /* #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 148
 149 /*
 * per_cpu_call_rcu_data should be constant, but some functions below, used both
 * for cases where cpu number is available and not available, assume it is not
 * constant.
 153  */
 154 static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
 155 static const long maxcpus = -1;
 156
 157 static void maxcpus_reset(void)
 158 {
 159 }
 160
 161 static void alloc_cpu_call_rcu_data(void)
 162 {
 163 }
 164
 165 #endif /* #else #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 166
 167 /* Acquire the specified pthread mutex. */
 168
 169 static void call_rcu_lock(pthread_mutex_t *pmp)
 170 {
 171         int ret;
 172
 173         ret = pthread_mutex_lock(pmp);
 174         if (ret)
 175                 urcu_die(ret);
 176 }
 177
 178 /* Release the specified pthread mutex. */
 179
 180 static void call_rcu_unlock(pthread_mutex_t *pmp)
 181 {
 182         int ret;
 183
 184         ret = pthread_mutex_unlock(pmp);
 185         if (ret)
 186                 urcu_die(ret);
 187 }
 188
 189 /*
 190  * Periodically retry setting CPU affinity if we migrate.
 191  * Losing affinity can be caused by CPU hotunplug/hotplug, or by
 192  * cpuset(7).
 193  */
 194 #if HAVE_SCHED_SETAFFINITY
 195 static
 196 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 197 {
 198         cpu_set_t mask;
 199         int ret;
 200
 201         if (crdp->cpu_affinity < 0)
 202                 return 0;
 203         if (++crdp->gp_count & SET_AFFINITY_CHECK_PERIOD_MASK)
 204                 return 0;
 205         if (urcu_sched_getcpu() == crdp->cpu_affinity)
 206                 return 0;
 207
 208         CPU_ZERO(&mask);
 209         CPU_SET(crdp->cpu_affinity, &mask);
 210 #if SCHED_SETAFFINITY_ARGS == 2
 211         ret = sched_setaffinity(0, &mask);
 212 #else
 213         ret = sched_setaffinity(0, sizeof(mask), &mask);
 214 #endif
 215         /*
 216          * EINVAL is fine: can be caused by hotunplugged CPUs, or by
 217          * cpuset(7). This is why we should always retry if we detect
 218          * migration.
 219          */
 220         if (ret && errno == EINVAL) {
 221                 ret = 0;
 222                 errno = 0;
 223         }
 224         return ret;
 225 }
 226 #else
 227 static
 228 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 229 {
 230         return 0;
 231 }
 232 #endif
 233
 234 static void call_rcu_wait(struct call_rcu_data *crdp)
 235 {
 236         /* Read call_rcu list before read futex */
 237         cmm_smp_mb();
 238         if (uatomic_read(&crdp->futex) == -1)
 239                 futex_async(&crdp->futex, FUTEX_WAIT, -1,
 240                       NULL, NULL, 0);
 241 }
 242
 243 static void call_rcu_wake_up(struct call_rcu_data *crdp)
 244 {
 245         /* Write to call_rcu list before reading/writing futex */
 246         cmm_smp_mb();
 247         if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
 248                 uatomic_set(&crdp->futex, 0);
 249                 futex_async(&crdp->futex, FUTEX_WAKE, 1,
 250                       NULL, NULL, 0);
 251         }
 252 }
 253
 254 /* This is the code run by each call_rcu thread. */
 255
 256 static void *call_rcu_thread(void *arg)
 257 {
 258         unsigned long cbcount;
 259         struct cds_wfq_node *cbs;
 260         struct cds_wfq_node **cbs_tail;
 261         struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
 262         struct rcu_head *rhp;
 263         int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
 264
 265         if (set_thread_cpu_affinity(crdp))
 266                 urcu_die(errno);
 267
 268         /*
 269          * If callbacks take a read-side lock, we need to be registered.
 270          */
 271         rcu_register_thread();
 272
 273         URCU_TLS(thread_call_rcu_data) = crdp;
 274         if (!rt) {
 275                 uatomic_dec(&crdp->futex);
 276                 /* Decrement futex before reading call_rcu list */
 277                 cmm_smp_mb();
 278         }
 279         for (;;) {
 280                 if (set_thread_cpu_affinity(crdp))
 281                         urcu_die(errno);
 282
 283                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) {
 284                         /*
 285                          * Pause requested. Become quiescent: remove
 286                          * ourself from all global lists, and don't
 287                          * process any callback. The callback lists may
 288                          * still be non-empty though.
 289                          */
 290                         rcu_unregister_thread();
 291                         cmm_smp_mb__before_uatomic_or();
 292                         uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSED);
 293                         while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) != 0)
 294                                 poll(NULL, 0, 1);
 295                         uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSED);
 296                         cmm_smp_mb__after_uatomic_and();
 297                         rcu_register_thread();
 298                 }
 299
 300                 if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 301                         while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 302                                 poll(NULL, 0, 1);
 303                         _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 304                         cbs_tail = (struct cds_wfq_node **)
 305                                 uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
 306                         synchronize_rcu();
 307                         cbcount = 0;
 308                         do {
 309                                 while (cbs->next == NULL &&
 310                                        &cbs->next != cbs_tail)
 311                                         poll(NULL, 0, 1);
 312                                 if (cbs == &crdp->cbs.dummy) {
 313                                         cbs = cbs->next;
 314                                         continue;
 315                                 }
 316                                 rhp = (struct rcu_head *)cbs;
 317                                 cbs = cbs->next;
 318                                 rhp->func(rhp);
 319                                 cbcount++;
 320                         } while (cbs != NULL);
 321                         uatomic_sub(&crdp->qlen, cbcount);
 322                 }
 323                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
 324                         break;
 325                 rcu_thread_offline();
 326                 if (!rt) {
 327                         if (&crdp->cbs.head
 328                             == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 329                                 call_rcu_wait(crdp);
 330                                 poll(NULL, 0, 10);
 331                                 uatomic_dec(&crdp->futex);
 332                                 /*
 333                                  * Decrement futex before reading
 334                                  * call_rcu list.
 335                                  */
 336                                 cmm_smp_mb();
 337                         } else {
 338                                 poll(NULL, 0, 10);
 339                         }
 340                 } else {
 341                         poll(NULL, 0, 10);
 342                 }
 343                 rcu_thread_online();
 344         }
 345         if (!rt) {
 346                 /*
 347                  * Read call_rcu list before write futex.
 348                  */
 349                 cmm_smp_mb();
 350                 uatomic_set(&crdp->futex, 0);
 351         }
 352         uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
 353         rcu_unregister_thread();
 354         return NULL;
 355 }
 356
 357 /*
 358  * Create both a call_rcu thread and the corresponding call_rcu_data
 359  * structure, linking the structure in as specified.  Caller must hold
 360  * call_rcu_mutex.
 361  */
 362
 363 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 364                                unsigned long flags,
 365                                int cpu_affinity)
 366 {
 367         struct call_rcu_data *crdp;
 368         int ret;
 369
 370         crdp = malloc(sizeof(*crdp));
 371         if (crdp == NULL)
 372                 urcu_die(errno);
 373         memset(crdp, '\0', sizeof(*crdp));
 374         cds_wfq_init(&crdp->cbs);
 375         crdp->qlen = 0;
 376         crdp->futex = 0;
 377         crdp->flags = flags;
 378         cds_list_add(&crdp->list, &call_rcu_data_list);
 379         crdp->cpu_affinity = cpu_affinity;
 380         crdp->gp_count = 0;
 381         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 382         *crdpp = crdp;
 383         ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
 384         if (ret)
 385                 urcu_die(ret);
 386 }
 387
 388 /*
 389  * Return a pointer to the call_rcu_data structure for the specified
 390  * CPU, returning NULL if there is none.  We cannot automatically
 391  * created it because the platform we are running on might not define
 392  * urcu_sched_getcpu().
 393  *
 394  * The call to this function and use of the returned call_rcu_data
 395  * should be protected by RCU read-side lock.
 396  */
 397
 398 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 399 {
 400         static int warned = 0;
 401         struct call_rcu_data **pcpu_crdp;
 402
 403         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 404         if (pcpu_crdp == NULL)
 405                 return NULL;
 406         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 407                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 408                 warned = 1;
 409         }
 410         if (cpu < 0 || maxcpus <= cpu)
 411                 return NULL;
 412         return rcu_dereference(pcpu_crdp[cpu]);
 413 }
 414
 415 /*
 416  * Return the tid corresponding to the call_rcu thread whose
 417  * call_rcu_data structure is specified.
 418  */
 419
 420 pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
 421 {
 422         return crdp->tid;
 423 }
 424
 425 /*
 426  * Create a call_rcu_data structure (with thread) and return a pointer.
 427  */
 428
 429 static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
 430                                                     int cpu_affinity)
 431 {
 432         struct call_rcu_data *crdp;
 433
 434         call_rcu_data_init(&crdp, flags, cpu_affinity);
 435         return crdp;
 436 }
 437
 438 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 439                                            int cpu_affinity)
 440 {
 441         struct call_rcu_data *crdp;
 442
 443         call_rcu_lock(&call_rcu_mutex);
 444         crdp = __create_call_rcu_data(flags, cpu_affinity);
 445         call_rcu_unlock(&call_rcu_mutex);
 446         return crdp;
 447 }
 448
 449 /*
 450  * Set the specified CPU to use the specified call_rcu_data structure.
 451  *
 452  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 453  * the caller's responsibility to dispose of the removed structure.
 454  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 455  * (prior to NULLing it out, of course).
 456  *
 457  * The caller must wait for a grace-period to pass between return from
 458  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 459  * previous call rcu data as argument.
 460  */
 461
 462 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 463 {
 464         static int warned = 0;
 465
 466         call_rcu_lock(&call_rcu_mutex);
 467         alloc_cpu_call_rcu_data();
 468         if (cpu < 0 || maxcpus <= cpu) {
 469                 if (!warned) {
 470                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 471                         warned = 1;
 472                 }
 473                 call_rcu_unlock(&call_rcu_mutex);
 474                 errno = EINVAL;
 475                 return -EINVAL;
 476         }
 477
 478         if (per_cpu_call_rcu_data == NULL) {
 479                 call_rcu_unlock(&call_rcu_mutex);
 480                 errno = ENOMEM;
 481                 return -ENOMEM;
 482         }
 483
 484         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 485                 call_rcu_unlock(&call_rcu_mutex);
 486                 errno = EEXIST;
 487                 return -EEXIST;
 488         }
 489
 490         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 491         call_rcu_unlock(&call_rcu_mutex);
 492         return 0;
 493 }
 494
 495 /*
 496  * Return a pointer to the default call_rcu_data structure, creating
 497  * one if need be.  Because we never free call_rcu_data structures,
 498  * we don't need to be in an RCU read-side critical section.
 499  */
 500
 501 struct call_rcu_data *get_default_call_rcu_data(void)
 502 {
 503         if (default_call_rcu_data != NULL)
 504                 return rcu_dereference(default_call_rcu_data);
 505         call_rcu_lock(&call_rcu_mutex);
 506         if (default_call_rcu_data != NULL) {
 507                 call_rcu_unlock(&call_rcu_mutex);
 508                 return default_call_rcu_data;
 509         }
 510         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 511         call_rcu_unlock(&call_rcu_mutex);
 512         return default_call_rcu_data;
 513 }
 514
 515 /*
 516  * Return the call_rcu_data structure that applies to the currently
 517  * running thread.  Any call_rcu_data structure assigned specifically
 518  * to this thread has first priority, followed by any call_rcu_data
 519  * structure assigned to the CPU on which the thread is running,
 520  * followed by the default call_rcu_data structure.  If there is not
 521  * yet a default call_rcu_data structure, one will be created.
 522  *
 523  * Calls to this function and use of the returned call_rcu_data should
 524  * be protected by RCU read-side lock.
 525  */
 526 struct call_rcu_data *get_call_rcu_data(void)
 527 {
 528         struct call_rcu_data *crd;
 529
 530         if (URCU_TLS(thread_call_rcu_data) != NULL)
 531                 return URCU_TLS(thread_call_rcu_data);
 532
 533         if (maxcpus > 0) {
 534                 crd = get_cpu_call_rcu_data(urcu_sched_getcpu());
 535                 if (crd)
 536                         return crd;
 537         }
 538
 539         return get_default_call_rcu_data();
 540 }
 541
 542 /*
 543  * Return a pointer to this task's call_rcu_data if there is one.
 544  */
 545
 546 struct call_rcu_data *get_thread_call_rcu_data(void)
 547 {
 548         return URCU_TLS(thread_call_rcu_data);
 549 }
 550
 551 /*
 552  * Set this task's call_rcu_data structure as specified, regardless
 553  * of whether or not this task already had one.  (This allows switching
 554  * to and from real-time call_rcu threads, for example.)
 555  *
 556  * Use NULL to remove a thread's call_rcu_data structure, but it is
 557  * the caller's responsibility to dispose of the removed structure.
 558  * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 559  * (prior to NULLing it out, of course).
 560  */
 561
 562 void set_thread_call_rcu_data(struct call_rcu_data *crdp)
 563 {
 564         URCU_TLS(thread_call_rcu_data) = crdp;
 565 }
 566
 567 /*
 568  * Create a separate call_rcu thread for each CPU.  This does not
 569  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 570  * function if you want that behavior. Should be paired with
 571  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 572  * threads.
 573  */
 574
 575 int create_all_cpu_call_rcu_data(unsigned long flags)
 576 {
 577         int i;
 578         struct call_rcu_data *crdp;
 579         int ret;
 580
 581         call_rcu_lock(&call_rcu_mutex);
 582         alloc_cpu_call_rcu_data();
 583         call_rcu_unlock(&call_rcu_mutex);
 584         if (maxcpus <= 0) {
 585                 errno = EINVAL;
 586                 return -EINVAL;
 587         }
 588         if (per_cpu_call_rcu_data == NULL) {
 589                 errno = ENOMEM;
 590                 return -ENOMEM;
 591         }
 592         for (i = 0; i < maxcpus; i++) {
 593                 call_rcu_lock(&call_rcu_mutex);
 594                 if (get_cpu_call_rcu_data(i)) {
 595                         call_rcu_unlock(&call_rcu_mutex);
 596                         continue;
 597                 }
 598                 crdp = __create_call_rcu_data(flags, i);
 599                 if (crdp == NULL) {
 600                         call_rcu_unlock(&call_rcu_mutex);
 601                         errno = ENOMEM;
 602                         return -ENOMEM;
 603                 }
 604                 call_rcu_unlock(&call_rcu_mutex);
 605                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 606                         call_rcu_data_free(crdp);
 607
 608                         /* it has been created by other thread */
 609                         if (ret == -EEXIST)
 610                                 continue;
 611
 612                         return ret;
 613                 }
 614         }
 615         return 0;
 616 }
 617
 618 /*
 619  * Wake up the call_rcu thread corresponding to the specified
 620  * call_rcu_data structure.
 621  */
 622 static void wake_call_rcu_thread(struct call_rcu_data *crdp)
 623 {
 624         if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
 625                 call_rcu_wake_up(crdp);
 626 }
 627
 628 /*
 629  * Schedule a function to be invoked after a following grace period.
 630  * This is the only function that must be called -- the others are
 631  * only present to allow applications to tune their use of RCU for
 632  * maximum performance.
 633  *
 634  * Note that unless a call_rcu thread has not already been created,
 635  * the first invocation of call_rcu() will create one.  So, if you
 636  * need the first invocation of call_rcu() to be fast, make sure
 637  * to create a call_rcu thread first.  One way to accomplish this is
 638  * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 639  *
 640  * call_rcu must be called by registered RCU read-side threads.
 641  */
 642
 643 void call_rcu(struct rcu_head *head,
 644               void (*func)(struct rcu_head *head))
 645 {
 646         struct call_rcu_data *crdp;
 647
 648         cds_wfq_node_init(&head->next);
 649         head->func = func;
 650         /* Holding rcu read-side lock across use of per-cpu crdp */
 651         _rcu_read_lock();
 652         crdp = get_call_rcu_data();
 653         cds_wfq_enqueue(&crdp->cbs, &head->next);
 654         uatomic_inc(&crdp->qlen);
 655         wake_call_rcu_thread(crdp);
 656         _rcu_read_unlock();
 657 }
 658
 659 /*
 660  * Free up the specified call_rcu_data structure, terminating the
 661  * associated call_rcu thread.  The caller must have previously
 662  * removed the call_rcu_data structure from per-thread or per-CPU
 663  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 664  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 665  * per-thread call_rcu_data structures.
 666  *
 667  * We silently refuse to free up the default call_rcu_data structure
 668  * because that is where we put any leftover callbacks.  Note that
 669  * the possibility of self-spawning callbacks makes it impossible
 670  * to execute all the callbacks in finite time without putting any
 671  * newly spawned callbacks somewhere else.  The "somewhere else" of
 672  * last resort is the default call_rcu_data structure.
 673  *
 674  * We also silently refuse to free NULL pointers.  This simplifies
 675  * the calling code.
 676  *
 677  * The caller must wait for a grace-period to pass between return from
 678  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 679  * previous call rcu data as argument.
 680  */
 681 void call_rcu_data_free(struct call_rcu_data *crdp)
 682 {
 683         struct cds_wfq_node *cbs;
 684         struct cds_wfq_node **cbs_tail;
 685         struct cds_wfq_node **cbs_endprev;
 686
 687         if (crdp == NULL || crdp == default_call_rcu_data) {
 688                 return;
 689         }
 690         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 691                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 692                 wake_call_rcu_thread(crdp);
 693                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 694                         poll(NULL, 0, 1);
 695         }
 696         if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 697                 while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 698                         poll(NULL, 0, 1);
 699                 _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 700                 cbs_tail = (struct cds_wfq_node **)
 701                         uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
 702                 /* Create default call rcu data if need be */
 703                 (void) get_default_call_rcu_data();
 704                 cbs_endprev = (struct cds_wfq_node **)
 705                         uatomic_xchg(&default_call_rcu_data->cbs.tail,
 706                                         cbs_tail);
 707                 _CMM_STORE_SHARED(*cbs_endprev, cbs);
 708                 uatomic_add(&default_call_rcu_data->qlen,
 709                             uatomic_read(&crdp->qlen));
 710                 wake_call_rcu_thread(default_call_rcu_data);
 711         }
 712
 713         call_rcu_lock(&call_rcu_mutex);
 714         cds_list_del(&crdp->list);
 715         call_rcu_unlock(&call_rcu_mutex);
 716
 717         free(crdp);
 718 }
 719
 720 /*
 721  * Clean up all the per-CPU call_rcu threads.
 722  */
 723 void free_all_cpu_call_rcu_data(void)
 724 {
 725         int cpu;
 726         struct call_rcu_data **crdp;
 727         static int warned = 0;
 728
 729         if (maxcpus <= 0)
 730                 return;
 731
 732         crdp = malloc(sizeof(*crdp) * maxcpus);
 733         if (!crdp) {
 734                 if (!warned) {
 735                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 736                 }
 737                 warned = 1;
 738                 return;
 739         }
 740
 741         for (cpu = 0; cpu < maxcpus; cpu++) {
 742                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 743                 if (crdp[cpu] == NULL)
 744                         continue;
 745                 set_cpu_call_rcu_data(cpu, NULL);
 746         }
 747         /*
 748          * Wait for call_rcu sites acting as RCU readers of the
 749          * call_rcu_data to become quiescent.
 750          */
 751         synchronize_rcu();
 752         for (cpu = 0; cpu < maxcpus; cpu++) {
 753                 if (crdp[cpu] == NULL)
 754                         continue;
 755                 call_rcu_data_free(crdp[cpu]);
 756         }
 757         free(crdp);
 758 }
 759
 760 /*
 761  * Acquire the call_rcu_mutex in order to ensure that the child sees
 762  * all of the call_rcu() data structures in a consistent state. Ensure
 763  * that all call_rcu threads are in a quiescent state across fork.
 764  * Suitable for pthread_atfork() and friends.
 765  */
 766 void call_rcu_before_fork(void)
 767 {
 768         struct call_rcu_data *crdp;
 769
 770         call_rcu_lock(&call_rcu_mutex);
 771
 772         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 773                 uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSE);
 774                 cmm_smp_mb__after_uatomic_or();
 775                 wake_call_rcu_thread(crdp);
 776         }
 777         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 778                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) == 0)
 779                         poll(NULL, 0, 1);
 780         }
 781 }
 782
 783 /*
 784  * Clean up call_rcu data structures in the parent of a successful fork()
 785  * that is not followed by exec() in the child.  Suitable for
 786  * pthread_atfork() and friends.
 787  */
 788 void call_rcu_after_fork_parent(void)
 789 {
 790         struct call_rcu_data *crdp;
 791
 792         cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
 793                 uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSE);
 794         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 795                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) != 0)
 796                         poll(NULL, 0, 1);
 797         }
 798         call_rcu_unlock(&call_rcu_mutex);
 799 }
 800
 801 /*
 802  * Clean up call_rcu data structures in the child of a successful fork()
 803  * that is not followed by exec().  Suitable for pthread_atfork() and
 804  * friends.
 805  */
 806 void call_rcu_after_fork_child(void)
 807 {
 808         struct call_rcu_data *crdp, *next;
 809
 810         /* Release the mutex. */
 811         call_rcu_unlock(&call_rcu_mutex);
 812
 813         /* Do nothing when call_rcu() has not been used */
 814         if (cds_list_empty(&call_rcu_data_list))
 815                 return;
 816
 817         /*
 818          * Allocate a new default call_rcu_data structure in order
 819          * to get a working call_rcu thread to go with it.
 820          */
 821         default_call_rcu_data = NULL;
 822         (void)get_default_call_rcu_data();
 823
 824         /* Cleanup call_rcu_data pointers before use */
 825         maxcpus_reset();
 826         free(per_cpu_call_rcu_data);
 827         rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
 828         URCU_TLS(thread_call_rcu_data) = NULL;
 829
 830         /*
 831          * Dispose of all of the rest of the call_rcu_data structures.
 832          * Leftover call_rcu callbacks will be merged into the new
 833          * default call_rcu thread queue.
 834          */
 835         cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 836                 if (crdp == default_call_rcu_data)
 837                         continue;
 838                 uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 839                 call_rcu_data_free(crdp);
 840         }
 841 }