urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #define _LGPL_SOURCE
  25 #include <stdio.h>
  26 #include <pthread.h>
  27 #include <signal.h>
  28 #include <assert.h>
  29 #include <stdlib.h>
  30 #include <stdint.h>
  31 #include <string.h>
  32 #include <errno.h>
  33 #include <poll.h>
  34 #include <sys/time.h>
  35 #include <unistd.h>
  36 #include <sched.h>
  37
  38 #include "config.h"
  39 #include "urcu/wfcqueue.h"
  40 #include "urcu-call-rcu.h"
  41 #include "urcu-pointer.h"
  42 #include "urcu/list.h"
  43 #include "urcu/futex.h"
  44 #include "urcu/tls-compat.h"
  45 #include "urcu/ref.h"
  46 #include "urcu-die.h"
  47
/*
 * A call_rcu worker only re-checks its CPU affinity once every
 * SET_AFFINITY_CHECK_PERIOD calls to set_thread_cpu_affinity(). The
 * period is a power of two so that the periodic test reduces to masking
 * the worker's gp_count with SET_AFFINITY_CHECK_PERIOD_MASK.
 */
#define SET_AFFINITY_CHECK_PERIOD               (1U << 8)       /* 256 */
#define SET_AFFINITY_CHECK_PERIOD_MASK          (SET_AFFINITY_CHECK_PERIOD - 1)
  50
/*
 * Data structure that identifies a call_rcu thread: the callback queue
 * it drains, its control flags and the futex it sleeps on.
 */

struct call_rcu_data {
        /*
         * We do not align head on a different cache-line than tail
         * mainly because call_rcu callback-invocation threads use
         * batching ("splice") to get an entire list of callbacks, which
         * effectively empties the queue, and requires to touch the tail
         * anyway.
         */
        struct cds_wfcq_tail cbs_tail;
        struct cds_wfcq_head cbs_head;
        /* Bit mask of URCU_CALL_RCU_* values (RT, PAUSE, PAUSED, STOP, STOPPED). */
        unsigned long flags;
        /*
         * Sleep/wake-up word for non-RT workers: the worker decrements
         * it to -1 before sleeping in call_rcu_wait(); call_rcu_wake_up()
         * resets it to 0 and issues a FUTEX_WAKE.
         */
        int32_t futex;
        unsigned long qlen; /* maintained for debugging. */
        /* Worker thread identifier, filled in by pthread_create(). */
        pthread_t tid;
        /* CPU the worker tries to stay on; a negative value disables pinning. */
        int cpu_affinity;
        /* Counter advanced by set_thread_cpu_affinity() to pace affinity checks. */
        unsigned long gp_count;
        /* Linkage into call_rcu_data_list (protected by call_rcu_mutex). */
        struct cds_list_head list;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  71
/*
 * Shared state used by rcu_barrier() to wait until one barrier callback
 * has run on every call_rcu_data structure.
 */
struct call_rcu_completion {
        /* Barrier callbacks not yet run; decremented by _rcu_barrier_complete(). */
        int barrier_count;
        /* Sleep/wake-up word, see call_rcu_completion_wait()/_wake_up(). */
        int32_t futex;
        /* Reference count; free_completion() runs when the last one is dropped. */
        struct urcu_ref ref;
};
  77
/*
 * Per-queue barrier callback: one instance is enqueued with _call_rcu()
 * on each call_rcu_data structure and freed by _rcu_barrier_complete().
 */
struct call_rcu_completion_work {
        /* Callback node handed to _call_rcu(). */
        struct rcu_head head;
        /* Completion object shared by all work items of one barrier. */
        struct call_rcu_completion *completion;
};
  82
/*
 * List of all call_rcu_data structures to keep valgrind happy.
 * Also walked by rcu_barrier() to reach every callback queue.
 * Protected by call_rcu_mutex.
 */

static CDS_LIST_HEAD(call_rcu_data_list);

/*
 * Link a thread using call_rcu() to its call_rcu thread. NULL means the
 * thread has no dedicated call_rcu_data (see get_call_rcu_data()).
 */

static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);

/*
 * Guard call_rcu thread creation and atfork handlers. Also serializes
 * updates to call_rcu_data_list and to the per-CPU pointer array.
 */
static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * If a given thread does not have its own call_rcu thread, this is default.
 * Created lazily by get_default_call_rcu_data().
 */

static struct call_rcu_data *default_call_rcu_data;
 102
 103 /*
 104  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
 105  * available, then we can have call_rcu threads assigned to individual
 106  * CPUs rather than only to specific threads.
 107  */
 108
 109 #ifdef HAVE_SCHED_GETCPU
 110
 111 static int urcu_sched_getcpu(void)
 112 {
 113         return sched_getcpu();
 114 }
 115
 116 #else /* #ifdef HAVE_SCHED_GETCPU */
 117
 118 static int urcu_sched_getcpu(void)
 119 {
 120         return -1;
 121 }
 122
 123 #endif /* #else #ifdef HAVE_SCHED_GETCPU */
 124
 125 #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU)
 126
/*
 * Pointer to array of pointers to per-CPU call_rcu_data structures
 * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
 * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
 * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
 * without mutex. The call_rcu_mutex protects updates.
 *
 * maxcpus stays 0 until alloc_cpu_call_rcu_data() has queried
 * sysconf(_SC_NPROCESSORS_CONF); after that it holds the value returned
 * by that call, which may be <= 0 on failure.
 */

static struct call_rcu_data **per_cpu_call_rcu_data;
static long maxcpus;
 137
 138 static void maxcpus_reset(void)
 139 {
 140         maxcpus = 0;
 141 }
 142
 143 /* Allocate the array if it has not already been allocated. */
 144
 145 static void alloc_cpu_call_rcu_data(void)
 146 {
 147         struct call_rcu_data **p;
 148         static int warned = 0;
 149
 150         if (maxcpus != 0)
 151                 return;
 152         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 153         if (maxcpus <= 0) {
 154                 return;
 155         }
 156         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 157         if (p != NULL) {
 158                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 159                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 160         } else {
 161                 if (!warned) {
 162                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 163                 }
 164                 warned = 1;
 165         }
 166 }
 167
 168 #else /* #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 169
/*
 * per_cpu_call_rcu_data should be constant, but some functions below, used
 * both for cases where the cpu number is available and not available,
 * assume it is not constant.
 *
 * In this configuration the array is never allocated and maxcpus is
 * fixed at -1, so every CPU number is reported as out of range.
 */
static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
static const long maxcpus = -1;
 177
/* No-op: maxcpus is a constant when per-CPU support is compiled out. */
static void maxcpus_reset(void)
{
}
 181
/* No-op: there is no per-CPU array when per-CPU support is compiled out. */
static void alloc_cpu_call_rcu_data(void)
{
}
 185
 186 #endif /* #else #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 187
 188 /* Acquire the specified pthread mutex. */
 189
 190 static void call_rcu_lock(pthread_mutex_t *pmp)
 191 {
 192         int ret;
 193
 194         ret = pthread_mutex_lock(pmp);
 195         if (ret)
 196                 urcu_die(ret);
 197 }
 198
 199 /* Release the specified pthread mutex. */
 200
 201 static void call_rcu_unlock(pthread_mutex_t *pmp)
 202 {
 203         int ret;
 204
 205         ret = pthread_mutex_unlock(pmp);
 206         if (ret)
 207                 urcu_die(ret);
 208 }
 209
 210 /*
 211  * Periodically retry setting CPU affinity if we migrate.
 212  * Losing affinity can be caused by CPU hotunplug/hotplug, or by
 213  * cpuset(7).
 214  */
 215 #if HAVE_SCHED_SETAFFINITY
 216 static
 217 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 218 {
 219         cpu_set_t mask;
 220         int ret;
 221
 222         if (crdp->cpu_affinity < 0)
 223                 return 0;
 224         if (++crdp->gp_count & SET_AFFINITY_CHECK_PERIOD_MASK)
 225                 return 0;
 226         if (urcu_sched_getcpu() == crdp->cpu_affinity)
 227                 return 0;
 228
 229         CPU_ZERO(&mask);
 230         CPU_SET(crdp->cpu_affinity, &mask);
 231 #if SCHED_SETAFFINITY_ARGS == 2
 232         ret = sched_setaffinity(0, &mask);
 233 #else
 234         ret = sched_setaffinity(0, sizeof(mask), &mask);
 235 #endif
 236         /*
 237          * EINVAL is fine: can be caused by hotunplugged CPUs, or by
 238          * cpuset(7). This is why we should always retry if we detect
 239          * migration.
 240          */
 241         if (ret && errno == EINVAL) {
 242                 ret = 0;
 243                 errno = 0;
 244         }
 245         return ret;
 246 }
 247 #else
 248 static
 249 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 250 {
 251         return 0;
 252 }
 253 #endif
 254
 255 static void call_rcu_wait(struct call_rcu_data *crdp)
 256 {
 257         /* Read call_rcu list before read futex */
 258         cmm_smp_mb();
 259         if (uatomic_read(&crdp->futex) != -1)
 260                 return;
 261         while (futex_async(&crdp->futex, FUTEX_WAIT, -1,
 262                         NULL, NULL, 0)) {
 263                 switch (errno) {
 264                 case EWOULDBLOCK:
 265                         /* Value already changed. */
 266                         return;
 267                 case EINTR:
 268                         /* Retry if interrupted by signal. */
 269                         break;  /* Get out of switch. */
 270                 default:
 271                         /* Unexpected error. */
 272                         urcu_die(errno);
 273                 }
 274         }
 275 }
 276
 277 static void call_rcu_wake_up(struct call_rcu_data *crdp)
 278 {
 279         /* Write to call_rcu list before reading/writing futex */
 280         cmm_smp_mb();
 281         if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
 282                 uatomic_set(&crdp->futex, 0);
 283                 if (futex_async(&crdp->futex, FUTEX_WAKE, 1,
 284                                 NULL, NULL, 0) < 0)
 285                         urcu_die(errno);
 286         }
 287 }
 288
 289 static void call_rcu_completion_wait(struct call_rcu_completion *completion)
 290 {
 291         /* Read completion barrier count before read futex */
 292         cmm_smp_mb();
 293         if (uatomic_read(&completion->futex) != -1)
 294                 return;
 295         while (futex_async(&completion->futex, FUTEX_WAIT, -1,
 296                         NULL, NULL, 0)) {
 297                 switch (errno) {
 298                 case EWOULDBLOCK:
 299                         /* Value already changed. */
 300                         return;
 301                 case EINTR:
 302                         /* Retry if interrupted by signal. */
 303                         break;  /* Get out of switch. */
 304                 default:
 305                         /* Unexpected error. */
 306                         urcu_die(errno);
 307                 }
 308         }
 309 }
 310
 311 static void call_rcu_completion_wake_up(struct call_rcu_completion *completion)
 312 {
 313         /* Write to completion barrier count before reading/writing futex */
 314         cmm_smp_mb();
 315         if (caa_unlikely(uatomic_read(&completion->futex) == -1)) {
 316                 uatomic_set(&completion->futex, 0);
 317                 if (futex_async(&completion->futex, FUTEX_WAKE, 1,
 318                                 NULL, NULL, 0) < 0)
 319                         urcu_die(errno);
 320         }
 321 }
 322
 323 /* This is the code run by each call_rcu thread. */
 324
 325 static void *call_rcu_thread(void *arg)
 326 {
 327         unsigned long cbcount;
 328         struct call_rcu_data *crdp = (struct call_rcu_data *) arg;
 329         int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
 330
 331         if (set_thread_cpu_affinity(crdp))
 332                 urcu_die(errno);
 333
 334         /*
 335          * If callbacks take a read-side lock, we need to be registered.
 336          */
 337         rcu_register_thread();
 338
 339         URCU_TLS(thread_call_rcu_data) = crdp;
 340         if (!rt) {
 341                 uatomic_dec(&crdp->futex);
 342                 /* Decrement futex before reading call_rcu list */
 343                 cmm_smp_mb();
 344         }
 345         for (;;) {
 346                 struct cds_wfcq_head cbs_tmp_head;
 347                 struct cds_wfcq_tail cbs_tmp_tail;
 348                 struct cds_wfcq_node *cbs, *cbs_tmp_n;
 349                 enum cds_wfcq_ret splice_ret;
 350
 351                 if (set_thread_cpu_affinity(crdp))
 352                         urcu_die(errno);
 353
 354                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) {
 355                         /*
 356                          * Pause requested. Become quiescent: remove
 357                          * ourself from all global lists, and don't
 358                          * process any callback. The callback lists may
 359                          * still be non-empty though.
 360                          */
 361                         rcu_unregister_thread();
 362                         cmm_smp_mb__before_uatomic_or();
 363                         uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSED);
 364                         while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) != 0)
 365                                 (void) poll(NULL, 0, 1);
 366                         uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSED);
 367                         cmm_smp_mb__after_uatomic_and();
 368                         rcu_register_thread();
 369                 }
 370
 371                 cds_wfcq_init(&cbs_tmp_head, &cbs_tmp_tail);
 372                 splice_ret = __cds_wfcq_splice_blocking(&cbs_tmp_head,
 373                         &cbs_tmp_tail, &crdp->cbs_head, &crdp->cbs_tail);
 374                 assert(splice_ret != CDS_WFCQ_RET_WOULDBLOCK);
 375                 assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
 376                 if (splice_ret != CDS_WFCQ_RET_SRC_EMPTY) {
 377                         synchronize_rcu();
 378                         cbcount = 0;
 379                         __cds_wfcq_for_each_blocking_safe(&cbs_tmp_head,
 380                                         &cbs_tmp_tail, cbs, cbs_tmp_n) {
 381                                 struct rcu_head *rhp;
 382
 383                                 rhp = caa_container_of(cbs,
 384                                         struct rcu_head, next);
 385                                 rhp->func(rhp);
 386                                 cbcount++;
 387                         }
 388                         uatomic_sub(&crdp->qlen, cbcount);
 389                 }
 390                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
 391                         break;
 392                 rcu_thread_offline();
 393                 if (!rt) {
 394                         if (cds_wfcq_empty(&crdp->cbs_head,
 395                                         &crdp->cbs_tail)) {
 396                                 call_rcu_wait(crdp);
 397                                 (void) poll(NULL, 0, 10);
 398                                 uatomic_dec(&crdp->futex);
 399                                 /*
 400                                  * Decrement futex before reading
 401                                  * call_rcu list.
 402                                  */
 403                                 cmm_smp_mb();
 404                         } else {
 405                                 (void) poll(NULL, 0, 10);
 406                         }
 407                 } else {
 408                         (void) poll(NULL, 0, 10);
 409                 }
 410                 rcu_thread_online();
 411         }
 412         if (!rt) {
 413                 /*
 414                  * Read call_rcu list before write futex.
 415                  */
 416                 cmm_smp_mb();
 417                 uatomic_set(&crdp->futex, 0);
 418         }
 419         uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
 420         rcu_unregister_thread();
 421         return NULL;
 422 }
 423
 424 /*
 425  * Create both a call_rcu thread and the corresponding call_rcu_data
 426  * structure, linking the structure in as specified.  Caller must hold
 427  * call_rcu_mutex.
 428  */
 429
 430 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 431                                unsigned long flags,
 432                                int cpu_affinity)
 433 {
 434         struct call_rcu_data *crdp;
 435         int ret;
 436
 437         crdp = malloc(sizeof(*crdp));
 438         if (crdp == NULL)
 439                 urcu_die(errno);
 440         memset(crdp, '\0', sizeof(*crdp));
 441         cds_wfcq_init(&crdp->cbs_head, &crdp->cbs_tail);
 442         crdp->qlen = 0;
 443         crdp->futex = 0;
 444         crdp->flags = flags;
 445         cds_list_add(&crdp->list, &call_rcu_data_list);
 446         crdp->cpu_affinity = cpu_affinity;
 447         crdp->gp_count = 0;
 448         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 449         *crdpp = crdp;
 450         ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
 451         if (ret)
 452                 urcu_die(ret);
 453 }
 454
 455 /*
 456  * Return a pointer to the call_rcu_data structure for the specified
 457  * CPU, returning NULL if there is none.  We cannot automatically
 458  * created it because the platform we are running on might not define
 459  * urcu_sched_getcpu().
 460  *
 461  * The call to this function and use of the returned call_rcu_data
 462  * should be protected by RCU read-side lock.
 463  */
 464
 465 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 466 {
 467         static int warned = 0;
 468         struct call_rcu_data **pcpu_crdp;
 469
 470         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 471         if (pcpu_crdp == NULL)
 472                 return NULL;
 473         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 474                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 475                 warned = 1;
 476         }
 477         if (cpu < 0 || maxcpus <= cpu)
 478                 return NULL;
 479         return rcu_dereference(pcpu_crdp[cpu]);
 480 }
 481
 482 /*
 483  * Return the tid corresponding to the call_rcu thread whose
 484  * call_rcu_data structure is specified.
 485  */
 486
 487 pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
 488 {
 489         return crdp->tid;
 490 }
 491
 492 /*
 493  * Create a call_rcu_data structure (with thread) and return a pointer.
 494  */
 495
 496 static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
 497                                                     int cpu_affinity)
 498 {
 499         struct call_rcu_data *crdp;
 500
 501         call_rcu_data_init(&crdp, flags, cpu_affinity);
 502         return crdp;
 503 }
 504
 505 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 506                                            int cpu_affinity)
 507 {
 508         struct call_rcu_data *crdp;
 509
 510         call_rcu_lock(&call_rcu_mutex);
 511         crdp = __create_call_rcu_data(flags, cpu_affinity);
 512         call_rcu_unlock(&call_rcu_mutex);
 513         return crdp;
 514 }
 515
 516 /*
 517  * Set the specified CPU to use the specified call_rcu_data structure.
 518  *
 519  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 520  * the caller's responsibility to dispose of the removed structure.
 521  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 522  * (prior to NULLing it out, of course).
 523  *
 524  * The caller must wait for a grace-period to pass between return from
 525  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 526  * previous call rcu data as argument.
 527  */
 528
 529 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 530 {
 531         static int warned = 0;
 532
 533         call_rcu_lock(&call_rcu_mutex);
 534         alloc_cpu_call_rcu_data();
 535         if (cpu < 0 || maxcpus <= cpu) {
 536                 if (!warned) {
 537                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 538                         warned = 1;
 539                 }
 540                 call_rcu_unlock(&call_rcu_mutex);
 541                 errno = EINVAL;
 542                 return -EINVAL;
 543         }
 544
 545         if (per_cpu_call_rcu_data == NULL) {
 546                 call_rcu_unlock(&call_rcu_mutex);
 547                 errno = ENOMEM;
 548                 return -ENOMEM;
 549         }
 550
 551         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 552                 call_rcu_unlock(&call_rcu_mutex);
 553                 errno = EEXIST;
 554                 return -EEXIST;
 555         }
 556
 557         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 558         call_rcu_unlock(&call_rcu_mutex);
 559         return 0;
 560 }
 561
 562 /*
 563  * Return a pointer to the default call_rcu_data structure, creating
 564  * one if need be.  Because we never free call_rcu_data structures,
 565  * we don't need to be in an RCU read-side critical section.
 566  */
 567
 568 struct call_rcu_data *get_default_call_rcu_data(void)
 569 {
 570         if (default_call_rcu_data != NULL)
 571                 return rcu_dereference(default_call_rcu_data);
 572         call_rcu_lock(&call_rcu_mutex);
 573         if (default_call_rcu_data != NULL) {
 574                 call_rcu_unlock(&call_rcu_mutex);
 575                 return default_call_rcu_data;
 576         }
 577         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 578         call_rcu_unlock(&call_rcu_mutex);
 579         return default_call_rcu_data;
 580 }
 581
 582 /*
 583  * Return the call_rcu_data structure that applies to the currently
 584  * running thread.  Any call_rcu_data structure assigned specifically
 585  * to this thread has first priority, followed by any call_rcu_data
 586  * structure assigned to the CPU on which the thread is running,
 587  * followed by the default call_rcu_data structure.  If there is not
 588  * yet a default call_rcu_data structure, one will be created.
 589  *
 590  * Calls to this function and use of the returned call_rcu_data should
 591  * be protected by RCU read-side lock.
 592  */
 593 struct call_rcu_data *get_call_rcu_data(void)
 594 {
 595         struct call_rcu_data *crd;
 596
 597         if (URCU_TLS(thread_call_rcu_data) != NULL)
 598                 return URCU_TLS(thread_call_rcu_data);
 599
 600         if (maxcpus > 0) {
 601                 crd = get_cpu_call_rcu_data(urcu_sched_getcpu());
 602                 if (crd)
 603                         return crd;
 604         }
 605
 606         return get_default_call_rcu_data();
 607 }
 608
 609 /*
 610  * Return a pointer to this task's call_rcu_data if there is one.
 611  */
 612
 613 struct call_rcu_data *get_thread_call_rcu_data(void)
 614 {
 615         return URCU_TLS(thread_call_rcu_data);
 616 }
 617
 618 /*
 619  * Set this task's call_rcu_data structure as specified, regardless
 620  * of whether or not this task already had one.  (This allows switching
 621  * to and from real-time call_rcu threads, for example.)
 622  *
 623  * Use NULL to remove a thread's call_rcu_data structure, but it is
 624  * the caller's responsibility to dispose of the removed structure.
 625  * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 626  * (prior to NULLing it out, of course).
 627  */
 628
 629 void set_thread_call_rcu_data(struct call_rcu_data *crdp)
 630 {
 631         URCU_TLS(thread_call_rcu_data) = crdp;
 632 }
 633
 634 /*
 635  * Create a separate call_rcu thread for each CPU.  This does not
 636  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 637  * function if you want that behavior. Should be paired with
 638  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 639  * threads.
 640  */
 641
 642 int create_all_cpu_call_rcu_data(unsigned long flags)
 643 {
 644         int i;
 645         struct call_rcu_data *crdp;
 646         int ret;
 647
 648         call_rcu_lock(&call_rcu_mutex);
 649         alloc_cpu_call_rcu_data();
 650         call_rcu_unlock(&call_rcu_mutex);
 651         if (maxcpus <= 0) {
 652                 errno = EINVAL;
 653                 return -EINVAL;
 654         }
 655         if (per_cpu_call_rcu_data == NULL) {
 656                 errno = ENOMEM;
 657                 return -ENOMEM;
 658         }
 659         for (i = 0; i < maxcpus; i++) {
 660                 call_rcu_lock(&call_rcu_mutex);
 661                 if (get_cpu_call_rcu_data(i)) {
 662                         call_rcu_unlock(&call_rcu_mutex);
 663                         continue;
 664                 }
 665                 crdp = __create_call_rcu_data(flags, i);
 666                 if (crdp == NULL) {
 667                         call_rcu_unlock(&call_rcu_mutex);
 668                         errno = ENOMEM;
 669                         return -ENOMEM;
 670                 }
 671                 call_rcu_unlock(&call_rcu_mutex);
 672                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 673                         call_rcu_data_free(crdp);
 674
 675                         /* it has been created by other thread */
 676                         if (ret == -EEXIST)
 677                                 continue;
 678
 679                         return ret;
 680                 }
 681         }
 682         return 0;
 683 }
 684
 685 /*
 686  * Wake up the call_rcu thread corresponding to the specified
 687  * call_rcu_data structure.
 688  */
 689 static void wake_call_rcu_thread(struct call_rcu_data *crdp)
 690 {
 691         if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
 692                 call_rcu_wake_up(crdp);
 693 }
 694
 695 static void _call_rcu(struct rcu_head *head,
 696                       void (*func)(struct rcu_head *head),
 697                       struct call_rcu_data *crdp)
 698 {
 699         cds_wfcq_node_init(&head->next);
 700         head->func = func;
 701         cds_wfcq_enqueue(&crdp->cbs_head, &crdp->cbs_tail, &head->next);
 702         uatomic_inc(&crdp->qlen);
 703         wake_call_rcu_thread(crdp);
 704 }
 705
 706 /*
 707  * Schedule a function to be invoked after a following grace period.
 708  * This is the only function that must be called -- the others are
 709  * only present to allow applications to tune their use of RCU for
 710  * maximum performance.
 711  *
 712  * Note that unless a call_rcu thread has not already been created,
 713  * the first invocation of call_rcu() will create one.  So, if you
 714  * need the first invocation of call_rcu() to be fast, make sure
 715  * to create a call_rcu thread first.  One way to accomplish this is
 716  * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 717  *
 718  * call_rcu must be called by registered RCU read-side threads.
 719  */
 720 void call_rcu(struct rcu_head *head,
 721               void (*func)(struct rcu_head *head))
 722 {
 723         struct call_rcu_data *crdp;
 724
 725         /* Holding rcu read-side lock across use of per-cpu crdp */
 726         _rcu_read_lock();
 727         crdp = get_call_rcu_data();
 728         _call_rcu(head, func, crdp);
 729         _rcu_read_unlock();
 730 }
 731
 732 /*
 733  * Free up the specified call_rcu_data structure, terminating the
 734  * associated call_rcu thread.  The caller must have previously
 735  * removed the call_rcu_data structure from per-thread or per-CPU
 736  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 737  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 738  * per-thread call_rcu_data structures.
 739  *
 740  * We silently refuse to free up the default call_rcu_data structure
 741  * because that is where we put any leftover callbacks.  Note that
 742  * the possibility of self-spawning callbacks makes it impossible
 743  * to execute all the callbacks in finite time without putting any
 744  * newly spawned callbacks somewhere else.  The "somewhere else" of
 745  * last resort is the default call_rcu_data structure.
 746  *
 747  * We also silently refuse to free NULL pointers.  This simplifies
 748  * the calling code.
 749  *
 750  * The caller must wait for a grace-period to pass between return from
 751  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 752  * previous call rcu data as argument.
 753  *
 754  * Note: introducing __cds_wfcq_splice_blocking() in this function fixed
 755  * a list corruption bug in the 0.7.x series. The equivalent fix
 756  * appeared in 0.6.8 for the stable-0.6 branch.
 757  */
 758 void call_rcu_data_free(struct call_rcu_data *crdp)
 759 {
 760         if (crdp == NULL || crdp == default_call_rcu_data) {
 761                 return;
 762         }
 763         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 764                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 765                 wake_call_rcu_thread(crdp);
 766                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 767                         (void) poll(NULL, 0, 1);
 768         }
 769         if (!cds_wfcq_empty(&crdp->cbs_head, &crdp->cbs_tail)) {
 770                 /* Create default call rcu data if need be */
 771                 (void) get_default_call_rcu_data();
 772                 __cds_wfcq_splice_blocking(&default_call_rcu_data->cbs_head,
 773                         &default_call_rcu_data->cbs_tail,
 774                         &crdp->cbs_head, &crdp->cbs_tail);
 775                 uatomic_add(&default_call_rcu_data->qlen,
 776                             uatomic_read(&crdp->qlen));
 777                 wake_call_rcu_thread(default_call_rcu_data);
 778         }
 779
 780         call_rcu_lock(&call_rcu_mutex);
 781         cds_list_del(&crdp->list);
 782         call_rcu_unlock(&call_rcu_mutex);
 783
 784         free(crdp);
 785 }
 786
 787 /*
 788  * Clean up all the per-CPU call_rcu threads.
 789  */
 790 void free_all_cpu_call_rcu_data(void)
 791 {
 792         int cpu;
 793         struct call_rcu_data **crdp;
 794         static int warned = 0;
 795
 796         if (maxcpus <= 0)
 797                 return;
 798
 799         crdp = malloc(sizeof(*crdp) * maxcpus);
 800         if (!crdp) {
 801                 if (!warned) {
 802                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 803                 }
 804                 warned = 1;
 805                 return;
 806         }
 807
 808         for (cpu = 0; cpu < maxcpus; cpu++) {
 809                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 810                 if (crdp[cpu] == NULL)
 811                         continue;
 812                 set_cpu_call_rcu_data(cpu, NULL);
 813         }
 814         /*
 815          * Wait for call_rcu sites acting as RCU readers of the
 816          * call_rcu_data to become quiescent.
 817          */
 818         synchronize_rcu();
 819         for (cpu = 0; cpu < maxcpus; cpu++) {
 820                 if (crdp[cpu] == NULL)
 821                         continue;
 822                 call_rcu_data_free(crdp[cpu]);
 823         }
 824         free(crdp);
 825 }
 826
 827 static
 828 void free_completion(struct urcu_ref *ref)
 829 {
 830         struct call_rcu_completion *completion;
 831
 832         completion = caa_container_of(ref, struct call_rcu_completion, ref);
 833         free(completion);
 834 }
 835
 836 static
 837 void _rcu_barrier_complete(struct rcu_head *head)
 838 {
 839         struct call_rcu_completion_work *work;
 840         struct call_rcu_completion *completion;
 841
 842         work = caa_container_of(head, struct call_rcu_completion_work, head);
 843         completion = work->completion;
 844         if (!uatomic_sub_return(&completion->barrier_count, 1))
 845                 call_rcu_completion_wake_up(completion);
 846         urcu_ref_put(&completion->ref, free_completion);
 847         free(work);
 848 }
 849
 850 /*
 851  * Wait for all in-flight call_rcu callbacks to complete execution.
 852  */
 853 void rcu_barrier(void)
 854 {
 855         struct call_rcu_data *crdp;
 856         struct call_rcu_completion *completion;
 857         int count = 0;
 858         int was_online;
 859
 860         /* Put in offline state in QSBR. */
 861         was_online = _rcu_read_ongoing();
 862         if (was_online)
 863                 rcu_thread_offline();
 864         /*
 865          * Calling a rcu_barrier() within a RCU read-side critical
 866          * section is an error.
 867          */
 868         if (_rcu_read_ongoing()) {
 869                 static int warned = 0;
 870
 871                 if (!warned) {
 872                         fprintf(stderr, "[error] liburcu: rcu_barrier() called from within RCU read-side critical section.\n");
 873                 }
 874                 warned = 1;
 875                 goto online;
 876         }
 877
 878         completion = calloc(sizeof(*completion), 1);
 879         if (!completion)
 880                 urcu_die(errno);
 881
 882         call_rcu_lock(&call_rcu_mutex);
 883         cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
 884                 count++;
 885
 886         /* Referenced by rcu_barrier() and each call_rcu thread. */
 887         urcu_ref_set(&completion->ref, count + 1);
 888         completion->barrier_count = count;
 889
 890         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 891                 struct call_rcu_completion_work *work;
 892
 893                 work = calloc(sizeof(*work), 1);
 894                 if (!work)
 895                         urcu_die(errno);
 896                 work->completion = completion;
 897                 _call_rcu(&work->head, _rcu_barrier_complete, crdp);
 898         }
 899         call_rcu_unlock(&call_rcu_mutex);
 900
 901         /* Wait for them */
 902         for (;;) {
 903                 uatomic_dec(&completion->futex);
 904                 /* Decrement futex before reading barrier_count */
 905                 cmm_smp_mb();
 906                 if (!uatomic_read(&completion->barrier_count))
 907                         break;
 908                 call_rcu_completion_wait(completion);
 909         }
 910
 911         urcu_ref_put(&completion->ref, free_completion);
 912
 913 online:
 914         if (was_online)
 915                 rcu_thread_online();
 916 }
 917
 918 /*
 919  * Acquire the call_rcu_mutex in order to ensure that the child sees
 920  * all of the call_rcu() data structures in a consistent state. Ensure
 921  * that all call_rcu threads are in a quiescent state across fork.
 922  * Suitable for pthread_atfork() and friends.
 923  */
 924 void call_rcu_before_fork(void)
 925 {
 926         struct call_rcu_data *crdp;
 927
 928         call_rcu_lock(&call_rcu_mutex);
 929
 930         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 931                 uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSE);
 932                 cmm_smp_mb__after_uatomic_or();
 933                 wake_call_rcu_thread(crdp);
 934         }
 935         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 936                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) == 0)
 937                         (void) poll(NULL, 0, 1);
 938         }
 939 }
 940
 941 /*
 942  * Clean up call_rcu data structures in the parent of a successful fork()
 943  * that is not followed by exec() in the child.  Suitable for
 944  * pthread_atfork() and friends.
 945  */
 946 void call_rcu_after_fork_parent(void)
 947 {
 948         struct call_rcu_data *crdp;
 949
 950         cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
 951                 uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSE);
 952         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 953                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) != 0)
 954                         (void) poll(NULL, 0, 1);
 955         }
 956         call_rcu_unlock(&call_rcu_mutex);
 957 }
 958
 959 /*
 960  * Clean up call_rcu data structures in the child of a successful fork()
 961  * that is not followed by exec().  Suitable for pthread_atfork() and
 962  * friends.
 963  */
 964 void call_rcu_after_fork_child(void)
 965 {
 966         struct call_rcu_data *crdp, *next;
 967
 968         /* Release the mutex. */
 969         call_rcu_unlock(&call_rcu_mutex);
 970
 971         /* Do nothing when call_rcu() has not been used */
 972         if (cds_list_empty(&call_rcu_data_list))
 973                 return;
 974
 975         /*
 976          * Allocate a new default call_rcu_data structure in order
 977          * to get a working call_rcu thread to go with it.
 978          */
 979         default_call_rcu_data = NULL;
 980         (void)get_default_call_rcu_data();
 981
 982         /* Cleanup call_rcu_data pointers before use */
 983         maxcpus_reset();
 984         free(per_cpu_call_rcu_data);
 985         rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
 986         URCU_TLS(thread_call_rcu_data) = NULL;
 987
 988         /*
 989          * Dispose of all of the rest of the call_rcu_data structures.
 990          * Leftover call_rcu callbacks will be merged into the new
 991          * default call_rcu thread queue.
 992          */
 993         cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 994                 if (crdp == default_call_rcu_data)
 995                         continue;
 996                 uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 997                 call_rcu_data_free(crdp);
 998         }
 999 }