urcu-call-rcu-impl.h

   1 /*
   2  * urcu-call-rcu.c
   3  *
   4  * Userspace RCU library - batch memory reclamation with kernel API
   5  *
   6  * Copyright (c) 2010 Paul E. McKenney <paulmck@linux.vnet.ibm.com>
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #define _GNU_SOURCE
  24 #include <stdio.h>
  25 #include <pthread.h>
  26 #include <signal.h>
  27 #include <assert.h>
  28 #include <stdlib.h>
  29 #include <stdint.h>
  30 #include <string.h>
  31 #include <errno.h>
  32 #include <poll.h>
  33 #include <sys/time.h>
  34 #include <unistd.h>
  35 #include <sched.h>
  36
  37 #include "config.h"
  38 #include "urcu/wfqueue.h"
  39 #include "urcu-call-rcu.h"
  40 #include "urcu-pointer.h"
  41 #include "urcu/list.h"
  42 #include "urcu/futex.h"
  43 #include "urcu/tls-compat.h"
  44 #include "urcu-die.h"
  45
  46 #define SET_AFFINITY_CHECK_PERIOD               (1U << 8)       /* 256 */
  47 #define SET_AFFINITY_CHECK_PERIOD_MASK          (SET_AFFINITY_CHECK_PERIOD - 1)
  48
  49 /*
      * Per-worker state: one instance exists for each call_rcu thread.
      * The structure is aligned on a cache line.
      */
  50
  51 struct call_rcu_data {
  52         struct cds_wfq_queue cbs;      /* callbacks enqueued by call_rcu() */
  53         unsigned long flags;           /* URCU_CALL_RCU_* bits, updated with uatomic ops */
  54         int32_t futex;                 /* decremented to -1 by a non-RT worker before it may sleep; wakers reset it to 0 */
  55         unsigned long qlen; /* number of queued callbacks; maintained for debugging. */
  56         pthread_t tid;                 /* worker thread id, filled in by pthread_create() */
  57         int cpu_affinity;              /* CPU to bind the worker to; negative means no binding */
  58         unsigned long gp_count;        /* bumped by set_thread_cpu_affinity() to rate-limit affinity checks */
  59         struct cds_list_head list;     /* linkage in call_rcu_data_list */
  60 } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
  61
  62 /*
  63  * List of all call_rcu_data structures to keep valgrind happy.
      * The fork handlers also walk it to pause, resume or dispose of
      * every call_rcu thread.
  64  * Protected by call_rcu_mutex.
  65  */
  66
  67 CDS_LIST_HEAD(call_rcu_data_list);
  68
  69 /*
      * Link a thread using call_rcu() to its call_rcu thread.
      * A call_rcu worker thread also stores its own structure here.
      */
  70
  71 static DEFINE_URCU_TLS(struct call_rcu_data *, thread_call_rcu_data);
  72
  73 /*
  74  * Guard call_rcu thread creation and atfork handlers.
      * Also taken around updates of the per-CPU pointer array and
      * removal from call_rcu_data_list.
  75  */
  76 static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
  77
  78 /*
      * If a given thread does not have its own call_rcu thread, and no
      * per-CPU one applies, this is the default.  Created on demand by
      * get_default_call_rcu_data().
      */
  79
  80 static struct call_rcu_data *default_call_rcu_data;
  81
  82 /*
  83  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
  84  * available, then we can have call_rcu threads assigned to individual
  85  * CPUs rather than only to specific threads.
  86  */
  87
  88 #ifdef HAVE_SCHED_GETCPU
  89
  90 static int urcu_sched_getcpu(void)
  91 {
  92         return sched_getcpu();
  93 }
  94
  95 #else /* #ifdef HAVE_SCHED_GETCPU */
  96
  97 static int urcu_sched_getcpu(void)
  98 {
  99         return -1;
 100 }
 101
 102 #endif /* #else #ifdef HAVE_SCHED_GETCPU */
 103
 104 #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU)
 105
 106 /*
 107  * Pointer to array of pointers to per-CPU call_rcu_data structures
 108  * and # CPUs. per_cpu_call_rcu_data is a RCU-protected pointer to an
 109  * array of RCU-protected pointers to call_rcu_data. call_rcu acts as a
 110  * RCU read-side and reads per_cpu_call_rcu_data and the per-cpu pointer
 111  * without mutex. The call_rcu_mutex protects updates.
      *
      * maxcpus is 0 until alloc_cpu_call_rcu_data() has probed the CPU
      * count; afterwards it holds the value returned by sysconf().
 112  */
 113
 114 static struct call_rcu_data **per_cpu_call_rcu_data;
 115 static long maxcpus;
 116
     /*
      * Forget the probed CPU count so that the next call to
      * alloc_cpu_call_rcu_data() probes and allocates again.
      */
 117 static void maxcpus_reset(void)
 118 {
 119         maxcpus = 0;
 120 }
 121
 122 /* Allocate the array if it has not already been allocated. */
 123
 124 static void alloc_cpu_call_rcu_data(void)
 125 {
 126         struct call_rcu_data **p;
 127         static int warned = 0;
 128
 129         if (maxcpus != 0)
 130                 return;
 131         maxcpus = sysconf(_SC_NPROCESSORS_CONF);
 132         if (maxcpus <= 0) {
 133                 return;
 134         }
 135         p = malloc(maxcpus * sizeof(*per_cpu_call_rcu_data));
 136         if (p != NULL) {
 137                 memset(p, '\0', maxcpus * sizeof(*per_cpu_call_rcu_data));
 138                 rcu_set_pointer(&per_cpu_call_rcu_data, p);
 139         } else {
 140                 if (!warned) {
 141                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 142                 }
 143                 warned = 1;
 144         }
 145 }
 146
 147 #else /* #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 148
 149 /*
 150  * per_cpu_call_rcu_data should be constant, but some functions below, used both
 151  * for cases where cpu number is available and not available, assume it is not
 152  * constant.
      *
      * In this configuration the pointer stays NULL and maxcpus is fixed
      * at -1, so every "maxcpus > 0" test below is false.
 153  */
 154 static struct call_rcu_data **per_cpu_call_rcu_data = NULL;
 155 static const long maxcpus = -1;
 156
     /* Nothing to reset: maxcpus is a constant in this configuration. */
 157 static void maxcpus_reset(void)
 158 {
 159 }
 160
     /* No per-CPU array is ever allocated in this configuration. */
 161 static void alloc_cpu_call_rcu_data(void)
 162 {
 163 }
 164
 165 #endif /* #else #if defined(HAVE_SYSCONF) && defined(HAVE_SCHED_GETCPU) */
 166
 167 /* Acquire the specified pthread mutex. */
 168
 169 static void call_rcu_lock(pthread_mutex_t *pmp)
 170 {
 171         int ret;
 172
 173         ret = pthread_mutex_lock(pmp);
 174         if (ret)
 175                 urcu_die(ret);
 176 }
 177
 178 /* Release the specified pthread mutex. */
 179
 180 static void call_rcu_unlock(pthread_mutex_t *pmp)
 181 {
 182         int ret;
 183
 184         ret = pthread_mutex_unlock(pmp);
 185         if (ret)
 186                 urcu_die(ret);
 187 }
 188
 189 /*
 190  * Periodically retry setting CPU affinity if we migrate.
 191  * Losing affinity can be caused by CPU hotunplug/hotplug, or by
 192  * cpuset(7).
 193  */
 194 #if HAVE_SCHED_SETAFFINITY
 195 static
 196 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 197 {
 198         cpu_set_t mask;
 199         int ret;
 200
 201         if (crdp->cpu_affinity < 0)
 202                 return 0;
 203         if (++crdp->gp_count & SET_AFFINITY_CHECK_PERIOD_MASK)
 204                 return 0;
 205         if (urcu_sched_getcpu() == crdp->cpu_affinity)
 206                 return 0;
 207
 208         CPU_ZERO(&mask);
 209         CPU_SET(crdp->cpu_affinity, &mask);
 210 #if SCHED_SETAFFINITY_ARGS == 2
 211         ret = sched_setaffinity(0, &mask);
 212 #else
 213         ret = sched_setaffinity(0, sizeof(mask), &mask);
 214 #endif
 215         /*
 216          * EINVAL is fine: can be caused by hotunplugged CPUs, or by
 217          * cpuset(7). This is why we should always retry if we detect
 218          * migration.
 219          */
 220         if (ret && errno == EINVAL) {
 221                 ret = 0;
 222                 errno = 0;
 223         }
 224         return ret;
 225 }
 226 #else
 227 static
 228 int set_thread_cpu_affinity(struct call_rcu_data *crdp)
 229 {
 230         return 0;
 231 }
 232 #endif
 233
 234 static void call_rcu_wait(struct call_rcu_data *crdp)
 235 {
 236         /* Read call_rcu list before read futex */
 237         cmm_smp_mb();
 238         if (uatomic_read(&crdp->futex) != -1)
 239                 return;
 240         while (futex_async(&crdp->futex, FUTEX_WAIT, -1,
 241                         NULL, NULL, 0)) {
 242                 switch (errno) {
 243                 case EWOULDBLOCK:
 244                         /* Value already changed. */
 245                         return;
 246                 case EINTR:
 247                         /* Retry if interrupted by signal. */
 248                         break;  /* Get out of switch. */
 249                 default:
 250                         /* Unexpected error. */
 251                         urcu_die(errno);
 252                 }
 253         }
 254 }
 255
 256 static void call_rcu_wake_up(struct call_rcu_data *crdp)
 257 {
 258         /* Write to call_rcu list before reading/writing futex */
 259         cmm_smp_mb();
 260         if (caa_unlikely(uatomic_read(&crdp->futex) == -1)) {
 261                 uatomic_set(&crdp->futex, 0);
 262                 if (futex_async(&crdp->futex, FUTEX_WAKE, 1,
 263                                 NULL, NULL, 0) < 0)
 264                         urcu_die(errno);
 265         }
 266 }
 267
 268 /*
      * This is the code run by each call_rcu thread.
      *
      * arg is the thread's struct call_rcu_data; the return value is
      * always NULL.  Each pass of the loop re-checks the CPU affinity,
      * honours a pause request, detaches every queued callback, waits
      * for a grace period with synchronize_rcu(), invokes the callbacks,
      * and then sleeps before the next pass.  The loop ends once
      * URCU_CALL_RCU_STOP is seen; URCU_CALL_RCU_STOPPED is then set,
      * which is what call_rcu_data_free() waits for.
      */
 269
 270 static void *call_rcu_thread(void *arg)
 271 {
 272         unsigned long cbcount;
 273         struct cds_wfq_node *cbs;
 274         struct cds_wfq_node **cbs_tail;
 275         struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
 276         struct rcu_head *rhp;
             /* The RT flag is sampled once; RT threads never touch the futex. */
 277         int rt = !!(uatomic_read(&crdp->flags) & URCU_CALL_RCU_RT);
 278
 279         if (set_thread_cpu_affinity(crdp))
 280                 urcu_die(errno);
 281
 282         /*
 283          * If callbacks take a read-side lock, we need to be registered.
 284          */
 285         rcu_register_thread();
 286
             /* call_rcu() invoked from a callback queues onto this same thread. */
 287         URCU_TLS(thread_call_rcu_data) = crdp;
 288         if (!rt) {
 289                 uatomic_dec(&crdp->futex);
 290                 /* Decrement futex before reading call_rcu list */
 291                 cmm_smp_mb();
 292         }
 293         for (;;) {
 294                 if (set_thread_cpu_affinity(crdp))
 295                         urcu_die(errno);
 296
 297                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) {
 298                         /*
 299                          * Pause requested. Become quiescent: remove
 300                          * ourself from all global lists, and don't
 301                          * process any callback. The callback lists may
 302                          * still be non-empty though.
 303                          */
 304                         rcu_unregister_thread();
 305                         cmm_smp_mb__before_uatomic_or();
 306                         uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSED);
                             /* Spin in 1 ms steps until the PAUSE request is withdrawn. */
 307                         while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSE) != 0)
 308                                 poll(NULL, 0, 1);
 309                         uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSED);
 310                         cmm_smp_mb__after_uatomic_and();
 311                         rcu_register_thread();
 312                 }
 313
                     /* Anything queued?  (tail is reset to &head whenever the queue is detached.) */
 314                 if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
                             /*
                              * head may still read NULL for a moment --
                              * presumably an enqueue is in flight; wait
                              * for it to become visible.
                              */
 315                         while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 316                                 poll(NULL, 0, 1);
                             /* Detach the whole list and leave an empty queue behind. */
 317                         _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 318                         cbs_tail = (struct cds_wfq_node **)
 319                                 uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
                             /* Grace period covering every detached callback. */
 320                         synchronize_rcu();
 321                         cbcount = 0;
 322                         do {
                                     /* Wait for ->next to be linked unless this is the last detached node. */
 323                                 while (cbs->next == NULL &&
 324                                        &cbs->next != cbs_tail)
 325                                         poll(NULL, 0, 1);
                                     /* The queue's dummy node carries no callback. */
 326                                 if (cbs == &crdp->cbs.dummy) {
 327                                         cbs = cbs->next;
 328                                         continue;
 329                                 }
                                     /* NOTE(review): the cast assumes the queue node sits at offset 0 of struct rcu_head -- confirm. */
 330                                 rhp = (struct rcu_head *)cbs;
                                     /* Advance first: rhp is not touched again after its callback runs. */
 331                                 cbs = cbs->next;
 332                                 rhp->func(rhp);
 333                                 cbcount++;
 334                         } while (cbs != NULL);
 335                         uatomic_sub(&crdp->qlen, cbcount);
 336                 }
 337                 if (uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOP)
 338                         break;
 339                 rcu_thread_offline();
 340                 if (!rt) {
 341                         if (&crdp->cbs.head
 342                             == _CMM_LOAD_SHARED(crdp->cbs.tail)) {
                                     /* Queue empty: sleep on the futex, then re-arm it. */
 343                                 call_rcu_wait(crdp);
 344                                 poll(NULL, 0, 10);
 345                                 uatomic_dec(&crdp->futex);
 346                                 /*
 347                                  * Decrement futex before reading
 348                                  * call_rcu list.
 349                                  */
 350                                 cmm_smp_mb();
 351                         } else {
 352                                 poll(NULL, 0, 10);
 353                         }
 354                 } else {
                             /* RT threads do not use the futex; they just sleep 10 ms. */
 355                         poll(NULL, 0, 10);
 356                 }
 357                 rcu_thread_online();
 358         }
 359         if (!rt) {
 360                 /*
 361                  * Read call_rcu list before write futex.
 362                  */
 363                 cmm_smp_mb();
 364                 uatomic_set(&crdp->futex, 0);
 365         }
             /* Tell call_rcu_data_free() that this thread no longer runs callbacks. */
 366         uatomic_or(&crdp->flags, URCU_CALL_RCU_STOPPED);
 367         rcu_unregister_thread();
 368         return NULL;
 369 }
 370
 371 /*
 372  * Create both a call_rcu thread and the corresponding call_rcu_data
 373  * structure, linking the structure in as specified.  Caller must hold
 374  * call_rcu_mutex.
 375  */
 376
 377 static void call_rcu_data_init(struct call_rcu_data **crdpp,
 378                                unsigned long flags,
 379                                int cpu_affinity)
 380 {
 381         struct call_rcu_data *crdp;
 382         int ret;
 383
 384         crdp = malloc(sizeof(*crdp));
 385         if (crdp == NULL)
 386                 urcu_die(errno);
 387         memset(crdp, '\0', sizeof(*crdp));
 388         cds_wfq_init(&crdp->cbs);
 389         crdp->qlen = 0;
 390         crdp->futex = 0;
 391         crdp->flags = flags;
 392         cds_list_add(&crdp->list, &call_rcu_data_list);
 393         crdp->cpu_affinity = cpu_affinity;
 394         crdp->gp_count = 0;
 395         cmm_smp_mb();  /* Structure initialized before pointer is planted. */
 396         *crdpp = crdp;
 397         ret = pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp);
 398         if (ret)
 399                 urcu_die(ret);
 400 }
 401
 402 /*
 403  * Return a pointer to the call_rcu_data structure for the specified
 404  * CPU, returning NULL if there is none.  We cannot automatically
 405  * created it because the platform we are running on might not define
 406  * urcu_sched_getcpu().
 407  *
 408  * The call to this function and use of the returned call_rcu_data
 409  * should be protected by RCU read-side lock.
 410  */
 411
 412 struct call_rcu_data *get_cpu_call_rcu_data(int cpu)
 413 {
 414         static int warned = 0;
 415         struct call_rcu_data **pcpu_crdp;
 416
 417         pcpu_crdp = rcu_dereference(per_cpu_call_rcu_data);
 418         if (pcpu_crdp == NULL)
 419                 return NULL;
 420         if (!warned && maxcpus > 0 && (cpu < 0 || maxcpus <= cpu)) {
 421                 fprintf(stderr, "[error] liburcu: get CPU # out of range\n");
 422                 warned = 1;
 423         }
 424         if (cpu < 0 || maxcpus <= cpu)
 425                 return NULL;
 426         return rcu_dereference(pcpu_crdp[cpu]);
 427 }
 428
 429 /*
 430  * Return the tid corresponding to the call_rcu thread whose
 431  * call_rcu_data structure is specified.
 432  */
 433
 434 pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
 435 {
 436         return crdp->tid;
 437 }
 438
 439 /*
 440  * Create a call_rcu_data structure (with thread) and return a pointer.
 441  */
 442
 443 static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
 444                                                     int cpu_affinity)
 445 {
 446         struct call_rcu_data *crdp;
 447
 448         call_rcu_data_init(&crdp, flags, cpu_affinity);
 449         return crdp;
 450 }
 451
 452 struct call_rcu_data *create_call_rcu_data(unsigned long flags,
 453                                            int cpu_affinity)
 454 {
 455         struct call_rcu_data *crdp;
 456
 457         call_rcu_lock(&call_rcu_mutex);
 458         crdp = __create_call_rcu_data(flags, cpu_affinity);
 459         call_rcu_unlock(&call_rcu_mutex);
 460         return crdp;
 461 }
 462
 463 /*
 464  * Set the specified CPU to use the specified call_rcu_data structure.
 465  *
 466  * Use NULL to remove a CPU's call_rcu_data structure, but it is
 467  * the caller's responsibility to dispose of the removed structure.
 468  * Use get_cpu_call_rcu_data() to obtain a pointer to the old structure
 469  * (prior to NULLing it out, of course).
 470  *
 471  * The caller must wait for a grace-period to pass between return from
 472  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 473  * previous call rcu data as argument.
 474  */
 475
 476 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 477 {
 478         static int warned = 0;
 479
 480         call_rcu_lock(&call_rcu_mutex);
 481         alloc_cpu_call_rcu_data();
 482         if (cpu < 0 || maxcpus <= cpu) {
 483                 if (!warned) {
 484                         fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
 485                         warned = 1;
 486                 }
 487                 call_rcu_unlock(&call_rcu_mutex);
 488                 errno = EINVAL;
 489                 return -EINVAL;
 490         }
 491
 492         if (per_cpu_call_rcu_data == NULL) {
 493                 call_rcu_unlock(&call_rcu_mutex);
 494                 errno = ENOMEM;
 495                 return -ENOMEM;
 496         }
 497
 498         if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
 499                 call_rcu_unlock(&call_rcu_mutex);
 500                 errno = EEXIST;
 501                 return -EEXIST;
 502         }
 503
 504         rcu_set_pointer(&per_cpu_call_rcu_data[cpu], crdp);
 505         call_rcu_unlock(&call_rcu_mutex);
 506         return 0;
 507 }
 508
 509 /*
 510  * Return a pointer to the default call_rcu_data structure, creating
 511  * one if need be.  Because we never free call_rcu_data structures,
 512  * we don't need to be in an RCU read-side critical section.
 513  */
 514
 515 struct call_rcu_data *get_default_call_rcu_data(void)
 516 {
 517         if (default_call_rcu_data != NULL)
 518                 return rcu_dereference(default_call_rcu_data);
 519         call_rcu_lock(&call_rcu_mutex);
 520         if (default_call_rcu_data != NULL) {
 521                 call_rcu_unlock(&call_rcu_mutex);
 522                 return default_call_rcu_data;
 523         }
 524         call_rcu_data_init(&default_call_rcu_data, 0, -1);
 525         call_rcu_unlock(&call_rcu_mutex);
 526         return default_call_rcu_data;
 527 }
 528
 529 /*
 530  * Return the call_rcu_data structure that applies to the currently
 531  * running thread.  Any call_rcu_data structure assigned specifically
 532  * to this thread has first priority, followed by any call_rcu_data
 533  * structure assigned to the CPU on which the thread is running,
 534  * followed by the default call_rcu_data structure.  If there is not
 535  * yet a default call_rcu_data structure, one will be created.
 536  *
 537  * Calls to this function and use of the returned call_rcu_data should
 538  * be protected by RCU read-side lock.
 539  */
 540 struct call_rcu_data *get_call_rcu_data(void)
 541 {
 542         struct call_rcu_data *crd;
 543
 544         if (URCU_TLS(thread_call_rcu_data) != NULL)
 545                 return URCU_TLS(thread_call_rcu_data);
 546
 547         if (maxcpus > 0) {
 548                 crd = get_cpu_call_rcu_data(urcu_sched_getcpu());
 549                 if (crd)
 550                         return crd;
 551         }
 552
 553         return get_default_call_rcu_data();
 554 }
 555
 556 /*
 557  * Return a pointer to this task's call_rcu_data if there is one.
 558  */
 559
 560 struct call_rcu_data *get_thread_call_rcu_data(void)
 561 {
 562         return URCU_TLS(thread_call_rcu_data);
 563 }
 564
 565 /*
 566  * Set this task's call_rcu_data structure as specified, regardless
 567  * of whether or not this task already had one.  (This allows switching
 568  * to and from real-time call_rcu threads, for example.)
 569  *
 570  * Use NULL to remove a thread's call_rcu_data structure, but it is
 571  * the caller's responsibility to dispose of the removed structure.
 572  * Use get_thread_call_rcu_data() to obtain a pointer to the old structure
 573  * (prior to NULLing it out, of course).
 574  */
 575
 576 void set_thread_call_rcu_data(struct call_rcu_data *crdp)
 577 {
 578         URCU_TLS(thread_call_rcu_data) = crdp;
 579 }
 580
 581 /*
 582  * Create a separate call_rcu thread for each CPU.  This does not
 583  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
 584  * function if you want that behavior. Should be paired with
 585  * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
 586  * threads.
 587  */
 588
 589 int create_all_cpu_call_rcu_data(unsigned long flags)
 590 {
 591         int i;
 592         struct call_rcu_data *crdp;
 593         int ret;
 594
 595         call_rcu_lock(&call_rcu_mutex);
 596         alloc_cpu_call_rcu_data();
 597         call_rcu_unlock(&call_rcu_mutex);
 598         if (maxcpus <= 0) {
 599                 errno = EINVAL;
 600                 return -EINVAL;
 601         }
 602         if (per_cpu_call_rcu_data == NULL) {
 603                 errno = ENOMEM;
 604                 return -ENOMEM;
 605         }
 606         for (i = 0; i < maxcpus; i++) {
 607                 call_rcu_lock(&call_rcu_mutex);
 608                 if (get_cpu_call_rcu_data(i)) {
 609                         call_rcu_unlock(&call_rcu_mutex);
 610                         continue;
 611                 }
 612                 crdp = __create_call_rcu_data(flags, i);
 613                 if (crdp == NULL) {
 614                         call_rcu_unlock(&call_rcu_mutex);
 615                         errno = ENOMEM;
 616                         return -ENOMEM;
 617                 }
 618                 call_rcu_unlock(&call_rcu_mutex);
 619                 if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
 620                         call_rcu_data_free(crdp);
 621
 622                         /* it has been created by other thread */
 623                         if (ret == -EEXIST)
 624                                 continue;
 625
 626                         return ret;
 627                 }
 628         }
 629         return 0;
 630 }
 631
 632 /*
 633  * Wake up the call_rcu thread corresponding to the specified
 634  * call_rcu_data structure.
 635  */
 636 static void wake_call_rcu_thread(struct call_rcu_data *crdp)
 637 {
 638         if (!(_CMM_LOAD_SHARED(crdp->flags) & URCU_CALL_RCU_RT))
 639                 call_rcu_wake_up(crdp);
 640 }
 641
 642 /*
 643  * Schedule a function to be invoked after a following grace period.
 644  * This is the only function that must be called -- the others are
 645  * only present to allow applications to tune their use of RCU for
 646  * maximum performance.
 647  *
 648  * Note that unless a call_rcu thread has not already been created,
 649  * the first invocation of call_rcu() will create one.  So, if you
 650  * need the first invocation of call_rcu() to be fast, make sure
 651  * to create a call_rcu thread first.  One way to accomplish this is
 652  * "get_call_rcu_data();", and another is create_all_cpu_call_rcu_data().
 653  *
 654  * call_rcu must be called by registered RCU read-side threads.
 655  */
 656
 657 void call_rcu(struct rcu_head *head,
 658               void (*func)(struct rcu_head *head))
 659 {
 660         struct call_rcu_data *crdp;
 661
 662         cds_wfq_node_init(&head->next);
 663         head->func = func;
 664         /* Holding rcu read-side lock across use of per-cpu crdp */
 665         _rcu_read_lock();
 666         crdp = get_call_rcu_data();
 667         cds_wfq_enqueue(&crdp->cbs, &head->next);
 668         uatomic_inc(&crdp->qlen);
 669         wake_call_rcu_thread(crdp);
 670         _rcu_read_unlock();
 671 }
 672
 673 /*
 674  * Free up the specified call_rcu_data structure, terminating the
 675  * associated call_rcu thread.  The caller must have previously
 676  * removed the call_rcu_data structure from per-thread or per-CPU
 677  * usage.  For example, set_cpu_call_rcu_data(cpu, NULL) for per-CPU
 678  * call_rcu_data structures or set_thread_call_rcu_data(NULL) for
 679  * per-thread call_rcu_data structures.
 680  *
 681  * We silently refuse to free up the default call_rcu_data structure
 682  * because that is where we put any leftover callbacks.  Note that
 683  * the possibility of self-spawning callbacks makes it impossible
 684  * to execute all the callbacks in finite time without putting any
 685  * newly spawned callbacks somewhere else.  The "somewhere else" of
 686  * last resort is the default call_rcu_data structure.
 687  *
 688  * We also silently refuse to free NULL pointers.  This simplifies
 689  * the calling code.
 690  *
 691  * The caller must wait for a grace-period to pass between return from
 692  * set_cpu_call_rcu_data() and call to call_rcu_data_free() passing the
 693  * previous call rcu data as argument.
 694  */
 695 void call_rcu_data_free(struct call_rcu_data *crdp)
 696 {
 697         struct cds_wfq_node *cbs;
 698         struct cds_wfq_node **cbs_tail;
 699         struct cds_wfq_node **cbs_endprev;
 700
             /* NULL and the default structure are silently ignored. */
 701         if (crdp == NULL || crdp == default_call_rcu_data) {
 702                 return;
 703         }
             /*
              * Unless the thread is already marked stopped, ask it to stop
              * and wait, in 1 ms steps, until it reports STOPPED.
              */
 704         if ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0) {
 705                 uatomic_or(&crdp->flags, URCU_CALL_RCU_STOP);
 706                 wake_call_rcu_thread(crdp);
 707                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_STOPPED) == 0)
 708                         poll(NULL, 0, 1);
 709         }
             /* Leftover callbacks: splice them onto the default queue. */
 710         if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
 711                 while ((cbs = _CMM_LOAD_SHARED(crdp->cbs.head)) == NULL)
 712                         poll(NULL, 0, 1);
                     /* Detach the list from crdp, leaving its queue empty. */
 713                 _CMM_STORE_SHARED(crdp->cbs.head, NULL);
 714                 cbs_tail = (struct cds_wfq_node **)
 715                         uatomic_xchg(&crdp->cbs.tail, &crdp->cbs.head);
 716                 /* Create default call rcu data if need be */
 717                 (void) get_default_call_rcu_data();
                     /*
                      * Make the default queue's tail our tail, then link our
                      * first node after the default queue's previous last node.
                      */
 718                 cbs_endprev = (struct cds_wfq_node **)
 719                         uatomic_xchg(&default_call_rcu_data->cbs.tail,
 720                                         cbs_tail);
 721                 _CMM_STORE_SHARED(*cbs_endprev, cbs);
                     /* Account for the moved callbacks in the default queue length. */
 722                 uatomic_add(&default_call_rcu_data->qlen,
 723                             uatomic_read(&crdp->qlen));
 724                 wake_call_rcu_thread(default_call_rcu_data);
 725         }
 726
             /* Unlink from call_rcu_data_list under the mutex, then release. */
 727         call_rcu_lock(&call_rcu_mutex);
 728         cds_list_del(&crdp->list);
 729         call_rcu_unlock(&call_rcu_mutex);
 730
 731         free(crdp);
 732 }
 733
 734 /*
 735  * Clean up all the per-CPU call_rcu threads.
 736  */
 737 void free_all_cpu_call_rcu_data(void)
 738 {
 739         int cpu;
 740         struct call_rcu_data **crdp;
 741         static int warned = 0;
 742
 743         if (maxcpus <= 0)
 744                 return;
 745
 746         crdp = malloc(sizeof(*crdp) * maxcpus);
 747         if (!crdp) {
 748                 if (!warned) {
 749                         fprintf(stderr, "[error] liburcu: unable to allocate per-CPU pointer array\n");
 750                 }
 751                 warned = 1;
 752                 return;
 753         }
 754
 755         for (cpu = 0; cpu < maxcpus; cpu++) {
 756                 crdp[cpu] = get_cpu_call_rcu_data(cpu);
 757                 if (crdp[cpu] == NULL)
 758                         continue;
 759                 set_cpu_call_rcu_data(cpu, NULL);
 760         }
 761         /*
 762          * Wait for call_rcu sites acting as RCU readers of the
 763          * call_rcu_data to become quiescent.
 764          */
 765         synchronize_rcu();
 766         for (cpu = 0; cpu < maxcpus; cpu++) {
 767                 if (crdp[cpu] == NULL)
 768                         continue;
 769                 call_rcu_data_free(crdp[cpu]);
 770         }
 771         free(crdp);
 772 }
 773
 774 /*
 775  * Acquire the call_rcu_mutex in order to ensure that the child sees
 776  * all of the call_rcu() data structures in a consistent state. Ensure
 777  * that all call_rcu threads are in a quiescent state across fork.
 778  * Suitable for pthread_atfork() and friends.
 779  */
 780 void call_rcu_before_fork(void)
 781 {
 782         struct call_rcu_data *crdp;
 783
 784         call_rcu_lock(&call_rcu_mutex);
 785
 786         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 787                 uatomic_or(&crdp->flags, URCU_CALL_RCU_PAUSE);
 788                 cmm_smp_mb__after_uatomic_or();
 789                 wake_call_rcu_thread(crdp);
 790         }
 791         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 792                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) == 0)
 793                         poll(NULL, 0, 1);
 794         }
 795 }
 796
 797 /*
 798  * Clean up call_rcu data structures in the parent of a successful fork()
 799  * that is not followed by exec() in the child.  Suitable for
 800  * pthread_atfork() and friends.
 801  */
 802 void call_rcu_after_fork_parent(void)
 803 {
 804         struct call_rcu_data *crdp;
 805
 806         cds_list_for_each_entry(crdp, &call_rcu_data_list, list)
 807                 uatomic_and(&crdp->flags, ~URCU_CALL_RCU_PAUSE);
 808         cds_list_for_each_entry(crdp, &call_rcu_data_list, list) {
 809                 while ((uatomic_read(&crdp->flags) & URCU_CALL_RCU_PAUSED) != 0)
 810                         poll(NULL, 0, 1);
 811         }
 812         call_rcu_unlock(&call_rcu_mutex);
 813 }
 814
 815 /*
 816  * Clean up call_rcu data structures in the child of a successful fork()
 817  * that is not followed by exec().  Suitable for pthread_atfork() and
 818  * friends.
 819  */
 820 void call_rcu_after_fork_child(void)
 821 {
 822         struct call_rcu_data *crdp, *next;
 823
 824         /* Release the mutex. */
 825         call_rcu_unlock(&call_rcu_mutex);
 826
 827         /* Do nothing when call_rcu() has not been used */
 828         if (cds_list_empty(&call_rcu_data_list))
 829                 return;
 830
 831         /*
 832          * Allocate a new default call_rcu_data structure in order
 833          * to get a working call_rcu thread to go with it.
 834          */
 835         default_call_rcu_data = NULL;
 836         (void)get_default_call_rcu_data();
 837
 838         /* Cleanup call_rcu_data pointers before use */
 839         maxcpus_reset();
 840         free(per_cpu_call_rcu_data);
 841         rcu_set_pointer(&per_cpu_call_rcu_data, NULL);
 842         URCU_TLS(thread_call_rcu_data) = NULL;
 843
 844         /*
 845          * Dispose of all of the rest of the call_rcu_data structures.
 846          * Leftover call_rcu callbacks will be merged into the new
 847          * default call_rcu thread queue.
 848          */
 849         cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 850                 if (crdp == default_call_rcu_data)
 851                         continue;
 852                 uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 853                 call_rcu_data_free(crdp);
 854         }
 855 }