urcu.c

   1 /*
   2  * urcu.c
   3  *
   4  * Userspace RCU library
   5  *
   6  * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
   7  * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  *
  23  * IBM's contributions to this file may be relicensed under LGPLv2 or later.
  24  */
  25
  26 #include <stdio.h>
  27 #include <pthread.h>
  28 #include <signal.h>
  29 #include <assert.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <errno.h>
  33 #include <poll.h>
  34
  35 #include "urcu-static.h"
  36 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
  37 #include "urcu.h"
  38
#ifndef URCU_MB
/* Set to 1 by the first urcu_init() call so later calls return immediately. */
static int init_done;

/*
 * Signal-based build: urcu_init()/urcu_exit() are defined at the end of this
 * file and are also requested to run as constructor/destructor.
 */
void __attribute__((constructor)) urcu_init(void);
void __attribute__((destructor)) urcu_exit(void);
#else
/*
 * URCU_MB build: there is nothing to set up, so urcu_init() is an empty stub
 * kept so that rcu_register_thread() can call it unconditionally.
 */
void urcu_init(void)
{
}
#endif
  49
/*
 * Mutex taken by internal_urcu_lock() and released by internal_urcu_unlock().
 * It is held across synchronize_rcu() and across reader registration and
 * unregistration.
 */
static pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Futex word used by wait_gp(): decremented before the writer re-checks a
 * reader, reset to 0 when no wait is needed, and waited on while it reads -1.
 */
int gp_futex;

/*
 * Global grace period counter.
 * Contains the current RCU_GP_CTR_BIT.
 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
 * Written to only by writer with mutex taken. Read by both writer and readers.
 */
long urcu_gp_ctr = RCU_GP_COUNT;

/*
 * Per-thread reader state.
 * Written to only by each individual reader. Read by both the reader and the
 * writers (through the pointer stored in the reader registry).
 */
long __thread urcu_active_readers;
  67
/*
 * Initial capacity, in entries, of the reader registry. rcu_add_reader()
 * doubles the capacity whenever the array is full.
 */
#define INIT_NUM_THREADS 4

/*
 * One entry per registered reader thread.
 */
struct reader_registry {
        /* Thread ID of the registered reader; target of pthread_kill(). */
        pthread_t tid;
        /* Address of that reader's thread-local urcu_active_readers. */
        long *urcu_active_readers;
        /* Address of that reader's thread-local need_mb flag. */
        char *need_mb;
};
  76
#ifdef DEBUG_YIELD
/*
 * Debug-only state. Not referenced elsewhere in this file; presumably used by
 * the inline code in urcu-static.h -- TODO confirm.
 */
unsigned int yield_active;
unsigned int __thread rand_yield;
#endif

/* Heap-allocated array of registered readers; NULL until the first one. */
static struct reader_registry *registry;
/*
 * Per-thread flag: set to 1 by a writer requesting a memory barrier, cleared
 * by the owning thread once it has executed the barrier.
 */
static char __thread need_mb;
/* Entries in use in registry, and entries allocated for it. */
static int num_readers, alloc_readers;
  85
  86 static void internal_urcu_lock(void)
  87 {
  88         int ret;
  89
  90 #ifndef DISTRUST_SIGNALS_EXTREME
  91         ret = pthread_mutex_lock(&urcu_mutex);
  92         if (ret) {
  93                 perror("Error in pthread mutex lock");
  94                 exit(-1);
  95         }
  96 #else /* #ifndef DISTRUST_SIGNALS_EXTREME */
  97         while ((ret = pthread_mutex_trylock(&urcu_mutex)) != 0) {
  98                 if (ret != EBUSY && ret != EINTR) {
  99                         printf("ret = %d, errno = %d\n", ret, errno);
 100                         perror("Error in pthread mutex lock");
 101                         exit(-1);
 102                 }
 103                 if (need_mb) {
 104                         smp_mb();
 105                         need_mb = 0;
 106                         smp_mb();
 107                 }
 108                 poll(NULL,0,10);
 109         }
 110 #endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
 111 }
 112
 113 static void internal_urcu_unlock(void)
 114 {
 115         int ret;
 116
 117         ret = pthread_mutex_unlock(&urcu_mutex);
 118         if (ret) {
 119                 perror("Error in pthread mutex unlock");
 120                 exit(-1);
 121         }
 122 }
 123
/*
 * Flip RCU_GP_CTR_BIT in the global grace period counter urcu_gp_ctr.
 * Called with urcu_mutex held.
 */
static void switch_next_urcu_qparity(void)
{
        STORE_SHARED(urcu_gp_ctr, urcu_gp_ctr ^ RCU_GP_CTR_BIT);
}
 131
 132 #ifdef URCU_MB
/*
 * URCU_MB build: only issues smp_mb() on the calling thread; index is unused.
 * NOTE(review): presumably readers execute real memory barriers themselves in
 * this configuration, so no signal is needed -- confirm in urcu-static.h.
 */
static void force_mb_single_thread(struct reader_registry *index)
{
        smp_mb();
}
 137
/*
 * URCU_MB build: only issues smp_mb() on the calling thread; no reader is
 * signalled.
 */
static void force_mb_all_threads(void)
{
        smp_mb();
}
 142 #else /* #ifdef URCU_MB */
/*
 * Make the reader described by index execute a memory barrier: raise its
 * need_mb flag, send it SIGURCU, then poll until the flag has been cleared
 * again (sigurcu_handler() clears it between two smp_mb()).
 * The return value of pthread_kill() is not checked.
 */
static void force_mb_single_thread(struct reader_registry *index)
{
        assert(registry);
        /*
         * pthread_kill has a smp_mb(). But beware, we assume it performs
         * a cache flush on architectures with non-coherent cache. Let's play
         * safe and don't assume anything : we use smp_mc() to make sure the
         * cache flush is enforced.
         */
        *index->need_mb = 1;
        smp_mc();       /* write ->need_mb before sending the signal */
        pthread_kill(index->tid, SIGURCU);
        smp_mb();
        /*
         * Wait for the sighandler (and thus mb()) to execute on the target
         * thread. Polling loop, sleeping 1 ms between checks.
         */
        while (*index->need_mb) {
                poll(NULL, 0, 1);
        }
        smp_mb();       /* read ->need_mb before ending the barrier */
}
 165
/*
 * Make every registered reader execute a memory barrier.
 *
 * First pass: raise each reader's need_mb flag and send it SIGURCU.
 * Second pass: for each reader, keep re-sending SIGURCU and sleeping 1 ms
 * until its need_mb flag has been cleared.
 * Does nothing when no registry has been allocated yet. Iterates over the
 * registry, so callers in this file invoke it with urcu_mutex held.
 */
static void force_mb_all_threads(void)
{
        struct reader_registry *index;
        /*
         * Ask for each threads to execute a smp_mb() so we can consider the
         * compiler barriers around rcu read lock as real memory barriers.
         */
        if (!registry)
                return;
        /*
         * pthread_kill has a smp_mb(). But beware, we assume it performs
         * a cache flush on architectures with non-coherent cache. Let's play
         * safe and don't assume anything : we use smp_mc() to make sure the
         * cache flush is enforced.
         */
        for (index = registry; index < registry + num_readers; index++) {
                *index->need_mb = 1;
                smp_mc();       /* write need_mb before sending the signal */
                pthread_kill(index->tid, SIGURCU);
        }
        /*
         * Wait for sighandler (and thus mb()) to execute on every thread.
         *
         * Note that the pthread_kill() below is only reached for a thread
         * whose need_mb flag is still set when the writer gets to it. On
         * systems that deliver signals in a timely manner this should be
         * rare.  However, it is not uncommon for kernels to have bugs that
         * can result in lost or unduly delayed signals.
         *
         * If you are seeing the below pthread_kill() executing much at
         * all, we suggest testing the underlying kernel and filing the
         * relevant bug report.  For Linux kernels, we recommend getting
         * the Linux Test Project (LTP).
         */
        for (index = registry; index < registry + num_readers; index++) {
                while (*index->need_mb) {
                        pthread_kill(index->tid, SIGURCU);
                        poll(NULL, 0, 1);
                }
        }
        smp_mb();       /* read ->need_mb before ending the barrier */
}
 207 #endif /* #else #ifdef URCU_MB */
 208
/*
 * synchronize_rcu() waiting. Single thread.
 *
 * Decrements gp_futex, then re-checks the reader described by index:
 *  - if that reader is no longer in the old grace period, gp_futex is set
 *    back to 0 and the function returns without sleeping;
 *  - otherwise, if gp_futex still reads -1, the caller sleeps in
 *    futex(FUTEX_WAIT) on gp_futex.
 * Each step is ordered by a force_mb_single_thread(index) call.
 *
 * NOTE(review): the code that wakes the futex is not in this file --
 * presumably the reader unlock path in urcu-static.h; confirm.
 */
static void wait_gp(struct reader_registry *index)
{
        uatomic_dec(&gp_futex);
        force_mb_single_thread(index); /* Write futex before read reader_gp */
        if (!rcu_old_gp_ongoing(index->urcu_active_readers)) {
                /* Read reader_gp before write futex */
                force_mb_single_thread(index);
                /* Reader already left the old grace period, don't wait. */
                uatomic_set(&gp_futex, 0);
        } else {
                /* Read reader_gp before read futex */
                force_mb_single_thread(index);
                if (uatomic_read(&gp_futex) == -1)
                        futex(&gp_futex, FUTEX_WAIT, -1,
                              NULL, NULL, 0);
        }
}
 229
/*
 * For each registered reader in turn, loop until rcu_old_gp_ongoing() reports
 * that the reader is no longer in the old grace period. Returns immediately
 * when no registry has been allocated.
 *
 * The loop spins with cpu_relax(); on the iteration where the per-reader
 * counter equals RCU_QS_ACTIVE_ATTEMPTS it calls wait_gp() instead.
 *
 * NOTE(review): because the counter is post-incremented on every iteration,
 * wait_gp() is called only once per reader in the coherent-cache path; later
 * iterations go back to spinning with cpu_relax(). Confirm this is intended.
 */
void wait_for_quiescent_state(void)
{
        struct reader_registry *index;

        if (!registry)
                return;
        /*
         * Wait for each thread urcu_active_readers count to become 0.
         */
        for (index = registry; index < registry + num_readers; index++) {
                int wait_loops = 0;
#ifndef HAS_INCOHERENT_CACHES
                while (rcu_old_gp_ongoing(index->urcu_active_readers)) {
                        if (wait_loops++ == RCU_QS_ACTIVE_ATTEMPTS) {
                                wait_gp(index);
                        } else {
                                cpu_relax();
                        }
                }
#else /* #ifndef HAS_INCOHERENT_CACHES */
                /*
                 * BUSY-LOOP. Force the reader thread to commit its
                 * urcu_active_readers update to memory if we wait for too long.
                 * The counter restarts from 0 each time it reaches
                 * KICK_READER_LOOPS.
                 */
                while (rcu_old_gp_ongoing(index->urcu_active_readers)) {
                        switch (wait_loops++) {
                        case RCU_QS_ACTIVE_ATTEMPTS:
                                wait_gp(index);
                                break;
                        case KICK_READER_LOOPS:
                                force_mb_single_thread(index);
                                wait_loops = 0;
                                break;
                        default:
                                cpu_relax();
                        }
                }
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
        }
}
 270
/*
 * Wait for a grace period to elapse.
 *
 * With urcu_mutex held for the whole operation, the writer:
 *   1. forces a memory barrier on all registered readers,
 *   2. flips the grace period parity and waits for the readers,
 *   3. flips the parity again and waits for the readers a second time,
 *   4. forces a memory barrier on all registered readers again.
 * Blocks (spinning and/or sleeping) until every registered reader has been
 * observed outside the old grace period in both waits.
 */
void synchronize_rcu(void)
{
        internal_urcu_lock();

        /* All threads should read qparity before accessing data structure
         * where new ptr points to. Must be done within internal_urcu_lock
         * because it iterates on reader threads.*/
        /* Write new ptr before changing the qparity */
        force_mb_all_threads();

        switch_next_urcu_qparity();     /* 0 -> 1 */

        /*
         * Must commit qparity update to memory before waiting for parity
         * 0 quiescent state. Failure to do so could result in the writer
         * waiting forever while new readers are always accessing data (no
         * progress).
         * Ensured by STORE_SHARED and LOAD_SHARED.
         */

        /*
         * Adding a smp_mb() which is _not_ formally required, but makes the
         * model easier to understand. It does not have a big performance impact
         * anyway, given this is the write-side.
         */
        smp_mb();

        /*
         * Wait for previous parity to be empty of readers.
         */
        wait_for_quiescent_state();     /* Wait readers in parity 0 */

        /*
         * Must finish waiting for quiescent state for parity 0 before
         * committing qparity update to memory. Failure to do so could result in
         * the writer waiting forever while new readers are always accessing
         * data (no progress).
         * Ensured by STORE_SHARED and LOAD_SHARED.
         */

        /*
         * Adding a smp_mb() which is _not_ formally required, but makes the
         * model easier to understand. It does not have a big performance impact
         * anyway, given this is the write-side.
         */
        smp_mb();

        switch_next_urcu_qparity();     /* 1 -> 0 */

        /*
         * Must commit qparity update to memory before waiting for parity
         * 1 quiescent state. Failure to do so could result in the writer
         * waiting forever while new readers are always accessing data (no
         * progress).
         * Ensured by STORE_SHARED and LOAD_SHARED.
         */

        /*
         * Adding a smp_mb() which is _not_ formally required, but makes the
         * model easier to understand. It does not have a big performance impact
         * anyway, given this is the write-side.
         */
        smp_mb();

        /*
         * Wait for previous parity to be empty of readers.
         */
        wait_for_quiescent_state();     /* Wait readers in parity 1 */

        /* Finish waiting for reader threads before letting the old ptr be
         * freed. Must be done within internal_urcu_lock because it iterates on
         * reader threads. */
        force_mb_all_threads();

        internal_urcu_unlock();
}
 347
 348 /*
 349  * library wrappers to be used by non-LGPL compatible source code.
 350  */
 351
/* Out-of-line symbol that forwards to the inline _rcu_read_lock(). */
void rcu_read_lock(void)
{
        _rcu_read_lock();
}
 356
/* Out-of-line symbol that forwards to the inline _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
        _rcu_read_unlock();
}
 361
/*
 * Out-of-line symbol that forwards p to _rcu_dereference() and returns its
 * result.
 */
void *rcu_dereference(void *p)
{
        return _rcu_dereference(p);
}
 366
 367 void *rcu_assign_pointer_sym(void **p, void *v)
 368 {
 369         wmb();
 370         return STORE_SHARED(p, v);
 371 }
 372
/*
 * Issue wmb(), then exchange the pointer stored at p with v using
 * uatomic_xchg(). Returns the result of uatomic_xchg() (presumably the
 * previous value of *p -- confirm against the uatomic API).
 */
void *rcu_xchg_pointer_sym(void **p, void *v)
{
        wmb();
        return uatomic_xchg(p, v);
}
 378
/*
 * Issue wmb(), then compare-and-exchange the pointer stored at p from old to
 * _new using uatomic_cmpxchg(). Returns the result of uatomic_cmpxchg()
 * (presumably the value found in *p -- confirm against the uatomic API).
 */
void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new)
{
        wmb();
        return uatomic_cmpxchg(p, old, _new);
}
 384
 385 void *rcu_publish_content_sym(void **p, void *v)
 386 {
 387         void *oldptr;
 388
 389         oldptr = _rcu_xchg_pointer(p, v);
 390         synchronize_rcu();
 391         return oldptr;
 392 }
 393
 394 static void rcu_add_reader(pthread_t id)
 395 {
 396         struct reader_registry *oldarray;
 397
 398         if (!registry) {
 399                 alloc_readers = INIT_NUM_THREADS;
 400                 num_readers = 0;
 401                 registry =
 402                         malloc(sizeof(struct reader_registry) * alloc_readers);
 403         }
 404         if (alloc_readers < num_readers + 1) {
 405                 oldarray = registry;
 406                 registry = malloc(sizeof(struct reader_registry)
 407                                 * (alloc_readers << 1));
 408                 memcpy(registry, oldarray,
 409                         sizeof(struct reader_registry) * alloc_readers);
 410                 alloc_readers <<= 1;
 411                 free(oldarray);
 412         }
 413         registry[num_readers].tid = id;
 414         /* reference to the TLS of _this_ reader thread. */
 415         registry[num_readers].urcu_active_readers = &urcu_active_readers;
 416         registry[num_readers].need_mb = &need_mb;
 417         num_readers++;
 418 }
 419
 420 /*
 421  * Never shrink (implementation limitation).
 422  * This is O(nb threads). Eventually use a hash table.
 423  */
 424 static void rcu_remove_reader(pthread_t id)
 425 {
 426         struct reader_registry *index;
 427
 428         assert(registry != NULL);
 429         for (index = registry; index < registry + num_readers; index++) {
 430                 if (pthread_equal(index->tid, id)) {
 431                         memcpy(index, &registry[num_readers - 1],
 432                                 sizeof(struct reader_registry));
 433                         registry[num_readers - 1].tid = 0;
 434                         registry[num_readers - 1].urcu_active_readers = NULL;
 435                         num_readers--;
 436                         return;
 437                 }
 438         }
 439         /* Hrm not found, forgot to register ? */
 440         assert(0);
 441 }
 442
/*
 * Register the calling thread as an RCU reader: with urcu_mutex held, make
 * sure urcu_init() has run and add pthread_self() to the reader registry.
 */
void rcu_register_thread(void)
{
        internal_urcu_lock();
        urcu_init();    /* In case gcc does not support constructor attribute */
        rcu_add_reader(pthread_self());
        internal_urcu_unlock();
}
 450
/*
 * Remove the calling thread from the reader registry, with urcu_mutex held.
 * rcu_remove_reader() asserts if the thread was never registered.
 */
void rcu_unregister_thread(void)
{
        internal_urcu_lock();
        rcu_remove_reader(pthread_self());
        internal_urcu_unlock();
}
 457
 458 #ifndef URCU_MB
/*
 * SIGURCU handler, installed by urcu_init(). Clears the need_mb flag of the
 * thread it runs on, with an smp_mb() before and after the store. The three
 * parameters are unused.
 */
static void sigurcu_handler(int signo, siginfo_t *siginfo, void *context)
{
        /*
         * Executing this smp_mb() is the only purpose of this signal handler.
         * It punctually promotes barrier() into smp_mb() on every thread it is
         * executed on.
         */
        smp_mb();
        need_mb = 0;    /* tells the waiting writer the barrier was executed */
        smp_mb();
}
 470
 471 /*
 472  * urcu_init constructor. Called when the library is linked, but also when
 473  * reader threads are calling rcu_register_thread().
 474  * Should only be called by a single thread at a given time. This is ensured by
 475  * holing the internal_urcu_lock() from rcu_register_thread() or by running at
 476  * library load time, which should not be executed by multiple threads nor
 477  * concurrently with rcu_register_thread() anyway.
 478  */
 479 void urcu_init(void)
 480 {
 481         struct sigaction act;
 482         int ret;
 483
 484         if (init_done)
 485                 return;
 486         init_done = 1;
 487
 488         act.sa_sigaction = sigurcu_handler;
 489         act.sa_flags = SA_SIGINFO | SA_RESTART;
 490         sigemptyset(&act.sa_mask);
 491         ret = sigaction(SIGURCU, &act, NULL);
 492         if (ret) {
 493                 perror("Error in sigaction");
 494                 exit(-1);
 495         }
 496 }
 497
 498 void urcu_exit(void)
 499 {
 500         struct sigaction act;
 501         int ret;
 502
 503         ret = sigaction(SIGURCU, NULL, &act);
 504         if (ret) {
 505                 perror("Error in sigaction");
 506                 exit(-1);
 507         }
 508         assert(act.sa_sigaction == sigurcu_handler);
 509         free(registry);
 510 }
 511 #endif /* #ifndef URCU_MB */