urcu.c

   1 /*
   2  * urcu.c
   3  *
   4  * Userspace RCU library
   5  *
   6  * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
   7  * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  *
  23  * IBM's contributions to this file may be relicensed under LGPLv2 or later.
  24  */
  25
  26 #include <stdio.h>
  27 #include <pthread.h>
  28 #include <signal.h>
  29 #include <assert.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include <errno.h>
  33 #include <poll.h>
  34
  35 #include "urcu-static.h"
  36 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
  37 #include "urcu.h"
  38
  39 void __attribute__((constructor)) urcu_init(void);
  40 void __attribute__((destructor)) urcu_exit(void);
     /*
      * The GCC constructor/destructor attributes ask the toolchain to run
      * urcu_init() when the library is loaded and urcu_exit() when it is
      * unloaded. rcu_register_thread() also calls urcu_init() explicitly.
      */
  41
     /* Set to 1 by urcu_init() so that its setup runs only once. */
  42 int init_done;
  43
     /* Taken by internal_urcu_lock() and released by internal_urcu_unlock(). */
  44 pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER;
  45
  46 /*
  47  * Global grace period counter.
  48  * Contains the current RCU_GP_CTR_BIT.
  49  * Initialized to RCU_GP_COUNT (see the initializer below), to accelerate the reader fast path.
  50  * Written to only by writer with mutex taken. Read by both writer and readers.
  51  */
  52 long urcu_gp_ctr = RCU_GP_COUNT;
  53
  54 /*
  55  * Thread-local. Written to only by each individual reader. Read by both the
  56  * reader and the writers (through the pointer stored in the reader registry).
  57  */
  58 long __thread urcu_active_readers;
  59
  60 /* Number of registry slots allocated on first registration (see rcu_add_reader()). */
  61 #define INIT_NUM_THREADS 4
  62
     /* One entry per registered reader thread. */
  63 struct reader_registry {
  64         pthread_t tid;                  /* thread to signal with SIGURCU */
  65         long *urcu_active_readers;      /* address of that thread's urcu_active_readers */
  66         char *need_mb;                  /* address of that thread's need_mb flag */
  67 };
  68
     /*
      * Debug-only variables; not referenced elsewhere in this file.
      * NOTE(review): presumably consumed by urcu-static.h - confirm.
      */
  69 #ifdef DEBUG_YIELD
  70 unsigned int yield_active;
  71 unsigned int __thread rand_yield;
  72 #endif
  73
     /* Heap-allocated array of registered readers; NULL until the first registration. */
  74 static struct reader_registry *registry;
     /* Per-thread flag: set to 1 by a writer, cleared by the thread after its smp_mb(). */
  75 static char __thread need_mb;
     /* Entries in use / entries allocated in registry. */
  76 static int num_readers, alloc_readers;
  77
  78 void internal_urcu_lock(void)
  79 {
  80         int ret;
  81
  82 #ifndef DISTRUST_SIGNALS_EXTREME
  83         ret = pthread_mutex_lock(&urcu_mutex);
  84         if (ret) {
  85                 perror("Error in pthread mutex lock");
  86                 exit(-1);
  87         }
  88 #else /* #ifndef DISTRUST_SIGNALS_EXTREME */
  89         while ((ret = pthread_mutex_trylock(&urcu_mutex)) != 0) {
  90                 if (ret != EBUSY && ret != EINTR) {
  91                         printf("ret = %d, errno = %d\n", ret, errno);
  92                         perror("Error in pthread mutex lock");
  93                         exit(-1);
  94                 }
  95                 if (need_mb) {
  96                         smp_mb();
  97                         need_mb = 0;
  98                         smp_mb();
  99                 }
 100                 poll(NULL,0,10);
 101         }
 102 #endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
 103 }
 104
 105 void internal_urcu_unlock(void)
 106 {
 107         int ret;
 108
 109         ret = pthread_mutex_unlock(&urcu_mutex);
 110         if (ret) {
 111                 perror("Error in pthread mutex unlock");
 112                 exit(-1);
 113         }
 114 }
 115
 116 /*
 117  * called with urcu_mutex held.
 118  */
 119 static void switch_next_urcu_qparity(void)
 120 {
 121         STORE_SHARED(urcu_gp_ctr, urcu_gp_ctr ^ RCU_GP_CTR_BIT);
 122 }
 123
 124 #ifdef DEBUG_FULL_MB
 125 #ifdef HAS_INCOHERENT_CACHES
 126 static void force_mb_single_thread(struct reader_registry *index)
 127 {
              /* DEBUG_FULL_MB variant: only a barrier on the calling thread; index is unused. */
 128         smp_mb();
 129 }
 130 #endif /* #ifdef HAS_INCOHERENT_CACHES */
 131
 132 static void force_mb_all_threads(void)
 133 {
              /* DEBUG_FULL_MB variant: no signal is sent, only a barrier on the calling thread. */
 134         smp_mb();
 135 }
 136 #else /* #ifdef DEBUG_FULL_MB */
 137 #ifdef HAS_INCOHERENT_CACHES
 138 static void force_mb_single_thread(struct reader_registry *index)
 139 {
 140         assert(registry);
 141         /*
 142          * pthread_kill has a smp_mb(). But beware, we assume it performs
 143          * a cache flush on architectures with non-coherent cache. Let's play
 144          * safe and don't assume anything : we use smp_mc() to make sure the
 145          * cache flush is enforced.
 146          */
 147         *index->need_mb = 1;
 148         smp_mc();       /* write ->need_mb before sending the signals */
 149         pthread_kill(index->tid, SIGURCU);
 150         smp_mb();
 151         /*
 152          * Wait for sighandler (and thus mb()) to execute on every thread.
 153          * BUSY-LOOP.
 154          */
 155         while (*index->need_mb) {
 156                 poll(NULL, 0, 1);
 157         }
 158         smp_mb();       /* read ->need_mb before ending the barrier */
 159 }
 160 #endif /* #ifdef HAS_INCOHERENT_CACHES */
 161
 162 static void force_mb_all_threads(void)
 163 {
 164         struct reader_registry *index;
 165         /*
 166          * Ask for each threads to execute a smp_mb() so we can consider the
 167          * compiler barriers around rcu read lock as real memory barriers.
 168          */
 169         if (!registry)
 170                 return;
 171         /*
 172          * pthread_kill has a smp_mb(). But beware, we assume it performs
 173          * a cache flush on architectures with non-coherent cache. Let's play
 174          * safe and don't assume anything : we use smp_mc() to make sure the
 175          * cache flush is enforced.
 176          */
 177         for (index = registry; index < registry + num_readers; index++) {
 178                 *index->need_mb = 1;
 179                 smp_mc();       /* write need_mb before sending the signal */
 180                 pthread_kill(index->tid, SIGURCU);
 181         }
 182         /*
 183          * Wait for sighandler (and thus mb()) to execute on every thread.
 184          *
 185          * Note that the pthread_kill() will never be executed on systems
 186          * that correctly deliver signals in a timely manner.  However, it
 187          * is not uncommon for kernels to have bugs that can result in
 188          * lost or unduly delayed signals.
 189          *
 190          * If you are seeing the below pthread_kill() executing much at
 191          * all, we suggest testing the underlying kernel and filing the
 192          * relevant bug report.  For Linux kernels, we recommend getting
 193          * the Linux Test Project (LTP).
 194          */
 195         for (index = registry; index < registry + num_readers; index++) {
 196                 while (*index->need_mb) {
 197                         pthread_kill(index->tid, SIGURCU);
 198                         poll(NULL, 0, 1);
 199                 }
 200         }
 201         smp_mb();       /* read ->need_mb before ending the barrier */
 202 }
 203 #endif /* #else #ifdef DEBUG_FULL_MB */
 204
 205 void wait_for_quiescent_state(void)
 206 {
 207         struct reader_registry *index;
 208
 209         if (!registry)
 210                 return;
 211         /*
 212          * Wait for each thread urcu_active_readers count to become 0.
 213          */
 214         for (index = registry; index < registry + num_readers; index++) {
 215 #ifndef HAS_INCOHERENT_CACHES
 216                 while (rcu_old_gp_ongoing(index->urcu_active_readers))
 217                         cpu_relax();
 218 #else /* #ifndef HAS_INCOHERENT_CACHES */
 219                 int wait_loops = 0;
 220                 /*
 221                  * BUSY-LOOP. Force the reader thread to commit its
 222                  * urcu_active_readers update to memory if we wait for too long.
 223                  */
 224                 while (rcu_old_gp_ongoing(index->urcu_active_readers)) {
 225                         if (wait_loops++ == KICK_READER_LOOPS) {
 226                                 force_mb_single_thread(index);
 227                                 wait_loops = 0;
 228                         } else {
 229                                 cpu_relax();
 230                         }
 231                 }
 232 #endif /* #else #ifndef HAS_INCOHERENT_CACHES */
 233         }
 234 }
 235
 236 void synchronize_rcu(void)
 237 {
              /*
               * Overview: with urcu_mutex held, force a memory barrier on all
               * registered readers, flip the grace period parity and wait for
               * the readers, flip it back and wait again, then force a final
               * barrier on all readers before releasing the mutex. The
               * statement order below is significant; do not reorder.
               */
 238         internal_urcu_lock();
 239
 240         /* All threads should read qparity before accessing data structure
 241          * where new ptr points to. Must be done within internal_urcu_lock
 242          * because it iterates on reader threads.*/
 243         /* Write new ptr before changing the qparity */
 244         force_mb_all_threads();
 245
 246         switch_next_urcu_qparity();     /* 0 -> 1 */
 247
 248         /*
 249          * Must commit qparity update to memory before waiting for parity
 250          * 0 quiescent state. Failure to do so could result in the writer
 251          * waiting forever while new readers are always accessing data (no
 252          * progress).
 253          * Ensured by STORE_SHARED and LOAD_SHARED.
 254          */
 255
 256         /*
 257          * Adding a smp_mb() which is _not_ formally required, but makes the
 258          * model easier to understand. It does not have a big performance impact
 259          * anyway, given this is the write-side.
 260          */
 261         smp_mb();
 262
 263         /*
 264          * Wait for previous parity to be empty of readers.
 265          */
 266         wait_for_quiescent_state();     /* Wait readers in parity 0 */
 267
 268         /*
 269          * Must finish waiting for quiescent state for parity 0 before
 270          * committing qparity update to memory. Failure to do so could result in
 271          * the writer waiting forever while new readers are always accessing
 272          * data (no progress).
 273          * Ensured by STORE_SHARED and LOAD_SHARED.
 274          */
 275
 276         /*
 277          * Adding a smp_mb() which is _not_ formally required, but makes the
 278          * model easier to understand. It does not have a big performance impact
 279          * anyway, given this is the write-side.
 280          */
 281         smp_mb();
 282
 283         switch_next_urcu_qparity();     /* 1 -> 0 */
 284
 285         /*
 286          * Must commit qparity update to memory before waiting for parity
 287          * 1 quiescent state. Failure to do so could result in the writer
 288          * waiting forever while new readers are always accessing data (no
 289          * progress).
 290          * Ensured by STORE_SHARED and LOAD_SHARED.
 291          */
 292
 293         /*
 294          * Adding a smp_mb() which is _not_ formally required, but makes the
 295          * model easier to understand. It does not have a big performance impact
 296          * anyway, given this is the write-side.
 297          */
 298         smp_mb();
 299
 300         /*
 301          * Wait for previous parity to be empty of readers.
 302          */
 303         wait_for_quiescent_state();     /* Wait readers in parity 1 */
 304
 305         /* Finish waiting for reader threads before letting the old ptr being
 306          * freed. Must be done within internal_urcu_lock because it iterates on
 307          * reader threads. */
 308         force_mb_all_threads();
 309
 310         internal_urcu_unlock();
 311 }
 312
 313 /*
 314  * library wrappers to be used by non-LGPL compatible source code.
 315  */
 316
 317 void rcu_read_lock(void)
 318 {
              /* Out-of-line wrapper: forwards to _rcu_read_lock(). */
 319         _rcu_read_lock();
 320 }
 321
 322 void rcu_read_unlock(void)
 323 {
              /* Out-of-line wrapper: forwards to _rcu_read_unlock(). */
 324         _rcu_read_unlock();
 325 }
 326
 327 void *rcu_dereference(void *p)
 328 {
              /* Out-of-line wrapper: returns whatever _rcu_dereference(p) yields. */
 329         return _rcu_dereference(p);
 330 }
 331
 332 void *rcu_assign_pointer_sym(void **p, void *v)
 333 {
 334         wmb();
 335         return STORE_SHARED(p, v);
 336 }
 337
 338 void *rcu_xchg_pointer_sym(void **p, void *v)
 339 {
 340         wmb();
 341         return xchg(p, v);
 342 }
 343
 344 void *rcu_publish_content_sym(void **p, void *v)
 345 {
 346         void *oldptr;
 347
 348         oldptr = _rcu_xchg_pointer(p, v);
 349         synchronize_rcu();
 350         return oldptr;
 351 }
 352
 353 static void rcu_add_reader(pthread_t id)
 354 {
 355         struct reader_registry *oldarray;
 356
 357         if (!registry) {
 358                 alloc_readers = INIT_NUM_THREADS;
 359                 num_readers = 0;
 360                 registry =
 361                         malloc(sizeof(struct reader_registry) * alloc_readers);
 362         }
 363         if (alloc_readers < num_readers + 1) {
 364                 oldarray = registry;
 365                 registry = malloc(sizeof(struct reader_registry)
 366                                 * (alloc_readers << 1));
 367                 memcpy(registry, oldarray,
 368                         sizeof(struct reader_registry) * alloc_readers);
 369                 alloc_readers <<= 1;
 370                 free(oldarray);
 371         }
 372         registry[num_readers].tid = id;
 373         /* reference to the TLS of _this_ reader thread. */
 374         registry[num_readers].urcu_active_readers = &urcu_active_readers;
 375         registry[num_readers].need_mb = &need_mb;
 376         num_readers++;
 377 }
 378
 379 /*
 380  * Never shrink (implementation limitation).
 381  * This is O(nb threads). Eventually use a hash table.
 382  */
 383 static void rcu_remove_reader(pthread_t id)
 384 {
 385         struct reader_registry *index;
 386
 387         assert(registry != NULL);
 388         for (index = registry; index < registry + num_readers; index++) {
 389                 if (pthread_equal(index->tid, id)) {
 390                         memcpy(index, &registry[num_readers - 1],
 391                                 sizeof(struct reader_registry));
 392                         registry[num_readers - 1].tid = 0;
 393                         registry[num_readers - 1].urcu_active_readers = NULL;
 394                         num_readers--;
 395                         return;
 396                 }
 397         }
 398         /* Hrm not found, forgot to register ? */
 399         assert(0);
 400 }
 401
 402 void rcu_register_thread(void)
 403 {
 404         internal_urcu_lock();
 405         urcu_init();    /* In case gcc does not support constructor attribute */
 406         rcu_add_reader(pthread_self());
 407         internal_urcu_unlock();
 408 }
 409
 410 void rcu_unregister_thread(void)
 411 {
 412         internal_urcu_lock();
 413         rcu_remove_reader(pthread_self());
 414         internal_urcu_unlock();
 415 }
 416
 417 #ifndef DEBUG_FULL_MB
 418 static void sigurcu_handler(int signo, siginfo_t *siginfo, void *context)
 419 {
 420         /*
 421          * Executing this smp_mb() is the only purpose of this signal handler.
 422          * It punctually promotes barrier() into smp_mb() on every thread it is
 423          * executed on.
 424          */
 425         smp_mb();
 426         need_mb = 0;
 427         smp_mb();
 428 }
 429
 430 /*
 431  * urcu_init constructor. Called when the library is linked, but also when
 432  * reader threads are calling rcu_register_thread().
 433  * Should only be called by a single thread at a given time. This is ensured by
 434  * holing the internal_urcu_lock() from rcu_register_thread() or by running at
 435  * library load time, which should not be executed by multiple threads nor
 436  * concurrently with rcu_register_thread() anyway.
 437  */
 438 void urcu_init(void)
 439 {
 440         struct sigaction act;
 441         int ret;
 442
 443         if (init_done)
 444                 return;
 445         init_done = 1;
 446
 447         act.sa_sigaction = sigurcu_handler;
 448         ret = sigaction(SIGURCU, &act, NULL);
 449         if (ret) {
 450                 perror("Error in sigaction");
 451                 exit(-1);
 452         }
 453 }
 454
 455 void urcu_exit(void)
 456 {
 457         struct sigaction act;
 458         int ret;
 459
 460         ret = sigaction(SIGURCU, NULL, &act);
 461         if (ret) {
 462                 perror("Error in sigaction");
 463                 exit(-1);
 464         }
 465         assert(act.sa_sigaction == sigurcu_handler);
 466         free(registry);
 467 }
 468 #endif /* #ifndef DEBUG_FULL_MB */