tls-compat: fix comment typo
[userspace-rcu.git] / urcu-bp.c
CommitLineData
fdee2e6d
MD
1/*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6982d6d7 6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
fdee2e6d
MD
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
0617bf4c 26#define _GNU_SOURCE
71c811bf 27#define _LGPL_SOURCE
fdee2e6d
MD
28#include <stdio.h>
29#include <pthread.h>
30#include <signal.h>
31#include <assert.h>
32#include <stdlib.h>
33#include <string.h>
34#include <errno.h>
35#include <poll.h>
36#include <unistd.h>
37#include <sys/mman.h>
38
71c811bf 39#include "urcu/wfqueue.h"
57760d44 40#include "urcu/map/urcu-bp.h"
af7c2dbe 41#include "urcu/static/urcu-bp.h"
618b2595 42#include "urcu-pointer.h"
bd252a04 43#include "urcu/tls-compat.h"
71c811bf 44
4a6d7378
MD
45#include "urcu-die.h"
46
fdee2e6d 47/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
71c811bf 48#undef _LGPL_SOURCE
fdee2e6d 49#include "urcu-bp.h"
71c811bf 50#define _LGPL_SOURCE
fdee2e6d 51
4c1ae2ea
MD
52#ifndef MAP_ANONYMOUS
53#define MAP_ANONYMOUS MAP_ANON
54#endif
55
c7eaf61c
MD
#ifdef __linux__
/*
 * Linux: forward the request unchanged to the native mremap().
 * Returns whatever mremap() returns (MAP_FAILED on error).
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
#else /* #ifdef __linux__ */

#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Stand-in for mremap() on non-Linux systems. This is not generic: it
 * never remaps anything and always reports MAP_FAILED, so callers must
 * be prepared to fall back on a fresh mapping. Requests carrying
 * MREMAP_MAYMOVE are rejected by an assertion.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert(!(flags & MREMAP_MAYMOVE));

	return MAP_FAILED;
}
#endif /* #else #ifdef __linux__ */
81
fdee2e6d
MD
/* Sleep delay in us (handed to usleep() while waiting for readers). */
#define RCU_SLEEP_DELAY		1000

/* Number of reader slots provided by the very first registry chunk. */
#define INIT_NR_THREADS		8

/*
 * Size in bytes of the first registry chunk: the chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * the macro can be used safely inside larger expressions.
 */
#define ARENA_INIT_ALLOC		\
	(sizeof(struct registry_chunk)	\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS 100
93
e038e286
MD
/*
 * Library constructor and destructor, defined further down in this file.
 */
static
void __attribute__((constructor)) rcu_bp_init(void);
static
void __attribute__((destructor)) rcu_bp_exit(void);

/*
 * Grace-period lock: taken by synchronize_rcu(), by thread
 * registration/unregistration and held across fork().
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;

/* init_lock serializes rcu_bp_init(); "initialized" is set once it ran. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static int initialized;

/* Thread-specific key created by rcu_bp_init(), deleted by rcu_bp_exit(). */
static pthread_key_t urcu_bp_key;
105
fdee2e6d
MD
106#ifdef DEBUG_YIELD
107unsigned int yield_active;
1745be1a 108__DEFINE_URCU_TLS_GLOBAL(unsigned int, rand_yield);
fdee2e6d
MD
109#endif
110
111/*
112 * Global grace period counter.
02be5561 113 * Contains the current RCU_GP_CTR_PHASE.
fdee2e6d
MD
114 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
115 * Written to only by writer with mutex taken. Read by both writer and readers.
116 */
02be5561 117long rcu_gp_ctr = RCU_GP_COUNT;
fdee2e6d
MD
118
119/*
120 * Pointer to registry elements. Written to only by each individual reader. Read
121 * by both the reader and the writers.
122 */
1745be1a 123__DEFINE_URCU_TLS_GLOBAL(struct rcu_reader *, rcu_reader);
fdee2e6d 124
16aa9ee8 125static CDS_LIST_HEAD(registry);
fdee2e6d 126
89451e1b
MD
127struct registry_chunk {
128 size_t data_len; /* data length */
e038e286 129 size_t used; /* amount of data used */
89451e1b
MD
130 struct cds_list_head node; /* chunk_list node */
131 char data[];
132};
133
fdee2e6d 134struct registry_arena {
89451e1b 135 struct cds_list_head chunk_list;
fdee2e6d
MD
136};
137
89451e1b
MD
138static struct registry_arena registry_arena = {
139 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
140};
fdee2e6d 141
4cf1675f
MD
142/* Saved fork signal mask, protected by rcu_gp_lock */
143static sigset_t saved_fork_signal_mask;
144
/*
 * Acquire @mutex, handing any unexpected pthread error code to urcu_die().
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the mutex is acquired by
 * retrying pthread_mutex_trylock() with a 10 ms poll() pause between
 * attempts (EBUSY and EINTR are retried) instead of blocking in
 * pthread_mutex_lock().
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (!err)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
161
/*
 * Release @mutex, handing any pthread error code to urcu_die().
 */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err)
		urcu_die(err);
}
170
2dfb8b5e 171void update_counter_and_wait(void)
fdee2e6d 172{
16aa9ee8 173 CDS_LIST_HEAD(qsreaders);
fdee2e6d 174 int wait_loops = 0;
02be5561 175 struct rcu_reader *index, *tmp;
fdee2e6d 176
32c15e4e 177 /* Switch parity: 0 -> 1, 1 -> 0 */
6cf3827c 178 CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
2dfb8b5e
MD
179
180 /*
181 * Must commit qparity update to memory before waiting for other parity
182 * quiescent state. Failure to do so could result in the writer waiting
183 * forever while new readers are always accessing data (no progress).
6cf3827c 184 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
2dfb8b5e
MD
185 */
186
187 /*
5481ddb3 188 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
2dfb8b5e
MD
189 * model easier to understand. It does not have a big performance impact
190 * anyway, given this is the write-side.
191 */
5481ddb3 192 cmm_smp_mb();
2dfb8b5e 193
fdee2e6d 194 /*
02be5561 195 * Wait for each thread rcu_reader.ctr count to become 0.
fdee2e6d
MD
196 */
197 for (;;) {
198 wait_loops++;
16aa9ee8 199 cds_list_for_each_entry_safe(index, tmp, &registry, node) {
fdee2e6d 200 if (!rcu_old_gp_ongoing(&index->ctr))
16aa9ee8 201 cds_list_move(&index->node, &qsreaders);
fdee2e6d
MD
202 }
203
16aa9ee8 204 if (cds_list_empty(&registry)) {
fdee2e6d
MD
205 break;
206 } else {
207 if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
208 usleep(RCU_SLEEP_DELAY);
209 else
06f22bdb 210 caa_cpu_relax();
fdee2e6d
MD
211 }
212 }
213 /* put back the reader list in the registry */
16aa9ee8 214 cds_list_splice(&qsreaders, &registry);
fdee2e6d
MD
215}
216
217void synchronize_rcu(void)
218{
219 sigset_t newmask, oldmask;
220 int ret;
221
264716f7 222 ret = sigfillset(&newmask);
fdee2e6d 223 assert(!ret);
264716f7 224 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
fdee2e6d
MD
225 assert(!ret);
226
6abb4bd5 227 mutex_lock(&rcu_gp_lock);
fdee2e6d 228
16aa9ee8 229 if (cds_list_empty(&registry))
2dfb8b5e 230 goto out;
fdee2e6d
MD
231
232 /* All threads should read qparity before accessing data structure
2dfb8b5e 233 * where new ptr points to. */
fdee2e6d 234 /* Write new ptr before changing the qparity */
5481ddb3 235 cmm_smp_mb();
fdee2e6d 236
fdee2e6d
MD
237 /*
238 * Wait for previous parity to be empty of readers.
239 */
2dfb8b5e 240 update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
fdee2e6d
MD
241
242 /*
5481ddb3 243 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
fdee2e6d
MD
244 * model easier to understand. It does not have a big performance impact
245 * anyway, given this is the write-side.
246 */
5481ddb3 247 cmm_smp_mb();
fdee2e6d 248
fdee2e6d 249 /*
2dfb8b5e 250 * Wait for previous parity to be empty of readers.
fdee2e6d 251 */
2dfb8b5e 252 update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
fdee2e6d
MD
253
254 /*
2dfb8b5e
MD
255 * Finish waiting for reader threads before letting the old ptr being
256 * freed.
fdee2e6d 257 */
5481ddb3 258 cmm_smp_mb();
2dfb8b5e 259out:
6abb4bd5 260 mutex_unlock(&rcu_gp_lock);
fdee2e6d
MD
261 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
262 assert(!ret);
263}
264
265/*
266 * library wrappers to be used by non-LGPL compatible source code.
267 */
268
/* Out-of-line wrapper symbol around _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
273
/* Out-of-line wrapper symbol around _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
278
279/*
89451e1b
MD
280 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
281 * Else, try expanding the last chunk. If this fails, allocate a new
282 * chunk twice as big as the last chunk.
283 * Memory used by chunks _never_ moves. A chunk could theoretically be
284 * freed when all "used" slots are released, but we don't do it at this
285 * point.
fdee2e6d 286 */
89451e1b
MD
287static
288void expand_arena(struct registry_arena *arena)
fdee2e6d 289{
89451e1b
MD
290 struct registry_chunk *new_chunk, *last_chunk;
291 size_t old_chunk_len, new_chunk_len;
292
293 /* No chunk. */
294 if (cds_list_empty(&arena->chunk_list)) {
295 assert(ARENA_INIT_ALLOC >=
296 sizeof(struct registry_chunk)
297 + sizeof(struct rcu_reader));
298 new_chunk_len = ARENA_INIT_ALLOC;
299 new_chunk = mmap(NULL, new_chunk_len,
0bcbf365
MD
300 PROT_READ | PROT_WRITE,
301 MAP_ANONYMOUS | MAP_PRIVATE,
302 -1, 0);
89451e1b
MD
303 if (new_chunk == MAP_FAILED)
304 abort();
305 bzero(new_chunk, new_chunk_len);
306 new_chunk->data_len =
307 new_chunk_len - sizeof(struct registry_chunk);
308 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
309 return; /* We're done. */
310 }
0bcbf365 311
89451e1b
MD
312 /* Try expanding last chunk. */
313 last_chunk = cds_list_entry(arena->chunk_list.prev,
314 struct registry_chunk, node);
315 old_chunk_len =
316 last_chunk->data_len + sizeof(struct registry_chunk);
317 new_chunk_len = old_chunk_len << 1;
318
319 /* Don't allow memory mapping to move, just expand. */
320 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
321 new_chunk_len, 0);
322 if (new_chunk != MAP_FAILED) {
323 /* Should not have moved. */
324 assert(new_chunk == last_chunk);
325 bzero((char *) last_chunk + old_chunk_len,
326 new_chunk_len - old_chunk_len);
327 last_chunk->data_len =
328 new_chunk_len - sizeof(struct registry_chunk);
329 return; /* We're done. */
330 }
0617bf4c 331
89451e1b
MD
332 /* Remap did not succeed, we need to add a new chunk. */
333 new_chunk = mmap(NULL, new_chunk_len,
334 PROT_READ | PROT_WRITE,
335 MAP_ANONYMOUS | MAP_PRIVATE,
336 -1, 0);
337 if (new_chunk == MAP_FAILED)
338 abort();
339 bzero(new_chunk, new_chunk_len);
340 new_chunk->data_len =
341 new_chunk_len - sizeof(struct registry_chunk);
342 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
343}
344
345static
346struct rcu_reader *arena_alloc(struct registry_arena *arena)
347{
348 struct registry_chunk *chunk;
349 struct rcu_reader *rcu_reader_reg;
350 int expand_done = 0; /* Only allow to expand once per alloc */
351 size_t len = sizeof(struct rcu_reader);
fdee2e6d 352
89451e1b
MD
353retry:
354 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
355 if (chunk->data_len - chunk->used < len)
356 continue;
357 /* Find spot */
358 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
359 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
360 rcu_reader_reg++) {
361 if (!rcu_reader_reg->alloc) {
362 rcu_reader_reg->alloc = 1;
363 chunk->used += len;
364 return rcu_reader_reg;
365 }
366 }
367 }
368
369 if (!expand_done) {
370 expand_arena(arena);
371 expand_done = 1;
372 goto retry;
373 }
374
375 return NULL;
fdee2e6d
MD
376}
377
378/* Called with signals off and mutex locked */
89451e1b
MD
379static
380void add_thread(void)
fdee2e6d 381{
02be5561 382 struct rcu_reader *rcu_reader_reg;
e038e286 383 int ret;
fdee2e6d 384
89451e1b
MD
385 rcu_reader_reg = arena_alloc(&registry_arena);
386 if (!rcu_reader_reg)
387 abort();
e038e286
MD
388 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
389 if (ret)
390 abort();
fdee2e6d
MD
391
392 /* Add to registry */
02be5561
MD
393 rcu_reader_reg->tid = pthread_self();
394 assert(rcu_reader_reg->ctr == 0);
16aa9ee8 395 cds_list_add(&rcu_reader_reg->node, &registry);
89451e1b
MD
396 /*
397 * Reader threads are pointing to the reader registry. This is
398 * why its memory should never be relocated.
399 */
bd252a04 400 URCU_TLS(rcu_reader) = rcu_reader_reg;
fdee2e6d
MD
401}
402
e038e286
MD
403/* Called with mutex locked */
404static
405void cleanup_thread(struct registry_chunk *chunk,
406 struct rcu_reader *rcu_reader_reg)
407{
408 rcu_reader_reg->ctr = 0;
409 cds_list_del(&rcu_reader_reg->node);
410 rcu_reader_reg->tid = 0;
411 rcu_reader_reg->alloc = 0;
412 chunk->used -= sizeof(struct rcu_reader);
413}
414
415static
416struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
fdee2e6d 417{
89451e1b 418 struct registry_chunk *chunk;
fdee2e6d 419
89451e1b 420 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
e038e286
MD
421 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
422 continue;
423 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
424 continue;
425 return chunk;
426 }
427 return NULL;
428}
89451e1b 429
e038e286
MD
430/* Called with signals off and mutex locked */
431static
432void remove_thread(void)
433{
434 struct rcu_reader *rcu_reader_reg;
89451e1b 435
e038e286
MD
436 rcu_reader_reg = URCU_TLS(rcu_reader);
437 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
438 URCU_TLS(rcu_reader) = NULL;
fdee2e6d
MD
439}
440
441/* Disable signals, take mutex, add to registry */
442void rcu_bp_register(void)
443{
444 sigset_t newmask, oldmask;
445 int ret;
446
264716f7 447 ret = sigfillset(&newmask);
e038e286
MD
448 if (ret)
449 abort();
264716f7 450 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
e038e286
MD
451 if (ret)
452 abort();
fdee2e6d
MD
453
454 /*
455 * Check if a signal concurrently registered our thread since
e038e286
MD
456 * the check in rcu_read_lock().
457 */
bd252a04 458 if (URCU_TLS(rcu_reader))
fdee2e6d
MD
459 goto end;
460
e038e286
MD
461 /*
462 * Take care of early registration before urcu_bp constructor.
463 */
464 rcu_bp_init();
465
6abb4bd5 466 mutex_lock(&rcu_gp_lock);
fdee2e6d 467 add_thread();
6abb4bd5 468 mutex_unlock(&rcu_gp_lock);
fdee2e6d
MD
469end:
470 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
e038e286
MD
471 if (ret)
472 abort();
473}
474
475/* Disable signals, take mutex, remove from registry */
476static
477void rcu_bp_unregister(void)
478{
479 sigset_t newmask, oldmask;
480 int ret;
481
482 ret = sigfillset(&newmask);
483 if (ret)
484 abort();
485 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
486 if (ret)
487 abort();
488
489 mutex_lock(&rcu_gp_lock);
490 remove_thread();
491 mutex_unlock(&rcu_gp_lock);
492 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
493 if (ret)
494 abort();
495}
496
497/*
498 * Remove thread from the registry when it exits, and flag it as
499 * destroyed so garbage collection can take care of it.
500 */
501static
502void urcu_bp_thread_exit_notifier(void *rcu_key)
503{
504 assert(rcu_key == URCU_TLS(rcu_reader));
505 rcu_bp_unregister();
506}
507
508static
509void rcu_bp_init(void)
510{
511 mutex_lock(&init_lock);
512 if (!initialized) {
513 int ret;
514
515 ret = pthread_key_create(&urcu_bp_key,
516 urcu_bp_thread_exit_notifier);
517 if (ret)
518 abort();
519 initialized = 1;
520 }
521 mutex_unlock(&init_lock);
fdee2e6d
MD
522}
523
e038e286 524static
9380711a 525void rcu_bp_exit(void)
fdee2e6d 526{
89451e1b 527 struct registry_chunk *chunk, *tmp;
e038e286 528 int ret;
89451e1b
MD
529
530 cds_list_for_each_entry_safe(chunk, tmp,
531 &registry_arena.chunk_list, node) {
532 munmap(chunk, chunk->data_len + sizeof(struct registry_chunk));
533 }
e038e286
MD
534 ret = pthread_key_delete(urcu_bp_key);
535 if (ret)
536 abort();
fdee2e6d 537}
4cf1675f
MD
538
539/*
540 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
541 * a concurrent thread executing with this same lock held. This ensures that the
542 * registry is in a coherent state in the child.
543 */
544void rcu_bp_before_fork(void)
545{
546 sigset_t newmask, oldmask;
547 int ret;
548
264716f7 549 ret = sigfillset(&newmask);
4cf1675f 550 assert(!ret);
264716f7 551 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
4cf1675f
MD
552 assert(!ret);
553 mutex_lock(&rcu_gp_lock);
554 saved_fork_signal_mask = oldmask;
555}
556
557void rcu_bp_after_fork_parent(void)
558{
559 sigset_t oldmask;
560 int ret;
561
562 oldmask = saved_fork_signal_mask;
563 mutex_unlock(&rcu_gp_lock);
564 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
565 assert(!ret);
566}
567
e038e286
MD
568/*
569 * Prune all entries from registry except our own thread. Fits the Linux
570 * fork behavior. Called with rcu_gp_lock held.
571 */
572static
573void urcu_bp_prune_registry(void)
574{
575 struct registry_chunk *chunk;
576 struct rcu_reader *rcu_reader_reg;
577
578 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
579 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
580 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
581 rcu_reader_reg++) {
582 if (!rcu_reader_reg->alloc)
583 continue;
584 if (rcu_reader_reg->tid == pthread_self())
585 continue;
586 cleanup_thread(chunk, rcu_reader_reg);
587 }
588 }
589}
590
4cf1675f
MD
591void rcu_bp_after_fork_child(void)
592{
593 sigset_t oldmask;
594 int ret;
595
e038e286 596 urcu_bp_prune_registry();
4cf1675f
MD
597 oldmask = saved_fork_signal_mask;
598 mutex_unlock(&rcu_gp_lock);
599 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
600 assert(!ret);
601}
5e77fc1f 602
9b7981bb
MD
/* Out-of-line wrapper symbol: returns _rcu_dereference(@p). */
void *rcu_dereference_sym_bp(void *p)
{
	void *value = _rcu_dereference(p);

	return value;
}
607
5efd3cd2
MD
/*
 * Out-of-line wrapper symbol: issue a write barrier, then store @v into
 * *@p with uatomic_set(). Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Barrier is issued before the pointer store. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
614
/*
 * Out-of-line wrapper symbol: issue a write barrier, then exchange *@p
 * with @v through uatomic_xchg() and return its result.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);
	return previous;
}
620
/*
 * Out-of-line wrapper symbol: issue a write barrier, then run
 * uatomic_cmpxchg() on *@p with @old and @_new and return its result.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *observed;

	cmm_wmb();
	observed = uatomic_cmpxchg(p, old, _new);
	return observed;
}
626
5e6b23a6 627DEFINE_RCU_FLAVOR(rcu_flavor);
541d828d 628
5e77fc1f 629#include "urcu-call-rcu-impl.h"
0376e7b2 630#include "urcu-defer-impl.h"
This page took 0.055633 seconds and 4 git commands to generate.