Version 0.7.17
[userspace-rcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
56 #ifdef __linux__
/*
 * Linux flavour of the remap helper: forwards its arguments unchanged
 * to mremap() and hands back whatever mremap() returned.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
63 #else
64
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Stand-in for mremap() on non-Linux systems. It never remaps anything:
 * it only checks (via assert) that the caller did not request
 * MREMAP_MAYMOVE and then reports failure with MAP_FAILED.
 * This is not a generic mremap replacement.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
80 #endif
81
/* Sleep delay, in ms, used between registry scans once busy-waiting stops. */
#define RCU_SLEEP_DELAY_MS	10
/* Number of reader slots provisioned in the first arena chunk. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the first arena chunk: one chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * the macro behaves as a single operand in any surrounding expression.
 */
#define ARENA_INIT_ALLOC					\
	(sizeof(struct registry_chunk)				\
	 + INIT_NR_THREADS * sizeof(struct rcu_reader))
88
/*
 * Number of active (busy-wait) checks for reader quiescent states
 * performed before falling back to sleeping between checks.
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
/*
 * Library reference count: incremented by rcu_bp_init(), decremented by
 * _rcu_bp_exit().
 */
static int rcu_bp_refcount;

/* Run automatically at load time and at unload/exit time, respectively. */
static void rcu_bp_init(void) __attribute__((constructor));
static void _rcu_bp_exit(void) __attribute__((destructor));
101
/*
 * rcu_gp_lock: mutual exclusion between threads calling
 * synchronize_rcu().
 */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * rcu_registry_lock: mutual exclusion between threads registering and
 * unregistering themselves to/from the registry, and with threads
 * reading that registry from synchronize_rcu(). It is not held
 * continuously while a grace period is awaited: it is released from
 * time to time between iterations over the registry.
 * rcu_registry_lock may nest inside rcu_gp_lock.
 */
static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Taken by rcu_bp_init() and _rcu_bp_exit() around refcount updates. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
/* Set to 1 by rcu_bp_init() when it creates urcu_bp_key. */
static int initialized;

/* Thread-specific key whose destructor unregisters an exiting thread. */
static pthread_key_t urcu_bp_key;
122
#if defined(DEBUG_YIELD)
/* Debug-only globals, compiled in only when DEBUG_YIELD is defined. */
unsigned int yield_active;
__DEFINE_URCU_TLS_GLOBAL(unsigned int, rand_yield);
#endif /* DEBUG_YIELD */
127
128 /*
129 * Global grace period counter.
130 * Contains the current RCU_GP_CTR_PHASE.
131 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
132 * Written to only by writer with mutex taken. Read by both writer and readers.
133 */
134 long rcu_gp_ctr = RCU_GP_COUNT;
135
136 /*
137 * Pointer to registry elements. Written to only by each individual reader. Read
138 * by both the reader and the writers.
139 */
140 __DEFINE_URCU_TLS_GLOBAL(struct rcu_reader *, rcu_reader);
141
142 static CDS_LIST_HEAD(registry);
143
144 struct registry_chunk {
145 size_t data_len; /* data length */
146 size_t used; /* amount of data used */
147 struct cds_list_head node; /* chunk_list node */
148 char data[];
149 };
150
151 struct registry_arena {
152 struct cds_list_head chunk_list;
153 };
154
155 static struct registry_arena registry_arena = {
156 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
157 };
158
159 /* Saved fork signal mask, protected by rcu_gp_lock */
160 static sigset_t saved_fork_signal_mask;
161
/*
 * Acquire mutex, dying through urcu_die() on any unexpected error.
 * When DISTRUST_SIGNALS_EXTREME is defined the lock is taken by polling
 * pthread_mutex_trylock() every 10 ms instead of blocking.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		/* EBUSY and EINTR just mean "try again later". */
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
178
/* Release mutex, dying through urcu_die() if the unlock fails. */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
187
188 /*
189 * Always called with rcu_registry lock held. Releases this lock between
190 * iterations and grabs it again. Holds the lock when it returns.
191 */
192 void update_counter_and_wait(void)
193 {
194 CDS_LIST_HEAD(qsreaders);
195 unsigned int wait_loops = 0;
196 struct rcu_reader *index, *tmp;
197
198 /* Switch parity: 0 -> 1, 1 -> 0 */
199 CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
200
201 /*
202 * Must commit qparity update to memory before waiting for other parity
203 * quiescent state. Failure to do so could result in the writer waiting
204 * forever while new readers are always accessing data (no progress).
205 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
206 */
207
208 /*
209 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
210 * model easier to understand. It does not have a big performance impact
211 * anyway, given this is the write-side.
212 */
213 cmm_smp_mb();
214
215 /*
216 * Wait for each thread rcu_reader.ctr count to become 0.
217 */
218 for (;;) {
219 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
220 wait_loops++;
221
222 cds_list_for_each_entry_safe(index, tmp, &registry, node) {
223 if (!rcu_old_gp_ongoing(&index->ctr))
224 cds_list_move(&index->node, &qsreaders);
225 }
226
227 if (cds_list_empty(&registry)) {
228 break;
229 } else {
230 /* Temporarily unlock the registry lock. */
231 mutex_unlock(&rcu_registry_lock);
232 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
233 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
234 else
235 caa_cpu_relax();
236 /* Re-lock the registry lock before the next loop. */
237 mutex_lock(&rcu_registry_lock);
238 }
239 }
240 /* put back the reader list in the registry */
241 cds_list_splice(&qsreaders, &registry);
242 }
243
244 void synchronize_rcu(void)
245 {
246 sigset_t newmask, oldmask;
247 int ret;
248
249 ret = sigfillset(&newmask);
250 assert(!ret);
251 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
252 assert(!ret);
253
254 mutex_lock(&rcu_gp_lock);
255 mutex_lock(&rcu_registry_lock);
256
257 if (cds_list_empty(&registry))
258 goto out;
259
260 /* All threads should read qparity before accessing data structure
261 * where new ptr points to. */
262 /* Write new ptr before changing the qparity */
263 cmm_smp_mb();
264
265 /*
266 * Wait for previous parity to be empty of readers.
267 * update_counter_and_wait() can release and grab again
268 * rcu_registry_lock interally.
269 */
270 update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
271
272 /*
273 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
274 * model easier to understand. It does not have a big performance impact
275 * anyway, given this is the write-side.
276 */
277 cmm_smp_mb();
278
279 /*
280 * Wait for previous parity to be empty of readers.
281 * update_counter_and_wait() can release and grab again
282 * rcu_registry_lock interally.
283 */
284 update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
285
286 /*
287 * Finish waiting for reader threads before letting the old ptr being
288 * freed.
289 */
290 cmm_smp_mb();
291 out:
292 mutex_unlock(&rcu_registry_lock);
293 mutex_unlock(&rcu_gp_lock);
294 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
295 assert(!ret);
296 }
297
/*
 * Library wrappers, to be used by source code that is not
 * LGPL-compatible.
 */

/* Out-of-line entry point that simply calls _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
306
/* Out-of-line entry point that simply calls _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
311
312 /*
313 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
314 * Else, try expanding the last chunk. If this fails, allocate a new
315 * chunk twice as big as the last chunk.
316 * Memory used by chunks _never_ moves. A chunk could theoretically be
317 * freed when all "used" slots are released, but we don't do it at this
318 * point.
319 */
320 static
321 void expand_arena(struct registry_arena *arena)
322 {
323 struct registry_chunk *new_chunk, *last_chunk;
324 size_t old_chunk_len, new_chunk_len;
325
326 /* No chunk. */
327 if (cds_list_empty(&arena->chunk_list)) {
328 assert(ARENA_INIT_ALLOC >=
329 sizeof(struct registry_chunk)
330 + sizeof(struct rcu_reader));
331 new_chunk_len = ARENA_INIT_ALLOC;
332 new_chunk = mmap(NULL, new_chunk_len,
333 PROT_READ | PROT_WRITE,
334 MAP_ANONYMOUS | MAP_PRIVATE,
335 -1, 0);
336 if (new_chunk == MAP_FAILED)
337 abort();
338 bzero(new_chunk, new_chunk_len);
339 new_chunk->data_len =
340 new_chunk_len - sizeof(struct registry_chunk);
341 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
342 return; /* We're done. */
343 }
344
345 /* Try expanding last chunk. */
346 last_chunk = cds_list_entry(arena->chunk_list.prev,
347 struct registry_chunk, node);
348 old_chunk_len =
349 last_chunk->data_len + sizeof(struct registry_chunk);
350 new_chunk_len = old_chunk_len << 1;
351
352 /* Don't allow memory mapping to move, just expand. */
353 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
354 new_chunk_len, 0);
355 if (new_chunk != MAP_FAILED) {
356 /* Should not have moved. */
357 assert(new_chunk == last_chunk);
358 bzero((char *) last_chunk + old_chunk_len,
359 new_chunk_len - old_chunk_len);
360 last_chunk->data_len =
361 new_chunk_len - sizeof(struct registry_chunk);
362 return; /* We're done. */
363 }
364
365 /* Remap did not succeed, we need to add a new chunk. */
366 new_chunk = mmap(NULL, new_chunk_len,
367 PROT_READ | PROT_WRITE,
368 MAP_ANONYMOUS | MAP_PRIVATE,
369 -1, 0);
370 if (new_chunk == MAP_FAILED)
371 abort();
372 bzero(new_chunk, new_chunk_len);
373 new_chunk->data_len =
374 new_chunk_len - sizeof(struct registry_chunk);
375 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
376 }
377
378 static
379 struct rcu_reader *arena_alloc(struct registry_arena *arena)
380 {
381 struct registry_chunk *chunk;
382 struct rcu_reader *rcu_reader_reg;
383 int expand_done = 0; /* Only allow to expand once per alloc */
384 size_t len = sizeof(struct rcu_reader);
385
386 retry:
387 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
388 if (chunk->data_len - chunk->used < len)
389 continue;
390 /* Find spot */
391 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
392 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
393 rcu_reader_reg++) {
394 if (!rcu_reader_reg->alloc) {
395 rcu_reader_reg->alloc = 1;
396 chunk->used += len;
397 return rcu_reader_reg;
398 }
399 }
400 }
401
402 if (!expand_done) {
403 expand_arena(arena);
404 expand_done = 1;
405 goto retry;
406 }
407
408 return NULL;
409 }
410
411 /* Called with signals off and mutex locked */
412 static
413 void add_thread(void)
414 {
415 struct rcu_reader *rcu_reader_reg;
416 int ret;
417
418 rcu_reader_reg = arena_alloc(&registry_arena);
419 if (!rcu_reader_reg)
420 abort();
421 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
422 if (ret)
423 abort();
424
425 /* Add to registry */
426 rcu_reader_reg->tid = pthread_self();
427 assert(rcu_reader_reg->ctr == 0);
428 cds_list_add(&rcu_reader_reg->node, &registry);
429 /*
430 * Reader threads are pointing to the reader registry. This is
431 * why its memory should never be relocated.
432 */
433 URCU_TLS(rcu_reader) = rcu_reader_reg;
434 }
435
436 /* Called with mutex locked */
437 static
438 void cleanup_thread(struct registry_chunk *chunk,
439 struct rcu_reader *rcu_reader_reg)
440 {
441 rcu_reader_reg->ctr = 0;
442 cds_list_del(&rcu_reader_reg->node);
443 rcu_reader_reg->tid = 0;
444 rcu_reader_reg->alloc = 0;
445 chunk->used -= sizeof(struct rcu_reader);
446 }
447
448 static
449 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
450 {
451 struct registry_chunk *chunk;
452
453 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
454 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
455 continue;
456 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
457 continue;
458 return chunk;
459 }
460 return NULL;
461 }
462
463 /* Called with signals off and mutex locked */
464 static
465 void remove_thread(struct rcu_reader *rcu_reader_reg)
466 {
467 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
468 URCU_TLS(rcu_reader) = NULL;
469 }
470
471 /* Disable signals, take mutex, add to registry */
472 void rcu_bp_register(void)
473 {
474 sigset_t newmask, oldmask;
475 int ret;
476
477 ret = sigfillset(&newmask);
478 if (ret)
479 abort();
480 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
481 if (ret)
482 abort();
483
484 /*
485 * Check if a signal concurrently registered our thread since
486 * the check in rcu_read_lock().
487 */
488 if (URCU_TLS(rcu_reader))
489 goto end;
490
491 /*
492 * Take care of early registration before urcu_bp constructor.
493 */
494 rcu_bp_init();
495
496 mutex_lock(&rcu_registry_lock);
497 add_thread();
498 mutex_unlock(&rcu_registry_lock);
499 end:
500 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
501 if (ret)
502 abort();
503 }
504
505 /* Disable signals, take mutex, remove from registry */
506 static
507 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
508 {
509 sigset_t newmask, oldmask;
510 int ret;
511
512 ret = sigfillset(&newmask);
513 if (ret)
514 abort();
515 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
516 if (ret)
517 abort();
518
519 mutex_lock(&rcu_registry_lock);
520 remove_thread(rcu_reader_reg);
521 mutex_unlock(&rcu_registry_lock);
522 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
523 if (ret)
524 abort();
525 _rcu_bp_exit();
526 }
527
/*
 * Destructor installed on urcu_bp_key by rcu_bp_init(): removes the
 * exiting thread's reader slot from the registry. rcu_key is the value
 * stored with pthread_setspecific(), i.e. the thread's struct
 * rcu_reader.
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *reader = rcu_key;

	rcu_bp_unregister(reader);
}
537
538 static
539 void rcu_bp_init(void)
540 {
541 mutex_lock(&init_lock);
542 if (!rcu_bp_refcount++) {
543 int ret;
544
545 ret = pthread_key_create(&urcu_bp_key,
546 urcu_bp_thread_exit_notifier);
547 if (ret)
548 abort();
549 initialized = 1;
550 }
551 mutex_unlock(&init_lock);
552 }
553
554 static
555 void _rcu_bp_exit(void)
556 {
557 mutex_lock(&init_lock);
558 if (!--rcu_bp_refcount) {
559 struct registry_chunk *chunk, *tmp;
560 int ret;
561
562 cds_list_for_each_entry_safe(chunk, tmp,
563 &registry_arena.chunk_list, node) {
564 munmap(chunk, chunk->data_len
565 + sizeof(struct registry_chunk));
566 }
567 ret = pthread_key_delete(urcu_bp_key);
568 if (ret)
569 abort();
570 }
571 mutex_unlock(&init_lock);
572 }
573
/*
 * Intentionally empty. Kept for ABI compatibility within stable
 * versions: it was never exposed through a header, but the symbol has
 * to remain in the .so until the soname is bumped.
 */
void rcu_bp_exit(void)
{
}
582
583 /*
584 * Holding the rcu_gp_lock and rcu_registry_lock across fork will make
585 * sure we fork() don't race with a concurrent thread executing with
586 * any of those locks held. This ensures that the registry and data
587 * protected by rcu_gp_lock are in a coherent state in the child.
588 */
589 void rcu_bp_before_fork(void)
590 {
591 sigset_t newmask, oldmask;
592 int ret;
593
594 ret = sigfillset(&newmask);
595 assert(!ret);
596 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
597 assert(!ret);
598 mutex_lock(&rcu_gp_lock);
599 mutex_lock(&rcu_registry_lock);
600 saved_fork_signal_mask = oldmask;
601 }
602
603 void rcu_bp_after_fork_parent(void)
604 {
605 sigset_t oldmask;
606 int ret;
607
608 oldmask = saved_fork_signal_mask;
609 mutex_unlock(&rcu_registry_lock);
610 mutex_unlock(&rcu_gp_lock);
611 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
612 assert(!ret);
613 }
614
615 /*
616 * Prune all entries from registry except our own thread. Fits the Linux
617 * fork behavior. Called with rcu_gp_lock and rcu_registry_lock held.
618 */
619 static
620 void urcu_bp_prune_registry(void)
621 {
622 struct registry_chunk *chunk;
623 struct rcu_reader *rcu_reader_reg;
624
625 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
626 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
627 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
628 rcu_reader_reg++) {
629 if (!rcu_reader_reg->alloc)
630 continue;
631 if (rcu_reader_reg->tid == pthread_self())
632 continue;
633 cleanup_thread(chunk, rcu_reader_reg);
634 }
635 }
636 }
637
638 void rcu_bp_after_fork_child(void)
639 {
640 sigset_t oldmask;
641 int ret;
642
643 urcu_bp_prune_registry();
644 oldmask = saved_fork_signal_mask;
645 mutex_unlock(&rcu_registry_lock);
646 mutex_unlock(&rcu_gp_lock);
647 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
648 assert(!ret);
649 }
650
/* Exported symbol that returns the result of _rcu_dereference(p). */
void *rcu_dereference_sym_bp(void *p)
{
	return _rcu_dereference(p);
}
655
/*
 * Exported symbol: issue a write barrier, store v into *p with
 * uatomic_set(), and return v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Order earlier writes before the pointer store below. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
662
/*
 * Exported symbol: issue a write barrier, then return the result of
 * uatomic_xchg(p, v).
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	/* Order earlier writes before the exchange below. */
	cmm_wmb();

	return uatomic_xchg(p, v);
}
668
/*
 * Exported symbol: issue a write barrier, then return the result of
 * uatomic_cmpxchg(p, old, _new).
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	/* Order earlier writes before the compare-and-exchange below. */
	cmm_wmb();

	return uatomic_cmpxchg(p, old, _new);
}
674
/*
 * NOTE(review): presumably instantiates the flavor descriptor named
 * rcu_flavor for this RCU flavor; the macro is defined outside this
 * file, so confirm against its definition.
 */
DEFINE_RCU_FLAVOR(rcu_flavor);
676
677 #include "urcu-call-rcu-impl.h"
678 #include "urcu-defer-impl.h"
This page took 0.041655 seconds and 4 git commands to generate.