4b4fe45b2c4bcaefe3dde281d0074f87caf0bc6b
[urcu.git] / src / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _LGPL_SOURCE
27 #include <stdio.h>
28 #include <pthread.h>
29 #include <signal.h>
30 #include <assert.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <errno.h>
34 #include <poll.h>
35 #include <unistd.h>
36 #include <sys/mman.h>
37
38 #include "urcu/arch.h"
39 #include "urcu/wfcqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
/* Use MAP_ANON when the headers do not provide MAP_ANONYMOUS. */
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS	MAP_ANON
#endif

#if !defined(__linux__)
/* Flag values defined locally for the non-Linux build. */
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2
#endif

/*
 * mremap_wrapper - resize an existing mapping.
 *
 * On Linux this forwards its arguments unchanged to mremap(). On other
 * systems no remapping is attempted: the wrapper always reports
 * MAP_FAILED, and asserts that the caller did not request
 * MREMAP_MAYMOVE. The non-Linux variant is therefore not a generic
 * mremap replacement.
 *
 * Returns the address handed back by mremap() on Linux, MAP_FAILED
 * elsewhere.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
#if defined(__linux__)
	return mremap(old_address, old_size, new_size, flags);
#else
	assert(!(flags & MREMAP_MAYMOVE));

	return MAP_FAILED;
#endif
}
81
/* Sleep delay, in milliseconds, used between registry scans. */
#define RCU_SLEEP_DELAY_MS	10
/* Number of reader slots the initial arena chunk is sized for. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the initial arena chunk: one chunk header followed
 * by INIT_NR_THREADS reader slots. The expansion is fully parenthesized
 * so that it keeps its value when used inside a larger expression.
 */
#define ARENA_INIT_ALLOC				\
	(sizeof(struct registry_chunk)			\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
/*
 * Initialization reference count: incremented by rcu_bp_init() and
 * decremented by rcu_bp_exit().
 */
static int rcu_bp_refcount;
96
/*
 * membarrier() shim. When the headers expose the system call number,
 * issue the raw system call; otherwise evaluate to -ENOSYS so that the
 * caller falls back on memory barriers.
 */
#if defined(__NR_membarrier)
# define membarrier(...)	syscall(__NR_membarrier, __VA_ARGS__)
#else
# define membarrier(...)	-ENOSYS
#endif

/* Commands passed as first argument to membarrier(). */
enum membarrier_cmd {
	MEMBARRIER_CMD_QUERY	= 0,
	MEMBARRIER_CMD_SHARED	= (1 << 0),
};
108
/* Library constructor and destructor; both are defined later in this file. */
static void __attribute__((constructor)) rcu_bp_init(void);
static void __attribute__((destructor)) rcu_bp_exit(void);

/*
 * Set to 1 by rcu_bp_init() when the membarrier() query reports
 * MEMBARRIER_CMD_SHARED; read by smp_mb_master().
 */
int urcu_bp_has_sys_membarrier;
115
116 /*
117 * rcu_gp_lock ensures mutual exclusion between threads calling
118 * synchronize_rcu().
119 */
120 static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
121 /*
122 * rcu_registry_lock ensures mutual exclusion between threads
123 * registering and unregistering themselves to/from the registry, and
124 * with threads reading that registry from synchronize_rcu(). However,
125 * this lock is not held all the way through the completion of awaiting
126 * for the grace period. It is sporadically released between iterations
127 * on the registry.
128 * rcu_registry_lock may nest inside rcu_gp_lock.
129 */
130 static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
131
132 static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
133 static int initialized;
134
135 static pthread_key_t urcu_bp_key;
136
137 struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
138
139 /*
140 * Pointer to registry elements. Written to only by each individual reader. Read
141 * by both the reader and the writers.
142 */
143 DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
144
145 static CDS_LIST_HEAD(registry);
146
147 struct registry_chunk {
148 size_t data_len; /* data length */
149 size_t used; /* amount of data used */
150 struct cds_list_head node; /* chunk_list node */
151 char data[];
152 };
153
154 struct registry_arena {
155 struct cds_list_head chunk_list;
156 };
157
158 static struct registry_arena registry_arena = {
159 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
160 };
161
162 /* Saved fork signal mask, protected by rcu_gp_lock */
163 static sigset_t saved_fork_signal_mask;
164
/*
 * mutex_lock - acquire @mutex, terminating through urcu_die() on error.
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the mutex is polled with
 * pthread_mutex_trylock(), sleeping 10 ms after each EBUSY or EINTR
 * result, instead of blocking in pthread_mutex_lock().
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
181
/*
 * mutex_unlock - release @mutex, terminating through urcu_die() when
 * pthread_mutex_unlock() reports an error.
 */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
190
191 static void smp_mb_master(void)
192 {
193 if (caa_likely(urcu_bp_has_sys_membarrier))
194 (void) membarrier(MEMBARRIER_CMD_SHARED, 0);
195 else
196 cmm_smp_mb();
197 }
198
199 /*
200 * Always called with rcu_registry lock held. Releases this lock between
201 * iterations and grabs it again. Holds the lock when it returns.
202 */
203 static void wait_for_readers(struct cds_list_head *input_readers,
204 struct cds_list_head *cur_snap_readers,
205 struct cds_list_head *qsreaders)
206 {
207 unsigned int wait_loops = 0;
208 struct rcu_reader *index, *tmp;
209
210 /*
211 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
212 * indicate quiescence (not nested), or observe the current
213 * rcu_gp.ctr value.
214 */
215 for (;;) {
216 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
217 wait_loops++;
218
219 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
220 switch (rcu_reader_state(&index->ctr)) {
221 case RCU_READER_ACTIVE_CURRENT:
222 if (cur_snap_readers) {
223 cds_list_move(&index->node,
224 cur_snap_readers);
225 break;
226 }
227 /* Fall-through */
228 case RCU_READER_INACTIVE:
229 cds_list_move(&index->node, qsreaders);
230 break;
231 case RCU_READER_ACTIVE_OLD:
232 /*
233 * Old snapshot. Leaving node in
234 * input_readers will make us busy-loop
235 * until the snapshot becomes current or
236 * the reader becomes inactive.
237 */
238 break;
239 }
240 }
241
242 if (cds_list_empty(input_readers)) {
243 break;
244 } else {
245 /* Temporarily unlock the registry lock. */
246 mutex_unlock(&rcu_registry_lock);
247 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
248 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
249 else
250 caa_cpu_relax();
251 /* Re-lock the registry lock before the next loop. */
252 mutex_lock(&rcu_registry_lock);
253 }
254 }
255 }
256
257 void synchronize_rcu(void)
258 {
259 CDS_LIST_HEAD(cur_snap_readers);
260 CDS_LIST_HEAD(qsreaders);
261 sigset_t newmask, oldmask;
262 int ret;
263
264 ret = sigfillset(&newmask);
265 assert(!ret);
266 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
267 assert(!ret);
268
269 mutex_lock(&rcu_gp_lock);
270
271 mutex_lock(&rcu_registry_lock);
272
273 if (cds_list_empty(&registry))
274 goto out;
275
276 /* All threads should read qparity before accessing data structure
277 * where new ptr points to. */
278 /* Write new ptr before changing the qparity */
279 smp_mb_master();
280
281 /*
282 * Wait for readers to observe original parity or be quiescent.
283 * wait_for_readers() can release and grab again rcu_registry_lock
284 * interally.
285 */
286 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
287
288 /*
289 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
290 * model easier to understand. It does not have a big performance impact
291 * anyway, given this is the write-side.
292 */
293 cmm_smp_mb();
294
295 /* Switch parity: 0 -> 1, 1 -> 0 */
296 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
297
298 /*
299 * Must commit qparity update to memory before waiting for other parity
300 * quiescent state. Failure to do so could result in the writer waiting
301 * forever while new readers are always accessing data (no progress).
302 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
303 */
304
305 /*
306 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
307 * model easier to understand. It does not have a big performance impact
308 * anyway, given this is the write-side.
309 */
310 cmm_smp_mb();
311
312 /*
313 * Wait for readers to observe new parity or be quiescent.
314 * wait_for_readers() can release and grab again rcu_registry_lock
315 * interally.
316 */
317 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
318
319 /*
320 * Put quiescent reader list back into registry.
321 */
322 cds_list_splice(&qsreaders, &registry);
323
324 /*
325 * Finish waiting for reader threads before letting the old ptr being
326 * freed.
327 */
328 smp_mb_master();
329 out:
330 mutex_unlock(&rcu_registry_lock);
331 mutex_unlock(&rcu_gp_lock);
332 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
333 assert(!ret);
334 }
335
/*
 * Out-of-line entry points for code that cannot use the LGPL inline
 * implementations: each one simply forwards to its underscore-prefixed
 * counterpart.
 */

/* Forwards to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}

/* Forwards to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}

/* Returns the value reported by _rcu_read_ongoing(). */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
354
355 /*
356 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
357 * Else, try expanding the last chunk. If this fails, allocate a new
358 * chunk twice as big as the last chunk.
359 * Memory used by chunks _never_ moves. A chunk could theoretically be
360 * freed when all "used" slots are released, but we don't do it at this
361 * point.
362 */
363 static
364 void expand_arena(struct registry_arena *arena)
365 {
366 struct registry_chunk *new_chunk, *last_chunk;
367 size_t old_chunk_len, new_chunk_len;
368
369 /* No chunk. */
370 if (cds_list_empty(&arena->chunk_list)) {
371 assert(ARENA_INIT_ALLOC >=
372 sizeof(struct registry_chunk)
373 + sizeof(struct rcu_reader));
374 new_chunk_len = ARENA_INIT_ALLOC;
375 new_chunk = mmap(NULL, new_chunk_len,
376 PROT_READ | PROT_WRITE,
377 MAP_ANONYMOUS | MAP_PRIVATE,
378 -1, 0);
379 if (new_chunk == MAP_FAILED)
380 abort();
381 memset(new_chunk, 0, new_chunk_len);
382 new_chunk->data_len =
383 new_chunk_len - sizeof(struct registry_chunk);
384 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
385 return; /* We're done. */
386 }
387
388 /* Try expanding last chunk. */
389 last_chunk = cds_list_entry(arena->chunk_list.prev,
390 struct registry_chunk, node);
391 old_chunk_len =
392 last_chunk->data_len + sizeof(struct registry_chunk);
393 new_chunk_len = old_chunk_len << 1;
394
395 /* Don't allow memory mapping to move, just expand. */
396 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
397 new_chunk_len, 0);
398 if (new_chunk != MAP_FAILED) {
399 /* Should not have moved. */
400 assert(new_chunk == last_chunk);
401 memset((char *) last_chunk + old_chunk_len, 0,
402 new_chunk_len - old_chunk_len);
403 last_chunk->data_len =
404 new_chunk_len - sizeof(struct registry_chunk);
405 return; /* We're done. */
406 }
407
408 /* Remap did not succeed, we need to add a new chunk. */
409 new_chunk = mmap(NULL, new_chunk_len,
410 PROT_READ | PROT_WRITE,
411 MAP_ANONYMOUS | MAP_PRIVATE,
412 -1, 0);
413 if (new_chunk == MAP_FAILED)
414 abort();
415 memset(new_chunk, 0, new_chunk_len);
416 new_chunk->data_len =
417 new_chunk_len - sizeof(struct registry_chunk);
418 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
419 }
420
421 static
422 struct rcu_reader *arena_alloc(struct registry_arena *arena)
423 {
424 struct registry_chunk *chunk;
425 struct rcu_reader *rcu_reader_reg;
426 int expand_done = 0; /* Only allow to expand once per alloc */
427 size_t len = sizeof(struct rcu_reader);
428
429 retry:
430 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
431 if (chunk->data_len - chunk->used < len)
432 continue;
433 /* Find spot */
434 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
435 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
436 rcu_reader_reg++) {
437 if (!rcu_reader_reg->alloc) {
438 rcu_reader_reg->alloc = 1;
439 chunk->used += len;
440 return rcu_reader_reg;
441 }
442 }
443 }
444
445 if (!expand_done) {
446 expand_arena(arena);
447 expand_done = 1;
448 goto retry;
449 }
450
451 return NULL;
452 }
453
454 /* Called with signals off and mutex locked */
455 static
456 void add_thread(void)
457 {
458 struct rcu_reader *rcu_reader_reg;
459 int ret;
460
461 rcu_reader_reg = arena_alloc(&registry_arena);
462 if (!rcu_reader_reg)
463 abort();
464 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
465 if (ret)
466 abort();
467
468 /* Add to registry */
469 rcu_reader_reg->tid = pthread_self();
470 assert(rcu_reader_reg->ctr == 0);
471 cds_list_add(&rcu_reader_reg->node, &registry);
472 /*
473 * Reader threads are pointing to the reader registry. This is
474 * why its memory should never be relocated.
475 */
476 URCU_TLS(rcu_reader) = rcu_reader_reg;
477 }
478
479 /* Called with mutex locked */
480 static
481 void cleanup_thread(struct registry_chunk *chunk,
482 struct rcu_reader *rcu_reader_reg)
483 {
484 rcu_reader_reg->ctr = 0;
485 cds_list_del(&rcu_reader_reg->node);
486 rcu_reader_reg->tid = 0;
487 rcu_reader_reg->alloc = 0;
488 chunk->used -= sizeof(struct rcu_reader);
489 }
490
491 static
492 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
493 {
494 struct registry_chunk *chunk;
495
496 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
497 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
498 continue;
499 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
500 continue;
501 return chunk;
502 }
503 return NULL;
504 }
505
506 /* Called with signals off and mutex locked */
507 static
508 void remove_thread(struct rcu_reader *rcu_reader_reg)
509 {
510 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
511 URCU_TLS(rcu_reader) = NULL;
512 }
513
514 /* Disable signals, take mutex, add to registry */
515 void rcu_bp_register(void)
516 {
517 sigset_t newmask, oldmask;
518 int ret;
519
520 ret = sigfillset(&newmask);
521 if (ret)
522 abort();
523 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
524 if (ret)
525 abort();
526
527 /*
528 * Check if a signal concurrently registered our thread since
529 * the check in rcu_read_lock().
530 */
531 if (URCU_TLS(rcu_reader))
532 goto end;
533
534 /*
535 * Take care of early registration before urcu_bp constructor.
536 */
537 rcu_bp_init();
538
539 mutex_lock(&rcu_registry_lock);
540 add_thread();
541 mutex_unlock(&rcu_registry_lock);
542 end:
543 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
544 if (ret)
545 abort();
546 }
547
548 /* Disable signals, take mutex, remove from registry */
549 static
550 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
551 {
552 sigset_t newmask, oldmask;
553 int ret;
554
555 ret = sigfillset(&newmask);
556 if (ret)
557 abort();
558 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
559 if (ret)
560 abort();
561
562 mutex_lock(&rcu_registry_lock);
563 remove_thread(rcu_reader_reg);
564 mutex_unlock(&rcu_registry_lock);
565 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
566 if (ret)
567 abort();
568 rcu_bp_exit();
569 }
570
/*
 * Destructor installed for urcu_bp_key: removes the exiting thread from
 * the registry. @rcu_key is the thread's value for the key, i.e. the
 * reader slot stored by add_thread().
 */
static void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *reader = rcu_key;

	rcu_bp_unregister(reader);
}
580
581 static
582 void rcu_bp_init(void)
583 {
584 mutex_lock(&init_lock);
585 if (!rcu_bp_refcount++) {
586 int ret;
587
588 ret = pthread_key_create(&urcu_bp_key,
589 urcu_bp_thread_exit_notifier);
590 if (ret)
591 abort();
592 ret = membarrier(MEMBARRIER_CMD_QUERY, 0);
593 if (ret >= 0 && (ret & MEMBARRIER_CMD_SHARED)) {
594 urcu_bp_has_sys_membarrier = 1;
595 }
596 initialized = 1;
597 }
598 mutex_unlock(&init_lock);
599 }
600
601 static
602 void rcu_bp_exit(void)
603 {
604 mutex_lock(&init_lock);
605 if (!--rcu_bp_refcount) {
606 struct registry_chunk *chunk, *tmp;
607 int ret;
608
609 cds_list_for_each_entry_safe(chunk, tmp,
610 &registry_arena.chunk_list, node) {
611 munmap(chunk, chunk->data_len
612 + sizeof(struct registry_chunk));
613 }
614 CDS_INIT_LIST_HEAD(&registry_arena.chunk_list);
615 ret = pthread_key_delete(urcu_bp_key);
616 if (ret)
617 abort();
618 }
619 mutex_unlock(&init_lock);
620 }
621
622 /*
623 * Holding the rcu_gp_lock and rcu_registry_lock across fork will make
624 * sure we fork() don't race with a concurrent thread executing with
625 * any of those locks held. This ensures that the registry and data
626 * protected by rcu_gp_lock are in a coherent state in the child.
627 */
628 void rcu_bp_before_fork(void)
629 {
630 sigset_t newmask, oldmask;
631 int ret;
632
633 ret = sigfillset(&newmask);
634 assert(!ret);
635 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
636 assert(!ret);
637 mutex_lock(&rcu_gp_lock);
638 mutex_lock(&rcu_registry_lock);
639 saved_fork_signal_mask = oldmask;
640 }
641
642 void rcu_bp_after_fork_parent(void)
643 {
644 sigset_t oldmask;
645 int ret;
646
647 oldmask = saved_fork_signal_mask;
648 mutex_unlock(&rcu_registry_lock);
649 mutex_unlock(&rcu_gp_lock);
650 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
651 assert(!ret);
652 }
653
654 /*
655 * Prune all entries from registry except our own thread. Fits the Linux
656 * fork behavior. Called with rcu_gp_lock and rcu_registry_lock held.
657 */
658 static
659 void urcu_bp_prune_registry(void)
660 {
661 struct registry_chunk *chunk;
662 struct rcu_reader *rcu_reader_reg;
663
664 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
665 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
666 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
667 rcu_reader_reg++) {
668 if (!rcu_reader_reg->alloc)
669 continue;
670 if (rcu_reader_reg->tid == pthread_self())
671 continue;
672 cleanup_thread(chunk, rcu_reader_reg);
673 }
674 }
675 }
676
677 void rcu_bp_after_fork_child(void)
678 {
679 sigset_t oldmask;
680 int ret;
681
682 urcu_bp_prune_registry();
683 oldmask = saved_fork_signal_mask;
684 mutex_unlock(&rcu_registry_lock);
685 mutex_unlock(&rcu_gp_lock);
686 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
687 assert(!ret);
688 }
689
/* Out-of-line symbol returning the result of _rcu_dereference() on @p. */
void *rcu_dereference_sym_bp(void *p)
{
	void *ptr = _rcu_dereference(p);

	return ptr;
}

/*
 * Stores @v into *@p after a write barrier.
 * Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();
	uatomic_set(p, v);
	return v;
}

/*
 * Exchanges *@p with @v after a write barrier.
 * Returns the value reported by uatomic_xchg().
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_xchg(p, v);
	return prev;
}

/*
 * Compare-and-exchange on *@p (expected @old, replacement @_new) after
 * a write barrier. Returns the value reported by uatomic_cmpxchg().
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *prev;

	cmm_wmb();
	prev = uatomic_cmpxchg(p, old, _new);
	return prev;
}
713
714 DEFINE_RCU_FLAVOR(rcu_flavor);
715
716 #include "urcu-call-rcu-impl.h"
717 #include "urcu-defer-impl.h"
This page took 0.059874 seconds and 4 git commands to generate.