Fix: handle sys_futex() FUTEX_WAIT interrupted by signal
[urcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfcqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
56 #ifdef __linux__
/*
 * Linux variant: hand the request straight to mremap() and return
 * whatever it returns.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
63 #else
64
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Fallback used when not building for Linux. It never remaps anything:
 * every request is answered with MAP_FAILED. Callers must not ask for
 * MREMAP_MAYMOVE (checked by assertion). This is not a generic mremap
 * replacement.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
80 #endif
81
/* Sleep delay in ms */
#define RCU_SLEEP_DELAY_MS	10
/* Number of reader slots the initial arena chunk is sized for. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the initial arena chunk: one chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so
 * the macro keeps its value when used inside a larger expression.
 */
#define ARENA_INIT_ALLOC		\
	(sizeof(struct registry_chunk)	\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))

/*
 * Active attempts to check for reader Q.S. before calling sleep().
 */
#define RCU_QS_ACTIVE_ATTEMPTS	100
93
/* Incremented by rcu_bp_init(), decremented by rcu_bp_exit(). */
static int rcu_bp_refcount;

/* Library constructor and destructor; both are defined further down. */
static void rcu_bp_init(void) __attribute__((constructor));
static void rcu_bp_exit(void) __attribute__((destructor));
101
102 /*
103 * rcu_gp_lock ensures mutual exclusion between threads calling
104 * synchronize_rcu().
105 */
106 static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
107 /*
108 * rcu_registry_lock ensures mutual exclusion between threads
109 * registering and unregistering themselves to/from the registry, and
110 * with threads reading that registry from synchronize_rcu(). However,
111 * this lock is not held all the way through the completion of awaiting
112 * for the grace period. It is sporadically released between iterations
113 * on the registry.
114 * rcu_registry_lock may nest inside rcu_gp_lock.
115 */
116 static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
117
118 static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
119 static int initialized;
120
121 static pthread_key_t urcu_bp_key;
122
123 struct rcu_gp rcu_gp = { .ctr = RCU_GP_COUNT };
124
125 /*
126 * Pointer to registry elements. Written to only by each individual reader. Read
127 * by both the reader and the writers.
128 */
129 DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
130
131 static CDS_LIST_HEAD(registry);
132
133 struct registry_chunk {
134 size_t data_len; /* data length */
135 size_t used; /* amount of data used */
136 struct cds_list_head node; /* chunk_list node */
137 char data[];
138 };
139
140 struct registry_arena {
141 struct cds_list_head chunk_list;
142 };
143
144 static struct registry_arena registry_arena = {
145 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
146 };
147
148 /* Saved fork signal mask, protected by rcu_gp_lock */
149 static sigset_t saved_fork_signal_mask;
150
/*
 * Acquire @mutex; any unexpected error code is passed to urcu_die().
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the lock is taken by polling
 * pthread_mutex_trylock() with a 10 ms pause between attempts, and both
 * EBUSY and EINTR are treated as "try again".
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		int err = pthread_mutex_trylock(mutex);

		if (err == 0)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL, 0, 10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	int err = pthread_mutex_lock(mutex);

	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
167
/* Release @mutex; a non-zero error code is passed to urcu_die(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
176
177 /*
178 * Always called with rcu_registry lock held. Releases this lock between
179 * iterations and grabs it again. Holds the lock when it returns.
180 */
181 static void wait_for_readers(struct cds_list_head *input_readers,
182 struct cds_list_head *cur_snap_readers,
183 struct cds_list_head *qsreaders)
184 {
185 unsigned int wait_loops = 0;
186 struct rcu_reader *index, *tmp;
187
188 /*
189 * Wait for each thread URCU_TLS(rcu_reader).ctr to either
190 * indicate quiescence (not nested), or observe the current
191 * rcu_gp.ctr value.
192 */
193 for (;;) {
194 if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS)
195 wait_loops++;
196
197 cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
198 switch (rcu_reader_state(&index->ctr)) {
199 case RCU_READER_ACTIVE_CURRENT:
200 if (cur_snap_readers) {
201 cds_list_move(&index->node,
202 cur_snap_readers);
203 break;
204 }
205 /* Fall-through */
206 case RCU_READER_INACTIVE:
207 cds_list_move(&index->node, qsreaders);
208 break;
209 case RCU_READER_ACTIVE_OLD:
210 /*
211 * Old snapshot. Leaving node in
212 * input_readers will make us busy-loop
213 * until the snapshot becomes current or
214 * the reader becomes inactive.
215 */
216 break;
217 }
218 }
219
220 if (cds_list_empty(input_readers)) {
221 break;
222 } else {
223 /* Temporarily unlock the registry lock. */
224 mutex_unlock(&rcu_registry_lock);
225 if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
226 (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
227 else
228 caa_cpu_relax();
229 /* Re-lock the registry lock before the next loop. */
230 mutex_lock(&rcu_registry_lock);
231 }
232 }
233 }
234
235 void synchronize_rcu(void)
236 {
237 CDS_LIST_HEAD(cur_snap_readers);
238 CDS_LIST_HEAD(qsreaders);
239 sigset_t newmask, oldmask;
240 int ret;
241
242 ret = sigfillset(&newmask);
243 assert(!ret);
244 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
245 assert(!ret);
246
247 mutex_lock(&rcu_gp_lock);
248
249 mutex_lock(&rcu_registry_lock);
250
251 if (cds_list_empty(&registry))
252 goto out;
253
254 /* All threads should read qparity before accessing data structure
255 * where new ptr points to. */
256 /* Write new ptr before changing the qparity */
257 cmm_smp_mb();
258
259 /*
260 * Wait for readers to observe original parity or be quiescent.
261 * wait_for_readers() can release and grab again rcu_registry_lock
262 * interally.
263 */
264 wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
265
266 /*
267 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
268 * model easier to understand. It does not have a big performance impact
269 * anyway, given this is the write-side.
270 */
271 cmm_smp_mb();
272
273 /* Switch parity: 0 -> 1, 1 -> 0 */
274 CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR_PHASE);
275
276 /*
277 * Must commit qparity update to memory before waiting for other parity
278 * quiescent state. Failure to do so could result in the writer waiting
279 * forever while new readers are always accessing data (no progress).
280 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
281 */
282
283 /*
284 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
285 * model easier to understand. It does not have a big performance impact
286 * anyway, given this is the write-side.
287 */
288 cmm_smp_mb();
289
290 /*
291 * Wait for readers to observe new parity or be quiescent.
292 * wait_for_readers() can release and grab again rcu_registry_lock
293 * interally.
294 */
295 wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
296
297 /*
298 * Put quiescent reader list back into registry.
299 */
300 cds_list_splice(&qsreaders, &registry);
301
302 /*
303 * Finish waiting for reader threads before letting the old ptr being
304 * freed.
305 */
306 cmm_smp_mb();
307 out:
308 mutex_unlock(&rcu_registry_lock);
309 mutex_unlock(&rcu_gp_lock);
310 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
311 assert(!ret);
312 }
313
/*
 * Out-of-line library wrappers, meant for source code that is not
 * LGPL-compatible and therefore cannot use the inline versions.
 */

/* Exported symbol forwarding to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
322
/* Exported symbol forwarding to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
327
/* Exported symbol forwarding to _rcu_read_ongoing(); returns its result. */
int rcu_read_ongoing(void)
{
	int ongoing = _rcu_read_ongoing();

	return ongoing;
}
332
333 /*
334 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
335 * Else, try expanding the last chunk. If this fails, allocate a new
336 * chunk twice as big as the last chunk.
337 * Memory used by chunks _never_ moves. A chunk could theoretically be
338 * freed when all "used" slots are released, but we don't do it at this
339 * point.
340 */
341 static
342 void expand_arena(struct registry_arena *arena)
343 {
344 struct registry_chunk *new_chunk, *last_chunk;
345 size_t old_chunk_len, new_chunk_len;
346
347 /* No chunk. */
348 if (cds_list_empty(&arena->chunk_list)) {
349 assert(ARENA_INIT_ALLOC >=
350 sizeof(struct registry_chunk)
351 + sizeof(struct rcu_reader));
352 new_chunk_len = ARENA_INIT_ALLOC;
353 new_chunk = mmap(NULL, new_chunk_len,
354 PROT_READ | PROT_WRITE,
355 MAP_ANONYMOUS | MAP_PRIVATE,
356 -1, 0);
357 if (new_chunk == MAP_FAILED)
358 abort();
359 bzero(new_chunk, new_chunk_len);
360 new_chunk->data_len =
361 new_chunk_len - sizeof(struct registry_chunk);
362 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
363 return; /* We're done. */
364 }
365
366 /* Try expanding last chunk. */
367 last_chunk = cds_list_entry(arena->chunk_list.prev,
368 struct registry_chunk, node);
369 old_chunk_len =
370 last_chunk->data_len + sizeof(struct registry_chunk);
371 new_chunk_len = old_chunk_len << 1;
372
373 /* Don't allow memory mapping to move, just expand. */
374 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
375 new_chunk_len, 0);
376 if (new_chunk != MAP_FAILED) {
377 /* Should not have moved. */
378 assert(new_chunk == last_chunk);
379 bzero((char *) last_chunk + old_chunk_len,
380 new_chunk_len - old_chunk_len);
381 last_chunk->data_len =
382 new_chunk_len - sizeof(struct registry_chunk);
383 return; /* We're done. */
384 }
385
386 /* Remap did not succeed, we need to add a new chunk. */
387 new_chunk = mmap(NULL, new_chunk_len,
388 PROT_READ | PROT_WRITE,
389 MAP_ANONYMOUS | MAP_PRIVATE,
390 -1, 0);
391 if (new_chunk == MAP_FAILED)
392 abort();
393 bzero(new_chunk, new_chunk_len);
394 new_chunk->data_len =
395 new_chunk_len - sizeof(struct registry_chunk);
396 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
397 }
398
399 static
400 struct rcu_reader *arena_alloc(struct registry_arena *arena)
401 {
402 struct registry_chunk *chunk;
403 struct rcu_reader *rcu_reader_reg;
404 int expand_done = 0; /* Only allow to expand once per alloc */
405 size_t len = sizeof(struct rcu_reader);
406
407 retry:
408 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
409 if (chunk->data_len - chunk->used < len)
410 continue;
411 /* Find spot */
412 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
413 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
414 rcu_reader_reg++) {
415 if (!rcu_reader_reg->alloc) {
416 rcu_reader_reg->alloc = 1;
417 chunk->used += len;
418 return rcu_reader_reg;
419 }
420 }
421 }
422
423 if (!expand_done) {
424 expand_arena(arena);
425 expand_done = 1;
426 goto retry;
427 }
428
429 return NULL;
430 }
431
432 /* Called with signals off and mutex locked */
433 static
434 void add_thread(void)
435 {
436 struct rcu_reader *rcu_reader_reg;
437 int ret;
438
439 rcu_reader_reg = arena_alloc(&registry_arena);
440 if (!rcu_reader_reg)
441 abort();
442 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
443 if (ret)
444 abort();
445
446 /* Add to registry */
447 rcu_reader_reg->tid = pthread_self();
448 assert(rcu_reader_reg->ctr == 0);
449 cds_list_add(&rcu_reader_reg->node, &registry);
450 /*
451 * Reader threads are pointing to the reader registry. This is
452 * why its memory should never be relocated.
453 */
454 URCU_TLS(rcu_reader) = rcu_reader_reg;
455 }
456
457 /* Called with mutex locked */
458 static
459 void cleanup_thread(struct registry_chunk *chunk,
460 struct rcu_reader *rcu_reader_reg)
461 {
462 rcu_reader_reg->ctr = 0;
463 cds_list_del(&rcu_reader_reg->node);
464 rcu_reader_reg->tid = 0;
465 rcu_reader_reg->alloc = 0;
466 chunk->used -= sizeof(struct rcu_reader);
467 }
468
469 static
470 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
471 {
472 struct registry_chunk *chunk;
473
474 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
475 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
476 continue;
477 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
478 continue;
479 return chunk;
480 }
481 return NULL;
482 }
483
484 /* Called with signals off and mutex locked */
485 static
486 void remove_thread(struct rcu_reader *rcu_reader_reg)
487 {
488 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
489 URCU_TLS(rcu_reader) = NULL;
490 }
491
492 /* Disable signals, take mutex, add to registry */
493 void rcu_bp_register(void)
494 {
495 sigset_t newmask, oldmask;
496 int ret;
497
498 ret = sigfillset(&newmask);
499 if (ret)
500 abort();
501 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
502 if (ret)
503 abort();
504
505 /*
506 * Check if a signal concurrently registered our thread since
507 * the check in rcu_read_lock().
508 */
509 if (URCU_TLS(rcu_reader))
510 goto end;
511
512 /*
513 * Take care of early registration before urcu_bp constructor.
514 */
515 rcu_bp_init();
516
517 mutex_lock(&rcu_registry_lock);
518 add_thread();
519 mutex_unlock(&rcu_registry_lock);
520 end:
521 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
522 if (ret)
523 abort();
524 }
525
526 /* Disable signals, take mutex, remove from registry */
527 static
528 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
529 {
530 sigset_t newmask, oldmask;
531 int ret;
532
533 ret = sigfillset(&newmask);
534 if (ret)
535 abort();
536 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
537 if (ret)
538 abort();
539
540 mutex_lock(&rcu_registry_lock);
541 remove_thread(rcu_reader_reg);
542 mutex_unlock(&rcu_registry_lock);
543 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
544 if (ret)
545 abort();
546 rcu_bp_exit();
547 }
548
/*
 * Destructor installed on urcu_bp_key: @rcu_key is the value stored for
 * the thread by add_thread(), i.e. its reader slot, which is removed from
 * the registry here.
 */
static
void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *slot = rcu_key;

	rcu_bp_unregister(slot);
}
558
559 static
560 void rcu_bp_init(void)
561 {
562 mutex_lock(&init_lock);
563 if (!rcu_bp_refcount++) {
564 int ret;
565
566 ret = pthread_key_create(&urcu_bp_key,
567 urcu_bp_thread_exit_notifier);
568 if (ret)
569 abort();
570 initialized = 1;
571 }
572 mutex_unlock(&init_lock);
573 }
574
575 static
576 void rcu_bp_exit(void)
577 {
578 mutex_lock(&init_lock);
579 if (!--rcu_bp_refcount) {
580 struct registry_chunk *chunk, *tmp;
581 int ret;
582
583 cds_list_for_each_entry_safe(chunk, tmp,
584 &registry_arena.chunk_list, node) {
585 munmap(chunk, chunk->data_len
586 + sizeof(struct registry_chunk));
587 }
588 ret = pthread_key_delete(urcu_bp_key);
589 if (ret)
590 abort();
591 }
592 mutex_unlock(&init_lock);
593 }
594
595 /*
596 * Holding the rcu_gp_lock and rcu_registry_lock across fork will make
597 * sure we fork() don't race with a concurrent thread executing with
598 * any of those locks held. This ensures that the registry and data
599 * protected by rcu_gp_lock are in a coherent state in the child.
600 */
601 void rcu_bp_before_fork(void)
602 {
603 sigset_t newmask, oldmask;
604 int ret;
605
606 ret = sigfillset(&newmask);
607 assert(!ret);
608 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
609 assert(!ret);
610 mutex_lock(&rcu_gp_lock);
611 mutex_lock(&rcu_registry_lock);
612 saved_fork_signal_mask = oldmask;
613 }
614
615 void rcu_bp_after_fork_parent(void)
616 {
617 sigset_t oldmask;
618 int ret;
619
620 oldmask = saved_fork_signal_mask;
621 mutex_unlock(&rcu_registry_lock);
622 mutex_unlock(&rcu_gp_lock);
623 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
624 assert(!ret);
625 }
626
627 /*
628 * Prune all entries from registry except our own thread. Fits the Linux
629 * fork behavior. Called with rcu_gp_lock and rcu_registry_lock held.
630 */
631 static
632 void urcu_bp_prune_registry(void)
633 {
634 struct registry_chunk *chunk;
635 struct rcu_reader *rcu_reader_reg;
636
637 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
638 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
639 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
640 rcu_reader_reg++) {
641 if (!rcu_reader_reg->alloc)
642 continue;
643 if (rcu_reader_reg->tid == pthread_self())
644 continue;
645 cleanup_thread(chunk, rcu_reader_reg);
646 }
647 }
648 }
649
650 void rcu_bp_after_fork_child(void)
651 {
652 sigset_t oldmask;
653 int ret;
654
655 urcu_bp_prune_registry();
656 oldmask = saved_fork_signal_mask;
657 mutex_unlock(&rcu_registry_lock);
658 mutex_unlock(&rcu_gp_lock);
659 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
660 assert(!ret);
661 }
662
/* Exported symbol returning the result of _rcu_dereference() on @p. */
void *rcu_dereference_sym_bp(void *p)
{
	return _rcu_dereference(p);
}
667
/*
 * Exported symbol: issue a write barrier, then store @v into *@p.
 * Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Order earlier writes before the pointer store. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
674
/*
 * Exported symbol: issue a write barrier, then exchange *@p with @v via
 * uatomic_xchg() and return that call's result.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);
	return previous;
}
680
/*
 * Exported symbol: issue a write barrier, then compare-and-exchange *@p
 * from @old to @_new via uatomic_cmpxchg() and return that call's result.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_cmpxchg(p, old, _new);
	return previous;
}
686
687 DEFINE_RCU_FLAVOR(rcu_flavor);
688
689 #include "urcu-call-rcu-impl.h"
690 #include "urcu-defer-impl.h"
This page took 0.066864 seconds and 4 git commands to generate.