Fix: urcu-bp interaction with threads vs constructors/destructors
[userspace-rcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
56 #ifdef __linux__
/*
 * Linux variant: hand the request straight to the native mremap() and
 * give its result back to the caller unchanged.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
63 #else
64
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Stand-in for mremap() on systems other than Linux. It never remaps
 * anything: every call reports failure by returning MAP_FAILED, and the
 * caller is expected to cope with that. Callers must not ask for
 * MREMAP_MAYMOVE (checked by assertion). This is not a generic mremap
 * replacement.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
80 #endif
81
82 /* Sleep delay in us */
83 #define RCU_SLEEP_DELAY 1000
84 #define INIT_NR_THREADS 8
/*
 * Size in bytes of the first registry chunk: one chunk header followed by
 * room for INIT_NR_THREADS reader slots. The expansion is parenthesized so
 * it behaves as a single operand in any surrounding expression.
 */
#define ARENA_INIT_ALLOC		\
	(sizeof(struct registry_chunk)	\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))
88
89 /*
90 * Active attempts to check for reader Q.S. before calling sleep().
91 */
92 #define RCU_QS_ACTIVE_ATTEMPTS 100
93
/*
 * Number of rcu_bp_init() calls not yet balanced by rcu_bp_exit().
 * Modified with init_lock held.
 */
static int rcu_bp_refcount;

/* Library constructor/destructor; also invoked on thread (un)registration. */
static void __attribute__((constructor)) rcu_bp_init(void);
static void __attribute__((destructor)) rcu_bp_exit(void);

/* Held around grace periods, registry updates and across fork(). */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;

/* Serializes rcu_bp_init() and rcu_bp_exit(). */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
/* Set to 1 by rcu_bp_init() once the thread-specific key is created. */
static int initialized;

/*
 * Thread-specific key holding each registered thread's reader slot; its
 * destructor unregisters the thread when it exits.
 */
static pthread_key_t urcu_bp_key;
108
109 #ifdef DEBUG_YIELD
110 unsigned int yield_active;
111 __DEFINE_URCU_TLS_GLOBAL(unsigned int, rand_yield);
112 #endif
113
114 /*
115 * Global grace period counter.
116 * Contains the current RCU_GP_CTR_PHASE.
117 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
118 * Written to only by writer with mutex taken. Read by both writer and readers.
119 */
120 long rcu_gp_ctr = RCU_GP_COUNT;
121
122 /*
123 * Pointer to registry elements. Written to only by each individual reader. Read
124 * by both the reader and the writers.
125 */
126 __DEFINE_URCU_TLS_GLOBAL(struct rcu_reader *, rcu_reader);
127
128 static CDS_LIST_HEAD(registry);
129
130 struct registry_chunk {
131 size_t data_len; /* data length */
132 size_t used; /* amount of data used */
133 struct cds_list_head node; /* chunk_list node */
134 char data[];
135 };
136
137 struct registry_arena {
138 struct cds_list_head chunk_list;
139 };
140
141 static struct registry_arena registry_arena = {
142 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
143 };
144
145 /* Saved fork signal mask, protected by rcu_gp_lock */
146 static sigset_t saved_fork_signal_mask;
147
/*
 * Acquire @mutex, handing any unexpected error to urcu_die().
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the lock is polled with
 * pthread_mutex_trylock(), pausing 10 ms between attempts for as long as
 * the result is EBUSY or EINTR; otherwise a plain blocking lock is used.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
164
/* Release @mutex; a non-zero pthread error is handed to urcu_die(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
173
174 void update_counter_and_wait(void)
175 {
176 CDS_LIST_HEAD(qsreaders);
177 int wait_loops = 0;
178 struct rcu_reader *index, *tmp;
179
180 /* Switch parity: 0 -> 1, 1 -> 0 */
181 CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
182
183 /*
184 * Must commit qparity update to memory before waiting for other parity
185 * quiescent state. Failure to do so could result in the writer waiting
186 * forever while new readers are always accessing data (no progress).
187 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
188 */
189
190 /*
191 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
192 * model easier to understand. It does not have a big performance impact
193 * anyway, given this is the write-side.
194 */
195 cmm_smp_mb();
196
197 /*
198 * Wait for each thread rcu_reader.ctr count to become 0.
199 */
200 for (;;) {
201 wait_loops++;
202 cds_list_for_each_entry_safe(index, tmp, &registry, node) {
203 if (!rcu_old_gp_ongoing(&index->ctr))
204 cds_list_move(&index->node, &qsreaders);
205 }
206
207 if (cds_list_empty(&registry)) {
208 break;
209 } else {
210 if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
211 usleep(RCU_SLEEP_DELAY);
212 else
213 caa_cpu_relax();
214 }
215 }
216 /* put back the reader list in the registry */
217 cds_list_splice(&qsreaders, &registry);
218 }
219
220 void synchronize_rcu(void)
221 {
222 sigset_t newmask, oldmask;
223 int ret;
224
225 ret = sigfillset(&newmask);
226 assert(!ret);
227 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
228 assert(!ret);
229
230 mutex_lock(&rcu_gp_lock);
231
232 if (cds_list_empty(&registry))
233 goto out;
234
235 /* All threads should read qparity before accessing data structure
236 * where new ptr points to. */
237 /* Write new ptr before changing the qparity */
238 cmm_smp_mb();
239
240 /*
241 * Wait for previous parity to be empty of readers.
242 */
243 update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
244
245 /*
246 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
247 * model easier to understand. It does not have a big performance impact
248 * anyway, given this is the write-side.
249 */
250 cmm_smp_mb();
251
252 /*
253 * Wait for previous parity to be empty of readers.
254 */
255 update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
256
257 /*
258 * Finish waiting for reader threads before letting the old ptr being
259 * freed.
260 */
261 cmm_smp_mb();
262 out:
263 mutex_unlock(&rcu_gp_lock);
264 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
265 assert(!ret);
266 }
267
/*
 * Out-of-line library wrappers around the inline read-side primitives, for
 * use by source code that is not LGPL-compatible.
 */

/* Enter a read-side critical section; forwards to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}

/* Leave a read-side critical section; forwards to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
281
282 /*
283 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
284 * Else, try expanding the last chunk. If this fails, allocate a new
285 * chunk twice as big as the last chunk.
286 * Memory used by chunks _never_ moves. A chunk could theoretically be
287 * freed when all "used" slots are released, but we don't do it at this
288 * point.
289 */
290 static
291 void expand_arena(struct registry_arena *arena)
292 {
293 struct registry_chunk *new_chunk, *last_chunk;
294 size_t old_chunk_len, new_chunk_len;
295
296 /* No chunk. */
297 if (cds_list_empty(&arena->chunk_list)) {
298 assert(ARENA_INIT_ALLOC >=
299 sizeof(struct registry_chunk)
300 + sizeof(struct rcu_reader));
301 new_chunk_len = ARENA_INIT_ALLOC;
302 new_chunk = mmap(NULL, new_chunk_len,
303 PROT_READ | PROT_WRITE,
304 MAP_ANONYMOUS | MAP_PRIVATE,
305 -1, 0);
306 if (new_chunk == MAP_FAILED)
307 abort();
308 bzero(new_chunk, new_chunk_len);
309 new_chunk->data_len =
310 new_chunk_len - sizeof(struct registry_chunk);
311 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
312 return; /* We're done. */
313 }
314
315 /* Try expanding last chunk. */
316 last_chunk = cds_list_entry(arena->chunk_list.prev,
317 struct registry_chunk, node);
318 old_chunk_len =
319 last_chunk->data_len + sizeof(struct registry_chunk);
320 new_chunk_len = old_chunk_len << 1;
321
322 /* Don't allow memory mapping to move, just expand. */
323 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
324 new_chunk_len, 0);
325 if (new_chunk != MAP_FAILED) {
326 /* Should not have moved. */
327 assert(new_chunk == last_chunk);
328 bzero((char *) last_chunk + old_chunk_len,
329 new_chunk_len - old_chunk_len);
330 last_chunk->data_len =
331 new_chunk_len - sizeof(struct registry_chunk);
332 return; /* We're done. */
333 }
334
335 /* Remap did not succeed, we need to add a new chunk. */
336 new_chunk = mmap(NULL, new_chunk_len,
337 PROT_READ | PROT_WRITE,
338 MAP_ANONYMOUS | MAP_PRIVATE,
339 -1, 0);
340 if (new_chunk == MAP_FAILED)
341 abort();
342 bzero(new_chunk, new_chunk_len);
343 new_chunk->data_len =
344 new_chunk_len - sizeof(struct registry_chunk);
345 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
346 }
347
348 static
349 struct rcu_reader *arena_alloc(struct registry_arena *arena)
350 {
351 struct registry_chunk *chunk;
352 struct rcu_reader *rcu_reader_reg;
353 int expand_done = 0; /* Only allow to expand once per alloc */
354 size_t len = sizeof(struct rcu_reader);
355
356 retry:
357 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
358 if (chunk->data_len - chunk->used < len)
359 continue;
360 /* Find spot */
361 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
362 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
363 rcu_reader_reg++) {
364 if (!rcu_reader_reg->alloc) {
365 rcu_reader_reg->alloc = 1;
366 chunk->used += len;
367 return rcu_reader_reg;
368 }
369 }
370 }
371
372 if (!expand_done) {
373 expand_arena(arena);
374 expand_done = 1;
375 goto retry;
376 }
377
378 return NULL;
379 }
380
381 /* Called with signals off and mutex locked */
382 static
383 void add_thread(void)
384 {
385 struct rcu_reader *rcu_reader_reg;
386 int ret;
387
388 rcu_reader_reg = arena_alloc(&registry_arena);
389 if (!rcu_reader_reg)
390 abort();
391 ret = pthread_setspecific(urcu_bp_key, rcu_reader_reg);
392 if (ret)
393 abort();
394
395 /* Add to registry */
396 rcu_reader_reg->tid = pthread_self();
397 assert(rcu_reader_reg->ctr == 0);
398 cds_list_add(&rcu_reader_reg->node, &registry);
399 /*
400 * Reader threads are pointing to the reader registry. This is
401 * why its memory should never be relocated.
402 */
403 URCU_TLS(rcu_reader) = rcu_reader_reg;
404 }
405
406 /* Called with mutex locked */
407 static
408 void cleanup_thread(struct registry_chunk *chunk,
409 struct rcu_reader *rcu_reader_reg)
410 {
411 rcu_reader_reg->ctr = 0;
412 cds_list_del(&rcu_reader_reg->node);
413 rcu_reader_reg->tid = 0;
414 rcu_reader_reg->alloc = 0;
415 chunk->used -= sizeof(struct rcu_reader);
416 }
417
418 static
419 struct registry_chunk *find_chunk(struct rcu_reader *rcu_reader_reg)
420 {
421 struct registry_chunk *chunk;
422
423 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
424 if (rcu_reader_reg < (struct rcu_reader *) &chunk->data[0])
425 continue;
426 if (rcu_reader_reg >= (struct rcu_reader *) &chunk->data[chunk->data_len])
427 continue;
428 return chunk;
429 }
430 return NULL;
431 }
432
433 /* Called with signals off and mutex locked */
434 static
435 void remove_thread(struct rcu_reader *rcu_reader_reg)
436 {
437 cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg);
438 URCU_TLS(rcu_reader) = NULL;
439 }
440
441 /* Disable signals, take mutex, add to registry */
442 void rcu_bp_register(void)
443 {
444 sigset_t newmask, oldmask;
445 int ret;
446
447 ret = sigfillset(&newmask);
448 if (ret)
449 abort();
450 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
451 if (ret)
452 abort();
453
454 /*
455 * Check if a signal concurrently registered our thread since
456 * the check in rcu_read_lock().
457 */
458 if (URCU_TLS(rcu_reader))
459 goto end;
460
461 /*
462 * Take care of early registration before urcu_bp constructor.
463 */
464 rcu_bp_init();
465
466 mutex_lock(&rcu_gp_lock);
467 add_thread();
468 mutex_unlock(&rcu_gp_lock);
469 end:
470 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
471 if (ret)
472 abort();
473 }
474
475 /* Disable signals, take mutex, remove from registry */
476 static
477 void rcu_bp_unregister(struct rcu_reader *rcu_reader_reg)
478 {
479 sigset_t newmask, oldmask;
480 int ret;
481
482 ret = sigfillset(&newmask);
483 if (ret)
484 abort();
485 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
486 if (ret)
487 abort();
488
489 mutex_lock(&rcu_gp_lock);
490 remove_thread(rcu_reader_reg);
491 mutex_unlock(&rcu_gp_lock);
492 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
493 if (ret)
494 abort();
495 rcu_bp_exit();
496 }
497
/*
 * Destructor attached to urcu_bp_key: when a registered thread exits, it
 * receives the reader slot stored for that thread and unregisters it.
 */
static void urcu_bp_thread_exit_notifier(void *rcu_key)
{
	struct rcu_reader *slot = rcu_key;

	rcu_bp_unregister(slot);
}
507
508 static
509 void rcu_bp_init(void)
510 {
511 mutex_lock(&init_lock);
512 if (!rcu_bp_refcount++) {
513 int ret;
514
515 ret = pthread_key_create(&urcu_bp_key,
516 urcu_bp_thread_exit_notifier);
517 if (ret)
518 abort();
519 initialized = 1;
520 }
521 mutex_unlock(&init_lock);
522 }
523
524 static
525 void rcu_bp_exit(void)
526 {
527 mutex_lock(&init_lock);
528 if (!--rcu_bp_refcount) {
529 struct registry_chunk *chunk, *tmp;
530 int ret;
531
532 cds_list_for_each_entry_safe(chunk, tmp,
533 &registry_arena.chunk_list, node) {
534 munmap(chunk, chunk->data_len
535 + sizeof(struct registry_chunk));
536 }
537 ret = pthread_key_delete(urcu_bp_key);
538 if (ret)
539 abort();
540 }
541 mutex_unlock(&init_lock);
542 }
543
544 /*
545 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
546 * a concurrent thread executing with this same lock held. This ensures that the
547 * registry is in a coherent state in the child.
548 */
549 void rcu_bp_before_fork(void)
550 {
551 sigset_t newmask, oldmask;
552 int ret;
553
554 ret = sigfillset(&newmask);
555 assert(!ret);
556 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
557 assert(!ret);
558 mutex_lock(&rcu_gp_lock);
559 saved_fork_signal_mask = oldmask;
560 }
561
562 void rcu_bp_after_fork_parent(void)
563 {
564 sigset_t oldmask;
565 int ret;
566
567 oldmask = saved_fork_signal_mask;
568 mutex_unlock(&rcu_gp_lock);
569 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
570 assert(!ret);
571 }
572
573 /*
574 * Prune all entries from registry except our own thread. Fits the Linux
575 * fork behavior. Called with rcu_gp_lock held.
576 */
577 static
578 void urcu_bp_prune_registry(void)
579 {
580 struct registry_chunk *chunk;
581 struct rcu_reader *rcu_reader_reg;
582
583 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
584 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
585 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
586 rcu_reader_reg++) {
587 if (!rcu_reader_reg->alloc)
588 continue;
589 if (rcu_reader_reg->tid == pthread_self())
590 continue;
591 cleanup_thread(chunk, rcu_reader_reg);
592 }
593 }
594 }
595
596 void rcu_bp_after_fork_child(void)
597 {
598 sigset_t oldmask;
599 int ret;
600
601 urcu_bp_prune_registry();
602 oldmask = saved_fork_signal_mask;
603 mutex_unlock(&rcu_gp_lock);
604 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
605 assert(!ret);
606 }
607
/* Out-of-line symbol for _rcu_dereference(): returns its result for @p. */
void *rcu_dereference_sym_bp(void *p)
{
	void *value = _rcu_dereference(p);

	return value;
}
612
/*
 * Store @v into *@p after a write barrier, so that stores issued before the
 * call are ordered before the pointer update. Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
619
/*
 * Issue a write barrier, then exchange *@p with @v through uatomic_xchg()
 * and return what that call returns.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);

	return previous;
}
625
/*
 * Issue a write barrier, then run uatomic_cmpxchg() on *@p with @old as the
 * expected value and @_new as the replacement, returning what it returns.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_cmpxchg(p, old, _new);

	return previous;
}
631
632 DEFINE_RCU_FLAVOR(rcu_flavor);
633
634 #include "urcu-call-rcu-impl.h"
635 #include "urcu-defer-impl.h"
This page took 0.041986 seconds and 4 git commands to generate.