Fix urcu-bp: don't move registry
[userspace-rcu.git] / urcu-bp.c
CommitLineData
fdee2e6d
MD
1/*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6982d6d7 6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
fdee2e6d
MD
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
0617bf4c 26#define _GNU_SOURCE
71c811bf 27#define _LGPL_SOURCE
fdee2e6d
MD
28#include <stdio.h>
29#include <pthread.h>
30#include <signal.h>
31#include <assert.h>
32#include <stdlib.h>
33#include <string.h>
34#include <errno.h>
35#include <poll.h>
36#include <unistd.h>
37#include <sys/mman.h>
38
71c811bf 39#include "urcu/wfqueue.h"
57760d44 40#include "urcu/map/urcu-bp.h"
af7c2dbe 41#include "urcu/static/urcu-bp.h"
618b2595 42#include "urcu-pointer.h"
bd252a04 43#include "urcu/tls-compat.h"
71c811bf 44
4a6d7378
MD
45#include "urcu-die.h"
46
fdee2e6d 47/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
71c811bf 48#undef _LGPL_SOURCE
fdee2e6d 49#include "urcu-bp.h"
71c811bf 50#define _LGPL_SOURCE
fdee2e6d 51
4c1ae2ea
MD
52#ifndef MAP_ANONYMOUS
53#define MAP_ANONYMOUS MAP_ANON
54#endif
55
c7eaf61c
MD
56#ifdef __linux__
/*
 * Linux variant: hand the request straight to mremap() and return
 * its result unchanged.
 */
static void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
63#else
45a4872f
MD
64
65#define MREMAP_MAYMOVE 1
66#define MREMAP_FIXED 2
67
68/*
89451e1b 69 * mremap wrapper for non-Linux systems not allowing MAYMOVE.
45a4872f
MD
70 * This is not generic.
71*/
c7eaf61c
MD
72static
73void *mremap_wrapper(void *old_address, size_t old_size,
74 size_t new_size, int flags)
45a4872f 75{
89451e1b
MD
76 assert(!(flags & MREMAP_MAYMOVE));
77
78 return MAP_FAILED;
45a4872f
MD
79}
80#endif
81
fdee2e6d
MD
82/* Sleep delay in us */
83#define RCU_SLEEP_DELAY 1000
89451e1b
MD
84#define INIT_NR_THREADS 8
/*
 * Length in bytes of the first arena chunk: one chunk header followed by
 * room for INIT_NR_THREADS reader slots. The expansion is parenthesized
 * so it keeps its value when used inside a larger expression.
 */
#define ARENA_INIT_ALLOC					\
	(sizeof(struct registry_chunk)				\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))
fdee2e6d 88
b7b6a8f5
PB
89/*
90 * Active attempts to check for reader Q.S. before calling sleep().
91 */
92#define RCU_QS_ACTIVE_ATTEMPTS 100
93
02be5561 94void __attribute__((destructor)) rcu_bp_exit(void);
fdee2e6d 95
6abb4bd5 96static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
fdee2e6d
MD
97
98#ifdef DEBUG_YIELD
99unsigned int yield_active;
bd252a04 100DEFINE_URCU_TLS(unsigned int, rand_yield);
fdee2e6d
MD
101#endif
102
103/*
104 * Global grace period counter.
02be5561 105 * Contains the current RCU_GP_CTR_PHASE.
fdee2e6d
MD
106 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
107 * Written to only by writer with mutex taken. Read by both writer and readers.
108 */
02be5561 109long rcu_gp_ctr = RCU_GP_COUNT;
fdee2e6d
MD
110
111/*
112 * Pointer to registry elements. Written to only by each individual reader. Read
113 * by both the reader and the writers.
114 */
bd252a04 115DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
fdee2e6d 116
16aa9ee8 117static CDS_LIST_HEAD(registry);
fdee2e6d 118
89451e1b
MD
119struct registry_chunk {
120 size_t data_len; /* data length */
121 size_t used; /* data used */
122 struct cds_list_head node; /* chunk_list node */
123 char data[];
124};
125
fdee2e6d 126struct registry_arena {
89451e1b 127 struct cds_list_head chunk_list;
fdee2e6d
MD
128};
129
89451e1b
MD
130static struct registry_arena registry_arena = {
131 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
132};
fdee2e6d 133
4cf1675f
MD
134/* Saved fork signal mask, protected by rcu_gp_lock */
135static sigset_t saved_fork_signal_mask;
136
fdee2e6d
MD
137static void rcu_gc_registry(void);
138
/*
 * Acquire @mutex, calling urcu_die() with the error code on any
 * unexpected failure.
 *
 * When DISTRUST_SIGNALS_EXTREME is defined, the mutex is polled with
 * pthread_mutex_trylock() and a 10 ms poll() pause between attempts
 * instead of blocking in pthread_mutex_lock().
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		/* Only "busy" and "interrupted" are worth retrying. */
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
155
/* Release @mutex, calling urcu_die() with the error code on failure. */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
164
2dfb8b5e 165void update_counter_and_wait(void)
fdee2e6d 166{
16aa9ee8 167 CDS_LIST_HEAD(qsreaders);
fdee2e6d 168 int wait_loops = 0;
02be5561 169 struct rcu_reader *index, *tmp;
fdee2e6d 170
32c15e4e 171 /* Switch parity: 0 -> 1, 1 -> 0 */
6cf3827c 172 CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
2dfb8b5e
MD
173
174 /*
175 * Must commit qparity update to memory before waiting for other parity
176 * quiescent state. Failure to do so could result in the writer waiting
177 * forever while new readers are always accessing data (no progress).
6cf3827c 178 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
2dfb8b5e
MD
179 */
180
181 /*
5481ddb3 182 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
2dfb8b5e
MD
183 * model easier to understand. It does not have a big performance impact
184 * anyway, given this is the write-side.
185 */
5481ddb3 186 cmm_smp_mb();
2dfb8b5e 187
fdee2e6d 188 /*
02be5561 189 * Wait for each thread rcu_reader.ctr count to become 0.
fdee2e6d
MD
190 */
191 for (;;) {
192 wait_loops++;
16aa9ee8 193 cds_list_for_each_entry_safe(index, tmp, &registry, node) {
fdee2e6d 194 if (!rcu_old_gp_ongoing(&index->ctr))
16aa9ee8 195 cds_list_move(&index->node, &qsreaders);
fdee2e6d
MD
196 }
197
16aa9ee8 198 if (cds_list_empty(&registry)) {
fdee2e6d
MD
199 break;
200 } else {
201 if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
202 usleep(RCU_SLEEP_DELAY);
203 else
06f22bdb 204 caa_cpu_relax();
fdee2e6d
MD
205 }
206 }
207 /* put back the reader list in the registry */
16aa9ee8 208 cds_list_splice(&qsreaders, &registry);
fdee2e6d
MD
209}
210
211void synchronize_rcu(void)
212{
213 sigset_t newmask, oldmask;
214 int ret;
215
264716f7 216 ret = sigfillset(&newmask);
fdee2e6d 217 assert(!ret);
264716f7 218 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
fdee2e6d
MD
219 assert(!ret);
220
6abb4bd5 221 mutex_lock(&rcu_gp_lock);
fdee2e6d 222
16aa9ee8 223 if (cds_list_empty(&registry))
2dfb8b5e 224 goto out;
fdee2e6d
MD
225
226 /* All threads should read qparity before accessing data structure
2dfb8b5e 227 * where new ptr points to. */
fdee2e6d 228 /* Write new ptr before changing the qparity */
5481ddb3 229 cmm_smp_mb();
fdee2e6d 230
2dfb8b5e
MD
231 /* Remove old registry elements */
232 rcu_gc_registry();
fdee2e6d
MD
233
234 /*
235 * Wait for previous parity to be empty of readers.
236 */
2dfb8b5e 237 update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
fdee2e6d
MD
238
239 /*
5481ddb3 240 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
fdee2e6d
MD
241 * model easier to understand. It does not have a big performance impact
242 * anyway, given this is the write-side.
243 */
5481ddb3 244 cmm_smp_mb();
fdee2e6d 245
fdee2e6d 246 /*
2dfb8b5e 247 * Wait for previous parity to be empty of readers.
fdee2e6d 248 */
2dfb8b5e 249 update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
fdee2e6d
MD
250
251 /*
2dfb8b5e
MD
252 * Finish waiting for reader threads before letting the old ptr being
253 * freed.
fdee2e6d 254 */
5481ddb3 255 cmm_smp_mb();
2dfb8b5e 256out:
6abb4bd5 257 mutex_unlock(&rcu_gp_lock);
fdee2e6d
MD
258 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
259 assert(!ret);
260}
261
262/*
263 * library wrappers to be used by non-LGPL compatible source code.
264 */
265
/* Out-of-line entry point that forwards to _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
270
/* Out-of-line entry point that forwards to _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
275
276/*
89451e1b
MD
277 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
278 * Else, try expanding the last chunk. If this fails, allocate a new
279 * chunk twice as big as the last chunk.
280 * Memory used by chunks _never_ moves. A chunk could theoretically be
281 * freed when all "used" slots are released, but we don't do it at this
282 * point.
fdee2e6d 283 */
89451e1b
MD
284static
285void expand_arena(struct registry_arena *arena)
fdee2e6d 286{
89451e1b
MD
287 struct registry_chunk *new_chunk, *last_chunk;
288 size_t old_chunk_len, new_chunk_len;
289
290 /* No chunk. */
291 if (cds_list_empty(&arena->chunk_list)) {
292 assert(ARENA_INIT_ALLOC >=
293 sizeof(struct registry_chunk)
294 + sizeof(struct rcu_reader));
295 new_chunk_len = ARENA_INIT_ALLOC;
296 new_chunk = mmap(NULL, new_chunk_len,
0bcbf365
MD
297 PROT_READ | PROT_WRITE,
298 MAP_ANONYMOUS | MAP_PRIVATE,
299 -1, 0);
89451e1b
MD
300 if (new_chunk == MAP_FAILED)
301 abort();
302 bzero(new_chunk, new_chunk_len);
303 new_chunk->data_len =
304 new_chunk_len - sizeof(struct registry_chunk);
305 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
306 return; /* We're done. */
307 }
0bcbf365 308
89451e1b
MD
309 /* Try expanding last chunk. */
310 last_chunk = cds_list_entry(arena->chunk_list.prev,
311 struct registry_chunk, node);
312 old_chunk_len =
313 last_chunk->data_len + sizeof(struct registry_chunk);
314 new_chunk_len = old_chunk_len << 1;
315
316 /* Don't allow memory mapping to move, just expand. */
317 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
318 new_chunk_len, 0);
319 if (new_chunk != MAP_FAILED) {
320 /* Should not have moved. */
321 assert(new_chunk == last_chunk);
322 bzero((char *) last_chunk + old_chunk_len,
323 new_chunk_len - old_chunk_len);
324 last_chunk->data_len =
325 new_chunk_len - sizeof(struct registry_chunk);
326 return; /* We're done. */
327 }
0617bf4c 328
89451e1b
MD
329 /* Remap did not succeed, we need to add a new chunk. */
330 new_chunk = mmap(NULL, new_chunk_len,
331 PROT_READ | PROT_WRITE,
332 MAP_ANONYMOUS | MAP_PRIVATE,
333 -1, 0);
334 if (new_chunk == MAP_FAILED)
335 abort();
336 bzero(new_chunk, new_chunk_len);
337 new_chunk->data_len =
338 new_chunk_len - sizeof(struct registry_chunk);
339 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
340}
341
342static
343struct rcu_reader *arena_alloc(struct registry_arena *arena)
344{
345 struct registry_chunk *chunk;
346 struct rcu_reader *rcu_reader_reg;
347 int expand_done = 0; /* Only allow to expand once per alloc */
348 size_t len = sizeof(struct rcu_reader);
fdee2e6d 349
89451e1b
MD
350retry:
351 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
352 if (chunk->data_len - chunk->used < len)
353 continue;
354 /* Find spot */
355 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
356 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
357 rcu_reader_reg++) {
358 if (!rcu_reader_reg->alloc) {
359 rcu_reader_reg->alloc = 1;
360 chunk->used += len;
361 return rcu_reader_reg;
362 }
363 }
364 }
365
366 if (!expand_done) {
367 expand_arena(arena);
368 expand_done = 1;
369 goto retry;
370 }
371
372 return NULL;
fdee2e6d
MD
373}
374
375/* Called with signals off and mutex locked */
89451e1b
MD
376static
377void add_thread(void)
fdee2e6d 378{
02be5561 379 struct rcu_reader *rcu_reader_reg;
fdee2e6d 380
89451e1b
MD
381 rcu_reader_reg = arena_alloc(&registry_arena);
382 if (!rcu_reader_reg)
383 abort();
fdee2e6d
MD
384
385 /* Add to registry */
02be5561
MD
386 rcu_reader_reg->tid = pthread_self();
387 assert(rcu_reader_reg->ctr == 0);
16aa9ee8 388 cds_list_add(&rcu_reader_reg->node, &registry);
89451e1b
MD
389 /*
390 * Reader threads are pointing to the reader registry. This is
391 * why its memory should never be relocated.
392 */
bd252a04 393 URCU_TLS(rcu_reader) = rcu_reader_reg;
fdee2e6d
MD
394}
395
396/* Called with signals off and mutex locked */
397static void rcu_gc_registry(void)
398{
89451e1b 399 struct registry_chunk *chunk;
02be5561 400 struct rcu_reader *rcu_reader_reg;
fdee2e6d 401
89451e1b
MD
402 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
403 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
404 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
405 rcu_reader_reg++) {
406 pthread_t tid;
407 int ret;
408
409 if (!rcu_reader_reg->alloc)
410 continue;
411 tid = rcu_reader_reg->tid;
412 ret = pthread_kill(tid, 0);
413 assert(ret != EINVAL);
414 if (ret == ESRCH) {
415 cds_list_del(&rcu_reader_reg->node);
416 rcu_reader_reg->ctr = 0;
417 rcu_reader_reg->alloc = 0;
418 chunk->used -= sizeof(struct rcu_reader);
419 }
420
fdee2e6d
MD
421 }
422 }
423}
424
425/* Disable signals, take mutex, add to registry */
426void rcu_bp_register(void)
427{
428 sigset_t newmask, oldmask;
429 int ret;
430
264716f7 431 ret = sigfillset(&newmask);
fdee2e6d 432 assert(!ret);
264716f7 433 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
fdee2e6d
MD
434 assert(!ret);
435
436 /*
437 * Check if a signal concurrently registered our thread since
438 * the check in rcu_read_lock(). */
bd252a04 439 if (URCU_TLS(rcu_reader))
fdee2e6d
MD
440 goto end;
441
6abb4bd5 442 mutex_lock(&rcu_gp_lock);
fdee2e6d 443 add_thread();
6abb4bd5 444 mutex_unlock(&rcu_gp_lock);
fdee2e6d
MD
445end:
446 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
447 assert(!ret);
448}
449
9380711a 450void rcu_bp_exit(void)
fdee2e6d 451{
89451e1b
MD
452 struct registry_chunk *chunk, *tmp;
453
454 cds_list_for_each_entry_safe(chunk, tmp,
455 &registry_arena.chunk_list, node) {
456 munmap(chunk, chunk->data_len + sizeof(struct registry_chunk));
457 }
fdee2e6d 458}
4cf1675f
MD
459
460/*
461 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
462 * a concurrent thread executing with this same lock held. This ensures that the
463 * registry is in a coherent state in the child.
464 */
465void rcu_bp_before_fork(void)
466{
467 sigset_t newmask, oldmask;
468 int ret;
469
264716f7 470 ret = sigfillset(&newmask);
4cf1675f 471 assert(!ret);
264716f7 472 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
4cf1675f
MD
473 assert(!ret);
474 mutex_lock(&rcu_gp_lock);
475 saved_fork_signal_mask = oldmask;
476}
477
478void rcu_bp_after_fork_parent(void)
479{
480 sigset_t oldmask;
481 int ret;
482
483 oldmask = saved_fork_signal_mask;
484 mutex_unlock(&rcu_gp_lock);
485 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
486 assert(!ret);
487}
488
489void rcu_bp_after_fork_child(void)
490{
491 sigset_t oldmask;
492 int ret;
493
494 rcu_gc_registry();
495 oldmask = saved_fork_signal_mask;
496 mutex_unlock(&rcu_gp_lock);
497 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
498 assert(!ret);
499}
5e77fc1f 500
9b7981bb
MD
/* Out-of-line symbol that returns the result of _rcu_dereference(p). */
void *rcu_dereference_sym_bp(void *p)
{
	void *loaded = _rcu_dereference(p);

	return loaded;
}
505
5efd3cd2
MD
/*
 * Out-of-line symbol: issue a write barrier, then store @v into *@p
 * with uatomic_set(). Returns @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Barrier comes first so earlier writes precede the pointer store. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
512
/*
 * Out-of-line symbol: issue a write barrier, then exchange *@p with @v
 * using uatomic_xchg(). Returns whatever uatomic_xchg() returns.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);
	return previous;
}
518
/*
 * Out-of-line symbol: issue a write barrier, then run
 * uatomic_cmpxchg(p, old, _new). Returns whatever uatomic_cmpxchg()
 * returns.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_cmpxchg(p, old, _new);
	return previous;
}
524
5e6b23a6 525DEFINE_RCU_FLAVOR(rcu_flavor);
541d828d 526
5e77fc1f 527#include "urcu-call-rcu-impl.h"
0376e7b2 528#include "urcu-defer-impl.h"
This page took 0.052279 seconds and 4 git commands to generate.