Fix urcu-bp: don't move registry
[userspace-rcu.git] / urcu-bp.c
1 /*
2 * urcu-bp.c
3 *
4 * Userspace RCU library, "bulletproof" version.
5 *
6 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
24 */
25
26 #define _GNU_SOURCE
27 #define _LGPL_SOURCE
28 #include <stdio.h>
29 #include <pthread.h>
30 #include <signal.h>
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <errno.h>
35 #include <poll.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38
39 #include "urcu/wfqueue.h"
40 #include "urcu/map/urcu-bp.h"
41 #include "urcu/static/urcu-bp.h"
42 #include "urcu-pointer.h"
43 #include "urcu/tls-compat.h"
44
45 #include "urcu-die.h"
46
47 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
48 #undef _LGPL_SOURCE
49 #include "urcu-bp.h"
50 #define _LGPL_SOURCE
51
52 #ifndef MAP_ANONYMOUS
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
56 #ifdef __linux__
/*
 * Linux flavour: forward the request straight to mremap(2) and hand back
 * whatever it returns (MAP_FAILED on error).
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	void *remapped;

	remapped = mremap(old_address, old_size, new_size, flags);
	return remapped;
}
63 #else
64
#define MREMAP_MAYMOVE	1
#define MREMAP_FIXED	2

/*
 * Fallback used when the platform is not Linux: the mapping is never
 * resized, the call always reports failure with MAP_FAILED.
 * Callers must not request MREMAP_MAYMOVE (checked with assert()).
 * This is not a generic mremap replacement.
 */
static
void *mremap_wrapper(void *old_address, size_t old_size,
		size_t new_size, int flags)
{
	/* Arguments other than flags are intentionally ignored. */
	(void) old_address;
	(void) old_size;
	(void) new_size;

	assert((flags & MREMAP_MAYMOVE) == 0);

	return MAP_FAILED;
}
80 #endif
81
82 /* Sleep delay in us */
83 #define RCU_SLEEP_DELAY 1000
/* Number of reader slots provided by the first registry chunk. */
#define INIT_NR_THREADS		8
/*
 * Size in bytes of the first registry chunk: the chunk header followed by
 * INIT_NR_THREADS reader slots. The expansion is fully parenthesized so the
 * macro can be used safely inside larger expressions.
 */
#define ARENA_INIT_ALLOC				\
	(sizeof(struct registry_chunk)			\
	+ INIT_NR_THREADS * sizeof(struct rcu_reader))
88
89 /*
90 * Active attempts to check for reader Q.S. before calling sleep().
91 */
92 #define RCU_QS_ACTIVE_ATTEMPTS 100
93
94 void __attribute__((destructor)) rcu_bp_exit(void);
95
96 static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
97
98 #ifdef DEBUG_YIELD
99 unsigned int yield_active;
100 DEFINE_URCU_TLS(unsigned int, rand_yield);
101 #endif
102
103 /*
104 * Global grace period counter.
105 * Contains the current RCU_GP_CTR_PHASE.
106 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
107 * Written to only by writer with mutex taken. Read by both writer and readers.
108 */
109 long rcu_gp_ctr = RCU_GP_COUNT;
110
111 /*
112 * Pointer to registry elements. Written to only by each individual reader. Read
113 * by both the reader and the writers.
114 */
115 DEFINE_URCU_TLS(struct rcu_reader *, rcu_reader);
116
117 static CDS_LIST_HEAD(registry);
118
119 struct registry_chunk {
120 size_t data_len; /* data length */
121 size_t used; /* data used */
122 struct cds_list_head node; /* chunk_list node */
123 char data[];
124 };
125
126 struct registry_arena {
127 struct cds_list_head chunk_list;
128 };
129
130 static struct registry_arena registry_arena = {
131 .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list),
132 };
133
134 /* Saved fork signal mask, protected by rcu_gp_lock */
135 static sigset_t saved_fork_signal_mask;
136
137 static void rcu_gc_registry(void);
138
/*
 * Acquire @mutex, terminating through urcu_die() on any unexpected error.
 *
 * When DISTRUST_SIGNALS_EXTREME is defined the mutex is polled with
 * pthread_mutex_trylock(), pausing 10 ms (poll() timeout) between attempts
 * while it reports EBUSY or EINTR.
 */
static void mutex_lock(pthread_mutex_t *mutex)
{
	int err;

#ifdef DISTRUST_SIGNALS_EXTREME
	for (;;) {
		err = pthread_mutex_trylock(mutex);
		if (err == 0)
			break;
		if (err != EBUSY && err != EINTR)
			urcu_die(err);
		poll(NULL,0,10);
	}
#else /* #ifdef DISTRUST_SIGNALS_EXTREME */
	err = pthread_mutex_lock(mutex);
	if (err != 0)
		urcu_die(err);
#endif /* #else #ifdef DISTRUST_SIGNALS_EXTREME */
}
155
/* Release @mutex; any error from pthread_mutex_unlock() goes to urcu_die(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
	int err = pthread_mutex_unlock(mutex);

	if (err != 0)
		urcu_die(err);
}
164
165 void update_counter_and_wait(void)
166 {
167 CDS_LIST_HEAD(qsreaders);
168 int wait_loops = 0;
169 struct rcu_reader *index, *tmp;
170
171 /* Switch parity: 0 -> 1, 1 -> 0 */
172 CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR_PHASE);
173
174 /*
175 * Must commit qparity update to memory before waiting for other parity
176 * quiescent state. Failure to do so could result in the writer waiting
177 * forever while new readers are always accessing data (no progress).
178 * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED.
179 */
180
181 /*
182 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
183 * model easier to understand. It does not have a big performance impact
184 * anyway, given this is the write-side.
185 */
186 cmm_smp_mb();
187
188 /*
189 * Wait for each thread rcu_reader.ctr count to become 0.
190 */
191 for (;;) {
192 wait_loops++;
193 cds_list_for_each_entry_safe(index, tmp, &registry, node) {
194 if (!rcu_old_gp_ongoing(&index->ctr))
195 cds_list_move(&index->node, &qsreaders);
196 }
197
198 if (cds_list_empty(&registry)) {
199 break;
200 } else {
201 if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS)
202 usleep(RCU_SLEEP_DELAY);
203 else
204 caa_cpu_relax();
205 }
206 }
207 /* put back the reader list in the registry */
208 cds_list_splice(&qsreaders, &registry);
209 }
210
211 void synchronize_rcu(void)
212 {
213 sigset_t newmask, oldmask;
214 int ret;
215
216 ret = sigfillset(&newmask);
217 assert(!ret);
218 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
219 assert(!ret);
220
221 mutex_lock(&rcu_gp_lock);
222
223 if (cds_list_empty(&registry))
224 goto out;
225
226 /* All threads should read qparity before accessing data structure
227 * where new ptr points to. */
228 /* Write new ptr before changing the qparity */
229 cmm_smp_mb();
230
231 /* Remove old registry elements */
232 rcu_gc_registry();
233
234 /*
235 * Wait for previous parity to be empty of readers.
236 */
237 update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
238
239 /*
240 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
241 * model easier to understand. It does not have a big performance impact
242 * anyway, given this is the write-side.
243 */
244 cmm_smp_mb();
245
246 /*
247 * Wait for previous parity to be empty of readers.
248 */
249 update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
250
251 /*
252 * Finish waiting for reader threads before letting the old ptr being
253 * freed.
254 */
255 cmm_smp_mb();
256 out:
257 mutex_unlock(&rcu_gp_lock);
258 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
259 assert(!ret);
260 }
261
262 /*
263 * library wrappers to be used by non-LGPL compatible source code.
264 */
265
/* Out-of-line entry point that simply invokes _rcu_read_lock(). */
void rcu_read_lock(void)
{
	_rcu_read_lock();
}
270
/* Out-of-line entry point that simply invokes _rcu_read_unlock(). */
void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}
275
276 /*
277 * Only grow for now. If empty, allocate a ARENA_INIT_ALLOC sized chunk.
278 * Else, try expanding the last chunk. If this fails, allocate a new
279 * chunk twice as big as the last chunk.
280 * Memory used by chunks _never_ moves. A chunk could theoretically be
281 * freed when all "used" slots are released, but we don't do it at this
282 * point.
283 */
284 static
285 void expand_arena(struct registry_arena *arena)
286 {
287 struct registry_chunk *new_chunk, *last_chunk;
288 size_t old_chunk_len, new_chunk_len;
289
290 /* No chunk. */
291 if (cds_list_empty(&arena->chunk_list)) {
292 assert(ARENA_INIT_ALLOC >=
293 sizeof(struct registry_chunk)
294 + sizeof(struct rcu_reader));
295 new_chunk_len = ARENA_INIT_ALLOC;
296 new_chunk = mmap(NULL, new_chunk_len,
297 PROT_READ | PROT_WRITE,
298 MAP_ANONYMOUS | MAP_PRIVATE,
299 -1, 0);
300 if (new_chunk == MAP_FAILED)
301 abort();
302 bzero(new_chunk, new_chunk_len);
303 new_chunk->data_len =
304 new_chunk_len - sizeof(struct registry_chunk);
305 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
306 return; /* We're done. */
307 }
308
309 /* Try expanding last chunk. */
310 last_chunk = cds_list_entry(arena->chunk_list.prev,
311 struct registry_chunk, node);
312 old_chunk_len =
313 last_chunk->data_len + sizeof(struct registry_chunk);
314 new_chunk_len = old_chunk_len << 1;
315
316 /* Don't allow memory mapping to move, just expand. */
317 new_chunk = mremap_wrapper(last_chunk, old_chunk_len,
318 new_chunk_len, 0);
319 if (new_chunk != MAP_FAILED) {
320 /* Should not have moved. */
321 assert(new_chunk == last_chunk);
322 bzero((char *) last_chunk + old_chunk_len,
323 new_chunk_len - old_chunk_len);
324 last_chunk->data_len =
325 new_chunk_len - sizeof(struct registry_chunk);
326 return; /* We're done. */
327 }
328
329 /* Remap did not succeed, we need to add a new chunk. */
330 new_chunk = mmap(NULL, new_chunk_len,
331 PROT_READ | PROT_WRITE,
332 MAP_ANONYMOUS | MAP_PRIVATE,
333 -1, 0);
334 if (new_chunk == MAP_FAILED)
335 abort();
336 bzero(new_chunk, new_chunk_len);
337 new_chunk->data_len =
338 new_chunk_len - sizeof(struct registry_chunk);
339 cds_list_add_tail(&new_chunk->node, &arena->chunk_list);
340 }
341
342 static
343 struct rcu_reader *arena_alloc(struct registry_arena *arena)
344 {
345 struct registry_chunk *chunk;
346 struct rcu_reader *rcu_reader_reg;
347 int expand_done = 0; /* Only allow to expand once per alloc */
348 size_t len = sizeof(struct rcu_reader);
349
350 retry:
351 cds_list_for_each_entry(chunk, &arena->chunk_list, node) {
352 if (chunk->data_len - chunk->used < len)
353 continue;
354 /* Find spot */
355 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
356 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
357 rcu_reader_reg++) {
358 if (!rcu_reader_reg->alloc) {
359 rcu_reader_reg->alloc = 1;
360 chunk->used += len;
361 return rcu_reader_reg;
362 }
363 }
364 }
365
366 if (!expand_done) {
367 expand_arena(arena);
368 expand_done = 1;
369 goto retry;
370 }
371
372 return NULL;
373 }
374
375 /* Called with signals off and mutex locked */
376 static
377 void add_thread(void)
378 {
379 struct rcu_reader *rcu_reader_reg;
380
381 rcu_reader_reg = arena_alloc(&registry_arena);
382 if (!rcu_reader_reg)
383 abort();
384
385 /* Add to registry */
386 rcu_reader_reg->tid = pthread_self();
387 assert(rcu_reader_reg->ctr == 0);
388 cds_list_add(&rcu_reader_reg->node, &registry);
389 /*
390 * Reader threads are pointing to the reader registry. This is
391 * why its memory should never be relocated.
392 */
393 URCU_TLS(rcu_reader) = rcu_reader_reg;
394 }
395
396 /* Called with signals off and mutex locked */
397 static void rcu_gc_registry(void)
398 {
399 struct registry_chunk *chunk;
400 struct rcu_reader *rcu_reader_reg;
401
402 cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) {
403 for (rcu_reader_reg = (struct rcu_reader *) &chunk->data[0];
404 rcu_reader_reg < (struct rcu_reader *) &chunk->data[chunk->data_len];
405 rcu_reader_reg++) {
406 pthread_t tid;
407 int ret;
408
409 if (!rcu_reader_reg->alloc)
410 continue;
411 tid = rcu_reader_reg->tid;
412 ret = pthread_kill(tid, 0);
413 assert(ret != EINVAL);
414 if (ret == ESRCH) {
415 cds_list_del(&rcu_reader_reg->node);
416 rcu_reader_reg->ctr = 0;
417 rcu_reader_reg->alloc = 0;
418 chunk->used -= sizeof(struct rcu_reader);
419 }
420
421 }
422 }
423 }
424
425 /* Disable signals, take mutex, add to registry */
426 void rcu_bp_register(void)
427 {
428 sigset_t newmask, oldmask;
429 int ret;
430
431 ret = sigfillset(&newmask);
432 assert(!ret);
433 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
434 assert(!ret);
435
436 /*
437 * Check if a signal concurrently registered our thread since
438 * the check in rcu_read_lock(). */
439 if (URCU_TLS(rcu_reader))
440 goto end;
441
442 mutex_lock(&rcu_gp_lock);
443 add_thread();
444 mutex_unlock(&rcu_gp_lock);
445 end:
446 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
447 assert(!ret);
448 }
449
450 void rcu_bp_exit(void)
451 {
452 struct registry_chunk *chunk, *tmp;
453
454 cds_list_for_each_entry_safe(chunk, tmp,
455 &registry_arena.chunk_list, node) {
456 munmap(chunk, chunk->data_len + sizeof(struct registry_chunk));
457 }
458 }
459
460 /*
461 * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
462 * a concurrent thread executing with this same lock held. This ensures that the
463 * registry is in a coherent state in the child.
464 */
465 void rcu_bp_before_fork(void)
466 {
467 sigset_t newmask, oldmask;
468 int ret;
469
470 ret = sigfillset(&newmask);
471 assert(!ret);
472 ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
473 assert(!ret);
474 mutex_lock(&rcu_gp_lock);
475 saved_fork_signal_mask = oldmask;
476 }
477
478 void rcu_bp_after_fork_parent(void)
479 {
480 sigset_t oldmask;
481 int ret;
482
483 oldmask = saved_fork_signal_mask;
484 mutex_unlock(&rcu_gp_lock);
485 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
486 assert(!ret);
487 }
488
489 void rcu_bp_after_fork_child(void)
490 {
491 sigset_t oldmask;
492 int ret;
493
494 rcu_gc_registry();
495 oldmask = saved_fork_signal_mask;
496 mutex_unlock(&rcu_gp_lock);
497 ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
498 assert(!ret);
499 }
500
/* Out-of-line symbol returning the result of _rcu_dereference() on @p. */
void *rcu_dereference_sym_bp(void *p)
{
	void *value = _rcu_dereference(p);

	return value;
}
505
/*
 * Out-of-line symbol: issue a write barrier, store @v into *@p with
 * uatomic_set(), and return @v.
 */
void *rcu_set_pointer_sym_bp(void **p, void *v)
{
	/* Barrier is issued before the pointer store. */
	cmm_wmb();
	uatomic_set(p, v);

	return v;
}
512
/*
 * Out-of-line symbol: issue a write barrier, then exchange *@p with @v
 * through uatomic_xchg() and return what uatomic_xchg() returned.
 */
void *rcu_xchg_pointer_sym_bp(void **p, void *v)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_xchg(p, v);
	return previous;
}
518
/*
 * Out-of-line symbol: issue a write barrier, then run
 * uatomic_cmpxchg(@p, @old, @_new) and return its result.
 */
void *rcu_cmpxchg_pointer_sym_bp(void **p, void *old, void *_new)
{
	void *previous;

	cmm_wmb();
	previous = uatomic_cmpxchg(p, old, _new);
	return previous;
}
524
525 DEFINE_RCU_FLAVOR(rcu_flavor);
526
527 #include "urcu-call-rcu-impl.h"
528 #include "urcu-defer-impl.h"
This page took 0.03988 seconds and 4 git commands to generate.