/*
 * urcu-bp.c
 *
 * Userspace RCU library, "bulletproof" version.
 *
 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
 */

#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <poll.h>
#include <unistd.h>
#include <sys/mman.h>

#include "urcu-bp-static.h"
/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
#include "urcu-bp.h"

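/*
 * Typical usage sketch (illustrative only; not part of this file).
 * Readers need no explicit registration with this "bulletproof" flavor:
 * a thread's first rcu_read_lock() registers it automatically.
 * rcu_dereference() and rcu_assign_pointer() are provided by the urcu
 * headers.
 *
 *	Reader:
 *		rcu_read_lock();
 *		p = rcu_dereference(gp);
 *		if (p)
 *			do_something_with(p);
 *		rcu_read_unlock();
 *
 *	Writer (replacing the object gp points to):
 *		old = gp;
 *		rcu_assign_pointer(gp, new);
 *		synchronize_rcu();	/- wait for pre-existing readers -/
 *		free(old);
 */
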
/* Sleep delay in us */
#define RCU_SLEEP_DELAY	1000
/* Initial registry arena allocation: room for 16 reader structs. */
#define ARENA_INIT_ALLOC	(16 * sizeof(struct urcu_reader))

void __attribute__((destructor)) urcu_bp_exit(void);

static pthread_mutex_t urcu_mutex = PTHREAD_MUTEX_INITIALIZER;

#ifdef DEBUG_YIELD
unsigned int yield_active;
unsigned int __thread rand_yield;
#endif

/*
 * Global grace period counter.
 * Contains the current RCU_GP_CTR_BIT.
 * Also has a RCU_GP_COUNT of 1, to accelerate the reader fast path.
 * Written to only by writer with mutex taken. Read by both writer and readers.
 */
long urcu_gp_ctr = RCU_GP_COUNT;

/*
 * Per-thread pointer to this thread's registry element. Written to only
 * by each individual reader. Read by both the reader and the writers.
 */
struct urcu_reader __thread *urcu_reader;

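/*
 * List of registered reader structures, chained through urcu_reader::head.
 * Protected by urcu_mutex.
 */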
static LIST_HEAD(registry);

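/*
 * Arena backing the reader registry. mmap()'d rather than malloc()'d so
 * it can be handed out in fixed-size struct urcu_reader slots that stay
 * valid for the lifetime of the library.
 */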
struct registry_arena {
	void *p;	/* base of the mapping */
	size_t len;	/* mapped length, in bytes */
	size_t used;	/* bytes handed out to reader slots */
};

static struct registry_arena registry_arena;

static void rcu_gc_registry(void);

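/*
 * Acquire urcu_mutex. With DISTRUST_SIGNALS_EXTREME defined, spin with
 * trylock (retrying every 10 ms) instead of blocking in the kernel.
 */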
static void internal_urcu_lock(void)
{
	int ret;

#ifndef DISTRUST_SIGNALS_EXTREME
	ret = pthread_mutex_lock(&urcu_mutex);
	if (ret) {
		perror("Error in pthread mutex lock");
		exit(-1);
	}
#else /* #ifndef DISTRUST_SIGNALS_EXTREME */
	while ((ret = pthread_mutex_trylock(&urcu_mutex)) != 0) {
		if (ret != EBUSY && ret != EINTR) {
			printf("ret = %d, errno = %d\n", ret, errno);
			perror("Error in pthread mutex lock");
			exit(-1);
		}
		/*
		 * Unlike the urcu.c flavor, urcu-bp has no need_mb signal
		 * handshake to service while waiting for the mutex.
		 */
		poll(NULL, 0, 10);
	}
#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
}

static void internal_urcu_unlock(void)
{
	int ret;

	ret = pthread_mutex_unlock(&urcu_mutex);
	if (ret) {
		perror("Error in pthread mutex unlock");
		exit(-1);
	}
}

/*
 * Called with urcu_mutex held.
 */
static void switch_next_urcu_qparity(void)
{
	STORE_SHARED(urcu_gp_ctr, urcu_gp_ctr ^ RCU_GP_CTR_BIT);
}

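/*
 * Wait until no reader remains nested within the previous grace period.
 * Busy-waits with cpu_relax() for the first RCU_QS_ACTIVE_ATTEMPTS
 * iterations, then sleeps RCU_SLEEP_DELAY us between checks so active
 * readers are not starved of CPU time.
 */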
void wait_for_quiescent_state(void)
{
	LIST_HEAD(qsreaders);
	int wait_loops = 0;
	struct urcu_reader *index, *tmp;

	if (list_empty(&registry))
		return;
	/*
	 * Wait for each thread's urcu_reader.ctr to indicate quiescence
	 * (0, or a copy of the current urcu_gp_ctr parity).
	 */
	for (;;) {
		wait_loops++;
		list_for_each_entry_safe(index, tmp, &registry, head) {
			if (!rcu_old_gp_ongoing(&index->ctr))
				list_move(&index->head, &qsreaders);
		}

		if (list_empty(&registry)) {
			break;
		} else {
			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
				usleep(RCU_SLEEP_DELAY);
			else
				cpu_relax();
		}
	}
	/* Put back the reader list in the registry. */
	list_splice(&qsreaders, &registry);
}

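/*
 * Grace period: flip the grace-period parity twice, waiting after each
 * flip until no pre-existing reader remains in the previous parity.
 * Two flip+wait phases close the race with readers that sample
 * urcu_gp_ctr concurrently with a flip.
 */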
void synchronize_rcu(void)
{
	sigset_t newmask, oldmask;
	int ret;

	/* Block all signals so no handler can recurse into the library. */
	ret = sigfillset(&newmask);
	assert(!ret);
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	assert(!ret);

	internal_urcu_lock();

	/* Remove old registry elements */
	rcu_gc_registry();

	/* All threads should read qparity before accessing the data
	 * structure pointed to by the new ptr. Must be done within
	 * internal_urcu_lock because it iterates on reader threads. */
	/* Write new ptr before changing the qparity */
	smp_mb();

	switch_next_urcu_qparity();	/* 0 -> 1 */

	/*
	 * Must commit qparity update to memory before waiting for parity
	 * 0 quiescent state. Failure to do so could result in the writer
	 * waiting forever while new readers are always accessing data (no
	 * progress).
	 * Ensured by STORE_SHARED and LOAD_SHARED.
	 */

	/*
	 * Adding a smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	smp_mb();

	/*
	 * Wait for previous parity to be empty of readers.
	 */
	wait_for_quiescent_state();	/* Wait readers in parity 0 */

	/*
	 * Must finish waiting for quiescent state for parity 0 before
	 * committing qparity update to memory. Failure to do so could result in
	 * the writer waiting forever while new readers are always accessing
	 * data (no progress).
	 * Ensured by STORE_SHARED and LOAD_SHARED.
	 */

	/*
	 * Adding a smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	smp_mb();

	switch_next_urcu_qparity();	/* 1 -> 0 */

	/*
	 * Must commit qparity update to memory before waiting for parity
	 * 1 quiescent state. Failure to do so could result in the writer
	 * waiting forever while new readers are always accessing data (no
	 * progress).
	 * Ensured by STORE_SHARED and LOAD_SHARED.
	 */

	/*
	 * Adding a smp_mb() which is _not_ formally required, but makes the
	 * model easier to understand. It does not have a big performance impact
	 * anyway, given this is the write-side.
	 */
	smp_mb();

	/*
	 * Wait for previous parity to be empty of readers.
	 */
	wait_for_quiescent_state();	/* Wait readers in parity 1 */

	/* Finish waiting for reader threads before letting the old ptr
	 * be freed. Must be done within internal_urcu_lock because it
	 * iterates on reader threads. */
	smp_mb();

	internal_urcu_unlock();
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	assert(!ret);
}

/*
 * Library wrappers to be used by non-LGPL compatible source code.
 */

void rcu_read_lock(void)
{
	_rcu_read_lock();
}

void rcu_read_unlock(void)
{
	_rcu_read_unlock();
}

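/*
 * LGPL-compatible applications may instead #define _LGPL_SOURCE before
 * including urcu-bp.h to get the inline _rcu_read_lock()/_rcu_read_unlock()
 * fast paths directly, bypassing these out-of-line wrappers.
 */
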
/*
 * Only grow for now.
 */
static void resize_arena(struct registry_arena *arena, size_t len)
{
	void *new_arena;

	new_arena = mmap(arena->p, len,
			 PROT_READ | PROT_WRITE,
			 MAP_ANONYMOUS | MAP_PRIVATE,
			 -1, 0);
	assert(new_arena != MAP_FAILED);

	/*
	 * Re-used the same region?
	 */
	if (new_arena == arena->p) {
		arena->len = len;
		return;
	}

	if (arena->p)
		memcpy(new_arena, arena->p, arena->len);
	bzero(new_arena + arena->len, len - arena->len);
	/*
	 * The old mapping is deliberately not unmapped: registered
	 * readers may still hold pointers into it.
	 */
	arena->p = new_arena;
	arena->len = len;
}

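/*
 * Reader slots are never freed: a reader thread cannot unregister.
 * Slots of exited threads are recycled by rcu_gc_registry() instead.
 */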
/* Called with signals off and mutex locked */
static void add_thread(void)
{
	struct urcu_reader *urcu_reader_reg;

	if (registry_arena.len
	    < registry_arena.used + sizeof(struct urcu_reader))
		resize_arena(&registry_arena,
			     max(registry_arena.len << 1, ARENA_INIT_ALLOC));
	/*
	 * Find a free spot.
	 */
	for (urcu_reader_reg = registry_arena.p;
	     (void *)urcu_reader_reg < registry_arena.p + registry_arena.len;
	     urcu_reader_reg++) {
		if (!urcu_reader_reg->alloc)
			break;
	}
	urcu_reader_reg->alloc = 1;
	registry_arena.used += sizeof(struct urcu_reader);

	/* Add to registry */
	urcu_reader_reg->tid = pthread_self();
	assert(urcu_reader_reg->ctr == 0);
	list_add(&urcu_reader_reg->head, &registry);
	urcu_reader = urcu_reader_reg;
}

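/*
 * Probe each registered thread with pthread_kill(tid, 0) and reclaim the
 * slots of threads that no longer exist (ESRCH). Note this relies on the
 * implementation returning ESRCH for terminated threads; POSIX leaves
 * passing the id of a terminated thread undefined.
 */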
/* Called with signals off and mutex locked */
static void rcu_gc_registry(void)
{
	struct urcu_reader *urcu_reader_reg;
	pthread_t tid;
	int ret;

	for (urcu_reader_reg = registry_arena.p;
	     (void *)urcu_reader_reg < registry_arena.p + registry_arena.len;
	     urcu_reader_reg++) {
		if (!urcu_reader_reg->alloc)
			continue;
		tid = urcu_reader_reg->tid;
		ret = pthread_kill(tid, 0);
		assert(ret != EINVAL);
		if (ret == ESRCH) {
			list_del(&urcu_reader_reg->head);
			urcu_reader_reg->alloc = 0;
			registry_arena.used -= sizeof(struct urcu_reader);
		}
	}
}

/* Disable signals, take mutex, add to registry */
void rcu_bp_register(void)
{
	sigset_t newmask, oldmask;
	int ret;

	ret = sigfillset(&newmask);
	assert(!ret);
	ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
	assert(!ret);

	/*
	 * Check if a signal handler concurrently registered our thread
	 * since the check in rcu_read_lock().
	 */
	if (urcu_reader)
		goto end;

	internal_urcu_lock();
	add_thread();
	internal_urcu_unlock();
end:
	ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
	assert(!ret);
}

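/*
 * Library destructor: releases the current registry arena mapping when
 * the process (or the library) is torn down.
 */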
void urcu_bp_exit(void)
{
	munmap(registry_arena.p, registry_arena.len);
}