/*
 * rcutorture.h: simple user-level performance/stress test of RCU.
 *
 * Usage:
 *	./rcu <nreaders> rperf [ <cpustride> ]
 *		Run a read-side performance test with the specified
 *		number of readers spaced by <cpustride>.
 *		Thus "./rcu 16 rperf 2" would run 16 readers on even-numbered
 *		CPUs from 0 to 30.
 *	./rcu <nupdaters> uperf [ <cpustride> ]
 *		Run an update-side performance test with the specified
 *		number of updaters and specified CPU spacing.
 *	./rcu <nreaders> perf [ <cpustride> ]
 *		Run a combined read/update performance test with the specified
 *		number of readers and one updater and specified CPU spacing.
 *		The readers run on the low-numbered CPUs and the updater
 *		on the highest-numbered CPU.
 *
 * The above tests produce output as follows:
 *
 *	n_reads: 46008000  n_updates: 146026  nreaders: 2  nupdaters: 1 duration: 1
 *	ns/read: 43.4707  ns/update: 6848.1
 *
 * The first line lists the total number of RCU reads and updates executed
 * during the test, the number of reader threads, the number of updater
 * threads, and the duration of the test in seconds.  The second line
 * lists the average duration of each type of operation in nanoseconds,
 * or "nan" if the corresponding type of operation was not performed.
 *
 *	./rcu <nreaders> stress
 *		Run a stress test with the specified number of readers and
 *		one updater.  None of the threads are bound to any
 *		particular CPU.
 *
 * This test produces output as follows:
 *
 *	n_reads: 114633217  n_updates: 3903415  n_mberror: 0
 *	rcu_stress_count: 114618391 14826 0 0 0 0 0 0 0 0 0
 *
 * The first line lists the number of RCU read and update operations
 * executed, followed by the number of memory-ordering violations
 * (which will be zero in a correct RCU implementation).  The second
 * line lists the number of readers observing progressively more stale
 * data.  A correct RCU implementation will have all but the first two
 * numbers zero.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Copyright (c) 2008 Paul E. McKenney, IBM Corporation.
 */

/*
 * Test variables.
 */

#include <stdlib.h>

DEFINE_PER_THREAD(long long, n_reads_pt);
DEFINE_PER_THREAD(long long, n_updates_pt);

long long n_reads = 0LL;
long n_updates = 0L;
int nthreadsrunning;
char argsbuf[64];

#define GOFLAG_INIT 0
#define GOFLAG_RUN  1
#define GOFLAG_STOP 2

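/*
 * goflag is the start/stop gate shared by all test threads.  It is a
 * plain volatile flag; the cmm_smp_mb() calls surrounding its updates
 * supply the required ordering, and the cache-line alignment below
 * keeps it from sharing a line with other frequently written globals.
 */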
volatile int goflag __attribute__((__aligned__(CAA_CACHE_LINE_SIZE)))
		= GOFLAG_INIT;

#define RCU_READ_RUN 1000

#define RCU_READ_NESTABLE

#ifdef RCU_READ_NESTABLE
#define rcu_read_lock_nest() rcu_read_lock()
#define rcu_read_unlock_nest() rcu_read_unlock()
#else /* #ifdef RCU_READ_NESTABLE */
#define rcu_read_lock_nest()
#define rcu_read_unlock_nest()
#endif /* #else #ifdef RCU_READ_NESTABLE */

#ifdef TORTURE_QSBR
#define mark_rcu_quiescent_state	rcu_quiescent_state
#define put_thread_offline		rcu_thread_offline
#define put_thread_online		rcu_thread_online
#endif

#ifndef mark_rcu_quiescent_state
#define mark_rcu_quiescent_state() do ; while (0)
#endif /* #ifndef mark_rcu_quiescent_state */

#ifndef put_thread_offline
#define put_thread_offline() do ; while (0)
#define put_thread_online() do ; while (0)
#define put_thread_online_delay() do ; while (0)
#else /* #ifndef put_thread_offline */
#define put_thread_online_delay() synchronize_rcu()
#endif /* #else #ifndef put_thread_offline */

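/*
 * Under the QSBR flavor (TORTURE_QSBR), readers must announce quiescent
 * states explicitly, so the macros above map onto the urcu-qsbr API.
 * For the other flavors they collapse to no-ops, letting the same test
 * code torture every RCU implementation.
 */
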
/*
 * Performance test.
 */

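/*
 * Reader thread for the performance tests: binds itself to its assigned
 * CPU, then spins executing empty rcu_read_lock()/rcu_read_unlock()
 * pairs in batches of RCU_READ_RUN, accumulating the count in a
 * per-thread counter to avoid cache-line bouncing on a shared variable.
 */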
void *rcu_read_perf_test(void *arg)
{
	struct call_rcu_data *crdp;
	int i;
	int me = (long)arg;
	long long n_reads_local = 0;

	rcu_register_thread();
	run_on(me);
	uatomic_inc(&nthreadsrunning);
	put_thread_offline();
	while (goflag == GOFLAG_INIT)
		(void) poll(NULL, 0, 1);
	put_thread_online();
	while (goflag == GOFLAG_RUN) {
		for (i = 0; i < RCU_READ_RUN; i++) {
			rcu_read_lock();
			/* rcu_read_lock_nest(); */
			/* rcu_read_unlock_nest(); */
			rcu_read_unlock();
		}
		n_reads_local += RCU_READ_RUN;
		mark_rcu_quiescent_state();
	}
	__get_thread_var(n_reads_pt) += n_reads_local;
	put_thread_offline();
	crdp = get_thread_call_rcu_data();
	set_thread_call_rcu_data(NULL);
	call_rcu_data_free(crdp);
	rcu_unregister_thread();

	return (NULL);
}

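/*
 * Updater thread for the performance tests: repeatedly invokes
 * synchronize_rcu() and counts completed grace periods.  With
 * probability 1/16 (bits 8-11 of random() all zero) it first creates a
 * per-thread call_rcu() worker, so that configuration gets exercised
 * as well.
 */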
void *rcu_update_perf_test(void *arg)
{
	long long n_updates_local = 0;

	if ((random() & 0xf00) == 0) {
		struct call_rcu_data *crdp;

		crdp = create_call_rcu_data(0, -1);
		if (crdp != NULL) {
			fprintf(stderr,
				"Using per-thread call_rcu() worker.\n");
			set_thread_call_rcu_data(crdp);
		}
	}
	uatomic_inc(&nthreadsrunning);
	while (goflag == GOFLAG_INIT)
		(void) poll(NULL, 0, 1);
	while (goflag == GOFLAG_RUN) {
		synchronize_rcu();
		n_updates_local++;
	}
	__get_thread_var(n_updates_pt) += n_updates_local;
	return NULL;
}

void perftestinit(void)
{
	init_per_thread(n_reads_pt, 0LL);
	init_per_thread(n_updates_pt, 0LL);
	uatomic_set(&nthreadsrunning, 0);
}

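/*
 * Common driver for the performance tests: waits until all nthreads
 * test threads have checked in, opens the goflag gate for "duration"
 * seconds, closes it again, then sums the per-thread counters and
 * prints the throughput figures described in the header comment.
 */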
void perftestrun(int nthreads, int nreaders, int nupdaters)
{
	int t;
	int duration = 1;

	cmm_smp_mb();
	while (uatomic_read(&nthreadsrunning) < nthreads)
		(void) poll(NULL, 0, 1);
	goflag = GOFLAG_RUN;
	cmm_smp_mb();
	sleep(duration);
	cmm_smp_mb();
	goflag = GOFLAG_STOP;
	cmm_smp_mb();
	wait_all_threads();
	for_each_thread(t) {
		n_reads += per_thread(n_reads_pt, t);
		n_updates += per_thread(n_updates_pt, t);
	}
	printf("n_reads: %lld n_updates: %ld nreaders: %d nupdaters: %d duration: %d\n",
	       n_reads, n_updates, nreaders, nupdaters, duration);
	printf("ns/read: %g ns/update: %g\n",
	       ((duration * 1000*1000*1000.*(double)nreaders) /
	        (double)n_reads),
	       ((duration * 1000*1000*1000.*(double)nupdaters) /
	        (double)n_updates));
	if (get_cpu_call_rcu_data(0)) {
		fprintf(stderr, "Deallocating per-CPU call_rcu threads.\n");
		free_all_cpu_call_rcu_data();
	}
	exit(0);
}

void perftest(int nreaders, int cpustride)
{
	int i;
	long arg;

	perftestinit();
	for (i = 0; i < nreaders; i++) {
		arg = (long)(i * cpustride);
		create_thread(rcu_read_perf_test, (void *)arg);
	}
	arg = (long)(i * cpustride);
	create_thread(rcu_update_perf_test, (void *)arg);
	perftestrun(i + 1, nreaders, 1);
}

void rperftest(int nreaders, int cpustride)
{
	int i;
	long arg;

	perftestinit();
	init_per_thread(n_reads_pt, 0LL);
	for (i = 0; i < nreaders; i++) {
		arg = (long)(i * cpustride);
		create_thread(rcu_read_perf_test, (void *)arg);
	}
	perftestrun(i, nreaders, 0);
}

void uperftest(int nupdaters, int cpustride)
{
	int i;
	long arg;

	perftestinit();
	init_per_thread(n_reads_pt, 0LL);
	for (i = 0; i < nupdaters; i++) {
		arg = (long)(i * cpustride);
		create_thread(rcu_update_perf_test, (void *)arg);
	}
	perftestrun(i, 0, nupdaters);
}

/*
 * Stress test.
 */

#define RCU_STRESS_PIPE_LEN 10

struct rcu_stress {
	int pipe_count;
	int mbtest;
};

struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } };
struct rcu_stress *rcu_stress_current;
int rcu_stress_idx = 0;

int n_mberror = 0;
DEFINE_PER_THREAD(long long [RCU_STRESS_PIPE_LEN + 1], rcu_stress_count);

int garbage = 0;

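/*
 * The updater rotates rcu_stress_current around rcu_stress_array; each
 * element's pipe_count records how many updates have occurred since
 * that element was current.  Readers histogram the pipe_count values
 * they observe into rcu_stress_count[]: under correct RCU, a reader can
 * see only the current element or its immediate predecessor, so only
 * the first two buckets may be non-zero.
 */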
void *rcu_read_stress_test(void *arg)
{
	int i;
	int itercnt = 0;
	struct rcu_stress *p;
	int pc;

	rcu_register_thread();
	put_thread_offline();
	while (goflag == GOFLAG_INIT)
		(void) poll(NULL, 0, 1);
	put_thread_online();
	while (goflag == GOFLAG_RUN) {
		rcu_read_lock();
		p = rcu_dereference(rcu_stress_current);
		if (p->mbtest == 0)
			n_mberror++;
		rcu_read_lock_nest();
		for (i = 0; i < 100; i++)
			garbage++;
		rcu_read_unlock_nest();
		pc = p->pipe_count;
		rcu_read_unlock();
		if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0))
			pc = RCU_STRESS_PIPE_LEN;
		__get_thread_var(rcu_stress_count)[pc]++;
		__get_thread_var(n_reads_pt)++;
		mark_rcu_quiescent_state();
		if ((++itercnt % 0x1000) == 0) {
			put_thread_offline();
			put_thread_online_delay();
			put_thread_online();
		}
	}
	put_thread_offline();
	rcu_unregister_thread();

	return (NULL);
}

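/*
 * The stress-test updater alternates synchronize_rcu() with call_rcu().
 * The callback below signals a condition variable so that the updater
 * can block until the callback has run, turning call_rcu() into a
 * synchronous grace-period wait for the purposes of this test.
 */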
static pthread_mutex_t call_rcu_test_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t call_rcu_test_cond = PTHREAD_COND_INITIALIZER;

void rcu_update_stress_test_rcu(struct rcu_head *head)
{
	if (pthread_mutex_lock(&call_rcu_test_mutex) != 0) {
		perror("pthread_mutex_lock");
		exit(-1);
	}
	if (pthread_cond_signal(&call_rcu_test_cond) != 0) {
		perror("pthread_cond_signal");
		exit(-1);
	}
	if (pthread_mutex_unlock(&call_rcu_test_mutex) != 0) {
		perror("pthread_mutex_unlock");
		exit(-1);
	}
}

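/*
 * Updater for the stress test.  Before publishing the next array
 * element it clears mbtest, executes a full barrier, resets pipe_count,
 * and sets mbtest again; rcu_assign_pointer() then orders those stores
 * before the pointer update.  A reader that fetches the current pointer
 * yet sees mbtest == 0 has therefore witnessed a memory-ordering
 * violation, recorded in n_mberror.
 */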
void *rcu_update_stress_test(void *arg)
{
	int i;
	struct rcu_stress *p;
	struct rcu_head rh;

	while (goflag == GOFLAG_INIT)
		(void) poll(NULL, 0, 1);
	while (goflag == GOFLAG_RUN) {
		i = rcu_stress_idx + 1;
		if (i >= RCU_STRESS_PIPE_LEN)
			i = 0;
		p = &rcu_stress_array[i];
		p->mbtest = 0;
		cmm_smp_mb();
		p->pipe_count = 0;
		p->mbtest = 1;
		rcu_assign_pointer(rcu_stress_current, p);
		rcu_stress_idx = i;
		for (i = 0; i < RCU_STRESS_PIPE_LEN; i++)
			if (i != rcu_stress_idx)
				rcu_stress_array[i].pipe_count++;
		if (n_updates & 0x1)
			synchronize_rcu();
		else {
			if (pthread_mutex_lock(&call_rcu_test_mutex) != 0) {
				perror("pthread_mutex_lock");
				exit(-1);
			}
			call_rcu(&rh, rcu_update_stress_test_rcu);
			if (pthread_cond_wait(&call_rcu_test_cond,
			    &call_rcu_test_mutex) != 0) {
				perror("pthread_cond_wait");
				exit(-1);
			}
			if (pthread_mutex_unlock(&call_rcu_test_mutex) != 0) {
				perror("pthread_mutex_unlock");
				exit(-1);
			}
		}
		n_updates++;
	}
	return NULL;
}

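/*
 * Fake updater threads add update-side load: they loop on
 * synchronize_rcu() (occasionally through a private call_rcu() worker)
 * without touching the stress pipeline, stressing grace-period
 * detection under contention.
 */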
void *rcu_fake_update_stress_test(void *arg)
{
	if ((random() & 0xf00) == 0) {
		struct call_rcu_data *crdp;

		crdp = create_call_rcu_data(0, -1);
		if (crdp != NULL) {
			fprintf(stderr,
				"Using per-thread call_rcu() worker.\n");
			set_thread_call_rcu_data(crdp);
		}
	}
	while (goflag == GOFLAG_INIT)
		(void) poll(NULL, 0, 1);
	while (goflag == GOFLAG_RUN) {
		synchronize_rcu();
		(void) poll(NULL, 0, 1);
	}
	return NULL;
}

void stresstest(int nreaders)
{
	int i;
	int t;
	long long *p;
	long long sum;

	init_per_thread(n_reads_pt, 0LL);
	for_each_thread(t) {
		p = &per_thread(rcu_stress_count, t)[0];
		for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++)
			p[i] = 0LL;
	}
	rcu_stress_current = &rcu_stress_array[0];
	rcu_stress_current->pipe_count = 0;
	rcu_stress_current->mbtest = 1;
	for (i = 0; i < nreaders; i++)
		create_thread(rcu_read_stress_test, NULL);
	create_thread(rcu_update_stress_test, NULL);
	for (i = 0; i < 5; i++)
		create_thread(rcu_fake_update_stress_test, NULL);
	cmm_smp_mb();
	goflag = GOFLAG_RUN;
	cmm_smp_mb();
	sleep(10);
	cmm_smp_mb();
	goflag = GOFLAG_STOP;
	cmm_smp_mb();
	wait_all_threads();
	for_each_thread(t)
		n_reads += per_thread(n_reads_pt, t);
	printf("n_reads: %lld n_updates: %ld n_mberror: %d\n",
	       n_reads, n_updates, n_mberror);
	printf("rcu_stress_count:");
	for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) {
		sum = 0LL;
		for_each_thread(t) {
			sum += per_thread(rcu_stress_count, t)[i];
		}
		printf(" %lld", sum);
	}
	printf("\n");
	if (get_cpu_call_rcu_data(0)) {
		fprintf(stderr, "Deallocating per-CPU call_rcu threads.\n");
		free_all_cpu_call_rcu_data();
	}
	exit(0);
}

/*
 * Main program.
 */

void usage(int argc, char *argv[])
{
	fprintf(stderr,
		"Usage: %s [ <nthreads> [ perf | rperf | uperf | stress ] [ <cpustride> ] ]\n",
		argv[0]);
	exit(-1);
}

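/*
 * Note that each test function terminates the process via exit(), so
 * control reaches the trailing usage() call only when argv[2] names no
 * known test.
 */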
int main(int argc, char *argv[])
{
	int nreaders = 1;
	int cpustride = 1;

	smp_init();
	//rcu_init();
	srandom(time(NULL));
	if (random() & 0x100) {
		fprintf(stderr, "Allocating per-CPU call_rcu threads.\n");
		if (create_all_cpu_call_rcu_data(0))
			perror("create_all_cpu_call_rcu_data");
	}

#ifdef DEBUG_YIELD
	yield_active |= YIELD_READ;
	yield_active |= YIELD_WRITE;
#endif

	if (argc > 1) {
		nreaders = strtoul(argv[1], NULL, 0);
		if (argc == 2)
			perftest(nreaders, cpustride);
		if (argc > 3)
			cpustride = strtoul(argv[3], NULL, 0);
		if (strcmp(argv[2], "perf") == 0)
			perftest(nreaders, cpustride);
		else if (strcmp(argv[2], "rperf") == 0)
			rperftest(nreaders, cpustride);
		else if (strcmp(argv[2], "uperf") == 0)
			uperftest(nreaders, cpustride);
		else if (strcmp(argv[2], "stress") == 0)
			stresstest(nreaders);
		usage(argc, argv);
	}
	perftest(nreaders, cpustride);
	return 0;
}